summaryrefslogtreecommitdiffstats
path: root/xlators/mgmt
diff options
context:
space:
mode:
authorXavier Hernandez <xhernandez@datalab.es>2014-05-15 10:35:14 +0200
committerVijay Bellur <vbellur@redhat.com>2014-07-11 10:34:24 -0700
commit1392da3e237d8ea080573909015916e3544a6d2c (patch)
tree89f7f37e65b5d526c18e043cc7dbb51c9e19a50e /xlators/mgmt
parentad112305a1c7452b13c92238b40ded80361838f3 (diff)
cli/glusterd: Added support for dispersed volumes
Two new options have been added to the 'create' command of the cli interface: disperse [<count>] redundancy <count> Both are optional. A dispersed volume is created by specifying, at least, one of them. If 'disperse' is missing or it's present but '<count>' does not, the number of bricks enumerated in the command line is taken as the disperse count. If 'redundancy' is missing, the lowest optimal value is assumed. A configuration is considered optimal (for most workloads) when the disperse count - redundancy count is a power of 2. If the resulting redundancy is 1, the volume is created normally, but if it's greater than 1, a warning is shown to the user and he/she must answer yes/no to continue volume creation. If there isn't any optimal value for the given number of bricks, a warning is also shown and, if the user accepts, a redundancy of 1 is used. If 'redundancy' is specified and the resulting volume is not optimal, another warning is shown to the user. A distributed-disperse volume can be created using a number of bricks multiple of the disperse count. Change-Id: Iab93efbe78e905cdb91f54f3741599f7ea6645e4 BUG: 1118629 Signed-off-by: Xavier Hernandez <xhernandez@datalab.es> Reviewed-on: http://review.gluster.org/7782 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Jeff Darcy <jdarcy@redhat.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/mgmt')
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c15
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c10
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c23
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c80
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c24
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c21
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h2
8 files changed, 168 insertions, 9 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index 452df759ad4..089c7d637c9 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -169,6 +169,12 @@ gd_addbr_validate_stripe_count (glusterd_volinfo_t *volinfo, int stripe_count,
}
}
break;
+ case GF_CLUSTER_TYPE_DISPERSE:
+ snprintf (err_str, err_len, "Volume %s cannot be converted "
+ "from dispersed to striped-"
+ "dispersed", volinfo->volname);
+ gf_log(THIS->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
}
out:
@@ -259,6 +265,12 @@ gd_addbr_validate_replica_count (glusterd_volinfo_t *volinfo, int replica_count,
}
}
break;
+ case GF_CLUSTER_TYPE_DISPERSE:
+ snprintf (err_str, err_len, "Volume %s cannot be converted "
+ "from dispersed to replicated-"
+ "dispersed", volinfo->volname);
+ gf_log(THIS->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
}
out:
return ret;
@@ -276,6 +288,7 @@ gd_rmbr_validate_replica_count (glusterd_volinfo_t *volinfo,
switch (volinfo->type) {
case GF_CLUSTER_TYPE_NONE:
case GF_CLUSTER_TYPE_STRIPE:
+ case GF_CLUSTER_TYPE_DISPERSE:
snprintf (err_str, err_len,
"replica count (%d) option given for non replicate "
"volume %s", replica_count, volinfo->volname);
@@ -737,6 +750,8 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)
strcpy (vol_type, "stripe");
} else if (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) {
strcpy (vol_type, "stripe-replicate");
+ } else if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) {
+ strcpy (vol_type, "disperse");
} else {
strcpy (vol_type, "distribute");
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index ed4bd60f88b..e10dc22b56b 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -398,6 +398,16 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
if (ret)
goto out;
+ snprintf (key, 256, "volume%d.disperse_count", count);
+ ret = dict_set_int32 (volumes, key, volinfo->disperse_count);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "volume%d.redundancy_count", count);
+ ret = dict_set_int32 (volumes, key, volinfo->redundancy_count);
+ if (ret)
+ goto out;
+
snprintf (key, 256, "volume%d.transport", count);
ret = dict_set_int32 (volumes, key, volinfo->transport_type);
if (ret)
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
index c31d8a8ad71..086a6550a72 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -844,6 +844,18 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo)
if (ret)
goto out;
+ snprintf (buf, sizeof (buf), "%d", volinfo->disperse_count);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT,
+ buf);
+ if (ret)
+ goto out;
+
+ snprintf (buf, sizeof (buf), "%d", volinfo->redundancy_count);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT,
+ buf);
+ if (ret)
+ goto out;
+
snprintf (buf, sizeof (buf), "%d", volinfo->version);
ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_VERSION, buf);
if (ret)
@@ -2618,6 +2630,12 @@ glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo)
} else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_REPLICA_CNT,
strlen (GLUSTERD_STORE_KEY_VOL_REPLICA_CNT))) {
volinfo->replica_count = atoi (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT,
+ strlen (GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT))) {
+ volinfo->disperse_count = atoi (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT,
+ strlen (GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT))) {
+ volinfo->redundancy_count = atoi (value);
} else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_TRANSPORT,
strlen (GLUSTERD_STORE_KEY_VOL_TRANSPORT))) {
volinfo->transport_type = atoi (value);
@@ -2754,6 +2772,11 @@ glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo)
GF_ASSERT (volinfo->replica_count > 0);
break;
+ case GF_CLUSTER_TYPE_DISPERSE:
+ GF_ASSERT (volinfo->disperse_count > 0);
+ GF_ASSERT (volinfo->redundancy_count > 0);
+ break;
+
default:
GF_ASSERT (0);
break;
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h
index 89cf24de789..fb7de7b1b10 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.h
+++ b/xlators/mgmt/glusterd/src/glusterd-store.h
@@ -44,6 +44,8 @@ typedef enum glusterd_store_ver_ac_{
#define GLUSTERD_STORE_KEY_VOL_SUB_COUNT "sub_count"
#define GLUSTERD_STORE_KEY_VOL_STRIPE_CNT "stripe_count"
#define GLUSTERD_STORE_KEY_VOL_REPLICA_CNT "replica_count"
+#define GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT "disperse_count"
+#define GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT "redundancy_count"
#define GLUSTERD_STORE_KEY_VOL_BRICK "brick"
#define GLUSTERD_STORE_KEY_VOL_VERSION "version"
#define GLUSTERD_STORE_KEY_VOL_TRANSPORT "transport-type"
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index dc923b1eeb4..aff2356eb4f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -548,6 +548,8 @@ glusterd_volinfo_dup (glusterd_volinfo_t *volinfo,
new_volinfo->type = volinfo->type;
new_volinfo->replica_count = volinfo->replica_count;
new_volinfo->stripe_count = volinfo->stripe_count;
+ new_volinfo->disperse_count = volinfo->disperse_count;
+ new_volinfo->redundancy_count = volinfo->redundancy_count;
new_volinfo->dist_leaf_count = volinfo->dist_leaf_count;
new_volinfo->sub_count = volinfo->sub_count;
new_volinfo->transport_type = volinfo->transport_type;
@@ -2525,6 +2527,18 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,
goto out;
memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s%d.disperse_count", prefix, count);
+ ret = dict_set_int32 (dict, key, volinfo->disperse_count);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s%d.redundancy_count", prefix, count);
+ ret = dict_set_int32 (dict, key, volinfo->redundancy_count);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "%s%d.dist_count", prefix, count);
ret = dict_set_int32 (dict, key, volinfo->dist_leaf_count);
if (ret)
@@ -4206,6 +4220,24 @@ glusterd_import_volinfo (dict_t *peer_data, int count,
gf_log (THIS->name, GF_LOG_INFO,
"peer is possibly old version");
+ /* not having a 'disperse_count' key is not a error
+ (as peer may be of old version) */
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s%d.disperse_count", prefix, count);
+ ret = dict_get_int32 (peer_data, key, &new_volinfo->disperse_count);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_INFO,
+ "peer is possibly old version");
+
+ /* not having a 'redundancy_count' key is not a error
+ (as peer may be of old version) */
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s%d.redundancy_count", prefix, count);
+ ret = dict_get_int32 (peer_data, key, &new_volinfo->redundancy_count);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_INFO,
+ "peer is possibly old version");
+
/* not having a 'dist_count' key is not a error
(as peer may be of old version) */
memset (key, 0, sizeof (key));
@@ -6932,6 +6964,9 @@ glusterd_get_dist_leaf_count (glusterd_volinfo_t *volinfo)
int rcount = volinfo->replica_count;
int scount = volinfo->stripe_count;
+ if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE)
+ return volinfo->disperse_count;
+
return (rcount ? rcount : 1) * (scount ? scount : 1);
}
@@ -11694,6 +11729,13 @@ gd_update_volume_op_versions (glusterd_volinfo_t *volinfo)
}
}
+ if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) {
+ if (volinfo->op_version < GD_OP_VERSION_3_6_0)
+ volinfo->op_version = GD_OP_VERSION_3_6_0;
+ if (volinfo->client_op_version < GD_OP_VERSION_3_6_0)
+ volinfo->client_op_version = GD_OP_VERSION_3_6_0;
+ }
+
return;
}
@@ -12774,7 +12816,7 @@ glusterd_volume_quorum_calculate (glusterd_volinfo_t *volinfo, dict_t *dict,
goto out;
}
- up_count = volinfo->replica_count - down_count;
+ up_count = volinfo->dist_leaf_count - down_count;
if (quorum_type && !strcmp (quorum_type, "fixed")) {
if (up_count >= quorum_count) {
@@ -12782,7 +12824,8 @@ glusterd_volume_quorum_calculate (glusterd_volinfo_t *volinfo, dict_t *dict,
goto out;
}
} else {
- if (volinfo->replica_count % 2 == 0) {
+ if ((GF_CLUSTER_TYPE_DISPERSE != volinfo->type) &&
+ (volinfo->dist_leaf_count % 2 == 0)) {
if ((up_count > quorum_count) ||
((up_count == quorum_count) && first_brick_on)) {
quorum_met = _gf_true;
@@ -12835,8 +12878,9 @@ glusterd_volume_quorum_check (glusterd_volinfo_t *volinfo, int64_t index,
goto out;
}
- if (!glusterd_is_volume_replicate (volinfo) ||
- volinfo->replica_count < 3) {
+ if ((!glusterd_is_volume_replicate (volinfo) ||
+ volinfo->replica_count < 3) &&
+ (GF_CLUSTER_TYPE_DISPERSE != volinfo->type)) {
for (i = 0; i < volinfo->brick_count ; i++) {
/* for a pure distribute volume, and replica volume
with replica count 2, quorum is not met if even
@@ -12858,7 +12902,8 @@ glusterd_volume_quorum_check (glusterd_volinfo_t *volinfo, int64_t index,
ret = 0;
quorum_met = _gf_true;
} else {
- distribute_subvols = volinfo->brick_count / volinfo->replica_count;
+ distribute_subvols = volinfo->brick_count /
+ volinfo->dist_leaf_count;
for (j = 0; j < distribute_subvols; j++) {
// by default assume quorum is not met
/* TODO: Handle distributed striped replicate volumes
@@ -12867,11 +12912,11 @@ glusterd_volume_quorum_check (glusterd_volinfo_t *volinfo, int64_t index,
*/
ret = 1;
quorum_met = _gf_false;
- for (i = 0; i < volinfo->replica_count; i++) {
+ for (i = 0; i < volinfo->dist_leaf_count; i++) {
snprintf (key, sizeof (key),
"%s%"PRId64".brick%"PRId64".status", key_prefix,
index,
- (j * volinfo->replica_count) + i);
+ (j * volinfo->dist_leaf_count) + i);
ret = dict_get_int32 (dict, key, &brick_online);
if (ret || !brick_online) {
if (i == 0)
@@ -13043,6 +13088,9 @@ glusterd_snap_quorum_check_for_create (dict_t *dict, gf_boolean_t snap_volume,
else
quorum_count =
volinfo->replica_count/2 + 1;
+ } else if (GF_CLUSTER_TYPE_DISPERSE == volinfo->type) {
+ quorum_count = volinfo->disperse_count -
+ volinfo->redundancy_count;
} else {
quorum_count = volinfo->brick_count;
}
@@ -13061,8 +13109,22 @@ glusterd_snap_quorum_check_for_create (dict_t *dict, gf_boolean_t snap_volume,
if the quorum-type option is not set to auto,
the behavior is set to the default behavior)
*/
- if (!ret)
- quorum_count = tmp;
+ if (!ret) {
+ /* for dispersed volumes, only allow quorums
+ equal or larger than minimum functional
+ value.
+ */
+ if ((GF_CLUSTER_TYPE_DISPERSE !=
+ volinfo->type) ||
+ (tmp >= quorum_count)) {
+ quorum_count = tmp;
+ } else {
+ gf_log(this->name, GF_LOG_INFO,
+ "Ignoring small quorum-count "
+ "(%d) on dispersed volume", tmp);
+ quorum_type = NULL;
+ }
+ }
else
quorum_type = NULL;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index 6ab899a16cf..9701c6b939c 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -2684,10 +2684,14 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,
"%s-replicate-%d"};
char *stripe_args[] = {"cluster/stripe",
"%s-stripe-%d"};
+ char *disperse_args[] = {"cluster/disperse",
+ "%s-disperse-%d"};
+ char option[32] = "";
int rclusters = 0;
int clusters = 0;
int dist_count = 0;
int ret = -1;
+ xlator_t * ec = NULL;
if (!volinfo->dist_leaf_count)
goto out;
@@ -2737,6 +2741,26 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,
if (clusters < 0)
goto out;
break;
+ case GF_CLUSTER_TYPE_DISPERSE:
+ clusters = volgen_graph_build_clusters (graph, volinfo,
+ disperse_args[0],
+ disperse_args[1],
+ volinfo->brick_count,
+ volinfo->disperse_count);
+ if (clusters < 0)
+ goto out;
+
+ sprintf(option, "%d", volinfo->redundancy_count);
+ ec = first_of (graph);
+ while (clusters-- > 0) {
+ ret = xlator_set_option (ec, "redundancy", option);
+ if (ret)
+ goto out;
+
+ ec = ec->next;
+ }
+
+ break;
default:
gf_log ("", GF_LOG_ERROR, "volume inconsistency: "
"unrecognized clustering type");
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index 53beebe0555..f23a9eb96b7 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -1689,6 +1689,27 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr)
"replica count for volume %s", volname);
goto out;
}
+ } else if (GF_CLUSTER_TYPE_DISPERSE == volinfo->type) {
+ ret = dict_get_int32 (dict, "disperse-count",
+ &volinfo->disperse_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "disperse count for volume %s", volname);
+ goto out;
+ }
+ ret = dict_get_int32 (dict, "redundancy-count",
+ &volinfo->redundancy_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "redundancy count for volume %s", volname);
+ goto out;
+ }
+ if (priv->op_version < GD_OP_VERSION_3_6_0) {
+ gf_log (this->name, GF_LOG_ERROR, "Disperse volume "
+ "needs op-version 3.6.0 or higher");
+ ret = -1;
+ goto out;
+ }
}
/* dist-leaf-count is the count of brick nodes for a given
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index a8ecb505a5b..ddbb2c81338 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -336,6 +336,8 @@ struct glusterd_volinfo_ {
int sub_count; /* backward compatibility */
int stripe_count;
int replica_count;
+ int disperse_count;
+ int redundancy_count;
int subvol_count; /* Number of subvolumes in a
distribute volume */
int dist_leaf_count; /* Number of bricks in one