summaryrefslogtreecommitdiffstats
path: root/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
diff options
context:
space:
mode:
authorRavishankar N <ravishankar@redhat.com>2016-04-29 17:41:18 +0530
committerAtin Mukherjee <amukherj@redhat.com>2016-05-19 09:40:04 -0700
commit61c1b2cee973b11897a37d508910012e616033bc (patch)
treefbaf9adc16738884c9c073677ac01704f8f6f560 /xlators/mgmt/glusterd/src/glusterd-brick-ops.c
parent6f1a71210ee0a0f3741b5ece3b5240c1e4b5fa6d (diff)
cli/glusterd: add/remove brick fixes for arbiter volumes
1. Provide a command to convert replica 2 volumes to arbiter volumes. The existing self-heal logic will automatically heal the file hierarchy into the arbiter brick; progress can be monitored using the heal info command.
   Syntax: gluster volume add-brick <VOLNAME> replica 3 arbiter 1 <HOST:arbiter-brick-path>
2. Add checks when removing bricks from arbiter volumes:
   - When converting from an arbiter volume to a replica 2 volume, allow only the arbiter brick to be removed.
   - When converting from an arbiter volume to a plain distribute volume, allow the removal only if the arbiter is one of the bricks being removed.
3. Some clean-up:
   - Use GD_MSG_DICT_GET_SUCCESS instead of GD_MSG_DICT_GET_FAILED to log messages that are not failures.
   - Remove the unused variable `brick_list`.
   - Move the 'brickinfo->group' related functions to glusterd-utils.

Change-Id: Ic87b8c7e4d7d3ab03f93e7b9f372b314d80947ce
BUG: 1318289
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: http://review.gluster.org/14126
Smoke: Gluster Build System <jenkins@build.gluster.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-brick-ops.c')
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c171
1 files changed, 145 insertions, 26 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index 5b2f559b546..a90114ab2b3 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -219,8 +219,8 @@ out:
static int
gd_addbr_validate_replica_count (glusterd_volinfo_t *volinfo, int replica_count,
- int total_bricks, int *type, char *err_str,
- int err_len)
+ int arbiter_count, int total_bricks, int *type,
+ char *err_str, int err_len)
{
int ret = -1;
@@ -283,6 +283,14 @@ gd_addbr_validate_replica_count (glusterd_volinfo_t *volinfo, int replica_count,
goto out;
}
if (replica_count == volinfo->replica_count) {
+ if (arbiter_count && !volinfo->arbiter_count) {
+ snprintf (err_str, err_len,
+ "Cannot convert replica 3 volume "
+ "to arbiter volume.");
+ gf_msg (THIS->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INVALID_ENTRY, "%s", err_str);
+ goto out;
+ }
if (!(total_bricks % volinfo->dist_leaf_count)) {
ret = 1;
goto out;
@@ -413,6 +421,7 @@ __glusterd_handle_add_brick (rpcsvc_request_t *req)
xlator_t *this = NULL;
int total_bricks = 0;
int32_t replica_count = 0;
+ int32_t arbiter_count = 0;
int32_t stripe_count = 0;
int type = 0;
glusterd_conf_t *conf = NULL;
@@ -486,14 +495,21 @@ __glusterd_handle_add_brick (rpcsvc_request_t *req)
ret = dict_get_int32 (dict, "replica-count", &replica_count);
if (!ret) {
gf_msg (this->name, GF_LOG_INFO, errno,
- GD_MSG_DICT_GET_FAILED, "replica-count is %d",
+ GD_MSG_DICT_GET_SUCCESS, "replica-count is %d",
replica_count);
}
+ ret = dict_get_int32 (dict, "arbiter-count", &arbiter_count);
+ if (!ret) {
+ gf_msg (this->name, GF_LOG_INFO, errno,
+ GD_MSG_DICT_GET_SUCCESS, "arbiter-count is %d",
+ arbiter_count);
+ }
+
ret = dict_get_int32 (dict, "stripe-count", &stripe_count);
if (!ret) {
gf_msg (this->name, GF_LOG_INFO, errno,
- GD_MSG_DICT_GET_FAILED, "stripe-count is %d",
+ GD_MSG_DICT_GET_SUCCESS, "stripe-count is %d",
stripe_count);
}
@@ -602,7 +618,7 @@ __glusterd_handle_add_brick (rpcsvc_request_t *req)
}
ret = gd_addbr_validate_replica_count (volinfo, replica_count,
- total_bricks,
+ arbiter_count, total_bricks,
&type, err_str,
sizeof (err_str));
if (ret == -1) {
@@ -791,6 +807,71 @@ glusterd_set_detach_bricks(dict_t *dict, glusterd_volinfo_t *volinfo)
return hot_brick_num;
}
+/*
+ * Validate a remove-brick request against the arbiter layout of a volume.
+ *
+ * @volinfo:        volume the bricks are being removed from
+ * @count:          number of entries in @brickinfo_list
+ * @replica_count:  replica count supplied with the request; when it is 0
+ *                  the check is skipped
+ * @brickinfo_list: the bricks named in the request
+ * @err_str:        buffer that receives a message when validation fails
+ * @err_len:        size of @err_str in bytes
+ *
+ * The check applies only to replicate and stripe-replicate volumes whose
+ * arbiter_count is non-zero. A brick is treated as the arbiter of its group
+ * when get_last_brick_of_brick_group() returns that same brick.
+ *
+ * Returns 0 when the request is acceptable or the check does not apply.
+ * Returns -1 when validation fails (err_str is filled in) or when the
+ * scratch array cannot be allocated (err_str is left untouched).
+ */
+static int
+glusterd_remove_brick_validate_arbiters (glusterd_volinfo_t *volinfo,
+                                         int32_t count, int32_t replica_count,
+                                         glusterd_brickinfo_t **brickinfo_list,
+                                         char *err_str, size_t err_len)
+{
+        int                   i             = 0;
+        int                   ret           = 0;
+        glusterd_brickinfo_t *brickinfo     = NULL;
+        glusterd_brickinfo_t *last          = NULL;
+        char                 *arbiter_array = NULL;
+
+        if ((volinfo->type != GF_CLUSTER_TYPE_REPLICATE) &&
+            (volinfo->type != GF_CLUSTER_TYPE_STRIPE_REPLICATE))
+                goto out;
+
+        if (!replica_count || !volinfo->arbiter_count)
+                goto out;
+
+        if (replica_count == 2) {
+                /* If it is an arbiter to replica 2 conversion, only permit
+                 * removal of the arbiter brick. */
+                for (i = 0; i < count; i++) {
+                        brickinfo = brickinfo_list[i];
+                        last = get_last_brick_of_brick_group (volinfo,
+                                                              brickinfo);
+                        if (last != brickinfo) {
+                                snprintf (err_str, err_len, "Remove arbiter "
+                                          "brick(s) only when converting from "
+                                          "arbiter to replica 2 subvolume.");
+                                ret = -1;
+                                goto out;
+                        }
+                }
+        } else if (replica_count == 1) {
+                /* If it is an arbiter to plain distribute conversion, in every
+                 * replica subvol, the arbiter has to be one of the bricks that
+                 * are removed. */
+                arbiter_array = GF_CALLOC (volinfo->subvol_count,
+                                           sizeof (*arbiter_array),
+                                           gf_common_mt_char);
+                if (!arbiter_array) {
+                        /* Nothing to free yet; leave through the common
+                         * exit like every other failure path. */
+                        ret = -1;
+                        goto out;
+                }
+
+                /* Flag every group whose arbiter is among the bricks being
+                 * removed; the array is indexed by brickinfo->group. */
+                for (i = 0; i < count; i++) {
+                        brickinfo = brickinfo_list[i];
+                        last = get_last_brick_of_brick_group (volinfo,
+                                                              brickinfo);
+                        if (last == brickinfo)
+                                arbiter_array[brickinfo->group] = 1;
+                }
+
+                /* Any group left unflagged would keep its arbiter. */
+                for (i = 0; i < volinfo->subvol_count; i++) {
+                        if (!arbiter_array[i]) {
+                                snprintf (err_str, err_len, "Removed bricks "
+                                          "must contain arbiter when converting"
+                                          " to plain distribute.");
+                                ret = -1;
+                                break;
+                        }
+                }
+                GF_FREE (arbiter_array);
+        }
+
+out:
+        return ret;
+}
+
int
__glusterd_handle_remove_brick (rpcsvc_request_t *req)
{
@@ -800,10 +881,10 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)
int32_t count = 0;
char *brick = NULL;
char key[256] = {0,};
- char *brick_list = NULL;
int i = 1;
glusterd_volinfo_t *volinfo = NULL;
glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t **brickinfo_list = NULL;
int *subvols = NULL;
char err_str[2048] = {0};
gf_cli_rsp rsp = {0,};
@@ -998,16 +1079,6 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)
}
}
- brick_list = GF_MALLOC (120000 * sizeof(*brick_list),gf_common_mt_char);
-
- if (!brick_list) {
- ret = -1;
- goto out;
- }
-
-
- strcpy (brick_list, " ");
-
/* subvol match is not required for tiered volume*/
if ((volinfo->type != GF_CLUSTER_TYPE_NONE) &&
(volinfo->type != GF_CLUSTER_TYPE_TIER) &&
@@ -1020,6 +1091,13 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)
if (volinfo->type == GF_CLUSTER_TYPE_TIER)
count = glusterd_set_detach_bricks(dict, volinfo);
+ brickinfo_list = GF_CALLOC (count, sizeof (*brickinfo_list),
+ gf_common_mt_pointer);
+ if (!brickinfo_list) {
+ ret = -1;
+ goto out;
+ }
+
while ( i <= count) {
snprintf (key, sizeof (key), "brick%d", i);
ret = dict_get_str (dict, key, &brick);
@@ -1044,8 +1122,7 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)
GD_MSG_BRICK_NOT_FOUND, "%s", err_str);
goto out;
}
- strcat(brick_list, brick);
- strcat(brick_list, " ");
+ brickinfo_list[i-1] = brickinfo;
i++;
if ((volinfo->type == GF_CLUSTER_TYPE_NONE) ||
@@ -1072,6 +1149,14 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)
goto out;
}
+ ret = glusterd_remove_brick_validate_arbiters (volinfo, count,
+ replica_count,
+ brickinfo_list,
+ err_str,
+ sizeof (err_str));
+ if (ret)
+ goto out;
+
ret = glusterd_op_begin_synctask (req, GD_OP_REMOVE_BRICK, dict);
out:
@@ -1092,8 +1177,8 @@ out:
}
- if (brick_list)
- GF_FREE (brick_list);
+ if (brickinfo_list)
+ GF_FREE (brickinfo_list);
subvol_matcher_destroy (subvols);
free (cli_req.dict.dict_val); //its malloced by xdr
@@ -1224,6 +1309,7 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,
int32_t ret = -1;
int32_t stripe_count = 0;
int32_t replica_count = 0;
+ int32_t arbiter_count = 0;
int32_t type = 0;
glusterd_brickinfo_t *brickinfo = NULL;
glusterd_gsync_status_temp_t param = {0, };
@@ -1256,18 +1342,23 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,
ret = dict_get_int32 (dict, "stripe-count", &stripe_count);
if (!ret)
gf_msg (THIS->name, GF_LOG_INFO, errno,
- GD_MSG_DICT_GET_FAILED,
+ GD_MSG_DICT_GET_SUCCESS,
"stripe-count is set %d", stripe_count);
ret = dict_get_int32 (dict, "replica-count", &replica_count);
if (!ret)
gf_msg (THIS->name, GF_LOG_INFO, errno,
- GD_MSG_DICT_GET_FAILED,
+ GD_MSG_DICT_GET_SUCCESS,
"replica-count is set %d", replica_count);
+ ret = dict_get_int32 (dict, "arbiter-count", &arbiter_count);
+ if (!ret)
+ gf_msg (THIS->name, GF_LOG_INFO, errno,
+ GD_MSG_DICT_GET_SUCCESS,
+ "arbiter-count is set %d", arbiter_count);
ret = dict_get_int32 (dict, "type", &type);
if (!ret)
gf_msg (THIS->name, GF_LOG_INFO, errno,
- GD_MSG_DICT_GET_FAILED,
+ GD_MSG_DICT_GET_SUCCESS,
"type is set %d, need to change it", type);
}
@@ -1328,6 +1419,9 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,
if (replica_count) {
volinfo->replica_count = replica_count;
}
+ if (arbiter_count) {
+ volinfo->arbiter_count = arbiter_count;
+ }
if (stripe_count) {
volinfo->stripe_count = stripe_count;
}
@@ -1529,6 +1623,7 @@ glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
char *volname = NULL;
int count = 0;
int replica_count = 0;
+ int arbiter_count = 0;
int i = 0;
int32_t local_brick_count = 0;
char *bricks = NULL;
@@ -1578,6 +1673,12 @@ glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
"Unable to get replica count");
}
+ ret = dict_get_int32 (dict, "arbiter-count", &arbiter_count);
+ if (ret) {
+ gf_msg_debug (THIS->name, 0,
+ "No arbiter count present in the dict");
+ }
+
if (replica_count > 0) {
ret = op_version_check (this, GD_OP_VER_PERSISTENT_AFR_XATTRS,
msg, sizeof(msg));
@@ -1589,10 +1690,10 @@ glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
}
}
- /* Do not allow add-brick for stopped volumes when replica-count
- * is being increased.
- */
if (glusterd_is_volume_replicate (volinfo)) {
+ /* Do not allow add-brick for stopped volumes when replica-count
+ * is being increased.
+ */
if (conf->op_version >= GD_OP_VERSION_3_7_10 &&
!dict_get (dict, "attach-tier") &&
replica_count &&
@@ -1606,6 +1707,20 @@ glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
*op_errstr = gf_strdup (msg);
goto out;
}
+ /* op-version check for replica 2 to arbiter conversion. If we
+ * dont have this check, an older peer added as arbiter brick
+ * will not have the arbiter xlator in its volfile. */
+ if ((conf->op_version < GD_OP_VERSION_3_8_0) &&
+ (arbiter_count == 1) && (replica_count == 3)) {
+ ret = -1;
+ snprintf (msg, sizeof (msg), "Cluster op-version must "
+ "be >= 30800 to add arbiter brick to a "
+ "replica 2 volume.");
+ gf_msg (THIS->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRICK_ADD_FAIL, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ goto out;
+ }
}
if (conf->op_version > GD_OP_VERSION_3_7_5 &&
@@ -2689,6 +2804,10 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
volinfo->replica_count, replica_count,
volinfo->volname);
volinfo->replica_count = replica_count;
+ /* A reduction in replica count implies an arbiter volume
+ * earlier is now no longer one. */
+ if (volinfo->arbiter_count)
+ volinfo->arbiter_count = 0;
volinfo->sub_count = replica_count;
volinfo->dist_leaf_count = glusterd_get_dist_leaf_count (volinfo);