summaryrefslogtreecommitdiffstats
path: root/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
diff options
context:
space:
mode:
authorAmar Tumballi <amar@gluster.com>2011-09-09 09:42:51 +0530
committerVijay Bellur <vijay@gluster.com>2011-09-13 02:10:12 -0700
commit25daa42911d2ff697880ee29c591cac5f2abebed (patch)
tree9555284c052e1e205909e91f578a8b46b522ec56 /xlators/mgmt/glusterd/src/glusterd-brick-ops.c
parent17e57f27c714c94dd5d9fa91650f83d069f2f4e4 (diff)
support for de-commissioning a node using 'remove-brick'
to achieve this, we now create volume-file with 'decommissioned-nodes' option in distribute volume, then just perform the rebalance set of operations (with 'force' flag set). now onwards, the 'remove-brick' (with 'start' option) operation tries to migrate data from removed bricks to existing bricks. 'remove-brick' also supports similar options as of replace-brick. * (no options) -> works as 'force', will have the current behavior of remove-brick, ie., no data-migration, volume changes. * start (starts remove-brick with data-migration/draining process, which takes care of migrating data and once complete, will commit the changes to volume file) * pause (stop data migration, but keep the volume file intact with extra options whatever is set) * abort (stop data-migration, and fall back to old configuration) * commit (if volume is stopped, commits the changes to volumefile) * force (stops the data-migration and commits the changes to volume file) Change-Id: I3952bcfbe604a0952e68b6accace7014d5e401d3 BUG: 1952 Reviewed-on: http://review.gluster.com/118 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vijay@gluster.com>
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-brick-ops.c')
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c369
1 files changed, 296 insertions, 73 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index 8b3a03b6f3e..8832c69ed4f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -312,7 +312,8 @@ glusterd_handle_remove_brick (rpcsvc_request_t *req)
strcpy (vol_type, "distribute");
/* Do not allow remove-brick if the volume is plain stripe */
- if ((volinfo->type == GF_CLUSTER_TYPE_STRIPE) && (volinfo->brick_count == volinfo->sub_count)) {
+ if ((volinfo->type == GF_CLUSTER_TYPE_STRIPE) &&
+ (volinfo->brick_count == volinfo->sub_count)) {
snprintf (err_str, 2048, "Removing brick from a plain stripe is not allowed");
gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str);
ret = -1;
@@ -321,8 +322,8 @@ glusterd_handle_remove_brick (rpcsvc_request_t *req)
/* Do not allow remove-brick if the bricks given is less than the replica count
or stripe count */
- if (((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) || (volinfo->type == GF_CLUSTER_TYPE_STRIPE))
- && !(volinfo->brick_count <= volinfo->sub_count)) {
+ if ((volinfo->type != GF_CLUSTER_TYPE_NONE) &&
+ !(volinfo->brick_count <= volinfo->sub_count)) {
if (volinfo->sub_count && (count % volinfo->sub_count != 0)) {
snprintf (err_str, 2048, "Remove brick incorrect"
" brick count of %d for %s %d",
@@ -512,16 +513,20 @@ out:
int
-glusterd_op_perform_remove_brick (glusterd_volinfo_t *volinfo, char *brick)
+glusterd_op_perform_remove_brick (glusterd_volinfo_t *volinfo, char *brick,
+ int force, int *need_migrate)
{
-
glusterd_brickinfo_t *brickinfo = NULL;
char *dup_brick = NULL;
- int32_t ret = -1;
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
GF_ASSERT (volinfo);
GF_ASSERT (brick);
+ priv = THIS->private;
+ GF_ASSERT (priv);
+
dup_brick = gf_strdup (brick);
if (!dup_brick)
goto out;
@@ -534,15 +539,26 @@ glusterd_op_perform_remove_brick (glusterd_volinfo_t *volinfo, char *brick)
if (ret)
goto out;
- if (GLUSTERD_STATUS_STARTED == volinfo->status) {
- ret = glusterd_brick_stop (volinfo, brickinfo);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to stop "
- "glusterfs, ret: %d", ret);
- goto out;
+ if (!uuid_compare (brickinfo->uuid, priv->uuid)) {
+ /* Only if the brick is in this glusterd, do the rebalance */
+ if (need_migrate)
+ *need_migrate = 1;
+ }
+
+ if (force) {
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ ret = glusterd_brick_stop (volinfo, brickinfo);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Unable to stop "
+ "glusterfs, ret: %d", ret);
+ goto out;
+ }
}
+ glusterd_delete_brick (volinfo, brickinfo);
+ goto out;
}
- glusterd_delete_brick (volinfo, brickinfo);
+
+ brickinfo->decommissioned = 1;
out:
if (dup_brick)
GF_FREE (dup_brick);
@@ -700,17 +716,18 @@ out:
}
int
-glusterd_op_stage_remove_brick (dict_t *dict)
+glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
{
- int ret = -1;
- char *volname = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- dict_t *ctx = NULL;
- char *errstr = NULL;
- int32_t brick_count = 0;
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *errstr = NULL;
+ int32_t brick_count = 0;
+ char msg[2048] = {0,};
+ int32_t flag = 0;
+ gf1_op_commands cmd = GF_OP_CMD_NONE;
ret = dict_get_str (dict, "volname", &volname);
-
if (ret) {
gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
goto out;
@@ -723,25 +740,64 @@ glusterd_op_stage_remove_brick (dict_t *dict)
goto out;
}
- if (glusterd_is_defrag_on(volinfo)) {
- ctx = glusterd_op_get_ctx ();
- errstr = gf_strdup("Rebalance is in progress. Please retry"
- " after completion");
- if (!errstr) {
- ret = -1;
+ ret = dict_get_int32 (dict, "command", &flag);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get brick count");
+ goto out;
+ }
+ cmd = flag;
+
+ ret = -1;
+ switch (cmd) {
+ case GF_OP_CMD_NONE:
+ errstr = gf_strdup ("no remove-brick command issued");
+ goto out;
+
+ case GF_OP_CMD_STATUS:
+ ret = 0;
+ goto out;
+
+ case GF_OP_CMD_START:
+ {
+ if (GLUSTERD_STATUS_STARTED != volinfo->status) {
+ snprintf (msg, sizeof (msg), "Volume %s needs to be started "
+ "before remove-brick (you can use 'force' or "
+ "'commit' to override this behavior)",
+ volinfo->volname);
+ errstr = gf_strdup (msg);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", errstr);
goto out;
}
- gf_log ("glusterd", GF_LOG_ERROR, "%s", errstr);
- ret = dict_set_dynstr (ctx, "errstr", errstr);
- if (ret) {
- GF_FREE (errstr);
- gf_log ("", GF_LOG_DEBUG,
- "failed to set errstr ctx");
+ if (glusterd_is_defrag_on(volinfo)) {
+ errstr = gf_strdup("Rebalance is in progress. Please retry"
+ " after completion");
+ gf_log ("glusterd", GF_LOG_ERROR, "%s", errstr);
goto out;
}
+ break;
+ }
- ret = -1;
- goto out;
+ case GF_OP_CMD_PAUSE:
+ case GF_OP_CMD_ABORT:
+ {
+ if (!volinfo->decommission_in_progress) {
+ errstr = gf_strdup("remove-brick is not in progress");
+ gf_log ("glusterd", GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+ break;
+ }
+
+ case GF_OP_CMD_COMMIT:
+ if (volinfo->decommission_in_progress) {
+ errstr = gf_strdup ("use 'force' option as migration "
+ "is in progress");
+ goto out;
+ }
+ break;
+
+ case GF_OP_CMD_COMMIT_FORCE:
+ break;
}
ret = dict_get_int32 (dict, "count", &brick_count);
@@ -750,41 +806,96 @@ glusterd_op_stage_remove_brick (dict_t *dict)
goto out;
}
+ ret = 0;
if (volinfo->brick_count == brick_count) {
- ctx = glusterd_op_get_ctx ();
- if (!ctx) {
- gf_log ("", GF_LOG_ERROR,
- "Operation Context is not present");
- ret = -1;
- goto out;
- }
errstr = gf_strdup ("Deleting all the bricks of the "
"volume is not allowed");
- if (!errstr) {
- gf_log ("", GF_LOG_ERROR, "Out of memory");
- ret = -1;
- goto out;
- }
-
- ret = dict_set_dynstr (ctx, "errstr", errstr);
- if (ret) {
- GF_FREE (errstr);
- gf_log ("", GF_LOG_DEBUG,
- "failed to set pump status in ctx");
- goto out;
- }
-
ret = -1;
goto out;
}
out:
gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ if (ret && errstr) {
+ if (op_errstr)
+ *op_errstr = errstr;
+ }
return ret;
}
int
+glusterd_remove_brick_migrate_cbk (glusterd_volinfo_t *volinfo,
+ gf_defrag_status_t status)
+{
+ int ret = 0;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *tmp = NULL;
+
+ switch (status) {
+ case GF_DEFRAG_STATUS_PAUSED:
+ case GF_DEFRAG_STATUS_FAILED:
+ /* No changes required in the volume file.
+ everything should remain as is */
+ break;
+ case GF_DEFRAG_STATUS_STOPPED:
+ /* Fall back to the old volume file */
+ list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, brick_list) {
+ if (!brickinfo->decommissioned)
+ continue;
+ brickinfo->decommissioned = 0;
+ }
+ break;
+
+ case GF_DEFRAG_STATUS_COMPLETE:
+ /* Done with the task, you can remove the brick from the
+ volume file */
+ list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, brick_list) {
+ if (!brickinfo->decommissioned)
+ continue;
+ gf_log (THIS->name, GF_LOG_INFO, "removing the brick %s",
+ brickinfo->path);
+ brickinfo->decommissioned = 0;
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ ret = glusterd_brick_stop (volinfo, brickinfo);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unable to stop glusterfs (%d)", ret);
+ }
+ }
+ glusterd_delete_brick (volinfo, brickinfo);
+ }
+ break;
+
+ default:
+ GF_ASSERT (!"cbk function called with wrong status");
+ break;
+ }
+
+ ret = glusterd_create_volfiles_and_notify_services (volinfo);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unable to write volume files (%d)", ret);
+
+ ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unable to store volume info (%d)", ret);
+
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ ret = glusterd_check_generate_start_nfs ();
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unable to start nfs process (%d)", ret);
+ }
+
+ volinfo->decommission_in_progress = 0;
+ return 0;
+}
+
+
+int
glusterd_op_add_brick (dict_t *dict, char **op_errstr)
{
int ret = 0;
@@ -848,15 +959,20 @@ out:
}
int
-glusterd_op_remove_brick (dict_t *dict)
+glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
{
- int ret = -1;
- char *volname = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- char *brick = NULL;
- int32_t count = 0;
- int32_t i = 1;
- char key[256] = {0,};
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *brick = NULL;
+ int32_t count = 0;
+ int32_t i = 1;
+ char key[256] = {0,};
+ int32_t flag = 0;
+ char err_str[4096] = {0,};
+ int need_rebalance = 0;
+ int force = 0;
+ gf1_op_commands cmd = 0;
ret = dict_get_str (dict, "volname", &volname);
@@ -866,12 +982,99 @@ glusterd_op_remove_brick (dict_t *dict)
}
ret = glusterd_volinfo_find (volname, &volinfo);
-
if (ret) {
gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
goto out;
}
+ ret = dict_get_int32 (dict, "command", &flag);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get brick count");
+ goto out;
+ }
+ cmd = flag;
+
+ ret = -1;
+ switch (cmd) {
+ case GF_OP_CMD_NONE:
+ goto out;
+
+ case GF_OP_CMD_STATUS:
+ ret = 0;
+ goto out;
+
+ case GF_OP_CMD_PAUSE:
+ {
+ if (volinfo->decommission_in_progress) {
+ if (volinfo->defrag == (void *)1)
+ volinfo->defrag = NULL;
+
+ if (volinfo->defrag) {
+ LOCK (&volinfo->defrag->lock);
+
+ volinfo->defrag_status = GF_DEFRAG_STATUS_PAUSED;
+
+ UNLOCK (&volinfo->defrag->lock);
+ }
+ }
+
+ /* rebalance '_cbk()' will take care of volume file updates */
+ ret = 0;
+ goto out;
+ }
+
+ case GF_OP_CMD_ABORT:
+ {
+ if (volinfo->decommission_in_progress) {
+ if (volinfo->defrag == (void *)1)
+ volinfo->defrag = NULL;
+
+ if (volinfo->defrag) {
+ LOCK (&volinfo->defrag->lock);
+
+ volinfo->defrag_status = GF_DEFRAG_STATUS_STOPPED;
+
+ UNLOCK (&volinfo->defrag->lock);
+ }
+ }
+
+ /* rebalance '_cbk()' will take care of volume file updates */
+ ret = 0;
+ goto out;
+ }
+
+ case GF_OP_CMD_START:
+ force = 0;
+ break;
+
+ case GF_OP_CMD_COMMIT:
+ force = 1;
+ break;
+
+ case GF_OP_CMD_COMMIT_FORCE:
+
+ if (volinfo->decommission_in_progress) {
+ if (volinfo->defrag == (void *)1)
+ volinfo->defrag = NULL;
+
+ if (volinfo->defrag) {
+ LOCK (&volinfo->defrag->lock);
+ /* Fake 'rebalance-complete' so the graph change
+ happens right away */
+ volinfo->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
+
+ UNLOCK (&volinfo->defrag->lock);
+ }
+ ret = 0;
+ /* Graph change happens in rebalance _cbk function,
+ no need to do anything here */
+ goto out;
+ }
+
+ force = 1;
+ break;
+ }
+
ret = dict_get_int32 (dict, "count", &count);
if (ret) {
gf_log ("", GF_LOG_ERROR, "Unable to get count");
@@ -887,26 +1090,46 @@ glusterd_op_remove_brick (dict_t *dict)
goto out;
}
- ret = glusterd_op_perform_remove_brick (volinfo, brick);
+ ret = glusterd_op_perform_remove_brick (volinfo, brick, force,
+ (i == 1) ? &need_rebalance : NULL);
if (ret)
goto out;
i++;
}
ret = glusterd_create_volfiles_and_notify_services (volinfo);
- if (ret)
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_WARNING, "failed to create volfiles");
goto out;
-
- volinfo->defrag_status = 0;
+ }
ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
-
- if (ret)
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_WARNING, "failed to store volinfo");
goto out;
+ }
- if (GLUSTERD_STATUS_STARTED == volinfo->status)
- ret = glusterd_check_generate_start_nfs ();
+ volinfo->defrag_status = 0;
+ if (!force && need_rebalance) {
+ /* perform the rebalance operations */
+ ret = glusterd_handle_defrag_start (volinfo, err_str, 4096,
+ GF_DEFRAG_CMD_START_FORCE,
+ glusterd_remove_brick_migrate_cbk);
+ if (!ret)
+ volinfo->decommission_in_progress = 1;
+
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "failed to start the rebalance");
+ }
+ } else {
+ if (GLUSTERD_STATUS_STARTED == volinfo->status)
+ ret = glusterd_check_generate_start_nfs ();
+ }
out:
+ if (ret && err_str[0] && op_errstr)
+ *op_errstr = gf_strdup (err_str);
+
return ret;
}