summaryrefslogtreecommitdiffstats
path: root/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
diff options
context:
space:
mode:
authorKaushal M <kaushal@redhat.com>2012-09-14 11:45:34 +0530
committerAnand Avati <avati@redhat.com>2012-12-19 13:32:49 -0800
commit5eb8bac561b7374589bd72d597ed7eec95aa7de6 (patch)
tree028a5aa6582b0497cf0ffda62bf023bc88b391b3 /xlators/mgmt/glusterd/src/glusterd-brick-ops.c
parentda7ca1efcf3a621c27f05d621715e57fdc5aa397 (diff)
glusterd, cli: Task id's for async tasks
This patch introduces task-id's for async tasks like rebalance, remove-brick and replace-brick. An id is generated for each task when it is started and displayed to the user in cli output. The status of running tasks is also included in the output of "volume status" along with its id, so that a user can easily track the progress of an async task. Also, * added tests for this feature into the regression test suite. * added a python script for creating files, 'create-files.py', courtesy Vijaykumar Koppad (vkoppad@redhat.com) into the test suite. This patch reverts the revert commit 698deb33d731df6de84da8ae8ee4045e1543a168. BUG: 857330 Change-Id: Id43d7cb629a38f47f733fbc18cb4c5f2f0327c7a Signed-off-by: Kaushal M <kaushal@redhat.com> Reviewed-on: http://review.gluster.org/4294 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Anand Avati <avati@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-brick-ops.c')
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c161
1 files changed, 111 insertions, 50 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index 1ca52bc5fc4..105e74079cf 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -838,6 +838,7 @@ out:
ret = 0; //sent error to cli, prevent second reply
}
+
GF_FREE (brick_list);
free (cli_req.dict.dict_val); //its malloced by xdr
@@ -1173,17 +1174,22 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
char msg[2048] = {0,};
int32_t flag = 0;
gf1_op_commands cmd = GF_OP_CMD_NONE;
+ char *task_id_str = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
ret = dict_get_str (dict, "volname", &volname);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name");
goto out;
}
ret = glusterd_volinfo_find (volname, &volinfo);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Volume %s does not exist", volname);
+ gf_log (this->name, GF_LOG_ERROR, "Volume %s does not exist", volname);
goto out;
}
@@ -1195,7 +1201,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
snprintf (msg, sizeof (msg), "Replace brick is in progress on "
"volume %s. Please retry after replace-brick "
"operation is committed or aborted", volname);
- gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
*op_errstr = gf_strdup (msg);
ret = -1;
goto out;
@@ -1203,7 +1209,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
ret = dict_get_int32 (dict, "command", &flag);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get brick count");
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get brick count");
goto out;
}
cmd = flag;
@@ -1221,20 +1227,38 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
case GF_OP_CMD_START:
{
if (GLUSTERD_STATUS_STARTED != volinfo->status) {
- snprintf (msg, sizeof (msg), "Volume %s needs to be started "
- "before remove-brick (you can use 'force' or "
- "'commit' to override this behavior)",
- volinfo->volname);
+ snprintf (msg, sizeof (msg), "Volume %s needs to be "
+ "started before remove-brick (you can use "
+ "'force' or 'commit' to override this "
+ "behavior)", volinfo->volname);
errstr = gf_strdup (msg);
- gf_log (THIS->name, GF_LOG_ERROR, "%s", errstr);
+ gf_log (this->name, GF_LOG_ERROR, "%s", errstr);
goto out;
}
if (glusterd_is_defrag_on(volinfo)) {
- errstr = gf_strdup("Rebalance is in progress. Please retry"
- " after completion");
- gf_log ("glusterd", GF_LOG_ERROR, "%s", errstr);
+ errstr = gf_strdup("Rebalance is in progress. Please "
+ "retry after completion");
+ gf_log (this->name, GF_LOG_ERROR, "%s", errstr);
goto out;
}
+
+ if (is_origin_glusterd ()) {
+ ret = glusterd_generate_and_set_task_id
+ (dict, GF_REMOVE_BRICK_TID_KEY);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to generate task-id");
+ goto out;
+ }
+ } else {
+ ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY,
+ &task_id_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Missing remove-brick-id");
+ ret = 0;
+ }
+ }
break;
}
@@ -1256,7 +1280,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
ret = dict_get_int32 (dict, "count", &brick_count);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get brick count");
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get brick count");
goto out;
}
@@ -1269,7 +1293,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
}
out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
if (ret && errstr) {
if (op_errstr)
*op_errstr = errstr;
@@ -1406,10 +1430,10 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr)
}
/* Need to reset the defrag/rebalance status accordingly */
- switch (volinfo->defrag_status) {
+ switch (volinfo->rebal.defrag_status) {
case GF_DEFRAG_STATUS_FAILED:
case GF_DEFRAG_STATUS_COMPLETE:
- volinfo->defrag_status = 0;
+ volinfo->rebal.defrag_status = 0;
default:
break;
}
@@ -1428,42 +1452,67 @@ out:
int
glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
{
- int ret = -1;
- char *volname = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- char *brick = NULL;
- int32_t count = 0;
- int32_t i = 1;
- char key[256] = {0,};
- int32_t flag = 0;
- char err_str[4096] = {0,};
- int need_rebalance = 0;
- int force = 0;
- gf1_op_commands cmd = 0;
- int32_t replica_count = 0;
- glusterd_brickinfo_t *brickinfo = NULL;
- glusterd_brickinfo_t *tmp = NULL;
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *brick = NULL;
+ int32_t count = 0;
+ int32_t i = 1;
+ char key[256] = {0,};
+ int32_t flag = 0;
+ char err_str[4096] = {0,};
+ int need_rebalance = 0;
+ int force = 0;
+ gf1_op_commands cmd = 0;
+ int32_t replica_count = 0;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *tmp = NULL;
+ char *task_id_str = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
ret = dict_get_str (dict, "volname", &volname);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name");
goto out;
}
ret = glusterd_volinfo_find (volname, &volinfo);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
+ gf_log (this->name, GF_LOG_ERROR, "Unable to allocate memory");
goto out;
}
ret = dict_get_int32 (dict, "command", &flag);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get brick count");
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get command");
goto out;
}
cmd = flag;
+ /* Set task-id, if available, in ctx dict for operations other than
+ * start
+ */
+ if (is_origin_glusterd () && (cmd != GF_OP_CMD_START)) {
+ if (!uuid_is_null (volinfo->rebal.rebalance_id)) {
+ ret = glusterd_copy_uuid_to_dict
+ (volinfo->rebal.rebalance_id, dict,
+ GF_REMOVE_BRICK_TID_KEY);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set remove-brick-id");
+ goto out;
+ }
+ }
+ }
+
+ /* Clear task-id on completion/stopping of remove-brick operation */
+ if ((cmd != GF_OP_CMD_START) || (cmd != GF_OP_CMD_STATUS))
+ uuid_clear (volinfo->rebal.rebalance_id);
+
ret = -1;
switch (cmd) {
case GF_OP_CMD_NONE:
@@ -1484,7 +1533,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
}
ret = glusterd_create_volfiles_and_notify_services (volinfo);
if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING,
+ gf_log (this->name, GF_LOG_WARNING,
"failed to create volfiles");
goto out;
}
@@ -1492,7 +1541,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
ret = glusterd_store_volinfo (volinfo,
GLUSTERD_VOLINFO_VER_AC_INCREMENT);
if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING,
+ gf_log (this->name, GF_LOG_WARNING,
"failed to store volinfo");
goto out;
}
@@ -1502,6 +1551,14 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
}
case GF_OP_CMD_START:
+ ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY, &task_id_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Missing remove-brick-id");
+ ret = 0;
+ } else {
+ uuid_parse (task_id_str, volinfo->rebal.rebalance_id) ;
+ }
force = 0;
break;
@@ -1512,13 +1569,14 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
case GF_OP_CMD_COMMIT_FORCE:
if (volinfo->decommission_in_progress) {
- if (volinfo->defrag) {
- LOCK (&volinfo->defrag->lock);
+ if (volinfo->rebal.defrag) {
+ LOCK (&volinfo->rebal.defrag->lock);
/* Fake 'rebalance-complete' so the graph change
happens right away */
- volinfo->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
+ volinfo->rebal.defrag_status =
+ GF_DEFRAG_STATUS_COMPLETE;
- UNLOCK (&volinfo->defrag->lock);
+ UNLOCK (&volinfo->rebal.defrag->lock);
}
/* Graph change happens in rebalance _cbk function,
no need to do anything here */
@@ -1541,7 +1599,8 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
snprintf (key, 256, "brick%d", i);
ret = dict_get_str (dict, key, &brick);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get %s", key);
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get %s",
+ key);
goto out;
}
@@ -1553,7 +1612,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
}
ret = dict_get_int32 (dict, "replica-count", &replica_count);
if (!ret) {
- gf_log (THIS->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_INFO,
"changing replica count %d to %d on volume %s",
volinfo->replica_count, replica_count,
volinfo->volname);
@@ -1575,34 +1634,36 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
ret = glusterd_create_volfiles_and_notify_services (volinfo);
if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING, "failed to create volfiles");
+ gf_log (this->name, GF_LOG_WARNING, "failed to create volfiles");
goto out;
}
ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING, "failed to store volinfo");
+ gf_log (this->name, GF_LOG_WARNING, "failed to store volinfo");
goto out;
}
/* Need to reset the defrag/rebalance status accordingly */
- switch (volinfo->defrag_status) {
+ switch (volinfo->rebal.defrag_status) {
case GF_DEFRAG_STATUS_FAILED:
case GF_DEFRAG_STATUS_COMPLETE:
- volinfo->defrag_status = 0;
+ volinfo->rebal.defrag_status = 0;
default:
break;
}
if (!force && need_rebalance) {
/* perform the rebalance operations */
- ret = glusterd_handle_defrag_start (volinfo, err_str, 4096,
- GF_DEFRAG_CMD_START_FORCE,
- glusterd_remove_brick_migrate_cbk);
+ ret = glusterd_handle_defrag_start
+ (volinfo, err_str, sizeof (err_str),
+ GF_DEFRAG_CMD_START_FORCE,
+ glusterd_remove_brick_migrate_cbk, GD_OP_REMOVE_BRICK);
+
if (!ret)
volinfo->decommission_in_progress = 1;
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"failed to start the rebalance");
}
} else {