glusterd, cli: Task IDs for async tasks

This patch introduces task IDs for asynchronous tasks such as rebalance, remove-brick and replace-brick. An ID is generated for each task when it is started and is displayed to the user in the CLI output. The status of each running task, along with its ID, is also included in the output of "volume status", so that a user can easily track the progress of an async task.

Also,
* added tests for this feature to the regression test suite.
* added a Python script for creating files, 'create-files.py', courtesy of Vijaykumar Koppad (vkoppad@redhat.com), to the test suite.

This patch reverts the revert commit 698deb33d731df6de84da8ae8ee4045e1543a168.

BUG: 857330
Change-Id: Id43d7cb629a38f47f733fbc18cb4c5f2f0327c7a
Signed-off-by: Kaushal M <kaushal@redhat.com>
Reviewed-on: http://review.gluster.org/4294
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
author: Kaushal M <kaushal@redhat.com> 2012-09-14 11:45:34 +0530
committer: Anand Avati <avati@redhat.com> 2012-12-19 13:32:49 -0800
commit: 5eb8bac561b7374589bd72d597ed7eec95aa7de6 (patch)
tree: 028a5aa6582b0497cf0ffda62bf023bc88b391b3 /xlators/mgmt/glusterd/src/glusterd-brick-ops.c
parent: da7ca1efcf3a621c27f05d621715e57fdc5aa397 (diff)
1 files changed, 111 insertions, 50 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index 1ca52bc5fc4..105e74079cf 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -838,6 +838,7 @@ out:
                 ret = 0; //sent error to cli, prevent second reply
 
         }
+
         GF_FREE (brick_list);
         free (cli_req.dict.dict_val); //its malloced by xdr
 
@@ -1173,17 +1174,22 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
         char                msg[2048]   = {0,};
         int32_t             flag        = 0;
         gf1_op_commands     cmd         = GF_OP_CMD_NONE;
+        char               *task_id_str = NULL;
+        xlator_t           *this        = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
 
         ret = dict_get_str (dict, "volname", &volname);
         if (ret) {
-                gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
+                gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name");
                 goto out;
         }
 
         ret = glusterd_volinfo_find (volname, &volinfo);
 
         if (ret) {
-                gf_log ("", GF_LOG_ERROR, "Volume %s does not exist", volname);
+                gf_log (this->name, GF_LOG_ERROR, "Volume %s does not exist", volname);
                 goto out;
         }
 
@@ -1195,7 +1201,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
                 snprintf (msg, sizeof (msg), "Replace brick is in progress on "
                           "volume %s. Please retry after replace-brick "
                           "operation is committed or aborted", volname);
-                gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
+                gf_log (this->name, GF_LOG_ERROR, "%s", msg);
                 *op_errstr = gf_strdup (msg);
                 ret = -1;
                 goto out;
@@ -1203,7 +1209,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
 
         ret = dict_get_int32 (dict, "command", &flag);
         if (ret) {
-                gf_log ("", GF_LOG_ERROR, "Unable to get brick count");
+                gf_log (this->name, GF_LOG_ERROR, "Unable to get brick count");
                 goto out;
         }
         cmd = flag;
@@ -1221,20 +1227,38 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
         case GF_OP_CMD_START:
         {
                 if (GLUSTERD_STATUS_STARTED != volinfo->status) {
-                        snprintf (msg, sizeof (msg), "Volume %s needs to be started "
-                                  "before remove-brick (you can use 'force' or "
-                                  "'commit' to override this behavior)",
-                                  volinfo->volname);
+                        snprintf (msg, sizeof (msg), "Volume %s needs to be "
+                                  "started before remove-brick (you can use "
+                                  "'force' or 'commit' to override this "
+                                  "behavior)", volinfo->volname);
                         errstr = gf_strdup (msg);
-                        gf_log (THIS->name, GF_LOG_ERROR, "%s", errstr);
+                        gf_log (this->name, GF_LOG_ERROR, "%s", errstr);
                         goto out;
                 }
                 if (glusterd_is_defrag_on(volinfo)) {
-                        errstr = gf_strdup("Rebalance is in progress. Please retry"
-                                           " after completion");
-                        gf_log ("glusterd", GF_LOG_ERROR, "%s", errstr);
+                        errstr = gf_strdup("Rebalance is in progress. Please "
+                                           "retry after completion");
+                        gf_log (this->name, GF_LOG_ERROR, "%s", errstr);
                         goto out;
                 }
+
+                if (is_origin_glusterd ()) {
+                        ret = glusterd_generate_and_set_task_id
+                                (dict, GF_REMOVE_BRICK_TID_KEY);
+                        if (ret) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "Failed to generate task-id");
+                                goto out;
+                        }
+                } else {
+                        ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY,
+                                            &task_id_str);
+                        if (ret) {
+                                gf_log (this->name, GF_LOG_WARNING,
+                                        "Missing remove-brick-id");
+                                ret = 0;
+                        }
+                }
                 break;
         }
 
@@ -1256,7 +1280,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
 
         ret = dict_get_int32 (dict, "count", &brick_count);
         if (ret) {
-                gf_log ("", GF_LOG_ERROR, "Unable to get brick count");
+                gf_log (this->name, GF_LOG_ERROR, "Unable to get brick count");
                 goto out;
         }
 
@@ -1269,7 +1293,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
         }
 
 out:
-        gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+        gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
         if (ret && errstr) {
                 if (op_errstr)
                         *op_errstr = errstr;
@@ -1406,10 +1430,10 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr)
         }
 
         /* Need to reset the defrag/rebalance status accordingly */
-        switch (volinfo->defrag_status) {
+        switch (volinfo->rebal.defrag_status) {
         case GF_DEFRAG_STATUS_FAILED:
         case GF_DEFRAG_STATUS_COMPLETE:
-                volinfo->defrag_status = 0;
+                volinfo->rebal.defrag_status = 0;
         default:
                 break;
         }
@@ -1428,42 +1452,67 @@ out:
 int
 glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
 {
-        int                 ret            = -1;
-        char               *volname        = NULL;
-        glusterd_volinfo_t *volinfo        = NULL;
-        char               *brick          = NULL;
-        int32_t             count          = 0;
-        int32_t             i              = 1;
-        char                key[256]       = {0,};
-        int32_t             flag           = 0;
-        char                err_str[4096]  = {0,};
-        int                 need_rebalance = 0;
-        int                 force          = 0;
-        gf1_op_commands     cmd            = 0;
-        int32_t             replica_count  = 0;
-        glusterd_brickinfo_t *brickinfo    = NULL;
-        glusterd_brickinfo_t *tmp          = NULL;
+        int                     ret            = -1;
+        char                    *volname       = NULL;
+        glusterd_volinfo_t      *volinfo       = NULL;
+        char                    *brick         = NULL;
+        int32_t                 count          = 0;
+        int32_t                 i              = 1;
+        char                    key[256]       = {0,};
+        int32_t                 flag           = 0;
+        char                    err_str[4096]  = {0,};
+        int                     need_rebalance = 0;
+        int                     force          = 0;
+        gf1_op_commands         cmd            = 0;
+        int32_t                 replica_count  = 0;
+        glusterd_brickinfo_t    *brickinfo     = NULL;
+        glusterd_brickinfo_t    *tmp           = NULL;
+        char                    *task_id_str   = NULL;
+        xlator_t                *this          = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
 
         ret = dict_get_str (dict, "volname", &volname);
 
         if (ret) {
-                gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
+                gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name");
                 goto out;
         }
 
         ret = glusterd_volinfo_find (volname, &volinfo);
         if (ret) {
-                gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
+                gf_log (this->name, GF_LOG_ERROR, "Unable to allocate memory");
                 goto out;
         }
 
         ret = dict_get_int32 (dict, "command", &flag);
         if (ret) {
-                gf_log ("", GF_LOG_ERROR, "Unable to get brick count");
+                gf_log (this->name, GF_LOG_ERROR, "Unable to get command");
                 goto out;
         }
         cmd = flag;
 
+        /* Set task-id, if available, in ctx dict for operations other than
+         * start
+         */
+        if (is_origin_glusterd () && (cmd != GF_OP_CMD_START)) {
+                if (!uuid_is_null (volinfo->rebal.rebalance_id)) {
+                        ret = glusterd_copy_uuid_to_dict
+                                (volinfo->rebal.rebalance_id, dict,
+                                 GF_REMOVE_BRICK_TID_KEY);
+                        if (ret) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "Failed to set remove-brick-id");
+                                goto out;
+                        }
+                }
+        }
+
+        /* Clear task-id on completion/stopping of remove-brick operation */
+        if ((cmd != GF_OP_CMD_START) || (cmd != GF_OP_CMD_STATUS))
+                uuid_clear (volinfo->rebal.rebalance_id);
+
         ret = -1;
         switch (cmd) {
         case GF_OP_CMD_NONE:
@@ -1484,7 +1533,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
                 }
                 ret = glusterd_create_volfiles_and_notify_services (volinfo);
                 if (ret) {
-                        gf_log (THIS->name, GF_LOG_WARNING,
+                        gf_log (this->name, GF_LOG_WARNING,
                                 "failed to create volfiles");
                         goto out;
                 }
@@ -1492,7 +1541,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
                 ret = glusterd_store_volinfo (volinfo,
                                              GLUSTERD_VOLINFO_VER_AC_INCREMENT);
                 if (ret) {
-                        gf_log (THIS->name, GF_LOG_WARNING,
+                        gf_log (this->name, GF_LOG_WARNING,
                                 "failed to store volinfo");
                         goto out;
                 }
@@ -1502,6 +1551,14 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
         }
 
         case GF_OP_CMD_START:
+                ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY, &task_id_str);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "Missing remove-brick-id");
+                        ret = 0;
+                } else {
+                        uuid_parse (task_id_str, volinfo->rebal.rebalance_id) ;
+                }
                 force = 0;
                 break;
 
@@ -1512,13 +1569,14 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
         case GF_OP_CMD_COMMIT_FORCE:
 
                 if (volinfo->decommission_in_progress) {
-                        if (volinfo->defrag) {
-                                LOCK (&volinfo->defrag->lock);
+                        if (volinfo->rebal.defrag) {
+                                LOCK (&volinfo->rebal.defrag->lock);
                                 /* Fake 'rebalance-complete' so the graph change
                                    happens right away */
-                                volinfo->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
+                                volinfo->rebal.defrag_status =
+                                                GF_DEFRAG_STATUS_COMPLETE;
 
-                                UNLOCK (&volinfo->defrag->lock);
+                                UNLOCK (&volinfo->rebal.defrag->lock);
                         }
                         /* Graph change happens in rebalance _cbk function,
                            no need to do anything here */
@@ -1541,7 +1599,8 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
                 snprintf (key, 256, "brick%d", i);
                 ret = dict_get_str (dict, key, &brick);
                 if (ret) {
-                        gf_log ("", GF_LOG_ERROR, "Unable to get %s", key);
+                        gf_log (this->name, GF_LOG_ERROR, "Unable to get %s",
+                                key);
                         goto out;
                 }
 
@@ -1553,7 +1612,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
         }
         ret = dict_get_int32 (dict, "replica-count", &replica_count);
         if (!ret) {
-                gf_log (THIS->name, GF_LOG_INFO,
+                gf_log (this->name, GF_LOG_INFO,
                         "changing replica count %d to %d on volume %s",
                         volinfo->replica_count, replica_count,
                         volinfo->volname);
@@ -1575,34 +1634,36 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
 
         ret = glusterd_create_volfiles_and_notify_services (volinfo);
         if (ret) {
-                gf_log (THIS->name, GF_LOG_WARNING, "failed to create volfiles");
+                gf_log (this->name, GF_LOG_WARNING, "failed to create volfiles");
                 goto out;
         }
 
         ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
         if (ret) {
-                gf_log (THIS->name, GF_LOG_WARNING, "failed to store volinfo");
+                gf_log (this->name, GF_LOG_WARNING, "failed to store volinfo");
                 goto out;
         }
 
         /* Need to reset the defrag/rebalance status accordingly */
-        switch (volinfo->defrag_status) {
+        switch (volinfo->rebal.defrag_status) {
         case GF_DEFRAG_STATUS_FAILED:
         case GF_DEFRAG_STATUS_COMPLETE:
-                volinfo->defrag_status = 0;
+                volinfo->rebal.defrag_status = 0;
         default:
                 break;
         }
         if (!force && need_rebalance) {
                 /* perform the rebalance operations */
-                ret = glusterd_handle_defrag_start (volinfo, err_str, 4096,
-                                                    GF_DEFRAG_CMD_START_FORCE,
-                                                    glusterd_remove_brick_migrate_cbk);
+                ret = glusterd_handle_defrag_start
+                        (volinfo, err_str, sizeof (err_str),
+                         GF_DEFRAG_CMD_START_FORCE,
+                         glusterd_remove_brick_migrate_cbk, GD_OP_REMOVE_BRICK);
+
                 if (!ret)
                         volinfo->decommission_in_progress = 1;
 
                 if (ret) {
-                        gf_log (THIS->name, GF_LOG_ERROR,
+                        gf_log (this->name, GF_LOG_ERROR,
                                 "failed to start the rebalance");
                 }
         } else {
author	Kaushal M <kaushal@redhat.com>	2012-09-14 11:45:34 +0530
committer	Anand Avati <avati@redhat.com>	2012-12-19 13:32:49 -0800
commit	5eb8bac561b7374589bd72d597ed7eec95aa7de6 (patch)
tree	028a5aa6582b0497cf0ffda62bf023bc88b391b3 /xlators/mgmt/glusterd/src/glusterd-brick-ops.c
parent	da7ca1efcf3a621c27f05d621715e57fdc5aa397 (diff)