author     Krishnan Parthasarathi <kparthas@redhat.com>    2013-12-23 14:07:45 +0530
committer  Vijay Bellur <vbellur@redhat.com>               2013-12-23 06:56:34 -0800
commit     9d592246d6121aa38cd6fb6a875be4473d4979c8 (patch)
tree       41f33ebf536ae3fee4c1aa84bf8ed10b23bde9cd
parent     3ef4b7eb9d1f4e305e1b7c85ee5bb51d7b18e305 (diff)
glusterd: Aggregate tasks status in 'volume status [tasks]'
Backport of http://review.gluster.org/6230

Previously, glusterd used to just send back the local status of a task in a
'volume status [tasks]' command. As the rebalance operation is distributed and
asynchronous, this meant that different peers could give different status
values for a rebalance or remove-brick task.

With this patch, all the peers send back the task status as a part of the
'volume status' commit op, and the origin peer aggregates these to arrive at a
final status for the task.

The aggregation is only done for rebalance or remove-brick tasks. The
replace-brick task will have the same status on all the peers (see comment in
glusterd_volume_status_aggregate_tasks_status() for more information) and need
not be aggregated.

The rebalance process has 5 states,
 NOT_STARTED - rebalance process has not been started on this node
 STARTED     - rebalance process has been started and is still running
 STOPPED     - rebalance process was stopped by a 'rebalance/remove-brick stop'
               command
 COMPLETED   - rebalance process completed successfully
 FAILED      - rebalance process failed to complete successfully

The aggregation is done using the following precedence,
 STARTED > FAILED > STOPPED > COMPLETED > NOT_STARTED

The new changes make the 'volume status tasks' command a distributed command,
as we need to get the task status from all peers.

The following tests were performed,
- Start a remove-brick task and run the status command on a peer which doesn't
  have the brick being removed. The remove-brick status was given correctly as
  'in progress' and 'completed', instead of 'not started'.
- Start a rebalance task and run the status command. The status moved to
  'completed' only after rebalance completed on all nodes.

Also, change the CLI xml output code for rebalance status to use the same
algorithm for status aggregation.

Change-Id: Ifd4aff705aa51609a612d5a9194acc73e10a82c0
BUG: 1027094
Signed-off-by: Krishnan Parthasarathi <kparthas@redhat.com>
http://review.gluster.org/6230
Reviewed-on: http://review.gluster.org/6562
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
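To make the precedence rule above concrete, here is a minimal standalone C
sketch of the aggregation. The enum, rank table, and helper names below are
illustrative stand-ins, not glusterd's actual GF_DEFRAG_STATUS_* definitions.

/* Minimal sketch of the precedence-based status aggregation described
 * in the commit message. Illustrative only; not glusterd code.
 */
#include <stdio.h>

enum task_status {
        STATUS_NOT_STARTED = 0,
        STATUS_STARTED,
        STATUS_STOPPED,
        STATUS_COMPLETED,
        STATUS_FAILED,
        STATUS_MAX
};

/* Lower rank wins: STARTED > FAILED > STOPPED > COMPLETED > NOT_STARTED */
static const int rank[STATUS_MAX] = {
        [STATUS_STARTED]     = 1,
        [STATUS_FAILED]      = 2,
        [STATUS_STOPPED]     = 3,
        [STATUS_COMPLETED]   = 4,
        [STATUS_NOT_STARTED] = 5,
};

/* Fold one peer's reported status into the aggregate seen so far */
static enum task_status
aggregate_status (enum task_status overall, enum task_status peer)
{
        return (rank[peer] <= rank[overall]) ? peer : overall;
}

int
main (void)
{
        /* One peer finished, another still rebalancing: the task as a
         * whole must still be reported as in progress.
         */
        enum task_status overall = STATUS_COMPLETED;

        overall = aggregate_status (overall, STATUS_STARTED);
        printf ("still running: %s\n",
                (overall == STATUS_STARTED) ? "yes" : "no");
        return 0;
}

With this rule a task is reported as completed only after every peer reports
COMPLETED, and a FAILED peer is never masked by COMPLETED, STOPPED, or
NOT_STARTED results from other peers, which matches the rebalance test
described in the message.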
-rw-r--r--   cli/src/cli-xml-output.c                       25
-rw-r--r--   xlators/mgmt/glusterd/src/glusterd-op-sm.c      8
-rw-r--r--   xlators/mgmt/glusterd/src/glusterd-syncop.c     7
-rw-r--r--   xlators/mgmt/glusterd/src/glusterd-utils.c    216
-rw-r--r--   xlators/mgmt/glusterd/src/glusterd-utils.h      2
5 files changed, 236 insertions, 22 deletions
diff --git a/cli/src/cli-xml-output.c b/cli/src/cli-xml-output.c
index 6c7bc1752..cca51a6a0 100644
--- a/cli/src/cli-xml-output.c
+++ b/cli/src/cli-xml-output.c
@@ -3245,13 +3245,30 @@ cli_xml_output_vol_rebalance_status (xmlTextWriterPtr writer, dict_t *dict,
overall_elapsed = elapsed;
}
+ /* Rebalance has 5 states,
+ * NOT_STARTED, STARTED, STOPPED, COMPLETE, FAILED
+ * The precedence used to determine the aggregate status is as
+ * below,
+ * STARTED > FAILED > STOPPED > COMPLETE > NOT_STARTED
+ */
+ /* TODO: Move this to a common utilities file that both the CLI
+ * and glusterd can use.
+ * Until then, if the algorithm below is changed, change it in
+ * glusterd_volume_status_aggregate_tasks_status() in
+ * glusterd-utils.c as well.
+ */
+
if (-1 == overall_status)
overall_status = status_rcd;
- else if ((GF_DEFRAG_STATUS_COMPLETE == overall_status ||
- status_rcd > overall_status) &&
- (status_rcd != GF_DEFRAG_STATUS_COMPLETE))
+ int rank[] = {
+ [GF_DEFRAG_STATUS_STARTED] = 1,
+ [GF_DEFRAG_STATUS_FAILED] = 2,
+ [GF_DEFRAG_STATUS_STOPPED] = 3,
+ [GF_DEFRAG_STATUS_COMPLETE] = 4,
+ [GF_DEFRAG_STATUS_NOT_STARTED] = 5
+ };
+ if (rank[status_rcd] <= rank[overall_status])
overall_status = status_rcd;
- XML_RET_CHECK_AND_GOTO (ret, out);
/* </node> */
ret = xmlTextWriterEndElement (writer);
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index e9437057a..b69c0c857 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -2177,7 +2177,7 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr,
if (ret)
goto out;
- if (is_origin_glusterd ()) {
+ if (origin_glusterd) {
ret = 0;
if ((cmd & GF_CLI_STATUS_ALL)) {
ret = glusterd_get_all_volnames (rsp_dict);
@@ -2341,11 +2341,9 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr,
/* Active tasks */
/* Tasks are added only for normal volume status request for either a
- * single volume or all volumes, and only by the origin glusterd
+ * single volume or all volumes
*/
- if (((cmd & GF_CLI_STATUS_MASK) != GF_CLI_STATUS_NONE) ||
- !(cmd & (GF_CLI_STATUS_VOL | GF_CLI_STATUS_ALL)) ||
- !origin_glusterd)
+ if (!glusterd_status_has_tasks (cmd))
goto out;
ret = glusterd_aggregate_task_status (rsp_dict, volinfo);
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
index 33a865209..c975d01f1 100644
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
@@ -1229,12 +1229,7 @@ gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req)
glusterd_op_set_op (op);
INIT_LIST_HEAD (&conf->xaction_peers);
- /* Make 'volume status tasks' command a local operation.
- * This is accomplished by setting npeers to 0.
- */
- if (!glusterd_is_status_tasks_op (op, op_ctx))
- npeers = gd_build_peers_list (&conf->peers,
- &conf->xaction_peers, op);
+ npeers = gd_build_peers_list (&conf->peers, &conf->xaction_peers, op);
ret = gd_lock_op_phase (&conf->xaction_peers, op, op_ctx, &op_errstr,
npeers);
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 8cbff80f7..ad6731e62 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -7420,8 +7420,12 @@ glusterd_volume_status_add_peer_rsp (dict_t *this, char *key, data_t *value,
int32_t ret = 0;
/* Skip the following keys, they are already present in the ctx_dict */
+ /* Also, skip all the task related pairs. They will be added to the
+ * ctx_dict later
+ */
if (!strcmp (key, "count") || !strcmp (key, "cmd") ||
- !strcmp (key, "brick-index-max") || !strcmp (key, "other-count"))
+ !strcmp (key, "brick-index-max") || !strcmp (key, "other-count") ||
+ !strncmp (key, "task", 4))
return 0;
rsp_ctx = data;
@@ -7446,6 +7450,194 @@ glusterd_volume_status_add_peer_rsp (dict_t *this, char *key, data_t *value,
return 0;
}
+static int
+glusterd_volume_status_copy_tasks_to_ctx_dict (dict_t *this, char *key,
+ data_t *value, void *data)
+{
+ int ret = 0;
+ dict_t *ctx_dict = NULL;
+ data_t *new_value = NULL;
+
+ if (strncmp (key, "task", 4))
+ return 0;
+
+ ctx_dict = data;
+ GF_ASSERT (ctx_dict);
+
+ new_value = data_copy (value);
+ GF_ASSERT (new_value);
+
+ ret = dict_set (ctx_dict, key, new_value);
+
+ return ret;
+}
+
+int
+glusterd_volume_status_aggregate_tasks_status (dict_t *ctx_dict,
+ dict_t *rsp_dict)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ int local_count = 0;
+ int remote_count = 0;
+ int i = 0;
+ int j = 0;
+ char key[128] = {0,};
+ char *task_type = NULL;
+ int local_status = 0;
+ int remote_status = 0;
+ char *local_task_id = NULL;
+ char *remote_task_id = NULL;
+
+ GF_ASSERT (ctx_dict);
+ GF_ASSERT (rsp_dict);
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = dict_get_int32 (rsp_dict, "tasks", &remote_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get remote task count");
+ goto out;
+ }
+ /* Local count will not be present when this is called for the first
+ * time with the origin's rsp_dict
+ */
+ ret = dict_get_int32 (ctx_dict, "tasks", &local_count);
+ if (ret) {
+ ret = dict_foreach (rsp_dict,
+ glusterd_volume_status_copy_tasks_to_ctx_dict,
+ ctx_dict);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Failed to copy tasks"
+ "to ctx_dict.");
+ goto out;
+ }
+
+ if (local_count != remote_count) {
+ gf_log (this->name, GF_LOG_ERROR, "Local tasks count (%d) and "
+ "remote tasks count (%d) do not match. Not aggregating "
+ "tasks status.", local_count, remote_count);
+ ret = -1;
+ goto out;
+ }
+
+ /* Update the task statuses. For every remote task, search for the
+ * local task, and update the local task status based on the remote
+ * status.
+ */
+ for (i = 0; i < remote_count; i++) {
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "task%d.type", i);
+ ret = dict_get_str (rsp_dict, key, &task_type);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get task typpe from rsp dict");
+ goto out;
+ }
+
+ /* Skip replace-brick status as it is going to be the same on
+ * all peers. rb_status is set by the replace brick commit
+ * function on all peers based on the replace brick command.
+ * We return the value of rb_status as the status for a
+ * replace-brick task in a 'volume status' command.
+ */
+ if (!strcmp (task_type, "Replace brick"))
+ continue;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "task%d.status", i);
+ ret = dict_get_int32 (rsp_dict, key, &remote_status);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get task status from rsp dict");
+ goto out;
+ }
+ snprintf (key, sizeof (key), "task%d.id", i);
+ ret = dict_get_str (rsp_dict, key, &remote_task_id);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get task id from rsp dict");
+ goto out;
+ }
+ for (j = 0; j < local_count; j++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "task%d.id", j);
+ ret = dict_get_str (ctx_dict, key, &local_task_id);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get local task-id");
+ goto out;
+ }
+
+ if (strncmp (remote_task_id, local_task_id,
+ strlen (remote_task_id))) {
+ /* Quit if a matching local task is not found */
+ if (j == (local_count - 1)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not find matching local "
+ "task for task %s",
+ remote_task_id);
+ goto out;
+ }
+ continue;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "task%d.status", j);
+ ret = dict_get_int32 (ctx_dict, key, &local_status);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get local task status");
+ goto out;
+ }
+
+ /* Rebalance has 5 states,
+ * NOT_STARTED, STARTED, STOPPED, COMPLETE, FAILED
+ * The precedence used to determine the aggregate status
+ * is as below,
+ * STARTED > FAILED > STOPPED > COMPLETE > NOT_STARTED
+ */
+ /* TODO: Move this to a common utilities file that both
+ * the CLI and glusterd can use.
+ * Until then, if the algorithm below is changed, change
+ * it in cli_xml_output_vol_rebalance_status() in
+ * cli-xml-output.c as well.
+ */
+ ret = 0;
+ int rank[] = {
+ [GF_DEFRAG_STATUS_STARTED] = 1,
+ [GF_DEFRAG_STATUS_FAILED] = 2,
+ [GF_DEFRAG_STATUS_STOPPED] = 3,
+ [GF_DEFRAG_STATUS_COMPLETE] = 4,
+ [GF_DEFRAG_STATUS_NOT_STARTED] = 5
+ };
+ if (rank[remote_status] <= rank[local_status])
+ ret = dict_set_int32 (ctx_dict, key,
+ remote_status);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "update task status");
+ goto out;
+ }
+ break;
+ }
+ }
+
+out:
+ return ret;
+}
+
+gf_boolean_t
+glusterd_status_has_tasks (int cmd)
+{
+ if (((cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE) &&
+ (cmd & GF_CLI_STATUS_VOL))
+ return _gf_true;
+ return _gf_false;
+}
+
int
glusterd_volume_status_copy_to_op_ctx_dict (dict_t *aggr, dict_t *rsp_dict)
{
@@ -7499,11 +7691,8 @@ glusterd_volume_status_copy_to_op_ctx_dict (dict_t *aggr, dict_t *rsp_dict)
}
}
- if ((cmd & GF_CLI_STATUS_TASKS) != 0) {
- dict_copy (rsp_dict, aggr);
- ret = 0;
- goto out;
- }
+ if ((cmd & GF_CLI_STATUS_TASKS) != 0)
+ goto aggregate_tasks;
ret = dict_get_int32 (rsp_dict, "count", &rsp_node_count);
if (ret) {
@@ -7548,9 +7737,22 @@ glusterd_volume_status_copy_to_op_ctx_dict (dict_t *aggr, dict_t *rsp_dict)
ret = dict_set_int32 (ctx_dict, "other-count",
(other_count + rsp_other_count));
- if (ret)
+ if (ret) {
gf_log (THIS->name, GF_LOG_ERROR,
"Failed to update other-count");
+ goto out;
+ }
+
+aggregate_tasks:
+ /* Tasks are only present for a normal status command for a volume or
+ * for an explicit tasks status command for a volume
+ */
+ if (!(cmd & GF_CLI_STATUS_ALL) &&
+ (((cmd & GF_CLI_STATUS_TASKS) != 0) ||
+ glusterd_status_has_tasks (cmd)))
+ ret = glusterd_volume_status_aggregate_tasks_status (ctx_dict,
+ rsp_dict);
+
out:
return ret;
}
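The new glusterd_status_has_tasks () helper above reduces the "does this status
request carry task information" decision to a bitmask test. A rough standalone
illustration of that test follows; the flag values are invented for the sketch,
while the real GF_CLI_STATUS_* constants are defined in GlusterFS's headers.

/* Standalone illustration of the bitmask test in glusterd_status_has_tasks ().
 * The flag values below are made up for this sketch.
 */
#include <stdio.h>

#define STATUS_NONE  0x0000  /* no detail (mem, fd, clients, ...) requested */
#define STATUS_MASK  0x00ff  /* low bits select the requested detail        */
#define STATUS_VOL   0x0100  /* plain status of a single volume             */
#define STATUS_MEM   0x0001  /* example detail bit                          */

static int
status_has_tasks (int cmd)
{
        /* Tasks are reported only for a plain 'volume status <volname>':
         * no detail bits set, single-volume flag present.
         */
        return (((cmd & STATUS_MASK) == STATUS_NONE) &&
                (cmd & STATUS_VOL)) ? 1 : 0;
}

int
main (void)
{
        printf ("%d\n", status_has_tasks (STATUS_VOL));              /* 1 */
        printf ("%d\n", status_has_tasks (STATUS_VOL | STATUS_MEM)); /* 0 */
        return 0;
}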
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index 9907a03d4..7917fba19 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -619,4 +619,6 @@ glusterd_store_quota_conf_stamp_header (xlator_t *this, int fd);
int
glusterd_remove_auxiliary_mount (char *volname);
+gf_boolean_t
+glusterd_status_has_tasks (int cmd);
#endif