From 7b5a21707edbbee1940f7cd3d05043bec998e51a Mon Sep 17 00:00:00 2001 From: Kaushal M Date: Fri, 14 Sep 2012 11:45:34 +0530 Subject: glusterd, cli: Task id's for async tasks This patch introduces task-id's for async tasks like rebalance, remove-brick and replace-brick. An id is generated for each task when it is started and displayed to the user in cli output. The status of running tasks is also included in the output of "volume status" along with its id, so that a user can easily track the progress of an async task. Also, * added tests for this feature into the regression test suite. * added a python script for creating files, 'create-files.py', courtesy Vijaykumar Koppad (vkoppad@redhat.com) into the test suite. Change-Id: Ib0c0d12e0d6c8f72ace48d303d7ff3102157e876 BUG: 857330 Signed-off-by: Kaushal M Reviewed-on: http://review.gluster.org/3942 Tested-by: Gluster Build System Reviewed-by: Anand Avati --- xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 161 +++++++++---- xlators/mgmt/glusterd/src/glusterd-handler.c | 2 +- xlators/mgmt/glusterd/src/glusterd-handshake.c | 2 + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 177 ++++++++++++-- xlators/mgmt/glusterd/src/glusterd-rebalance.c | 186 +++++++++++---- xlators/mgmt/glusterd/src/glusterd-replace-brick.c | 219 +++++++++++------ xlators/mgmt/glusterd/src/glusterd-store.c | 73 ++++-- xlators/mgmt/glusterd/src/glusterd-store.h | 1 + xlators/mgmt/glusterd/src/glusterd-utils.c | 265 ++++++++++++++++----- xlators/mgmt/glusterd/src/glusterd-utils.h | 6 + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 4 +- xlators/mgmt/glusterd/src/glusterd.c | 27 +++ xlators/mgmt/glusterd/src/glusterd.h | 40 +++- 13 files changed, 879 insertions(+), 284 deletions(-) (limited to 'xlators') diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index a14828e98..6ab859a10 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -822,6 +822,7 @@ out: ret = 0; //sent error to cli, prevent second reply } + GF_FREE (brick_list); free (cli_req.dict.dict_val); //its malloced by xdr @@ -1157,17 +1158,22 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) char msg[2048] = {0,}; int32_t flag = 0; gf1_op_commands cmd = GF_OP_CMD_NONE; + char *task_id_str = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); goto out; } ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, "Volume %s does not exist", volname); + gf_log (this->name, GF_LOG_ERROR, "Volume %s does not exist", volname); goto out; } @@ -1179,7 +1185,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) snprintf (msg, sizeof (msg), "Replace brick is in progress on " "volume %s. Please retry after replace-brick " "operation is committed or aborted", volname); - gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); *op_errstr = gf_strdup (msg); ret = -1; goto out; @@ -1187,7 +1193,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) ret = dict_get_int32 (dict, "command", &flag); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get brick count"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get brick count"); goto out; } cmd = flag; @@ -1205,20 +1211,38 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) case GF_OP_CMD_START: { if (GLUSTERD_STATUS_STARTED != volinfo->status) { - snprintf (msg, sizeof (msg), "Volume %s needs to be started " - "before remove-brick (you can use 'force' or " - "'commit' to override this behavior)", - volinfo->volname); + snprintf (msg, sizeof (msg), "Volume %s needs to be " + "started before remove-brick (you can use " + "'force' or 'commit' to override this " + "behavior)", volinfo->volname); errstr = gf_strdup (msg); - gf_log (THIS->name, GF_LOG_ERROR, "%s", errstr); + gf_log (this->name, GF_LOG_ERROR, "%s", errstr); goto out; } if (glusterd_is_defrag_on(volinfo)) { - errstr = gf_strdup("Rebalance is in progress. Please retry" - " after completion"); - gf_log ("glusterd", GF_LOG_ERROR, "%s", errstr); + errstr = gf_strdup("Rebalance is in progress. Please " + "retry after completion"); + gf_log (this->name, GF_LOG_ERROR, "%s", errstr); goto out; } + + if (is_origin_glusterd ()) { + ret = glusterd_generate_and_set_task_id + (dict, GF_REMOVE_BRICK_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to generate task-id"); + goto out; + } + } else { + ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY, + &task_id_str); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Missing remove-brick-id"); + ret = 0; + } + } break; } @@ -1240,7 +1264,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) ret = dict_get_int32 (dict, "count", &brick_count); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get brick count"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get brick count"); goto out; } @@ -1253,7 +1277,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) } out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); if (ret && errstr) { if (op_errstr) *op_errstr = errstr; @@ -1390,10 +1414,10 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr) } /* Need to reset the defrag/rebalance status accordingly */ - switch (volinfo->defrag_status) { + switch (volinfo->rebal.defrag_status) { case GF_DEFRAG_STATUS_FAILED: case GF_DEFRAG_STATUS_COMPLETE: - volinfo->defrag_status = 0; + volinfo->rebal.defrag_status = 0; default: break; } @@ -1412,42 +1436,67 @@ out: int glusterd_op_remove_brick (dict_t *dict, char **op_errstr) { - int ret = -1; - char *volname = NULL; - glusterd_volinfo_t *volinfo = NULL; - char *brick = NULL; - int32_t count = 0; - int32_t i = 1; - char key[256] = {0,}; - int32_t flag = 0; - char err_str[4096] = {0,}; - int need_rebalance = 0; - int force = 0; - gf1_op_commands cmd = 0; - int32_t replica_count = 0; - glusterd_brickinfo_t *brickinfo = NULL; - glusterd_brickinfo_t *tmp = NULL; + int ret = -1; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *brick = NULL; + int32_t count = 0; + int32_t i = 1; + char key[256] = {0,}; + int32_t flag = 0; + char err_str[4096] = {0,}; + int need_rebalance = 0; + int force = 0; + gf1_op_commands cmd = 0; + int32_t replica_count = 0; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *tmp = NULL; + char *task_id_str = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); goto out; } ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); + gf_log (this->name, GF_LOG_ERROR, "Unable to allocate memory"); goto out; } ret = dict_get_int32 (dict, "command", &flag); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get brick count"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get command"); goto out; } cmd = flag; + /* Set task-id, if available, in ctx dict for operations other than + * start + */ + if (is_origin_glusterd () && (cmd != GF_OP_CMD_START)) { + if (!uuid_is_null (volinfo->rebal.rebalance_id)) { + ret = glusterd_copy_uuid_to_dict + (volinfo->rebal.rebalance_id, dict, + GF_REMOVE_BRICK_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set remove-brick-id"); + goto out; + } + } + } + + /* Clear task-id on completion/stopping of remove-brick operation */ + if ((cmd != GF_OP_CMD_START) || (cmd != GF_OP_CMD_STATUS)) + uuid_clear (volinfo->rebal.rebalance_id); + ret = -1; switch (cmd) { case GF_OP_CMD_NONE: @@ -1468,7 +1517,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) } ret = glusterd_create_volfiles_and_notify_services (volinfo); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_WARNING, "failed to create volfiles"); goto out; } @@ -1476,7 +1525,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_WARNING, "failed to store volinfo"); goto out; } @@ -1486,6 +1535,14 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) } case GF_OP_CMD_START: + ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY, &task_id_str); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Missing remove-brick-id"); + ret = 0; + } else { + uuid_parse (task_id_str, volinfo->rebal.rebalance_id) ; + } force = 0; break; @@ -1496,13 +1553,14 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) case GF_OP_CMD_COMMIT_FORCE: if (volinfo->decommission_in_progress) { - if (volinfo->defrag) { - LOCK (&volinfo->defrag->lock); + if (volinfo->rebal.defrag) { + LOCK (&volinfo->rebal.defrag->lock); /* Fake 'rebalance-complete' so the graph change happens right away */ - volinfo->defrag_status = GF_DEFRAG_STATUS_COMPLETE; + volinfo->rebal.defrag_status = + GF_DEFRAG_STATUS_COMPLETE; - UNLOCK (&volinfo->defrag->lock); + UNLOCK (&volinfo->rebal.defrag->lock); } /* Graph change happens in rebalance _cbk function, no need to do anything here */ @@ -1525,7 +1583,8 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) snprintf (key, 256, "brick%d", i); ret = dict_get_str (dict, key, &brick); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get %s", key); + gf_log (this->name, GF_LOG_ERROR, "Unable to get %s", + key); goto out; } @@ -1537,7 +1596,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) } ret = dict_get_int32 (dict, "replica-count", &replica_count); if (!ret) { - gf_log (THIS->name, GF_LOG_INFO, + gf_log (this->name, GF_LOG_INFO, "changing replica count %d to %d on volume %s", volinfo->replica_count, replica_count, volinfo->volname); @@ -1559,34 +1618,36 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) ret = glusterd_create_volfiles_and_notify_services (volinfo); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, "failed to create volfiles"); + gf_log (this->name, GF_LOG_WARNING, "failed to create volfiles"); goto out; } ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, "failed to store volinfo"); + gf_log (this->name, GF_LOG_WARNING, "failed to store volinfo"); goto out; } /* Need to reset the defrag/rebalance status accordingly */ - switch (volinfo->defrag_status) { + switch (volinfo->rebal.defrag_status) { case GF_DEFRAG_STATUS_FAILED: case GF_DEFRAG_STATUS_COMPLETE: - volinfo->defrag_status = 0; + volinfo->rebal.defrag_status = 0; default: break; } if (!force && need_rebalance) { /* perform the rebalance operations */ - ret = glusterd_handle_defrag_start (volinfo, err_str, 4096, - GF_DEFRAG_CMD_START_FORCE, - glusterd_remove_brick_migrate_cbk); + ret = glusterd_handle_defrag_start + (volinfo, err_str, sizeof (err_str), + GF_DEFRAG_CMD_START_FORCE, + glusterd_remove_brick_migrate_cbk, GD_OP_REMOVE_BRICK); + if (!ret) volinfo->decommission_in_progress = 1; if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "failed to start the rebalance"); } } else { diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 9bbfdba8b..7708139fc 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -370,7 +370,7 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, goto out; snprintf (key, 256, "volume%d.rebalance", count); - ret = dict_set_int32 (volumes, key, volinfo->defrag_cmd); + ret = dict_set_int32 (volumes, key, volinfo->rebal.defrag_cmd); if (ret) goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c index 428051d4a..209dd736e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handshake.c +++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c @@ -434,6 +434,8 @@ out: glusterd_submit_reply (req, &rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_mgmt_hndsk_rsp); + ret = 0; + if (clnt_dict) dict_unref (clnt_dict); diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 66b58eca4..54faa8fa5 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -1826,6 +1826,79 @@ out: return ret; } +static int +_add_task_to_dict (dict_t *dict, glusterd_volinfo_t *volinfo, int op, int index) +{ + + int ret = -1; + char key[128] = {0,}; + char *uuid_str = NULL; + int status = 0; + xlator_t *this = NULL; + + GF_ASSERT (dict); + GF_ASSERT (volinfo); + + this = THIS; + GF_ASSERT (this); + + switch (op) { + case GD_OP_REBALANCE: + case GD_OP_REMOVE_BRICK: + uuid_str = gf_strdup (uuid_utoa (volinfo->rebal.rebalance_id)); + status = volinfo->rebal.defrag_status; + break; + + case GD_OP_REPLACE_BRICK: + uuid_str = gf_strdup (uuid_utoa (volinfo->rep_brick.rb_id)); + status = volinfo->rep_brick.rb_status; + break; + + default: + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "%s operation doesn't have a" + " task_id", gd_op_list[op]); + goto out; + } + + snprintf (key, sizeof (key), "task%d.type", index); + ret = dict_set_str (dict, key, + (char *)gd_op_list[op]); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Error setting task type in dict"); + goto out; + } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "task%d.id", index); + + + if (!uuid_str) + goto out; + ret = dict_set_dynstr (dict, key, uuid_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Error setting task id in dict"); + goto out; + } + uuid_str = NULL; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "task%d.status", index); + ret = dict_set_int32 (dict, key, status); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Error setting task status in dict"); + goto out; + } + +out: + if (uuid_str) + GF_FREE (uuid_str); + return ret; +} + static int glusterd_op_status_volume (dict_t *dict, char **op_errstr, dict_t *rsp_dict) @@ -1845,6 +1918,8 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr, dict_t *vol_opts = NULL; gf_boolean_t nfs_disabled = _gf_false; gf_boolean_t shd_enabled = _gf_true; + gf_boolean_t origin_glusterd = _gf_false; + int tasks = 0; this = THIS; GF_ASSERT (this); @@ -1854,6 +1929,8 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr, GF_ASSERT (dict); + origin_glusterd = is_origin_glusterd (); + ret = dict_get_uint32 (dict, "cmd", &cmd); if (ret) goto out; @@ -1866,11 +1943,10 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr, if ((cmd & GF_CLI_STATUS_ALL)) { ret = glusterd_get_all_volnames (rsp_dict); if (ret) - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "failed to get all volume " "names for status"); } - } ret = dict_set_uint32 (rsp_dict, "cmd", cmd); @@ -1886,7 +1962,7 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr, ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, "Volume with name: %s " + gf_log (this->name, GF_LOG_ERROR, "Volume with name: %s " "does not exist", volname); goto out; } @@ -1984,23 +2060,56 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr, ret = dict_set_int32 (rsp_dict, "brick-index-max", brick_index); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "Error setting brick-index-max to dict"); goto out; } ret = dict_set_int32 (rsp_dict, "other-count", other_count); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "Error setting other-count to dict"); goto out; } ret = dict_set_int32 (rsp_dict, "count", node_count); - if (ret) - gf_log (THIS->name, GF_LOG_ERROR, + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Error setting node count to dict"); + goto out; + } + + /* Active tasks */ + if (((cmd & GF_CLI_STATUS_MASK) != GF_CLI_STATUS_NONE) || + !origin_glusterd) + goto out; + + if (glusterd_is_defrag_on (volinfo)) { + ret = _add_task_to_dict (rsp_dict, volinfo, volinfo->rebal.op, + tasks); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add task details to dict"); + goto out; + } + tasks++; + } + if (glusterd_is_rb_ongoing (volinfo)) { + ret = _add_task_to_dict (rsp_dict, volinfo, GD_OP_REPLACE_BRICK, + tasks); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add task details to dict"); + goto out; + } + tasks++; + } + + ret = dict_set_int32 (rsp_dict, "tasks", tasks); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Error setting tasks count in dict"); out: - gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } @@ -2257,9 +2366,13 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) char *volname = NULL; uint32_t status_cmd = GF_CLI_STATUS_NONE; char *errstr = NULL; + xlator_t *this = THIS; GF_ASSERT (req); + this = THIS; + GF_ASSERT (this); + req_dict = dict_new (); if (!req_dict) goto out; @@ -2268,7 +2381,7 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) op = glusterd_op_get_op (); ctx = (void*)glusterd_op_get_ctx (); if (!ctx) { - gf_log ("", GF_LOG_ERROR, "Null Context for " + gf_log (this->name, GF_LOG_ERROR, "Null Context for " "op %d", op); ret = -1; goto out; @@ -2280,8 +2393,8 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) if (ret) goto out; ctx = op_ctx; - } #undef GD_SYNC_OPCODE_KEY + } dict = ctx; switch (op) { @@ -2316,7 +2429,7 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) { ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log (THIS->name, GF_LOG_CRITICAL, + gf_log (this->name, GF_LOG_CRITICAL, "volname is not present in " "operation ctx"); goto out; @@ -2329,6 +2442,7 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) if (ret) goto out; } + dict_destroy (req_dict); req_dict = dict_ref (dict); } break; @@ -2339,12 +2453,33 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) break; } + case GD_OP_REMOVE_BRICK: + { + dict_t *dict = ctx; + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, + "volname is not present in " + "operation ctx"); + goto out; + } + + ret = glusterd_dict_set_volid (dict, volname, + op_errstr); + if (ret) + goto out; + + dict_destroy (req_dict); + req_dict = dict_ref (dict); + } + break; + case GD_OP_STATUS_VOLUME: { ret = dict_get_uint32 (dict, "cmd", &status_cmd); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "Status command not present " "in op ctx"); goto out; @@ -2361,7 +2496,6 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) case GD_OP_ADD_BRICK: case GD_OP_REPLACE_BRICK: case GD_OP_RESET_VOLUME: - case GD_OP_REMOVE_BRICK: case GD_OP_LOG_ROTATE: case GD_OP_QUOTA: case GD_OP_PROFILE_VOLUME: @@ -2377,7 +2511,7 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) { ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log (THIS->name, GF_LOG_CRITICAL, + gf_log (this->name, GF_LOG_CRITICAL, "volname is not present in " "operation ctx"); goto out; @@ -3862,6 +3996,7 @@ glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict, char buf[1024] = {0,}; char *node_str = NULL; glusterd_conf_t *priv = NULL; + glusterd_rebalance_t *rebal = NULL; priv = THIS->private; GF_ASSERT (req_dict); @@ -3877,6 +4012,8 @@ glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict, if (ret) goto out; + rebal = &volinfo->rebal; + if (rsp_dict) { ret = glusterd_defrag_volume_status_update (volinfo, rsp_dict); @@ -3905,42 +4042,42 @@ glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict, memset (key, 0 , 256); snprintf (key, 256, "files-%d", i); - ret = dict_set_uint64 (op_ctx, key, volinfo->rebalance_files); + ret = dict_set_uint64 (op_ctx, key, rebal->rebalance_files); if (ret) gf_log (THIS->name, GF_LOG_ERROR, "failed to set file count"); memset (key, 0 , 256); snprintf (key, 256, "size-%d", i); - ret = dict_set_uint64 (op_ctx, key, volinfo->rebalance_data); + ret = dict_set_uint64 (op_ctx, key, rebal->rebalance_data); if (ret) gf_log (THIS->name, GF_LOG_ERROR, "failed to set size of xfer"); memset (key, 0 , 256); snprintf (key, 256, "lookups-%d", i); - ret = dict_set_uint64 (op_ctx, key, volinfo->lookedup_files); + ret = dict_set_uint64 (op_ctx, key, rebal->lookedup_files); if (ret) gf_log (THIS->name, GF_LOG_ERROR, "failed to set lookedup file count"); memset (key, 0 , 256); snprintf (key, 256, "status-%d", i); - ret = dict_set_int32 (op_ctx, key, volinfo->defrag_status); + ret = dict_set_int32 (op_ctx, key, rebal->defrag_status); if (ret) gf_log (THIS->name, GF_LOG_ERROR, "failed to set status"); memset (key, 0 , 256); snprintf (key, 256, "failures-%d", i); - ret = dict_set_uint64 (op_ctx, key, volinfo->rebalance_failures); + ret = dict_set_uint64 (op_ctx, key, rebal->rebalance_failures); if (ret) gf_log (THIS->name, GF_LOG_ERROR, "failed to set failure count"); memset (key, 0, 256); snprintf (key, 256, "run-time-%d", i); - ret = dict_set_double (op_ctx, key, volinfo->rebalance_time); + ret = dict_set_double (op_ctx, key, rebal->rebalance_time); if (ret) gf_log (THIS->name, GF_LOG_ERROR, "failed to set run-time"); diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index fa1af7d29..4c7282e82 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -88,12 +88,12 @@ glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, if (!volinfo) return 0; - defrag = volinfo->defrag; + defrag = volinfo->rebal.defrag; if (!defrag) return 0; if ((event == RPC_CLNT_DISCONNECT) && defrag->connected) - volinfo->defrag = NULL; + volinfo->rebal.defrag = NULL; GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv); @@ -126,12 +126,12 @@ glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, UNLOCK (&defrag->lock); if (!glusterd_is_service_running (pidfile, NULL)) { - if (volinfo->defrag_status == - GF_DEFRAG_STATUS_STARTED) { - volinfo->defrag_status = - GF_DEFRAG_STATUS_FAILED; + if (volinfo->rebal.defrag_status == + GF_DEFRAG_STATUS_STARTED) { + volinfo->rebal.defrag_status = + GF_DEFRAG_STATUS_FAILED; } else { - volinfo->defrag_cmd = 0; + volinfo->rebal.defrag_cmd = 0; } } @@ -142,7 +142,8 @@ glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, defrag->rpc = NULL; } if (defrag->cbk_fn) - defrag->cbk_fn (volinfo, volinfo->defrag_status); + defrag->cbk_fn (volinfo, + volinfo->rebal.defrag_status); GF_FREE (defrag); gf_log ("", GF_LOG_DEBUG, "%s got RPC_CLNT_DISCONNECT", @@ -161,7 +162,8 @@ glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, int glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, - size_t len, int cmd, defrag_cbk_fn_t cbk) + size_t len, int cmd, defrag_cbk_fn_t cbk, + glusterd_op_t op) { int ret = -1; glusterd_defrag_info_t *defrag = NULL; @@ -183,22 +185,24 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, ret = glusterd_defrag_start_validate (volinfo, op_errstr, len); if (ret) goto out; - if (!volinfo->defrag) - volinfo->defrag = GF_CALLOC (1, sizeof (glusterd_defrag_info_t), - gf_gld_mt_defrag_info); - if (!volinfo->defrag) + if (!volinfo->rebal.defrag) + volinfo->rebal.defrag = + GF_CALLOC (1, sizeof (*volinfo->rebal.defrag), + gf_gld_mt_defrag_info); + if (!volinfo->rebal.defrag) goto out; - defrag = volinfo->defrag; + defrag = volinfo->rebal.defrag; defrag->cmd = cmd; + volinfo->rebal.op = op; + LOCK_INIT (&defrag->lock); - volinfo->defrag_status = GF_DEFRAG_STATUS_STARTED; + volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_STARTED; glusterd_volinfo_reset_defrag_stats (volinfo); - volinfo->defrag_cmd = cmd; glusterd_store_perform_node_state_store (volinfo); GLUSTERD_GET_DEFRAG_DIR (defrag_path, volinfo, priv); @@ -294,13 +298,15 @@ glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo, int ret = -1; glusterd_defrag_info_t *defrag = NULL; - if (!volinfo->defrag) - volinfo->defrag = GF_CALLOC (1, sizeof (glusterd_defrag_info_t), - gf_gld_mt_defrag_info); - if (!volinfo->defrag) + if (!volinfo->rebal.defrag) + volinfo->rebal.defrag = + GF_CALLOC (1, sizeof (*volinfo->rebal.defrag), + gf_gld_mt_defrag_info); + + if (!volinfo->rebal.defrag) goto out; - defrag = volinfo->defrag; + defrag = volinfo->rebal.defrag; defrag->cmd = cmd; @@ -434,10 +440,12 @@ out: glusterd_op_sm (); if (ret) { - ret = glusterd_op_send_cli_response (GD_OP_REBALANCE, ret, 0, req, - dict, "operation failed"); + ret = glusterd_op_send_cli_response (GD_OP_REBALANCE, ret, 0, + req, dict, + "operation failed"); if (dict) dict_unref (dict); + } free (cli_req.dict.dict_val);//malloced by xdr @@ -449,38 +457,71 @@ out: int glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr) { - char *volname = NULL; - int ret = 0; - int32_t cmd = 0; - char msg[2048] = {0}; - glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; + int ret = 0; + int32_t cmd = 0; + char msg[2048] = {0}; + glusterd_volinfo_t *volinfo = NULL; + char *task_id_str = NULL; + dict_t *op_ctx = NULL; + xlator_t *this = 0; + + this = THIS; + GF_ASSERT (this); ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "volname not found"); + gf_log (this->name, GF_LOG_DEBUG, "volname not found"); goto out; } + ret = dict_get_int32 (dict, "rebalance-command", &cmd); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "cmd not found"); + gf_log (this->name, GF_LOG_DEBUG, "cmd not found"); goto out; } ret = glusterd_rebalance_cmd_validate (cmd, volname, &volinfo, msg, sizeof (msg)); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "failed to validate"); + gf_log (this->name, GF_LOG_DEBUG, "failed to validate"); goto out; } switch (cmd) { case GF_DEFRAG_CMD_START: case GF_DEFRAG_CMD_START_LAYOUT_FIX: case GF_DEFRAG_CMD_START_FORCE: + if (is_origin_glusterd ()) { + op_ctx = glusterd_op_get_ctx (); + if (!op_ctx) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Failed to get op_ctx"); + goto out; + } + + ret = glusterd_generate_and_set_task_id + (op_ctx, GF_REBALANCE_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to generate task-id"); + goto out; + } + } else { + ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, + &task_id_str); + if (ret) { + snprintf (msg, sizeof (msg), + "Missing rebalance-id"); + gf_log (this->name, GF_LOG_WARNING, "%s", msg); + ret = 0; + } + } ret = glusterd_defrag_start_validate (volinfo, - msg, sizeof (msg)); + msg, sizeof (msg)); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, - "start validate failed"); + gf_log (this->name, GF_LOG_DEBUG, + "start validate failed"); goto out; } break; @@ -503,43 +544,86 @@ out: int glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) { - char *volname = NULL; - int ret = 0; - int32_t cmd = 0; - char msg[2048] = {0}; - glusterd_volinfo_t *volinfo = NULL; - glusterd_conf_t *priv = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - glusterd_brickinfo_t *tmp = NULL; - gf_boolean_t volfile_update = _gf_false; - - priv = THIS->private; + char *volname = NULL; + int ret = 0; + int32_t cmd = 0; + char msg[2048] = {0}; + glusterd_volinfo_t *volinfo = NULL; + glusterd_conf_t *priv = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *tmp = NULL; + gf_boolean_t volfile_update = _gf_false; + char *task_id_str = NULL; + dict_t *ctx = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "volname not given"); + gf_log (this->name, GF_LOG_DEBUG, "volname not given"); goto out; } ret = dict_get_int32 (dict, "rebalance-command", &cmd); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "command not given"); + gf_log (this->name, GF_LOG_DEBUG, "command not given"); goto out; } + ret = glusterd_rebalance_cmd_validate (cmd, volname, &volinfo, msg, sizeof (msg)); if (ret) { - gf_log (THIS->name, GF_LOG_DEBUG, "cmd validate failed"); + gf_log (this->name, GF_LOG_DEBUG, "cmd validate failed"); goto out; } + /* Set task-id, if available, in op_ctx dict for operations other than + * start + */ + if (cmd == GF_DEFRAG_CMD_STATUS || cmd == GF_DEFRAG_CMD_STOP) { + if (!uuid_is_null (volinfo->rebal.rebalance_id)) { + ctx = glusterd_op_get_ctx (); + if (!ctx) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get op_ctx"); + ret = -1; + goto out; + } + + if (GD_OP_REMOVE_BRICK == volinfo->rebal.op) + ret = glusterd_copy_uuid_to_dict + (volinfo->rebal.rebalance_id, ctx, + GF_REMOVE_BRICK_TID_KEY); + else + ret = glusterd_copy_uuid_to_dict + (volinfo->rebal.rebalance_id, ctx, + GF_REBALANCE_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set task-id"); + goto out; + } + } + } + switch (cmd) { case GF_DEFRAG_CMD_START: case GF_DEFRAG_CMD_START_LAYOUT_FIX: case GF_DEFRAG_CMD_START_FORCE: + ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, &task_id_str); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "Missing rebalance " + "id"); + ret = 0; + } else { + uuid_parse (task_id_str, volinfo->rebal.rebalance_id) ; + } ret = glusterd_handle_defrag_start (volinfo, msg, sizeof (msg), - cmd, NULL); + cmd, NULL, GD_OP_REBALANCE); break; case GF_DEFRAG_CMD_STOP: /* Fall back to the old volume file in case of decommission*/ @@ -558,7 +642,7 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) ret = glusterd_create_volfiles_and_notify_services (volinfo); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_WARNING, "failed to create volfiles"); goto out; } @@ -566,7 +650,7 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_WARNING, "failed to store volinfo"); goto out; } diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c index bca306bd8..a6c5a9aaf 100644 --- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c +++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c @@ -168,7 +168,7 @@ glusterd_get_rb_dst_brickinfo (glusterd_volinfo_t *volinfo, if (!volinfo || !brickinfo) goto out; - *brickinfo = volinfo->dst_brick; + *brickinfo = volinfo->rep_brick.dst_brick; ret = 0; @@ -199,38 +199,43 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, char *savetok = NULL; char voldir[PATH_MAX] = {0}; char pidfile[PATH_MAX] = {0}; + char *task_id_str = NULL; + xlator_t *this = NULL; - priv = THIS->private; + this = THIS; + GF_ASSERT (this); + + priv = this->private; GF_ASSERT (priv); ret = dict_get_str (dict, "src-brick", &src_brick); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get src brick"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get src brick"); goto out; } - gf_log ("", GF_LOG_DEBUG, "src brick=%s", src_brick); + gf_log (this->name, GF_LOG_DEBUG, "src brick=%s", src_brick); ret = dict_get_str (dict, "dst-brick", &dst_brick); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get dest brick"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get dest brick"); goto out; } - gf_log ("", GF_LOG_DEBUG, "dst brick=%s", dst_brick); + gf_log (this->name, GF_LOG_DEBUG, "dst brick=%s", dst_brick); ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); goto out; } ret = dict_get_int32 (dict, "operation", (int32_t *)&replace_op); if (ret) { - gf_log ("", GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "dict get on replace-brick operation failed"); goto out; } @@ -262,7 +267,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, !glusterd_is_valid_volfpath (volname, dst_brick)) { snprintf (msg, sizeof (msg), "brick path %s is too " "long.", dst_brick); - gf_log ("", GF_LOG_ERROR, "%s", msg); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); *op_errstr = gf_strdup (msg); ret = -1; @@ -271,10 +276,10 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, ret = glusterd_check_gsync_running (volinfo, &is_run); if (ret && (is_run == _gf_false)) - gf_log ("", GF_LOG_WARNING, "Unable to get the status" + gf_log (this->name, GF_LOG_WARNING, "Unable to get the status" " of active "GEOREP" session"); if (is_run) { - gf_log ("", GF_LOG_WARNING, GEOREP" sessions active" + gf_log (this->name, GF_LOG_WARNING, GEOREP" sessions active" "for the volume %s ", volname); snprintf (msg, sizeof(msg), GEOREP" sessions are active " "for the volume %s.\nStop "GEOREP " sessions " @@ -289,29 +294,58 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, if (glusterd_is_defrag_on(volinfo)) { snprintf (msg, sizeof(msg), "Volume name %s rebalance is in " "progress. Please retry after completion", volname); - gf_log ("glusterd", GF_LOG_ERROR, "%s", msg); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); *op_errstr = gf_strdup (msg); ret = -1; goto out; } + ctx = glusterd_op_get_ctx(); + switch (replace_op) { case GF_REPLACE_OP_START: if (glusterd_is_rb_started (volinfo)) { - gf_log ("", GF_LOG_ERROR, "Replace brick is already " - "started for volume "); + snprintf (msg, sizeof (msg), "Replace brick is already " + "started for volume"); + gf_log (this->name, GF_LOG_ERROR, msg); + *op_errstr = gf_strdup (msg); ret = -1; goto out; } + if (is_origin_glusterd ()) { + if (!ctx) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Failed to get op_ctx"); + goto out; + } + + ret = glusterd_generate_and_set_task_id + (ctx, GF_REPLACE_BRICK_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to generate task-id"); + goto out; + } + + } else { + ret = dict_get_str (dict, GF_REPLACE_BRICK_TID_KEY, + &task_id_str); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Missing replace-brick-id"); + ret = 0; + } + } break; case GF_REPLACE_OP_PAUSE: if (glusterd_is_rb_paused (volinfo)) { - gf_log ("", GF_LOG_ERROR, "Replace brick is already " - "paused for volume "); + gf_log (this->name, GF_LOG_ERROR, "Replace brick is " + "already paused for volume "); ret = -1; goto out; } else if (!glusterd_is_rb_started(volinfo)) { - gf_log ("", GF_LOG_ERROR, "Replace brick is not" + gf_log (this->name, GF_LOG_ERROR, "Replace brick is not" " started for volume "); ret = -1; goto out; @@ -320,7 +354,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, case GF_REPLACE_OP_ABORT: if (!glusterd_is_rb_ongoing (volinfo)) { - gf_log ("", GF_LOG_ERROR, "Replace brick is not" + gf_log (this->name, GF_LOG_ERROR, "Replace brick is not" " started or paused for volume "); ret = -1; goto out; @@ -329,7 +363,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, case GF_REPLACE_OP_COMMIT: if (!glusterd_is_rb_ongoing (volinfo)) { - gf_log ("", GF_LOG_ERROR, "Replace brick is not " + gf_log (this->name, GF_LOG_ERROR, "Replace brick is not " "started for volume "); ret = -1; goto out; @@ -349,7 +383,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, goto out; } - gf_log (THIS->name, GF_LOG_ERROR, "%s", *op_errstr); + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); ret = -1; goto out; } @@ -369,10 +403,9 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, goto out; } - ctx = glusterd_op_get_ctx(); if (ctx) { if (!glusterd_is_fuse_available ()) { - gf_log ("glusterd", GF_LOG_ERROR, "Unable to open /dev/" + gf_log (this->name, GF_LOG_ERROR, "Unable to open /dev/" "fuse (%s), replace-brick command failed", strerror (errno)); snprintf (msg, sizeof(msg), "Fuse unavailable\n " @@ -384,7 +417,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, } if (!glusterd_is_local_addr (src_brickinfo->hostname)) { - gf_log ("", GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "I AM THE SOURCE HOST"); if (src_brickinfo->port && rsp_dict) { ret = dict_set_int32 (rsp_dict, "src-brick-port", @@ -416,14 +449,14 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, dup_dstbrick = gf_strdup (dst_brick); if (!dup_dstbrick) { ret = -1; - gf_log ("", GF_LOG_ERROR, "Memory allocation failed"); + gf_log (this->name, GF_LOG_ERROR, "Memory allocation failed"); goto out; } host = strtok_r (dup_dstbrick, ":", &savetok); path = strtok_r (NULL, ":", &savetok); if (!host || !path) { - gf_log ("", GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "dst brick %s is not of form :", dst_brick); ret = -1; @@ -439,7 +472,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, if (ret) { *op_errstr = gf_strdup (msg); ret = -1; - gf_log (THIS->name, GF_LOG_ERROR, *op_errstr); + gf_log (this->name, GF_LOG_ERROR, *op_errstr); goto out; } @@ -447,8 +480,8 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, (replace_op == GF_REPLACE_OP_START || replace_op == GF_REPLACE_OP_COMMIT_FORCE)) { - volinfo->src_brick = src_brickinfo; - volinfo->dst_brick = dst_brickinfo; + volinfo->rep_brick.src_brick = src_brickinfo; + volinfo->rep_brick.dst_brick = dst_brickinfo; } if (glusterd_rb_check_bricks (volinfo, src_brickinfo, dst_brickinfo)) { @@ -457,7 +490,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, *op_errstr = gf_strdup ("Incorrect source or " "destination brick"); if (*op_errstr) - gf_log (THIS->name, GF_LOG_ERROR, "%s", *op_errstr); + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); goto out; } @@ -499,7 +532,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, out: GF_FREE (dup_dstbrick); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } @@ -1487,6 +1520,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) char *dst_brick = NULL; glusterd_brickinfo_t *src_brickinfo = NULL; glusterd_brickinfo_t *dst_brickinfo = NULL; + char *task_id_str = NULL; this = THIS; GF_ASSERT (this); @@ -1496,26 +1530,23 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) ret = dict_get_str (dict, "src-brick", &src_brick); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get src brick"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get src brick"); goto out; } - gf_log (this->name, GF_LOG_DEBUG, - "src brick=%s", src_brick); + gf_log (this->name, GF_LOG_DEBUG, "src brick=%s", src_brick); ret = dict_get_str (dict, "dst-brick", &dst_brick); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get dst brick"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get dst brick"); goto out; } - gf_log (this->name, GF_LOG_DEBUG, - "dst brick=%s", dst_brick); + gf_log (this->name, GF_LOG_DEBUG, "dst brick=%s", dst_brick); ret = dict_get_str (dict, "volname", &volname); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); goto out; } @@ -1528,28 +1559,30 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); + gf_log (this->name, GF_LOG_ERROR, "Unable to allocate memory"); goto out; } ret = glusterd_volume_brickinfo_get_by_brick (src_brick, volinfo, &src_brickinfo); if (ret) { - gf_log ("", GF_LOG_DEBUG, "Unable to get src-brickinfo"); + gf_log (this->name, GF_LOG_DEBUG, + "Unable to get src-brickinfo"); goto out; } ret = glusterd_get_rb_dst_brickinfo (volinfo, &dst_brickinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get " + gf_log (this->name, GF_LOG_ERROR, "Unable to get " "replace brick destination brickinfo"); goto out; } ret = glusterd_resolve_brick (dst_brickinfo); if (ret) { - gf_log ("", GF_LOG_DEBUG, "Unable to resolve dst-brickinfo"); + gf_log (this->name, GF_LOG_DEBUG, + "Unable to resolve dst-brickinfo"); goto out; } @@ -1558,29 +1591,62 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) if (ret) goto out; + ctx = glusterd_op_get_ctx(); + if (!ctx) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get op_ctx"); + ret = -1; + goto out; + } + if ((GF_REPLACE_OP_START != replace_op)) { ret = rb_update_dstbrick_port (dst_brickinfo, rsp_dict, dict, replace_op); if (ret) goto out; + + /* Set task-id, if available, in op_ctx dict for operations + * other than start + */ + if (!uuid_is_null (volinfo->rep_brick.rb_id)) { + ret = glusterd_copy_uuid_to_dict + (volinfo->rep_brick.rb_id, ctx, + GF_REPLACE_BRICK_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set replace-brick-id"); + goto out; + } + } } switch (replace_op) { case GF_REPLACE_OP_START: { + ret = dict_get_str (dict, GF_REPLACE_BRICK_TID_KEY, &task_id_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Missing replace-brick-id"); + ret = 0; + } else { + uuid_parse (task_id_str, volinfo->rep_brick.rb_id); + } + if (!glusterd_is_local_addr (dst_brickinfo->hostname)) { - gf_log ("", GF_LOG_INFO, + gf_log (this->name, GF_LOG_INFO, "I AM THE DESTINATION HOST"); if (!glusterd_is_rb_paused (volinfo)) { - ret = rb_spawn_destination_brick (volinfo, dst_brickinfo); + ret = rb_spawn_destination_brick + (volinfo, dst_brickinfo); if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Failed to spawn destination brick"); + gf_log (this->name, GF_LOG_DEBUG, + "Failed to spawn destination " + "brick"); goto out; } } else { - gf_log ("", GF_LOG_ERROR, "Replace brick is already " - "started=> no need to restart dst brick "); + gf_log (this->name, GF_LOG_ERROR, + "Replace brick is already started=> no " + "need to restart dst brick "); } } @@ -1589,14 +1655,14 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) ret = rb_src_brick_restart (volinfo, src_brickinfo, 1); if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not restart src-brick"); + gf_log (this->name, GF_LOG_DEBUG, + "Could not restart src-brick"); goto out; } } if (!glusterd_is_local_addr (dst_brickinfo->hostname)) { - gf_log ("", GF_LOG_INFO, + gf_log (this->name, GF_LOG_INFO, "adding dst-brick port no"); ret = rb_update_dstbrick_port (dst_brickinfo, rsp_dict, @@ -1617,7 +1683,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) dst_brickinfo, GF_REPLACE_OP_COMMIT); if (ret) { - gf_log ("", GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "Commit operation failed"); goto out; } @@ -1627,16 +1693,17 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) case GF_REPLACE_OP_COMMIT_FORCE: { ret = dict_set_int32 (volinfo->dict, "enable-pump", 0); - gf_log (THIS->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "Received commit - will be adding dst brick and " "removing src brick"); if (!glusterd_is_local_addr (dst_brickinfo->hostname)) { - gf_log (THIS->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "I AM THE DESTINATION HOST"); - ret = rb_kill_destination_brick (volinfo, dst_brickinfo); + ret = rb_kill_destination_brick (volinfo, + dst_brickinfo); if (ret) { - gf_log (THIS->name, GF_LOG_CRITICAL, + gf_log (this->name, GF_LOG_CRITICAL, "Unable to cleanup dst brick"); goto out; } @@ -1644,39 +1711,41 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) ret = glusterd_nodesvcs_stop (volinfo); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "Unable to stop nfs server, ret: %d", ret); } ret = glusterd_op_perform_replace_brick (volinfo, src_brick, dst_brick); if (ret) { - gf_log (THIS->name, GF_LOG_CRITICAL, "Unable to add " - "dst-brick: %s to volume: %s", - dst_brick, volinfo->volname); + gf_log (this->name, GF_LOG_CRITICAL, "Unable to add " + "dst-brick: %s to volume: %s", dst_brick, + volinfo->volname); (void) glusterd_nodesvcs_handle_graph_change (volinfo); goto out; } - volinfo->defrag_status = 0; + volinfo->rebal.defrag_status = 0; ret = glusterd_nodesvcs_handle_graph_change (volinfo); if (ret) { - gf_log (THIS->name, GF_LOG_CRITICAL, + gf_log (this->name, GF_LOG_CRITICAL, "Failed to generate nfs volume file"); } ret = glusterd_fetchspec_notify (THIS); glusterd_set_rb_status (volinfo, GF_RB_STATUS_NONE); - glusterd_brickinfo_delete (volinfo->dst_brick); - volinfo->src_brick = volinfo->dst_brick = NULL; + glusterd_brickinfo_delete (volinfo->rep_brick.dst_brick); + volinfo->rep_brick.src_brick = NULL; + volinfo->rep_brick.dst_brick = NULL; + uuid_clear (volinfo->rep_brick.rb_id); } break; case GF_REPLACE_OP_PAUSE: { - gf_log ("", GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "Received pause - doing nothing"); ctx = glusterd_op_get_ctx (); if (ctx) { @@ -1684,7 +1753,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) dst_brickinfo, GF_REPLACE_OP_PAUSE); if (ret) { - gf_log ("", GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "Pause operation failed"); goto out; } @@ -1703,7 +1772,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) dst_brickinfo, GF_REPLACE_OP_ABORT); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "Abort operation failed"); goto out; } @@ -1711,7 +1780,8 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) ret = dict_set_int32 (volinfo->dict, "enable-pump", 0); if (ret) { - gf_log (THIS->name, GF_LOG_CRITICAL, "Unable to disable pump"); + gf_log (this->name, GF_LOG_CRITICAL, + "Unable to disable pump"); } if (!glusterd_is_local_addr (src_brickinfo->hostname)) { @@ -1726,7 +1796,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) } if (!glusterd_is_local_addr (dst_brickinfo->hostname)) { - gf_log (THIS->name, GF_LOG_INFO, + gf_log (this->name, GF_LOG_INFO, "I AM THE DESTINATION HOST"); ret = rb_kill_destination_brick (volinfo, dst_brickinfo); if (ret) { @@ -1736,14 +1806,15 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) } } glusterd_set_rb_status (volinfo, GF_RB_STATUS_NONE); - glusterd_brickinfo_delete (volinfo->dst_brick); - volinfo->src_brick = volinfo->dst_brick = NULL; + glusterd_brickinfo_delete (volinfo->rep_brick.dst_brick); + volinfo->rep_brick.src_brick = NULL; + volinfo->rep_brick.dst_brick = NULL; } break; case GF_REPLACE_OP_STATUS: { - gf_log ("", GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "received status - doing nothing"); ctx = glusterd_op_get_ctx (); if (ctx) { @@ -1751,7 +1822,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) ret = dict_set_str (ctx, "status-reply", "replace brick has been paused"); if (ret) - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "failed to set pump status" " in ctx"); goto out; @@ -1775,7 +1846,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) - gf_log (THIS->name, GF_LOG_ERROR, "Couldn't store" + gf_log (this->name, GF_LOG_ERROR, "Couldn't store" " replace brick operation's state"); out: diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 413c8a39a..bb7c0a76c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -891,29 +891,33 @@ glusterd_store_rbstate_write (int fd, glusterd_volinfo_t *volinfo) GF_ASSERT (fd > 0); GF_ASSERT (volinfo); - snprintf (buf, sizeof (buf), "%d", volinfo->rb_status); + snprintf (buf, sizeof (buf), "%d", volinfo->rep_brick.rb_status); ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_RB_STATUS, buf); if (ret) goto out; - if (volinfo->rb_status > GF_RB_STATUS_NONE) { + if (volinfo->rep_brick.rb_status > GF_RB_STATUS_NONE) { snprintf (buf, sizeof (buf), "%s:%s", - volinfo->src_brick->hostname, - volinfo->src_brick->path); + volinfo->rep_brick.src_brick->hostname, + volinfo->rep_brick.src_brick->path); ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_RB_SRC_BRICK, buf); if (ret) goto out; snprintf (buf, sizeof (buf), "%s:%s", - volinfo->dst_brick->hostname, - volinfo->dst_brick->path); + volinfo->rep_brick.dst_brick->hostname, + volinfo->rep_brick.dst_brick->path); ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_RB_DST_BRICK, buf); if (ret) goto out; + + uuid_unparse (volinfo->rep_brick.rb_id, buf); + ret = glusterd_store_save_value (fd, GF_REPLACE_BRICK_TID_KEY, + buf); } out: @@ -960,17 +964,29 @@ glusterd_store_node_state_write (int fd, glusterd_volinfo_t *volinfo) GF_ASSERT (fd > 0); GF_ASSERT (volinfo); - if (volinfo->defrag_cmd == GF_DEFRAG_CMD_STATUS) { + if (volinfo->rebal.defrag_cmd == GF_DEFRAG_CMD_STATUS) { ret = 0; goto out; } - snprintf (buf, sizeof (buf), "%d", volinfo->defrag_cmd); + snprintf (buf, sizeof (buf), "%d", volinfo->rebal.defrag_cmd); ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_DEFRAG, buf); if (ret) goto out; + snprintf (buf, sizeof (buf), "%d", volinfo->rebal.op); + ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_DEFRAG_OP, + buf); + if (ret) + goto out; + + if (volinfo->rebal.defrag_cmd) { + uuid_unparse (volinfo->rebal.rebalance_id, buf); + ret = glusterd_store_save_value (fd, + GF_REBALANCE_TID_KEY, + buf); + } out: gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; @@ -2145,22 +2161,26 @@ glusterd_store_retrieve_rbstate (char *volname) while (!ret) { if (!strncmp (key, GLUSTERD_STORE_KEY_RB_STATUS, strlen (GLUSTERD_STORE_KEY_RB_STATUS))) { - volinfo->rb_status = atoi (value); + volinfo->rep_brick.rb_status = atoi (value); } - if (volinfo->rb_status > GF_RB_STATUS_NONE) { + if (volinfo->rep_brick.rb_status > GF_RB_STATUS_NONE) { if (!strncmp (key, GLUSTERD_STORE_KEY_RB_SRC_BRICK, strlen (GLUSTERD_STORE_KEY_RB_SRC_BRICK))) { ret = glusterd_brickinfo_new_from_brick (value, - &volinfo->src_brick); + &volinfo->rep_brick.src_brick); if (ret) goto out; } else if (!strncmp (key, GLUSTERD_STORE_KEY_RB_DST_BRICK, strlen (GLUSTERD_STORE_KEY_RB_DST_BRICK))) { ret = glusterd_brickinfo_new_from_brick (value, - &volinfo->dst_brick); + &volinfo->rep_brick.dst_brick); if (ret) goto out; + } else if (!strncmp (key, GF_REPLACE_BRICK_TID_KEY, + strlen (GF_REPLACE_BRICK_TID_KEY))) { + uuid_parse (value, + volinfo->rep_brick.rb_id); } } @@ -2227,13 +2247,30 @@ glusterd_store_retrieve_node_state (char *volname) if (ret) goto out; - if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DEFRAG, - strlen (GLUSTERD_STORE_KEY_VOL_DEFRAG))) { - volinfo->defrag_cmd = atoi (value); - } + while (ret == 0) { + if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DEFRAG, + strlen (GLUSTERD_STORE_KEY_VOL_DEFRAG))) { + volinfo->rebal.defrag_cmd = atoi (value); + } - GF_FREE (key); - GF_FREE (value); + if (volinfo->rebal.defrag_cmd) { + if (!strncmp (key, GF_REBALANCE_TID_KEY, + strlen (GF_REBALANCE_TID_KEY))) + uuid_parse (value, volinfo->rebal.rebalance_id); + + if (!strncmp (key, GLUSTERD_STORE_KEY_DEFRAG_OP, + strlen (GLUSTERD_STORE_KEY_DEFRAG_OP))) + volinfo->rebal.op = atoi (value); + } + + GF_FREE (key); + GF_FREE (value); + key = NULL; + value = NULL; + + ret = glusterd_store_iter_get_next (iter, &key, &value, + &op_errno); + } if (op_errno != GD_STORE_EOF) goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index 1ab398c0b..03ef00059 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -52,6 +52,7 @@ typedef enum glusterd_store_ver_ac_{ #define GLUSTERD_STORE_KEY_RB_SRC_BRICK "rb_src" #define GLUSTERD_STORE_KEY_RB_DST_BRICK "rb_dst" #define GLUSTERD_STORE_KEY_VOL_DEFRAG "rebalance_status" +#define GLUSTERD_STORE_KEY_DEFRAG_OP "rebalance_op" #define GLUSTERD_STORE_KEY_USERNAME "username" #define GLUSTERD_STORE_KEY_PASSWORD "password" diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 879bb126a..4d2d5f8ab 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -1714,16 +1714,18 @@ int32_t glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, dict_t *dict, int32_t count) { - int32_t ret = -1; - char prefix[512] = {0,}; - char key[512] = {0,}; - glusterd_brickinfo_t *brickinfo = NULL; - int32_t i = 1; - char *volume_id_str = NULL; - char *src_brick = NULL; - char *dst_brick = NULL; - char *str = NULL; - glusterd_dict_ctx_t ctx = {0}; + int32_t ret = -1; + char prefix[512] = {0,}; + char key[512] = {0,}; + glusterd_brickinfo_t *brickinfo = NULL; + int32_t i = 1; + char *volume_id_str = NULL; + char *src_brick = NULL; + char *dst_brick = NULL; + char *str = NULL; + glusterd_dict_ctx_t ctx = {0}; + char *rebalance_id_str = NULL; + char *rb_id_str = NULL; GF_ASSERT (dict); GF_ASSERT (volinfo); @@ -1794,14 +1796,16 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, goto out; volume_id_str = gf_strdup (uuid_utoa (volinfo->volume_id)); - if (!volume_id_str) + if (!volume_id_str) { + ret = -1; goto out; - + } memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.volume_id", count); ret = dict_set_dynstr (dict, key, volume_id_str); if (ret) goto out; + volume_id_str = NULL; memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.username", count); @@ -1822,24 +1826,46 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, } memset (key, 0, sizeof (key)); - snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_STATUS, count); - ret = dict_set_int32 (dict, key, volinfo->rb_status); + snprintf (key, 256, "volume%d.rebalance", count); + ret = dict_set_int32 (dict, key, volinfo->rebal.defrag_cmd); if (ret) goto out; + if (volinfo->rebal.defrag_cmd) { + rebalance_id_str = gf_strdup (uuid_utoa + (volinfo->rebal.rebalance_id)); + if (!rebalance_id_str) { + ret = -1; + goto out; + } + memset (key, 0, sizeof (key)); + snprintf (key, 256, "volume%d.rebalance-id", count); + ret = dict_set_dynstr (dict, key, rebalance_id_str); + if (ret) + goto out; + rebalance_id_str = NULL; + } + memset (key, 0, sizeof (key)); - snprintf (key, 256, "volume%d.rebalance", count); - ret = dict_set_int32 (dict, key, volinfo->defrag_cmd); + snprintf (key, sizeof (key), "volume%d.rebalance-op", count); + ret = dict_set_uint32 (dict, key, volinfo->rebal.op); + if (ret) + goto out; + + memset (key, 0, sizeof (key)); + snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_STATUS, count); + ret = dict_set_int32 (dict, key, volinfo->rep_brick.rb_status); if (ret) goto out; - if (volinfo->rb_status > GF_RB_STATUS_NONE) { + + if (volinfo->rep_brick.rb_status > GF_RB_STATUS_NONE) { memset (key, 0, sizeof (key)); snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_SRC_BRICK, count); gf_asprintf (&src_brick, "%s:%s", - volinfo->src_brick->hostname, - volinfo->src_brick->path); + volinfo->rep_brick.src_brick->hostname, + volinfo->rep_brick.src_brick->path); ret = dict_set_dynstr (dict, key, src_brick); if (ret) goto out; @@ -1848,11 +1874,24 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_DST_BRICK, count); gf_asprintf (&dst_brick, "%s:%s", - volinfo->dst_brick->hostname, - volinfo->dst_brick->path); + volinfo->rep_brick.dst_brick->hostname, + volinfo->rep_brick.dst_brick->path); ret = dict_set_dynstr (dict, key, dst_brick); if (ret) goto out; + + rb_id_str = gf_strdup (uuid_utoa (volinfo->rep_brick.rb_id)); + if (!rb_id_str) { + ret = -1; + goto out; + } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.rb_id", count); + ret = dict_set_dynstr (dict, key, rb_id_str); + if (ret) + goto out; + rb_id_str = NULL; } snprintf (prefix, sizeof (prefix), "volume%d", count); @@ -1907,6 +1946,10 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, out: + GF_FREE (volume_id_str); + GF_FREE (rebalance_id_str); + GF_FREE (rb_id_str); + gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); return ret; @@ -2473,6 +2516,8 @@ glusterd_import_volinfo (dict_t *vols, int count, char *dst_brick = NULL; char *str = NULL; int rb_status = 0; + char *rebalance_id_str = NULL; + char *rb_id_str = NULL; GF_ASSERT (vols); GF_ASSERT (volinfo); @@ -2580,6 +2625,8 @@ glusterd_import_volinfo (dict_t *vols, int count, goto out; } + uuid_parse (volume_id_str, new_volinfo->volume_id); + memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.username", count); ret = dict_get_str (vols, key, &str); @@ -2609,23 +2656,46 @@ glusterd_import_volinfo (dict_t *vols, int count, memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.rebalance", count); - ret = dict_get_uint32 (vols, key, &new_volinfo->defrag_cmd); + ret = dict_get_uint32 (vols, key, &new_volinfo->rebal.defrag_cmd); if (ret) { snprintf (msg, sizeof (msg), "%s missing in payload for %s", key, volname); goto out; } - uuid_parse (volume_id_str, new_volinfo->volume_id); + if (new_volinfo->rebal.defrag_cmd) { + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.rebalance-id", count); + ret = dict_get_str (vols, key, &rebalance_id_str); + if (ret) { + /* This is not present in older glusterfs versions, + * so don't error out + */ + ret = 0; + } else { + uuid_parse (rebalance_id_str, + new_volinfo->rebal.rebalance_id); + } + } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.rebalance-op", count); + ret = dict_get_uint32 (vols, key,(uint32_t *) &new_volinfo->rebal.op); + if (ret) { + /* This is not present in older glusterfs versions, + * so don't error out + */ + ret = 0; + } memset (key, 0, sizeof (key)); snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_STATUS, count); ret = dict_get_int32 (vols, key, &rb_status); if (ret) goto out; - new_volinfo->rb_status = rb_status; + new_volinfo->rep_brick.rb_status = rb_status; - if (new_volinfo->rb_status > GF_RB_STATUS_NONE) { + if (new_volinfo->rep_brick.rb_status > GF_RB_STATUS_NONE) { memset (key, 0, sizeof (key)); snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_SRC_BRICK, @@ -2635,7 +2705,7 @@ glusterd_import_volinfo (dict_t *vols, int count, goto out; ret = glusterd_brickinfo_new_from_brick (src_brick, - &new_volinfo->src_brick); + &new_volinfo->rep_brick.src_brick); if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to create" " src brickinfo"); @@ -2650,12 +2720,24 @@ glusterd_import_volinfo (dict_t *vols, int count, goto out; ret = glusterd_brickinfo_new_from_brick (dst_brick, - &new_volinfo->dst_brick); + &new_volinfo->rep_brick.dst_brick); if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to create" " dst brickinfo"); goto out; } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.rb_id", count); + ret = dict_get_str (vols, key, &rb_id_str); + if (ret) { + /* This is not present in older glusterfs versions, + * so don't error out + */ + ret = 0; + } else { + uuid_parse (rb_id_str, new_volinfo->rep_brick.rb_id); + } } @@ -3161,8 +3243,8 @@ glusterd_pending_node_get_rpc (glusterd_pending_node_t *pending_node) } else if (pending_node->type == GD_NODE_REBALANCE) { volinfo = pending_node->node; - if (volinfo->defrag) - rpc = volinfo->defrag->rpc; + if (volinfo->rebal.defrag) + rpc = volinfo->rebal.defrag->rpc; } else if (pending_node->type == GD_NODE_NFS) { nfs = pending_node->node; @@ -4691,7 +4773,7 @@ out: int glusterd_is_defrag_on (glusterd_volinfo_t *volinfo) { - return (volinfo->defrag != NULL); + return (volinfo->rebal.defrag != NULL); } gf_boolean_t @@ -4793,8 +4875,8 @@ int glusterd_is_rb_started(glusterd_volinfo_t *volinfo) { gf_log ("", GF_LOG_DEBUG, - "is_rb_started:status=%d", volinfo->rb_status); - return (volinfo->rb_status == GF_RB_STATUS_STARTED); + "is_rb_started:status=%d", volinfo->rep_brick.rb_status); + return (volinfo->rep_brick.rb_status == GF_RB_STATUS_STARTED); } @@ -4802,9 +4884,9 @@ int glusterd_is_rb_paused ( glusterd_volinfo_t *volinfo) { gf_log ("", GF_LOG_DEBUG, - "is_rb_paused:status=%d", volinfo->rb_status); + "is_rb_paused:status=%d", volinfo->rep_brick.rb_status); - return (volinfo->rb_status == GF_RB_STATUS_PAUSED); + return (volinfo->rep_brick.rb_status == GF_RB_STATUS_PAUSED); } inline int @@ -4812,10 +4894,10 @@ glusterd_set_rb_status (glusterd_volinfo_t *volinfo, gf_rb_status_t status) { gf_log ("", GF_LOG_DEBUG, "setting status from %d to %d", - volinfo->rb_status, + volinfo->rep_brick.rb_status, status); - volinfo->rb_status = status; + volinfo->rep_brick.rb_status = status; return 0; } @@ -4823,19 +4905,27 @@ inline int glusterd_rb_check_bricks (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *src, glusterd_brickinfo_t *dst) { - if (!volinfo->src_brick || !volinfo->dst_brick) + glusterd_replace_brick_t *rb = NULL; + + GF_ASSERT (volinfo); + + rb = &volinfo->rep_brick; + + if (!rb->src_brick || !rb->dst_brick) return -1; - if (strcmp (volinfo->src_brick->hostname, src->hostname) || - strcmp (volinfo->src_brick->path, src->path)) { + if (strcmp (rb->src_brick->hostname, src->hostname) || + strcmp (rb->src_brick->path, src->path)) { gf_log("", GF_LOG_ERROR, "Replace brick src bricks differ"); return -1; } - if (strcmp (volinfo->dst_brick->hostname, dst->hostname) || - strcmp (volinfo->dst_brick->path, dst->path)) { + + if (strcmp (rb->dst_brick->hostname, dst->hostname) || + strcmp (rb->dst_brick->path, dst->path)) { gf_log ("", GF_LOG_ERROR, "Replace brick dst bricks differ"); return -1; } + return 0; } @@ -5831,7 +5921,7 @@ glusterd_volume_defrag_restart (glusterd_volinfo_t *volinfo, char *op_errstr, if (!glusterd_is_service_running (pidfile, &pid)) { glusterd_handle_defrag_start (volinfo, op_errstr, len, cmd, - cbk); + cbk, volinfo->rebal.op); } else { glusterd_rebalance_rpc_create (volinfo, priv, cmd); } @@ -5847,10 +5937,10 @@ glusterd_restart_rebalance (glusterd_conf_t *conf) char op_errstr[256]; list_for_each_entry (volinfo, &conf->volumes, vol_list) { - if (!volinfo->defrag_cmd) + if (!volinfo->rebal.defrag_cmd) continue; glusterd_volume_defrag_restart (volinfo, op_errstr, 256, - volinfo->defrag_cmd, NULL); + volinfo->rebal.defrag_cmd, NULL); } return ret; } @@ -5859,13 +5949,15 @@ glusterd_restart_rebalance (glusterd_conf_t *conf) void glusterd_volinfo_reset_defrag_stats (glusterd_volinfo_t *volinfo) { + glusterd_rebalance_t *rebal = NULL; GF_ASSERT (volinfo); - volinfo->rebalance_files = 0; - volinfo->rebalance_data = 0; - volinfo->lookedup_files = 0; - volinfo->rebalance_failures = 0; - volinfo->rebalance_time = 0; + rebal = &volinfo->rebal; + rebal->rebalance_files = 0; + rebal->rebalance_data = 0; + rebal->lookedup_files = 0; + rebal->rebalance_failures = 0; + rebal->rebalance_time = 0; } @@ -5990,17 +6082,17 @@ glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo, "failed to get run-time"); if (files) - volinfo->rebalance_files = files; + volinfo->rebal.rebalance_files = files; if (size) - volinfo->rebalance_data = size; + volinfo->rebal.rebalance_data = size; if (lookup) - volinfo->lookedup_files = lookup; + volinfo->rebal.lookedup_files = lookup; if (status) - volinfo->defrag_status = status; + volinfo->rebal.defrag_status = status; if (failures) - volinfo->rebalance_failures = failures; + volinfo->rebal.rebalance_failures = failures; if (run_time) - volinfo->rebalance_time = run_time; + volinfo->rebal.rebalance_time = run_time; return ret; } @@ -6715,7 +6807,8 @@ out: * time a lock_owner is set */ gf_boolean_t -is_origin_glusterd () { +is_origin_glusterd () +{ int ret = 0; uuid_t lock_owner = {0,}; @@ -6725,3 +6818,63 @@ is_origin_glusterd () { return (uuid_compare (MY_UUID, lock_owner) == 0); } + +int +glusterd_generate_and_set_task_id (dict_t *dict, char *key) +{ + int ret = -1; + uuid_t task_id = {0,}; + char *uuid_str = NULL; + xlator_t *this = NULL; + + GF_ASSERT (dict); + + this = THIS; + GF_ASSERT (this); + + uuid_generate (task_id); + uuid_str = gf_strdup (uuid_utoa (task_id)); + if (!uuid_str) { + ret = -1; + goto out; + } + + ret = dict_set_dynstr (dict, key, uuid_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set %s in dict", + key); + goto out; + } + gf_log (this->name, GF_LOG_INFO, "Generated task-id %s for key %s", + uuid_str, key); + +out: + if (ret) + GF_FREE (uuid_str); + return ret; +} + +int +glusterd_copy_uuid_to_dict (uuid_t uuid, dict_t *dict, char *key) +{ + int ret = -1; + char tmp_str[40] = {0,}; + char *task_id_str = NULL; + + GF_ASSERT (dict); + GF_ASSERT (key); + + uuid_unparse (uuid, tmp_str); + task_id_str = gf_strdup (tmp_str); + if (!task_id_str) + return -1; + + ret = dict_set_dynstr (dict, key, task_id_str); + if (ret) { + GF_FREE (task_id_str); + gf_log (THIS->name, GF_LOG_ERROR, + "Error setting uuid in dict with key %s", key); + } + + return 0; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index c90c9d918..e5e6123cb 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -486,4 +486,10 @@ gf_boolean_t glusterd_is_any_volume_in_server_quorum (xlator_t *this); gf_boolean_t does_gd_meet_server_quorum (xlator_t *this); + +int +glusterd_generate_and_set_task_id (dict_t *dict, char *key); + +int +glusterd_copy_uuid_to_dict (uuid_t uuid, dict_t *dict, char *key); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index 9ddeedb10..83e98f78c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -955,7 +955,7 @@ glusterd_op_stage_stop_volume (dict_t *dict, char **op_errstr) ret = -1; goto out; } - if (volinfo->rb_status != GF_RB_STATUS_NONE) { + if (volinfo->rep_brick.rb_status != GF_RB_STATUS_NONE) { snprintf (msg, sizeof(msg), "replace-brick session is " "in progress for the volume '%s'", volname); gf_log (THIS->name, GF_LOG_WARNING, "%s", msg); @@ -1595,7 +1595,7 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr) goto out; } - volinfo->defrag_status = 0; + volinfo->rebal.defrag_status = 0; list_add_tail (&volinfo->vol_list, &priv->volumes); vol_added = _gf_true; out: diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index 204a6adfc..4ef2ab696 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -68,6 +68,33 @@ struct rpcsvc_program *all_programs[] = { }; int rpcsvc_programs_count = (sizeof (all_programs) / sizeof (all_programs[0])); +const char *gd_op_list[GD_OP_MAX + 1] = { + [GD_OP_NONE] = "Invalid op", + [GD_OP_CREATE_VOLUME] = "Create", + [GD_OP_DELETE_VOLUME] = "Delete", + [GD_OP_START_VOLUME] = "Start", + [GD_OP_STOP_VOLUME] = "Stop", + [GD_OP_DEFRAG_VOLUME] = "Rebalance", + [GD_OP_ADD_BRICK] = "Add brick", + [GD_OP_REMOVE_BRICK] = "Remove brick", + [GD_OP_REPLACE_BRICK] = "Replace brick", + [GD_OP_SET_VOLUME] = "Set", + [GD_OP_RESET_VOLUME] = "Reset", + [GD_OP_SYNC_VOLUME] = "Sync", + [GD_OP_LOG_ROTATE] = "Log rotate", + [GD_OP_GSYNC_SET] = "Geo-replication", + [GD_OP_PROFILE_VOLUME] = "Profile", + [GD_OP_QUOTA] = "Quota", + [GD_OP_STATUS_VOLUME] = "Status", + [GD_OP_REBALANCE] = "Rebalance", + [GD_OP_HEAL_VOLUME] = "Heal", + [GD_OP_STATEDUMP_VOLUME] = "Statedump", + [GD_OP_LIST_VOLUME] = "Lists", + [GD_OP_CLEARLOCKS_VOLUME] = "Clear locks", + [GD_OP_DEFRAG_BRICK_VOLUME] = "Rebalance", + [GD_OP_MAX] = "Invalid op" +}; + static int glusterd_opinfo_init () { diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 7152bd6a2..aaf4df9b9 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -83,6 +83,7 @@ typedef enum glusterd_op_ { GD_OP_MAX, } glusterd_op_t; +extern const char * gd_op_list[]; struct glusterd_store_iter_ { int fd; FILE *file; @@ -236,6 +237,29 @@ typedef enum glusterd_vol_backend_ { GD_VOL_BK_BD = 1, } glusterd_vol_backend_t; +struct glusterd_rebalance_ { + gf_defrag_status_t defrag_status; + uint64_t rebalance_files; + uint64_t rebalance_data; + uint64_t lookedup_files; + glusterd_defrag_info_t *defrag; + gf_cli_defrag_type defrag_cmd; + uint64_t rebalance_failures; + uuid_t rebalance_id; + double rebalance_time; + glusterd_op_t op; +}; + +typedef struct glusterd_rebalance_ glusterd_rebalance_t; + +struct glusterd_replace_brick_ { + gf_rb_status_t rb_status; + glusterd_brickinfo_t *src_brick; + glusterd_brickinfo_t *dst_brick; + uuid_t rb_id; +}; + +typedef struct glusterd_replace_brick_ glusterd_replace_brick_t; struct glusterd_volinfo_ { char volname[GLUSTERD_MAX_VOLUME_NAME]; @@ -255,19 +279,10 @@ struct glusterd_volinfo_ { glusterd_store_handle_t *node_state_shandle; /* Defrag/rebalance related */ - gf_defrag_status_t defrag_status; - uint64_t rebalance_files; - uint64_t rebalance_data; - uint64_t lookedup_files; - glusterd_defrag_info_t *defrag; - gf_cli_defrag_type defrag_cmd; - uint64_t rebalance_failures; - double rebalance_time; + glusterd_rebalance_t rebal; /* Replace brick status */ - gf_rb_status_t rb_status; - glusterd_brickinfo_t *src_brick; - glusterd_brickinfo_t *dst_brick; + glusterd_replace_brick_t rep_brick; int version; uint32_t cksum; @@ -644,7 +659,8 @@ int glusterd_handle_cli_clearlocks_volume (rpcsvc_request_t *req); int glusterd_handle_cli_label_volume (rpcsvc_request_t *req); int glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, - size_t len, int cmd, defrag_cbk_fn_t cbk); + size_t len, int cmd, defrag_cbk_fn_t cbk, + glusterd_op_t op); int glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo, glusterd_conf_t *priv, int cmd); -- cgit