From 25daa42911d2ff697880ee29c591cac5f2abebed Mon Sep 17 00:00:00 2001 From: Amar Tumballi Date: Fri, 9 Sep 2011 09:42:51 +0530 Subject: support for de-commissioning a node using 'remove-brick'. To achieve this, we now create the volume file with the 'decommissioned-nodes' option in the distribute volume, then just perform the rebalance set of operations (with the 'force' flag set). From now onwards, the 'remove-brick' operation (with the 'start' option) tries to migrate data from the removed bricks to the existing bricks. 'remove-brick' also supports options similar to those of replace-brick. * (no options) -> works as 'force'; keeps the current behavior of remove-brick, i.e., no data-migration, volume changes. * start (starts remove-brick with the data-migration/draining process, which takes care of migrating data and, once complete, commits the changes to the volume file) * pause (stops data migration, but keeps the volume file intact with whatever extra options are set) * abort (stops data migration and falls back to the old configuration) * commit (if the volume is stopped, commits the changes to the volume file) * force (stops the data migration and commits the changes to the volume file) Change-Id: I3952bcfbe604a0952e68b6accace7014d5e401d3 BUG: 1952 Reviewed-on: http://review.gluster.com/118 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- cli/src/cli-cmd-parser.c | 51 ++++++++++++--- cli/src/cli-cmd-volume.c | 18 +++--- cli/src/cli-rpc-ops.c | 160 ++++++++++++++++++++++++++++++++++++++++------- cli/src/cli.h | 2 +- 4 files changed, 192 insertions(+), 39 deletions(-) (limited to 'cli') diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c index 3e68b2cce8e..8ea6581aff8 100644 --- a/cli/src/cli-cmd-parser.c +++ b/cli/src/cli-cmd-parser.c @@ -753,7 +753,7 @@ out: int32_t cli_cmd_volume_remove_brick_parse (const char **words, int wordcount, - dict_t **options) + dict_t **options, int *question) { dict_t *dict = NULL; char *volname = NULL; @@ -765,6 +765,10 @@ cli_cmd_volume_remove_brick_parse (const 
char **words, int wordcount, int32_t j = 0; char *tmp_brick = NULL; char *tmp_brick1 = NULL; + char *opwords[] = { "start", "commit", "pause", "abort", "status", + "force", NULL }; + char *w = NULL; + int32_t command = GF_OP_CMD_NONE; GF_ASSERT (words); GF_ASSERT (options); @@ -782,19 +786,53 @@ cli_cmd_volume_remove_brick_parse (const char **words, int wordcount, GF_ASSERT (volname); ret = dict_set_str (dict, "volname", volname); - if (ret) goto out; + w = str_getunamb (words[wordcount - 1], opwords); + if (!w) { + /* Should be default 'force' */ + command = GF_OP_CMD_COMMIT_FORCE; + if (question) + *question = 1; + } else { + /* handled this option */ + wordcount--; + if (!strcmp ("start", w)) { + command = GF_OP_CMD_START; + } else if (!strcmp ("commit", w)) { + command = GF_OP_CMD_COMMIT; + if (question) + *question = 1; + } else if (!strcmp ("pause", w)) { + command = GF_OP_CMD_PAUSE; + } else if (!strcmp ("abort", w)) { + command = GF_OP_CMD_ABORT; + } else if (!strcmp ("status", w)) { + command = GF_OP_CMD_STATUS; + } else if (!strcmp ("force", w)) { + command = GF_OP_CMD_COMMIT_FORCE; + if (question) + *question = 1; + } else { + GF_ASSERT (!"opword mismatch"); + ret = -1; + goto out; + } + } + if (wordcount < 4) { ret = -1; goto out; } - brick_index = 3; - + ret = dict_set_int32 (dict, "command", command); if (ret) - goto out; + gf_log ("cli", GF_LOG_INFO, "failed to set 'command' %d", + command); + + + brick_index = 3; tmp_index = brick_index; tmp_brick = GF_MALLOC(2048 * sizeof(*tmp_brick), gf_common_mt_char); @@ -805,7 +843,7 @@ cli_cmd_volume_remove_brick_parse (const char **words, int wordcount, ret = -1; goto out; } - + tmp_brick1 = GF_MALLOC(2048 * sizeof(*tmp_brick1), gf_common_mt_char); if (!tmp_brick1) { @@ -850,7 +888,6 @@ cli_cmd_volume_remove_brick_parse (const char **words, int wordcount, } ret = dict_set_int32 (dict, "count", brick_count); - if (ret) goto out; diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c index 
68c5ef57870..16dc32328d2 100644 --- a/cli/src/cli-cmd-volume.c +++ b/cli/src/cli-cmd-volume.c @@ -800,6 +800,7 @@ cli_cmd_volume_remove_brick_cbk (struct cli_state *state, gf_answer_t answer = GF_ANSWER_NO; int sent = 0; int parse_error = 0; + int need_question = 0; const char *question = "Removing brick(s) can result in data loss. " "Do you want to Continue?"; @@ -808,7 +809,8 @@ cli_cmd_volume_remove_brick_cbk (struct cli_state *state, if (!frame) goto out; - ret = cli_cmd_volume_remove_brick_parse (words, wordcount, &options); + ret = cli_cmd_volume_remove_brick_parse (words, wordcount, &options, + &need_question); if (ret) { cli_usage_out (word->pattern); @@ -816,11 +818,13 @@ cli_cmd_volume_remove_brick_cbk (struct cli_state *state, goto out; } - answer = cli_cmd_get_confirmation (state, question); - - if (GF_ANSWER_NO == answer) { - ret = 0; - goto out; + if (!(state->mode & GLUSTER_MODE_SCRIPT) && need_question) { + /* we need to ask question only in case of 'commit or force' */ + answer = cli_cmd_get_confirmation (state, question); + if (GF_ANSWER_NO == answer) { + ret = 0; + goto out; + } } proc = &cli_rpc_prog->proctable[GLUSTER_CLI_REMOVE_BRICK]; @@ -1304,7 +1308,7 @@ struct cli_cmd volume_cmds[] = { cli_cmd_volume_add_brick_cbk, "add brick to volume "}, - { "volume remove-brick ...", + { "volume remove-brick ... 
{start|pause|abort|status|commit|force}", cli_cmd_volume_remove_brick_cbk, "remove brick from volume "}, diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c index 3d6ce25ef1b..d7a5988f238 100644 --- a/cli/src/cli-rpc-ops.c +++ b/cli/src/cli-rpc-ops.c @@ -856,23 +856,36 @@ gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov, "rebalance process"); goto done; } - if (rsp.op_errno == 0) + + switch (rsp.op_errno) { + case GF_DEFRAG_STATUS_NOT_STARTED: status = "not started"; - if (rsp.op_errno == 1) + break; + case GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED: status = "step 1: layout fix in progress"; - if (rsp.op_errno == 2) + break; + case GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED: status = "step 2: data migration in progress"; - if (rsp.op_errno == 3) + break; + case GF_DEFRAG_STATUS_STOPPED: status = "stopped"; - if (rsp.op_errno == 4) + break; + case GF_DEFRAG_STATUS_COMPLETE: status = "completed"; - if (rsp.op_errno == 5) + break; + case GF_DEFRAG_STATUS_FAILED: status = "failed"; - if (rsp.op_errno == 6) + break; + case GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE: status = "step 1: layout fix complete"; - if (rsp.op_errno == 7) + break; + case GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE: status = "step 2: data migration complete"; - + break; + case GF_DEFRAG_STATUS_PAUSED: + status = "paused"; + break; + } if (rsp.files && (rsp.op_errno == 1)) { cli_out ("rebalance %s: fixed layout %"PRId64, status, rsp.files); @@ -1064,6 +1077,87 @@ out: return ret; } +int +gf_cli3_remove_brick_status_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + gf2_cli_defrag_vol_rsp rsp = {0,}; + char *status = "unknown"; + int ret = 0; + + if (-1 == req->rpc_status) { + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gf2_cli_defrag_vol_rsp); + if (ret < 0) { + gf_log ("", GF_LOG_ERROR, "error"); + goto out; + } + + ret = rsp.op_ret; + if (rsp.op_ret == -1) { + if (strcmp (rsp.op_errstr, "")) + cli_out ("%s", rsp.op_errstr); + else + cli_out 
("failed to get the status of " + "remove-brick process"); + goto out; + } + + switch (rsp.op_errno) { + case GF_DEFRAG_STATUS_NOT_STARTED: + status = "not started"; + break; + case GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED: + case GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED: + case GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE: + status = "in progress"; + break; + case GF_DEFRAG_STATUS_STOPPED: + status = "stopped"; + break; + case GF_DEFRAG_STATUS_COMPLETE: + case GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE: + status = "completed"; + break; + case GF_DEFRAG_STATUS_FAILED: + status = "failed"; + break; + case GF_DEFRAG_STATUS_PAUSED: + status = "paused"; + break; + } + + if (rsp.files && (rsp.op_errno == 1)) { + cli_out ("remove-brick %s: fixed layout %"PRId64, + status, rsp.files); + goto out; + } + if (rsp.files && (rsp.op_errno == 6)) { + cli_out ("remove-brick %s: fixed layout %"PRId64, + status, rsp.files); + goto out; + } + if (rsp.files) { + cli_out ("remove-brick %s: decommissioned %"PRId64 + " files of size %"PRId64, status, + rsp.files, rsp.size); + goto out; + } + + cli_out ("remove-brick %s", status); + +out: + if (rsp.op_errstr) + free (rsp.op_errstr); //malloced by xdr + if (rsp.volname) + free (rsp.volname); //malloced by xdr + cli_cmd_broadcast_response (ret); + return ret; +} + int gf_cli3_1_remove_brick_cbk (struct rpc_req *req, struct iovec *iov, @@ -2160,8 +2254,11 @@ gf_cli3_1_remove_brick (call_frame_t *frame, xlator_t *this, void *data) { gf1_cli_remove_brick_req req = {0,}; + gf1_cli_defrag_vol_req status_req = {0,}; int ret = 0; - dict_t *dict = NULL; + dict_t *dict = NULL; + int32_t command = 0; + char *volname = NULL; if (!frame || !this || !data) { ret = -1; @@ -2170,30 +2267,45 @@ gf_cli3_1_remove_brick (call_frame_t *frame, xlator_t *this, dict = data; - ret = dict_get_str (dict, "volname", &req.volname); - + ret = dict_get_str (dict, "volname", &volname); if (ret) goto out; ret = dict_get_int32 (dict, "count", &req.count); - if (ret) goto out; - ret = 
dict_allocate_and_serialize (dict, - &req.bricks.bricks_val, - (size_t *)&req.bricks.bricks_len); - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "failed to get serialized length of dict"); + ret = dict_get_int32 (dict, "command", &command); + if (ret) goto out; - } - ret = cli_cmd_submit (&req, frame, cli_rpc_prog, - GLUSTER_CLI_REMOVE_BRICK, NULL, - this, gf_cli3_1_remove_brick_cbk, - (xdrproc_t) xdr_gf1_cli_remove_brick_req); + if (command != GF_OP_CMD_STATUS) { + req.volname = volname; + + ret = dict_allocate_and_serialize (dict, + &req.bricks.bricks_val, + (size_t *)&req.bricks.bricks_len); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to get serialized length of dict"); + goto out; + } + + ret = cli_cmd_submit (&req, frame, cli_rpc_prog, + GLUSTER_CLI_REMOVE_BRICK, NULL, + this, gf_cli3_1_remove_brick_cbk, + (xdrproc_t) xdr_gf1_cli_remove_brick_req); + } else { + /* Need rebalance status to be sent :-) */ + status_req.volname = volname; + status_req.cmd = GF_DEFRAG_CMD_STATUS; + ret = cli_cmd_submit (&status_req, frame, cli_rpc_prog, + GLUSTER_CLI_DEFRAG_VOLUME, NULL, + this, gf_cli3_remove_brick_status_cbk, + (xdrproc_t) xdr_gf1_cli_defrag_vol_req); + + } out: gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret); diff --git a/cli/src/cli.h b/cli/src/cli.h index d3e1fc21bfa..1e0d69cd8a2 100644 --- a/cli/src/cli.h +++ b/cli/src/cli.h @@ -212,7 +212,7 @@ cli_cmd_volume_add_brick_parse (const char **words, int wordcount, int32_t cli_cmd_volume_remove_brick_parse (const char **words, int wordcount, - dict_t **options); + dict_t **options, int *question); int32_t cli_cmd_volume_replace_brick_parse (const char **words, int wordcount, -- cgit