From 924702de358160b2536138c073d293b76512838a Mon Sep 17 00:00:00 2001
From: Jeff Darcy
Date: Mon, 3 Dec 2012 12:16:28 -0500
Subject: glusterd: add "volume label" command

This command is necessary when the local disk/filesystem containing a brick
is unexpectedly lost and then recreated. Since 961bc80c, trying to start
the brick will fail because the trusted.glusterfs.volume-id xattr is
missing, and if we can't start it then we can't replace-brick or self-heal
so we're stuck in a permanently degraded state. This command provides a
way to label the empty brick with the proper volume ID so that further
repair actions become possible.

Change-Id: I1c1e5273a018b7a6b8d0852daf111ddc3fddfdc2
BUG: 860297
Signed-off-by: Jeff Darcy
Reviewed-on: http://review.gluster.org/4259
Tested-by: Gluster Build System
Reviewed-by: Anand Avati
---
Review notes (placed after the "---" marker, so not part of the commit
message):
  * cli_cmd_volume_label_cbk: "parse_error = 0" was an assignment, which
    always evaluates to false, so "Volume label failed" could never be
    printed.  It is now a comparison.
  * glusterd_op_stage_label_volume: the return value of
    glusterd_volinfo_find() is now checked, so a failed lookup is reported
    instead of handing a possibly-NULL volinfo to
    glusterd_volume_brickinfo_get_by_brick().
  * glusterd_op_label_volume: replaced a stale comment about log-rotate
    that described behaviour this function does not have.
  * Restored the <VOLNAME>/<BRICK> placeholders in the CLI usage patterns.
  * Every change is an in-place line edit: hunk ranges and the diffstat are
    unchanged.  The "index" blob ids still refer to the original commit.

 cli/src/cli-cmd-volume.c                        |  61 +++++++++++++
 cli/src/cli-rpc-ops.c                           |  61 +++++++++++++
 rpc/rpc-lib/src/protocol-common.h               |   1 +
 tests/bugs/bug-860297.t                         |  88 ++++++++++++++++--
 xlators/mgmt/glusterd/src/glusterd-handler.c    |  66 ++++++++++++++
 xlators/mgmt/glusterd/src/glusterd-hooks.c      |   1 +
 xlators/mgmt/glusterd/src/glusterd-op-sm.c      |  16 ++++
 xlators/mgmt/glusterd/src/glusterd-rpc-ops.c    |   1 +
 xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 116 ++++++++++++++++++++++++
 xlators/mgmt/glusterd/src/glusterd.h            |   6 ++
 10 files changed, 410 insertions(+), 7 deletions(-)
 mode change 100644 => 100755 tests/bugs/bug-860297.t

diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
index 4dcb99ce2..f1526abae 100644
--- a/cli/src/cli-cmd-volume.c
+++ b/cli/src/cli-cmd-volume.c
@@ -1790,6 +1790,62 @@ out:
         return ret;
 }
 
+int
+cli_cmd_volume_label_cbk (struct cli_state *state, struct cli_cmd_word *word,
+                          const char **words, int wordcount)
+{
+        int                     ret = -1;
+        rpc_clnt_procedure_t    *proc = NULL;
+        call_frame_t            *frame = NULL;
+        dict_t                  *options = NULL;
+        int                     sent = 0;
+        int                     parse_error = 0;
+        cli_local_t             *local = NULL;
+
+        frame = create_frame (THIS, THIS->ctx->pool);
+        if (!frame)
+                goto out;
+
+        if (wordcount != 4) {
+                cli_usage_out (word->pattern);
+                parse_error = 1;
+                goto out;
+        }
+
+        options = dict_new();
+        if (!options) {
+                cli_out ("Could not allocate dict for label_volume");
+                goto out;
+        }
+
+        ret = dict_set_str (options, "volname", (char *)words[2]);
+        if (ret)
+                goto out;
+
+        ret = dict_set_str (options, "brick", (char *)words[3]);
+        if (ret)
+                goto out;
+
+        proc = &cli_rpc_prog->proctable[GLUSTER_CLI_LABEL_VOLUME];
+
+        CLI_LOCAL_INIT (local, words, frame, options);
+
+        if (proc->fn) {
+                ret = proc->fn (frame, THIS, options);
+        }
+
+out:
+        if (ret) {
+                cli_cmd_sent_status_get (&sent);
+                if ((sent == 0) && (parse_error == 0))
+                        cli_out ("Volume label failed");
+        }
+
+        CLI_STACK_DESTROY (frame);
+
+        return ret;
+}
+
 struct cli_cmd volume_cmds[] = {
         { "volume info [all|<VOLNAME>]",
           cli_cmd_volume_info_cbk,
@@ -1900,6 +1956,11 @@ struct cli_cmd volume_cmds[] = {
           "Clear locks held on path"
         },
 
+        {"volume label <VOLNAME> <BRICK>",
+         cli_cmd_volume_label_cbk,
+         "Add a volume label to an empty replacement brick"
+        },
+
         { NULL, NULL, NULL }
 };
 
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
index da239b51e..e772137ed 100644
--- a/cli/src/cli-rpc-ops.c
+++ b/cli/src/cli-rpc-ops.c
@@ -6253,6 +6253,66 @@ out:
         return ret;
 }
 
+int32_t
+gf_cli_label_volume_cbk (struct rpc_req *req, struct iovec *iov, int count,
+                         void *myframe)
+{
+        gf_cli_rsp      rsp = {0,};
+        int             ret = -1;
+        dict_t          *dict = NULL;
+
+        if (-1 == req->rpc_status)
+                goto out;
+        ret = xdr_to_generic (*iov, &rsp,
+                              (xdrproc_t)xdr_gf_cli_rsp);
+        if (ret < 0) {
+
+                gf_log ("cli", GF_LOG_ERROR, "XDR decoding failed");
+                goto out;
+        }
+        gf_log ("cli", GF_LOG_DEBUG, "Received response to label");
+
+        if (rsp.op_ret) {
+                cli_err ("Volume label unsuccessful");
+                cli_err ("%s", rsp.op_errstr);
+
+        } else {
+                cli_out ("Volume label successful");
+
+        }
+
+        ret = rsp.op_ret;
+
+out:
+        if (dict)
+                dict_unref (dict);
+        cli_cmd_broadcast_response (ret);
+        return ret;
+}
+
+int32_t
+gf_cli_label_volume (call_frame_t *frame, xlator_t *this, void *data)
+{
+        gf_cli_req      req = {{0,}};
+        dict_t          *options = NULL;
+        int             ret = -1;
+
+        if (!frame || !this || !data)
+                goto out;
+
+        options = data;
+
+        ret = cli_to_glusterd (&req, frame, gf_cli_label_volume_cbk,
+                               (xdrproc_t) xdr_gf_cli_req, options,
+                               GLUSTER_CLI_LABEL_VOLUME, this, cli_rpc_prog,
+                               NULL);
+out:
+        gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+        GF_FREE (req.dict.dict_val);
+        return ret;
+}
+
 int
 cli_to_glusterd (gf_cli_req *req, call_frame_t *frame,
                  fop_cbk_fn_t cbkfn, xdrproc_t xdrproc, dict_t *dict,
@@ -6364,6 +6424,7 @@ struct rpc_clnt_procedure gluster_cli_actors[GLUSTER_CLI_MAXVALUE] = {
 #ifdef HAVE_BD_XLATOR
         [GLUSTER_CLI_BD_OP] = {"BD_OP", gf_cli_bd_op},
 #endif
+        [GLUSTER_CLI_LABEL_VOLUME] = {"LABEL_VOLUME", gf_cli_label_volume},
 };
 
 struct rpc_clnt_program cli_prog = {
diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h
index 97017e5fe..1f30d4322 100644
--- a/rpc/rpc-lib/src/protocol-common.h
+++ b/rpc/rpc-lib/src/protocol-common.h
@@ -155,6 +155,7 @@ enum gluster_cli_procnum {
         GLUSTER_CLI_CLRLOCKS_VOLUME,
         GLUSTER_CLI_UUID_RESET,
         GLUSTER_CLI_BD_OP,
+        GLUSTER_CLI_LABEL_VOLUME,
         GLUSTER_CLI_MAXVALUE,
 };
 
diff --git a/tests/bugs/bug-860297.t b/tests/bugs/bug-860297.t
old mode 100644
new mode 100755
index 2a3ca7a7a..fa1b1ff28
--- a/tests/bugs/bug-860297.t
+++ b/tests/bugs/bug-860297.t
@@ -1,13 +1,87 @@
 #!/bin/bash
+
 . $(dirname $0)/../include.rc
-cleanup;
+cleanup
+
+function recreate {
+        # The rm is necessary so we don't get fooled by leftovers from old runs.
+        rm -rf $1 && mkdir -p $1
+}
+
+function count_bricks {
+        local count
+        local pid
+        count=0
+        for pid in /var/lib/glusterd/vols/${1}/run/*pid; do
+                if kill -0 $(cat $pid); then
+                        count=$((count+1))
+                fi
+        done
+        echo $count
+}
 
 TEST glusterd
 TEST pidof glusterd
-TEST $CLI volume info
-TEST $CLI volume create $V0 $H0:$B0/brick1
-setfattr -x trusted.glusterfs.volume-id $B0/brick1
-## If Extended attribute trusted.glusterfs.volume-id is not present
-## then volume should not be able to start
-TEST ! $CLI volume start $V0;
+TEST $CLI volume info;
+
+## Start and create a volume
+TEST recreate ${B0}/${V0}-0
+TEST recreate ${B0}/${V0}-1
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1}
+
+function volinfo_field()
+{
+    local vol=$1;
+    local field=$2;
+
+    $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify that all bricks start.
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT 2 count_bricks $V0
+TEST $CLI volume stop $V0
+
+# Nuke one of the bricks and make sure it *doesn't* start.
+TEST recreate ${B0}/${V0}-1
+# We can't do the usual TEST/startup thing here because of another bug. If
+# a server fails to start a brick, it won't start any others either. Since
+# all of our bricks in testing are on one server, that means no bricks start
+# and so the volume doesn't start either. Changing the order etc. doesn't
+# help, because the attempted startup order is non-deterministic. Instead,
+# we just don't rely on whether or not the volume starts; the brick count is
+# sufficient for our purposes.
+$CLI volume start $V0;
+EXPECT 1 count_bricks $V0
+# If we can't depend on the volume starting, we can't depend on it stopping
+# either.
+$CLI volume stop $V0
+
+# Label the recreated brick and make sure it starts now.
+TEST $CLI volume label $V0 ${H0}:${B0}/${V0}-1
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT 2 count_bricks $V0
+
+# Make sure we can mount and use the volume.
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+TEST dd if=/dev/zero of=$M0/block bs=4k count=1
+
+if [ "$EXIT_EARLY" = "1" ]; then
+        exit 0;
+fi
+
+## Finish up
+TEST umount $M0
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
 cleanup;
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index 673bfaeb7..9bbfdba8b 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -2934,6 +2934,69 @@ out:
         return ret;
 }
 
+int
+glusterd_handle_cli_label_volume (rpcsvc_request_t *req)
+{
+        int32_t         ret = -1;
+        gf_cli_req      cli_req = {{0,}};
+        glusterd_op_t   cli_op = GD_OP_LABEL_VOLUME;
+        char            *volname = NULL;
+        dict_t          *dict = NULL;
+
+        GF_ASSERT (req);
+
+        ret = -1;
+        if (!xdr_to_generic (req->msg[0], &cli_req,
+                             (xdrproc_t)xdr_gf_cli_req)) {
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        if (cli_req.dict.dict_len) {
+                dict = dict_new ();
+
+                ret = dict_unserialize (cli_req.dict.dict_val,
+                                        cli_req.dict.dict_len,
+                                        &dict);
+                if (ret < 0) {
+                        gf_log (THIS->name, GF_LOG_ERROR,
+                                "failed to unserialize req-buffer to"
+                                " dictionary");
+                        goto out;
+                }
+
+        } else {
+                ret = -1;
+                gf_log (THIS->name, GF_LOG_ERROR, "Empty cli request.");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_log (THIS->name, GF_LOG_ERROR, "failed to get volname");
+                goto out;
+        }
+
+        gf_log (THIS->name, GF_LOG_INFO, "Received label volume req "
+                "for volume %s", volname);
+
+        ret = glusterd_op_begin (req, cli_op, dict);
+
+out:
+        glusterd_friend_sm ();
+        glusterd_op_sm ();
+
+        if (ret) {
+                ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+                                                     dict, "operation failed");
+                if (dict)
+                        dict_unref (dict);
+        }
+        free (cli_req.dict.dict_val);
+
+        return ret;
+}
+
 int
 glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
                            rpc_clnt_event_t event,
@@ -3239,6 +3302,9 @@ rpcsvc_actor_t gd_svc_cli_actors[] = {
 #ifdef HAVE_BD_XLATOR
         [GLUSTER_CLI_BD_OP] = {"BD_OP", GLUSTER_CLI_BD_OP, glusterd_handle_cli_bd_op, NULL, 0},
 #endif
+        [GLUSTER_CLI_LABEL_VOLUME] = {"LABEL_VOLUME", GLUSTER_CLI_LABEL_VOLUME,
+                                      glusterd_handle_cli_label_volume, NULL,
+                                      0},
 };
 
 struct rpcsvc_program gd_svc_cli_prog = {
diff --git a/xlators/mgmt/glusterd/src/glusterd-hooks.c b/xlators/mgmt/glusterd/src/glusterd-hooks.c
index a61e1e85f..543fdc1e6 100644
--- a/xlators/mgmt/glusterd/src/glusterd-hooks.c
+++ b/xlators/mgmt/glusterd/src/glusterd-hooks.c
@@ -59,6 +59,7 @@ char glusterd_hook_dirnames[GD_OP_MAX][256] =
         [GD_OP_LIST_VOLUME] = EMPTY,
         [GD_OP_CLEARLOCKS_VOLUME] = EMPTY,
         [GD_OP_DEFRAG_BRICK_VOLUME] = EMPTY,
+        [GD_OP_LABEL_VOLUME] = EMPTY,
 };
 #undef EMPTY
 
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index 48ae5b660..66b58eca4 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -2373,6 +2373,7 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)
 #ifdef HAVE_BD_XLATOR
                 case GD_OP_BD_OP:
 #endif
+                case GD_OP_LABEL_VOLUME:
                         {
                                 ret = dict_get_str (dict, "volname", &volname);
                                 if (ret) {
@@ -3502,15 +3503,23 @@ glusterd_op_stage_validate (glusterd_op_t op, dict_t *dict, char **op_errstr,
                         ret = glusterd_op_stage_statedump_volume (dict,
                                                                   op_errstr);
                         break;
+
                 case GD_OP_CLEARLOCKS_VOLUME:
                         ret = glusterd_op_stage_clearlocks_volume (dict,
                                                                    op_errstr);
                         break;
+
 #ifdef HAVE_BD_XLATOR
                 case GD_OP_BD_OP:
                         ret = glusterd_op_stage_bd (dict, op_errstr);
                         break;
+
 #endif
+
+                case GD_OP_LABEL_VOLUME:
+                        ret = glusterd_op_stage_label_volume (dict, op_errstr);
+                        break;
+
                 default:
                         gf_log ("", GF_LOG_ERROR, "Unknown op %d",
                                 op);
@@ -3606,11 +3615,17 @@ glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr,
                 case GD_OP_CLEARLOCKS_VOLUME:
                         ret = glusterd_op_clearlocks_volume (dict, op_errstr);
                         break;
+
 #ifdef HAVE_BD_XLATOR
                 case GD_OP_BD_OP:
                         ret = 0;
                         break;
 #endif
+
+                case GD_OP_LABEL_VOLUME:
+                        ret = glusterd_op_label_volume (dict, op_errstr);
+                        break;
+
                 default:
                         gf_log ("", GF_LOG_ERROR, "Unknown op %d",
                                 op);
@@ -5386,6 +5401,7 @@ glusterd_op_free_ctx (glusterd_op_t op, void *ctx)
 #ifdef HAVE_BD_XLATOR
                 case GD_OP_BD_OP:
 #endif
+                case GD_OP_LABEL_VOLUME:
                         dict_unref (ctx);
                         break;
                 default:
diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
index 21fad7e93..2b3e7b8e7 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
@@ -140,6 +140,7 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret,
         case GD_OP_CLEARLOCKS_VOLUME:
         case GD_OP_HEAL_VOLUME:
         case GD_OP_BD_OP:
+        case GD_OP_LABEL_VOLUME:
         {
                 /*nothing specific to be done*/
                 break;
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index 263c8bf01..9ddeedb10 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -1241,6 +1241,52 @@ out:
         return ret;
 }
 
+int
+glusterd_op_stage_label_volume (dict_t *dict, char **op_errstr)
+{
+        int                     ret = -1;
+        char                    *volname = NULL;
+        glusterd_volinfo_t      *volinfo = NULL;
+        gf_boolean_t            exists = _gf_false;
+        char                    msg[2048] = {0};
+        char                    *brick = NULL;
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
+                goto out;
+        }
+
+        exists = glusterd_check_volume_exists (volname);
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (!exists || ret) {
+                snprintf (msg, sizeof (msg), "Volume %s does not exist",
+                          volname);
+                gf_log ("", GF_LOG_ERROR, "%s", msg);
+                *op_errstr = gf_strdup (msg);
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "brick", &brick);
+        if (ret) {
+                goto out;
+        }
+
+        ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo, NULL);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "Incorrect brick %s "
+                          "for volume %s", brick, volname);
+                gf_log ("", GF_LOG_ERROR, "%s", msg);
+                *op_errstr = gf_strdup (msg);
+                goto out;
+        }
+out:
+        gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+
+        return ret;
+}
+
 #ifdef HAVE_BD_XLATOR
 int
 glusterd_op_stage_bd (dict_t *dict, char **op_errstr)
@@ -2053,3 +2099,73 @@ out:
 
         return ret;
 }
+
+int
+glusterd_op_label_volume (dict_t *dict, char **op_errstr)
+{
+        int                     ret = -1;
+        glusterd_conf_t         *priv = NULL;
+        glusterd_volinfo_t      *volinfo = NULL;
+        glusterd_brickinfo_t    *brickinfo = NULL;
+        xlator_t                *this = NULL;
+        char                    *volname = NULL;
+        char                    *brick = NULL;
+        glusterd_brickinfo_t    *tmpbrkinfo = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_log ("", GF_LOG_ERROR, "volname not found");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "brick", &brick);
+        /* A label is applied to exactly one brick, so a request
+           without a "brick" key is rejected. */
+        if (ret) {
+                gf_log ("glusterd", GF_LOG_ERROR, "no brick specified");
+                goto out;
+        }
+
+        ret = glusterd_brickinfo_new_from_brick (brick, &tmpbrkinfo);
+        if (ret) {
+                gf_log ("glusterd", GF_LOG_ERROR,
+                        "cannot get brickinfo from brick");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret)
+                goto out;
+
+        ret = -1;
+        list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                if (uuid_compare (brickinfo->uuid, MY_UUID))
+                        continue;
+
+                if ((strcmp (tmpbrkinfo->hostname, brickinfo->hostname) ||
+                     strcmp (tmpbrkinfo->path,brickinfo->path)))
+                        continue;
+
+                ret = sys_lsetxattr (brickinfo->path, GF_XATTR_VOL_ID_KEY,
+                                     volinfo->volume_id,
+                                     sizeof(volinfo->volume_id), XATTR_CREATE);
+                if (ret) {
+                        gf_log ("glusterd", GF_LOG_ERROR,
+                                "failed to set %s on %s: %s",
+                                GF_XATTR_VOL_ID_KEY, brickinfo->path,
+                                strerror(errno));
+                }
+                break;
+        }
+
+out:
+        if (tmpbrkinfo)
+                glusterd_brickinfo_delete (tmpbrkinfo);
+
+        return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index 66adff741..7152bd6a2 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -79,6 +79,7 @@ typedef enum glusterd_op_ {
         GD_OP_CLEARLOCKS_VOLUME,
         GD_OP_DEFRAG_BRICK_VOLUME,
         GD_OP_BD_OP,
+        GD_OP_LABEL_VOLUME,
         GD_OP_MAX,
 } glusterd_op_t;
 
@@ -640,6 +641,7 @@ int32_t glusterd_op_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx);
 
 int glusterd_handle_cli_statedump_volume (rpcsvc_request_t *req);
 int glusterd_handle_cli_clearlocks_volume (rpcsvc_request_t *req);
+int glusterd_handle_cli_label_volume (rpcsvc_request_t *req);
 
 int glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
                                   size_t len, int cmd, defrag_cbk_fn_t cbk);
@@ -685,6 +687,10 @@ int glusterd_op_statedump_volume (dict_t *dict, char **op_errstr);
 
 int glusterd_op_stage_clearlocks_volume (dict_t *dict, char **op_errstr);
 int glusterd_op_clearlocks_volume (dict_t *dict, char **op_errstr);
+
+int glusterd_op_stage_label_volume (dict_t *dict, char **op_errstr);
+int glusterd_op_label_volume (dict_t *dict, char **op_errstr);
+
 int glusterd_op_stage_bd (dict_t *dict, char **op_errstr);
 
 /* misc */
--
cgit