From 936f8aeac3252951e7fa0cdaa5d260fad3bd5ea0 Mon Sep 17 00:00:00 2001 From: Anuradha Talur Date: Mon, 22 Aug 2016 13:22:03 -0400 Subject: glusterd : Introduce reset brick The command basically allows replace brick with src and dst bricks as same. Usage: gluster v reset-brick start This command kills the brick to be reset. Once this command is run, admin can do other manual operations that they need to do, like configuring some options for the brick. Once this is done, resetting the brick can be continued with the following options. gluster v reset-brick commit {force} Does the job of resetting the brick. 'force' option should be used when the brick already contains volinfo id. Problem: On doing a disk-replacement of a brick in a replicate volume the following 2 scenarios may occur : a) there is a chance that reads are served from this replaced-disk brick, which leads to empty reads. b) potential data loss if next writes succeed only on replaced brick, and heal is done to other bricks from this one. Solution: After disk-replacement, make sure that reset-brick command is run for that brick so that pending markers are set for the brick and it is not chosen as source for reads and heal. But, as of now replace-brick for the same brick-path is not allowed. In order to fix the above mentioned problem, same brick-path replace-brick is needed. With this patch reset-brick commit {force} will be allowed even when source and destination are identical as long as 1) destination brick is not alive 2) source and destination brick have the same brick uuid and path. Also, the destination brick after replace-brick will use the same port as the source brick. Change-Id: I440b9e892ffb781ea4b8563688c3f85c7a7c89de BUG: 1266876 Signed-off-by: Anuradha Talur Reviewed-on: http://review.gluster.org/12250 Smoke: Gluster Build System NetBSD-regression: NetBSD Build System CentOS-regression: Gluster Build System Reviewed-by: Ashish Pandey Reviewed-by: Pranith Kumar Karampuri --- xlators/mgmt/glusterd/src/glusterd-replace-brick.c | 349 ++++++--------------- 1 file changed, 88 insertions(+), 261 deletions(-) (limited to 'xlators/mgmt/glusterd/src/glusterd-replace-brick.c') diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c index 0df5d437ca5..687ff004eb5 100644 --- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c +++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c @@ -27,13 +27,6 @@ #include -#define GLUSTERD_GET_RB_MNTPT(path, len, volinfo) \ - snprintf (path, len, \ - DEFAULT_VAR_RUN_DIRECTORY"/%s-"RB_CLIENT_MOUNTPOINT, \ - volinfo->volname); - -extern uuid_t global_txn_id; - int glusterd_mgmt_v3_initiate_replace_brick_cmd_phases (rpcsvc_request_t *req, glusterd_op_t op, @@ -46,14 +39,17 @@ __glusterd_handle_replace_brick (rpcsvc_request_t *req) dict_t *dict = NULL; char *src_brick = NULL; char *dst_brick = NULL; - int32_t op = 0; + char *cli_op = NULL; + glusterd_op_t op = -1; char *volname = NULL; char msg[2048] = {0,}; xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; GF_ASSERT (req); this = THIS; GF_ASSERT (this); + conf = this->private; ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); if (ret < 0) { @@ -95,7 +91,7 @@ __glusterd_handle_replace_brick (rpcsvc_request_t *req) goto out; } - ret = dict_get_int32 (dict, "operation", &op); + ret = dict_get_str (dict, "operation", &cli_op); if (ret) { gf_msg_debug (this->name, 0, "dict_get on operation failed"); @@ -103,6 +99,19 @@ __glusterd_handle_replace_brick (rpcsvc_request_t *req) goto out; } + op = gd_cli_to_gd_op (cli_op); + + if (conf->op_version < GD_OP_VERSION_3_9_0 && + strcmp (cli_op, "GF_REPLACE_OP_COMMIT_FORCE")) { + snprintf (msg, sizeof (msg), "Cannot execute command. The " + "cluster is operating at version %d. reset-brick " + "command %s is unavailable in this version.", + conf->op_version, + gd_rb_op_to_str (cli_op)); + ret = -1; + goto out; + } + ret = dict_get_str (dict, "src-brick", &src_brick); if (ret) { @@ -114,52 +123,60 @@ __glusterd_handle_replace_brick (rpcsvc_request_t *req) gf_msg_debug (this->name, 0, "src brick=%s", src_brick); - ret = dict_get_str (dict, "dst-brick", &dst_brick); + if (!strcmp (cli_op, "GF_RESET_OP_COMMIT") || + !strcmp (cli_op, "GF_RESET_OP_COMMIT_FORCE") || + !strcmp (cli_op, "GF_REPLACE_OP_COMMIT_FORCE")) { + ret = dict_get_str (dict, "dst-brick", &dst_brick); - if (ret) { - snprintf (msg, sizeof (msg), "Failed to get dest brick"); - gf_msg (this->name, GF_LOG_ERROR, 0, - GD_MSG_DICT_GET_FAILED, "%s", msg); - goto out; + if (ret) { + snprintf (msg, sizeof (msg), "Failed to get" + "dest brick"); + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_GET_FAILED, "%s", msg); + goto out; + } + + gf_msg_debug (this->name, 0, "dst brick=%s", dst_brick); } - gf_msg_debug (this->name, 0, "dst brick=%s", dst_brick); gf_msg (this->name, GF_LOG_INFO, 0, - GD_MSG_REPLACE_BRK_COMMIT_FORCE_REQ_RCVD, - "Received replace brick commit-force " - "request operation"); + (op == GD_OP_REPLACE_BRICK) ? + GD_MSG_REPLACE_BRK_COMMIT_FORCE_REQ_RCVD : + GD_MSG_RESET_BRICK_COMMIT_FORCE_REQ_RCVD, + "Received %s request.", + gd_rb_op_to_str (cli_op)); + + gf_event ((op == GD_OP_REPLACE_BRICK) ? EVENT_BRICK_REPLACE : + EVENT_BRICK_RESET, "received %s request. Source bricks %s," + "destination brick %s.", gd_rb_op_to_str (cli_op), + src_brick, (dst_brick) ? dst_brick : ""); ret = glusterd_mgmt_v3_initiate_replace_brick_cmd_phases (req, - GD_OP_REPLACE_BRICK, dict); + op, dict); out: + if (ret) { + glusterd_op_send_cli_response (op, ret, 0, req, + dict, msg); + } + ret = 0; free (cli_req.dict.dict_val);//malloced by xdr return ret; } int -glusterd_handle_replace_brick (rpcsvc_request_t *req) +glusterd_handle_reset_brick (rpcsvc_request_t *req) { return glusterd_big_locked_handler (req, __glusterd_handle_replace_brick); } -static int -glusterd_get_rb_dst_brickinfo (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t **brickinfo) +int +glusterd_handle_replace_brick (rpcsvc_request_t *req) { - int32_t ret = -1; - - if (!volinfo || !brickinfo) - goto out; - - *brickinfo = volinfo->rep_brick.dst_brick; - - ret = 0; - -out: - return ret; + return glusterd_big_locked_handler (req, + __glusterd_handle_replace_brick); } int @@ -170,21 +187,22 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, char *src_brick = NULL; char *dst_brick = NULL; char *volname = NULL; - char *replace_op = NULL; + char *op = NULL; + glusterd_op_t gd_op = -1; glusterd_volinfo_t *volinfo = NULL; glusterd_brickinfo_t *src_brickinfo = NULL; char *host = NULL; char *path = NULL; char msg[2048] = {0}; - char *dup_dstbrick = NULL; glusterd_peerinfo_t *peerinfo = NULL; glusterd_brickinfo_t *dst_brickinfo = NULL; glusterd_conf_t *priv = NULL; char pidfile[PATH_MAX] = {0}; xlator_t *this = NULL; gf_boolean_t is_force = _gf_false; - gsync_status_param_t param = {0,}; - char *c = NULL; + pid_t pid = -1; + uuid_t volume_id = {0,}; + char *dup_dstbrick = NULL; this = THIS; GF_ASSERT (this); @@ -192,193 +210,41 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, priv = this->private; GF_ASSERT (priv); - ret = dict_get_str (dict, "src-brick", &src_brick); - - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - GD_MSG_DICT_GET_FAILED, "Unable to get src brick"); - goto out; - } - - gf_msg_debug (this->name, 0, "src brick=%s", src_brick); - - ret = dict_get_str (dict, "dst-brick", &dst_brick); - - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - GD_MSG_DICT_GET_FAILED, "Unable to get dest brick"); - goto out; - } - - gf_msg_debug (this->name, 0, "dst brick=%s", dst_brick); - - ret = dict_get_str (dict, "volname", &volname); - - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - GD_MSG_DICT_GET_FAILED, "Unable to get volume name"); - goto out; - } - - ret = dict_get_str (dict, "operation", &replace_op); - if (ret) { - gf_msg_debug (this->name, 0, - "dict get on replace-brick operation failed"); - goto out; - } - - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - snprintf (msg, sizeof (msg), "volume: %s does not exist", - volname); - *op_errstr = gf_strdup (msg); - goto out; - } - - if (GLUSTERD_STATUS_STARTED != volinfo->status) { - ret = -1; - snprintf (msg, sizeof (msg), "volume: %s is not started", - volname); - *op_errstr = gf_strdup (msg); - goto out; - } - - ret = glusterd_disallow_op_for_tier (volinfo, GD_OP_REPLACE_BRICK, -1); - if (ret) { - snprintf (msg, sizeof (msg), "Replace brick commands are not " - "supported on tiered volume %s", volname); - *op_errstr = gf_strdup (msg); - goto out; - } - - if (!glusterd_store_is_valid_brickpath (volname, dst_brick) || - !glusterd_is_valid_volfpath (volname, dst_brick)) { - snprintf (msg, sizeof (msg), "brick path %s is too " - "long.", dst_brick); - gf_msg (this->name, GF_LOG_ERROR, 0, - GD_MSG_BRKPATH_TOO_LONG, "%s", msg); - *op_errstr = gf_strdup (msg); - - ret = -1; - goto out; - } - - /* If geo-rep is configured, for this volume, it should be stopped. */ - param.volinfo = volinfo; - ret = glusterd_check_geo_rep_running (¶m, op_errstr); - if (ret || param.is_active) { - ret = -1; + ret = glusterd_brick_op_prerequisites (dict, &op, &gd_op, + &volname, &volinfo, + &src_brick, &src_brickinfo, + pidfile, + op_errstr, rsp_dict); + if (ret) goto out; - } - if (glusterd_is_defrag_on(volinfo)) { - snprintf (msg, sizeof(msg), "Volume name %s rebalance is in " - "progress. Please retry after completion", volname); - gf_msg (this->name, GF_LOG_ERROR, 0, - GD_MSG_OIP_RETRY_LATER, "%s", msg); - *op_errstr = gf_strdup (msg); + if (strcmp (op, "GF_REPLACE_OP_COMMIT_FORCE")) { ret = -1; goto out; - } - - if (!strcmp(replace_op, "GF_REPLACE_OP_COMMIT_FORCE")) { - is_force = _gf_true; } else { - ret = -1; - goto out; - } - - ret = glusterd_volume_brickinfo_get_by_brick (src_brick, volinfo, - &src_brickinfo, - _gf_false); - if (ret) { - snprintf (msg, sizeof (msg), "brick: %s does not exist in " - "volume: %s", src_brick, volname); - *op_errstr = gf_strdup (msg); - goto out; - } - - if (dict) { - if (!glusterd_is_fuse_available ()) { - gf_msg (this->name, GF_LOG_ERROR, 0, - GD_MSG_RB_CMD_FAIL, "Unable to open /dev/" - "fuse (%s), replace-brick command failed", - strerror (errno)); - snprintf (msg, sizeof(msg), "Fuse unavailable\n " - "Replace-brick failed"); - *op_errstr = gf_strdup (msg); - ret = -1; - goto out; - } - } - - if (gf_is_local_addr (src_brickinfo->hostname)) { - gf_msg_debug (this->name, 0, - "I AM THE SOURCE HOST"); - if (src_brickinfo->port && rsp_dict) { - ret = dict_set_int32 (rsp_dict, "src-brick-port", - src_brickinfo->port); - if (ret) { - gf_msg_debug (this->name, 0, - "Could not set src-brick-port=%d", - src_brickinfo->port); - } - } - - GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, src_brickinfo, - priv); - - } - - dup_dstbrick = gf_strdup (dst_brick); - if (!dup_dstbrick) { - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - GD_MSG_NO_MEMORY, "Memory allocation failed"); - goto out; - } - - /* - * IPv4 address contains '.' and ipv6 addresses contains ':' - * So finding the last occurance of ':' to - * mark the start of brick path - */ - c = strrchr(dup_dstbrick, ':'); - if (c != NULL) { - c[0] = '\0'; - host = dup_dstbrick; - path = c++; - } - - if (!host || !path) { - gf_msg (this->name, GF_LOG_ERROR, 0, - GD_MSG_BAD_FORMAT, - "dst brick %s is not of form :", - dst_brick); - ret = -1; - goto out; - } + is_force = _gf_true; + } - ret = glusterd_brickinfo_new_from_brick (dst_brick, &dst_brickinfo, - _gf_true, NULL); + ret = glusterd_get_dst_brick_info (&dst_brick, volname, + op_errstr, + &dst_brickinfo, &host, + dict, &dup_dstbrick); if (ret) goto out; ret = glusterd_new_brick_validate (dst_brick, dst_brickinfo, - msg, sizeof (msg)); - if (ret) { + msg, sizeof (msg), op); + /* fail if brick being replaced with itself */ + if (ret) { *op_errstr = gf_strdup (msg); ret = -1; gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_VALIDATE_FAIL, "%s", *op_errstr); goto out; - } + } - if (!strcmp(replace_op, "GF_REPLACE_OP_COMMIT_FORCE")) { - - volinfo->rep_brick.src_brick = src_brickinfo; - volinfo->rep_brick.dst_brick = dst_brickinfo; - } + volinfo->rep_brick.src_brick = src_brickinfo; + volinfo->rep_brick.dst_brick = dst_brickinfo; if (glusterd_rb_check_bricks (volinfo, src_brickinfo, dst_brickinfo)) { @@ -484,48 +350,8 @@ rb_kill_destination_brick (glusterd_volinfo_t *volinfo, return glusterd_service_stop ("brick", pidfile, SIGTERM, _gf_true); } -static int -rb_update_dstbrick_port (glusterd_brickinfo_t *dst_brickinfo, dict_t *rsp_dict, - dict_t *req_dict, char *replace_op) -{ - int ret = 0; - int dict_ret = 0; - int dst_port = 0; - - dict_ret = dict_get_int32 (req_dict, "dst-brick-port", &dst_port); - if (!dict_ret) - dst_brickinfo->port = dst_port; - - if (gf_is_local_addr (dst_brickinfo->hostname)) { - gf_msg ("glusterd", GF_LOG_INFO, 0, - GD_MSG_BRK_PORT_NO_ADD_INDO, - "adding dst-brick port no"); - - if (rsp_dict) { - ret = dict_set_int32 (rsp_dict, "dst-brick-port", - dst_brickinfo->port); - if (ret) { - gf_msg_debug ("glusterd", 0, - "Could not set dst-brick port no in rsp dict"); - goto out; - } - } - - if (req_dict) { - ret = dict_set_int32 (req_dict, "dst-brick-port", - dst_brickinfo->port); - if (ret) { - gf_msg_debug ("glusterd", 0, - "Could not set dst-brick port no"); - goto out; - } - } - } -out: - return ret; -} -static int +int glusterd_op_perform_replace_brick (glusterd_volinfo_t *volinfo, char *old_brick, char *new_brick, dict_t *dict) @@ -563,6 +389,7 @@ glusterd_op_perform_replace_brick (glusterd_volinfo_t *volinfo, strncpy (new_brickinfo->brick_id, old_brickinfo->brick_id, sizeof (new_brickinfo->brick_id)); + new_brickinfo->port = old_brickinfo->port; /* A bricks mount dir is required only by snapshots which were * introduced in gluster-3.6.0 @@ -579,8 +406,8 @@ glusterd_op_perform_replace_brick (glusterd_volinfo_t *volinfo, sizeof(new_brickinfo->mount_dir)); } - cds_list_add_tail (&new_brickinfo->brick_list, - &old_brickinfo->brick_list); + cds_list_add (&new_brickinfo->brick_list, + &old_brickinfo->brick_list); volinfo->brick_count++; @@ -699,11 +526,11 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) } ret = rb_update_dstbrick_port (dst_brickinfo, rsp_dict, - dict, replace_op); + dict); if (ret) goto out; - if (strcmp(replace_op, "GF_REPLACE_OP_COMMIT_FORCE")) { + if (strcmp (replace_op, "GF_REPLACE_OP_COMMIT_FORCE")) { ret = -1; goto out; } @@ -722,8 +549,8 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) ret = glusterd_svcs_stop (volinfo); if (ret) { gf_msg (this->name, GF_LOG_ERROR, 0, - GD_MSG_NFS_SERVER_STOP_FAIL, - "Unable to stop nfs server, ret: %d", ret); + GD_MSG_GLUSTER_SERVICES_STOP_FAIL, + "Unable to stop gluster services, ret: %d", ret); } ret = glusterd_op_perform_replace_brick (volinfo, src_brick, @@ -732,7 +559,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) gf_msg (this->name, GF_LOG_CRITICAL, 0, GD_MSG_BRICK_ADD_FAIL, "Unable to add dst-brick: " "%s to volume: %s", dst_brick, volinfo->volname); - (void) glusterd_svcs_manager (volinfo); + (void) glusterd_svcs_manager (volinfo); goto out; } @@ -741,8 +568,8 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) ret = glusterd_svcs_manager (volinfo); if (ret) { gf_msg (this->name, GF_LOG_CRITICAL, 0, - GD_MSG_NFS_VOL_FILE_GEN_FAIL, - "Failed to generate nfs volume file"); + GD_MSG_GLUSTER_SERVICE_START_FAIL, + "Failed to start one or more gluster services."); } @@ -752,7 +579,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) volinfo->rep_brick.dst_brick = NULL; if (!ret) - ret = glusterd_store_volinfo (volinfo, + ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) gf_msg (this->name, GF_LOG_ERROR, 0, -- cgit