From 4da244caccd38a77de5428b6954f565219ef0719 Mon Sep 17 00:00:00 2001 From: Sanju Rakonde Date: Fri, 6 Apr 2018 01:53:45 +0530 Subject: glusterd: handling brick termination in brick-mux Problem: There's a race between the glusterfs_handle_terminate() response sent to glusterd from last brick of the process and the socket disconnect event that encounters after the brick process got killed. Solution: When it is a last brick for the brick process, instead of sending GLUSTERD_BRICK_TERMINATE to brick process, glusterd will kill the process (same as we do it in case of non brick multiplecing). The test case is added for https://bugzilla.redhat.com/show_bug.cgi?id=1549996 Change-Id: If94958cd7649ea48d09d6af7803a0f9437a85503 fixes: bz#1545048 Signed-off-by: Sanju Rakonde --- glusterfsd/src/glusterfsd-mgmt.c | 24 ++++++++--------- tests/bugs/glusterd/stale-brick-proc-brick-mux.t | 33 ++++++++++++++++++++++++ xlators/mgmt/glusterd/src/glusterd-pmap.c | 3 ++- xlators/mgmt/glusterd/src/glusterd-syncop.c | 17 ------------ xlators/mgmt/glusterd/src/glusterd-utils.c | 11 +++++--- xlators/mgmt/glusterd/src/glusterd-utils.h | 3 ++- 6 files changed, 56 insertions(+), 35 deletions(-) create mode 100644 tests/bugs/glusterd/stale-brick-proc-brick-mux.t diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c index db961c86304..e30d9287575 100644 --- a/glusterfsd/src/glusterfsd-mgmt.c +++ b/glusterfsd/src/glusterfsd-mgmt.c @@ -174,7 +174,6 @@ glusterfs_terminate_response_send (rpcsvc_request_t *req, int op_ret) ret = dict_allocate_and_serialize (dict, &rsp.output.output_val, &rsp.output.output_len); - if (ret == 0) ret = glusterfs_submit_reply (req, &rsp, NULL, 0, NULL, (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp); @@ -188,15 +187,15 @@ glusterfs_terminate_response_send (rpcsvc_request_t *req, int op_ret) int glusterfs_handle_terminate (rpcsvc_request_t *req) { - gd1_mgmt_brick_op_req xlator_req = {0,}; + gd1_mgmt_brick_op_req xlator_req = {0,}; ssize_t ret; - glusterfs_ctx_t *ctx = NULL; - xlator_t *top = NULL; - xlator_t *victim = NULL; - xlator_t *tvictim = NULL; - xlator_list_t **trav_p = NULL; - gf_boolean_t lockflag = _gf_false; - gf_boolean_t last_brick = _gf_false; + glusterfs_ctx_t *ctx = NULL; + xlator_t *top = NULL; + xlator_t *victim = NULL; + xlator_t *tvictim = NULL; + xlator_list_t **trav_p = NULL; + gf_boolean_t lockflag = _gf_false; + gf_boolean_t still_bricks_attached = _gf_false; ret = xdr_to_generic (req->msg[0], &xlator_req, (xdrproc_t)xdr_gd1_mgmt_brick_op_req); @@ -240,15 +239,16 @@ glusterfs_handle_terminate (rpcsvc_request_t *req) glusterfs_terminate_response_send (req, 0); for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) { tvictim = (*trav_p)->xlator; - if (!tvictim->cleanup_starting && !strcmp (tvictim->name, xlator_req.name)) { + if (!tvictim->cleanup_starting && + !strcmp (tvictim->name, xlator_req.name)) { continue; } if (!tvictim->cleanup_starting) { - last_brick = _gf_true; + still_bricks_attached = _gf_true; break; } } - if (!last_brick) { + if (!still_bricks_attached) { gf_log (THIS->name, GF_LOG_INFO, "terminating after loss of last child %s", xlator_req.name); diff --git a/tests/bugs/glusterd/stale-brick-proc-brick-mux.t b/tests/bugs/glusterd/stale-brick-proc-brick-mux.t new file mode 100644 index 00000000000..f0a89760bb6 --- /dev/null +++ b/tests/bugs/glusterd/stale-brick-proc-brick-mux.t @@ -0,0 +1,33 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../cluster.rc + +function count_brick_processes { + pgrep glusterfsd | wc -l +} + +cleanup; + +TEST launch_cluster 2 +TEST $CLI_1 peer probe $H2; +EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count + +#bug-1549996 - stale brick processes on the nodes after volume deletion + +TEST $CLI_1 volume set all cluster.brick-multiplex on +TEST $CLI_1 volume create $V0 replica 3 $H1:$B1/${V0}{1..3} $H2:$B2/${V0}{1..3} +TEST $CLI_1 volume start $V0 + +TEST $CLI_1 volume create $V1 replica 3 $H1:$B1/${V1}{1..3} $H2:$B2/${V1}{1..3} +TEST $CLI_1 volume start $V1 + +EXPECT 2 count_brick_processes + +TEST $CLI_1 volume stop $V0 +TEST $CLI_1 volume stop $V1 + +EXPECT 0 count_brick_processes + +cleanup + diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c index 98b1aaa63af..989d13ecde7 100644 --- a/xlators/mgmt/glusterd/src/glusterd-pmap.c +++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c @@ -606,7 +606,8 @@ __gluster_pmap_signout (rpcsvc_request_t *req) * removed in the brick op phase. This situation would * arise when the brick is killed explicitly from the * backend */ - ret = glusterd_brick_process_remove_brick (brickinfo); + ret = glusterd_brick_process_remove_brick (brickinfo, + NULL); if (ret) { gf_msg_debug (this->name, 0, "Couldn't remove " "brick %s:%s from brick process", diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c index 31b08d76adc..e5d4421deb4 100644 --- a/xlators/mgmt/glusterd/src/glusterd-syncop.c +++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c @@ -958,7 +958,6 @@ gd_syncop_mgmt_brick_op (struct rpc_clnt *rpc, glusterd_pending_node_t *pnode, gd1_mgmt_brick_op_req *req = NULL; int ret = 0; xlator_t *this = NULL; - glusterd_brickinfo_t *brickinfo = NULL; this = THIS; args.op_ret = -1; @@ -987,22 +986,6 @@ gd_syncop_mgmt_brick_op (struct rpc_clnt *rpc, glusterd_pending_node_t *pnode, else GF_FREE (args.errstr); } - if (op == GD_OP_STOP_VOLUME || op == GD_OP_REMOVE_BRICK) { - if (args.op_ret == 0) { - brickinfo = pnode->node; - ret = glusterd_brick_process_remove_brick (brickinfo); - if (ret) { - gf_msg ("glusterd", GF_LOG_ERROR, 0, - GD_MSG_BRICKPROC_REM_BRICK_FAILED, - "Removing brick %s:%s from brick" - " process failed", - brickinfo->hostname, - brickinfo->path); - args.op_ret = ret; - goto out; - } - } - } if (GD_OP_STATUS_VOLUME == op) { ret = dict_set_int32 (args.dict, "index", pnode->index); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 42a4411d92e..88aea178028 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -2313,7 +2313,8 @@ glusterd_brickprocess_delete (glusterd_brick_proc_t *brick_proc) } int -glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo) +glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo, + int *last_brick) { int ret = -1; xlator_t *this = NULL; @@ -2352,6 +2353,8 @@ glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo) /* If all bricks have been removed, delete the brick process */ if (brick_proc->brick_count == 0) { + if (last_brick != NULL) + *last_brick = 1; ret = glusterd_brickprocess_delete (brick_proc); if (ret) goto out; @@ -2455,6 +2458,7 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, int ret = -1; char *op_errstr = NULL; char pidfile[PATH_MAX] = {0,}; + int last_brick = -1; GF_ASSERT (volinfo); GF_ASSERT (brickinfo); @@ -2467,7 +2471,7 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, ret = 0; - ret = glusterd_brick_process_remove_brick (brickinfo); + ret = glusterd_brick_process_remove_brick (brickinfo, &last_brick); if (ret) { gf_msg_debug (this->name, 0, "Couldn't remove brick from" " brick process"); @@ -2487,7 +2491,7 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, * attaching and detaching bricks). Therefore, we have to send * an actual signal instead. */ - if (is_brick_mx_enabled ()) { + if (is_brick_mx_enabled () && last_brick != 1) { gf_msg_debug (this->name, 0, "About to send detach " "request for brick %s:%s", brickinfo->hostname, brickinfo->path); @@ -2511,7 +2515,6 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, } GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf); - gf_msg_debug (this->name, 0, "Unlinking pidfile %s", pidfile); (void) sys_unlink (pidfile); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 0e9e54a0687..b1209728b67 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -182,7 +182,8 @@ glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo, glusterd_volinfo_t *volinfo); int -glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo); +glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo, + int *last_brick); int glusterd_brick_proc_for_port (int port, glusterd_brick_proc_t **brickprocess); -- cgit