summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSanju Rakonde <srakonde@redhat.com>2018-04-06 01:53:45 +0530
committerAmar Tumballi <amarts@redhat.com>2018-05-07 15:31:59 +0000
commit4da244caccd38a77de5428b6954f565219ef0719 (patch)
tree43311ffe448d78206aa4f68b1be07d2c38ac4bc3
parent23c1385b5f6f6103e820d15ecfe1df31940fdb45 (diff)
glusterd: handling brick termination in brick-mux
Problem: There's a race between the glusterfs_handle_terminate() response sent to glusterd from last brick of the process and the socket disconnect event that encounters after the brick process got killed. Solution: When it is a last brick for the brick process, instead of sending GLUSTERD_BRICK_TERMINATE to brick process, glusterd will kill the process (same as we do it in case of non brick multiplecing). The test case is added for https://bugzilla.redhat.com/show_bug.cgi?id=1549996 Change-Id: If94958cd7649ea48d09d6af7803a0f9437a85503 fixes: bz#1545048 Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
-rw-r--r--glusterfsd/src/glusterfsd-mgmt.c24
-rw-r--r--tests/bugs/glusterd/stale-brick-proc-brick-mux.t33
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.c3
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.c17
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c11
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h3
6 files changed, 56 insertions, 35 deletions
diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
index db961c86304..e30d9287575 100644
--- a/glusterfsd/src/glusterfsd-mgmt.c
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -174,7 +174,6 @@ glusterfs_terminate_response_send (rpcsvc_request_t *req, int op_ret)
ret = dict_allocate_and_serialize (dict, &rsp.output.output_val,
&rsp.output.output_len);
-
if (ret == 0)
ret = glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
(xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
@@ -188,15 +187,15 @@ glusterfs_terminate_response_send (rpcsvc_request_t *req, int op_ret)
int
glusterfs_handle_terminate (rpcsvc_request_t *req)
{
- gd1_mgmt_brick_op_req xlator_req = {0,};
+ gd1_mgmt_brick_op_req xlator_req = {0,};
ssize_t ret;
- glusterfs_ctx_t *ctx = NULL;
- xlator_t *top = NULL;
- xlator_t *victim = NULL;
- xlator_t *tvictim = NULL;
- xlator_list_t **trav_p = NULL;
- gf_boolean_t lockflag = _gf_false;
- gf_boolean_t last_brick = _gf_false;
+ glusterfs_ctx_t *ctx = NULL;
+ xlator_t *top = NULL;
+ xlator_t *victim = NULL;
+ xlator_t *tvictim = NULL;
+ xlator_list_t **trav_p = NULL;
+ gf_boolean_t lockflag = _gf_false;
+ gf_boolean_t still_bricks_attached = _gf_false;
ret = xdr_to_generic (req->msg[0], &xlator_req,
(xdrproc_t)xdr_gd1_mgmt_brick_op_req);
@@ -240,15 +239,16 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
glusterfs_terminate_response_send (req, 0);
for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
tvictim = (*trav_p)->xlator;
- if (!tvictim->cleanup_starting && !strcmp (tvictim->name, xlator_req.name)) {
+ if (!tvictim->cleanup_starting &&
+ !strcmp (tvictim->name, xlator_req.name)) {
continue;
}
if (!tvictim->cleanup_starting) {
- last_brick = _gf_true;
+ still_bricks_attached = _gf_true;
break;
}
}
- if (!last_brick) {
+ if (!still_bricks_attached) {
gf_log (THIS->name, GF_LOG_INFO,
"terminating after loss of last child %s",
xlator_req.name);
diff --git a/tests/bugs/glusterd/stale-brick-proc-brick-mux.t b/tests/bugs/glusterd/stale-brick-proc-brick-mux.t
new file mode 100644
index 00000000000..f0a89760bb6
--- /dev/null
+++ b/tests/bugs/glusterd/stale-brick-proc-brick-mux.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+function count_brick_processes {
+ pgrep glusterfsd | wc -l
+}
+
+cleanup;
+
+TEST launch_cluster 2
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+#bug-1549996 - stale brick processes on the nodes after volume deletion
+
+TEST $CLI_1 volume set all cluster.brick-multiplex on
+TEST $CLI_1 volume create $V0 replica 3 $H1:$B1/${V0}{1..3} $H2:$B2/${V0}{1..3}
+TEST $CLI_1 volume start $V0
+
+TEST $CLI_1 volume create $V1 replica 3 $H1:$B1/${V1}{1..3} $H2:$B2/${V1}{1..3}
+TEST $CLI_1 volume start $V1
+
+EXPECT 2 count_brick_processes
+
+TEST $CLI_1 volume stop $V0
+TEST $CLI_1 volume stop $V1
+
+EXPECT 0 count_brick_processes
+
+cleanup
+
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c
index 98b1aaa63af..989d13ecde7 100644
--- a/xlators/mgmt/glusterd/src/glusterd-pmap.c
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c
@@ -606,7 +606,8 @@ __gluster_pmap_signout (rpcsvc_request_t *req)
* removed in the brick op phase. This situation would
* arise when the brick is killed explicitly from the
* backend */
- ret = glusterd_brick_process_remove_brick (brickinfo);
+ ret = glusterd_brick_process_remove_brick (brickinfo,
+ NULL);
if (ret) {
gf_msg_debug (this->name, 0, "Couldn't remove "
"brick %s:%s from brick process",
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
index 31b08d76adc..e5d4421deb4 100644
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
@@ -958,7 +958,6 @@ gd_syncop_mgmt_brick_op (struct rpc_clnt *rpc, glusterd_pending_node_t *pnode,
gd1_mgmt_brick_op_req *req = NULL;
int ret = 0;
xlator_t *this = NULL;
- glusterd_brickinfo_t *brickinfo = NULL;
this = THIS;
args.op_ret = -1;
@@ -987,22 +986,6 @@ gd_syncop_mgmt_brick_op (struct rpc_clnt *rpc, glusterd_pending_node_t *pnode,
else
GF_FREE (args.errstr);
}
- if (op == GD_OP_STOP_VOLUME || op == GD_OP_REMOVE_BRICK) {
- if (args.op_ret == 0) {
- brickinfo = pnode->node;
- ret = glusterd_brick_process_remove_brick (brickinfo);
- if (ret) {
- gf_msg ("glusterd", GF_LOG_ERROR, 0,
- GD_MSG_BRICKPROC_REM_BRICK_FAILED,
- "Removing brick %s:%s from brick"
- " process failed",
- brickinfo->hostname,
- brickinfo->path);
- args.op_ret = ret;
- goto out;
- }
- }
- }
if (GD_OP_STATUS_VOLUME == op) {
ret = dict_set_int32 (args.dict, "index", pnode->index);
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 42a4411d92e..88aea178028 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -2313,7 +2313,8 @@ glusterd_brickprocess_delete (glusterd_brick_proc_t *brick_proc)
}
int
-glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo)
+glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo,
+ int *last_brick)
{
int ret = -1;
xlator_t *this = NULL;
@@ -2352,6 +2353,8 @@ glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo)
/* If all bricks have been removed, delete the brick process */
if (brick_proc->brick_count == 0) {
+ if (last_brick != NULL)
+ *last_brick = 1;
ret = glusterd_brickprocess_delete (brick_proc);
if (ret)
goto out;
@@ -2455,6 +2458,7 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
int ret = -1;
char *op_errstr = NULL;
char pidfile[PATH_MAX] = {0,};
+ int last_brick = -1;
GF_ASSERT (volinfo);
GF_ASSERT (brickinfo);
@@ -2467,7 +2471,7 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
ret = 0;
- ret = glusterd_brick_process_remove_brick (brickinfo);
+ ret = glusterd_brick_process_remove_brick (brickinfo, &last_brick);
if (ret) {
gf_msg_debug (this->name, 0, "Couldn't remove brick from"
" brick process");
@@ -2487,7 +2491,7 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
* attaching and detaching bricks). Therefore, we have to send
* an actual signal instead.
*/
- if (is_brick_mx_enabled ()) {
+ if (is_brick_mx_enabled () && last_brick != 1) {
gf_msg_debug (this->name, 0, "About to send detach "
"request for brick %s:%s",
brickinfo->hostname, brickinfo->path);
@@ -2511,7 +2515,6 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
}
GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf);
-
gf_msg_debug (this->name, 0, "Unlinking pidfile %s", pidfile);
(void) sys_unlink (pidfile);
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index 0e9e54a0687..b1209728b67 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -182,7 +182,8 @@ glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo,
glusterd_volinfo_t *volinfo);
int
-glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo);
+glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo,
+ int *last_brick);
int
glusterd_brick_proc_for_port (int port, glusterd_brick_proc_t **brickprocess);