summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- tests/bugs/glusterd/bug-1451248-mux-reboot-node.t | 54
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-handler.c | 6
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-pmap.c | 1
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-utils.c | 18
4 files changed, 79 insertions, 0 deletions
diff --git a/tests/bugs/glusterd/bug-1451248-mux-reboot-node.t b/tests/bugs/glusterd/bug-1451248-mux-reboot-node.t
new file mode 100644
index 00000000000..5d8ce6e75e6
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1451248-mux-reboot-node.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../traps.rc
+. $(dirname $0)/../../volume.rc
+# Print how many lines of the XML volume status contain "<status>1".
+function count_up_bricks {
+ $CLI --xml volume status all | grep -F '<status>1' | wc -l
+}
+# Print the number of processes pgrep finds matching "glusterfsd".
+function count_brick_processes {
+ pgrep glusterfsd | wc -l
+}
+# Print how many distinct <pid> values, other than N/A, the XML volume status lists.
+function count_brick_pids {
+ $CLI --xml volume status all | sed -n 's/.*<pid>\([^<]*\).*/\1/p' \
+ | grep -v 'N/A' | sort -u | wc -l
+}
+
+cleanup;
+# Start glusterd with cluster.brick-multiplex on; push_trapfunc (traps.rc) is assumed to queue exit-time commands -- confirm.
+TEST glusterd
+TEST $CLI volume set all cluster.brick-multiplex on
+push_trapfunc "$CLI volume set all cluster.brick-multiplex off"
+push_trapfunc "cleanup"
+# Create and start $V0 with three bricks on $H0.
+TEST $CLI volume create $V0 $H0:$B0/brick{0..2}
+TEST $CLI volume start $V0
+# All three bricks should be up inside a single brick process (one glusterfsd, one distinct pid).
+EXPECT 1 count_brick_processes
+EXPECT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks
+# Kill every process matching "gluster" (glusterd and the bricks), presumably simulating a node reboot, then restart glusterd.
+pkill gluster
+TEST glusterd
+# The restarted glusterd should bring all three bricks back up in one brick process.
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks
+# Kill only glusterd this time (the pattern "glusterd" does not match "glusterfsd"), then restart it.
+pkill glusterd
+TEST glusterd
+# There should still be exactly one brick process, with all three bricks reported up.
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks
+# Create and start $V1 with three more bricks on $H0.
+TEST $CLI volume create $V1 $H0:$B0/brick{3..5}
+TEST $CLI volume start $V1
+# All six bricks should be up and still share one brick process.
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks
+
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index 40f5704b698..b3e1ec3a362 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -5659,7 +5659,10 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
rpc_clnt_set_connected (&rpc->conn);
gf_msg_debug (this->name, 0, "Connected to %s:%s",
brickinfo->hostname, brickinfo->path);
+
glusterd_set_brick_status (brickinfo, GF_BRICK_STARTED);
+ brickinfo->started_here = _gf_true;
+
gf_event (EVENT_BRICK_CONNECTED, "peer=%s;volume=%s;brick=%s",
brickinfo->hostname, volinfo->volname,
brickinfo->path);
@@ -5689,6 +5692,9 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
GD_MSG_BRICK_DISCONNECTED,
"Brick %s:%s has disconnected from glusterd.",
brickinfo->hostname, brickinfo->path);
+
+ brickinfo->started_here = _gf_false;
+
ret = get_volinfo_from_brickid (brickid, &volinfo);
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0,
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c
index 0507715305c..aa34ce4900e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-pmap.c
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c
@@ -561,6 +561,7 @@ __gluster_pmap_signout (rpcsvc_request_t *req)
GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo,
conf);
sys_unlink (pidfile);
+ brickinfo->started_here = _gf_false;
}
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 51db13df0f6..b86a8440458 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -2146,6 +2146,8 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf);
gf_msg_debug (this->name, 0, "Unlinking pidfile %s", pidfile);
(void) sys_unlink (pidfile);
+
+ brickinfo->started_here = _gf_false;
out:
return ret;
}
@@ -5172,6 +5174,7 @@ find_compat_brick_in_vol (glusterd_conf_t *conf,
glusterd_brickinfo_t *other_brick;
char pidfile2[PATH_MAX] = {0};
int32_t pid2 = -1;
+ int16_t retries = 15;
/*
* If comp_vol is provided, we have to check *volume* compatibility
@@ -5214,8 +5217,22 @@ find_compat_brick_in_vol (glusterd_conf_t *conf,
if (strcmp (brickinfo->hostname, other_brick->hostname) != 0) {
continue;
}
+
GLUSTERD_GET_BRICK_PIDFILE (pidfile2, srch_vol, other_brick,
conf);
+
+ /* The pidfile may not exist yet when bricks are started in
+ * "no-wait" mode, for example by glusterd_restart_bricks().
+ * Poll once a second until it exists before checking whether
+ * the service is running. The 15 retries are a single budget
+ * shared by every brick examined in this call, not per brick. */
+ while (retries > 0) {
+ if (sys_access (pidfile2, F_OK) == 0)
+ break;
+ sleep (1);
+ retries--;
+ }
+
if (!gf_is_service_running (pidfile2, &pid2)) {
gf_log (this->name, GF_LOG_INFO,
"cleaning up dead brick %s:%s",
@@ -5459,6 +5476,7 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
socketpath, brickinfo->path, volinfo->volname);
(void) glusterd_brick_connect (volinfo, brickinfo,
socketpath);
+ brickinfo->started_here = _gf_true;
}
return 0;
}