author    Samikshan Bairagya <samikshan@gmail.com>    2017-05-16 15:07:21 +0530
committer Shyamsundar Ranganathan <srangana@redhat.com>    2017-05-22 14:17:23 +0000
commit    671dfcd82f6a7c56fbcbfde33cba22c0b585a046 (patch)
tree      c71620c24ad364f23356798cdbf0e567949df0ce
parent    74aa9ab2f2f6b2514847457101642b359823fde5 (diff)
glusterd: Don't spawn new glusterfsds on node reboot with brick-mux
With brick multiplexing enabled, upon a node reboot new bricks were not
being attached to the first spawned brick process even though there
weren't any compatibility issues.

The reason for this is that upon glusterd restart after a node reboot,
since brick services aren't running, glusterd starts the bricks in a
"no-wait" mode. So after a brick process is spawned for the first brick,
there isn't enough time for the corresponding pid file to get populated
with a value before the compatibility check is made for the next brick.

This commit solves this by iteratively waiting for the pidfile to be
populated in the brick compatibility comparison stage before checking if
the brick process is alive.

> Reviewed-on: https://review.gluster.org/17307
> Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>

(cherry picked from commit 13e7b3b354a252ad4065f7b2f0f805c40a3c5d18)

Change-Id: Ibd1f8e54c63e4bb04162143c9d70f09918a44aa4
BUG: 1453086
Signed-off-by: Samikshan Bairagya <samikshan@gmail.com>
Reviewed-on: https://review.gluster.org/17351
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
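The heart of the fix, visible in the glusterd-utils.c hunk further down, is a
bounded retry loop: poll for the pidfile's existence for a limited number of
attempts before asking whether the brick process is running. Below is a
minimal standalone sketch of that pattern, not the glusterd code itself;
names such as wait_for_pidfile, pidfile_path and MAX_RETRIES are
illustrative only.

    /* Sketch: wait a bounded time for a pidfile to appear before
     * checking the process behind it.  Mirrors the retry loop the
     * commit adds to find_compat_brick_in_vol(). */
    #include <stdio.h>
    #include <stdbool.h>
    #include <unistd.h>

    #define MAX_RETRIES 15

    static bool
    wait_for_pidfile (const char *pidfile_path)
    {
            int retries = MAX_RETRIES;

            while (retries > 0) {
                    if (access (pidfile_path, F_OK) == 0)
                            return true;    /* pidfile is there; safe to check the pid */
                    sleep (1);              /* bricks were started in "no-wait" mode */
                    retries--;
            }
            return false;                   /* gave up after MAX_RETRIES seconds */
    }

    int
    main (void)
    {
            const char *pidfile_path = "/var/run/gluster/example-brick.pid";

            if (wait_for_pidfile (pidfile_path))
                    printf ("pidfile %s present\n", pidfile_path);
            else
                    printf ("pidfile %s never appeared\n", pidfile_path);
            return 0;
    }

The fixed upper bound (15 one-second retries in the actual patch) keeps
glusterd from blocking indefinitely if the first brick never writes its
pidfile, while still giving a freshly spawned brick process time to do so.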
-rw-r--r--   tests/bugs/glusterd/bug-1451248-mux-reboot-node.t   54
-rw-r--r--   xlators/mgmt/glusterd/src/glusterd-handler.c          6
-rw-r--r--   xlators/mgmt/glusterd/src/glusterd-pmap.c             1
-rw-r--r--   xlators/mgmt/glusterd/src/glusterd-utils.c           18
4 files changed, 79 insertions, 0 deletions
diff --git a/tests/bugs/glusterd/bug-1451248-mux-reboot-node.t b/tests/bugs/glusterd/bug-1451248-mux-reboot-node.t
new file mode 100644
index 00000000000..5d8ce6e75e6
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1451248-mux-reboot-node.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../traps.rc
+. $(dirname $0)/../../volume.rc
+
+function count_up_bricks {
+ $CLI --xml volume status all | grep '<status>1' | wc -l
+}
+
+function count_brick_processes {
+ pgrep glusterfsd | wc -l
+}
+
+function count_brick_pids {
+ $CLI --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \
+ | grep -v "N/A" | sort | uniq | wc -l
+}
+
+cleanup;
+
+TEST glusterd
+TEST $CLI volume set all cluster.brick-multiplex on
+push_trapfunc "$CLI volume set all cluster.brick-multiplex off"
+push_trapfunc "cleanup"
+
+TEST $CLI volume create $V0 $H0:$B0/brick{0..2}
+TEST $CLI volume start $V0
+
+EXPECT 1 count_brick_processes
+EXPECT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks
+
+pkill gluster
+TEST glusterd
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks
+
+pkill glusterd
+TEST glusterd
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks
+
+TEST $CLI volume create $V1 $H0:$B0/brick{3..5}
+TEST $CLI volume start $V1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks
+
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index 2a5772b6669..f16bc20c01f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -5811,7 +5811,10 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
rpc_clnt_set_connected (&rpc->conn);
gf_msg_debug (this->name, 0, "Connected to %s:%s",
brickinfo->hostname, brickinfo->path);
+
glusterd_set_brick_status (brickinfo, GF_BRICK_STARTED);
+ brickinfo->started_here = _gf_true;
+
gf_event (EVENT_BRICK_CONNECTED, "peer=%s;volume=%s;brick=%s",
brickinfo->hostname, volinfo->volname,
brickinfo->path);
@@ -5841,6 +5844,9 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
GD_MSG_BRICK_DISCONNECTED,
"Brick %s:%s has disconnected from glusterd.",
brickinfo->hostname, brickinfo->path);
+
+ brickinfo->started_here = _gf_false;
+
ret = get_volinfo_from_brickid (brickid, &volinfo);
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0,
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c
index c3f5dbc3cd1..2b60b17841b 100644
--- a/xlators/mgmt/glusterd/src/glusterd-pmap.c
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c
@@ -563,6 +563,7 @@ __gluster_pmap_signout (rpcsvc_request_t *req)
GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo,
conf);
sys_unlink (pidfile);
+ brickinfo->started_here = _gf_false;
}
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index a66e04934db..4c47b47c67c 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -2143,6 +2143,8 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf);
gf_msg_debug (this->name, 0, "Unlinking pidfile %s", pidfile);
(void) sys_unlink (pidfile);
+
+ brickinfo->started_here = _gf_false;
out:
return ret;
}
@@ -5169,6 +5171,7 @@ find_compat_brick_in_vol (glusterd_conf_t *conf,
glusterd_brickinfo_t *other_brick;
char pidfile2[PATH_MAX] = {0};
int32_t pid2 = -1;
+ int16_t retries = 15;
/*
* If comp_vol is provided, we have to check *volume* compatibility
@@ -5211,8 +5214,22 @@ find_compat_brick_in_vol (glusterd_conf_t *conf,
if (strcmp (brickinfo->hostname, other_brick->hostname) != 0) {
continue;
}
+
GLUSTERD_GET_BRICK_PIDFILE (pidfile2, srch_vol, other_brick,
conf);
+
+ /* It is possible that the pidfile hasn't yet been populated,
+ * when bricks are started in "no-wait" mode; for example
+ * when bricks are started by glusterd_restart_bricks(). So
+ * wait for the pidfile to be populated with a value before
+ * checking if the service is running */
+ while (retries > 0) {
+ if (sys_access (pidfile2, F_OK) == 0)
+ break;
+ sleep (1);
+ retries--;
+ }
+
if (!gf_is_service_running (pidfile2, &pid2)) {
gf_log (this->name, GF_LOG_INFO,
"cleaning up dead brick %s:%s",
@@ -5456,6 +5473,7 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
socketpath, brickinfo->path, volinfo->volname);
(void) glusterd_brick_connect (volinfo, brickinfo,
socketpath);
+ brickinfo->started_here = _gf_true;
}
return 0;
}