summaryrefslogtreecommitdiffstats
path: root/xlators
diff options
context:
space:
mode:
author Samikshan Bairagya <samikshan@gmail.com> 2017-05-16 15:07:21 +0530
committer Raghavendra Talur <rtalur@redhat.com> 2017-05-30 13:36:40 +0000
commit 1a90d86296f6529423a4450bc1e0b3bb12e4f0a2 (patch)
tree d88538f741038acd854d5f38b6a7bb64605cb2f2 /xlators
parent 541e6a0079e50aa36e37270a91d86fccb2467ee4 (diff)
glusterd: Don't spawn new glusterfsds on node reboot with brick-mux
With brick multiplexing enabled, upon a node reboot new bricks were not being attached to the first spawned brick process even though there weren't any compatibility issues. The reason for this is that upon glusterd restart after a node reboot, since brick services aren't running, glusterd starts the bricks in a "no-wait" mode. So after a brick process is spawned for the first brick, there isn't enough time for the corresponding pid file to get populated with a value before the compatibility check is made for the next brick. This commit solves this by iteratively waiting for the pidfile to be populated in the brick compatibility comparison stage before checking if the brick process is alive. > Reviewed-on: https://review.gluster.org/17307 > Reviewed-by: Atin Mukherjee <amukherj@redhat.com> > Smoke: Gluster Build System <jenkins@build.gluster.org> > NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> > CentOS-regression: Gluster Build System <jenkins@build.gluster.org> (cherry picked from commit 13e7b3b354a252ad4065f7b2f0f805c40a3c5d18) Change-Id: Ibd1f8e54c63e4bb04162143c9d70f09918a44aa4 BUG: 1453087 Signed-off-by: Samikshan Bairagya <samikshan@gmail.com> Reviewed-on: https://review.gluster.org/17352 Smoke: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Atin Mukherjee <amukherj@redhat.com> Reviewed-by: Raghavendra Talur <rtalur@redhat.com>
Diffstat (limited to 'xlators')
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c6
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.c1
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c18
3 files changed, 25 insertions, 0 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index 40f5704b698..b3e1ec3a362 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -5659,7 +5659,10 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
rpc_clnt_set_connected (&rpc->conn);
gf_msg_debug (this->name, 0, "Connected to %s:%s",
brickinfo->hostname, brickinfo->path);
+
glusterd_set_brick_status (brickinfo, GF_BRICK_STARTED);
+ brickinfo->started_here = _gf_true;
+
gf_event (EVENT_BRICK_CONNECTED, "peer=%s;volume=%s;brick=%s",
brickinfo->hostname, volinfo->volname,
brickinfo->path);
@@ -5689,6 +5692,9 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
GD_MSG_BRICK_DISCONNECTED,
"Brick %s:%s has disconnected from glusterd.",
brickinfo->hostname, brickinfo->path);
+
+ brickinfo->started_here = _gf_false;
+
ret = get_volinfo_from_brickid (brickid, &volinfo);
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0,
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c
index 0507715305c..aa34ce4900e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-pmap.c
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c
@@ -561,6 +561,7 @@ __gluster_pmap_signout (rpcsvc_request_t *req)
GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo,
conf);
sys_unlink (pidfile);
+ brickinfo->started_here = _gf_false;
}
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 51db13df0f6..b86a8440458 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -2146,6 +2146,8 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf);
gf_msg_debug (this->name, 0, "Unlinking pidfile %s", pidfile);
(void) sys_unlink (pidfile);
+
+ brickinfo->started_here = _gf_false;
out:
return ret;
}
@@ -5172,6 +5174,7 @@ find_compat_brick_in_vol (glusterd_conf_t *conf,
glusterd_brickinfo_t *other_brick;
char pidfile2[PATH_MAX] = {0};
int32_t pid2 = -1;
+ int16_t retries = 15;
/*
* If comp_vol is provided, we have to check *volume* compatibility
@@ -5214,8 +5217,22 @@ find_compat_brick_in_vol (glusterd_conf_t *conf,
if (strcmp (brickinfo->hostname, other_brick->hostname) != 0) {
continue;
}
+
GLUSTERD_GET_BRICK_PIDFILE (pidfile2, srch_vol, other_brick,
conf);
+
+ /* It is possible that the pidfile hasn't yet been populated,
+ * when bricks are started in "no-wait" mode; for example
+ * when bricks are started by glusterd_restart_bricks(). So
+ * wait for the pidfile to be populated with a value before
+ * checking if the service is running */
+ while (retries > 0) {
+ if (sys_access (pidfile2, F_OK) == 0)
+ break;
+ sleep (1);
+ retries--;
+ }
+
if (!gf_is_service_running (pidfile2, &pid2)) {
gf_log (this->name, GF_LOG_INFO,
"cleaning up dead brick %s:%s",
@@ -5459,6 +5476,7 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
socketpath, brickinfo->path, volinfo->volname);
(void) glusterd_brick_connect (volinfo, brickinfo,
socketpath);
+ brickinfo->started_here = _gf_true;
}
return 0;
}