summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMohammed Rafi KC <rkavunga@redhat.com>2019-05-06 23:35:08 +0530
committerAtin Mukherjee <amukherj@redhat.com>2019-05-10 14:19:29 +0000
commitb475551c66b86863cc93ebdfa6daeeab67bdbd9e (patch)
tree41abee1e5afbfc9d0891ff949f60bbcf2fbbc892
parent9ab2747da78061882f6734df4b265bce11adaef1 (diff)
shd/glusterd: Serialize shd manager to prevent race condition
At the time of a glusterd restart, while doing a handshake there is a possibility that multiple shd manager might get executed. Because of this, there is a chance that multiple shd get spawned during a glusterd restart Change-Id: Ie20798441e07d7d7a93b7d38dfb924cea178a920 fixes: bz#1707081 Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
-rw-r--r--tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t54
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc.c14
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c1
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h3
4 files changed, 72 insertions, 0 deletions
diff --git a/tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t b/tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t
new file mode 100644
index 0000000..3a27c2a
--- /dev/null
+++ b/tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t
@@ -0,0 +1,54 @@
+#! /bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+function check_peers {
+count=`$CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l`
+echo $count
+}
+
+function check_shd {
+ps aux | grep $1 | grep glustershd | wc -l
+}
+
+cleanup
+
+
+TEST launch_cluster 6
+
+TESTS_EXPECTED_IN_LOOP=25
+for i in $(seq 2 6); do
+ hostname="H$i"
+ TEST $CLI_1 peer probe ${!hostname}
+done
+
+
+EXPECT_WITHIN $PROBE_TIMEOUT 5 check_peers;
+for i in $(seq 1 5); do
+
+ TEST $CLI_1 volume create ${V0}_$i replica 3 $H1:$B1/${V0}_$i $H2:$B2/${V0}_$i $H3:$B3/${V0}_$i $H4:$B4/${V0}_$i $H5:$B5/${V0}_$i $H6:$B6/${V0}_$i
+ TEST $CLI_1 volume start ${V0}_$i force
+
+done
+
+#kill a node
+TEST kill_node 3
+
+TEST $glusterd_3;
+EXPECT_WITHIN $PROBE_TIMEOUT 5 check_peers
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 check_shd $H3
+
+for i in $(seq 1 5); do
+
+ TEST $CLI_1 volume stop ${V0}_$i
+ TEST $CLI_1 volume delete ${V0}_$i
+
+done
+
+for i in $(seq 1 6); do
+ hostname="H$i"
+ EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 0 check_shd ${!hostname}
+done
+cleanup
diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
index 2edc273..598c082 100644
--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
@@ -254,14 +254,26 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
{
int ret = -1;
glusterd_volinfo_t *volinfo = NULL;
+ glusterd_conf_t *conf = NULL;
+ gf_boolean_t shd_restart = _gf_false;
+ conf = THIS->private;
volinfo = data;
+ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
GF_VALIDATE_OR_GOTO("glusterd", svc, out);
GF_VALIDATE_OR_GOTO("glusterd", volinfo, out);
if (volinfo)
glusterd_volinfo_ref(volinfo);
+ while (conf->restart_shd) {
+ synclock_unlock(&conf->big_lock);
+ sleep(2);
+ synclock_lock(&conf->big_lock);
+ }
+ conf->restart_shd = _gf_true;
+ shd_restart = _gf_true;
+
ret = glusterd_shdsvc_create_volfile(volinfo);
if (ret)
goto out;
@@ -310,6 +322,8 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
}
}
out:
+ if (shd_restart)
+ conf->restart_shd = _gf_false;
if (volinfo)
glusterd_volinfo_unref(volinfo);
if (ret)
diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
index c0973cb..6d7dd4a 100644
--- a/xlators/mgmt/glusterd/src/glusterd.c
+++ b/xlators/mgmt/glusterd/src/glusterd.c
@@ -1819,6 +1819,7 @@ init(xlator_t *this)
conf->rpc = rpc;
conf->uds_rpc = uds_rpc;
conf->gfs_mgmt = &gd_brick_prog;
+ conf->restart_shd = _gf_false;
this->private = conf;
/* conf->workdir and conf->rundir are smaller than PATH_MAX; gcc's
* snprintf checking will throw an error here if sprintf is used.
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index 5b9fed6..ca8e82a 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -219,6 +219,9 @@ typedef struct {
gf_atomic_t blockers;
uint32_t mgmt_v3_lock_timeout;
gf_boolean_t restart_bricks;
+ gf_boolean_t restart_shd; /* This flag prevents running two shd manager
+ simultaneously
+ */
pthread_mutex_t attach_lock; /* Lock can be per process or a common one */
pthread_mutex_t volume_lock; /* We release the big_lock from lot of places
which might lead the modification of volinfo