path: root/xlators/mgmt/glusterd/src/glusterd-utils.c
author    Atin Mukherjee <amukherj@redhat.com>  2017-10-26 14:26:30 +0530
committer Atin Mukherjee <amukherj@redhat.com>  2017-11-01 03:41:36 +0000
commit 82be66ef8e9e3127d41a4c843daf74c1d8aec4aa (patch)
tree   48a91287a7dd949ce7c9cb52760b337ad8a573dc /xlators/mgmt/glusterd/src/glusterd-utils.c
parent bb7fd73ce4245f54517de1f378a9471f6c8bb454 (diff)
glusterd: fix brick restart parallelism
glusterd's brick restart logic is not always sequential, as there are at least three different ways a brick can be restarted:

1. through friend-sm and glusterd_spawn_daemons ()
2. through friend-sm while handling the volume quorum action
3. through friend handshaking when there is a quorum mismatch on friend import

In a brick multiplexing setup, glusterd ended up trying to spawn the same brick process more than once, because two threads could hit glusterd_brick_start () within a fraction of a millisecond; glusterd then had no way to reject either attempt, since the brick start criteria were met in both cases. As a fix, this is controlled by two mechanisms: a boolean flag, start_triggered, which indicates that a brick start has been triggered and stays true until the brick dies or is killed, and a mutex lock which ensures that, for a given brick, glusterd_brick_start () is not entered more than once at the same time.

Change-Id: I292f1e58d6971e111725e1baea1fe98b890b43e2
BUG: 1506513
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
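For readers who want the gist of the guard without paging through the diff below, here is a minimal standalone C sketch of the same pattern. The brick_t struct, brick_start () and restart_brick () names are simplified stand-ins for glusterd_brickinfo_t, glusterd_brick_start () and the loops in glusterd_restart_bricks (); they are not the actual glusterd API.

/* Sketch only: a flag marks an in-flight start, a per-brick mutex
 * serializes the racing restart paths described in the message above. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

typedef struct brick {
        const char     *path;
        bool            start_triggered; /* set once a start is in flight */
        pthread_mutex_t restart_mutex;   /* serializes concurrent starters */
} brick_t;

static int
brick_start (brick_t *brick)
{
        int ret = 0;

        /* A second caller sees the flag and bails out, mirroring the
         * GF_BRICK_STARTING || start_triggered check in the patch. */
        if (brick->start_triggered) {
                printf ("brick %s is already in starting phase\n", brick->path);
                return 0;
        }
        brick->start_triggered = true;

        /* ... spawn or attach the brick process here ... */

        if (ret)                        /* on failure, allow a later retry */
                brick->start_triggered = false;
        return ret;
}

static void
restart_brick (brick_t *brick)
{
        /* Restart paths race here; the mutex guarantees only one of them
         * is inside brick_start () for this brick at any point in time. */
        if (!brick->start_triggered) {
                pthread_mutex_lock (&brick->restart_mutex);
                {
                        brick_start (brick);
                }
                pthread_mutex_unlock (&brick->restart_mutex);
        }
}

int
main (void)
{
        brick_t brick = { .path = "/bricks/b1", .start_triggered = false };

        pthread_mutex_init (&brick.restart_mutex, NULL);
        restart_brick (&brick);  /* starts the brick */
        restart_brick (&brick);  /* no-op: start already triggered */
        pthread_mutex_destroy (&brick.restart_mutex);
        return 0;
}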
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-utils.c')
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-utils.c  39
1 file changed, 30 insertions, 9 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index a91f8dd7138..f211f199ce6 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -1086,7 +1086,7 @@ glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo)
goto out;
CDS_INIT_LIST_HEAD (&new_brickinfo->brick_list);
-
+ pthread_mutex_init (&new_brickinfo->restart_mutex, NULL);
*brickinfo = new_brickinfo;
ret = 0;
@@ -2500,7 +2500,7 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
(void) sys_unlink (pidfile);
brickinfo->status = GF_BRICK_STOPPED;
-
+ brickinfo->start_triggered = _gf_false;
if (del_brick)
glusterd_delete_brick (volinfo, brickinfo);
out:
@@ -5837,13 +5837,14 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
* three different triggers for an attempt to start the brick process
* due to the quorum handling code in glusterd_friend_sm.
*/
- if (brickinfo->status == GF_BRICK_STARTING) {
+ if (brickinfo->status == GF_BRICK_STARTING ||
+ brickinfo->start_triggered) {
gf_msg_debug (this->name, 0, "brick %s is already in starting "
"phase", brickinfo->path);
ret = 0;
goto out;
}
-
+ brickinfo->start_triggered = _gf_true;
GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf);
if (gf_is_service_running (pidfile, &pid)) {
if (brickinfo->status != GF_BRICK_STARTING &&
@@ -5956,6 +5957,9 @@ run:
}
out:
+ if (ret && brickinfo) {
+ brickinfo->start_triggered = _gf_false;
+ }
gf_msg_debug (this->name, 0, "returning %d ", ret);
return ret;
}
@@ -6017,11 +6021,19 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
start_svcs = _gf_true;
glusterd_svcs_manager (NULL);
}
-
cds_list_for_each_entry (brickinfo, &volinfo->bricks,
brick_list) {
- glusterd_brick_start (volinfo, brickinfo,
- _gf_false);
+ if (!brickinfo->start_triggered) {
+ pthread_mutex_lock
+ (&brickinfo->restart_mutex);
+ {
+ glusterd_brick_start
+ (volinfo, brickinfo,
+ _gf_false);
+ }
+ pthread_mutex_unlock
+ (&brickinfo->restart_mutex);
+ }
}
ret = glusterd_store_volinfo
(volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
@@ -6060,8 +6072,17 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
"volume %s", volinfo->volname);
cds_list_for_each_entry (brickinfo, &volinfo->bricks,
brick_list) {
- glusterd_brick_start (volinfo, brickinfo,
- _gf_false);
+ if (!brickinfo->start_triggered) {
+ pthread_mutex_lock
+ (&brickinfo->restart_mutex);
+ {
+ glusterd_brick_start
+ (volinfo, brickinfo,
+ _gf_false);
+ }
+ pthread_mutex_unlock
+ (&brickinfo->restart_mutex);
+ }
}
ret = glusterd_store_volinfo
(volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);