From 281d95cb7d53069158eae99392e9e7863c8850fd Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Mon, 5 May 2014 11:29:12 +0530 Subject: mgmt/glusterd: Prevent spurious brick restarts Change-Id: I7ee5d18b926d6c31e3e4ea2f5fbe9050c8e1dee8 BUG: 959986 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.org/4954 Tested-by: Gluster Build System Reviewed-by: Kaushal M --- xlators/mgmt/glusterd/src/glusterd-sm.h | 7 ----- xlators/mgmt/glusterd/src/glusterd-utils.c | 47 +++++++++++++++++++++++------- xlators/mgmt/glusterd/src/glusterd.h | 13 +++++---- 3 files changed, 44 insertions(+), 23 deletions(-) diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.h b/xlators/mgmt/glusterd/src/glusterd-sm.h index b9bedbe6994..f903668e9ec 100644 --- a/xlators/mgmt/glusterd/src/glusterd-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-sm.h @@ -36,13 +36,6 @@ typedef enum gd_quorum_contribution_ { QUORUM_UP } gd_quorum_contrib_t; -typedef enum gd_quorum_status_ { - QUORUM_UNKNOWN, - QUORUM_NOT_APPLICABLE, - QUORUM_MEETS, - QUORUM_DOES_NOT_MEET -} gd_quorum_status_t; - typedef enum glusterd_friend_sm_state_ { GD_FRIEND_STATE_DEFAULT = 0, GD_FRIEND_STATE_REQ_SENT, diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index ef13665be50..117a3ab857c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -3457,35 +3457,62 @@ void glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo, gf_boolean_t meets_quorum) { - glusterd_brickinfo_t *brickinfo = NULL; - glusterd_conf_t *conf = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_conf_t *conf = NULL; + gd_quorum_status_t quorum_status = NOT_APPLICABLE_QUORUM; + gf_boolean_t follows_quorum = _gf_false; conf = this->private; - if (volinfo->status != GLUSTERD_STATUS_STARTED) + if (volinfo->status != GLUSTERD_STATUS_STARTED) { + volinfo->quorum_status = NOT_APPLICABLE_QUORUM; goto out; + } - if 
(!glusterd_is_volume_in_server_quorum (volinfo)) - meets_quorum = _gf_true; + follows_quorum = glusterd_is_volume_in_server_quorum (volinfo); + if (follows_quorum) { + if (meets_quorum) + quorum_status = MEETS_QUORUM; + else + quorum_status = DOESNT_MEET_QUORUM; + } else { + quorum_status = NOT_APPLICABLE_QUORUM; + } - if (meets_quorum) + /* + * The following check is added to prevent spurious brick starts when + * events occur that affect quorum. + * Example: + * There is a cluster of 10 peers. Volume is in quorum. User + * takes down one brick from the volume to perform maintenance. + * Suddenly one of the peers goes down. Cluster is still in quorum. But + * because of this 'peer going down' event, quorum is calculated and + * the bricks that are down are brought up again. In this process it + * also brings up the brick that was purposefully taken down. + */ + if (volinfo->quorum_status == quorum_status) + goto out; + + if (quorum_status == MEETS_QUORUM) { gf_msg (this->name, GF_LOG_CRITICAL, 0, GD_MSG_SERVER_QUORUM_MET_STARTING_BRICKS, "Server quorum regained for volume %s. Starting local " "bricks.", volinfo->volname); - else + } else if (quorum_status == DOESNT_MEET_QUORUM) { gf_msg (this->name, GF_LOG_CRITICAL, 0, GD_MSG_SERVER_QUORUM_LOST_STOPPING_BRICKS, "Server quorum lost for volume %s. 
Stopping local " "bricks.", volinfo->volname); + } list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { if (!glusterd_is_local_brick (this, volinfo, brickinfo)) continue; - if (meets_quorum) - glusterd_brick_start (volinfo, brickinfo, _gf_false); - else + if (quorum_status == DOESNT_MEET_QUORUM) glusterd_brick_stop (volinfo, brickinfo, _gf_false); + else + glusterd_brick_start (volinfo, brickinfo, _gf_false); } + volinfo->quorum_status = quorum_status; out: return; } diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index ecb8f6b2252..821c9fdd8b7 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -122,12 +122,6 @@ typedef struct { gf_boolean_t online; } nodesrv_t; -typedef struct { - gf_boolean_t quorum; - double quorum_ratio; - uint64_t gl_opt_version; -} gd_global_opts_t; - typedef struct { struct _volfile_ctx *volfile; pthread_mutex_t mutex; @@ -295,6 +289,12 @@ struct glusterd_replace_brick_ { typedef struct glusterd_replace_brick_ glusterd_replace_brick_t; +typedef enum gd_quorum_status_ { + NOT_APPLICABLE_QUORUM, //Does not follow quorum + MEETS_QUORUM, //Follows quorum and meets. + DOESNT_MEET_QUORUM, //Follows quorum and does not meet. +} gd_quorum_status_t; + struct glusterd_volinfo_ { gf_lock_t lock; gf_boolean_t is_snap_volume; @@ -371,6 +371,7 @@ struct glusterd_volinfo_ { int client_op_version; pthread_mutex_t reflock; int refcnt; + gd_quorum_status_t quorum_status; }; typedef enum gd_snap_status_ { -- cgit