author     Atin Mukherjee <amukherj@redhat.com>        2018-01-03 14:29:51 +0530
committer  jiffin tony Thottan <jthottan@redhat.com>   2018-01-12 05:43:49 +0000
commit     8679151392e50e1684ed721710f44dd4fbb992b9 (patch)
tree       5122212dab8b83991d6f9f1969632596c98a7700
parent     19b74478fc87909b95a6c87ab212f21b79c809f3 (diff)
glusterd: connect to an existing brick process when quorum status is NOT_APPLICABLE_QUORUM

First of all, this patch reverts commit 635c1c3, as it was causing a regression where bricks did not come up in time after a node reboot. This patch fixes the problem in a different way: when the quorum status is not applicable, glusterd only tries to connect to an already running brick process instead of starting a new one.

> mainline patch: https://review.gluster.org/#/c/19134/

Change-Id: I0efb5901832824b1c15dcac529bffac85173e097
BUG: 1511301
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
-rw-r--r--   xlators/mgmt/glusterd/src/glusterd-brick-ops.c       2
-rw-r--r--   xlators/mgmt/glusterd/src/glusterd-handshake.c       2
-rw-r--r--   xlators/mgmt/glusterd/src/glusterd-op-sm.c           1
-rw-r--r--   xlators/mgmt/glusterd/src/glusterd-replace-brick.c   3
-rw-r--r--   xlators/mgmt/glusterd/src/glusterd-server-quorum.c   27
-rw-r--r--   xlators/mgmt/glusterd/src/glusterd-snapshot.c        2
-rw-r--r--   xlators/mgmt/glusterd/src/glusterd-utils.c           13
-rw-r--r--   xlators/mgmt/glusterd/src/glusterd-utils.h           3
-rw-r--r--   xlators/mgmt/glusterd/src/glusterd-volume-ops.c      3
9 files changed, 41 insertions, 15 deletions
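
The patch threads a new only_connect flag through glusterd_brick_start(): when glusterd only needs to re-establish the connection to a brick process that is already running (the quorum-status-unchanged / NOT_APPLICABLE_QUORUM case), it attaches to the existing process and returns without spawning a new one. The following is a minimal standalone sketch of that short-circuit, not GlusterFS code; brick_t, connect_to_brick() and spawn_brick() are hypothetical stand-ins for glusterd's brickinfo, pidfile and RPC machinery.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for glusterd's brickinfo/pidfile/RPC machinery. */
typedef struct {
        const char *path;
        bool        running;    /* models gf_is_service_running() */
        bool        connected;  /* models the brick's RPC connection */
} brick_t;

static int connect_to_brick (brick_t *b) { b->connected = true; return 0; }
static int spawn_brick      (brick_t *b) { b->running = true; return connect_to_brick (b); }

/* Models glusterd_brick_start (volinfo, brickinfo, wait, only_connect):
 * attach to an already running brick process; with only_connect set,
 * never start a new process from this code path. */
static int
brick_start (brick_t *b, bool only_connect)
{
        if (b->running)
                return connect_to_brick (b);
        if (only_connect)               /* the early return the patch adds */
                return 0;
        return spawn_brick (b);
}

int
main (void)
{
        brick_t b = { "/bricks/b1", true, false };

        /* Quorum status unchanged: reconnect only, never respawn. */
        if (brick_start (&b, true) == 0)
                printf ("%s: running=%d connected=%d\n",
                        b.path, b.running, b.connected);
        return 0;
}
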
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index 6d17ff4e32d..c82bc3158e1 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -1554,7 +1554,7 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,
}
}
ret = glusterd_brick_start (volinfo, brickinfo,
- _gf_true);
+ _gf_true, _gf_false);
if (ret)
goto out;
i++;
diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c
index 8dfb528f10c..96eb523753c 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handshake.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c
@@ -658,7 +658,7 @@ glusterd_create_missed_snap (glusterd_missed_snap_info *missed_snapinfo,
}
brickinfo->snap_status = 0;
- ret = glusterd_brick_start (snap_vol, brickinfo, _gf_false);
+ ret = glusterd_brick_start (snap_vol, brickinfo, _gf_false, _gf_false);
if (ret) {
gf_msg (this->name, GF_LOG_WARNING, 0,
GD_MSG_BRICK_DISCONNECTED, "starting the "
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index 51579fe3826..57b2f09fbbd 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -2406,6 +2406,7 @@ glusterd_start_bricks (glusterd_volinfo_t *volinfo)
pthread_mutex_lock (&brickinfo->restart_mutex);
{
ret = glusterd_brick_start (volinfo, brickinfo,
+ _gf_false,
_gf_false);
}
pthread_mutex_unlock (&brickinfo->restart_mutex);
diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
index 08a6df0235f..e02ce80cd08 100644
--- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
+++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
@@ -425,7 +425,8 @@ glusterd_op_perform_replace_brick (glusterd_volinfo_t *volinfo,
goto out;
if (GLUSTERD_STATUS_STARTED == volinfo->status) {
- ret = glusterd_brick_start (volinfo, new_brickinfo, _gf_false);
+ ret = glusterd_brick_start (volinfo, new_brickinfo, _gf_false,
+ _gf_false);
if (ret)
goto out;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
index 995a568caa4..b01bfaaf59f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
+++ b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
@@ -314,6 +314,7 @@ glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo,
glusterd_brickinfo_t *brickinfo = NULL;
gd_quorum_status_t quorum_status = NOT_APPLICABLE_QUORUM;
gf_boolean_t follows_quorum = _gf_false;
+ gf_boolean_t quorum_status_unchanged = _gf_false;
if (volinfo->status != GLUSTERD_STATUS_STARTED) {
volinfo->quorum_status = NOT_APPLICABLE_QUORUM;
@@ -341,9 +342,10 @@ glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo,
* the bricks that are down are brought up again. In this process it
* also brings up the brick that is purposefully taken down.
*/
- if (quorum_status != NOT_APPLICABLE_QUORUM &&
- volinfo->quorum_status == quorum_status)
+ if (volinfo->quorum_status == quorum_status) {
+ quorum_status_unchanged = _gf_true;
goto out;
+ }
if (quorum_status == MEETS_QUORUM) {
gf_msg (this->name, GF_LOG_CRITICAL, 0,
@@ -368,9 +370,10 @@ glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo,
if (!brickinfo->start_triggered) {
pthread_mutex_lock (&brickinfo->restart_mutex);
{
- glusterd_brick_start (volinfo,
- brickinfo,
- _gf_false);
+ ret = glusterd_brick_start (volinfo,
+ brickinfo,
+ _gf_false,
+ _gf_false);
}
pthread_mutex_unlock (&brickinfo->restart_mutex);
}
@@ -392,6 +395,20 @@ glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo,
}
}
out:
+ if (quorum_status_unchanged) {
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ if (!glusterd_is_local_brick (this, volinfo, brickinfo))
+ continue;
+ ret = glusterd_brick_start (volinfo, brickinfo,
+ _gf_false, _gf_true);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRICK_DISCONNECTED, "Failed to "
+ "connect to %s:%s", brickinfo->hostname,
+ brickinfo->path);
+ }
+ }
+ }
return;
}
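
The hunk above carries the core of the fix: when the volume's quorum status is unchanged, glusterd_do_volume_quorum_action() now walks the local bricks on its way out and calls glusterd_brick_start() in connect-only mode, logging any brick it fails to reach. A minimal model of that out-path loop, reusing the hypothetical brick_t type and brick_start() stub from the sketch after the diffstat, could look like this:

/* Assumes the brick_t type and brick_start() stub from the earlier sketch;
 * bricks/nbricks stand in for walking volinfo->bricks with
 * glusterd_is_local_brick() already applied. */
static void
reconnect_local_bricks (brick_t *bricks, int nbricks,
                        bool quorum_status_unchanged)
{
        if (!quorum_status_unchanged)
                return;

        for (int i = 0; i < nbricks; i++) {
                /* only_connect = true: attach to running brick processes,
                 * never spawn new ones from the quorum-unchanged path. */
                if (brick_start (&bricks[i], true) != 0)
                        fprintf (stderr, "Failed to connect to %s\n",
                                 bricks[i].path);
        }
}
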
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
index 31f4d95f63d..23b16258309 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
@@ -6972,7 +6972,7 @@ glusterd_snapshot_create_commit (dict_t *dict, char **op_errstr,
cds_list_for_each_entry (brickinfo, &snap_vol->bricks,
brick_list) {
ret = glusterd_brick_start (snap_vol, brickinfo,
- _gf_false);
+ _gf_false, _gf_false);
if (ret) {
gf_msg (this->name, GF_LOG_WARNING, 0,
GD_MSG_BRICK_DISCONNECTED, "starting "
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index e627dcfcc2b..2cc1df22acb 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -5783,7 +5783,8 @@ glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len)
int
glusterd_brick_start (glusterd_volinfo_t *volinfo,
glusterd_brickinfo_t *brickinfo,
- gf_boolean_t wait)
+ gf_boolean_t wait,
+ gf_boolean_t only_connect)
{
int ret = -1;
xlator_t *this = NULL;
@@ -5834,7 +5835,9 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
ret = 0;
goto out;
}
- brickinfo->start_triggered = _gf_true;
+ if (!only_connect)
+ brickinfo->start_triggered = _gf_true;
+
GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf);
if (gf_is_service_running (pidfile, &pid)) {
if (brickinfo->status != GF_BRICK_STARTING &&
@@ -5892,6 +5895,8 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
}
return 0;
}
+ if (only_connect)
+ return 0;
run:
ret = _mk_rundir_p (volinfo);
@@ -6019,7 +6024,7 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
{
glusterd_brick_start
(volinfo, brickinfo,
- _gf_false);
+ _gf_false, _gf_false);
}
pthread_mutex_unlock
(&brickinfo->restart_mutex);
@@ -6068,7 +6073,7 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
{
glusterd_brick_start
(volinfo, brickinfo,
- _gf_false);
+ _gf_false, _gf_false);
}
pthread_mutex_unlock
(&brickinfo->restart_mutex);
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index b802f6ca616..a2f0737bb61 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -277,7 +277,8 @@ glusterd_all_volume_cond_check (glusterd_condition_func func, int status,
int
glusterd_brick_start (glusterd_volinfo_t *volinfo,
glusterd_brickinfo_t *brickinfo,
- gf_boolean_t wait);
+ gf_boolean_t wait,
+ gf_boolean_t only_connect);
int
glusterd_brick_stop (glusterd_volinfo_t *volinfo,
glusterd_brickinfo_t *brickinfo,
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index 7c037e843b8..46e874494f2 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -2553,7 +2553,8 @@ glusterd_start_volume (glusterd_volinfo_t *volinfo, int flags,
if (flags & GF_CLI_FLAG_OP_FORCE) {
brickinfo->start_triggered = _gf_false;
}
- ret = glusterd_brick_start (volinfo, brickinfo, wait);
+ ret = glusterd_brick_start (volinfo, brickinfo, wait,
+ _gf_false);
/* If 'force' try to start all bricks regardless of success or
* failure
*/