From 0e03336a9362e5717e561f76b0c543e5a197b31b Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Mon, 30 Jan 2017 09:54:16 +0530 Subject: afr: all children of AFR must be up to resolve s-brain Problem: The various split-brain resolution policies (favorite-child-policy based, CLI based and mount (get/setfattr) based) attempt to resolve split-brain even when not all bricks of replica are up. This can be a problem when say in a replica 3, the only good copy is down and the other 2 bricks are up and blame each other (i.e. split-brain). We end up healing the file in such a case and allow I/O on it. Fix: A decision on whether the file is in split-brain or not must be taken only if we are able to examine the afr xattrs of *all* bricks of a given replica. Change-Id: Icddb1268b380005799990f5379ef957d84639ef9 BUG: 1417522 Signed-off-by: Ravishankar N Reviewed-on: https://review.gluster.org/16476 Smoke: Gluster Build System NetBSD-regression: NetBSD Build System CentOS-regression: Gluster Build System Reviewed-by: Pranith Kumar Karampuri --- xlators/cluster/afr/src/afr-common.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) (limited to 'xlators/cluster/afr/src/afr-common.c') diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 7f4db5c3778..ac834e90f4b 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -748,14 +748,17 @@ afr_set_split_brain_choice (int ret, call_frame_t *frame, void *opaque) gf_boolean_t timer_reset = _gf_false; int old_spb_choice = -1; - if (ret) - goto out; - frame = data->frame; loc = data->loc; this = frame->this; priv = this->private; + if (ret) { + op_errno = -ret; + ret = -1; + goto out; + } + delta.tv_sec = priv->spb_choice_timeout; delta.tv_nsec = 0; @@ -5611,6 +5614,12 @@ afr_is_split_brain (call_frame_t *frame, xlator_t *this, inode_t *inode, if (ret) goto out; + if (!afr_can_decide_split_brain_source_sinks (replies, + priv->child_count)) { + ret = -EAGAIN; + goto out; + } + ret = _afr_is_split_brain (frame, this, replies, AFR_DATA_TRANSACTION, d_spb); if (ret) @@ -5663,6 +5672,13 @@ afr_get_split_brain_status (void *opaque) if (!inode) goto out; + dict = dict_new (); + if (!dict) { + op_errno = ENOMEM; + ret = -1; + goto out; + } + /* Calculation for string length : * (child_count X length of child-name) + strlen (" Choices :") * child-name consists of : @@ -5676,13 +5692,9 @@ afr_get_split_brain_status (void *opaque) &m_spb); if (ret) { op_errno = -ret; - ret = -1; - goto out; - } - - dict = dict_new (); - if (!dict) { - op_errno = ENOMEM; + if (ret == -EAGAIN) + ret = dict_set_str (dict, GF_AFR_SBRAIN_STATUS, + SBRAIN_HEAL_NO_GO_MSG); ret = -1; goto out; } -- cgit