diff options
| author | Ravishankar N <ravishankar@redhat.com> | 2017-01-30 09:54:16 +0530 | 
|---|---|---|
| committer | Shyamsundar Ranganathan <srangana@redhat.com> | 2017-02-15 07:31:45 -0500 | 
| commit | 8de5213db8771088ae214d42bcae056e409d7b6a (patch) | |
| tree | e9cb0c581646e5a0a175715c5ea70115b240c80b /xlators/cluster/afr/src | |
| parent | 48dc0655e214d8e4d313ebf86b5aabf3dc4b078a (diff) | |
afr: all children of AFR must be up to resolve split-brain
Problem:
The various split-brain resolution policies (favorite-child-policy based,
CLI based and mount (get/setfattr) based) attempt to resolve split-brain
even when not all bricks of the replica are up. This can be a problem when,
say, in a replica 3, the only good copy is down and the other 2 bricks
are up and blame each other (i.e. split-brain). We end up healing the
file in such a case and allowing I/O on it.
Fix:
A decision on whether the file is in split-brain or not must be taken
only if we are able to examine the afr xattrs of *all* bricks of a given
replica.
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
> Reviewed-on: https://review.gluster.org/16476
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
(cherry picked from commit 0e03336a9362e5717e561f76b0c543e5a197b31b)
Change-Id: Icddb1268b380005799990f5379ef957d84639ef9
BUG: 1420982
Reviewed-on: https://review.gluster.org/16587
Tested-by: Ravishankar N <ravishankar@redhat.com>
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Shyamsundar Ranganathan <srangana@redhat.com>
Diffstat (limited to 'xlators/cluster/afr/src')
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 32 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 38 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal.h | 6 | 
3 files changed, 61 insertions, 15 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 7f4db5c3778..ac834e90f4b 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -748,14 +748,17 @@ afr_set_split_brain_choice (int ret, call_frame_t *frame, void *opaque)          gf_boolean_t        timer_reset      = _gf_false;          int                 old_spb_choice   = -1; -        if (ret) -                goto out; -          frame = data->frame;          loc = data->loc;          this = frame->this;          priv = this->private; +        if (ret) { +                op_errno = -ret; +                ret = -1; +                goto out; +        } +          delta.tv_sec = priv->spb_choice_timeout;          delta.tv_nsec = 0; @@ -5611,6 +5614,12 @@ afr_is_split_brain (call_frame_t *frame, xlator_t *this, inode_t *inode,          if (ret)                  goto out; +        if (!afr_can_decide_split_brain_source_sinks (replies, +                                                      priv->child_count)) { +                ret = -EAGAIN; +                goto out; +        } +          ret = _afr_is_split_brain (frame, this, replies,                                      AFR_DATA_TRANSACTION, d_spb);          if (ret) @@ -5663,6 +5672,13 @@ afr_get_split_brain_status (void *opaque)          if (!inode)                  goto out; +        dict = dict_new (); +        if (!dict) { +                op_errno = ENOMEM; +                ret = -1; +                goto out; +        } +          /* Calculation for string length :          * (child_count X length of child-name) + strlen ("    Choices :")          * child-name consists of : @@ -5676,13 +5692,9 @@ afr_get_split_brain_status (void *opaque)                                    &m_spb);          if (ret) {                  op_errno = -ret; -                ret = -1; -                goto out; -        } - -        dict = dict_new (); -        if (!dict) { -                
op_errno = ENOMEM; +                if (ret == -EAGAIN) +                        ret = dict_set_str (dict, GF_AFR_SBRAIN_STATUS, +                                            SBRAIN_HEAL_NO_GO_MSG);                  ret = -1;                  goto out;          } diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 17e15d760c6..a8d3febb21b 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -473,6 +473,19 @@ afr_dict_contains_heal_op (call_frame_t *frame)          return _gf_true;  } +gf_boolean_t +afr_can_decide_split_brain_source_sinks (struct afr_reply *replies, +                                         int child_count) +{ +        int i = 0; + +        for (i = 0; i < child_count; i++) +                if (replies[i].valid != 1 || replies[i].op_ret != 0) +                        return _gf_false; + +        return _gf_true; +} +  int  afr_mark_split_brain_source_sinks_by_heal_op (call_frame_t *frame,                                     xlator_t *this, unsigned char *sources, @@ -511,6 +524,14 @@ afr_mark_split_brain_source_sinks_by_heal_op (call_frame_t *frame,          }          xdata_rsp = local->xdata_rsp; +        if (!afr_can_decide_split_brain_source_sinks (replies, +                                                      priv->child_count)) { +                ret = dict_set_str (xdata_rsp, "sh-fail-msg", +                                    SBRAIN_HEAL_NO_GO_MSG); +                ret = -1; +                goto out; +        } +          for (i = 0 ; i < priv->child_count; i++)                  if (locked_on[i])                          sources[i] = 1; @@ -749,26 +770,35 @@ afr_sh_get_fav_by_policy (xlator_t *this, struct afr_reply *replies,          int fav_child = -1;          priv = this->private; +        if (!afr_can_decide_split_brain_source_sinks (replies, +                                                      
priv->child_count)) { +                return -1; +        } +          switch (priv->fav_child_policy) {          case AFR_FAV_CHILD_BY_SIZE:                  fav_child = afr_sh_fav_by_size (this, replies, inode); -                if (policy_str && fav_child >= 0) +                if (policy_str && fav_child >= 0) {                          *policy_str = "SIZE"; +                }                  break;          case AFR_FAV_CHILD_BY_CTIME:                  fav_child = afr_sh_fav_by_ctime (this, replies, inode); -                if (policy_str && fav_child >= 0) +                if (policy_str && fav_child >= 0) {                          *policy_str = "CTIME"; +                }                  break;          case AFR_FAV_CHILD_BY_MTIME:                  fav_child = afr_sh_fav_by_mtime (this, replies, inode); -                if (policy_str && fav_child >= 0) +                if (policy_str && fav_child >= 0) {                          *policy_str = "MTIME"; +                }                  break;          case AFR_FAV_CHILD_BY_MAJORITY:                  fav_child = afr_sh_fav_by_majority (this, replies, inode); -                if (policy_str && fav_child >= 0) +                if (policy_str && fav_child >= 0) {                          *policy_str = "MAJORITY"; +                }                  break;          case AFR_FAV_CHILD_NONE:          default: diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index 500227abe24..a33905033cc 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -81,7 +81,8 @@  #define IA_EQUAL(f,s,field) (memcmp (&(f.ia_##field), &(s.ia_##field), sizeof (s.ia_##field)) == 0) - +#define SBRAIN_HEAL_NO_GO_MSG "Failed to obtain replies from all bricks of "\ +                      "the replica (are they up?). Cannot resolve split-brain."  
int  afr_selfheal (xlator_t *this, uuid_t gfid); @@ -220,6 +221,9 @@ afr_mark_active_sinks (xlator_t *this, unsigned char *sources,  gf_boolean_t  afr_dict_contains_heal_op (call_frame_t *frame); +gf_boolean_t +afr_can_decide_split_brain_source_sinks (struct afr_reply *replies, +                                         int child_count);  int  afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this,                                     inode_t *inode,  | 
