diff options
| author | Pranith Kumar K <pranithk@gluster.com> | 2012-01-20 17:30:54 +0530 | 
|---|---|---|
| committer | Anand Avati <avati@redhat.com> | 2012-03-12 19:37:26 -0700 | 
| commit | 154a59a6e0988194c6a6e17527a30cca47a697f9 (patch) | |
| tree | 6b7e2bcd6acead9d64430847ef848f35daff0861 | |
| parent | fafd5c17c0d126e10b401199cd4f01f7786deef8 (diff) | |
cluster/afr: Handle split-brain/all-fool xattrs for directory
In case of split-brain/all-fool xattrs, perform a conservative merge.
Don't treat an ignorant subvol as a fool.
Change-Id: I3044d388d816d79268fec170d202ef23e7d5bf1c
BUG: 765528
Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Reviewed-on: http://review.gluster.com/2674
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 223 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.h | 17 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 40 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 48 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-metadata.c | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 4 | 
6 files changed, 158 insertions, 176 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 377c72a88..db39512d2 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -157,6 +157,7 @@ afr_mark_ignorant_subvols_as_pending (int32_t **pending_matrix,  int  afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix, +                          unsigned char *ignorant_subvols,                            dict_t *xattr[], afr_transaction_type type,                            size_t child_count)  { @@ -167,12 +168,6 @@ afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,          int            i                = 0;          int            j                = 0;          int            k                = 0; -        unsigned char *ignorant_subvols = NULL; - -        ignorant_subvols = GF_CALLOC (sizeof (*ignorant_subvols), child_count, -                                      gf_afr_mt_char); -        if (NULL == ignorant_subvols) -                goto out;          afr_init_pending_matrix (pending_matrix, child_count); @@ -190,7 +185,8 @@ afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,                                   * subvolume.                                   
*/ -                                ignorant_subvols[i] = 1; +                                if (ignorant_subvols) +                                        ignorant_subvols[i] = 1;                                  continue;                          } @@ -201,19 +197,14 @@ afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,                  }          } -        afr_mark_ignorant_subvols_as_pending (pending_matrix, -                                              ignorant_subvols, -                                              child_count); -        GF_FREE (ignorant_subvols); -out:          return ret;  }  typedef enum { +        AFR_NODE_INVALID,          AFR_NODE_INNOCENT,          AFR_NODE_FOOL,          AFR_NODE_WISE, -        AFR_NODE_INVALID = -1,  } afr_node_type;  typedef struct { @@ -467,23 +458,18 @@ out:  int  afr_mark_child_as_source_by_uid (int32_t *sources, struct iatt *bufs, -                                 int32_t *valid_children, int child_count, -                                 uint32_t uid) +                                 int32_t *success_children, +                                 unsigned int child_count, uint32_t uid)  {          int     i        = 0;          int     nsources = 0;          int     child    = 0; -        GF_ASSERT (bufs); -        GF_ASSERT (valid_children); -        GF_ASSERT (sources); -        GF_ASSERT (child_count > 0); -          for (i = 0; i < child_count; i++) { -                if (-1 == valid_children[i]) -                        continue; +                if (-1 == success_children[i]) +                        break; -                child = valid_children[i]; +                child = success_children[i];                  if (uid == bufs[child].ia_uid) {                          sources[child] = 1;                          nsources++; @@ -493,21 +479,17 @@ afr_mark_child_as_source_by_uid (int32_t *sources, struct iatt *bufs,  }  int -afr_get_child_with_lowest_uid (struct iatt *bufs, int32_t 
*valid_children, -                               int child_count) +afr_get_child_with_lowest_uid (struct iatt *bufs, int32_t *success_children, +                               unsigned int child_count)  {          int     i        = 0;          int     smallest = -1;          int     child    = 0; -        GF_ASSERT (bufs); -        GF_ASSERT (valid_children); -        GF_ASSERT (child_count > 0); -          for (i = 0; i < child_count; i++) { -                if (-1 == valid_children[i]) -                        continue; -                child = valid_children[i]; +                if (-1 == success_children[i]) +                        break; +                child = success_children[i];                  if ((smallest == -1) ||                      (bufs[child].ia_uid < bufs[smallest].ia_uid)) {                          smallest = child; @@ -517,20 +499,20 @@ afr_get_child_with_lowest_uid (struct iatt *bufs, int32_t *valid_children,  }  static int -afr_sh_mark_lowest_uid_as_source (struct iatt *bufs, int32_t *valid_children, +afr_sh_mark_lowest_uid_as_source (struct iatt *bufs, int32_t *success_children,                                    int child_count, int32_t *sources)  {          int   nsources              = 0;          int   smallest              = 0; -        smallest = afr_get_child_with_lowest_uid (bufs, valid_children, +        smallest = afr_get_child_with_lowest_uid (bufs, success_children,                                                    child_count);          if (smallest < 0) {                  nsources = -1;                  goto out;          }          nsources = afr_mark_child_as_source_by_uid (sources, bufs, -                                                    valid_children, child_count, +                                                    success_children, child_count,                                                      bufs[smallest].ia_uid);  out:          return nsources; @@ -560,7 +542,7 @@ afr_get_character_str (afr_node_type type)  
afr_node_type  afr_find_child_character_type (int32_t *pending_row, int32_t child, -                               int32_t child_count, const char *xlator_name) +                               unsigned int child_count)  {          afr_node_type type = AFR_NODE_INVALID; @@ -574,11 +556,6 @@ afr_find_child_character_type (int32_t *pending_row, int32_t child,                  type = AFR_NODE_FOOL;          else if (afr_sh_is_wise (pending_row, child, child_count))                  type = AFR_NODE_WISE; -        else -                GF_ASSERT (0); - -        gf_log (xlator_name, GF_LOG_DEBUG, "child %d character %s", -                child, afr_get_character_str (type));          return type;  } @@ -586,43 +563,76 @@ int  afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs,                     int32_t **pending_matrix, int32_t *sources,                     int32_t *success_children, afr_transaction_type type, -                   afr_source_flags_t *flags) +                   int32_t *subvol_status, gf_boolean_t ignore_ignorant)  {          afr_private_t           *priv = NULL;          afr_self_heal_type      sh_type    = AFR_SELF_HEAL_INVALID;          int                     nsources   = -1; +        unsigned char           *ignorant_subvols = NULL; +        unsigned int            child_count = 0;          priv = this->private; +        child_count = priv->child_count;          if (afr_get_children_count (success_children, priv->child_count) == 0)                  goto out; +        if (!ignore_ignorant) { +                ignorant_subvols = GF_CALLOC (sizeof (*ignorant_subvols), +                                              child_count, gf_afr_mt_char); +                if (NULL == ignorant_subvols) +                        goto out; +        } +          afr_build_pending_matrix (priv->pending_key, pending_matrix, -                                  xattr, type, priv->child_count); +                                  ignorant_subvols, xattr, type, 
+                                  priv->child_count); +        if (!ignore_ignorant) +                afr_mark_ignorant_subvols_as_pending (pending_matrix, +                                                      ignorant_subvols, +                                                      priv->child_count);          sh_type = afr_self_heal_type_for_transaction (type);          if (AFR_SELF_HEAL_INVALID == sh_type)                  goto out;          afr_sh_print_pending_matrix (pending_matrix, this); -        nsources = afr_mark_sources (sources, pending_matrix, bufs, -                                     priv->child_count, sh_type, -                                     success_children, this->name, flags); +        nsources = afr_mark_sources (this, sources, pending_matrix, bufs, +                                     sh_type, success_children, subvol_status);  out: +        GF_FREE (ignorant_subvols);          return nsources;  }  void -afr_mark_valid_children_sources (int32_t *sources, int32_t *valid_children, -                                 unsigned int child_count) +afr_find_character_types (afr_node_character *characters, +                          int32_t **pending_matrix, int32_t *success_children, +                          unsigned int child_count) +{ +        afr_node_type type  = AFR_NODE_INVALID; +        int           child = 0; +        int           i     = 0; + +        for (i = 0; i < child_count; i++) { +                child = success_children[i]; +                if (child == -1) +                        break; +                type = afr_find_child_character_type (pending_matrix[child], +                                                      child, child_count); +                characters[child].type = type; +        } +} + +void +afr_mark_success_children_sources (int32_t *sources, int32_t *success_children, +                                   unsigned int child_count)  {          int i = 0; -        memset (sources, 0, sizeof (*sources) * 
child_count);          for (i = 0; i < child_count; i++) { -                if (valid_children[i] == -1) +                if (success_children[i] == -1)                          break; -                sources[valid_children[i]] = 1; +                sources[success_children[i]] = 1;          }  } @@ -646,23 +656,23 @@ afr_mark_valid_children_sources (int32_t *sources, int32_t *valid_children,   * a split-brain. If one wise node refers to the other but the other doesn't   * refer back, the referrer is a source.   * - * All fools are sinks, unless there are no 'wise' nodes. if 'allfools' is NULL, - * biggest fool(s) is/are marked as source. + * All fools are sinks, unless there are no 'wise' nodes. In that case, + * one of the fools is made a source.   */  int -afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs, -                  int32_t child_count, afr_self_heal_type type, -                  int32_t *valid_children, const char *xlator_name, -                  afr_source_flags_t *flags) +afr_mark_sources (xlator_t *this, int32_t *sources, int32_t **pending_matrix, +                  struct iatt *bufs, afr_self_heal_type type, +                  int32_t *success_children, int32_t *subvol_status)  {          /* stores the 'characters' (innocent, fool, wise) of the nodes */ -          afr_node_character *characters =  NULL; -        int            i              = 0; -        int            nsources       = -1; -        xlator_t      *this           = NULL; +        int                nsources    = -1; +        unsigned int       child_count = 0; +        afr_private_t      *priv       = NULL; +        priv = this->private; +        child_count = priv->child_count;          characters = GF_CALLOC (sizeof (afr_node_character),                                  child_count, gf_afr_mt_afr_node_character);          if (!characters) @@ -671,26 +681,14 @@ afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs,          
this = THIS;          /* start clean */ -        for (i = 0; i < child_count; i++) { -                sources[i] = 0; -        } - +        memset (sources, 0, sizeof (*sources) * child_count);          nsources = 0; -        for (i = 0; i < child_count; i++) { -                characters[i].type = -                        afr_find_child_character_type (pending_matrix[i], i, -                                                       child_count, -                                                       xlator_name); -                if (AFR_NODE_INVALID == characters[i].type) -                        gf_log (xlator_name, GF_LOG_WARNING, -                                "child %d had invalid xattrs", i); -        } - -        if ((type == AFR_SELF_HEAL_METADATA) -            && afr_sh_all_nodes_innocent (characters, child_count)) { - -                nsources = afr_sh_mark_lowest_uid_as_source (bufs, -                                                             valid_children, +        afr_find_character_types (characters, pending_matrix, success_children, +                                  child_count); +        if (afr_sh_all_nodes_innocent (characters, child_count)) { +                if (type == AFR_SELF_HEAL_METADATA) +                        nsources = afr_sh_mark_lowest_uid_as_source (bufs, +                                                             success_children,                                                               child_count,                                                               sources);                  goto out; @@ -700,24 +698,17 @@ afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs,                  afr_sh_compute_wisdom (pending_matrix, characters, child_count);                  if (afr_sh_wise_nodes_conflict (characters, child_count)) { -                        /* split-brain */ -                        gf_log (this->name, GF_LOG_INFO, -                                "split-brain possible, no source 
detected"); -                        if (flags) -                                *flags |= AFR_SPLIT_BRAIN; +                        if (subvol_status) +                                *subvol_status |= SPLIT_BRAIN;                          nsources = -1; -                  } else {                          nsources = afr_sh_mark_wisest_as_sources (sources,                                                                    characters,                                                                    child_count);                  }          } else { -                if (flags) { -                        *flags |= AFR_ALL_FOOLS; -                        nsources = -1; -                        goto out; -                } +                if (subvol_status) +                        *subvol_status |= ALL_FOOLS;                  nsources = afr_mark_biggest_of_fools_as_source (sources,                                                                  pending_matrix,                                                                  characters, @@ -726,10 +717,9 @@ afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs,  out:          if (nsources == 0) -                afr_mark_valid_children_sources (sources, valid_children, -                                                 child_count); -        if (characters) -                GF_FREE (characters); +                afr_mark_success_children_sources (sources, success_children, +                                                   child_count); +        GF_FREE (characters);          gf_log (this->name, GF_LOG_DEBUG, "Number of sources: %d", nsources);          return nsources; @@ -1261,7 +1251,8 @@ afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this,          nsources = afr_build_sources (this, sh->xattr, sh->buf,                                        sh->pending_matrix, sh->sources,                                        sh->child_success, -                                    
  afr_transaction_type_get (ia_type), NULL); +                                      afr_transaction_type_get (ia_type), +                                      NULL, _gf_false);          if (nsources < 0) {                  gf_log (this->name, GF_LOG_INFO, "No sources for dir of %s,"                          " in missing entry self-heal, continuing with the rest" @@ -1686,13 +1677,13 @@ static void  afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this,                             int32_t op_ret, int32_t op_errno)  { -        afr_self_heal_t    *sh  = NULL; -        afr_private_t      *priv = NULL; -        afr_local_t        *local = NULL; -        int                enoent_count = 0; -        int                nsources = 0; -        int                source  = -1; -        afr_source_flags_t flags = 0; +        afr_self_heal_t *sh  = NULL; +        afr_private_t   *priv = NULL; +        afr_local_t     *local = NULL; +        int             enoent_count = 0; +        int             nsources = 0; +        int             source  = -1; +        int32_t         subvol_status = 0;          local = frame->local;          sh = &local->self_heal; @@ -1722,22 +1713,22 @@ afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this,          nsources = afr_build_sources (this, sh->xattr, sh->buf,                                        sh->pending_matrix, sh->sources,                                        sh->child_success, -                                      AFR_ENTRY_TRANSACTION, &flags); -        if ((nsources < 0) && !flags) { -                gf_log (this->name, GF_LOG_ERROR, "No sources for dir of %s," -                        " in missing entry self-heal, aborting self-heal", -                        local->loc.path); +                                      AFR_ENTRY_TRANSACTION, &subvol_status, +                                      _gf_true); +        if ((subvol_status & ALL_FOOLS) || +            (subvol_status & SPLIT_BRAIN)) { +                
gf_log (this->name, GF_LOG_INFO, "%s: Performing conservative " +                        "merge", sh->parent_loc.path); +                afr_mark_success_children_sources (sh->sources, +                                                   sh->child_success, +                                                   priv->child_count); +        } else if (nsources < 0) { +                gf_log (this->name, GF_LOG_ERROR, "No sources for dir " +                        "of %s, in missing entry self-heal, aborting " +                        "self-heal", local->loc.path);                  goto out;          } -        //if allfools/split-brain give the behavior of missing entry creation -        if (flags) { -                gf_log (this->name, GF_LOG_DEBUG, "%s: All subvols pending so " -                        "do missing entry creation", local->loc.path); -                afr_mark_valid_children_sources (sh->sources, sh->child_success, -                                                 priv->child_count); -        } -          source = afr_sh_select_source (sh->sources, priv->child_count);          if (source == -1) {                  GF_ASSERT (0); diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h index b313c17e9..1e325685d 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.h +++ b/xlators/cluster/afr/src/afr-self-heal-common.h @@ -34,11 +34,6 @@ typedef enum {          AFR_LOOKUP_FAIL_MISSING_GFIDS = 2,  } afr_lookup_flags_t; -typedef enum { -        AFR_SPLIT_BRAIN = 1, -        AFR_ALL_FOOLS =2 -} afr_source_flags_t; -  int  afr_sh_select_source (int sources[], int child_count); @@ -53,6 +48,7 @@ afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this);  int  afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix, +                          unsigned char *ignorant_subvols,                            dict_t *xattr[], afr_transaction_type type,                            size_t 
child_count); @@ -62,10 +58,9 @@ afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,                           int child_count, afr_transaction_type type);  int -afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs, -                  int32_t child_count, afr_self_heal_type type, -                  int32_t *valid_children, const char *xlator_name, -                  afr_source_flags_t *flags); +afr_mark_sources (xlator_t *this, int32_t *sources, int32_t **pending_matrix, +                  struct iatt *bufs, afr_self_heal_type type, +                  int32_t *success_children, int32_t *subvol_status);  int  afr_sh_delta_to_xattr (afr_private_t *priv, @@ -83,10 +78,10 @@ afr_self_heal_type  afr_self_heal_type_for_transaction (afr_transaction_type type);  int -afr_build_sources (xlator_t *xlator, dict_t **xattr, struct iatt *bufs, +afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs,                     int32_t **pending_matrix, int32_t *sources,                     int32_t *success_children, afr_transaction_type type, -                   afr_source_flags_t *flags); +                   int32_t *subvol_status, gf_boolean_t ignore_ignorant);  void  afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count);  int diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index a2bc3bb58..4aa5b9e86 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -594,15 +594,9 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)          sh = &local->self_heal;          priv = this->private; -        afr_build_pending_matrix (priv->pending_key, sh->pending_matrix, -                                  sh->xattr, AFR_DATA_TRANSACTION, -                                  priv->child_count); - -        afr_sh_print_pending_matrix (sh->pending_matrix, this); - -        nsources = afr_mark_sources (sh->sources, 
sh->pending_matrix, sh->buf, -                                     priv->child_count, AFR_SELF_HEAL_DATA, -                                     sh->child_success, this->name, NULL); +         nsources = afr_build_sources (this, sh->xattr, sh->buf, sh->pending_matrix, +                                       sh->sources, sh->child_success, +                                       AFR_DATA_TRANSACTION, NULL, _gf_false);          if (nsources == 0) {                  gf_log (this->name, GF_LOG_DEBUG, @@ -711,7 +705,7 @@ static int  afr_select_read_child_from_policy (int32_t *sources, int32_t child_count,                                     int32_t prev_read_child,                                     int32_t config_read_child, -                                   int32_t *valid_children) +                                   int32_t *success_children)  {          int32_t                  read_child = -1;          int                      i          = 0; @@ -729,7 +723,7 @@ afr_select_read_child_from_policy (int32_t *sources, int32_t child_count,                  goto out;          for (i = 0; i < child_count; i++) { -                read_child = valid_children[i]; +                read_child = success_children[i];                  if (read_child < 0)                          break;                  if (_gf_true == afr_is_fresh_read_child (sources, child_count, @@ -796,17 +790,17 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,          int                      ret        = -1;          int32_t                  **pending_matrix = NULL;          int32_t                  *sources         = NULL; -        int32_t                  *valid_children  = NULL; +        int32_t                  *success_children  = NULL;          struct iatt              *bufs            = NULL;          int32_t                  nsources         = 0;          int32_t                  prev_read_child  = -1;          int32_t                  config_read_child = -1; +      
  int32_t                  subvol_status = 0;          afr_self_heal_t          *sh = NULL; -        afr_self_heal_type       sh_type = AFR_SELF_HEAL_INVALID;          priv = this->private;          bufs = local->cont.lookup.bufs; -        valid_children = local->cont.lookup.child_success; +        success_children = local->cont.lookup.child_success;          sh = &local->self_heal;          pending_matrix = afr_create_pending_matrix (priv->child_count); @@ -815,16 +809,12 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,          sources = local->cont.lookup.sources;          memset (sources, 0, sizeof (*sources) * priv->child_count); -        afr_build_pending_matrix (priv->pending_key, pending_matrix, -                                  xattr, txn_type, priv->child_count); - -        sh_type = afr_self_heal_type_for_transaction (txn_type); -        if (AFR_SELF_HEAL_INVALID == sh_type) -                goto out; - -        nsources = afr_mark_sources (sources, pending_matrix, bufs, -                                     priv->child_count, sh_type, -                                     valid_children, this->name, NULL); +        nsources = afr_build_sources (this, xattr, bufs, pending_matrix, +                                      sources, success_children, txn_type, +                                      &subvol_status, _gf_false); +        if (subvol_status & SPLIT_BRAIN) +                gf_log (this->name, GF_LOG_WARNING, "%s: Possible split-brain", +                        local->loc.path);          if (nsources < 0) {                  ret = -1;                  goto out; @@ -836,7 +826,7 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,                                                          priv->child_count,                                                          prev_read_child,                                                          config_read_child, -                                        
                valid_children); +                                                        success_children);          ret = 0;  out:          afr_destroy_pending_matrix (pending_matrix, priv->child_count); diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 161b870f5..c7a4c2b28 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -1602,7 +1602,7 @@ afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this,  }  gf_boolean_t -afr_sh_need_recreate (afr_self_heal_t *impunge_sh, int *sources, +afr_sh_need_recreate (afr_self_heal_t *impunge_sh,                        unsigned int child, unsigned int child_count)  {          int32_t         *success_children = NULL; @@ -1610,7 +1610,6 @@ afr_sh_need_recreate (afr_self_heal_t *impunge_sh, int *sources,          GF_ASSERT (impunge_sh->impunging_entry_mode);          GF_ASSERT (impunge_sh->child_errno); -        GF_ASSERT (sources);          success_children = impunge_sh->child_success;          if (child == impunge_sh->active_source) { @@ -1638,7 +1637,7 @@ afr_sh_recreate_count (afr_self_heal_t *impunge_sh, int *sources,          int             i = 0;          for (i = 0; i < child_count; i++) { -                if (afr_sh_need_recreate (impunge_sh, sources, i, child_count)) +                if (afr_sh_need_recreate (impunge_sh, i, child_count))                          count++;          } @@ -1673,8 +1672,7 @@ afr_sh_entry_call_impunge_recreate (call_frame_t *impunge_frame,          GF_ASSERT (recreate_count);          impunge_local->call_count = recreate_count;          for (i = 0; i < priv->child_count; i++) { -                if (afr_sh_need_recreate (impunge_sh, sh->sources, i, -                                          priv->child_count)) { +                if (afr_sh_need_recreate (impunge_sh, i, priv->child_count)) {                          (void)afr_sh_entry_impunge_create 
(impunge_frame, this,                                                             i, buf,                                                             postparent); @@ -2135,12 +2133,12 @@ void  afr_sh_entry_fix (call_frame_t *frame, xlator_t *this,                    int32_t op_ret, int32_t op_errno)  { -        afr_local_t        *local = NULL; -        afr_self_heal_t    *sh = NULL; -        afr_private_t      *priv = NULL; -        afr_source_flags_t flags = 0; - -        int nsources = 0; +        afr_local_t     *local = NULL; +        afr_self_heal_t *sh = NULL; +        afr_private_t   *priv = NULL; +        int              source = 0; +        int              nsources = 0; +        int32_t          subvol_status = 0;          local = frame->local;          sh = &local->self_heal; @@ -2158,26 +2156,30 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this,                  goto heal;          } -        afr_build_pending_matrix (priv->pending_key, sh->pending_matrix, -                                  sh->xattr, AFR_ENTRY_TRANSACTION, -                                  priv->child_count); - -        afr_sh_print_pending_matrix (sh->pending_matrix, this); - -        nsources = afr_mark_sources (sh->sources, sh->pending_matrix, sh->buf, -                                     priv->child_count, AFR_SELF_HEAL_ENTRY, -                                     sh->child_success, this->name, &flags); - -        if (nsources == 0) { +        nsources = afr_build_sources (this, sh->xattr, sh->buf, +                                      sh->pending_matrix, sh->sources, +                                      sh->child_success, +                                      AFR_ENTRY_TRANSACTION, &subvol_status, +                                      _gf_true); +        if ((subvol_status & ALL_FOOLS) || +            (subvol_status & SPLIT_BRAIN)) { +                gf_log (this->name, GF_LOG_INFO, "%s: Performing conservative " +                        "merge", local->loc.path); 
+                source = -1; +                memset (sh->sources, 0, +                        sizeof (*sh->sources) * priv->child_count); +        } else if (nsources == 0) {                  gf_log (this->name, GF_LOG_TRACE,                          "No self-heal needed for %s",                          local->loc.path);                  afr_sh_entry_finish (frame, this);                  return; +        } else { +                source = afr_sh_select_source (sh->sources, priv->child_count);          } -        sh->source = afr_sh_select_source (sh->sources, priv->child_count); +        sh->source = source;  heal:          afr_sh_entry_sync_prepare (frame, this); diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index 9b920eb15..0dc555937 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -480,7 +480,7 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,          nsources = afr_build_sources (this, sh->xattr, sh->buf,                                        sh->pending_matrix, sh->sources,                                        sh->child_success, -                                      AFR_METADATA_TRANSACTION, NULL); +                                      AFR_METADATA_TRANSACTION, NULL, _gf_false);          if (nsources == 0) {                  gf_log (this->name, GF_LOG_TRACE,                          "No self-heal needed for %s", diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index f29dcae41..1e3592f65 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -221,6 +221,10 @@ typedef struct {          call_frame_t *sh_frame;  } afr_self_heal_t; +typedef enum { +        SPLIT_BRAIN = 1, +        ALL_FOOLS = 2 +} afr_subvol_status_t;  typedef enum {          AFR_DATA_TRANSACTION,          /* truncate, write, ... */  | 
