From d2a05724a647c9ca38e14cc35be405074ff6795d Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Tue, 8 Nov 2011 22:21:30 +0530 Subject: cluster/afr: Handle split-brain/all-fool xattrs for directory Change-Id: I058ed91494e1a9bbcf5e6e6c49e8ee4f7e014e23 BUG: 3796 Reviewed-on: http://review.gluster.com/695 Tested-by: Gluster Build System Reviewed-by: Anand Avati --- xlators/cluster/afr/src/afr-self-heal-common.c | 68 +++++++++++++++++------- xlators/cluster/afr/src/afr-self-heal-common.h | 11 +++- xlators/cluster/afr/src/afr-self-heal-data.c | 4 +- xlators/cluster/afr/src/afr-self-heal-entry.c | 17 +++--- xlators/cluster/afr/src/afr-self-heal-metadata.c | 2 +- 5 files changed, 67 insertions(+), 35 deletions(-) diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 4f3a556b6..449c386b7 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -585,7 +585,8 @@ afr_find_child_character_type (int32_t *pending_row, int32_t child, int afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs, int32_t **pending_matrix, int32_t *sources, - int32_t *success_children, afr_transaction_type type) + int32_t *success_children, afr_transaction_type type, + afr_source_flags_t *flags) { afr_private_t *priv = NULL; afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID; @@ -607,11 +608,24 @@ afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs, nsources = afr_mark_sources (sources, pending_matrix, bufs, priv->child_count, sh_type, - success_children, this->name); + success_children, this->name, flags); out: return nsources; } +void +afr_mark_valid_children_sources (int32_t *sources, int32_t *valid_children, + unsigned int child_count) +{ + int i = 0; + memset (sources, 0, sizeof (*sources) * child_count); + for (i = 0; i < child_count; i++) { + if (valid_children[i] == -1) + break; + sources[valid_children[i]] = 1; + } +} + /** * mark_sources: 
Mark all 'source' nodes and return number of source * nodes found @@ -632,14 +646,15 @@ out: * a split-brain. If one wise node refers to the other but the other doesn't * refer back, the referrer is a source. * - * All fools are sinks, unless there are no 'wise' nodes. In that case, - * one of the fools is made a source. + * All fools are sinks, unless there are no 'wise' nodes. If 'flags' is NULL, + * biggest fool(s) is/are marked as source. */ int afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs, int32_t child_count, afr_self_heal_type type, - int32_t *valid_children, const char *xlator_name) + int32_t *valid_children, const char *xlator_name, + afr_source_flags_t *flags) { /* stores the 'characters' (innocent, fool, wise) of the nodes */ @@ -688,6 +703,8 @@ afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs, /* split-brain */ gf_log (this->name, GF_LOG_INFO, "split-brain possible, no source detected"); + if (flags) + *flags |= AFR_SPLIT_BRAIN; nsources = -1; } else { @@ -696,6 +713,11 @@ afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs, child_count); } } else { + if (flags) { + *flags |= AFR_ALL_FOOLS; + nsources = -1; + goto out; + } nsources = afr_mark_biggest_of_fools_as_source (sources, pending_matrix, characters, @@ -703,12 +725,9 @@ afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs, } out: - if (nsources == 0) { - for (i = 0; i < child_count; i++) { - if (valid_children[i] != -1) - sources[valid_children[i]] = 1; - } - } + if (nsources == 0) + afr_mark_valid_children_sources (sources, valid_children, + child_count); if (characters) GF_FREE (characters); @@ -1242,7 +1261,7 @@ afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this, nsources = afr_build_sources (this, sh->xattr, sh->buf, sh->pending_matrix, sh->sources, sh->child_success, - afr_transaction_type_get (ia_type)); + afr_transaction_type_get (ia_type), NULL); 
if (nsources < 0) { gf_log (this->name, GF_LOG_INFO, "No sources for dir of %s," " in missing entry self-heal, continuing with the rest" @@ -1667,12 +1686,13 @@ static void afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this, int32_t op_ret, int32_t op_errno) { - afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int enoent_count = 0; - int nsources = 0; - int source = -1; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int enoent_count = 0; + int nsources = 0; + int source = -1; + afr_source_flags_t flags = 0; local = frame->local; sh = &local->self_heal; @@ -1702,14 +1722,22 @@ afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this, nsources = afr_build_sources (this, sh->xattr, sh->buf, sh->pending_matrix, sh->sources, sh->child_success, - AFR_ENTRY_TRANSACTION); - if (nsources < 0) { + AFR_ENTRY_TRANSACTION, &flags); + if ((nsources < 0) && !flags) { gf_log (this->name, GF_LOG_ERROR, "No sources for dir of %s," " in missing entry self-heal, aborting self-heal", local->loc.path); goto out; } + //if allfools/split-brain give the behavior of missing entry creation + if (flags) { + gf_log (this->name, GF_LOG_DEBUG, "%s: All subvols pending so " + "do missing entry creation", local->loc.path); + afr_mark_valid_children_sources (sh->sources, sh->child_success, + priv->child_count); + } + source = afr_sh_select_source (sh->sources, priv->child_count); if (source == -1) { GF_ASSERT (0); diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h index d58fdb765..b313c17e9 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.h +++ b/xlators/cluster/afr/src/afr-self-heal-common.h @@ -34,6 +34,11 @@ typedef enum { AFR_LOOKUP_FAIL_MISSING_GFIDS = 2, } afr_lookup_flags_t; +typedef enum { + AFR_SPLIT_BRAIN = 1, + AFR_ALL_FOOLS =2 +} afr_source_flags_t; + int afr_sh_select_source (int sources[], int child_count); @@ -59,7 
+64,8 @@ afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr, int afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs, int32_t child_count, afr_self_heal_type type, - int32_t *valid_children, const char *xlator_name); + int32_t *valid_children, const char *xlator_name, + afr_source_flags_t *flags); int afr_sh_delta_to_xattr (afr_private_t *priv, @@ -79,7 +85,8 @@ afr_self_heal_type_for_transaction (afr_transaction_type type); int afr_build_sources (xlator_t *xlator, dict_t **xattr, struct iatt *bufs, int32_t **pending_matrix, int32_t *sources, - int32_t *success_children, afr_transaction_type type); + int32_t *success_children, afr_transaction_type type, + afr_source_flags_t *flags); void afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count); int diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 5bd5edd66..d86dc6265 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -602,7 +602,7 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this) nsources = afr_mark_sources (sh->sources, sh->pending_matrix, sh->buf, priv->child_count, AFR_SELF_HEAL_DATA, - sh->child_success, this->name); + sh->child_success, this->name, NULL); if (nsources == 0) { gf_log (this->name, GF_LOG_DEBUG, @@ -823,7 +823,7 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local, nsources = afr_mark_sources (sources, pending_matrix, bufs, priv->child_count, sh_type, - valid_children, this->name); + valid_children, this->name, NULL); if (nsources < 0) { ret = -1; goto out; diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 16dd46d2e..161b870f5 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -1613,7 +1613,7 @@ afr_sh_need_recreate (afr_self_heal_t *impunge_sh, int *sources, GF_ASSERT 
(sources); success_children = impunge_sh->child_success; - if (sources[child] || (child == impunge_sh->active_source)) { + if (child == impunge_sh->active_source) { GF_ASSERT (afr_is_child_present (success_children, child_count, child)); goto out; @@ -2135,10 +2135,10 @@ void afr_sh_entry_fix (call_frame_t *frame, xlator_t *this, int32_t op_ret, int32_t op_errno) { - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; - int source = 0; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + afr_source_flags_t flags = 0; int nsources = 0; @@ -2166,7 +2166,7 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this, nsources = afr_mark_sources (sh->sources, sh->pending_matrix, sh->buf, priv->child_count, AFR_SELF_HEAL_ENTRY, - sh->child_success, this->name); + sh->child_success, this->name, &flags); if (nsources == 0) { gf_log (this->name, GF_LOG_TRACE, @@ -2177,10 +2177,7 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this, return; } - source = afr_sh_select_source (sh->sources, priv->child_count); - - sh->source = source; - + sh->source = afr_sh_select_source (sh->sources, priv->child_count); heal: afr_sh_entry_sync_prepare (frame, this); diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index 5727209c5..9b920eb15 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -480,7 +480,7 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this, nsources = afr_build_sources (this, sh->xattr, sh->buf, sh->pending_matrix, sh->sources, sh->child_success, - AFR_METADATA_TRANSACTION); + AFR_METADATA_TRANSACTION, NULL); if (nsources == 0) { gf_log (this->name, GF_LOG_TRACE, "No self-heal needed for %s", -- cgit