From 32cb6d60ca5a40a15fee892aa0da9b373f6ca02e Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Wed, 18 Jul 2012 10:28:18 +0530 Subject: cluster/afr: Avoid setting split-brain outside inode locks RCA: The bug is observed because the decision to mark a file in split-brain is taken outside appropriate locks. Lookup gathers xattrs outside any lock. The xattrs being in split-brain in lookup should only be taken as a hint. Appropriate inodelks should be taken before confirming a split-brain. Self-heal confirms this at the moment. Fix: Self-heals are launched to inspect xattrs when the data/metadata self-heal options are turned on. Decision to set/reset split-brain flag is taken inside appropriate locks. Known Issue After fix: If data/metadata self-heal is turned off, inspecting of xattrs could not be performed so split-brain behavior does not work correctly if the self-heal options are turned off. This bug is handled only in upstream. Change-Id: I59a43d5ce7bf9ca35bff54a51bf4cfa55d717a9e BUG: 833727 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.com/3691 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/cluster/afr/src/afr-common.c | 3 +- xlators/cluster/afr/src/afr-self-heal-common.c | 2 +- xlators/cluster/afr/src/afr-self-heal-data.c | 36 +++++++++++++----------- xlators/cluster/afr/src/afr-self-heal-metadata.c | 33 ++++++++++++++-------- xlators/cluster/afr/src/afr.h | 6 ++-- 5 files changed, 46 insertions(+), 34 deletions(-) (limited to 'xlators') diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 7973dfdde04..f416ed921ce 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -1220,7 +1220,7 @@ afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this, if ((local->success_count > 0) && split_brain && IA_ISREG (local->cont.lookup.inode->ia_type)) { local->self_heal.do_data_self_heal = _gf_true; - local->self_heal.do_gfid_self_heal = _gf_true; + local->self_heal.do_metadata_self_heal = _gf_true; gf_log (this->name, GF_LOG_WARNING, "split brain detected during lookup of %s.", local->loc.path); @@ -1504,6 +1504,7 @@ afr_lookup_set_self_heal_params (afr_local_t *local, xlator_t *this) sh = &local->self_heal; split_brain = afr_is_split_brain (this, local->cont.lookup.inode); + split_brain = split_brain || local->cont.lookup.possible_spb; afr_detect_self_heal_by_lookup_status (local, this, split_brain); if (afr_lookup_gfid_missing_count (local, this)) diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 9e493092e2a..be8809a5de7 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -2144,7 +2144,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this) local = bgsh_frame->local; sh = &local->self_heal; - if (local->govinda_gOvinda) + if (local->govinda_gOvinda || sh->mdata_spb || sh->data_spb) split_brain = _gf_true; afr_set_split_brain (this, sh->inode, split_brain); diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 90a2af18b5a..72bf8d4b917 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -740,16 +740,6 @@ afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this) nsources = afr_build_sources (this, sh->xattr, sh->buf, sh->pending_matrix, sh->sources, sh->success_children, AFR_DATA_TRANSACTION, NULL, _gf_true); - if ((nsources == 0) && !sh->sync_done) { - gf_log (this->name, GF_LOG_DEBUG, - "No self-heal needed for %s", - local->loc.path); - - local->govinda_gOvinda = 0; - afr_sh_data_finish (frame, this); - return 0; - } - if ((nsources == -1) && (priv->favorite_child != -1) && (sh->child_errno[priv->favorite_child] == 0)) { @@ -772,12 +762,12 @@ afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this) "split-brain). Please delete the file from all but " "the preferred subvolume.", local->loc.path); - local->govinda_gOvinda = 1; + sh->data_spb = _gf_true; afr_sh_data_fail (frame, this); return 0; } - local->govinda_gOvinda = 0; + sh->data_spb = _gf_false; ret = afr_sh_inode_set_read_ctx (sh, this); if (ret) { gf_log (this->name, GF_LOG_DEBUG, @@ -790,6 +780,15 @@ afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this) if (sh->sync_done) { afr_sh_data_setattr (frame, this); } else { + if (nsources == 0) { + gf_log (this->name, GF_LOG_DEBUG, + "No self-heal needed for %s", + local->loc.path); + + afr_sh_data_finish (frame, this); + return 0; + } + afr_sh_data_fix (frame, this); } return 0; @@ -831,9 +830,7 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local, local->loc.path); switch (txn_type) { case AFR_DATA_TRANSACTION: - afr_set_split_brain (this, - local->cont.lookup.inode, - _gf_true); + local->cont.lookup.possible_spb = _gf_true; nsources = 1; sources[success_children[0]] = 1; break; @@ -1393,8 +1390,13 @@ afr_self_heal_data (call_frame_t *frame, xlator_t *this) local = frame->local; sh = &local->self_heal; - local->govinda_gOvinda = afr_is_split_brain (this, sh->inode); - + /* Self-heal completion cbk changes inode split-brain status based on + * govinda_gOvinda, mdata_spb, data_spb values. Initialize data_spb + * with current split-brain status. If for some reason self-heal + * fails(locking phase etc), it makes sure we retain the split-brain + * status before this self-heal started. + */ + sh->data_spb = afr_is_split_brain (this, sh->inode); if (sh->do_data_self_heal && afr_data_self_heal_enabled (priv->data_self_heal)) { afr_sh_data_open (frame, this); diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index 79718315cbf..6c3989e8490 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -50,7 +50,7 @@ afr_sh_metadata_done (call_frame_t *frame, xlator_t *this) sh = &local->self_heal; afr_sh_reset (frame, this); - if (local->govinda_gOvinda) { + if (sh->mdata_spb) { gf_log (this->name, GF_LOG_INFO, "split-brain detected, aborting selfheal of %s", local->loc.path); @@ -450,15 +450,6 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this, sh->pending_matrix, sh->sources, sh->success_children, AFR_METADATA_TRANSACTION, NULL, _gf_false); - if (nsources == 0) { - gf_log (this->name, GF_LOG_TRACE, - "No self-heal needed for %s", - local->loc.path); - - afr_sh_metadata_finish (frame, this); - goto out; - } - if ((nsources == -1) && (priv->favorite_child != -1) && (sh->child_errno[priv->favorite_child] == 0)) { @@ -480,7 +471,16 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this, "(possible split-brain). Please fix the file on " "all backend volumes", local->loc.path); - local->govinda_gOvinda = 1; + sh->mdata_spb = _gf_true; + + afr_sh_metadata_finish (frame, this); + goto out; + } + sh->mdata_spb = _gf_false; + if (nsources == 0) { + gf_log (this->name, GF_LOG_TRACE, + "No self-heal needed for %s", + local->loc.path); afr_sh_metadata_finish (frame, this); goto out; @@ -584,10 +584,19 @@ int afr_self_heal_metadata (call_frame_t *frame, xlator_t *this) { afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; afr_private_t *priv = this->private; - local = frame->local; + sh = &local->self_heal; + + /* Self-heal completion cbk changes inode split-brain status based on + * govinda_gOvinda, mdata_spb, data_spb values. Initialize mdata_spb + * with current split-brain status. If for some reason self-heal + * fails(locking phase etc), it makes sure we retain the split-brain + * status before this self-heal started. + */ + sh->mdata_spb = afr_is_split_brain (this, sh->inode); if (local->self_heal.do_metadata_self_heal && priv->metadata_self_heal) { afr_sh_metadata_lock (frame, this); diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 795db603528..fe0e2b2e6ed 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -261,6 +261,8 @@ typedef struct { int (*algo_completion_cbk) (call_frame_t *frame, xlator_t *this); int (*algo_abort_cbk) (call_frame_t *frame, xlator_t *this); void (*gfid_sh_success_cbk) (call_frame_t *sh_frame, xlator_t *this); + gf_boolean_t mdata_spb; + gf_boolean_t data_spb; call_frame_t *sh_frame; } afr_self_heal_t; @@ -368,10 +370,7 @@ typedef struct _afr_local { unsigned int call_count; unsigned int success_count; unsigned int enoent_count; - - unsigned int govinda_gOvinda; - unsigned int read_child_index; unsigned char read_child_returned; unsigned int first_up_child; @@ -439,6 +438,7 @@ typedef struct _afr_local { int32_t *sources; int32_t *success_children; gf_boolean_t fresh_lookup; + gf_boolean_t possible_spb; } lookup; struct { -- cgit