summaryrefslogtreecommitdiffstats
path: root/xlators
diff options
context:
space:
mode:
authorPranith Kumar K <pranithk@gluster.com>2012-06-27 16:42:35 +0530
committerAnand Avati <avati@redhat.com>2012-07-26 10:14:54 -0700
commitf153c835807ac31006ba690b1deb47b20b51bc83 (patch)
tree563b61874ec5304116d92f39bb6e1f749b1c6d3f /xlators
parentc2a7a22bfe18316eab441d49e515726e53f74582 (diff)
cluster/afr: Modified split-brain handling
RCA The bug is observed because the decision to mark a file in split-brain is taken outside appropriate locks. Lookup gathers xattrs outside any lock. The xattrs being in split-brain in lookup should only be taken as a hint. Appropriate inodelks should be taken before confirming a split-brain. Self-heal confirms this at the moment. If data/metadata self-heal is turned off, inspecting of xattrs could not be performed so split-brain behavior does not work correctly if the self-heal options are turned off. Fix Self-heals are launched to inspect xattrs even when the data/metadata self-heal options are turned off. The decision to heal data/metadata after the xattrs are inspected is based on whether the options are turned on/off. So decision to set/reset split-brain flag is taken inside appropriate locks. Testcases: tests 33-36 in https://github.com/pranithk/gluster-tests/blob/master/afr/self-heal.sh Change-Id: Ia8aeab08208b50c06609ad35a9d72f3d553ee343 BUG: 833727 Signed-off-by: Pranith Kumar K <pranithk@gluster.com> Reviewed-on: http://review.gluster.com/3626 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Anand Avati <avati@redhat.com>
Diffstat (limited to 'xlators')
-rw-r--r--xlators/cluster/afr/src/afr-common.c81
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c3
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c63
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c50
-rw-r--r--xlators/cluster/afr/src/afr.h6
5 files changed, 146 insertions, 57 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index b6e16074364..70a9cd35463 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -908,6 +908,8 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
GF_FREE (local->cont.lookup.success_children);
GF_FREE (local->cont.lookup.sources);
+ afr_matrix_cleanup (local->cont.lookup.pending_matrix,
+ priv->child_count);
}
{ /* getxattr */
@@ -1181,6 +1183,36 @@ afr_lookup_set_self_heal_params_by_xattr (afr_local_t *local, xlator_t *this,
}
}
+void
+afr_lookup_check_set_metadata_split_brain (afr_local_t *local, xlator_t *this)
+{
+ int32_t *sources = NULL;
+ afr_private_t *priv = NULL;
+ int32_t subvol_status = 0;
+ int32_t *success_children = NULL;
+ dict_t **xattrs = NULL;
+ struct iatt *bufs = NULL;
+ int32_t **pending_matrix = NULL;
+
+ priv = this->private;
+
+ sources = GF_CALLOC (priv->child_count, sizeof (*sources),
+ gf_afr_mt_int32_t);
+ if (NULL == sources)
+ goto out;
+ success_children = local->cont.lookup.success_children;
+ xattrs = local->cont.lookup.xattrs;
+ bufs = local->cont.lookup.bufs;
+ pending_matrix = local->cont.lookup.pending_matrix;
+ afr_build_sources (this, xattrs, bufs, pending_matrix,
+ sources, success_children, AFR_METADATA_TRANSACTION,
+ &subvol_status, _gf_false);
+ if (subvol_status & SPLIT_BRAIN)
+ local->cont.lookup.possible_spb = _gf_true;
+out:
+ GF_FREE (sources);
+}
+
static void
afr_detect_self_heal_by_iatt (afr_local_t *local, xlator_t *this,
struct iatt *buf, struct iatt *lookup_buf)
@@ -1214,8 +1246,26 @@ afr_detect_self_heal_by_iatt (afr_local_t *local, xlator_t *this,
}
static void
-afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this,
- gf_boolean_t split_brain)
+afr_detect_self_heal_by_split_brain_status (afr_local_t *local, xlator_t *this)
+{
+ gf_boolean_t split_brain = _gf_false;
+ afr_self_heal_t *sh = NULL;
+
+ sh = &local->self_heal;
+
+ split_brain = afr_is_split_brain (this, local->cont.lookup.inode);
+ split_brain = split_brain || local->cont.lookup.possible_spb;
+ if ((local->success_count > 0) && split_brain &&
+ IA_ISREG (local->cont.lookup.inode->ia_type)) {
+ sh->force_confirm_spb = _gf_true;
+ gf_log (this->name, GF_LOG_WARNING,
+ "split brain detected during lookup of %s.",
+ local->loc.path);
+ }
+}
+
+static void
+afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this)
{
GF_ASSERT (local);
GF_ASSERT (this);
@@ -1233,15 +1283,6 @@ afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this,
goto out;
}
- if ((local->success_count > 0) && split_brain &&
- IA_ISREG (local->cont.lookup.inode->ia_type)) {
- local->self_heal.do_data_self_heal = _gf_true;
- local->self_heal.do_gfid_self_heal = _gf_true;
- gf_log (this->name, GF_LOG_WARNING,
- "split brain detected during lookup of %s.",
- local->loc.path);
- }
-
out:
return;
}
@@ -1252,6 +1293,8 @@ afr_can_self_heal_proceed (afr_self_heal_t *sh, afr_private_t *priv)
GF_ASSERT (sh);
GF_ASSERT (priv);
+ if (sh->force_confirm_spb)
+ return _gf_true;
return (sh->do_gfid_self_heal
|| sh->do_missing_entry_self_heal
|| (afr_data_self_heal_enabled (priv->data_self_heal) &&
@@ -1516,13 +1559,11 @@ afr_lookup_set_self_heal_params (afr_local_t *local, xlator_t *this)
int32_t child1 = -1;
int32_t child2 = -1;
afr_self_heal_t *sh = NULL;
- gf_boolean_t split_brain = _gf_false;
priv = this->private;
sh = &local->self_heal;
- split_brain = afr_is_split_brain (this, local->cont.lookup.inode);
- afr_detect_self_heal_by_lookup_status (local, this, split_brain);
+ afr_detect_self_heal_by_lookup_status (local, this);
if (afr_lookup_gfid_missing_count (local, this))
local->self_heal.do_gfid_self_heal = _gf_true;
@@ -1549,9 +1590,11 @@ afr_lookup_set_self_heal_params (afr_local_t *local, xlator_t *this)
afr_lookup_set_self_heal_params_by_xattr (local, this,
xattr[child1]);
}
- if (afr_open_only_data_self_heal (priv->data_self_heal)
- && !split_brain)
+ if (afr_open_only_data_self_heal (priv->data_self_heal))
sh->do_data_self_heal = _gf_false;
+ if (sh->do_metadata_self_heal)
+ afr_lookup_check_set_metadata_split_brain (local, this);
+ afr_detect_self_heal_by_split_brain_status (local, this);
}
int
@@ -2143,6 +2186,7 @@ afr_lookup_cont_init (afr_local_t *local, unsigned int child_count)
struct iatt *iatts = NULL;
int32_t *success_children = NULL;
int32_t *sources = NULL;
+ int32_t **pending_matrix = NULL;
GF_ASSERT (local);
local->cont.lookup.xattrs = GF_CALLOC (child_count,
@@ -2175,6 +2219,11 @@ afr_lookup_cont_init (afr_local_t *local, unsigned int child_count)
goto out;
local->cont.lookup.sources = sources;
+ pending_matrix = afr_matrix_create (child_count, child_count);
+ if (NULL == pending_matrix)
+ goto out;
+ local->cont.lookup.pending_matrix = pending_matrix;
+
ret = 0;
out:
return ret;
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index f082685e207..48fa31d8689 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -2095,6 +2095,7 @@ afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)
shc->do_data_self_heal = sh->do_data_self_heal;
shc->do_metadata_self_heal = sh->do_metadata_self_heal;
shc->do_entry_self_heal = sh->do_entry_self_heal;
+ shc->force_confirm_spb = sh->force_confirm_spb;
shc->forced_merge = sh->forced_merge;
shc->background = sh->background;
shc->type = sh->type;
@@ -2162,7 +2163,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
local = bgsh_frame->local;
sh = &local->self_heal;
- if (local->govinda_gOvinda)
+ if (local->govinda_gOvinda || sh->mdata_spb || sh->data_spb)
split_brain = _gf_true;
afr_set_split_brain (this, sh->inode, split_brain);
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 1672b4f282e..6619c1353c7 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -688,16 +688,6 @@ afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this)
nsources = afr_build_sources (this, sh->xattr, sh->buf, sh->pending_matrix,
sh->sources, sh->success_children,
AFR_DATA_TRANSACTION, NULL, _gf_true);
- if ((nsources == 0) && !sh->sync_done) {
- gf_log (this->name, GF_LOG_DEBUG,
- "No self-heal needed for %s",
- local->loc.path);
-
- local->govinda_gOvinda = 0;
- afr_sh_data_finish (frame, this);
- return 0;
- }
-
if ((nsources == -1)
&& (priv->favorite_child != -1)
&& (sh->child_errno[priv->favorite_child] == 0)) {
@@ -720,12 +710,12 @@ afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this)
"split-brain). Please delete the file from all but "
"the preferred subvolume.", local->loc.path);
- local->govinda_gOvinda = 1;
+ sh->data_spb = _gf_true;
afr_sh_data_fail (frame, this);
return 0;
}
- local->govinda_gOvinda = 0;
+ sh->data_spb = _gf_false;
ret = afr_sh_inode_set_read_ctx (sh, this);
if (ret) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -738,7 +728,20 @@ afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this)
if (sh->sync_done) {
afr_sh_data_setattr (frame, this);
} else {
- afr_sh_data_fix (frame, this);
+ if (nsources == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "No self-heal needed for %s",
+ local->loc.path);
+
+ afr_sh_data_finish (frame, this);
+ return 0;
+ }
+
+ if (sh->do_data_self_heal &&
+ afr_data_self_heal_enabled (priv->data_self_heal))
+ afr_sh_data_fix (frame, this);
+ else
+ afr_sh_data_finish (frame, this);
}
return 0;
}
@@ -764,11 +767,7 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
bufs = local->cont.lookup.bufs;
success_children = local->cont.lookup.success_children;
- pending_matrix = afr_matrix_create (priv->child_count,
- priv->child_count);
- if (NULL == pending_matrix)
- goto out;
-
+ pending_matrix = local->cont.lookup.pending_matrix;
sources = local->cont.lookup.sources;
memset (sources, 0, sizeof (*sources) * priv->child_count);
@@ -780,9 +779,7 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
local->loc.path);
switch (txn_type) {
case AFR_DATA_TRANSACTION:
- afr_set_split_brain (this,
- local->cont.lookup.inode,
- _gf_true);
+ local->cont.lookup.possible_spb = _gf_true;
nsources = 1;
sources[success_children[0]] = 1;
break;
@@ -809,7 +806,6 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
sources,
priv->hash_mode, gfid);
out:
- afr_matrix_cleanup (pending_matrix, priv->child_count);
gf_log (this->name, GF_LOG_DEBUG, "returning read_child: %d",
read_child);
return read_child;
@@ -1351,6 +1347,17 @@ afr_sh_non_reg_lock_success (call_frame_t *frame, xlator_t *this)
return 0;
}
+gf_boolean_t
+afr_can_start_data_self_heal (afr_self_heal_t *sh, afr_private_t *priv)
+{
+ if (sh->force_confirm_spb)
+ return _gf_true;
+ if (sh->do_data_self_heal &&
+ afr_data_self_heal_enabled (priv->data_self_heal))
+ return _gf_true;
+ return _gf_false;
+}
+
int
afr_self_heal_data (call_frame_t *frame, xlator_t *this)
{
@@ -1361,10 +1368,14 @@ afr_self_heal_data (call_frame_t *frame, xlator_t *this)
local = frame->local;
sh = &local->self_heal;
- local->govinda_gOvinda = afr_is_split_brain (this, sh->inode);
-
- if (sh->do_data_self_heal &&
- afr_data_self_heal_enabled (priv->data_self_heal)) {
+ /* Self-heal completion cbk changes inode split-brain status based on
+ * govinda_gOvinda, mdata_spb, data_spb value.
+ * Initialize data_spb with current split-brain status.
+ * If for some reason self-heal fails(locking phase etc), it makes sure
+ * we retain the split-brain status before this self-heal started.
+ */
+ sh->data_spb = afr_is_split_brain (this, sh->inode);
+ if (afr_can_start_data_self_heal (sh, priv)) {
if (IA_ISREG (sh->type)) {
afr_sh_data_open (frame, this);
} else {
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index 147ab98d409..79ce07d0039 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -50,7 +50,7 @@ afr_sh_metadata_done (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
afr_sh_reset (frame, this);
- if (local->govinda_gOvinda) {
+ if (sh->mdata_spb) {
gf_log (this->name, GF_LOG_INFO,
"split-brain detected, aborting selfheal of %s",
local->loc.path);
@@ -383,15 +383,6 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,
sh->pending_matrix, sh->sources,
sh->success_children,
AFR_METADATA_TRANSACTION, NULL, _gf_false);
- if (nsources == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "No self-heal needed for %s",
- local->loc.path);
-
- afr_sh_metadata_finish (frame, this);
- goto out;
- }
-
if ((nsources == -1)
&& (priv->favorite_child != -1)
&& (sh->child_errno[priv->favorite_child] == 0)) {
@@ -413,7 +404,17 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,
"(possible split-brain). Please fix the file on "
"all backend volumes", local->loc.path);
- local->govinda_gOvinda = 1;
+ sh->mdata_spb = _gf_true;
+
+ afr_sh_metadata_finish (frame, this);
+ goto out;
+ }
+
+ sh->mdata_spb = _gf_false;
+ if (nsources == 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "No self-heal needed for %s",
+ local->loc.path);
afr_sh_metadata_finish (frame, this);
goto out;
@@ -452,7 +453,10 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,
sh->fresh_children);
}
- afr_sh_metadata_sync_prepare (frame, this);
+ if (sh->do_metadata_self_heal && priv->metadata_self_heal)
+ afr_sh_metadata_sync_prepare (frame, this);
+ else
+ afr_sh_metadata_finish (frame, this);
out:
return;
}
@@ -512,17 +516,35 @@ afr_sh_metadata_lock (call_frame_t *frame, xlator_t *this)
return 0;
}
+gf_boolean_t
+afr_can_start_metadata_self_heal (afr_self_heal_t *sh, afr_private_t *priv)
+{
+ if (sh->force_confirm_spb)
+ return _gf_true;
+ if (sh->do_metadata_self_heal && priv->metadata_self_heal)
+ return _gf_true;
+ return _gf_false;
+}
int
afr_self_heal_metadata (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = this->private;
-
+ afr_self_heal_t *sh = &local->self_heal;
local = frame->local;
+ sh = &local->self_heal;
+
+ /* Self-heal completion cbk changes inode split-brain status based on
+ * govinda_gOvinda, mdata_spb, data_spb value.
+ * Initialize mdata_spb with current split-brain status.
+ * If for some reason self-heal fails(locking phase etc), it makes sure
+ * we retain the split-brain status before this self-heal started.
+ */
+ sh->mdata_spb = afr_is_split_brain (this, sh->inode);
- if (local->self_heal.do_metadata_self_heal && priv->metadata_self_heal) {
+ if (afr_can_start_metadata_self_heal (sh, priv)) {
afr_sh_metadata_lock (frame, this);
} else {
afr_sh_metadata_done (frame, this);
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index abb95533449..5ed478c4a2f 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -170,6 +170,8 @@ typedef struct {
gf_boolean_t do_entry_self_heal;
gf_boolean_t do_gfid_self_heal;
gf_boolean_t do_missing_entry_self_heal;
+ gf_boolean_t force_confirm_spb; /* Check for split-brains even when
+ self-heal is turned off */
gf_boolean_t forced_merge; /* Is this a self-heal triggered to
forcibly merge the directories? */
@@ -266,6 +268,8 @@ typedef struct {
int (*algo_completion_cbk) (call_frame_t *frame, xlator_t *this);
int (*algo_abort_cbk) (call_frame_t *frame, xlator_t *this);
void (*gfid_sh_success_cbk) (call_frame_t *sh_frame, xlator_t *this);
+ gf_boolean_t mdata_spb;
+ gf_boolean_t data_spb;
call_frame_t *sh_frame;
} afr_self_heal_t;
@@ -444,7 +448,9 @@ typedef struct _afr_local {
int32_t read_child;
int32_t *sources;
int32_t *success_children;
+ int32_t **pending_matrix;
gf_boolean_t fresh_lookup;
+ gf_boolean_t possible_spb;
} lookup;
struct {