summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr/src
diff options
context:
space:
mode:
author: karthik-us <ksubrahm@redhat.com> 2019-06-06 10:59:42 +0530
committer: Amar Tumballi <amarts@redhat.com> 2019-06-10 14:48:11 +0000
commit: 1b0b869d91d4e5bedc69922128551602dc4bbc13 (patch)
tree: ffe933d364081a54d62eab34644f712e5ce2475c /xlators/cluster/afr/src
parent: 5b35b18be6a04ed4dd69dc71e5fb436c212c15fc (diff)
Cluster/afr: Don't treat all bricks having metadata pending as split-brain
Problem: We currently don't have a way to roll back (undo) post-ops if quorum is not met. Though the FOP is still unwound with failure, the xattrs remain on the disk. Due to these partial post-ops and partial heals (healing only when 2 bricks are up), we can end up in metadata split-brain purely from the afr xattrs point of view, i.e., each brick is blamed by at least one of the others for metadata. These scenarios are hit when there is frequent connect/disconnect of the client/shd to the bricks. Fix: Pick a source based on the xattr values. If 2 bricks blame one, the blamed one must be treated as sink. If there is no majority, all are sources. Once we pick a source, self-heal will then do the heal instead of erroring out due to split-brain. This patch also adds a restriction that all the bricks must be up to perform metadata heal, to avoid any metadata loss. Removed the test case tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t as it was doing metadata heal even when only 2 of 3 bricks were up. Change-Id: I07a9d62f84ceda329dcab1f02a33aeed258dcb09 fixes: bz#1717819 Signed-off-by: karthik-us <ksubrahm@redhat.com>
Diffstat (limited to 'xlators/cluster/afr/src')
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-common.c 4
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-metadata.c 2
2 files changed, 3 insertions, 3 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 81fd8d65076..298897526f7 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1590,7 +1590,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
}
}
- if (type == AFR_DATA_TRANSACTION) {
+ if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION) {
min_participants = priv->child_count;
} else {
min_participants = AFR_SH_MIN_PARTICIPANTS;
@@ -1656,7 +1656,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
}
}
- if (type == AFR_DATA_TRANSACTION)
+ if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION)
afr_selfheal_post_op_failure_accounting(priv, accused, sources,
locked_on);
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index ba43341d9c0..ecfa791b8cb 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -398,7 +398,7 @@ afr_selfheal_metadata(call_frame_t *frame, xlator_t *this, inode_t *inode)
ret = afr_selfheal_inodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0,
data_lock);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ if (ret < priv->child_count) {
ret = -ENOTCONN;
goto unlock;
}