summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t64
-rw-r--r--tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t130
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c4
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c2
4 files changed, 133 insertions, 67 deletions
diff --git a/tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t b/tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t
deleted file mode 100644
index 054a4ad..0000000
--- a/tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/bin/bash
-. $(dirname $0)/../../include.rc
-. $(dirname $0)/../../volume.rc
-cleanup;
-
-TEST glusterd
-TEST pidof glusterd
-TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
-TEST $CLI volume start $V0
-TEST $CLI volume set $V0 cluster.self-heal-daemon off
-TEST $CLI volume set $V0 cluster.metadata-self-heal off
-TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0;
-TEST touch $M0/file
-
-# Kill B1, create a pending metadata heal.
-TEST kill_brick $V0 $H0 $B0/${V0}0
-TEST setfattr -n user.xattr -v value1 $M0/file
-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/file
-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file
-
-# Kill B2, heal from B3 to B1.
-TEST $CLI volume start $V0 force
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
-TEST kill_brick $V0 $H0 $B0/${V0}1
-TEST $CLI volume set $V0 cluster.self-heal-daemon on
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
-$CLI volume heal $V0
-EXPECT_WITHIN $HEAL_TIMEOUT "00000000" afr_get_specific_changelog_xattr $B0/${V0}2/file trusted.afr.$V0-client-0 "metadata"
-TEST $CLI volume set $V0 cluster.self-heal-daemon off
-
-# Create another pending metadata heal.
-TEST setfattr -n user.xattr -v value2 $M0/file
-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/file
-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file
-
-# Kill B1, heal from B3 to B2
-TEST $CLI volume start $V0 force
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
-TEST kill_brick $V0 $H0 $B0/${V0}0
-TEST $CLI volume set $V0 cluster.self-heal-daemon on
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
-$CLI volume heal $V0
-EXPECT_WITHIN $HEAL_TIMEOUT "00000000" afr_get_specific_changelog_xattr $B0/${V0}2/file trusted.afr.$V0-client-1 "metadata"
-TEST $CLI volume set $V0 cluster.self-heal-daemon off
-
-# ALL bricks up again.
-TEST $CLI volume start $V0 force
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
-# B1 and B2 blame each other, B3 doesn't blame anyone.
-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/file
-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/file
-EXPECT "0000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file
-EXPECT "0000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file
-TEST $CLI volume set $V0 cluster.self-heal-daemon on
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
-TEST $CLI volume heal $V0
-EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
-
-cleanup;
diff --git a/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t b/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t
new file mode 100644
index 0000000..94b8bf3
--- /dev/null
+++ b/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t
@@ -0,0 +1,130 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST $CLI volume heal $V0 disable
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+###############################################################################
+# Case of 2 bricks blaming the third and the third blaming the other two.
+
+TEST mkdir $M0/dir
+
+# B0 and B2 must blame B1
+TEST kill_brick $V0 $H0 $B0/$V0"1"
+TEST setfattr -n user.metadata -v 1 $M0/dir
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}0/dir trusted.afr.$V0-client-1 metadata
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}2/dir trusted.afr.$V0-client-1 metadata
+CLIENT_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $M0/dir)
+
+# B1 must blame B0 and B2
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000100000000 $B0/$V0"1"/dir
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"1"/dir
+
+# Launch heal
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+B0_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}0/dir)
+B1_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}1/dir)
+B2_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}2/dir)
+
+TEST [ "$CLIENT_XATTR" == "$B0_XATTR" ]
+TEST [ "$CLIENT_XATTR" == "$B1_XATTR" ]
+TEST [ "$CLIENT_XATTR" == "$B2_XATTR" ]
+TEST setfattr -x user.metadata $M0/dir
+
+###############################################################################
+# Case of each brick blaming the next one in a cyclic manner
+
+TEST $CLI volume heal $V0 disable
+TEST `echo "hello" >> $M0/dir/file`
+# Mark cyclic xattrs and modify metadata directly on the bricks.
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000100000000 $B0/$V0"0"/dir/file
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"1"/dir/file
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000100000000 $B0/$V0"2"/dir/file
+
+setfattr -n user.metadata -v 1 $B0/$V0"0"/dir/file
+setfattr -n user.metadata -v 2 $B0/$V0"1"/dir/file
+setfattr -n user.metadata -v 3 $B0/$V0"2"/dir/file
+
+# Add entry to xattrop dir to trigger index heal.
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
+base_entry_b0=`ls $xattrop_dir0`
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/file))
+ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
+EXPECT_WITHIN $HEAL_TIMEOUT "^1$" get_pending_heal_count $V0
+
+# Launch heal
+TEST $CLI volume heal $V0 enable
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+B0_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}0/dir/file)
+B1_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}1/dir/file)
+B2_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}2/dir/file)
+
+TEST [ "$B0_XATTR" == "$B1_XATTR" ]
+TEST [ "$B0_XATTR" == "$B2_XATTR" ]
+TEST rm -f $M0/dir/file
+
+###############################################################################
+# Case of 2 bricks having quorum blaming and the other having only one blaming.
+
+TEST $CLI volume heal $V0 disable
+TEST `echo "hello" >> $M0/dir/file`
+# B0 and B2 must blame B1
+TEST kill_brick $V0 $H0 $B0/$V0"1"
+TEST setfattr -n user.metadata -v 1 $M0/dir/file
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}0/dir/file trusted.afr.$V0-client-1 metadata
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}2/dir/file trusted.afr.$V0-client-1 metadata
+
+# B1 must blame B0 and B2
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000100000000 $B0/$V0"1"/dir/file
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"1"/dir/file
+
+# B0 must blame B2
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"0"/dir/file
+
+# Modify the metadata directly on the bricks B1 & B2.
+setfattr -n user.metadata -v 2 $B0/$V0"1"/dir/file
+setfattr -n user.metadata -v 3 $B0/$V0"2"/dir/file
+
+# Launch heal
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
+
+B0_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}0/dir/file)
+B1_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}1/dir/file)
+B2_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}2/dir/file)
+
+TEST [ "$B0_XATTR" == "$B1_XATTR" ]
+TEST [ "$B0_XATTR" == "$B2_XATTR" ]
+
+###############################################################################
+
+cleanup
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 2a00f5f..8d7699b 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1591,7 +1591,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
}
}
- if (type == AFR_DATA_TRANSACTION) {
+ if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION) {
min_participants = priv->child_count;
} else {
min_participants = AFR_SH_MIN_PARTICIPANTS;
@@ -1657,7 +1657,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
}
}
- if (type == AFR_DATA_TRANSACTION)
+ if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION)
afr_selfheal_post_op_failure_accounting(priv, accused, sources,
locked_on);
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index ba43341..ecfa791 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -398,7 +398,7 @@ afr_selfheal_metadata(call_frame_t *frame, xlator_t *this, inode_t *inode)
ret = afr_selfheal_inodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0,
data_lock);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ if (ret < priv->child_count) {
ret = -ENOTCONN;
goto unlock;
}