diff options
4 files changed, 133 insertions, 67 deletions
diff --git a/tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t b/tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t deleted file mode 100644 index 054a4adb90d..00000000000 --- a/tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t +++ /dev/null @@ -1,64 +0,0 @@ -#!/bin/bash -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../volume.rc -cleanup; - -TEST glusterd -TEST pidof glusterd -TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} -TEST $CLI volume start $V0 -TEST $CLI volume set $V0 cluster.self-heal-daemon off -TEST $CLI volume set $V0 cluster.metadata-self-heal off -TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0; -TEST touch $M0/file - -# Kill B1, create a pending metadata heal. -TEST kill_brick $V0 $H0 $B0/${V0}0 -TEST setfattr -n user.xattr -v value1 $M0/file -EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/file -EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file - -# Kill B2, heal from B3 to B1. -TEST $CLI volume start $V0 force -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 -TEST kill_brick $V0 $H0 $B0/${V0}1 -TEST $CLI volume set $V0 cluster.self-heal-daemon on -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 -$CLI volume heal $V0 -EXPECT_WITHIN $HEAL_TIMEOUT "00000000" afr_get_specific_changelog_xattr $B0/${V0}2/file trusted.afr.$V0-client-0 "metadata" -TEST $CLI volume set $V0 cluster.self-heal-daemon off - -# Create another pending metadata heal. -TEST setfattr -n user.xattr -v value2 $M0/file -EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/file -EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file - -# Kill B1, heal from B3 to B2 -TEST $CLI volume start $V0 force -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 -TEST kill_brick $V0 $H0 $B0/${V0}0 -TEST $CLI volume set $V0 cluster.self-heal-daemon on -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 -$CLI volume heal $V0 -EXPECT_WITHIN $HEAL_TIMEOUT "00000000" afr_get_specific_changelog_xattr $B0/${V0}2/file trusted.afr.$V0-client-1 "metadata" -TEST $CLI volume set $V0 cluster.self-heal-daemon off - -# ALL bricks up again. -TEST $CLI volume start $V0 force -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 -# B1 and B2 blame each other, B3 doesn't blame anyone. -EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/file -EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/file -EXPECT "0000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file -EXPECT "0000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file -TEST $CLI volume set $V0 cluster.self-heal-daemon on -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 -TEST $CLI volume heal $V0 -EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 - -cleanup; diff --git a/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t b/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t new file mode 100644 index 00000000000..94b8bf3887e --- /dev/null +++ b/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t @@ -0,0 +1,130 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +## Start and create a volume +TEST glusterd; +TEST pidof glusterd; +TEST $CLI volume info; + +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; +TEST $CLI volume start $V0; +EXPECT 'Started' volinfo_field $V0 'Status'; +TEST $CLI volume heal $V0 disable +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 + +############################################################################### +# Case of 2 bricks blaming the third and the third blaming the other two. + +TEST mkdir $M0/dir + +# B0 and B2 must blame B1 +TEST kill_brick $V0 $H0 $B0/$V0"1" +TEST setfattr -n user.metadata -v 1 $M0/dir +EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}0/dir trusted.afr.$V0-client-1 metadata +EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}2/dir trusted.afr.$V0-client-1 metadata +CLIENT_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $M0/dir) + +# B1 must blame B0 and B2 +setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000100000000 $B0/$V0"1"/dir +setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"1"/dir + +# Launch heal +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1 +TEST $CLI volume heal $V0 enable +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + +B0_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}0/dir) +B1_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}1/dir) +B2_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}2/dir) + +TEST [ "$CLIENT_XATTR" == "$B0_XATTR" ] +TEST [ "$CLIENT_XATTR" == "$B1_XATTR" ] +TEST [ "$CLIENT_XATTR" == "$B2_XATTR" ] +TEST setfattr -x user.metadata $M0/dir + +############################################################################### +# Case of each brick blaming the next one in a cyclic manner + +TEST $CLI volume heal $V0 disable +TEST `echo "hello" >> $M0/dir/file` +# Mark cyclic xattrs and modify metadata directly on the bricks. +setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000100000000 $B0/$V0"0"/dir/file +setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"1"/dir/file +setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000100000000 $B0/$V0"2"/dir/file + +setfattr -n user.metadata -v 1 $B0/$V0"0"/dir/file +setfattr -n user.metadata -v 2 $B0/$V0"1"/dir/file +setfattr -n user.metadata -v 3 $B0/$V0"2"/dir/file + +# Add entry to xattrop dir to trigger index heal. +xattrop_dir0=$(afr_get_index_path $B0/$V0"0") +base_entry_b0=`ls $xattrop_dir0` +gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/file)) +ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str +EXPECT_WITHIN $HEAL_TIMEOUT "^1$" get_pending_heal_count $V0 + +# Launch heal +TEST $CLI volume heal $V0 enable +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + +B0_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}0/dir/file) +B1_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}1/dir/file) +B2_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}2/dir/file) + +TEST [ "$B0_XATTR" == "$B1_XATTR" ] +TEST [ "$B0_XATTR" == "$B2_XATTR" ] +TEST rm -f $M0/dir/file + +############################################################################### +# Case of 2 bricks having quorum blaming and the other having only one blaming. + +TEST $CLI volume heal $V0 disable +TEST `echo "hello" >> $M0/dir/file` +# B0 and B2 must blame B1 +TEST kill_brick $V0 $H0 $B0/$V0"1" +TEST setfattr -n user.metadata -v 1 $M0/dir/file +EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}0/dir/file trusted.afr.$V0-client-1 metadata +EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}2/dir/file trusted.afr.$V0-client-1 metadata + +# B1 must blame B0 and B2 +setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000100000000 $B0/$V0"1"/dir/file +setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"1"/dir/file + +# B0 must blame B2 +setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"0"/dir/file + +# Modify the metadata directly on the bricks B1 & B2. +setfattr -n user.metadata -v 2 $B0/$V0"1"/dir/file +setfattr -n user.metadata -v 3 $B0/$V0"2"/dir/file + +# Launch heal +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1 +TEST $CLI volume heal $V0 enable +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 + +B0_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}0/dir/file) +B1_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}1/dir/file) +B2_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}2/dir/file) + +TEST [ "$B0_XATTR" == "$B1_XATTR" ] +TEST [ "$B0_XATTR" == "$B2_XATTR" ] + +############################################################################### + +cleanup diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 2a00f5f9bfa..8d7699b227b 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -1591,7 +1591,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this, } } - if (type == AFR_DATA_TRANSACTION) { + if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION) { min_participants = priv->child_count; } else { min_participants = AFR_SH_MIN_PARTICIPANTS; @@ -1657,7 +1657,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this, } } - if (type == AFR_DATA_TRANSACTION) + if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION) afr_selfheal_post_op_failure_accounting(priv, accused, sources, locked_on); diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index ba43341d9c0..ecfa791b8cb 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -398,7 +398,7 @@ afr_selfheal_metadata(call_frame_t *frame, xlator_t *this, inode_t *inode) ret = afr_selfheal_inodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0, data_lock); { - if (ret < AFR_SH_MIN_PARTICIPANTS) { + if (ret < priv->child_count) { ret = -ENOTCONN; goto unlock; } |