diff options
Diffstat (limited to 'tests/basic/afr')
-rw-r--r-- | tests/basic/afr/afr-anon-inode-no-quorum.t | 63 | ||||
-rw-r--r-- | tests/basic/afr/afr-anon-inode.t | 114 | ||||
-rw-r--r-- | tests/basic/afr/durability-off.t | 2 | ||||
-rw-r--r-- | tests/basic/afr/entry-self-heal-anon-dir-off.t | 459 | ||||
-rw-r--r-- | tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t | 9 | ||||
-rw-r--r-- | tests/basic/afr/halo.t | 61 | ||||
-rw-r--r-- | tests/basic/afr/rename-data-loss.t | 72 | ||||
-rw-r--r-- | tests/basic/afr/split-brain-favorite-child-policy-client-side-healing.t | 124 | ||||
-rw-r--r-- | tests/basic/afr/split-brain-favorite-child-policy.t | 203 |
9 files changed, 899 insertions, 208 deletions
diff --git a/tests/basic/afr/afr-anon-inode-no-quorum.t b/tests/basic/afr/afr-anon-inode-no-quorum.t new file mode 100644 index 00000000000..896ba0c9b2c --- /dev/null +++ b/tests/basic/afr/afr-anon-inode-no-quorum.t @@ -0,0 +1,63 @@ +#!/bin/bash + +#Test that anon-inode entry is not cleaned up as long as there exists at least +#one valid entry +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume heal $V0 disable +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume set $V0 performance.read-ahead off +TEST $CLI volume set $V0 performance.readdir-ahead off +TEST $CLI volume set $V0 performance.open-behind off +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume set $V0 performance.io-cache off +TEST $CLI volume set $V0 performance.quick-read off +TEST $CLI volume set $V0 cluster.entry-self-heal off +TEST $CLI volume start $V0 + +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 + +TEST touch $M0/a $M0/b + +gfid_a=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/a)) +gfid_b=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/b)) +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST mv $M0/a $M0/a-new +TEST mv $M0/b $M0/b-new + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST ! ls $M0/a +TEST ! 
ls $M0/b +anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode) +TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a +TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b +#Make sure index heal doesn't happen after enabling heal +TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1 +TEST rm -f $B0/${V0}1/.glusterfs/indices/xattrop/* +TEST $CLI volume heal $V0 enable +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +TEST $CLI volume heal $V0 +#Allow time for a scan +sleep 5 +TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a +TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b +inum_b=$(STAT_INO $B0/${V0}0/$anon_inode_name/$gfid_b) +TEST rm -f $M0/a-new +TEST stat $M0/b-new + +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1 +EXPECT "$inum_b" STAT_INO $B0/${V0}0/b-new + +cleanup diff --git a/tests/basic/afr/afr-anon-inode.t b/tests/basic/afr/afr-anon-inode.t new file mode 100644 index 00000000000..f4cf37a2fa0 --- /dev/null +++ b/tests/basic/afr/afr-anon-inode.t @@ -0,0 +1,114 @@ +#!/bin/bash +#Tests that afr-anon-inode test cases work fine as expected +#These are cases where in entry-heal/name-heal we dont know entry for an inode +#so these inodes are kept in a special directory + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. 
$(dirname $0)/../../afr.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2} +TEST $CLI volume set $V0 performance.quick-read off +TEST $CLI volume set $V0 performance.io-cache off +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume set $V0 performance.read-ahead off +TEST $CLI volume set $V0 performance.open-behind off +TEST $CLI volume start $V0 +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; +EXPECT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode" +TEST $CLI volume set $V0 cluster.use-anonymous-inode no +EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode" +TEST $CLI volume set $V0 cluster.use-anonymous-inode yes +EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode" +TEST mkdir -p $M0/d1/b $M0/d2/a +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST mv $M0/d2/a $M0/d1 +TEST mv $M0/d1/b $M0/d2 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode) +TEST [[ -d $B0/${V0}1/$anon_inode_name ]] +TEST [[ -d $B0/${V0}2/$anon_inode_name ]] +anon_gfid=$(gf_get_gfid_xattr $B0/${V0}0/$anon_inode_name) +EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}1/$anon_inode_name +EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}2/$anon_inode_name + +TEST ! 
ls $M0/$anon_inode_name +EXPECT "^4$" echo $(ls -a $M0 | wc -l) + +#Test purging code path by shd +TEST $CLI volume heal $V0 disable +TEST mkdir $M0/l0 $M0/l1 $M0/l2 +TEST touch $M0/del-file $M0/del-file-nolink $M0/l0/file +TEST ln $M0/del-file $M0/del-file-link +TEST ln $M0/l0/file $M0/l1/file-link1 +TEST ln $M0/l0/file $M0/l2/file-link2 +TEST mkdir -p $M0/del-recursive-dir/d1 + +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST rm -f $M0/del-file $M0/del-file-nolink +TEST rm -rf $M0/del-recursive-dir +TEST mv $M0/d1/a $M0/d2 +TEST mv $M0/l0/file $M0/l0/renamed-file +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 0 + +nolink_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file-nolink)) +link_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file)) +dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-recursive-dir)) +rename_dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/d1/a)) +rename_file_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/l0/file)) +TEST ! stat $M0/del-file +TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid +TEST ! stat $M0/del-file-nolink +TEST ! stat $B0/${V0}0/$anon_inode_name/$nolink_gfid +TEST ! stat $M0/del-recursive-dir +TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid +TEST ! stat $M0/d1/a +TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid +TEST ! stat $M0/l0/file +TEST stat $B0/${V0}0/$anon_inode_name/$rename_file_gfid + +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST mv $M0/l1/file-link1 $M0/l1/renamed-file-link1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1 +TEST ! stat $M0/l1/file-link1 +TEST stat $B0/${V0}1/$anon_inode_name/$rename_file_gfid + +TEST kill_brick $V0 $H0 $B0/${V0}2 +TEST mv $M0/l2/file-link2 $M0/l2/renamed-file-link2 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 2 +TEST ! 
stat $M0/l2/file-link2 +TEST stat $B0/${V0}2/$anon_inode_name/$rename_file_gfid + +#Simulate only anon-inodes present in all bricks +TEST rm -f $M0/l0/renamed-file $M0/l1/renamed-file-link1 $M0/l2/renamed-file-link2 + +#Test that shd doesn't cleanup anon-inodes when some bricks are down +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST $CLI volume heal $V0 enable +$CLI volume heal $V0 +sleep 5 #Allow time for completion of one scan +TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid +TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid +TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid +rename_dir_inum=$(STAT_INO $B0/${V0}0/$anon_inode_name/$rename_dir_gfid) + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}2 + +#Test that rename indeed happened instead of rmdir/mkdir +renamed_dir_inum=$(STAT_INO $B0/${V0}0/d2/a) +EXPECT "$rename_dir_inum" echo $renamed_dir_inum +cleanup; diff --git a/tests/basic/afr/durability-off.t b/tests/basic/afr/durability-off.t index 155ffa09ef0..6e0f18b88f8 100644 --- a/tests/basic/afr/durability-off.t +++ b/tests/basic/afr/durability-off.t @@ -26,6 +26,8 @@ TEST $CLI volume heal $V0 EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0 EXPECT "^0$" echo $($CLI volume profile $V0 info | grep -w FSYNC | wc -l) +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 #Test that fsyncs happen when durability is on TEST $CLI volume set $V0 cluster.ensure-durability on TEST $CLI volume set $V0 performance.strict-write-ordering on diff --git a/tests/basic/afr/entry-self-heal-anon-dir-off.t b/tests/basic/afr/entry-self-heal-anon-dir-off.t new file mode 100644 index 
00000000000..7bb6ee14193 --- /dev/null +++ b/tests/basic/afr/entry-self-heal-anon-dir-off.t @@ -0,0 +1,459 @@ +#!/bin/bash + +#This file checks if missing entry self-heal and entry self-heal are working +#as expected. +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +function get_file_type { + stat -c "%a:%F:%g:%t:%T:%u" $1 +} + +function diff_dirs { + diff <(ls $1 | sort) <(ls $2 | sort) +} + +function heal_status { + local f1_path="${1}/${3}" + local f2_path="${2}/${3}" + local insync="" + diff_dirs $f1_path $f2_path + if [ $? -eq 0 ]; + then + insync="Y" + else + insync="N" + fi + local xattr11=$(get_hex_xattr trusted.afr.$V0-client-0 $f1_path) + local xattr12=$(get_hex_xattr trusted.afr.$V0-client-1 $f1_path) + local xattr21=$(get_hex_xattr trusted.afr.$V0-client-0 $f2_path) + local xattr22=$(get_hex_xattr trusted.afr.$V0-client-1 $f2_path) + local dirty1=$(get_hex_xattr trusted.afr.dirty $f1_path) + local dirty2=$(get_hex_xattr trusted.afr.dirty $f2_path) + if [ -z $xattr11 ]; then xattr11="000000000000000000000000"; fi + if [ -z $xattr12 ]; then xattr12="000000000000000000000000"; fi + if [ -z $xattr21 ]; then xattr21="000000000000000000000000"; fi + if [ -z $xattr22 ]; then xattr22="000000000000000000000000"; fi + if [ -z $dirty1 ]; then dirty1="000000000000000000000000"; fi + if [ -z $dirty2 ]; then dirty2="000000000000000000000000"; fi + echo ${insync}${xattr11}${xattr12}${xattr21}${xattr22}${dirty1}${dirty2} +} + +function is_heal_done { + local zero_xattr="000000000000000000000000" + if [ "$(heal_status $@)" == "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" ]; + then + echo "Y" + else + echo "N" + fi +} + +function print_pending_heals { + local result=":" + for i in "$@"; + do + if [ "N" == $(is_heal_done $B0/${V0}0 $B0/${V0}1 $i) ]; + then + result="$result:$i" + fi + done +#To prevent any match for EXPECT_WITHIN, print a char non-existent in file-names + if 
[ $result == ":" ]; then result="~"; fi + echo $result +} + +zero_xattr="000000000000000000000000" +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume heal $V0 disable +TEST $CLI volume set $V0 cluster.use-anonymous-inode off +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume set $V0 performance.read-ahead off +TEST $CLI volume set $V0 performance.readdir-ahead off +TEST $CLI volume set $V0 performance.open-behind off +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume set $V0 performance.io-cache off +TEST $CLI volume set $V0 performance.quick-read off +TEST $CLI volume set $V0 cluster.data-self-heal on +TEST $CLI volume set $V0 cluster.metadata-self-heal on +TEST $CLI volume set $V0 cluster.entry-self-heal on +TEST $CLI volume start $V0 + +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --use-readdirp=no $M0 +cd $M0 +#_me_ is dir on which missing entry self-heal happens, _heal is where dir self-heal happens +#spb is split-brain, fool is all fool + +#source_self_accusing means there exists source and a sink which self-accuses. +#This simulates failures where fops failed on the bricks without it going down. 
+#Something like EACCESS/EDQUOT etc + +TEST mkdir spb_heal spb spb_me_heal spb_me fool_heal fool_me v1_fool_heal v1_fool_me source_creations_heal source_deletions_heal source_creations_me source_deletions_me v1_dirty_me v1_dirty_heal source_self_accusing +TEST mkfifo source_deletions_heal/fifo +TEST mknod source_deletions_heal/block b 4 5 +TEST mknod source_deletions_heal/char c 1 5 +TEST touch source_deletions_heal/file +TEST ln -s source_deletions_heal/file source_deletions_heal/slink +TEST mkdir source_deletions_heal/dir1 +TEST mkdir source_deletions_heal/dir1/dir2 + +TEST mkfifo source_deletions_me/fifo +TEST mknod source_deletions_me/block b 4 5 +TEST mknod source_deletions_me/char c 1 5 +TEST touch source_deletions_me/file +TEST ln -s source_deletions_me/file source_deletions_me/slink +TEST mkdir source_deletions_me/dir1 +TEST mkdir source_deletions_me/dir1/dir2 + +TEST mkfifo source_self_accusing/fifo +TEST mknod source_self_accusing/block b 4 5 +TEST mknod source_self_accusing/char c 1 5 +TEST touch source_self_accusing/file +TEST ln -s source_self_accusing/file source_self_accusing/slink +TEST mkdir source_self_accusing/dir1 +TEST mkdir source_self_accusing/dir1/dir2 + +TEST kill_brick $V0 $H0 $B0/${V0}0 + +TEST touch spb_heal/0 spb/0 spb_me_heal/0 spb_me/0 fool_heal/0 fool_me/0 v1_fool_heal/0 v1_fool_me/0 v1_dirty_heal/0 v1_dirty_me/0 +TEST rm -rf source_deletions_heal/fifo source_deletions_heal/block source_deletions_heal/char source_deletions_heal/file source_deletions_heal/slink source_deletions_heal/dir1 +TEST rm -rf source_deletions_me/fifo source_deletions_me/block source_deletions_me/char source_deletions_me/file source_deletions_me/slink source_deletions_me/dir1 +TEST rm -rf source_self_accusing/fifo source_self_accusing/block source_self_accusing/char source_self_accusing/file source_self_accusing/slink source_self_accusing/dir1 + +#Test that the files are deleted +TEST ! stat $B0/${V0}1/source_deletions_heal/fifo +TEST ! 
stat $B0/${V0}1/source_deletions_heal/block +TEST ! stat $B0/${V0}1/source_deletions_heal/char +TEST ! stat $B0/${V0}1/source_deletions_heal/file +TEST ! stat $B0/${V0}1/source_deletions_heal/slink +TEST ! stat $B0/${V0}1/source_deletions_heal/dir1 +TEST ! stat $B0/${V0}1/source_deletions_me/fifo +TEST ! stat $B0/${V0}1/source_deletions_me/block +TEST ! stat $B0/${V0}1/source_deletions_me/char +TEST ! stat $B0/${V0}1/source_deletions_me/file +TEST ! stat $B0/${V0}1/source_deletions_me/slink +TEST ! stat $B0/${V0}1/source_deletions_me/dir1 +TEST ! stat $B0/${V0}1/source_self_accusing/fifo +TEST ! stat $B0/${V0}1/source_self_accusing/block +TEST ! stat $B0/${V0}1/source_self_accusing/char +TEST ! stat $B0/${V0}1/source_self_accusing/file +TEST ! stat $B0/${V0}1/source_self_accusing/slink +TEST ! stat $B0/${V0}1/source_self_accusing/dir1 + + +TEST mkfifo source_creations_heal/fifo +TEST mknod source_creations_heal/block b 4 5 +TEST mknod source_creations_heal/char c 1 5 +TEST touch source_creations_heal/file +TEST ln -s source_creations_heal/file source_creations_heal/slink +TEST mkdir source_creations_heal/dir1 +TEST mkdir source_creations_heal/dir1/dir2 + +TEST mkfifo source_creations_me/fifo +TEST mknod source_creations_me/block b 4 5 +TEST mknod source_creations_me/char c 1 5 +TEST touch source_creations_me/file +TEST ln -s source_creations_me/file source_creations_me/slink +TEST mkdir source_creations_me/dir1 +TEST mkdir source_creations_me/dir1/dir2 + +$CLI volume stop $V0 + +#simulate fool fool scenario for fool_* dirs +setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/{fool_heal,fool_me} +setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me} +setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me} + +#Simulate v1-dirty(self-accusing but no pending ops on others) scenario for v1-dirty +setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/v1_dirty_{heal,me} +setfattr -n 
trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/v1_dirty_{heal,me} + +$CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST kill_brick $V0 $H0 $B0/${V0}1 + +TEST touch spb_heal/1 spb/0 spb_me_heal/1 spb_me/0 fool_heal/1 fool_me/1 v1_fool_heal/1 v1_fool_me/1 + +$CLI volume stop $V0 + +#simulate fool fool scenario for fool_* dirs +setfattr -x trusted.afr.$V0-client-1 $B0/${V0}0/{fool_heal,fool_me} +setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me} +setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me} + +#simulate self-accusing for source_self_accusing +TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000006 $B0/${V0}0/source_self_accusing + +$CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 + +# Check if conservative merges happened correctly on _me_ dirs +TEST stat spb_me_heal/1 +TEST stat $B0/${V0}0/spb_me_heal/1 +TEST stat $B0/${V0}1/spb_me_heal/1 + +TEST stat spb_me_heal/0 +TEST stat $B0/${V0}0/spb_me_heal/0 +TEST stat $B0/${V0}1/spb_me_heal/0 + +TEST stat fool_me/1 +TEST stat $B0/${V0}0/fool_me/1 +TEST stat $B0/${V0}1/fool_me/1 + +TEST stat fool_me/0 +TEST stat $B0/${V0}0/fool_me/0 +TEST stat $B0/${V0}1/fool_me/0 + +TEST stat v1_fool_me/0 +TEST stat $B0/${V0}0/v1_fool_me/0 +TEST stat $B0/${V0}1/v1_fool_me/0 + +TEST stat v1_fool_me/1 +TEST stat $B0/${V0}0/v1_fool_me/1 +TEST stat $B0/${V0}1/v1_fool_me/1 + +TEST stat v1_dirty_me/0 +TEST stat $B0/${V0}0/v1_dirty_me/0 +TEST stat $B0/${V0}1/v1_dirty_me/0 + +#Check if files that have gfid-mismatches in _me_ are giving EIO +TEST ! stat spb_me/0 + +#Check if stale files are deleted on access +TEST ! stat source_deletions_me/fifo +TEST ! 
stat $B0/${V0}0/source_deletions_me/fifo +TEST ! stat $B0/${V0}1/source_deletions_me/fifo +TEST ! stat source_deletions_me/block +TEST ! stat $B0/${V0}0/source_deletions_me/block +TEST ! stat $B0/${V0}1/source_deletions_me/block +TEST ! stat source_deletions_me/char +TEST ! stat $B0/${V0}0/source_deletions_me/char +TEST ! stat $B0/${V0}1/source_deletions_me/char +TEST ! stat source_deletions_me/file +TEST ! stat $B0/${V0}0/source_deletions_me/file +TEST ! stat $B0/${V0}1/source_deletions_me/file +TEST ! stat source_deletions_me/file +TEST ! stat $B0/${V0}0/source_deletions_me/file +TEST ! stat $B0/${V0}1/source_deletions_me/file +TEST ! stat source_deletions_me/dir1/dir2 +TEST ! stat $B0/${V0}0/source_deletions_me/dir1/dir2 +TEST ! stat $B0/${V0}1/source_deletions_me/dir1/dir2 +TEST ! stat source_deletions_me/dir1 +TEST ! stat $B0/${V0}0/source_deletions_me/dir1 +TEST ! stat $B0/${V0}1/source_deletions_me/dir1 + +#Test if the files created as part of access are healed correctly +r=$(get_file_type source_creations_me/fifo) +EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/fifo +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/fifo +TEST [ -p source_creations_me/fifo ] + +r=$(get_file_type source_creations_me/block) +EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/block +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/block +EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/block +EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/block +TEST [ -b source_creations_me/block ] + +r=$(get_file_type source_creations_me/char) +EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/char +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/char +EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/char +EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/char +TEST [ -c source_creations_me/char ] + +r=$(get_file_type source_creations_me/file) +EXPECT "$r" get_file_type 
$B0/${V0}0/source_creations_me/file +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/file +TEST [ -f source_creations_me/file ] + +r=$(get_file_type source_creations_me/slink) +EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/slink +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/slink +TEST [ -h source_creations_me/slink ] + +r=$(get_file_type source_creations_me/dir1/dir2) +EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1/dir2 +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1/dir2 +TEST [ -d source_creations_me/dir1/dir2 ] + +r=$(get_file_type source_creations_me/dir1) +EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1 +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1 +TEST [ -d source_creations_me/dir1 ] + +#Trigger heal and check _heal dirs are healed properly +#Trigger change in event generation number. That way inodes would get refreshed during lookup +TEST kill_brick $V0 $H0 $B0/${V0}1 +$CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 + +TEST stat spb_heal +TEST stat spb_me_heal +TEST stat fool_heal +TEST stat fool_me +TEST stat v1_fool_heal +TEST stat v1_fool_me +TEST stat source_deletions_heal +TEST stat source_deletions_me +TEST stat source_self_accusing +TEST stat source_creations_heal +TEST stat source_creations_me +TEST stat v1_dirty_heal +TEST stat v1_dirty_me +TEST $CLI volume stop $V0 +TEST rm -rf $B0/${V0}{0,1}/.glusterfs/indices/xattrop/* + +$CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 + +#Create base entry in indices/xattrop +echo "Data" > $M0/FILE +rm -f $M0/FILE +EXPECT "1" count_index_entries $B0/${V0}0 +EXPECT "1" count_index_entries $B0/${V0}1 + +TEST $CLI volume stop $V0; + +#Create entries for fool_heal and fool_me to ensure they are fully healed and 
dirty xattrs erased, before triggering index heal +create_brick_xattrop_entry $B0/${V0}0 fool_heal fool_me source_creations_heal/dir1 + +$CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 + +$CLI volume heal $V0 enable +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 + +TEST $CLI volume heal $V0; +EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals spb_heal spb_me_heal fool_heal fool_me v1_fool_heal v1_fool_me source_deletions_heal source_deletions_me source_creations_heal source_creations_me v1_dirty_heal v1_dirty_me source_self_accusing + +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_me_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_me +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_me +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_me +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status 
$B0/${V0}0 $B0/${V0}1 source_self_accusing +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_me +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_me + +#Don't access the files/dirs from mount point as that may cause self-heals +# Check if conservative merges happened correctly on heal dirs +TEST stat $B0/${V0}0/spb_heal/1 +TEST stat $B0/${V0}1/spb_heal/1 + +TEST stat $B0/${V0}0/spb_heal/0 +TEST stat $B0/${V0}1/spb_heal/0 + +TEST stat $B0/${V0}0/fool_heal/1 +TEST stat $B0/${V0}1/fool_heal/1 + +TEST stat $B0/${V0}0/fool_heal/0 +TEST stat $B0/${V0}1/fool_heal/0 + +TEST stat $B0/${V0}0/v1_fool_heal/0 +TEST stat $B0/${V0}1/v1_fool_heal/0 + +TEST stat $B0/${V0}0/v1_fool_heal/1 +TEST stat $B0/${V0}1/v1_fool_heal/1 + +TEST stat $B0/${V0}0/v1_dirty_heal/0 +TEST stat $B0/${V0}1/v1_dirty_heal/0 + +#Check if files that have gfid-mismatches in spb are giving EIO +TEST ! stat spb/0 + +#Check if stale files are deleted on access +TEST ! stat $B0/${V0}0/source_deletions_heal/fifo +TEST ! stat $B0/${V0}1/source_deletions_heal/fifo +TEST ! stat $B0/${V0}0/source_deletions_heal/block +TEST ! stat $B0/${V0}1/source_deletions_heal/block +TEST ! stat $B0/${V0}0/source_deletions_heal/char +TEST ! stat $B0/${V0}1/source_deletions_heal/char +TEST ! stat $B0/${V0}0/source_deletions_heal/file +TEST ! stat $B0/${V0}1/source_deletions_heal/file +TEST ! stat $B0/${V0}0/source_deletions_heal/file +TEST ! stat $B0/${V0}1/source_deletions_heal/file +TEST ! stat $B0/${V0}0/source_deletions_heal/dir1/dir2 +TEST ! 
stat $B0/${V0}1/source_deletions_heal/dir1/dir2 +TEST ! stat $B0/${V0}0/source_deletions_heal/dir1 +TEST ! stat $B0/${V0}1/source_deletions_heal/dir1 + +#Check if stale files are deleted on access +TEST ! stat $B0/${V0}0/source_self_accusing/fifo +TEST ! stat $B0/${V0}1/source_self_accusing/fifo +TEST ! stat $B0/${V0}0/source_self_accusing/block +TEST ! stat $B0/${V0}1/source_self_accusing/block +TEST ! stat $B0/${V0}0/source_self_accusing/char +TEST ! stat $B0/${V0}1/source_self_accusing/char +TEST ! stat $B0/${V0}0/source_self_accusing/file +TEST ! stat $B0/${V0}1/source_self_accusing/file +TEST ! stat $B0/${V0}0/source_self_accusing/file +TEST ! stat $B0/${V0}1/source_self_accusing/file +TEST ! stat $B0/${V0}0/source_self_accusing/dir1/dir2 +TEST ! stat $B0/${V0}1/source_self_accusing/dir1/dir2 +TEST ! stat $B0/${V0}0/source_self_accusing/dir1 +TEST ! stat $B0/${V0}1/source_self_accusing/dir1 + +#Test if the files created as part of full self-heal correctly +r=$(get_file_type $B0/${V0}0/source_creations_heal/fifo) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/fifo +TEST [ -p $B0/${V0}0/source_creations_heal/fifo ] +EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/block +EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/block + +r=$(get_file_type $B0/${V0}0/source_creations_heal/block) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/block + +r=$(get_file_type $B0/${V0}0/source_creations_heal/char) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/char +EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/char +EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/char + +r=$(get_file_type $B0/${V0}0/source_creations_heal/file) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file +TEST [ -f $B0/${V0}0/source_creations_heal/file ] + +r=$(get_file_type $B0/${V0}0/source_creations_heal/slink) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/slink 
+TEST [ -h $B0/${V0}0/source_creations_heal/slink ] + +r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1/dir2) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1/dir2 +TEST [ -d $B0/${V0}0/source_creations_heal/dir1/dir2 ] + +r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1 +TEST [ -d $B0/${V0}0/source_creations_heal/dir1 ] + +cd - + +#Anonymous directory shouldn't be created +TEST mkdir $M0/rename-dir +before_rename=$(STAT_INO $B0/${V0}1/rename-dir) +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST mv $M0/rename-dir $M0/new-name +TEST $CLI volume start $V0 force +#'spb' is in split-brain so pending-heal-count will be 2 +EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0 +after_rename=$(STAT_INO $B0/${V0}1/new-name) +EXPECT "0" echo $(ls -a $B0/${V0}0/ | grep anonymous-inode | wc -l) +EXPECT "0" echo $(ls -a $B0/${V0}1/ | grep anonymous-inode | wc -l) +EXPECT_NOT "$before_rename" echo $after_rename +cleanup diff --git a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t index f4aa351e461..35e295dc170 100644 --- a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t +++ b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t @@ -168,8 +168,8 @@ TEST [ "$gfid_1" != "$gfid_2" ] #We know that second brick has the bigger size file BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/f3 | cut -d\ -f1) -TEST ls $M0/f3 -TEST cat $M0/f3 +TEST ls $M0 #Trigger entry heal via readdir inode refresh +TEST cat $M0/f3 #Trigger data heal via readv inode refresh EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 #gfid split-brain should be resolved @@ -215,8 +215,8 @@ TEST $CLI volume start $V0 force EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2 -TEST ls $M0/f4 -TEST cat $M0/f4 +TEST ls $M0 #Trigger entry 
heal via readdir inode refresh +TEST cat $M0/f4 #Trigger data heal via readv inode refresh EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 #gfid split-brain should be resolved @@ -227,4 +227,3 @@ HEALED_MD5=$(md5sum $B0/${V0}2/f4 | cut -d\ -f1) TEST [ "$MAJORITY_MD5" == "$HEALED_MD5" ] cleanup; -#G_TESTDEF_TEST_STATUS_NETBSD7=1501390 diff --git a/tests/basic/afr/halo.t b/tests/basic/afr/halo.t new file mode 100644 index 00000000000..3f61f5a0402 --- /dev/null +++ b/tests/basic/afr/halo.t @@ -0,0 +1,61 @@ +#!/bin/bash +#Tests that halo basic functionality works as expected + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +function get_up_child() +{ + if [ "1" == $(afr_private_key_value $V0 $M0 0 "child_up\[0\]") ]; + then + echo 0 + elif [ "1" == $(afr_private_key_value $V0 $M0 0 "child_up\[1\]") ] + then + echo 1 + fi +} + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume set $V0 cluster.halo-enabled yes +TEST $CLI volume set $V0 cluster.halo-max-replicas 1 +TEST $CLI volume start $V0 +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; +EXPECT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[0\]" +EXPECT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[1\]" +EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[0\]" +EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[1\]" + +up_id=$(get_up_child) +TEST [[ ! 
-z "$up_id" ]] + +down_id=$((1-up_id)) + +TEST kill_brick $V0 $H0 $B0/${V0}${up_id} +#As max-replicas is configured to be 1, down_child should be up now +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${down_id}\]" +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "child_up\[${down_id}\]" +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${up_id}\]" +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "child_up\[${up_id}\]" +EXPECT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[${up_id}\]" +EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[${down_id}\]" + +#Bring the brick back up and the state should be restored +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${up_id}\]" + +up_id=$(get_up_child) +TEST [[ ! -z "$up_id" ]] +down_id=$((1-up_id)) +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${down_id}\]" +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "child_up\[${down_id}\]" +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${up_id}\]" +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "child_up\[${up_id}\]" +EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[0\]" +EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[1\]" + +cleanup; diff --git a/tests/basic/afr/rename-data-loss.t b/tests/basic/afr/rename-data-loss.t new file mode 100644 index 00000000000..256ee2aafce --- /dev/null +++ b/tests/basic/afr/rename-data-loss.t @@ -0,0 +1,72 @@ +#!/bin/bash +#Self-heal tests +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. 
$(dirname $0)/../../afr.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1} +TEST $CLI volume set $V0 write-behind off +TEST $CLI volume set $V0 self-heal-daemon off +TEST $CLI volume set $V0 data-self-heal off +TEST $CLI volume set $V0 metadata-self-heal off +TEST $CLI volume set $V0 entry-self-heal off +TEST $CLI volume start $V0 +EXPECT 'Started' volinfo_field $V0 'Status' +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; + +cd $M0 +TEST `echo "line1" >> file1` +TEST mkdir dir1 +TEST mkdir dir2 +TEST mkdir -p dir1/dira/dirb +TEST `echo "line1">>dir1/dira/dirb/file1` +TEST mkdir delete_me +TEST `echo "line1" >> delete_me/file1` + +#brick0 has witnessed the second write while brick1 is down. +TEST kill_brick $V0 $H0 $B0/brick1 +TEST `echo "line2" >> file1` +TEST `echo "line2" >> dir1/dira/dirb/file1` +TEST `echo "line2" >> delete_me/file1` + +#Toggle the bricks that are up/down. +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST kill_brick $V0 $H0 $B0/brick0 + +#Rename when the 'source' brick0 for data-selfheals is down. +mv file1 file2 +mv dir1/dira dir2 + +#Delete a dir when brick0 is down. 
+rm -rf delete_me +cd - + +#Bring everything up and trigger heal +TEST $CLI volume set $V0 self-heal-daemon on +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick1 + +#Remount to avoid reading from caches +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; +EXPECT "line2" tail -1 $M0/file2 +EXPECT "line2" tail -1 $M0/dir2/dira/dirb/file1 +TEST ! stat $M0/delete_me/file1 +TEST ! stat $M0/delete_me + +anon_inode_name=$(ls -a $B0/brick0 | grep glusterfs-anonymous-inode) +TEST [[ -d $B0/brick0/$anon_inode_name ]] +TEST [[ -d $B0/brick1/$anon_inode_name ]] +cleanup diff --git a/tests/basic/afr/split-brain-favorite-child-policy-client-side-healing.t b/tests/basic/afr/split-brain-favorite-child-policy-client-side-healing.t new file mode 100644 index 00000000000..7c249c4bcbd --- /dev/null +++ b/tests/basic/afr/split-brain-favorite-child-policy-client-side-healing.t @@ -0,0 +1,124 @@ +#!/bin/bash + +#Test the client side split-brain resolution +. $(dirname $0)/../../include.rc +. 
$(dirname $0)/../../volume.rc + +cleanup; + +GET_MDATA_PATH=$(dirname $0)/../../utils +build_tester $GET_MDATA_PATH/get-mdata-xattr.c + +TEST glusterd +TEST pidof glusterd + +count_files () { + ls $1 | wc -l +} + +#Create replica 2 volume +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume heal $V0 disable +TEST $CLI volume set $V0 cluster.quorum-type fixed +TEST $CLI volume set $V0 cluster.quorum-count 1 +TEST $CLI volume set $V0 cluster.metadata-self-heal on +TEST $CLI volume set $V0 cluster.data-self-heal on +TEST $CLI volume set $V0 cluster.entry-self-heal on + +TEST $CLI volume start $V0 +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +TEST mkdir $M0/data +TEST touch $M0/data/file + + +############ Client side healing using favorite-child-policy = mtime ################# +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST dd if=/dev/urandom of=$M0/data/file bs=1024 count=1024 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST dd if=/dev/urandom of=$M0/data/file bs=1024 count=1024 + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +mtime1=$(get_mtime $B0/${V0}0/data/file) +mtime2=$(get_mtime $B0/${V0}1/data/file) +if (( $(echo "$mtime1 > $mtime2" | bc -l) )); then + LATEST_MTIME_MD5=$(md5sum $B0/${V0}0/data/file | cut -d\ -f1) +else + LATEST_MTIME_MD5=$(md5sum $B0/${V0}1/data/file | cut -d\ -f1) +fi + +#file will be in split-brain +cat $M0/data/file > /dev/null +EXPECT "1" echo $? 
+ +TEST $CLI volume set $V0 cluster.favorite-child-policy mtime +TEST $CLI volume start $V0 force + +EXPECT_WITHIN $HEAL_TIMEOUT "^2$" afr_get_split_brain_count $V0 +cat $M0/data/file > /dev/null +EXPECT "0" echo $? +M0_MD5=$(md5sum $M0/data/file | cut -d\ -f1) +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_get_split_brain_count $V0 +TEST [ "$LATEST_MTIME_MD5" == "$M0_MD5" ] + +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + +B0_MD5=$(md5sum $B0/${V0}0/data/file | cut -d\ -f1) +B1_MD5=$(md5sum $B0/${V0}1/data/file | cut -d\ -f1) +TEST [ "$LATEST_MTIME_MD5" == "$B0_MD5" ] +TEST [ "$LATEST_MTIME_MD5" == "$B1_MD5" ] + +############ Client side directory conservative merge ################# +TEST $CLI volume reset $V0 cluster.favorite-child-policy +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST touch $M0/data/test +files=$(count_files $M0/data) +EXPECT "2" echo $files +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST touch $M0/data/test1 +files=$(count_files $M0/data) +EXPECT "2" echo $files + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +#data dir will be in entry split-brain +ls $M0/data > /dev/null +EXPECT "2" echo $? + +TEST $CLI volume set $V0 cluster.favorite-child-policy mtime + +EXPECT_WITHIN $HEAL_TIMEOUT "^2$" afr_get_split_brain_count $V0 + + +ls $M0/data > /dev/null +EXPECT "0" echo $? 
+ +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_get_split_brain_count $V0 +#Entry Split-brain is gone, but data self-heal is pending on the files +EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0 + +cat $M0/data/test > /dev/null +cat $M0/data/test1 > /dev/null + +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +files=$(count_files $M0/data) +EXPECT "3" echo $files + +TEST force_umount $M0 +TEST rm $GET_MDATA_PATH/get-mdata-xattr + +cleanup diff --git a/tests/basic/afr/split-brain-favorite-child-policy.t b/tests/basic/afr/split-brain-favorite-child-policy.t deleted file mode 100644 index c268c125610..00000000000 --- a/tests/basic/afr/split-brain-favorite-child-policy.t +++ /dev/null @@ -1,203 +0,0 @@ -#!/bin/bash - -#Test the split-brain resolution CLI commands. -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../volume.rc - -cleanup; - -TEST glusterd -TEST pidof glusterd - -#Create replica 2 volume -TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} -TEST $CLI volume set $V0 performance.write-behind off -TEST $CLI volume set $V0 cluster.self-heal-daemon off -TEST $CLI volume set $V0 cluster.entry-self-heal off -TEST $CLI volume set $V0 cluster.data-self-heal off -TEST $CLI volume set $V0 cluster.metadata-self-heal off -TEST $CLI volume set $V0 cluster.heal-timeout 5 -TEST $CLI volume start $V0 -TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 -TEST touch $M0/file - -############ Healing using favorite-child-policy = ctime ################# -TEST kill_brick $V0 $H0 $B0/${V0}0 -TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024 -TEST $CLI volume start $V0 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 -TEST kill_brick $V0 $H0 $B0/${V0}1 -TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024 - -TEST $CLI volume start $V0 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT 
"1" afr_child_up_status $V0 1 -TEST $CLI volume set $V0 cluster.self-heal-daemon on -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 -TEST $CLI volume heal $V0 - -#file still in split-brain -cat $M0/file > /dev/null -EXPECT "1" echo $? - -# Umount to prevent further FOPS on the file, then find the brick with latest ctime. -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 -ctime1=`stat -c "%.Z" $B0/${V0}0/file` -ctime2=`stat -c "%.Z" $B0/${V0}1/file` -if (( $(echo "$ctime1 > $ctime2" | bc -l) )); then - LATEST_CTIME_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1) -else - LATEST_CTIME_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1) -fi -TEST $CLI volume set $V0 cluster.favorite-child-policy ctime -TEST $CLI volume start $V0 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 -TEST $CLI volume heal $V0 -EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 -B0_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1) -B1_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1) -TEST [ "$LATEST_CTIME_MD5" == "$B0_MD5" ] -TEST [ "$LATEST_CTIME_MD5" == "$B1_MD5" ] -TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 -cat $M0/file > /dev/null -EXPECT "0" echo $? 
- -############ Healing using favorite-child-policy = mtime ################# -TEST $CLI volume set $V0 cluster.favorite-child-policy none -TEST $CLI volume set $V0 cluster.self-heal-daemon off -TEST kill_brick $V0 $H0 $B0/${V0}1 -TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024 -TEST $CLI volume start $V0 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 -TEST kill_brick $V0 $H0 $B0/${V0}0 -TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024 - -TEST $CLI volume start $V0 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 -TEST $CLI volume set $V0 cluster.self-heal-daemon on -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 -TEST $CLI volume heal $V0 - -#file still in split-brain -cat $M0/file > /dev/null -EXPECT "1" echo $? - -#We know that the second brick has latest mtime. -LATEST_CTIME_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1) -TEST $CLI volume set $V0 cluster.favorite-child-policy mtime -TEST $CLI volume start $V0 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 -TEST $CLI volume heal $V0 -EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 -cat $M0/file > /dev/null -EXPECT "0" echo $? 
-HEALED_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1) -TEST [ "$LATEST_CTIME_MD5" == "$HEALED_MD5" ] - -############ Healing using favorite-child-policy = size ################# -TEST $CLI volume set $V0 cluster.favorite-child-policy none -TEST $CLI volume set $V0 cluster.self-heal-daemon off -TEST kill_brick $V0 $H0 $B0/${V0}1 -TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024 -TEST $CLI volume start $V0 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 -TEST kill_brick $V0 $H0 $B0/${V0}0 -TEST dd if=/dev/urandom of=$M0/file bs=1024 count=10240 - -TEST $CLI volume start $V0 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 -TEST $CLI volume set $V0 cluster.self-heal-daemon on -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 -TEST $CLI volume heal $V0 - -#file still in split-brain -cat $M0/file > /dev/null -EXPECT "1" echo $? - -#We know that the second brick has the bigger size file. -BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1) -TEST $CLI volume set $V0 cluster.favorite-child-policy size -TEST $CLI volume start $V0 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 -TEST $CLI volume heal $V0 -EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 -cat $M0/file > /dev/null -EXPECT "0" echo $? 
-HEALED_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1) -TEST [ "$BIGGER_FILE_MD5" == "$HEALED_MD5" ] - -############ Healing using favorite-child-policy = majority on replica-3 ################# - -#Convert volume to replica-3 -TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 -TEST $CLI volume heal $V0 -EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 - -TEST $CLI volume set $V0 cluster.quorum-type none -TEST $CLI volume set $V0 cluster.favorite-child-policy none -TEST $CLI volume set $V0 cluster.self-heal-daemon off -TEST kill_brick $V0 $H0 $B0/${V0}0 -TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024 -TEST $CLI volume start $V0 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 -TEST kill_brick $V0 $H0 $B0/${V0}1 -TEST kill_brick $V0 $H0 $B0/${V0}2 -TEST dd if=/dev/urandom of=$M0/file bs=1024 count=10240 - -TEST $CLI volume start $V0 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 -TEST $CLI volume set $V0 cluster.self-heal-daemon on -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 -TEST $CLI volume 
heal $V0 - -#file still in split-brain -cat $M0/file > /dev/null -EXPECT "1" echo $? - -#We know that the second and third bricks agree with each other. Pick any one of them. -MAJORITY_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1) -TEST $CLI volume set $V0 cluster.favorite-child-policy majority -TEST $CLI volume start $V0 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 -TEST $CLI volume heal $V0 -EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 -cat $M0/file > /dev/null -EXPECT "0" echo $? -HEALED_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1) -TEST [ "$MAJORITY_MD5" == "$HEALED_MD5" ] - -TEST force_umount $M0 -cleanup |