summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--tests/afr.rc6
-rw-r--r--tests/basic/afr/afr-anon-inode-no-quorum.t63
-rw-r--r--tests/basic/afr/afr-anon-inode.t114
-rw-r--r--tests/basic/afr/entry-self-heal-anon-dir-off.t459
-rw-r--r--tests/basic/afr/rename-data-loss.t72
-rw-r--r--tests/bugs/replicate/bug-1744548-heal-timeout.t6
-rwxr-xr-xtests/features/trash.t74
-rw-r--r--tests/volume.rc2
-rw-r--r--xlators/cluster/afr/src/afr-common.c46
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.c12
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c182
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c205
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-name.c33
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h5
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.c178
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.h2
-rw-r--r--xlators/cluster/afr/src/afr.c41
-rw-r--r--xlators/cluster/afr/src/afr.h11
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c39
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c5
20 files changed, 1427 insertions, 128 deletions
diff --git a/tests/afr.rc b/tests/afr.rc
index 5fc7fa1..2417899 100644
--- a/tests/afr.rc
+++ b/tests/afr.rc
@@ -115,3 +115,9 @@ function afr_private_key_value()
#xargs at the end will strip leading spaces
grep -E "^${key} = " $m/.meta/graphs/active/${v}-replicate-${replica_id}/private | cut -f2 -d'=' | xargs
}
+
+function afr_anon_entry_count()
+{
+ local b=$1
+ ls $b/.glusterfs-anonymous-inode* | wc -l
+}
diff --git a/tests/basic/afr/afr-anon-inode-no-quorum.t b/tests/basic/afr/afr-anon-inode-no-quorum.t
new file mode 100644
index 0000000..896ba0c
--- /dev/null
+++ b/tests/basic/afr/afr-anon-inode-no-quorum.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+#Test that anon-inode entry is not cleaned up as long as there exists at least
+#one valid entry
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.readdir-ahead off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST touch $M0/a $M0/b
+
+gfid_a=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/a))
+gfid_b=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/b))
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST mv $M0/a $M0/a-new
+TEST mv $M0/b $M0/b-new
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST ! ls $M0/a
+TEST ! ls $M0/b
+anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode)
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b
+#Make sure index heal doesn't happen after enabling heal
+TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1
+TEST rm -f $B0/${V0}1/.glusterfs/indices/xattrop/*
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+#Allow time for a scan
+sleep 5
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b
+inum_b=$(STAT_INO $B0/${V0}0/$anon_inode_name/$gfid_b)
+TEST rm -f $M0/a-new
+TEST stat $M0/b-new
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1
+EXPECT "$inum_b" STAT_INO $B0/${V0}0/b-new
+
+cleanup
diff --git a/tests/basic/afr/afr-anon-inode.t b/tests/basic/afr/afr-anon-inode.t
new file mode 100644
index 0000000..f4cf37a
--- /dev/null
+++ b/tests/basic/afr/afr-anon-inode.t
@@ -0,0 +1,114 @@
+#!/bin/bash
+#Tests that afr-anon-inode test cases work fine as expected
+#These are cases where in entry-heal/name-heal we dont know entry for an inode
+#so these inodes are kept in a special directory
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2}
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+EXPECT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode"
+TEST $CLI volume set $V0 cluster.use-anonymous-inode no
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode"
+TEST $CLI volume set $V0 cluster.use-anonymous-inode yes
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode"
+TEST mkdir -p $M0/d1/b $M0/d2/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST mv $M0/d2/a $M0/d1
+TEST mv $M0/d1/b $M0/d2
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode)
+TEST [[ -d $B0/${V0}1/$anon_inode_name ]]
+TEST [[ -d $B0/${V0}2/$anon_inode_name ]]
+anon_gfid=$(gf_get_gfid_xattr $B0/${V0}0/$anon_inode_name)
+EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}1/$anon_inode_name
+EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}2/$anon_inode_name
+
+TEST ! ls $M0/$anon_inode_name
+EXPECT "^4$" echo $(ls -a $M0 | wc -l)
+
+#Test purging code path by shd
+TEST $CLI volume heal $V0 disable
+TEST mkdir $M0/l0 $M0/l1 $M0/l2
+TEST touch $M0/del-file $M0/del-file-nolink $M0/l0/file
+TEST ln $M0/del-file $M0/del-file-link
+TEST ln $M0/l0/file $M0/l1/file-link1
+TEST ln $M0/l0/file $M0/l2/file-link2
+TEST mkdir -p $M0/del-recursive-dir/d1
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST rm -f $M0/del-file $M0/del-file-nolink
+TEST rm -rf $M0/del-recursive-dir
+TEST mv $M0/d1/a $M0/d2
+TEST mv $M0/l0/file $M0/l0/renamed-file
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 0
+
+nolink_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file-nolink))
+link_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file))
+dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-recursive-dir))
+rename_dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/d1/a))
+rename_file_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/l0/file))
+TEST ! stat $M0/del-file
+TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid
+TEST ! stat $M0/del-file-nolink
+TEST ! stat $B0/${V0}0/$anon_inode_name/$nolink_gfid
+TEST ! stat $M0/del-recursive-dir
+TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid
+TEST ! stat $M0/d1/a
+TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid
+TEST ! stat $M0/l0/file
+TEST stat $B0/${V0}0/$anon_inode_name/$rename_file_gfid
+
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST mv $M0/l1/file-link1 $M0/l1/renamed-file-link1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1
+TEST ! stat $M0/l1/file-link1
+TEST stat $B0/${V0}1/$anon_inode_name/$rename_file_gfid
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST mv $M0/l2/file-link2 $M0/l2/renamed-file-link2
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 2
+TEST ! stat $M0/l2/file-link2
+TEST stat $B0/${V0}2/$anon_inode_name/$rename_file_gfid
+
+#Simulate only anon-inodes present in all bricks
+TEST rm -f $M0/l0/renamed-file $M0/l1/renamed-file-link1 $M0/l2/renamed-file-link2
+
+#Test that shd doesn't cleanup anon-inodes when some bricks are down
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST $CLI volume heal $V0 enable
+$CLI volume heal $V0
+sleep 5 #Allow time for completion of one scan
+TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid
+TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid
+TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid
+rename_dir_inum=$(STAT_INO $B0/${V0}0/$anon_inode_name/$rename_dir_gfid)
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}2
+
+#Test that rename indeed happened instead of rmdir/mkdir
+renamed_dir_inum=$(STAT_INO $B0/${V0}0/d2/a)
+EXPECT "$rename_dir_inum" echo $renamed_dir_inum
+cleanup;
diff --git a/tests/basic/afr/entry-self-heal-anon-dir-off.t b/tests/basic/afr/entry-self-heal-anon-dir-off.t
new file mode 100644
index 0000000..7bb6ee1
--- /dev/null
+++ b/tests/basic/afr/entry-self-heal-anon-dir-off.t
@@ -0,0 +1,459 @@
+#!/bin/bash
+
+#This file checks if missing entry self-heal and entry self-heal are working
+#as expected.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+function get_file_type {
+ stat -c "%a:%F:%g:%t:%T:%u" $1
+}
+
+function diff_dirs {
+ diff <(ls $1 | sort) <(ls $2 | sort)
+}
+
+function heal_status {
+ local f1_path="${1}/${3}"
+ local f2_path="${2}/${3}"
+ local insync=""
+ diff_dirs $f1_path $f2_path
+ if [ $? -eq 0 ];
+ then
+ insync="Y"
+ else
+ insync="N"
+ fi
+ local xattr11=$(get_hex_xattr trusted.afr.$V0-client-0 $f1_path)
+ local xattr12=$(get_hex_xattr trusted.afr.$V0-client-1 $f1_path)
+ local xattr21=$(get_hex_xattr trusted.afr.$V0-client-0 $f2_path)
+ local xattr22=$(get_hex_xattr trusted.afr.$V0-client-1 $f2_path)
+ local dirty1=$(get_hex_xattr trusted.afr.dirty $f1_path)
+ local dirty2=$(get_hex_xattr trusted.afr.dirty $f2_path)
+ if [ -z $xattr11 ]; then xattr11="000000000000000000000000"; fi
+ if [ -z $xattr12 ]; then xattr12="000000000000000000000000"; fi
+ if [ -z $xattr21 ]; then xattr21="000000000000000000000000"; fi
+ if [ -z $xattr22 ]; then xattr22="000000000000000000000000"; fi
+ if [ -z $dirty1 ]; then dirty1="000000000000000000000000"; fi
+ if [ -z $dirty2 ]; then dirty2="000000000000000000000000"; fi
+ echo ${insync}${xattr11}${xattr12}${xattr21}${xattr22}${dirty1}${dirty2}
+}
+
+function is_heal_done {
+ local zero_xattr="000000000000000000000000"
+ if [ "$(heal_status $@)" == "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" ];
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function print_pending_heals {
+ local result=":"
+ for i in "$@";
+ do
+ if [ "N" == $(is_heal_done $B0/${V0}0 $B0/${V0}1 $i) ];
+ then
+ result="$result:$i"
+ fi
+ done
+#To prevent any match for EXPECT_WITHIN, print a char non-existent in file-names
+ if [ $result == ":" ]; then result="~"; fi
+ echo $result
+}
+
+zero_xattr="000000000000000000000000"
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume set $V0 cluster.use-anonymous-inode off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.readdir-ahead off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --use-readdirp=no $M0
+cd $M0
+#_me_ is dir on which missing entry self-heal happens, _heal is where dir self-heal happens
+#spb is split-brain, fool is all fool
+
+#source_self_accusing means there exists source and a sink which self-accuses.
+#This simulates failures where fops failed on the bricks without it going down.
+#Something like EACCESS/EDQUOT etc
+
+TEST mkdir spb_heal spb spb_me_heal spb_me fool_heal fool_me v1_fool_heal v1_fool_me source_creations_heal source_deletions_heal source_creations_me source_deletions_me v1_dirty_me v1_dirty_heal source_self_accusing
+TEST mkfifo source_deletions_heal/fifo
+TEST mknod source_deletions_heal/block b 4 5
+TEST mknod source_deletions_heal/char c 1 5
+TEST touch source_deletions_heal/file
+TEST ln -s source_deletions_heal/file source_deletions_heal/slink
+TEST mkdir source_deletions_heal/dir1
+TEST mkdir source_deletions_heal/dir1/dir2
+
+TEST mkfifo source_deletions_me/fifo
+TEST mknod source_deletions_me/block b 4 5
+TEST mknod source_deletions_me/char c 1 5
+TEST touch source_deletions_me/file
+TEST ln -s source_deletions_me/file source_deletions_me/slink
+TEST mkdir source_deletions_me/dir1
+TEST mkdir source_deletions_me/dir1/dir2
+
+TEST mkfifo source_self_accusing/fifo
+TEST mknod source_self_accusing/block b 4 5
+TEST mknod source_self_accusing/char c 1 5
+TEST touch source_self_accusing/file
+TEST ln -s source_self_accusing/file source_self_accusing/slink
+TEST mkdir source_self_accusing/dir1
+TEST mkdir source_self_accusing/dir1/dir2
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+TEST touch spb_heal/0 spb/0 spb_me_heal/0 spb_me/0 fool_heal/0 fool_me/0 v1_fool_heal/0 v1_fool_me/0 v1_dirty_heal/0 v1_dirty_me/0
+TEST rm -rf source_deletions_heal/fifo source_deletions_heal/block source_deletions_heal/char source_deletions_heal/file source_deletions_heal/slink source_deletions_heal/dir1
+TEST rm -rf source_deletions_me/fifo source_deletions_me/block source_deletions_me/char source_deletions_me/file source_deletions_me/slink source_deletions_me/dir1
+TEST rm -rf source_self_accusing/fifo source_self_accusing/block source_self_accusing/char source_self_accusing/file source_self_accusing/slink source_self_accusing/dir1
+
+#Test that the files are deleted
+TEST ! stat $B0/${V0}1/source_deletions_heal/fifo
+TEST ! stat $B0/${V0}1/source_deletions_heal/block
+TEST ! stat $B0/${V0}1/source_deletions_heal/char
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
+TEST ! stat $B0/${V0}1/source_deletions_heal/slink
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1
+TEST ! stat $B0/${V0}1/source_deletions_me/fifo
+TEST ! stat $B0/${V0}1/source_deletions_me/block
+TEST ! stat $B0/${V0}1/source_deletions_me/char
+TEST ! stat $B0/${V0}1/source_deletions_me/file
+TEST ! stat $B0/${V0}1/source_deletions_me/slink
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1
+TEST ! stat $B0/${V0}1/source_self_accusing/fifo
+TEST ! stat $B0/${V0}1/source_self_accusing/block
+TEST ! stat $B0/${V0}1/source_self_accusing/char
+TEST ! stat $B0/${V0}1/source_self_accusing/file
+TEST ! stat $B0/${V0}1/source_self_accusing/slink
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1
+
+
+TEST mkfifo source_creations_heal/fifo
+TEST mknod source_creations_heal/block b 4 5
+TEST mknod source_creations_heal/char c 1 5
+TEST touch source_creations_heal/file
+TEST ln -s source_creations_heal/file source_creations_heal/slink
+TEST mkdir source_creations_heal/dir1
+TEST mkdir source_creations_heal/dir1/dir2
+
+TEST mkfifo source_creations_me/fifo
+TEST mknod source_creations_me/block b 4 5
+TEST mknod source_creations_me/char c 1 5
+TEST touch source_creations_me/file
+TEST ln -s source_creations_me/file source_creations_me/slink
+TEST mkdir source_creations_me/dir1
+TEST mkdir source_creations_me/dir1/dir2
+
+$CLI volume stop $V0
+
+#simulate fool fool scenario for fool_* dirs
+setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/{fool_heal,fool_me}
+setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me}
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me}
+
+#Simulate v1-dirty(self-accusing but no pending ops on others) scenario for v1-dirty
+setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/v1_dirty_{heal,me}
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/v1_dirty_{heal,me}
+
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+TEST touch spb_heal/1 spb/0 spb_me_heal/1 spb_me/0 fool_heal/1 fool_me/1 v1_fool_heal/1 v1_fool_me/1
+
+$CLI volume stop $V0
+
+#simulate fool fool scenario for fool_* dirs
+setfattr -x trusted.afr.$V0-client-1 $B0/${V0}0/{fool_heal,fool_me}
+setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me}
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me}
+
+#simulate self-accusing for source_self_accusing
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000006 $B0/${V0}0/source_self_accusing
+
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+# Check if conservative merges happened correctly on _me_ dirs
+TEST stat spb_me_heal/1
+TEST stat $B0/${V0}0/spb_me_heal/1
+TEST stat $B0/${V0}1/spb_me_heal/1
+
+TEST stat spb_me_heal/0
+TEST stat $B0/${V0}0/spb_me_heal/0
+TEST stat $B0/${V0}1/spb_me_heal/0
+
+TEST stat fool_me/1
+TEST stat $B0/${V0}0/fool_me/1
+TEST stat $B0/${V0}1/fool_me/1
+
+TEST stat fool_me/0
+TEST stat $B0/${V0}0/fool_me/0
+TEST stat $B0/${V0}1/fool_me/0
+
+TEST stat v1_fool_me/0
+TEST stat $B0/${V0}0/v1_fool_me/0
+TEST stat $B0/${V0}1/v1_fool_me/0
+
+TEST stat v1_fool_me/1
+TEST stat $B0/${V0}0/v1_fool_me/1
+TEST stat $B0/${V0}1/v1_fool_me/1
+
+TEST stat v1_dirty_me/0
+TEST stat $B0/${V0}0/v1_dirty_me/0
+TEST stat $B0/${V0}1/v1_dirty_me/0
+
+#Check if files that have gfid-mismatches in _me_ are giving EIO
+TEST ! stat spb_me/0
+
+#Check if stale files are deleted on access
+TEST ! stat source_deletions_me/fifo
+TEST ! stat $B0/${V0}0/source_deletions_me/fifo
+TEST ! stat $B0/${V0}1/source_deletions_me/fifo
+TEST ! stat source_deletions_me/block
+TEST ! stat $B0/${V0}0/source_deletions_me/block
+TEST ! stat $B0/${V0}1/source_deletions_me/block
+TEST ! stat source_deletions_me/char
+TEST ! stat $B0/${V0}0/source_deletions_me/char
+TEST ! stat $B0/${V0}1/source_deletions_me/char
+TEST ! stat source_deletions_me/file
+TEST ! stat $B0/${V0}0/source_deletions_me/file
+TEST ! stat $B0/${V0}1/source_deletions_me/file
+TEST ! stat source_deletions_me/file
+TEST ! stat $B0/${V0}0/source_deletions_me/file
+TEST ! stat $B0/${V0}1/source_deletions_me/file
+TEST ! stat source_deletions_me/dir1/dir2
+TEST ! stat $B0/${V0}0/source_deletions_me/dir1/dir2
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1/dir2
+TEST ! stat source_deletions_me/dir1
+TEST ! stat $B0/${V0}0/source_deletions_me/dir1
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1
+
+#Test if the files created as part of access are healed correctly
+r=$(get_file_type source_creations_me/fifo)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/fifo
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/fifo
+TEST [ -p source_creations_me/fifo ]
+
+r=$(get_file_type source_creations_me/block)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/block
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/block
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/block
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/block
+TEST [ -b source_creations_me/block ]
+
+r=$(get_file_type source_creations_me/char)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/char
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/char
+TEST [ -c source_creations_me/char ]
+
+r=$(get_file_type source_creations_me/file)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/file
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/file
+TEST [ -f source_creations_me/file ]
+
+r=$(get_file_type source_creations_me/slink)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/slink
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/slink
+TEST [ -h source_creations_me/slink ]
+
+r=$(get_file_type source_creations_me/dir1/dir2)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1/dir2
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1/dir2
+TEST [ -d source_creations_me/dir1/dir2 ]
+
+r=$(get_file_type source_creations_me/dir1)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1
+TEST [ -d source_creations_me/dir1 ]
+
+#Trigger heal and check _heal dirs are healed properly
+#Trigger change in event generation number. That way inodes would get refreshed during lookup
+TEST kill_brick $V0 $H0 $B0/${V0}1
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+TEST stat spb_heal
+TEST stat spb_me_heal
+TEST stat fool_heal
+TEST stat fool_me
+TEST stat v1_fool_heal
+TEST stat v1_fool_me
+TEST stat source_deletions_heal
+TEST stat source_deletions_me
+TEST stat source_self_accusing
+TEST stat source_creations_heal
+TEST stat source_creations_me
+TEST stat v1_dirty_heal
+TEST stat v1_dirty_me
+TEST $CLI volume stop $V0
+TEST rm -rf $B0/${V0}{0,1}/.glusterfs/indices/xattrop/*
+
+$CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+#Create base entry in indices/xattrop
+echo "Data" > $M0/FILE
+rm -f $M0/FILE
+EXPECT "1" count_index_entries $B0/${V0}0
+EXPECT "1" count_index_entries $B0/${V0}1
+
+TEST $CLI volume stop $V0;
+
+#Create entries for fool_heal and fool_me to ensure they are fully healed and dirty xattrs erased, before triggering index heal
+create_brick_xattrop_entry $B0/${V0}0 fool_heal fool_me source_creations_heal/dir1
+
+$CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+$CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0;
+EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals spb_heal spb_me_heal fool_heal fool_me v1_fool_heal v1_fool_me source_deletions_heal source_deletions_me source_creations_heal source_creations_me v1_dirty_heal v1_dirty_me source_self_accusing
+
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_me_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_self_accusing
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_me
+
+#Don't access the files/dirs from mount point as that may cause self-heals
+# Check if conservative merges happened correctly on heal dirs
+TEST stat $B0/${V0}0/spb_heal/1
+TEST stat $B0/${V0}1/spb_heal/1
+
+TEST stat $B0/${V0}0/spb_heal/0
+TEST stat $B0/${V0}1/spb_heal/0
+
+TEST stat $B0/${V0}0/fool_heal/1
+TEST stat $B0/${V0}1/fool_heal/1
+
+TEST stat $B0/${V0}0/fool_heal/0
+TEST stat $B0/${V0}1/fool_heal/0
+
+TEST stat $B0/${V0}0/v1_fool_heal/0
+TEST stat $B0/${V0}1/v1_fool_heal/0
+
+TEST stat $B0/${V0}0/v1_fool_heal/1
+TEST stat $B0/${V0}1/v1_fool_heal/1
+
+TEST stat $B0/${V0}0/v1_dirty_heal/0
+TEST stat $B0/${V0}1/v1_dirty_heal/0
+
+#Check if files that have gfid-mismatches in spb are giving EIO
+TEST ! stat spb/0
+
+#Check if stale files are deleted on access
+TEST ! stat $B0/${V0}0/source_deletions_heal/fifo
+TEST ! stat $B0/${V0}1/source_deletions_heal/fifo
+TEST ! stat $B0/${V0}0/source_deletions_heal/block
+TEST ! stat $B0/${V0}1/source_deletions_heal/block
+TEST ! stat $B0/${V0}0/source_deletions_heal/char
+TEST ! stat $B0/${V0}1/source_deletions_heal/char
+TEST ! stat $B0/${V0}0/source_deletions_heal/file
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
+TEST ! stat $B0/${V0}0/source_deletions_heal/file
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
+TEST ! stat $B0/${V0}0/source_deletions_heal/dir1/dir2
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1/dir2
+TEST ! stat $B0/${V0}0/source_deletions_heal/dir1
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1
+
+#Check if stale files are deleted on access
+TEST ! stat $B0/${V0}0/source_self_accusing/fifo
+TEST ! stat $B0/${V0}1/source_self_accusing/fifo
+TEST ! stat $B0/${V0}0/source_self_accusing/block
+TEST ! stat $B0/${V0}1/source_self_accusing/block
+TEST ! stat $B0/${V0}0/source_self_accusing/char
+TEST ! stat $B0/${V0}1/source_self_accusing/char
+TEST ! stat $B0/${V0}0/source_self_accusing/file
+TEST ! stat $B0/${V0}1/source_self_accusing/file
+TEST ! stat $B0/${V0}0/source_self_accusing/file
+TEST ! stat $B0/${V0}1/source_self_accusing/file
+TEST ! stat $B0/${V0}0/source_self_accusing/dir1/dir2
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1/dir2
+TEST ! stat $B0/${V0}0/source_self_accusing/dir1
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1
+
+#Test if the files created as part of full self-heal correctly
+r=$(get_file_type $B0/${V0}0/source_creations_heal/fifo)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/fifo
+TEST [ -p $B0/${V0}0/source_creations_heal/fifo ]
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/block
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/block
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/block)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/block
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/char)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/char
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/file)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file
+TEST [ -f $B0/${V0}0/source_creations_heal/file ]
+
+r=$(get_file_type source_creations_heal/file $B0/${V0}0/slink)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file slink
+TEST [ -h $B0/${V0}0/source_creations_heal/slink ]
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1/dir2)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1/dir2
+TEST [ -d $B0/${V0}0/source_creations_heal/dir1/dir2 ]
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1
+TEST [ -d $B0/${V0}0/source_creations_heal/dir1 ]
+
+cd -
+
+#Anonymous directory shouldn't be created
+TEST mkdir $M0/rename-dir
+before_rename=$(STAT_INO $B0/${V0}1/rename-dir)
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST mv $M0/rename-dir $M0/new-name
+TEST $CLI volume start $V0 force
+#'spb' is in split-brain so pending-heal-count will be 2
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+after_rename=$(STAT_INO $B0/${V0}1/new-name)
+EXPECT "0" echo $(ls -a $B0/${V0}0/ | grep anonymous-inode | wc -l)
+EXPECT "0" echo $(ls -a $B0/${V0}1/ | grep anonymous-inode | wc -l)
+EXPECT_NOT "$before_rename" echo $after_rename
+cleanup
diff --git a/tests/basic/afr/rename-data-loss.t b/tests/basic/afr/rename-data-loss.t
new file mode 100644
index 0000000..256ee2a
--- /dev/null
+++ b/tests/basic/afr/rename-data-loss.t
@@ -0,0 +1,72 @@
+#!/bin/bash
+#Self-heal tests
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 write-behind off
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 data-self-heal off
+TEST $CLI volume set $V0 metadata-self-heal off
+TEST $CLI volume set $V0 entry-self-heal off
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+cd $M0
+TEST `echo "line1" >> file1`
+TEST mkdir dir1
+TEST mkdir dir2
+TEST mkdir -p dir1/dira/dirb
+TEST `echo "line1">>dir1/dira/dirb/file1`
+TEST mkdir delete_me
+TEST `echo "line1" >> delete_me/file1`
+
+#brick0 has witnessed the second write while brick1 is down.
+TEST kill_brick $V0 $H0 $B0/brick1
+TEST `echo "line2" >> file1`
+TEST `echo "line2" >> dir1/dira/dirb/file1`
+TEST `echo "line2" >> delete_me/file1`
+
+#Toggle the bricks that are up/down.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/brick0
+
+#Rename when the 'source' brick0 for data-selfheals is down.
+mv file1 file2
+mv dir1/dira dir2
+
+#Delete a dir when brick0 is down.
+rm -rf delete_me
+cd -
+
+#Bring everything up and trigger heal
+TEST $CLI volume set $V0 self-heal-daemon on
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick1
+
+#Remount to avoid reading from caches
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+EXPECT "line2" tail -1 $M0/file2
+EXPECT "line2" tail -1 $M0/dir2/dira/dirb/file1
+TEST ! stat $M0/delete_me/file1
+TEST ! stat $M0/delete_me
+
+anon_inode_name=$(ls -a $B0/brick0 | grep glusterfs-anonymous-inode)
+TEST [[ -d $B0/brick0/$anon_inode_name ]]
+TEST [[ -d $B0/brick1/$anon_inode_name ]]
+cleanup
diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t
index c208112..0115350 100644
--- a/tests/bugs/replicate/bug-1744548-heal-timeout.t
+++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t
@@ -25,14 +25,14 @@ TEST ! $CLI volume heal $V0
TEST $CLI volume profile $V0 start
TEST $CLI volume profile $V0 info clear
TEST $CLI volume heal $V0 enable
-# Each brick does 3 opendirs, corresponding to dirty, xattrop and entry-changes
-EXPECT_WITHIN $HEAL_TIMEOUT "^333$" get_cumulative_opendir_count
+# Each brick does 4 opendirs, corresponding to dirty, xattrop and entry-changes, anonymous-inode
+EXPECT_WITHIN 4 "^444$" get_cumulative_opendir_count
# Check that a change in heal-timeout is honoured immediately.
TEST $CLI volume set $V0 cluster.heal-timeout 5
sleep 10
# Two crawls must have happened.
-EXPECT_WITHIN $HEAL_TIMEOUT "^999$" get_cumulative_opendir_count
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^121212$" get_cumulative_opendir_count
# shd must not heal if it is disabled and heal-timeout is changed.
TEST $CLI volume heal $V0 disable
diff --git a/tests/features/trash.t b/tests/features/trash.t
index 472e909..da5b50b 100755
--- a/tests/features/trash.t
+++ b/tests/features/trash.t
@@ -94,105 +94,105 @@ wildcard_not_exists() {
if [ $? -eq 0 ]; then echo "Y"; else echo "N"; fi
}
-# testing glusterd [1-3]
+# testing glusterd
TEST glusterd
TEST pidof glusterd
TEST $CLI volume info
-# creating distributed volume [4]
+# creating distributed volume
TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
-# checking volume status [5-7]
+# checking volume status
EXPECT "$V0" volinfo_field $V0 'Volume Name'
EXPECT 'Created' volinfo_field $V0 'Status'
EXPECT '2' brick_count $V0
-# test without enabling trash translator [8]
+# test without enabling trash translator
TEST start_vol $V0 $M0
-# test on enabling trash translator [9-10]
+# test on enabling trash translator
TEST $CLI volume set $V0 features.trash on
EXPECT 'on' volinfo_field $V0 'features.trash'
-# files directly under mount point [11]
+# files directly under mount point
create_files $M0/file1 $M0/file2
TEST file_exists $V0 file1 file2
-# perform unlink [12]
+# perform unlink
TEST unlink_op file1
-# perform truncate [13]
+# perform truncate
TEST truncate_op file2 4
-# create files directory hierarchy and check [14]
+# create files directory hierarchy and check
mkdir -p $M0/1/2/3
create_files $M0/1/2/3/foo1 $M0/1/2/3/foo2
TEST file_exists $V0 1/2/3/foo1 1/2/3/foo2
-# perform unlink [15]
+# perform unlink
TEST unlink_op 1/2/3/foo1
-# perform truncate [16]
+# perform truncate
TEST truncate_op 1/2/3/foo2 4
# create a directory for eliminate pattern
mkdir $M0/a
-# set the eliminate pattern [17-18]
+# set the eliminate pattern
TEST $CLI volume set $V0 features.trash-eliminate-path /a
EXPECT '/a' volinfo_field $V0 'features.trash-eliminate-path'
-# create two files and check [19]
+# create two files and check
create_files $M0/a/test1 $M0/a/test2
TEST file_exists $V0 a/test1 a/test2
-# remove from eliminate pattern [20]
+# remove from eliminate pattern
rm -f $M0/a/test1
EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test1*
-# truncate from eliminate path [21-23]
+# truncate from eliminate path
truncate -s 2 $M0/a/test2
TEST [ -e $M0/a/test2 ]
TEST [ `ls -l $M0/a/test2 | awk '{print $5}'` -eq 2 ]
EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test2*
-# set internal op on [24-25]
+# set internal op on
TEST $CLI volume set $V0 features.trash-internal-op on
EXPECT 'on' volinfo_field $V0 'features.trash-internal-op'
-# again create two files and check [26]
+# again create two files and check
create_files $M0/inop1 $M0/inop2
TEST file_exists $V0 inop1 inop2
-# perform unlink [27]
+# perform unlink
TEST unlink_op inop1
-# perform truncate [28]
+# perform truncate
TEST truncate_op inop2 4
-# remove one brick and restart the volume [28-31]
+# remove one brick and restart the volume
TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 force
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
TEST $CLI volume stop $V0
TEST start_vol $V0 $M0 $M0/.trashcan
-# again create two files and check [33]
+# again create two files and check
create_files $M0/rebal1 $M0/rebal2
TEST file_exists $V0 rebal1 rebal2
-# add one brick [34-35]
+# add one brick
TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3
TEST [ -d $B0/${V0}3 ]
-# perform rebalance [36]
+# perform rebalance
TEST $CLI volume rebalance $V0 start force
EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed
#Find out which file was migrated to the new brick
file_name=$(ls $B0/${V0}3/rebal*| xargs basename)
-# check whether rebalance was succesful [37-40]
+# check whether rebalance was succesful
EXPECT "Y" wildcard_exists $B0/${V0}3/$file_name*
EXPECT "Y" wildcard_exists $B0/${V0}1/.trashcan/internal_op/$file_name*
@@ -201,52 +201,42 @@ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
# force required in case rebalance is not over
TEST $CLI volume stop $V0 force
-# create a replicated volume [41]
+# create a replicated volume
TEST $CLI volume create $V1 replica 2 $H0:$B0/${V1}{1,2}
-# checking volume status [42-45]
+# checking volume status
EXPECT "$V1" volinfo_field $V1 'Volume Name'
EXPECT 'Replicate' volinfo_field $V1 'Type'
EXPECT 'Created' volinfo_field $V1 'Status'
EXPECT '2' brick_count $V1
-# enable trash with options and start the replicate volume by disabling automatic self-heal [46-50]
+# enable trash with options and start the replicate volume by disabling automatic self-heal
TEST $CLI volume set $V1 features.trash on
TEST $CLI volume set $V1 features.trash-internal-op on
EXPECT 'on' volinfo_field $V1 'features.trash'
EXPECT 'on' volinfo_field $V1 'features.trash-internal-op'
TEST start_vol $V1 $M1 $M1/.trashcan
-# mount and check for trash directory [51]
+# mount and check for trash directory
TEST [ -d $M1/.trashcan/internal_op ]
-# create a file and check [52]
+# create a file and check
touch $M1/self
TEST [ -e $B0/${V1}1/self -a -e $B0/${V1}2/self ]
-# kill one brick and delete the file from mount point [53-54]
+# kill one brick and delete the file from mount point
kill_brick $V1 $H0 $B0/${V1}1
EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "1" online_brick_count
rm -f $M1/self
EXPECT "Y" wildcard_exists $B0/${V1}2/.trashcan/self*
-# force start the volume and trigger the self-heal manually [55-57]
-TEST $CLI volume start $V1 force
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
-# Since we created the file under root of the volume, it will be
-# healed automatically
-
-# check for the removed file in trashcan [58]
-EXPECT_WITHIN $HEAL_TIMEOUT "Y" wildcard_exists $B0/${V1}1/.trashcan/internal_op/self*
-
-# check renaming of trash directory through cli [59-62]
+# check renaming of trash directory through cli
TEST $CLI volume set $V0 trash-dir abc
TEST start_vol $V0 $M0 $M0/abc
TEST [ -e $M0/abc -a ! -e $M0/.trashcan ]
EXPECT "Y" wildcard_exists $B0/${V0}1/abc/internal_op/rebal*
-# ensure that rename and delete operation on trash directory fails [63-65]
+# ensure that rename and delete operation on trash directory fails
rm -rf $M0/abc/internal_op
TEST [ -e $M0/abc/internal_op ]
rm -rf $M0/abc/
diff --git a/tests/volume.rc b/tests/volume.rc
index 3924bae..b38848c 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -999,4 +999,4 @@ function logging_time_check()
local logfile=`echo ${0##*/}`_glusterd1.log
cat $logdir/1/$logfile | tail -n 2 | head -n 1 | grep $(date +%H:%M) | wc -l
-} \ No newline at end of file
+}
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 4c8fa31..1a4341a 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -45,6 +45,41 @@ afr_quorum_errno(afr_private_t *priv)
return ENOTCONN;
}
+gf_boolean_t
+afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
+ pid_t pid)
+{
+ if (!__is_root_gfid(pargfid)) {
+ return _gf_false;
+ }
+
+ if (strcmp(name, GF_REPLICATE_TRASH_DIR) == 0) {
+ /*For backward compatibility /.landfill is private*/
+ return _gf_true;
+ }
+
+ if (pid == GF_CLIENT_PID_GSYNCD) {
+ /*geo-rep needs to create/sync private directory on slave because
+ * it appears in changelog*/
+ return _gf_false;
+ }
+
+ if (pid == GF_CLIENT_PID_GLFS_HEAL || pid == GF_CLIENT_PID_SELF_HEALD) {
+ if (strcmp(name, priv->anon_inode_name) == 0) {
+ /* anonymous-inode dir is private*/
+ return _gf_true;
+ }
+ } else {
+ if (strncmp(name, AFR_ANON_DIR_PREFIX, strlen(AFR_ANON_DIR_PREFIX)) ==
+ 0) {
+ /* anonymous-inode dir prefix is private for geo-rep to work*/
+ return _gf_true;
+ }
+ }
+
+ return _gf_false;
+}
+
void
afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
unsigned char *replies)
@@ -3978,11 +4013,10 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
return 0;
}
- if (__is_root_gfid(loc->parent->gfid)) {
- if (!strcmp(loc->name, GF_REPLICATE_TRASH_DIR)) {
- op_errno = EPERM;
- goto out;
- }
+ if (afr_is_private_directory(this->private, loc->parent->gfid, loc->name,
+ frame->root->pid)) {
+ op_errno = EPERM;
+ goto out;
}
local = AFR_FRAME_INIT(frame, op_errno);
@@ -5660,6 +5694,7 @@ afr_priv_dump(xlator_t *this)
priv->background_self_heal_count);
gf_proc_dump_write("healers", "%d", priv->healers);
gf_proc_dump_write("read-hash-mode", "%d", priv->hash_mode);
+ gf_proc_dump_write("use-anonymous-inode", "%d", priv->use_anon_inode);
if (priv->quorum_count == AFR_QUORUM_AUTO) {
gf_proc_dump_write("quorum-type", "auto");
} else if (priv->quorum_count == 0) {
@@ -6653,6 +6688,7 @@ afr_priv_destroy(afr_private_t *priv)
GF_FREE(priv->local);
GF_FREE(priv->pending_key);
GF_FREE(priv->children);
+ GF_FREE(priv->anon_inode);
GF_FREE(priv->child_up);
GF_FREE(priv->halo_child_up);
GF_FREE(priv->child_latency);
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index f69013f..f8bf834 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -164,8 +164,8 @@ afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol)
}
static void
-afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol,
- gf_dirent_t *entries, fd_t *fd)
+afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries,
+ int subvol, gf_dirent_t *entries, fd_t *fd)
{
int ret = -1;
gf_dirent_t *entry = NULL;
@@ -183,8 +183,8 @@ afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol,
list_for_each_entry_safe(entry, tmp, &subvol_entries->list, list)
{
- if (__is_root_gfid(fd->inode->gfid) &&
- !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) {
+ if (afr_is_private_directory(priv, fd->inode->gfid, entry->d_name,
+ frame->root->pid)) {
continue;
}
@@ -228,8 +228,8 @@ afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
if (op_ret >= 0)
- afr_readdir_transform_entries(subvol_entries, (long)cookie, &entries,
- local->fd);
+ afr_readdir_transform_entries(frame, subvol_entries, (long)cookie,
+ &entries, local->fd);
AFR_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, xdata);
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index f35c41d..a580a15 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -2750,3 +2750,185 @@ afr_choose_source_by_policy(afr_private_t *priv, unsigned char *sources,
out:
return source;
}
+
+static int
+afr_anon_inode_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ int i = (long)cookie;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->replies[i].poststat = *buf;
+ local->replies[i].preparent = *preparent;
+ local->replies[i].postparent = *postparent;
+ }
+ if (xdata) {
+ local->replies[i].xdata = dict_ref(xdata);
+ }
+
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
+
+int
+afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode)
+{
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = this->private;
+ unsigned char *mkdir_on = alloca0(priv->child_count);
+ unsigned char *lookup_on = alloca0(priv->child_count);
+ loc_t loc = {0};
+ int32_t op_errno = 0;
+ int32_t child_op_errno = 0;
+ struct iatt iatt = {0};
+ dict_t *xdata = NULL;
+ uuid_t anon_inode_gfid = {0};
+ int mkdir_count = 0;
+ int i = 0;
+
+ /*Try to mkdir everywhere and return success if the dir exists on 'child'
+ */
+
+ if (!priv->use_anon_inode) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ frame = afr_frame_create(this, &op_errno);
+ if (op_errno) {
+ goto out;
+ }
+ local = frame->local;
+ if (!local->child_up[child]) {
+ /*Other bricks may need mkdir so don't error out yet*/
+ child_op_errno = ENOTCONN;
+ }
+ gf_uuid_parse(priv->anon_gfid_str, anon_inode_gfid);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+
+ if (priv->anon_inode[i]) {
+ mkdir_on[i] = 0;
+ } else {
+ mkdir_on[i] = 1;
+ mkdir_count++;
+ }
+ }
+
+ if (mkdir_count == 0) {
+ *linked_inode = inode_find(this->itable, anon_inode_gfid);
+ if (*linked_inode) {
+ op_errno = 0;
+ goto out;
+ }
+ }
+
+ loc.parent = inode_ref(this->itable->root);
+ loc.name = priv->anon_inode_name;
+ loc.inode = inode_new(this->itable);
+ if (!loc.inode) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ xdata = dict_new();
+ if (!xdata) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_errno = -dict_set_gfuuid(xdata, "gfid-req", anon_inode_gfid, _gf_true);
+ if (op_errno) {
+ goto out;
+ }
+
+ if (mkdir_count == 0) {
+ memcpy(lookup_on, local->child_up, priv->child_count);
+ goto lookup;
+ }
+
+ AFR_ONLIST(mkdir_on, frame, afr_anon_inode_mkdir_cbk, mkdir, &loc, 0755, 0,
+ xdata);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!mkdir_on[i]) {
+ continue;
+ }
+
+ if (local->replies[i].op_ret == 0) {
+ priv->anon_inode[i] = 1;
+ iatt = local->replies[i].poststat;
+ } else if (local->replies[i].op_ret < 0 &&
+ local->replies[i].op_errno == EEXIST) {
+ lookup_on[i] = 1;
+ } else if (i == child) {
+ child_op_errno = local->replies[i].op_errno;
+ }
+ }
+
+ if (AFR_COUNT(lookup_on, priv->child_count) == 0) {
+ goto link;
+ }
+
+lookup:
+ AFR_ONLIST(lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ xdata);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!lookup_on[i]) {
+ continue;
+ }
+
+ if (local->replies[i].op_ret == 0) {
+ if (gf_uuid_compare(anon_inode_gfid,
+ local->replies[i].poststat.ia_gfid) == 0) {
+ priv->anon_inode[i] = 1;
+ iatt = local->replies[i].poststat;
+ } else {
+ if (i == child)
+ child_op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_DATA,
+ "%s has gfid: %s", priv->anon_inode_name,
+ uuid_utoa(local->replies[i].poststat.ia_gfid));
+ }
+ } else if (i == child) {
+ child_op_errno = local->replies[i].op_errno;
+ }
+ }
+link:
+ if (!gf_uuid_is_null(iatt.ia_gfid)) {
+ *linked_inode = inode_link(loc.inode, loc.parent, loc.name, &iatt);
+ if (*linked_inode) {
+ op_errno = 0;
+ inode_lookup(*linked_inode);
+ } else {
+ op_errno = ENOMEM;
+ }
+ goto out;
+ }
+
+out:
+ if (xdata)
+ dict_unref(xdata);
+ loc_wipe(&loc);
+ /*child_op_errno takes precedence*/
+ if (child_op_errno == 0) {
+ child_op_errno = op_errno;
+ }
+
+ if (child_op_errno && *linked_inode) {
+ inode_unref(*linked_inode);
+ *linked_inode = NULL;
+ }
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ return -child_op_errno;
+}
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index ac31751..64893f4 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -16,54 +16,170 @@
#include <glusterfs/syncop-utils.h>
#include <glusterfs/events.h>
-static int
-afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
- inode_t *inode, int child, struct afr_reply *replies)
+int
+afr_selfheal_entry_anon_inode(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child,
+ struct afr_reply *replies,
+ gf_boolean_t *anon_inode)
{
afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
xlator_t *subvol = NULL;
int ret = 0;
+ int i = 0;
+ char g[64] = {0};
+ unsigned char *lookup_success = NULL;
+ call_frame_t *frame = NULL;
+ loc_t loc2 = {
+ 0,
+ };
loc_t loc = {
0,
};
- char g[64];
priv = this->private;
-
subvol = priv->children[child];
+ lookup_success = alloca0(priv->child_count);
+ uuid_utoa_r(replies[child].poststat.ia_gfid, g);
+ loc.inode = inode_new(inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (replies[child].poststat.ia_type == IA_IFDIR) {
+ /* This directory may have sub-directory hierarchy which may need to
+ * be preserved for subsequent heals. So unconditionally move the
+ * directory to anonymous-inode directory*/
+ *anon_inode = _gf_true;
+ goto anon_inode;
+ }
+
+ frame = afr_frame_create(this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+ local = frame->local;
+ gf_uuid_copy(loc.gfid, replies[child].poststat.ia_gfid);
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ lookup_success[i] = 1;
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ ret = -local->replies[i].op_errno;
+ }
+ }
+
+ if (priv->quorum_count) {
+ if (afr_has_quorum(lookup_success, this, NULL)) {
+ *anon_inode = _gf_true;
+ }
+ } else if (AFR_COUNT(lookup_success, priv->child_count) > 1) {
+ *anon_inode = _gf_true;
+ } else if (ret) {
+ goto out;
+ }
+
+anon_inode:
+ if (!*anon_inode) {
+ ret = 0;
+ goto out;
+ }
loc.parent = inode_ref(dir);
gf_uuid_copy(loc.pargfid, dir->gfid);
loc.name = name;
- loc.inode = inode_ref(inode);
- if (replies[child].valid && replies[child].op_ret == 0) {
- switch (replies[child].poststat.ia_type) {
- case IA_IFDIR:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid),
- name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
- subvol->name);
- ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL);
- break;
- default:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid),
- name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
- subvol->name);
- ret = syncop_unlink(subvol, &loc, NULL, NULL);
- break;
- }
+ ret = afr_anon_inode_create(this, child, &loc2.parent);
+ if (ret < 0)
+ goto out;
+
+ loc2.name = g;
+ ret = syncop_rename(subvol, &loc, &loc2, NULL, NULL);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "Rename to %s dir %s/%s (%s) on %s failed",
+ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g,
+ subvol->name);
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "Rename to %s dir %s/%s (%s) on %s successful",
+ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g,
+ subvol->name);
}
+out:
loc_wipe(&loc);
+ loc_wipe(&loc2);
+ if (frame) {
+ AFR_STACK_DESTROY(frame);
+ }
return ret;
}
int
+afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child, struct afr_reply *replies)
+{
+ char g[64] = {0};
+ afr_private_t *priv = NULL;
+ xlator_t *subvol = NULL;
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ gf_boolean_t anon_inode = _gf_false;
+
+ priv = this->private;
+ subvol = priv->children[child];
+
+ if ((!replies[child].valid) || (replies[child].op_ret < 0)) {
+ /*Nothing to do*/
+ ret = 0;
+ goto out;
+ }
+
+ if (priv->use_anon_inode) {
+ ret = afr_selfheal_entry_anon_inode(this, dir, name, inode, child,
+ replies, &anon_inode);
+ if (ret < 0 || anon_inode)
+ goto out;
+ }
+
+ loc.parent = inode_ref(dir);
+ loc.inode = inode_new(inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ loc.name = name;
+ switch (replies[child].poststat.ia_type) {
+ case IA_IFDIR:
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), name,
+ uuid_utoa_r(replies[child].poststat.ia_gfid, g),
+ subvol->name);
+ ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL);
+ break;
+ default:
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid),
+ name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
+ subvol->name);
+ ret = syncop_unlink(subvol, &loc, NULL, NULL);
+ break;
+ }
+
+out:
+ loc_wipe(&loc);
+ return ret;
+}
+
+int
afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
unsigned char *sources, inode_t *dir,
const char *name, inode_t *inode,
@@ -76,6 +192,9 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
loc_t srcloc = {
0,
};
+ loc_t anonloc = {
+ 0,
+ };
xlator_t *this = frame->this;
afr_private_t *priv = NULL;
dict_t *xdata = NULL;
@@ -86,15 +205,17 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
0,
};
unsigned char *newentry = NULL;
- char dir_uuid_str[64] = {0}, iatt_uuid_str[64] = {0};
+ char iatt_uuid_str[64] = {0};
+ char dir_uuid_str[64] = {0};
priv = this->private;
iatt = &replies[source].poststat;
+ uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str);
if (iatt->ia_type == IA_INVAL || gf_uuid_is_null(iatt->ia_gfid)) {
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SELF_HEAL_FAILED,
"Invalid ia_type (%d) or gfid(%s). source brick=%d, "
"pargfid=%s, name=%s",
- iatt->ia_type, uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str), source,
+ iatt->ia_type, iatt_uuid_str, source,
uuid_utoa_r(dir->gfid, dir_uuid_str), name);
ret = -EINVAL;
goto out;
@@ -120,14 +241,24 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
srcloc.inode = inode_ref(inode);
gf_uuid_copy(srcloc.gfid, iatt->ia_gfid);
- if (iatt->ia_type != IA_IFDIR)
- ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0);
- if (iatt->ia_type == IA_IFDIR || ret == -ENOENT || ret == -ESTALE) {
+ ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0);
+ if (ret == -ENOENT || ret == -ESTALE) {
newentry[dst] = 1;
ret = afr_selfheal_newentry_mark(frame, this, inode, source, replies,
sources, newentry);
if (ret)
goto out;
+ } else if (ret == 0 && iatt->ia_type == IA_IFDIR && priv->use_anon_inode) {
+ // Try rename from hidden directory
+ ret = afr_anon_inode_create(this, dst, &anonloc.parent);
+ if (ret < 0)
+ goto out;
+ anonloc.inode = inode_ref(inode);
+ anonloc.name = iatt_uuid_str;
+ ret = syncop_rename(priv->children[dst], &anonloc, &loc, NULL, NULL);
+ if (ret == -ENOENT || ret == -ESTALE)
+ ret = -1; /*This sets 'mismatch' to true*/
+ goto out;
}
mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type);
@@ -166,6 +297,7 @@ out:
GF_FREE(linkname);
loc_wipe(&loc);
loc_wipe(&srcloc);
+ loc_wipe(&anonloc);
return ret;
}
@@ -578,6 +710,11 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
+ if (afr_is_private_directory(priv, fd->inode->gfid, name,
+ GF_CLIENT_PID_SELF_HEALD)) {
+ return 0;
+ }
+
xattr = dict_new();
if (!xattr)
return -ENOMEM;
@@ -626,7 +763,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
replies);
if ((ret == 0) && (priv->esh_granular) && parent_idx_inode) {
- ret = afr_shd_index_purge(subvol, parent_idx_inode, name,
+ ret = afr_shd_entry_purge(subvol, parent_idx_inode, name,
inode->ia_type);
/* Why is ret force-set to 0? We do not care about
* index purge failing for full heal as it is quite
@@ -756,10 +893,6 @@ afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd,
if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
continue;
- if (__is_root_gfid(fd->inode->gfid) &&
- !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR))
- continue;
-
ret = afr_selfheal_entry_dirent(iter_frame, this, fd, entry->d_name,
loc.inode, subvol,
local->need_full_crawl);
@@ -822,7 +955,7 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry,
/* The name indices under the pgfid index dir are guaranteed
* to be regular files. Hence the hardcoding.
*/
- afr_shd_index_purge(subvol, parent->inode, entry->d_name, IA_IFREG);
+ afr_shd_entry_purge(subvol, parent->inode, entry->d_name, IA_IFREG);
ret = 0;
goto out;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
index dd40c57..834aac8 100644
--- a/xlators/cluster/afr/src/afr-self-heal-name.c
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
@@ -98,21 +98,12 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid,
const char *bname, inode_t *inode,
struct afr_reply *replies)
{
- loc_t loc = {
- 0,
- };
int i = 0;
afr_private_t *priv = NULL;
- char g[64];
int ret = 0;
priv = this->private;
- loc.parent = inode_ref(parent);
- gf_uuid_copy(loc.pargfid, pargfid);
- loc.name = bname;
- loc.inode = inode_ref(inode);
-
for (i = 0; i < priv->child_count; i++) {
if (!replies[i].valid)
continue;
@@ -120,30 +111,10 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid,
if (replies[i].op_ret)
continue;
- switch (replies[i].poststat.ia_type) {
- case IA_IFDIR:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging dir %s/%s (%s) on %s", uuid_utoa(pargfid),
- bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g),
- priv->children[i]->name);
-
- ret |= syncop_rmdir(priv->children[i], &loc, 1, NULL, NULL);
- break;
- default:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging file %s/%s (%s) on %s", uuid_utoa(pargfid),
- bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g),
- priv->children[i]->name);
-
- ret |= syncop_unlink(priv->children[i], &loc, NULL, NULL);
- break;
- }
+ ret |= afr_selfheal_entry_delete(this, parent, bname, inode, i,
+ replies);
}
- loc_wipe(&loc);
-
return ret;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index 7a038fa..48e6dbc 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -369,4 +369,9 @@ gf_boolean_t
afr_is_file_empty_on_all_children(afr_private_t *priv,
struct afr_reply *replies);
+int
+afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child, struct afr_reply *replies);
+int
+afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode);
#endif /* !_AFR_SELFHEAL_H */
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index f2e0890..109fd4b 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -222,7 +222,7 @@ out:
}
int
-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name,
+afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
ia_type_t type)
{
int ret = 0;
@@ -424,7 +424,7 @@ afr_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
ret = afr_shd_selfheal(healer, healer->subvol, gfid);
if (ret == -ENOENT || ret == -ESTALE)
- afr_shd_index_purge(subvol, parent->inode, entry->d_name, val);
+ afr_shd_entry_purge(subvol, parent->inode, entry->d_name, val);
if (ret == 2)
/* If bricks crashed in pre-op after creating indices/xattrop
@@ -843,6 +843,176 @@ out:
return need_heal;
}
+static int
+afr_shd_anon_inode_cleaner(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data)
+{
+ struct subvol_healer *healer = data;
+ afr_private_t *priv = healer->this->private;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = 0;
+ loc_t loc = {0};
+ int count = 0;
+ int i = 0;
+ int op_errno = 0;
+ struct iatt *iatt = NULL;
+ gf_boolean_t multiple_links = _gf_false;
+ unsigned char *gfid_present = alloca0(priv->child_count);
+ unsigned char *entry_present = alloca0(priv->child_count);
+ char *type = "file";
+
+ frame = afr_frame_create(healer->this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+ local = frame->local;
+ if (AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) {
+ gf_msg_debug(healer->this->name, 0,
+ "Not all bricks are up. Skipping "
+ "cleanup of %s on %s",
+ entry->d_name, subvol->name);
+ ret = 0;
+ goto out;
+ }
+
+ loc.inode = inode_new(parent->inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = gf_uuid_parse(entry->d_name, loc.gfid);
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ count++;
+ gfid_present[i] = 1;
+ iatt = &local->replies[i].poststat;
+ if (iatt->ia_type == IA_IFDIR) {
+ type = "dir";
+ }
+
+ if (i == healer->subvol) {
+ if (local->replies[i].poststat.ia_nlink > 1) {
+ multiple_links = _gf_true;
+ }
+ }
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ /*We don't have complete view. Skip the entry*/
+ gf_msg_debug(healer->this->name, local->replies[i].op_errno,
+ "Skipping cleanup of %s on %s", entry->d_name,
+ subvol->name);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ /*Inode is deleted from subvol*/
+ if (count == 1 || (iatt->ia_type != IA_IFDIR && multiple_links)) {
+ gf_msg(healer->this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_EXPUNGING_FILE_OR_DIR, "expunging %s %s/%s on %s", type,
+ priv->anon_inode_name, entry->d_name, subvol->name);
+ ret = afr_shd_entry_purge(subvol, parent->inode, entry->d_name,
+ iatt->ia_type);
+ if (ret == -ENOENT || ret == -ESTALE)
+ ret = 0;
+ } else if (count > 1) {
+ loc_wipe(&loc);
+ loc.parent = inode_ref(parent->inode);
+ loc.name = entry->d_name;
+ loc.inode = inode_new(parent->inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup,
+ &loc, NULL);
+ count = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ count++;
+ entry_present[i] = 1;
+ iatt = &local->replies[i].poststat;
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ /*We don't have complete view. Skip the entry*/
+ gf_msg_debug(healer->this->name, local->replies[i].op_errno,
+ "Skipping cleanup of %s on %s", entry->d_name,
+ subvol->name);
+ ret = 0;
+ goto out;
+ }
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (gfid_present[i] && !entry_present[i]) {
+ /*Entry is not anonymous on at least one subvol*/
+ gf_msg_debug(healer->this->name, 0,
+ "Valid entry present on %s "
+ "Skipping cleanup of %s on %s",
+ priv->children[i]->name, entry->d_name,
+ subvol->name);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ gf_msg(healer->this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging %s %s/%s on all subvols", type, priv->anon_inode_name,
+ entry->d_name);
+ ret = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ op_errno = -afr_shd_entry_purge(priv->children[i], loc.parent,
+ entry->d_name, iatt->ia_type);
+ if (op_errno != ENOENT && op_errno != ESTALE) {
+ ret |= -op_errno;
+ }
+ }
+ }
+
+out:
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ loc_wipe(&loc);
+ return ret;
+}
+
+static void
+afr_cleanup_anon_inode_dir(struct subvol_healer *healer)
+{
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ afr_private_t *priv = healer->this->private;
+ loc_t loc = {0};
+
+ ret = afr_anon_inode_create(healer->this, healer->subvol, &loc.inode);
+ if (ret)
+ goto out;
+
+ frame = afr_frame_create(healer->this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+
+ ret = syncop_mt_dir_scan(frame, priv->children[healer->subvol], &loc,
+ GF_CLIENT_PID_SELF_HEALD, healer,
+ afr_shd_anon_inode_cleaner, NULL,
+ priv->shd.max_threads, priv->shd.wait_qlength);
+out:
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ loc_wipe(&loc);
+ return;
+}
+
void *
afr_shd_index_healer(void *data)
{
@@ -900,6 +1070,10 @@ afr_shd_index_healer(void *data)
sleep(1);
} while (ret > 0);
+ if (ret == 0) {
+ afr_cleanup_anon_inode_dir(healer);
+ }
+
if (ret == 0 && pre_crawl_xdata &&
!healer->crawl_event.heal_failed_count) {
afr_shd_ta_check_and_unset_xattrs(this, &loc, healer,
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
index 687c28e..18db728 100644
--- a/xlators/cluster/afr/src/afr-self-heald.h
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -70,6 +70,6 @@ afr_shd_gfid_to_path(xlator_t *this, xlator_t *subvol, uuid_t gfid,
char **path_p);
int
-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name,
+afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
ia_type_t type);
#endif /* !_AFR_SELF_HEALD_H */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 67e0a4d..df7366f 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -135,6 +135,27 @@ set_data_self_heal_algorithm(afr_private_t *priv, char *algo)
}
}
+void
+afr_handle_anon_inode_options(afr_private_t *priv, dict_t *options)
+{
+ char *volfile_id_str = NULL;
+ uuid_t anon_inode_gfid = {0};
+
+ /*If volume id is not present don't enable anything*/
+ if (dict_get_str(options, "volume-id", &volfile_id_str))
+ return;
+ GF_ASSERT(strlen(AFR_ANON_DIR_PREFIX) + strlen(volfile_id_str) <= NAME_MAX);
+ /*anon_inode_name is not supposed to change once assigned*/
+ if (!priv->anon_inode_name[0]) {
+ snprintf(priv->anon_inode_name, sizeof(priv->anon_inode_name), "%s-%s",
+ AFR_ANON_DIR_PREFIX, volfile_id_str);
+ gf_uuid_parse(volfile_id_str, anon_inode_gfid);
+ /*Flip a bit to make sure volfile-id and anon-gfid are not same*/
+ anon_inode_gfid[0] ^= 1;
+ uuid_utoa_r(anon_inode_gfid, priv->anon_gfid_str);
+ }
+}
+
int
reconfigure(xlator_t *this, dict_t *options)
{
@@ -290,6 +311,10 @@ reconfigure(xlator_t *this, dict_t *options)
consistent_io = _gf_false;
priv->consistent_io = consistent_io;
+ afr_handle_anon_inode_options(priv, options);
+
+ GF_OPTION_RECONF("use-anonymous-inode", priv->use_anon_inode, options, bool,
+ out);
if (priv->shd.enabled) {
if ((priv->shd.enabled != enabled_old) ||
(timeout_old != priv->shd.timeout))
@@ -541,7 +566,9 @@ init(xlator_t *this)
GF_OPTION_INIT("consistent-metadata", priv->consistent_metadata, bool, out);
GF_OPTION_INIT("consistent-io", priv->consistent_io, bool, out);
+ afr_handle_anon_inode_options(priv, this->options);
+ GF_OPTION_INIT("use-anonymous-inode", priv->use_anon_inode, bool, out);
if (priv->quorum_count != 0)
priv->consistent_io = _gf_false;
@@ -553,6 +580,9 @@ init(xlator_t *this)
goto out;
}
+ priv->anon_inode = GF_CALLOC(sizeof(unsigned char), child_count,
+ gf_afr_mt_char);
+
priv->child_up = GF_CALLOC(sizeof(unsigned char), child_count,
gf_afr_mt_char);
@@ -561,7 +591,8 @@ init(xlator_t *this)
priv->halo_child_up = GF_CALLOC(sizeof(unsigned char), child_count,
gf_afr_mt_char);
- if (!priv->child_up || !priv->child_latency || !priv->halo_child_up) {
+ if (!priv->child_up || !priv->child_latency || !priv->halo_child_up ||
+ !priv->anon_inode) {
ret = -ENOMEM;
goto out;
}
@@ -1286,6 +1317,14 @@ struct volume_options options[] = {
.tags = {"replicate"},
.description = "This option exists only for backward compatibility "
"and configuring it doesn't have any effect"},
+ {.key = {"use-anonymous-inode"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_8_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
+ .tags = {"replicate"},
+ .description = "Setting this option heals directory renames efficiently"},
+
{.key = {NULL}},
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 2e60708..d62f9a9 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -42,6 +42,7 @@
#define AFR_LK_HEAL_DOM "afr.lock-heal.domain"
#define AFR_HALO_MAX_LATENCY 99999
+#define AFR_ANON_DIR_PREFIX ".glusterfs-anonymous-inode"
#define PFLAG_PENDING (1 << 0)
#define PFLAG_SBRAIN (1 << 1)
@@ -190,6 +191,7 @@ typedef struct _afr_private {
struct list_head ta_waitq;
struct list_head ta_onwireq;
+ unsigned char *anon_inode;
unsigned char *child_up;
unsigned char *halo_child_up;
int64_t *child_latency;
@@ -275,10 +277,15 @@ typedef struct _afr_private {
gf_boolean_t esh_granular;
gf_boolean_t consistent_io;
gf_boolean_t data_self_heal; /* on/off */
+ gf_boolean_t use_anon_inode;
/*For lock healing.*/
struct list_head saved_locks;
struct list_head lk_healq;
+
+ /*For anon-inode handling */
+ char anon_inode_name[NAME_MAX + 1];
+ char anon_gfid_str[UUID_SIZE + 1];
} afr_private_t;
typedef enum {
@@ -1409,4 +1416,8 @@ afr_dom_lock_release(call_frame_t *frame);
void
afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
unsigned char *replies);
+
+gf_boolean_t
+afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
+ pid_t pid);
#endif /* __AFR_H__ */
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index 4e5712e..8d6fb5e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -3810,6 +3810,38 @@ out:
}
static int
+set_volfile_id_option(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+ int clusters)
+{
+ xlator_t *xlator = NULL;
+ int i = 0;
+ int ret = -1;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ if (conf->op_version < GD_OP_VERSION_9_0)
+ return 0;
+ xlator = first_of(graph);
+
+ for (i = 0; i < clusters; i++) {
+ ret = xlator_set_fixed_option(xlator, "volume-id",
+ uuid_utoa(volinfo->volume_id));
+ if (ret)
+ goto out;
+
+ xlator = xlator->next;
+ }
+
+out:
+ return ret;
+}
+
+static int
volgen_graph_build_afr_clusters(volgen_graph_t *graph,
glusterd_volinfo_t *volinfo)
{
@@ -3851,6 +3883,13 @@ volgen_graph_build_afr_clusters(volgen_graph_t *graph,
clusters = -1;
goto out;
}
+
+ ret = set_volfile_id_option(graph, volinfo, clusters);
+ if (ret) {
+ clusters = -1;
+ goto out;
+ }
+
if (!volinfo->arbiter_count && !volinfo->thin_arbiter_count)
goto out;
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index f74876e..398b4d7 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -3138,4 +3138,9 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.type = NO_DOC,
},
+ {.key = "cluster.use-anonymous-inode",
+ .voltype = "cluster/replicate",
+ .op_version = GD_OP_VERSION_9_0,
+ .value = "yes",
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
{.key = NULL}};