diff options
authorKotresh HR <>2016-09-06 18:28:42 +0530
committerRaghavendra Bhat <>2016-09-09 07:05:06 -0700
commit22ea98a31f147bcd1e4643c2b77f503c63b03a4e (patch)
parentcb15b3be846d6ff0be450b245aba17ba67457b1e (diff)
feature/bitrot: Fix recovery of corrupted hardlink
Problem: When a file with a hardlink is corrupted in an ec volume, the documented recovery steps were not working. Only the name and metadata were healed, but not the data. Cause: The bad file marker in the inode context is not removed. Hence, when self-heal tries to open the file for data healing, it fails with EIO. Background: Bitrot deletes the inode context during forget. Briefly, the recovery involves the following steps. 1. Delete the entry marked with the bad file xattr from the backend. Delete all the hardlinks as well, including the .glusterfs hardlink. 2. Access each hardlink of the file, including the original, from the mount. Step 2 sends a lookup to the brick where the files were deleted from the backend, and the lookup returns ENOENT. On ENOENT, the server xlator forgets the inode if there are no dentries associated with it. But in the case of hardlinks, forget won't be called, as dentries (the other hardlink files) are still associated with the inode. Hence the bitrot stub won't delete its context, causing the data self-heal to fail. Fix: Bitrot-stub should delete the inode context on getting ENOENT during lookup. >Change-Id: Ice6adc18625799e7afd842ab33b3517c2be264c1 >BUG: 1373520 >Signed-off-by: Kotresh HR <> >Reviewed-on: >Smoke: Gluster Build System <> >NetBSD-regression: NetBSD Build System <> >CentOS-regression: Gluster Build System <> >Reviewed-by: Raghavendra Bhat <> (cherry picked from commit b86a7de9b5ea9dcd0a630dbe09fce6d9ad0d8944) Change-Id: Ice6adc18625799e7afd842ab33b3517c2be264c1 BUG: 1374565 Signed-off-by: Kotresh HR <> Reviewed-on: Smoke: Gluster Build System <> NetBSD-regression: NetBSD Build System <> CentOS-regression: Gluster Build System <> Reviewed-by: Raghavendra Bhat <>
4 files changed, 80 insertions, 5 deletions
diff --git a/tests/bitrot/bug-1294786.t b/tests/bitrot/bug-1294786.t
index 4911c1d..d395d76 100644
--- a/tests/bitrot/bug-1294786.t
+++ b/tests/bitrot/bug-1294786.t
@@ -16,10 +16,6 @@ function get_node_uuid {
getfattr -n trusted.glusterfs.node-uuid --only-values $M0/FILE1 2>/dev/null
-function get_quarantine_count {
- ls -l "$B1/.glusterfs/quanrantine" | wc -l
TEST launch_cluster 2
@@ -64,7 +60,7 @@ TEST touch "$B1/.glusterfs/quanrantine/$gfid1"
TEST chmod 000 "$B1/.glusterfs/quanrantine/$gfid1"
TEST touch "$B1/.glusterfs/quanrantine/$gfid2"
TEST chmod 000 "$B1/.glusterfs/quanrantine/$gfid2"
-EXPECT "4" get_quarantine_count;
+EXPECT "4" get_quarantine_count "$B1";
TEST $CLI_1 volume stop $V0
TEST $CLI_1 volume start $V0
diff --git a/tests/bitrot/bug-1373520.t b/tests/bitrot/bug-1373520.t
new file mode 100644
index 0000000..3a0ac52
--- /dev/null
+++ b/tests/bitrot/bug-1373520.t
@@ -0,0 +1,63 @@
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+TEST glusterd
+TEST pidof glusterd
+#Create a disperse volume
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status'
+#Disable md-cache
+TEST $CLI volume set $V0 performance.stat-prefetch off
+#Mount the volume
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+#Enable bitrot
+TEST $CLI volume bitrot $V0 enable
+#Create sample file
+TEST `echo "1234" > $M0/FILE1`
+#Create hardlink
+TEST `ln $M0/FILE1 $M0/HL_FILE1`
+#Corrupt file from back-end
+TEST stat $B0/${V0}5/FILE1
+SIZE=$(stat -c %s $B0/${V0}5/FILE1)
+echo "Corrupted data" >> $B0/${V0}5/FILE1
+gfid1=$(getfattr -n glusterfs.gfid.string --only-values $M0/FILE1)
+#Manually set bad-file xattr
+TEST setfattr -n trusted.bit-rot.bad-file -v 0x3100 $B0/${V0}5/FILE1
+TEST touch "$B0/${V0}5/.glusterfs/quanrantine/$gfid1"
+TEST chmod 000 "$B0/${V0}5/.glusterfs/quanrantine/$gfid1"
+EXPECT "3" get_quarantine_count "$B0/${V0}5";
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+#Trigger lookup so that bitrot xlator marks file as bad in its inode context.
+TEST stat $M0/FILE1
+#Delete file and all links from backend
+TEST stat $B0/${V0}5/FILE1
+TEST `ls -li $B0/${V0}5/FILE1 | awk '{print $1}' | xargs find $B0/${V0}5/ -inum | xargs -r rm -rf`
+#Access files
+TEST cat $M0/FILE1
+TEST cat $M0/HL_FILE1
diff --git a/tests/volume.rc b/tests/volume.rc
index 77e080d..5ea75a5 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -587,6 +587,10 @@ function get_scrubd_count {
ps auxww | grep glusterfs | grep | grep -v grep | wc -l
+function get_quarantine_count {
+ ls -l "$1/.glusterfs/quanrantine" | wc -l
function get_quotad_count {
ps auxww | grep glusterfs | grep | grep -v grep | wc -l
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
index 67103f6..4e01f5c 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
@@ -2631,6 +2631,18 @@ br_stub_handle_lookup_error (xlator_t *this, inode_t *inode, int32_t op_errno)
UNLOCK (&inode->lock);
+ if (__br_stub_is_bad_object (ctx)) {
+ /* File is not present, might be deleted for recovery,
+ * del the bitrot inode context
+ */
+ ctx_addr = 0;
+ inode_ctx_del (inode, this, &ctx_addr);
+ if (ctx_addr) {
+ ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+ GF_FREE (ctx);
+ }
+ }