summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKotresh HR <khiremat@redhat.com>2016-09-06 18:28:42 +0530
committerRaghavendra Bhat <raghavendra@redhat.com>2016-09-09 07:05:06 -0700
commit22ea98a31f147bcd1e4643c2b77f503c63b03a4e (patch)
treed0d4d2abc0886cd535d077e6fc4fb484465d2f73
parentcb15b3be846d6ff0be450b245aba17ba67457b1e (diff)
feature/bitrot: Fix recovery of corrupted hardlink
Problem: When a file with a hardlink is corrupted in an ec volume, the recovery steps mentioned were not working. Only the name and metadata were healed, but not the data. Cause: The bad file marker in the inode context is not removed. Hence, when self heal tries to open the file for data healing, it fails with EIO. Background: Bitrot deletes its inode context during forget. Briefly, the recovery involves the following steps. 1. Delete the entry marked with the bad file xattr from the backend. Delete all the hardlinks, including the .glusterfs hardlink as well. 2. Access each hardlink of the file, including the original, from the mount. Step 2 will send a lookup to the brick where the files were deleted from the backend, and the lookup returns ENOENT. On ENOENT, the server xlator forgets the inode if there are no dentries associated with it. But in the case of hardlinks, forget won't be called, as dentries (the other hardlink files) are still associated with the inode. Hence the bitrot stub won't delete its context, causing the data self heal to fail. Fix: Bitrot-stub should delete the inode context on getting ENOENT during lookup. >Change-Id: Ice6adc18625799e7afd842ab33b3517c2be264c1 >BUG: 1373520 >Signed-off-by: Kotresh HR <khiremat@redhat.com> >Reviewed-on: http://review.gluster.org/15408 >Smoke: Gluster Build System <jenkins@build.gluster.org> >NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> >CentOS-regression: Gluster Build System <jenkins@build.gluster.org> >Reviewed-by: Raghavendra Bhat <raghavendra@redhat.com> (cherry picked from commit b86a7de9b5ea9dcd0a630dbe09fce6d9ad0d8944) Change-Id: Ice6adc18625799e7afd842ab33b3517c2be264c1 BUG: 1374565 Signed-off-by: Kotresh HR <khiremat@redhat.com> Reviewed-on: http://review.gluster.org/15433 Smoke: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Raghavendra Bhat <raghavendra@redhat.com>
-rw-r--r--tests/bitrot/bug-1294786.t6
-rw-r--r--tests/bitrot/bug-1373520.t63
-rw-r--r--tests/volume.rc4
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub.c12
4 files changed, 80 insertions, 5 deletions
diff --git a/tests/bitrot/bug-1294786.t b/tests/bitrot/bug-1294786.t
index 4911c1dd6c2..d395d76b7ed 100644
--- a/tests/bitrot/bug-1294786.t
+++ b/tests/bitrot/bug-1294786.t
@@ -16,10 +16,6 @@ function get_node_uuid {
getfattr -n trusted.glusterfs.node-uuid --only-values $M0/FILE1 2>/dev/null
}
-function get_quarantine_count {
- ls -l "$B1/.glusterfs/quanrantine" | wc -l
-}
-
cleanup;
TEST launch_cluster 2
@@ -64,7 +60,7 @@ TEST touch "$B1/.glusterfs/quanrantine/$gfid1"
TEST chmod 000 "$B1/.glusterfs/quanrantine/$gfid1"
TEST touch "$B1/.glusterfs/quanrantine/$gfid2"
TEST chmod 000 "$B1/.glusterfs/quanrantine/$gfid2"
-EXPECT "4" get_quarantine_count;
+EXPECT "4" get_quarantine_count "$B1";
TEST $CLI_1 volume stop $V0
TEST $CLI_1 volume start $V0
diff --git a/tests/bitrot/bug-1373520.t b/tests/bitrot/bug-1373520.t
new file mode 100644
index 00000000000..3a0ac5293e0
--- /dev/null
+++ b/tests/bitrot/bug-1373520.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+#Create a disperse volume
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status'
+
+#Disable md-cache
+TEST $CLI volume set $V0 performance.stat-prefetch off
+
+#Mount the volume
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+#Enable bitrot
+TEST $CLI volume bitrot $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+#Create sample file
+TEST `echo "1234" > $M0/FILE1`
+#Create hardlink
+TEST `ln $M0/FILE1 $M0/HL_FILE1`
+
+#Corrupt file from back-end
+TEST stat $B0/${V0}5/FILE1
+SIZE=$(stat -c %s $B0/${V0}5/FILE1)
+echo "Corrupted data" >> $B0/${V0}5/FILE1
+gfid1=$(getfattr -n glusterfs.gfid.string --only-values $M0/FILE1)
+
+#Manually set bad-file xattr
+TEST setfattr -n trusted.bit-rot.bad-file -v 0x3100 $B0/${V0}5/FILE1
+TEST touch "$B0/${V0}5/.glusterfs/quanrantine/$gfid1"
+TEST chmod 000 "$B0/${V0}5/.glusterfs/quanrantine/$gfid1"
+EXPECT "3" get_quarantine_count "$B0/${V0}5";
+
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+#Trigger lookup so that bitrot xlator marks file as bad in its inode context.
+TEST stat $M0/FILE1
+
+#Delete file and all links from backend
+TEST stat $B0/${V0}5/FILE1
+TEST `ls -li $B0/${V0}5/FILE1 | awk '{print $1}' | xargs find $B0/${V0}5/ -inum | xargs -r rm -rf`
+
+#Access files
+TEST cat $M0/FILE1
+EXPECT_WITHIN $HEAL_TIMEOUT "$SIZE" stat -c %s $B0/${V0}5/FILE1
+
+TEST cat $M0/HL_FILE1
+EXPECT_WITHIN $HEAL_TIMEOUT "$SIZE" stat -c %s $B0/${V0}5/HL_FILE1
+
+cleanup;
diff --git a/tests/volume.rc b/tests/volume.rc
index 77e080d152a..5ea75a51d22 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -587,6 +587,10 @@ function get_scrubd_count {
ps auxww | grep glusterfs | grep scrub.pid | grep -v grep | wc -l
}
+function get_quarantine_count {
+ ls -l "$1/.glusterfs/quanrantine" | wc -l
+}
+
function get_quotad_count {
ps auxww | grep glusterfs | grep quotad.pid | grep -v grep | wc -l
}
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
index 67103f6b5e1..4e01f5c86b1 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
@@ -2631,6 +2631,18 @@ br_stub_handle_lookup_error (xlator_t *this, inode_t *inode, int32_t op_errno)
}
UNLOCK (&inode->lock);
+ if (__br_stub_is_bad_object (ctx)) {
+ /* File is not present, might be deleted for recovery,
+ * del the bitrot inode context
+ */
+ ctx_addr = 0;
+ inode_ctx_del (inode, this, &ctx_addr);
+ if (ctx_addr) {
+ ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+ GF_FREE (ctx);
+ }
+ }
+
out:
return;
}