From b86a7de9b5ea9dcd0a630dbe09fce6d9ad0d8944 Mon Sep 17 00:00:00 2001 From: Kotresh HR Date: Tue, 6 Sep 2016 18:28:42 +0530 Subject: feature/bitrot: Fix recovery of corrupted hardlink Problem: When a file with a hardlink is corrupted in an ec volume, the recovery steps mentioned were not working. Only the name and metadata were healed, but not the data. Cause: The bad file marker in the inode context is not removed. Hence, when self-heal tries to open the file for data healing, it fails with EIO. Background: Bitrot deletes its inode context during forget. Briefly, the recovery involves the following steps. 1. Delete the entry marked with the bad file xattr from the backend. Delete all the hardlinks, including the .glusterfs hardlink as well. 2. Access each hardlink of the file, including the original, from the mount. Step 2 sends a lookup to the brick where the files were deleted from the backend, and the lookup returns ENOENT. On ENOENT, the server xlator forgets the inode if there are no dentries associated with it. But in the case of hardlinks, forget won't be called, as dentries (the other hardlink files) are still associated with the inode. Hence bitrot-stub won't delete its context, causing the data self-heal to fail. Fix: Bitrot-stub should delete the inode context on getting ENOENT during lookup. 
Change-Id: Ice6adc18625799e7afd842ab33b3517c2be264c1 BUG: 1373520 Signed-off-by: Kotresh HR Reviewed-on: http://review.gluster.org/15408 Smoke: Gluster Build System NetBSD-regression: NetBSD Build System CentOS-regression: Gluster Build System Reviewed-by: Raghavendra Bhat --- tests/bitrot/bug-1294786.t | 6 +---- tests/bitrot/bug-1373520.t | 63 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 5 deletions(-) create mode 100644 tests/bitrot/bug-1373520.t (limited to 'tests/bitrot') diff --git a/tests/bitrot/bug-1294786.t b/tests/bitrot/bug-1294786.t index 4911c1dd6c2..d395d76b7ed 100644 --- a/tests/bitrot/bug-1294786.t +++ b/tests/bitrot/bug-1294786.t @@ -16,10 +16,6 @@ function get_node_uuid { getfattr -n trusted.glusterfs.node-uuid --only-values $M0/FILE1 2>/dev/null } -function get_quarantine_count { - ls -l "$B1/.glusterfs/quanrantine" | wc -l -} - cleanup; TEST launch_cluster 2 @@ -64,7 +60,7 @@ TEST touch "$B1/.glusterfs/quanrantine/$gfid1" TEST chmod 000 "$B1/.glusterfs/quanrantine/$gfid1" TEST touch "$B1/.glusterfs/quanrantine/$gfid2" TEST chmod 000 "$B1/.glusterfs/quanrantine/$gfid2" -EXPECT "4" get_quarantine_count; +EXPECT "4" get_quarantine_count "$B1"; TEST $CLI_1 volume stop $V0 TEST $CLI_1 volume start $V0 diff --git a/tests/bitrot/bug-1373520.t b/tests/bitrot/bug-1373520.t new file mode 100644 index 00000000000..3a0ac5293e0 --- /dev/null +++ b/tests/bitrot/bug-1373520.t @@ -0,0 +1,63 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. 
$(dirname $0)/../volume.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd + +#Create a disperse volume +TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5} +TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status' + +#Disable md-cache +TEST $CLI volume set $V0 performance.stat-prefetch off + +#Mount the volume +TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 + +#Enable bitrot +TEST $CLI volume bitrot $V0 enable +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count + +#Create sample file +TEST `echo "1234" > $M0/FILE1` +#Create hardlink +TEST `ln $M0/FILE1 $M0/HL_FILE1` + +#Corrupt file from back-end +TEST stat $B0/${V0}5/FILE1 +SIZE=$(stat -c %s $B0/${V0}5/FILE1) +echo "Corrupted data" >> $B0/${V0}5/FILE1 +gfid1=$(getfattr -n glusterfs.gfid.string --only-values $M0/FILE1) + +#Manually set bad-file xattr +TEST setfattr -n trusted.bit-rot.bad-file -v 0x3100 $B0/${V0}5/FILE1 +TEST touch "$B0/${V0}5/.glusterfs/quanrantine/$gfid1" +TEST chmod 000 "$B0/${V0}5/.glusterfs/quanrantine/$gfid1" +EXPECT "3" get_quarantine_count "$B0/${V0}5"; + +TEST $CLI volume stop $V0 +TEST $CLI volume start $V0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count + +#Trigger lookup so that bitrot xlator marks file as bad in its inode context. +TEST stat $M0/FILE1 + +#Delete file and all links from backend +TEST stat $B0/${V0}5/FILE1 +TEST `ls -li $B0/${V0}5/FILE1 | awk '{print $1}' | xargs find $B0/${V0}5/ -inum | xargs -r rm -rf` + +#Access files +TEST cat $M0/FILE1 +EXPECT_WITHIN $HEAL_TIMEOUT "$SIZE" stat -c %s $B0/${V0}5/FILE1 + +TEST cat $M0/HL_FILE1 +EXPECT_WITHIN $HEAL_TIMEOUT "$SIZE" stat -c %s $B0/${V0}5/HL_FILE1 + +cleanup; -- cgit