summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--tests/basic/gfid_unsplit_shd.t98
-rw-r--r--tests/basic/shd_autofix_nogfid.t68
-rwxr-xr-xtests/features/brick-min-free-space.t8
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c79
-rw-r--r--xlators/features/marker/src/marker.c7
-rw-r--r--xlators/protocol/server/src/server-resolve.c5
-rw-r--r--xlators/protocol/server/src/server-rpc-fops.c4
7 files changed, 251 insertions, 18 deletions
diff --git a/tests/basic/gfid_unsplit_shd.t b/tests/basic/gfid_unsplit_shd.t
new file mode 100644
index 00000000000..25fab290177
--- /dev/null
+++ b/tests/basic/gfid_unsplit_shd.t
@@ -0,0 +1,98 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+# Set up a cluster with 3 replicas and favorite-child-policy set to majority
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3};
+TEST $CLI volume set $V0 cluster.choose-local off
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+TEST $CLI volume set $V0 nfs.disable off
+TEST $CLI volume set $V0 cluster.quorum-type none
+TEST $CLI volume set $V0 cluster.heal-timeout 5
+TEST $CLI volume set $V0 cluster.favorite-child-policy majority
+#EST $CLI volume set $V0 cluster.favorite-child-by-majority off
+#EST $CLI volume set $V0 cluster.favorite-child-by-mtime on
+#EST $CLI volume set $V0 cluster.favorite-child-by-size off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume start $V0
+sleep 5
+
+# Part I: FUSE Test
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 \
+ --attribute-timeout=0 --entry-timeout=0
+
+cd $M0
+mkdir foo
+dd if=/dev/urandom of=foo/splitfile bs=128k count=5 2>/dev/null
+
+MD5=$(md5sum foo/splitfile | cut -d\ -f1)
+
+sleep 1
+cd ~
+
+GFID_PARENT_RAW=$(getfattr -n trusted.gfid -e hex $B0/${V0}1/foo 2>/dev/null | grep trusted.gfid | cut -d= -f2)
+GFID_PARENT_FORMATTED=$(echo "$GFID_PARENT_RAW" | awk '{print substr($1,3,8)"-"substr($1,11,4)"-"substr($1,15,4)"-"substr($1,19,4)"-"substr($1,23,12)}')
+GFID_RAW=$(getfattr -n trusted.gfid -e hex $B0/${V0}1/foo/splitfile 2>/dev/null | grep trusted.gfid | cut -d= -f2)
+GFID_FORMATTED=$(echo "$GFID_RAW" | awk '{print substr($1,3,8)"-"substr($1,11,4)"-"substr($1,15,4)"-"substr($1,19,4)"-"substr($1,23,12)}')
+GFID_LINK_B1="$B0/${V0}1/.glusterfs/$(echo $GFID_RAW | awk '{print substr($0,3,2)"/"substr($0,5,2)"/"substr($1,3,8)"-"substr($1,11,4)"-"substr($1,15,4)"-"substr($1,19,4)"-"substr($1,23,12)}')"
+
+# Create a split-brain by downing a brick, and flipping the
+# gfid on the down brick, then bring the brick back up.
+
+# For good measure kill the first brick so the inode cache is wiped, we don't
+# want any funny business
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST $CLI volume start $V0 force
+pkill -f gluster/glustershd
+
+rm -f $GFID_LINK_B1
+TEST setfattr -n "trusted.gfid" -v "0xfd551a5cfddd4c1aa4d096ef09ef5c08" $B0/${V0}1/foo/splitfile
+sleep 1
+TEST touch $B0/${V0}1/foo/splitfile
+
+mkdir -p $B0/${V0}1/.glusterfs/fd/55
+ln $B0/${V0}1/foo/splitfile $B0/${V0}1/.glusterfs/fd/55/fd551a5c-fddd-4c1a-a4d0-96ef09ef5c08
+cd ~
+
+touch $B0/${V0}3/.glusterfs/indices/xattrop/$GFID_FORMATTED
+touch $B0/${V0}3/.glusterfs/indices/xattrop/$GFID_PARENT_FORMATTED
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0
+sleep 5
+
+EXPECT_WITHIN 60 "0" get_pending_heal_count $V0
+
+TEST stat $B0/${V0}1/foo/splitfile
+
+cd $M0
+
+# Tickle the file to trigger the gfid unsplit
+TEST stat foo/splitfile
+sleep 1
+
+# Verify the file is readable
+TEST dd if=foo/splitfile of=/dev/null 2>/dev/null
+
+# Verify entry healing happened on the back-end regardless of the
+# gfid-splitbrain state of the directory.
+TEST stat $B0/${V0}1/foo/splitfile
+
+# Verify the MD5 signature of the file
+HEALED_MD5=$(md5sum foo/splitfile | cut -d\ -f1)
+TEST [ "$MD5" == "$HEALED_MD5" ]
+
+# Verify the file can be removed
+TEST rm -f foo/splitfile
+cd ~
+
+cleanup
diff --git a/tests/basic/shd_autofix_nogfid.t b/tests/basic/shd_autofix_nogfid.t
new file mode 100644
index 00000000000..5a6ed66f522
--- /dev/null
+++ b/tests/basic/shd_autofix_nogfid.t
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+# Set up a cluster with 3 replicas and favorite-child-policy set to majority
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3};
+TEST $CLI volume set $V0 cluster.choose-local off
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+TEST $CLI volume set $V0 nfs.disable on
+TEST $CLI volume set $V0 cluster.quorum-type auto
+TEST $CLI volume set $V0 cluster.favorite-child-policy majority
+#EST $CLI volume set $V0 cluster.favorite-child-by-majority on
+#EST $CLI volume set $V0 cluster.favorite-child-by-mtime on
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume start $V0
+sleep 5
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 \
+ --attribute-timeout=0 --entry-timeout=0
+
+# Kill the SHD while we set up the test
+pkill -f gluster/glustershd
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+mkdir $M0/foo
+dd if=/dev/urandom of=$M0/foo/testfile bs=128k count=5 2>/dev/null
+MD5=$(md5sum $M0/foo/testfile | cut -d\ -f1)
+
+mkdir $B0/${V0}1/foo
+
+# Kick off the SHD and wait up to 30 seconds for healing to take place
+TEST gluster vol start $V0 force
+EXPECT_WITHIN 30 "0" get_pending_heal_count $V0
+
+# Verify the file was healed back to brick 1
+TEST stat $B0/${V0}1/foo/testfile
+
+# Part II: Test recovery for a file without a GFID
+# Kill the SHD while we set up the test
+pkill -f gluster/glustershd
+TEST kill_brick $V0 $H0 $B0/${V0}1
+rm -f $GFID_LINK_B1
+rm -f $B0/${V0}1/foo/testfile
+touch $B0/${V0}1/foo/testfile
+
+# Queue the directories for healing; don't bother to queue the file
+# as this shouldn't be required.
+touch $B0/${V0}3/.glusterfs/indices/xattrop/00000000-0000-0000-0000-000000000001
+touch $B0/${V0}3/.glusterfs/indices/xattrop/$GFID_PARENT_FORMATTED
+
+TEST gluster vol start $V0 force
+EXPECT_WITHIN 30 "0" get_pending_heal_count $V0
+TEST stat $B0/${V0}1/foo/testfile
+
+# Prove the directory and file are removable
+TEST rm -f $B0/${V0}1/foo/testfile
+TEST rmdir $B0/${V0}1/foo
+
+cleanup
diff --git a/tests/features/brick-min-free-space.t b/tests/features/brick-min-free-space.t
index 4372998681f..0fc5a241534 100755
--- a/tests/features/brick-min-free-space.t
+++ b/tests/features/brick-min-free-space.t
@@ -41,6 +41,8 @@ TEST dd if=/dev/zero of=$M0/test bs=1M count=10 oflag=direct
TEST $CLI volume set $V0 storage.freespace-check-interval 1
TEST $CLI volume set $V0 storage.min-free-disk 8388608
+sleep 5
+
# Now even a tiny write ought fail.
TEST ! dd if=/dev/zero of=$M0/test1 bs=1M count=1 oflag=direct
TEST rm $M0/test1
@@ -48,6 +50,8 @@ TEST rm $M0/test1
# Repeat using percent syntax.
TEST $CLI volume set $V0 storage.min-free-disk 33%
+sleep 5
+
TEST ! dd if=/dev/zero of=$M0/test1 bs=4K count=1 oflag=direct
TEST rm $M0/test1
@@ -87,6 +91,8 @@ TEST dd if=/dev/zero of=$M0/test bs=1M count=10 oflag=direct
TEST $CLI volume set $V0 storage.freespace-check-interval 1
TEST $CLI volume set $V0 storage.min-free-disk 8388608
+sleep 5
+
# Now even a tiny write ought fail.
TEST ! dd if=/dev/zero of=$M0/test1 bs=1M count=1 oflag=direct
TEST rm $M0/test1
@@ -94,6 +100,8 @@ TEST rm $M0/test1
# Repeat using percent syntax.
TEST $CLI volume set $V0 storage.min-free-disk 33%
+sleep 5
+
TEST ! dd if=/dev/zero of=$M0/test1 bs=4K count=1 oflag=direct
TEST rm $M0/test1
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 776cc9c5c21..13e82f9aad4 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -66,7 +66,30 @@ afr_selfheal_entry_delete (xlator_t *this, inode_t *dir, const char *name,
ret = syncop_unlink (subvol, &loc, NULL, NULL);
break;
}
- }
+ /* Handle edge case where directories exist in a partially
+ * created state: empty, without a gfid assigned. We need to
+ * remove these bad dirs so the normal entry heal process
+ * can take place.
+ */
+ } else if (replies[child].valid &&
+ replies[child].op_ret == -1 &&
+ replies[child].op_errno == ENODATA &&
+ gf_uuid_is_null (replies[child].poststat.ia_gfid)) {
+ if (replies[child].poststat.ia_type == IA_INVAL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "expunging orphaned (gfid-less) dir "
+ "%s/%s (%s) on %s",
+ uuid_utoa (dir->gfid), name,
+ uuid_utoa_r (replies[child].poststat.ia_gfid,
+ g), subvol->name);
+ /* We will only do this for _directories_, and this
+ * will only succeed for directories _without_
+ * data. The file case is handled well already
+ * through the metadata self-heal process.
+ */
+ ret = syncop_rmdir (subvol, &loc, 1, NULL, NULL);
+ }
+ }
loc_wipe (&loc);
@@ -302,13 +325,11 @@ __afr_selfheal_merge_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
/* Returning EIO here isn't needed if GFID forced heal is
* enabled.
*/
- if (!priv->gfid_splitbrain_forced_heal) {
- /* In case of a gfid or type mismatch on the entry, return -1.*/
- ret = afr_selfheal_detect_gfid_and_type_mismatch (this,
- replies, fd->inode->gfid, name, source);
- if (ret < 0)
- return ret;
- }
+ /* In case of a gfid or type mismatch on the entry, return -1.*/
+ ret = afr_selfheal_detect_gfid_and_type_mismatch (this,
+ replies, fd->inode->gfid, name, source);
+ if (ret < 0)
+ return ret;
for (i = 0; i < priv->child_count; i++) {
if (i == source || !healed_sinks[i])
@@ -317,10 +338,20 @@ __afr_selfheal_merge_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (replies[i].op_errno != ENOENT)
continue;
- ret = afr_selfheal_recreate_entry (frame, i, source, sources,
- fd->inode, name, inode,
- replies);
- }
+ /* Re-create the entry in the event the child
+ * does not have it, or the entry does not have
+ * a gfid. In the latter case we'll only do
+ * this for now if it's a directory; this can be
+ * widened to include files at a later time.
+ */
+ if (replies[i].op_errno == ENOENT ||
+ (replies[i].op_errno == ENODATA &&
+ gf_uuid_is_null (replies[i].poststat.ia_gfid))) {
+ ret = afr_selfheal_recreate_entry (
+ frame, i, source, sources, fd->inode, name, inode,
+ replies);
+ }
+ }
return ret;
}
@@ -690,10 +721,34 @@ afr_selfheal_entry_do_subvol (call_frame_t *frame, xlator_t *this,
!strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR))
continue;
+ /* Common Case: First do a cheap normal entry_dirent
+ * flow */
ret = afr_selfheal_entry_dirent (iter_frame, this, fd,
entry->d_name,
loc.inode, subvol,
local->need_full_crawl);
+
+ /* Edge Case: Do name heal to fix gfid split
+ * brains and other damage to directory
+ * entries.
+ */
+ if (ret) {
+ /* If the cheap flow didn't work, let's head
+ * into the name self-heal flow. Here we'll
+ * inspect for GFID split-brains and fix if
+ * found. Then send it back to the normal
+ * entry_dirent flow.
+ */
+ ret = afr_selfheal_name (this, fd->inode->gfid,
+ entry->d_name, NULL);
+ if (!ret) {
+ ret = afr_selfheal_entry_dirent (
+ iter_frame, this, fd,
+ entry->d_name, loc.inode, subvol,
+ local->need_full_crawl);
+ }
+ }
+
AFR_STACK_RESET (iter_frame);
if (iter_frame->local == NULL) {
ret = -ENOTCONN;
diff --git a/xlators/features/marker/src/marker.c b/xlators/features/marker/src/marker.c
index f578f6c3f44..be98f2a1cca 100644
--- a/xlators/features/marker/src/marker.c
+++ b/xlators/features/marker/src/marker.c
@@ -1598,9 +1598,10 @@ marker_get_oldpath_contribution (call_frame_t *lk_frame, void *cookie,
*/
MARKER_SET_UID_GID (frame, local, frame->root);
- if (gf_uuid_is_null (oplocal->loc.gfid))
- gf_uuid_copy (oplocal->loc.gfid,
- oplocal->loc.inode->gfid);
+ if (gf_uuid_is_null (oplocal->loc.gfid)) {
+ gf_uuid_copy (oplocal->loc.gfid,
+ oplocal->loc.inode->gfid);
+ }
GF_UUID_ASSERT (oplocal->loc.gfid);
diff --git a/xlators/protocol/server/src/server-resolve.c b/xlators/protocol/server/src/server-resolve.c
index 1ad45394dd7..6f621119278 100644
--- a/xlators/protocol/server/src/server-resolve.c
+++ b/xlators/protocol/server/src/server-resolve.c
@@ -58,6 +58,10 @@ resolve_gfid_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
resolve = state->resolve_now;
resolve_loc = &resolve->resolve_loc;
+ if (!state->loc.inode && inode) {
+ state->loc.inode = inode;
+ }
+
if (op_ret == -1) {
if (op_errno == ENOENT) {
gf_msg_debug (this->name, 0, "%s/%s: failed to resolve"
@@ -71,7 +75,6 @@ resolve_gfid_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
uuid_utoa (resolve_loc->pargfid),
resolve_loc->name, strerror (op_errno));
}
- goto out;
}
link_inode = inode_link (inode, resolve_loc->parent,
diff --git a/xlators/protocol/server/src/server-rpc-fops.c b/xlators/protocol/server/src/server-rpc-fops.c
index c22f79fa872..ee8ce825098 100644
--- a/xlators/protocol/server/src/server-rpc-fops.c
+++ b/xlators/protocol/server/src/server-rpc-fops.c
@@ -4648,7 +4648,7 @@ server3_3_unlink (rpcsvc_request_t *req)
goto out;
}
- state->resolve.type = RESOLVE_MUST;
+ state->resolve.type = RESOLVE_MAY;
state->resolve.bname = gf_strdup (args.bname);
memcpy (state->resolve.pargfid, args.pargfid, 16);
@@ -5646,7 +5646,7 @@ server3_3_rmdir (rpcsvc_request_t *req)
goto out;
}
- state->resolve.type = RESOLVE_MUST;
+ state->resolve.type = RESOLVE_MAY;
memcpy (state->resolve.pargfid, args.pargfid, 16);
state->resolve.bname = gf_strdup (args.bname);