diff options
| -rw-r--r-- | tests/basic/gfid_unsplit_shd.t | 98 | ||||
| -rw-r--r-- | tests/basic/shd_autofix_nogfid.t | 68 | ||||
| -rwxr-xr-x | tests/features/brick-min-free-space.t | 8 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 79 | ||||
| -rw-r--r-- | xlators/features/marker/src/marker.c | 7 | ||||
| -rw-r--r-- | xlators/protocol/server/src/server-resolve.c | 5 | ||||
| -rw-r--r-- | xlators/protocol/server/src/server-rpc-fops.c | 4 |
7 files changed, 251 insertions, 18 deletions
diff --git a/tests/basic/gfid_unsplit_shd.t b/tests/basic/gfid_unsplit_shd.t new file mode 100644 index 00000000000..25fab290177 --- /dev/null +++ b/tests/basic/gfid_unsplit_shd.t @@ -0,0 +1,98 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info; + +# Setup a cluster with 3 replicas, and fav child by majority on +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3}; +TEST $CLI volume set $V0 cluster.choose-local off +TEST $CLI volume set $V0 cluster.self-heal-daemon on +TEST $CLI volume set $V0 nfs.disable off +TEST $CLI volume set $V0 cluster.quorum-type none +TEST $CLI volume set $V0 cluster.heal-timeout 5 +TEST $CLI volume set $V0 cluster.favorite-child-policy majority +#EST $CLI volume set $V0 cluster.favorite-child-by-majority off +#EST $CLI volume set $V0 cluster.favorite-child-by-mtime on +#EST $CLI volume set $V0 cluster.favorite-child-by-size off +TEST $CLI volume set $V0 cluster.metadata-self-heal off +TEST $CLI volume set $V0 cluster.data-self-heal off +TEST $CLI volume set $V0 cluster.entry-self-heal off +TEST $CLI volume start $V0 +sleep 5 + +# Part I: FUSE Test +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 \ + --attribute-timeout=0 --entry-timeout=0 + +cd $M0 +mkdir foo +dd if=/dev/urandom of=foo/splitfile bs=128k count=5 2>/dev/null + +MD5=$(md5sum foo/splitfile | cut -d\ -f1) + +sleep 1 +cd ~ + +GFID_PARENT_RAW=$(getfattr -n trusted.gfid -e hex $B0/${V0}1/foo 2>/dev/null | grep trusted.gfid | cut -d= -f2) +GFID_PARENT_FORMATTED=$(echo "$GFID_PARENT_RAW" | awk '{print substr($1,3,8)"-"substr($1,11,4)"-"substr($1,15,4)"-"substr($1,19,4)"-"substr($1,23,12)}') +GFID_RAW=$(getfattr -n trusted.gfid -e hex $B0/${V0}1/foo/splitfile 2>/dev/null | grep trusted.gfid | cut -d= -f2) +GFID_FORMATTED=$(echo "$GFID_RAW" | awk '{print substr($1,3,8)"-"substr($1,11,4)"-"substr($1,15,4)"-"substr($1,19,4)"-"substr($1,23,12)}') 
+GFID_LINK_B1="$B0/${V0}1/.glusterfs/$(echo $GFID_RAW | awk '{print substr($0,3,2)"/"substr($0,5,2)"/"substr($1,3,8)"-"substr($1,11,4)"-"substr($1,15,4)"-"substr($1,19,4)"-"substr($1,23,12)}')" + +# Create a split-brain by downing a brick, and flipping the +# gfid on the down brick, then bring the brick back up. + +# For good measure kill the first brick so the inode cache is wiped, we don't +# want any funny business +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST $CLI volume start $V0 force +pkill -f gluster/glustershd + +rm -f $GFID_LINK_B1 +TEST setfattr -n "trusted.gfid" -v "0xfd551a5cfddd4c1aa4d096ef09ef5c08" $B0/${V0}1/foo/splitfile +sleep 1 +TEST touch $B0/${V0}1/foo/splitfile + +mkdir -p $B0/${V0}1/.glusterfs/fd/55 +ln $B0/${V0}1/foo/splitfile $B0/${V0}1/.glusterfs/fd/55/fd551a5c-fddd-4c1a-a4d0-96ef09ef5c08 +cd ~ + +touch $B0/${V0}3/.glusterfs/indices/xattrop/$GFID_FORMATTED +touch $B0/${V0}3/.glusterfs/indices/xattrop/$GFID_PARENT_FORMATTED + +TEST $CLI volume start $V0 force +EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0 +sleep 5 + +EXPECT_WITHIN 60 "0" get_pending_heal_count $V0 + +TEST stat $B0/${V0}1/foo/splitfile + +cd $M0 + +# Tickle the file to trigger the gfid unsplit +TEST stat foo/splitfile +sleep 1 + +# Verify the file is readable +TEST dd if=foo/splitfile of=/dev/null 2>/dev/null + +# Verify entry healing happened on the back-end regardless of the +# gfid-splitbrain state of the directory. +TEST stat $B0/${V0}1/foo/splitfile + +# Verify the MD5 signature of the file +HEALED_MD5=$(md5sum foo/splitfile | cut -d\ -f1) +TEST [ "$MD5" == "$HEALED_MD5" ] + +# Verify the file can be removed +TEST rm -f foo/splitfile +cd ~ + +cleanup diff --git a/tests/basic/shd_autofix_nogfid.t b/tests/basic/shd_autofix_nogfid.t new file mode 100644 index 00000000000..5a6ed66f522 --- /dev/null +++ b/tests/basic/shd_autofix_nogfid.t @@ -0,0 +1,68 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. 
$(dirname $0)/../volume.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info; + +# Setup a cluster with 3 replicas, and fav child by majority on +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3}; +TEST $CLI volume set $V0 cluster.choose-local off +TEST $CLI volume set $V0 cluster.self-heal-daemon on +TEST $CLI volume set $V0 nfs.disable on +TEST $CLI volume set $V0 cluster.quorum-type auto +TEST $CLI volume set $V0 cluster.favorite-child-policy majority +#EST $CLI volume set $V0 cluster.favorite-child-by-majority on +#EST $CLI volume set $V0 cluster.favorite-child-by-mtime on +TEST $CLI volume set $V0 cluster.metadata-self-heal off +TEST $CLI volume set $V0 cluster.data-self-heal off +TEST $CLI volume set $V0 cluster.entry-self-heal off +TEST $CLI volume start $V0 +sleep 5 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 \ + --attribute-timeout=0 --entry-timeout=0 + +# Kill the SHD while we setup the test +pkill -f gluster/glustershd +TEST kill_brick $V0 $H0 $B0/${V0}1 + +mkdir $M0/foo +dd if=/dev/urandom of=$M0/foo/testfile bs=128k count=5 2>/dev/null +MD5=$(md5sum $M0/foo/testfile | cut -d\ -f1) + +mkdir $B0/${V0}1/foo + +# Kick off the SHD and wait 30 seconds for healing to take place +TEST gluster vol start $V0 force +EXPECT_WITHIN 30 "0" get_pending_heal_count $V0 + +# Verify the file was healed back to brick 1 +TEST stat $B0/${V0}1/foo/testfile + +# Part II: Test recovery for a file without a GFID +# Kill the SHD while we setup the test +pkill -f gluster/glustershd +TEST kill_brick $V0 $H0 $B0/${V0}1 +rm -f $GFID_LINK_B1 +rm -f $B0/${V0}1/foo/testfile +touch $B0/${V0}1/foo/testfile + +# Queue the directories for healing, don't bother to queue the file +# as this shouldn't be required. 
+touch $B0/${V0}3/.glusterfs/indices/xattrop/00000000-0000-0000-0000-000000000001 +touch $B0/${V0}3/.glusterfs/indices/xattrop/$GFID_PARENT_FORMATTED + +TEST gluster vol start $V0 force +EXPECT_WITHIN 30 "0" get_pending_heal_count $V0 +TEST stat $B0/${V0}1/foo/testfile + +# Prove the directory and file are removable +TEST rm -f $B0/${V0}1/foo/testfile +TEST rmdir $B0/${V0}1/foo + +cleanup diff --git a/tests/features/brick-min-free-space.t b/tests/features/brick-min-free-space.t index 4372998681f..0fc5a241534 100755 --- a/tests/features/brick-min-free-space.t +++ b/tests/features/brick-min-free-space.t @@ -41,6 +41,8 @@ TEST dd if=/dev/zero of=$M0/test bs=1M count=10 oflag=direct TEST $CLI volume set $V0 storage.freespace-check-interval 1 TEST $CLI volume set $V0 storage.min-free-disk 8388608 +sleep 5 + # Now even a tiny write ought fail. TEST ! dd if=/dev/zero of=$M0/test1 bs=1M count=1 oflag=direct TEST rm $M0/test1 @@ -48,6 +50,8 @@ TEST rm $M0/test1 # Repeat using percent syntax. TEST $CLI volume set $V0 storage.min-free-disk 33% +sleep 5 + TEST ! dd if=/dev/zero of=$M0/test1 bs=4K count=1 oflag=direct TEST rm $M0/test1 @@ -87,6 +91,8 @@ TEST dd if=/dev/zero of=$M0/test bs=1M count=10 oflag=direct TEST $CLI volume set $V0 storage.freespace-check-interval 1 TEST $CLI volume set $V0 storage.min-free-disk 8388608 +sleep 5 + # Now even a tiny write ought fail. TEST ! dd if=/dev/zero of=$M0/test1 bs=1M count=1 oflag=direct TEST rm $M0/test1 @@ -94,6 +100,8 @@ TEST rm $M0/test1 # Repeat using percent syntax. TEST $CLI volume set $V0 storage.min-free-disk 33% +sleep 5 + TEST ! 
dd if=/dev/zero of=$M0/test1 bs=4K count=1 oflag=direct TEST rm $M0/test1 diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 776cc9c5c21..13e82f9aad4 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -66,7 +66,30 @@ afr_selfheal_entry_delete (xlator_t *this, inode_t *dir, const char *name, ret = syncop_unlink (subvol, &loc, NULL, NULL); break; } - } + /* Handle edge case where directories exist in a partially + * created state: empty, without a gfid assigned. We need to + * remove these bad dirs so the normal entry heal process + * can take place. + */ + } else if (replies[child].valid && + replies[child].op_ret == -1 && + replies[child].op_errno == ENODATA && + gf_uuid_is_null (replies[child].poststat.ia_gfid)) { + if (replies[child].poststat.ia_type == IA_INVAL) { + gf_log (this->name, GF_LOG_WARNING, + "expunging orphaned (gfid-less) dir " + "%s/%s (%s) on %s", + uuid_utoa (dir->gfid), name, + uuid_utoa_r (replies[child].poststat.ia_gfid, + g), subvol->name); + /* We will only do this for _directories_, and this + * will only succeed for directories _without_ + * data. The file case is handled well already + * through the metadata self-heal process. + */ + ret = syncop_rmdir (subvol, &loc, 1, NULL, NULL); + } + } loc_wipe (&loc); @@ -302,13 +325,11 @@ __afr_selfheal_merge_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd, /* Returning EIO here isn't needed if GFID forced heal is * enabled. 
*/ - if (!priv->gfid_splitbrain_forced_heal) { - /* In case of a gfid or type mismatch on the entry, return -1.*/ - ret = afr_selfheal_detect_gfid_and_type_mismatch (this, - replies, fd->inode->gfid, name, source); - if (ret < 0) - return ret; - } + /* In case of a gfid or type mismatch on the entry, return -1.*/ + ret = afr_selfheal_detect_gfid_and_type_mismatch (this, + replies, fd->inode->gfid, name, source); + if (ret < 0) + return ret; for (i = 0; i < priv->child_count; i++) { if (i == source || !healed_sinks[i]) @@ -317,10 +338,20 @@ __afr_selfheal_merge_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd, if (replies[i].op_errno != ENOENT) continue; - ret = afr_selfheal_recreate_entry (frame, i, source, sources, - fd->inode, name, inode, - replies); - } + /* Re-create the entry in the event the child + * does not have it, or the entry does not have + * a gfid. In the latter case we'll only do + * this for now if it's a directory; this can be + * widened to include files at a later time. + */ + if (replies[i].op_errno == ENOENT || + (replies[i].op_errno == ENODATA && + gf_uuid_is_null (replies[i].poststat.ia_gfid))) { + ret = afr_selfheal_recreate_entry ( + frame, i, source, sources, fd->inode, name, inode, + replies); + } + } return ret; } @@ -690,10 +721,34 @@ afr_selfheal_entry_do_subvol (call_frame_t *frame, xlator_t *this, !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) continue; + /* Common Case: First do a cheap normal entry_dirent + * flow */ ret = afr_selfheal_entry_dirent (iter_frame, this, fd, entry->d_name, loc.inode, subvol, local->need_full_crawl); + + /* Edge Case: Do name heal to fix gfid split + * brains and other damage to directory + * entries. + */ + if (ret) { + /* If the cheap flow didn't work, let's head + * into the name self-heal flow. Here we'll + * inspect for GFID split-brains and fix if + * found. Then send it back to the normal + * entry_dirent flow. 
+ */ + ret = afr_selfheal_name (this, fd->inode->gfid, + entry->d_name, NULL); + if (!ret) { + ret = afr_selfheal_entry_dirent ( + iter_frame, this, fd, + entry->d_name, loc.inode, subvol, + local->need_full_crawl); + } + } + AFR_STACK_RESET (iter_frame); if (iter_frame->local == NULL) { ret = -ENOTCONN; diff --git a/xlators/features/marker/src/marker.c b/xlators/features/marker/src/marker.c index f578f6c3f44..be98f2a1cca 100644 --- a/xlators/features/marker/src/marker.c +++ b/xlators/features/marker/src/marker.c @@ -1598,9 +1598,10 @@ marker_get_oldpath_contribution (call_frame_t *lk_frame, void *cookie, */ MARKER_SET_UID_GID (frame, local, frame->root); - if (gf_uuid_is_null (oplocal->loc.gfid)) - gf_uuid_copy (oplocal->loc.gfid, - oplocal->loc.inode->gfid); + if (gf_uuid_is_null (oplocal->loc.gfid)) { + gf_uuid_copy (oplocal->loc.gfid, + oplocal->loc.inode->gfid); + } GF_UUID_ASSERT (oplocal->loc.gfid); diff --git a/xlators/protocol/server/src/server-resolve.c b/xlators/protocol/server/src/server-resolve.c index 1ad45394dd7..6f621119278 100644 --- a/xlators/protocol/server/src/server-resolve.c +++ b/xlators/protocol/server/src/server-resolve.c @@ -58,6 +58,10 @@ resolve_gfid_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this, resolve = state->resolve_now; resolve_loc = &resolve->resolve_loc; + if (!state->loc.inode && inode) { + state->loc.inode = inode; + } + if (op_ret == -1) { if (op_errno == ENOENT) { gf_msg_debug (this->name, 0, "%s/%s: failed to resolve" @@ -71,7 +75,6 @@ resolve_gfid_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this, uuid_utoa (resolve_loc->pargfid), resolve_loc->name, strerror (op_errno)); } - goto out; } link_inode = inode_link (inode, resolve_loc->parent, diff --git a/xlators/protocol/server/src/server-rpc-fops.c b/xlators/protocol/server/src/server-rpc-fops.c index c22f79fa872..ee8ce825098 100644 --- a/xlators/protocol/server/src/server-rpc-fops.c +++ b/xlators/protocol/server/src/server-rpc-fops.c @@ -4648,7 
+4648,7 @@ server3_3_unlink (rpcsvc_request_t *req) goto out; } - state->resolve.type = RESOLVE_MUST; + state->resolve.type = RESOLVE_MAY; state->resolve.bname = gf_strdup (args.bname); memcpy (state->resolve.pargfid, args.pargfid, 16); @@ -5646,7 +5646,7 @@ server3_3_rmdir (rpcsvc_request_t *req) goto out; } - state->resolve.type = RESOLVE_MUST; + state->resolve.type = RESOLVE_MAY; memcpy (state->resolve.pargfid, args.pargfid, 16); state->resolve.bname = gf_strdup (args.bname); |
