diff options
| -rw-r--r-- | tests/basic/afr/gfid-unsplit-shd.t (renamed from tests/basic/gfid_unsplit_shd.t) | 4 | ||||
| -rw-r--r-- | tests/basic/afr/gfid-unsplit-type-mismatch.t (renamed from tests/basic/gfid_unsplit_type_mismatch.t) | 11 | ||||
| -rw-r--r-- | tests/basic/afr/gfid-unsplit.t (renamed from tests/basic/gfid_unsplit.t) | 10 | ||||
| -rw-r--r-- | tests/basic/afr/shd-autofix-nogfid.t (renamed from tests/basic/shd_autofix_nogfid.t) | 4 | ||||
| -rw-r--r-- | tests/basic/afr/shd-force-inspect.t (renamed from tests/basic/shd_force_inspect.t) | 4 | ||||
| -rw-r--r-- | tests/basic/afr/shd-pgfid-heal.t | 81 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 8 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 118 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 1 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 2 | ||||
| -rw-r--r-- | xlators/features/marker/src/marker.c | 7 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix.c | 8 |
12 files changed, 226 insertions, 32 deletions
diff --git a/tests/basic/gfid_unsplit_shd.t b/tests/basic/afr/gfid-unsplit-shd.t index 25fab290177..77da5243724 100644 --- a/tests/basic/gfid_unsplit_shd.t +++ b/tests/basic/afr/gfid-unsplit-shd.t @@ -1,7 +1,7 @@ #!/bin/bash -. $(dirname $0)/../include.rc -. $(dirname $0)/../volume.rc +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc cleanup; diff --git a/tests/basic/gfid_unsplit_type_mismatch.t b/tests/basic/afr/gfid-unsplit-type-mismatch.t index 51e6a36445b..9e205021a0d 100644 --- a/tests/basic/gfid_unsplit_type_mismatch.t +++ b/tests/basic/afr/gfid-unsplit-type-mismatch.t @@ -1,7 +1,7 @@ #!/bin/bash -. $(dirname $0)/../include.rc -. $(dirname $0)/../volume.rc +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc cleanup; @@ -13,9 +13,9 @@ TEST $CLI volume info; TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3}; TEST $CLI volume set $V0 cluster.choose-local off TEST $CLI volume set $V0 cluster.self-heal-daemon off -TEST $CLI volume set $V0 nfs.disable off +TEST $CLI volume set $V0 nfs.disable on TEST $CLI volume set $V0 cluster.quorum-type none -TEST $CLI volume set $V0 cluster.favorite-child-policy majority +TEST $CLI volume set $V0 cluster.favorite-child-policy mtime #EST $CLI volume set $V0 cluster.favorite-child-by-majority on #EST $CLI volume set $V0 cluster.favorite-child-by-mtime on TEST $CLI volume set $V0 cluster.metadata-self-heal off @@ -24,6 +24,8 @@ TEST $CLI volume set $V0 cluster.entry-self-heal off TEST $CLI volume start $V0 sleep 5 +pkill -f gluster/glustershd + # Part I: FUSE Test TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 \ --attribute-timeout=0 --entry-timeout=0 @@ -69,7 +71,6 @@ sleep 1 # Verify the file is readable TEST dd if=splitfile of=/dev/null 2>/dev/null - # Verify entry healing happened on the back-end regardless of the # gfid-splitbrain state of the directory. TEST stat $B0/${V0}1/splitfile diff --git a/tests/basic/gfid_unsplit.t b/tests/basic/afr/gfid-unsplit.t index 0df96bd5ed6..0b883ab658f 100644 --- a/tests/basic/gfid_unsplit.t +++ b/tests/basic/afr/gfid-unsplit.t @@ -1,8 +1,8 @@ #!/bin/bash -. $(dirname $0)/../include.rc -. $(dirname $0)/../volume.rc -. $(dirname $0)/../nfs.rc +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../nfs.rc cleanup; @@ -19,7 +19,7 @@ TEST $CLI volume set $V0 cluster.self-heal-daemon off TEST $CLI volume set $V0 nfs.disable off #EST $CLI volume set $V0 cluster.favorite-child-by-majority on #EST $CLI volume set $V0 cluster.favorite-child-by-mtime on -TEST $CLI volume set $V0 cluster.favorite-child-policy majority +TEST $CLI volume set $V0 cluster.favorite-child-policy mtime TEST $CLI volume set $V0 cluster.metadata-self-heal off TEST $CLI volume set $V0 cluster.data-self-heal off TEST $CLI volume set $V0 cluster.entry-self-heal off @@ -39,6 +39,8 @@ MD5=$(md5sum $M0/splitfile | cut -d\ -f1) TEST kill_brick $V0 $H0 $B0/${V0}1 GFID_DIR_B1="$B0/${V0}1/.glusterfs/$(getfattr -n trusted.gfid -e hex $B0/${V0}1/splitfile 2>/dev/null | grep ^trusted | cut -d= -f2 | awk '{print substr($0,3,2)}')" rm -rf $GFID_DIR_B1 +mkdir -p $B0/${V0}1/.glusterfs/fd/55 +ln $B0/${V0}1/splitfile $B0/${V0}1/.glusterfs/fd/55/fd551a5c-fddd-4c1a-a4d0-96ef09ef5c08 TEST setfattr -n "trusted.gfid" -v "0xfd551a5cfddd4c1aa4d096ef09ef5c08" $B0/${V0}1/splitfile GFID_DIR_B3="$B0/${V0}3/.glusterfs/$(getfattr -n trusted.gfid -e hex $B0/${V0}3/splitfile 2>/dev/null | grep ^trusted | cut -d= -f2 | awk '{print substr($0,3,2)}')" diff --git a/tests/basic/shd_autofix_nogfid.t b/tests/basic/afr/shd-autofix-nogfid.t index 5a6ed66f522..7c9026dce62 100644 --- a/tests/basic/shd_autofix_nogfid.t +++ b/tests/basic/afr/shd-autofix-nogfid.t @@ -1,7 +1,7 @@ #!/bin/bash -. $(dirname $0)/../include.rc -. $(dirname $0)/../volume.rc +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc cleanup; diff --git a/tests/basic/shd_force_inspect.t b/tests/basic/afr/shd-force-inspect.t index ebf3f7a17ad..caceb841322 100644 --- a/tests/basic/shd_force_inspect.t +++ b/tests/basic/afr/shd-force-inspect.t @@ -1,7 +1,7 @@ #!/bin/bash -. $(dirname $0)/../include.rc -. $(dirname $0)/../volume.rc +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc cleanup; diff --git a/tests/basic/afr/shd-pgfid-heal.t b/tests/basic/afr/shd-pgfid-heal.t new file mode 100644 index 00000000000..d12d29e13ba --- /dev/null +++ b/tests/basic/afr/shd-pgfid-heal.t @@ -0,0 +1,81 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info; + +# Setup a cluster with 3 replicas, and fav child by majority on +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3}; +TEST $CLI volume set $V0 cluster.choose-local off +TEST $CLI volume set $V0 cluster.self-heal-daemon on +TEST $CLI volume set $V0 nfs.disable on +TEST $CLI volume set $V0 cluster.quorum-type none +#EST $CLI volume set $V0 cluster.favorite-child-by-majority on +#EST $CLI volume set $V0 cluster.favorite-child-by-mtime on +TEST $CLI volume set $V0 cluster.favorite-child-policy majority +TEST $CLI volume set $V0 storage.build-pgfid on +TEST $CLI volume set $V0 cluster.metadata-self-heal off +TEST $CLI volume set $V0 cluster.data-self-heal off +TEST $CLI volume set $V0 cluster.entry-self-heal off +TEST $CLI volume start $V0 +sleep 5 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 \ + --attribute-timeout=0 --entry-timeout=0 + +cd $M0 +mkdir -p a/b/c +dd if=/dev/urandom of=a/b/c/testfile bs=128k count=5 2>/dev/null +MD5=$(md5sum a/b/c/testfile | cut -d\ -f1) + +# Kill the SHD while we setup the test +pkill -f gluster/glustershd +# Kill the brick as well such that +TEST kill_brick $V0 $H0 $B0/${V0}1 + +# Grab the GFID of the file and parent dir +GFID_PARENT_B_RAW=$(getfattr -n trusted.gfid -e hex $B0/${V0}1/a/b 2>/dev/null | grep trusted.gfid | cut -d= -f2) +GFID_PARENT_B_FORMATTED=$(echo "$GFID_PARENT_B_RAW" | awk '{print substr($1,3,8)"-"substr($1,11,4)"-"substr($1,15,4)"-"substr($1,19,4)"-"substr($1,23,12)}') +GFID_PARENT_B_LINK_B1="$B0/${V0}1/.glusterfs/$(echo $GFID_PARENT_B_RAW | awk '{print substr($0,3,2)"/"substr($0,5,2)"/"substr($1,3,8)"-"substr($1,11,4)"-"substr($1,15,4)"-"substr($1,19,4)"-"substr($1,23,12)}')" +GFID_PARENT_C_RAW=$(getfattr -n trusted.gfid -e hex $B0/${V0}1/a/b/c 2>/dev/null | grep trusted.gfid | cut -d= -f2) +GFID_PARENT_C_FORMATTED=$(echo "$GFID_PARENT_C_RAW" | awk '{print substr($1,3,8)"-"substr($1,11,4)"-"substr($1,15,4)"-"substr($1,19,4)"-"substr($1,23,12)}') +GFID_PARENT_C_LINK_B1="$B0/${V0}1/.glusterfs/$(echo $GFID_PARENT_C_RAW | awk '{print substr($0,3,2)"/"substr($0,5,2)"/"substr($1,3,8)"-"substr($1,11,4)"-"substr($1,15,4)"-"substr($1,19,4)"-"substr($1,23,12)}')" +GFID_RAW=$(getfattr -n trusted.gfid -e hex $B0/${V0}1/a/b/c/testfile 2>/dev/null | grep trusted.gfid | cut -d= -f2) +GFID_FORMATTED=$(echo "$GFID_RAW" | awk '{print substr($1,3,8)"-"substr($1,11,4)"-"substr($1,15,4)"-"substr($1,19,4)"-"substr($1,23,12)}') +GFID_LINK_B1="$B0/${V0}1/.glusterfs/$(echo $GFID_RAW | awk '{print substr($0,3,2)"/"substr($0,5,2)"/"substr($1,3,8)"-"substr($1,11,4)"-"substr($1,15,4)"-"substr($1,19,4)"-"substr($1,23,12)}')" + +# +# Here we are going to create a situation such that a file 3 +# levels deep into the FS requires healing, along with 2 levels +# of parent directories. The only signal SHD has is that the +# file itself needs healing. The directory (entry) heals are +# missing; simulating a crash or some sort of bug that we need +# to be able to recover from. +# + +# Nuke the file from brick 1, along with the parent directories +# and all backend hard/symbolic links +rm -f $B0/${V0}1/a/b/c/testfile +rm -f $GFID_LINK_B1 +rmdir $B0/${V0}1/a/b/c +rm -f $GFID_PARENT_C_LINK_B1 +rmdir $B0/${V0}1/a/b +rm -f $GFID_PARENT_B_LINK_B1 + +# Now manually queue up the parent directory for healing +touch $B0/${V0}3/.glusterfs/indices/xattrop/$GFID_FORMATTED + +# Kick off the SHD and wait 30 seconds for healing to take place +TEST gluster vol start patchy force +EXPECT_WITHIN 30 "0" get_pending_heal_count $V0 +sleep 5 + +# Verify the file was healed back to brick 1 +TEST stat $B0/${V0}1/a/b/c/testfile + +cleanup diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index cfea53208c8..333ea888395 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -1381,6 +1381,12 @@ afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req) "Unable to set list-xattr in dict "); } + ret = dict_set_int32 (xattr_req, GET_ANCESTRY_PATH_KEY, 42); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Unable to set ancestry path key in dict "); + } + return ret; } @@ -1751,6 +1757,8 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this) GF_FREE (local->readable); GF_FREE (local->readable2); + GF_FREE (local->heal_ancestry_path); + if (local->inode) inode_unref (local->inode); diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 12b031d78e6..98ad65f29fd 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -1212,7 +1212,6 @@ afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this, } } - /* count the number of dirty fops witnessed */ for (i = 0; i < priv->child_count; i++) witness[i] += dirty[i]; @@ -1220,6 +1219,67 @@ afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this, return 0; } +/* + * This function will examine a reply and look for a PGFID xattr + * and if found will record this in the frame's local struct. + * + * This can then be used to fall-back to healing the parent + * directory in cases where metadata/data healing isn't yet + * possible because an entry heal of the parent directory has not + * yet taken place. + * + * This is critical for a couple reasons: + * 1. General healing predictability - When the SHD + * attempts to heal a given GFID, it should be able + * to do so without having to wait for some other + * dependent heal to take place. + * 2. Reliability - In some cases the parent directory + * may require healing, but the req'd entry in the + * indices/xattrop directory may not exist + * (e.g. bugs/crashes etc). This feature removes + * + */ +void +_afr_set_heal_pgfid_from_reply (xlator_t *this, afr_local_t *local, + struct afr_reply reply) +{ + data_pair_t *trav = reply.xdata->members_list; + uuid_t *pgfid = NULL; + int32_t ret = 0; + int32_t pgfid_prefix_len = sizeof (PGFID_XATTR_KEY_PREFIX) - 1; + char *pgfid_str = NULL; + data_t *ancestry_path_data = NULL; + char *ancestry_path = "Unknown"; + + pgfid = &local->heal_pgfid; + + while (trav) { + if (!strncmp (PGFID_XATTR_KEY_PREFIX, trav->key, + pgfid_prefix_len)) { + pgfid_str = trav->key + pgfid_prefix_len; + ret = gf_uuid_parse (pgfid_str, *pgfid); + break; + } + trav = trav->next; + } + + if (!ret && !gf_uuid_is_null (*pgfid)) { + if (!dict_lookup (reply.xdata, + "glusterfs.ancestry.path", + &ancestry_path_data)) { + ancestry_path = data_to_str ( + ancestry_path_data); + /* Allocation free'd on local destroy */ + local->heal_ancestry_path = + gf_strdup (ancestry_path); + } + gf_log (this->name, GF_LOG_DEBUG, + "Found pgfid (%s) for %s", + uuid_utoa (*pgfid), + ancestry_path); + } +} + void afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type, int source, unsigned char *sources, @@ -1788,6 +1848,8 @@ afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this, gf_boolean_t *entry_selfheal) { afr_private_t *priv = NULL; + afr_local_t *local = NULL; + inode_t *inode = NULL; int i = 0; int valid_cnt = 0; @@ -1796,6 +1858,7 @@ afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this, int ret = -1; priv = this->private; + local = frame->local; inode = afr_inode_find (this, gfid); if (!inode) @@ -1813,6 +1876,10 @@ afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this, if (replies[i].op_ret == -1) continue; + if (gf_uuid_is_null(local->heal_pgfid)) + _afr_set_heal_pgfid_from_reply (this, + frame->local, replies[i]); + /* The data segment of the changelog can be non-zero to indicate * the directory needs a full heal. So the check below ensures * it's not a directory before setting the data_selfheal boolean. @@ -2073,6 +2140,7 @@ afr_selfheal_do (call_frame_t *frame, xlator_t *this, uuid_t gfid) &data_selfheal, &metadata_selfheal, &entry_selfheal); + if (ret) goto out; @@ -2119,10 +2187,16 @@ int afr_selfheal (xlator_t *this, uuid_t gfid) { int ret = -1; - call_frame_t *frame = NULL; - afr_local_t *local = NULL; + gf_boolean_t tried_parent = _gf_false; + call_frame_t *frame = NULL; + afr_local_t *local = NULL; + char *ancestry_path = "Unknown"; + char *pgfid_str = NULL; + char *gfid_str = NULL; + +heal_gfid: + frame = afr_frame_create (this); - frame = afr_frame_create (this); if (!frame) return ret; @@ -2131,6 +2205,42 @@ afr_selfheal (xlator_t *this, uuid_t gfid) ret = afr_selfheal_do (frame, this, gfid); + if (tried_parent == _gf_false && ret && + !gf_uuid_is_null (local->heal_pgfid)) { + tried_parent = _gf_true; + pgfid_str = alloca (strlen (UUID0_STR) + 1); + gfid_str = alloca (strlen (UUID0_STR) + 1); + uuid_utoa_r (local->heal_pgfid, pgfid_str); + uuid_utoa_r (gfid, gfid_str); + if (local->heal_ancestry_path) + ancestry_path = local->heal_ancestry_path; + gf_log (this->name, GF_LOG_INFO, + "PGFID Healing - Heal failed for %s (%s), " + "but found parent gfid (%s), attempting to heal " + "parent directory by gfid.", + gfid_str, + ancestry_path, + pgfid_str); + ret = afr_selfheal (this, local->heal_pgfid); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "PGFID Healing - Healing of parent gfid " + "(%s) unsuccessful! Healing of %s (%s) " + "failed.", + pgfid_str, + gfid_str, + ancestry_path); + } else { + gf_log (this->name, GF_LOG_INFO, + "PGFID Healing - Healing of parent gfid %s " + "successful! Re-attempting heal of %s (%s).", + pgfid_str, + gfid_str, + ancestry_path); + goto heal_gfid; + } + } + if (frame) AFR_STACK_DESTROY (frame); diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 13e82f9aad4..78c9f0a3ab9 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -688,7 +688,6 @@ afr_selfheal_entry_do_subvol (call_frame_t *frame, xlator_t *this, xlator_t *subvol = NULL; afr_private_t *priv = NULL; gf_boolean_t mismatch = _gf_false; - afr_local_t *iter_local = NULL; afr_local_t *local = NULL; loc_t loc = {0,}; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index aa2af38e8cf..abd2f470131 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -825,6 +825,8 @@ typedef struct _afr_local { gf_boolean_t need_full_crawl; gf_boolean_t is_read_txn; loc_t *unsplit_locs; /* Un-split targets */ + uuid_t heal_pgfid; /* pgfid of file being healed */ + char *heal_ancestry_path; /* Full path if avail */ } afr_local_t; diff --git a/xlators/features/marker/src/marker.c b/xlators/features/marker/src/marker.c index be98f2a1cca..9201f38f7ff 100644 --- a/xlators/features/marker/src/marker.c +++ b/xlators/features/marker/src/marker.c @@ -390,13 +390,6 @@ _is_quota_internal_xattr (dict_t *d, char *k, data_t *v, void *data) if (fnmatch ("trusted.glusterfs.quota*", k, 0) == 0) return _gf_true; - /* It would be nice if posix filters pgfid xattrs. But since marker - * also takes up responsibility to clean these up, adding the filtering - * here (Check 'quota_xattr_cleaner') - */ - if (fnmatch (PGFID_XATTR_KEY_PREFIX"*", k, 0) == 0) - return _gf_true; - return _gf_false; } diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index 3cbb947d6b2..eb0986ffb6a 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -212,7 +212,7 @@ posix_lookup (call_frame_t *frame, xlator_t *this, } if (priv->update_pgfid_nlinks) { - if (!gf_uuid_is_null (loc->pargfid) && !IA_ISDIR (buf.ia_type)) { + if (!gf_uuid_is_null (loc->pargfid)) { MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, loc->pargfid); @@ -2543,8 +2543,7 @@ posix_rename (call_frame_t *frame, xlator_t *this, LOCK (&oldloc->inode->lock); { - if (!IA_ISDIR (oldloc->inode->ia_type) - && priv->update_pgfid_nlinks) { + if (priv->update_pgfid_nlinks) { MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, oldloc->pargfid); @@ -2610,8 +2609,7 @@ posix_rename (call_frame_t *frame, xlator_t *this, P_MSG_SET_XDATA_FAIL, "failed to set " GET_LINK_COUNT" for %s", real_newpath); - if (!IA_ISDIR (oldloc->inode->ia_type) - && priv->update_pgfid_nlinks) { + if (priv->update_pgfid_nlinks) { MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, newloc->pargfid); |
