summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--tests/basic/afr/gfid-unsplit-shd.t (renamed from tests/basic/gfid_unsplit_shd.t)4
-rw-r--r--tests/basic/afr/gfid-unsplit-type-mismatch.t (renamed from tests/basic/gfid_unsplit_type_mismatch.t)11
-rw-r--r--tests/basic/afr/gfid-unsplit.t (renamed from tests/basic/gfid_unsplit.t)10
-rw-r--r--tests/basic/afr/shd-autofix-nogfid.t (renamed from tests/basic/shd_autofix_nogfid.t)4
-rw-r--r--tests/basic/afr/shd-force-inspect.t (renamed from tests/basic/shd_force_inspect.t)4
-rw-r--r--tests/basic/afr/shd-pgfid-heal.t81
-rw-r--r--xlators/cluster/afr/src/afr-common.c8
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c118
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c1
-rw-r--r--xlators/cluster/afr/src/afr.h2
-rw-r--r--xlators/features/marker/src/marker.c7
-rw-r--r--xlators/storage/posix/src/posix.c8
12 files changed, 226 insertions, 32 deletions
diff --git a/tests/basic/gfid_unsplit_shd.t b/tests/basic/afr/gfid-unsplit-shd.t
index 25fab290177..77da5243724 100644
--- a/tests/basic/gfid_unsplit_shd.t
+++ b/tests/basic/afr/gfid-unsplit-shd.t
@@ -1,7 +1,7 @@
#!/bin/bash
-. $(dirname $0)/../include.rc
-. $(dirname $0)/../volume.rc
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
cleanup;
diff --git a/tests/basic/gfid_unsplit_type_mismatch.t b/tests/basic/afr/gfid-unsplit-type-mismatch.t
index 51e6a36445b..9e205021a0d 100644
--- a/tests/basic/gfid_unsplit_type_mismatch.t
+++ b/tests/basic/afr/gfid-unsplit-type-mismatch.t
@@ -1,7 +1,7 @@
#!/bin/bash
-. $(dirname $0)/../include.rc
-. $(dirname $0)/../volume.rc
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
cleanup;
@@ -13,9 +13,9 @@ TEST $CLI volume info;
TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3};
TEST $CLI volume set $V0 cluster.choose-local off
TEST $CLI volume set $V0 cluster.self-heal-daemon off
-TEST $CLI volume set $V0 nfs.disable off
+TEST $CLI volume set $V0 nfs.disable on
TEST $CLI volume set $V0 cluster.quorum-type none
-TEST $CLI volume set $V0 cluster.favorite-child-policy majority
+TEST $CLI volume set $V0 cluster.favorite-child-policy mtime
#EST $CLI volume set $V0 cluster.favorite-child-by-majority on
#EST $CLI volume set $V0 cluster.favorite-child-by-mtime on
TEST $CLI volume set $V0 cluster.metadata-self-heal off
@@ -24,6 +24,8 @@ TEST $CLI volume set $V0 cluster.entry-self-heal off
TEST $CLI volume start $V0
sleep 5
+pkill -f gluster/glustershd
+
# Part I: FUSE Test
TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 \
--attribute-timeout=0 --entry-timeout=0
@@ -69,7 +71,6 @@ sleep 1
# Verify the file is readable
TEST dd if=splitfile of=/dev/null 2>/dev/null
-
# Verify entry healing happened on the back-end regardless of the
# gfid-splitbrain state of the directory.
TEST stat $B0/${V0}1/splitfile
diff --git a/tests/basic/gfid_unsplit.t b/tests/basic/afr/gfid-unsplit.t
index 0df96bd5ed6..0b883ab658f 100644
--- a/tests/basic/gfid_unsplit.t
+++ b/tests/basic/afr/gfid-unsplit.t
@@ -1,8 +1,8 @@
#!/bin/bash
-. $(dirname $0)/../include.rc
-. $(dirname $0)/../volume.rc
-. $(dirname $0)/../nfs.rc
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
cleanup;
@@ -19,7 +19,7 @@ TEST $CLI volume set $V0 cluster.self-heal-daemon off
TEST $CLI volume set $V0 nfs.disable off
#EST $CLI volume set $V0 cluster.favorite-child-by-majority on
#EST $CLI volume set $V0 cluster.favorite-child-by-mtime on
-TEST $CLI volume set $V0 cluster.favorite-child-policy majority
+TEST $CLI volume set $V0 cluster.favorite-child-policy mtime
TEST $CLI volume set $V0 cluster.metadata-self-heal off
TEST $CLI volume set $V0 cluster.data-self-heal off
TEST $CLI volume set $V0 cluster.entry-self-heal off
@@ -39,6 +39,8 @@ MD5=$(md5sum $M0/splitfile | cut -d\ -f1)
TEST kill_brick $V0 $H0 $B0/${V0}1
GFID_DIR_B1="$B0/${V0}1/.glusterfs/$(getfattr -n trusted.gfid -e hex $B0/${V0}1/splitfile 2>/dev/null | grep ^trusted | cut -d= -f2 | awk '{print substr($0,3,2)}')"
rm -rf $GFID_DIR_B1
+mkdir -p $B0/${V0}1/.glusterfs/fd/55
+ln $B0/${V0}1/splitfile $B0/${V0}1/.glusterfs/fd/55/fd551a5c-fddd-4c1a-a4d0-96ef09ef5c08
TEST setfattr -n "trusted.gfid" -v "0xfd551a5cfddd4c1aa4d096ef09ef5c08" $B0/${V0}1/splitfile
GFID_DIR_B3="$B0/${V0}3/.glusterfs/$(getfattr -n trusted.gfid -e hex $B0/${V0}3/splitfile 2>/dev/null | grep ^trusted | cut -d= -f2 | awk '{print substr($0,3,2)}')"
diff --git a/tests/basic/shd_autofix_nogfid.t b/tests/basic/afr/shd-autofix-nogfid.t
index 5a6ed66f522..7c9026dce62 100644
--- a/tests/basic/shd_autofix_nogfid.t
+++ b/tests/basic/afr/shd-autofix-nogfid.t
@@ -1,7 +1,7 @@
#!/bin/bash
-. $(dirname $0)/../include.rc
-. $(dirname $0)/../volume.rc
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
cleanup;
diff --git a/tests/basic/shd_force_inspect.t b/tests/basic/afr/shd-force-inspect.t
index ebf3f7a17ad..caceb841322 100644
--- a/tests/basic/shd_force_inspect.t
+++ b/tests/basic/afr/shd-force-inspect.t
@@ -1,7 +1,7 @@
#!/bin/bash
-. $(dirname $0)/../include.rc
-. $(dirname $0)/../volume.rc
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
cleanup;
diff --git a/tests/basic/afr/shd-pgfid-heal.t b/tests/basic/afr/shd-pgfid-heal.t
new file mode 100644
index 00000000000..d12d29e13ba
--- /dev/null
+++ b/tests/basic/afr/shd-pgfid-heal.t
@@ -0,0 +1,81 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+# Setup a cluster with 3 replicas, and fav child by majority on
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3};
+TEST $CLI volume set $V0 cluster.choose-local off
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+TEST $CLI volume set $V0 nfs.disable on
+TEST $CLI volume set $V0 cluster.quorum-type none
+#EST $CLI volume set $V0 cluster.favorite-child-by-majority on
+#EST $CLI volume set $V0 cluster.favorite-child-by-mtime on
+TEST $CLI volume set $V0 cluster.favorite-child-policy majority
+TEST $CLI volume set $V0 storage.build-pgfid on
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume start $V0
+sleep 5
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 \
+ --attribute-timeout=0 --entry-timeout=0
+
+cd $M0
+mkdir -p a/b/c
+dd if=/dev/urandom of=a/b/c/testfile bs=128k count=5 2>/dev/null
+MD5=$(md5sum a/b/c/testfile | cut -d\ -f1)
+
+# Kill the SHD while we setup the test
+pkill -f gluster/glustershd
+# Kill brick 1 as well so that its backend can be modified while it is down
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+# Grab the GFIDs of the file and its parent dirs
+GFID_PARENT_B_RAW=$(getfattr -n trusted.gfid -e hex $B0/${V0}1/a/b 2>/dev/null | grep trusted.gfid | cut -d= -f2)
+GFID_PARENT_B_FORMATTED=$(echo "$GFID_PARENT_B_RAW" | awk '{print substr($1,3,8)"-"substr($1,11,4)"-"substr($1,15,4)"-"substr($1,19,4)"-"substr($1,23,12)}')
+GFID_PARENT_B_LINK_B1="$B0/${V0}1/.glusterfs/$(echo $GFID_PARENT_B_RAW | awk '{print substr($0,3,2)"/"substr($0,5,2)"/"substr($1,3,8)"-"substr($1,11,4)"-"substr($1,15,4)"-"substr($1,19,4)"-"substr($1,23,12)}')"
+GFID_PARENT_C_RAW=$(getfattr -n trusted.gfid -e hex $B0/${V0}1/a/b/c 2>/dev/null | grep trusted.gfid | cut -d= -f2)
+GFID_PARENT_C_FORMATTED=$(echo "$GFID_PARENT_C_RAW" | awk '{print substr($1,3,8)"-"substr($1,11,4)"-"substr($1,15,4)"-"substr($1,19,4)"-"substr($1,23,12)}')
+GFID_PARENT_C_LINK_B1="$B0/${V0}1/.glusterfs/$(echo $GFID_PARENT_C_RAW | awk '{print substr($0,3,2)"/"substr($0,5,2)"/"substr($1,3,8)"-"substr($1,11,4)"-"substr($1,15,4)"-"substr($1,19,4)"-"substr($1,23,12)}')"
+GFID_RAW=$(getfattr -n trusted.gfid -e hex $B0/${V0}1/a/b/c/testfile 2>/dev/null | grep trusted.gfid | cut -d= -f2)
+GFID_FORMATTED=$(echo "$GFID_RAW" | awk '{print substr($1,3,8)"-"substr($1,11,4)"-"substr($1,15,4)"-"substr($1,19,4)"-"substr($1,23,12)}')
+GFID_LINK_B1="$B0/${V0}1/.glusterfs/$(echo $GFID_RAW | awk '{print substr($0,3,2)"/"substr($0,5,2)"/"substr($1,3,8)"-"substr($1,11,4)"-"substr($1,15,4)"-"substr($1,19,4)"-"substr($1,23,12)}')"
+
+#
+# Here we are going to create a situation such that a file 3
+# levels deep into the FS requires healing, along with 2 levels
+# of parent directories. The only signal SHD has is that the
+# file itself needs healing. The directory (entry) heals are
+# missing; simulating a crash or some sort of bug that we need
+# to be able to recover from.
+#
+
+# Nuke the file from brick 1, along with the parent directories
+# and all backend hard/symbolic links
+rm -f $B0/${V0}1/a/b/c/testfile
+rm -f $GFID_LINK_B1
+rmdir $B0/${V0}1/a/b/c
+rm -f $GFID_PARENT_C_LINK_B1
+rmdir $B0/${V0}1/a/b
+rm -f $GFID_PARENT_B_LINK_B1
+
+# Now manually queue up the file itself (not its parents) for healing
+touch $B0/${V0}3/.glusterfs/indices/xattrop/$GFID_FORMATTED
+
+# Kick off the SHD and wait up to 30 seconds for healing to take place
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 30 "0" get_pending_heal_count $V0
+sleep 5
+
+# Verify the file was healed back to brick 1
+TEST stat $B0/${V0}1/a/b/c/testfile
+
+cleanup
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index cfea53208c8..333ea888395 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1381,6 +1381,12 @@ afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req)
"Unable to set list-xattr in dict ");
}
+ ret = dict_set_int32 (xattr_req, GET_ANCESTRY_PATH_KEY, 42);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Unable to set ancestry path key in dict ");
+ }
+
return ret;
}
@@ -1751,6 +1757,8 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
GF_FREE (local->readable);
GF_FREE (local->readable2);
+ GF_FREE (local->heal_ancestry_path);
+
if (local->inode)
inode_unref (local->inode);
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 12b031d78e6..98ad65f29fd 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1212,7 +1212,6 @@ afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this,
}
}
-
/* count the number of dirty fops witnessed */
for (i = 0; i < priv->child_count; i++)
witness[i] += dirty[i];
@@ -1220,6 +1219,67 @@ afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this,
return 0;
}
+/*
+ * Examine a reply's xdata for a PGFID xattr key and, if one is
+ * found and parses, record the parent gfid in local->heal_pgfid.
+ * When the reply also carries an ancestry path, a copy is kept in
+ * local->heal_ancestry_path (freed when the local is cleaned up).
+ *
+ * This can then be used to fall back to healing the parent
+ * directory in cases where metadata/data healing isn't yet
+ * possible because an entry heal of the parent directory has not
+ * yet taken place.
+ *
+ * This is critical for a couple of reasons:
+ * 1. General healing predictability - When the SHD attempts to
+ *    heal a given GFID, it should be able to do so without having
+ *    to wait for some other dependent heal to take place.
+ * 2. Reliability - In some cases the parent directory may require
+ *    healing, but the req'd entry in the indices/xattrop directory
+ *    may not exist (e.g. bugs/crashes etc). This feature removes
+ *    the dependency on that index entry being present.
+ */
+void
+_afr_set_heal_pgfid_from_reply (xlator_t *this, afr_local_t *local,
+                                struct afr_reply reply)
+{
+        /* Replies without xdata carry no pgfid; do not dereference NULL. */
+        data_pair_t *trav = reply.xdata ? reply.xdata->members_list : NULL;
+        uuid_t *pgfid = NULL;
+        int32_t ret = 0;
+        int32_t pgfid_prefix_len = sizeof (PGFID_XATTR_KEY_PREFIX) - 1;
+        char *pgfid_str = NULL;
+        data_t *ancestry_path_data = NULL;
+        char *ancestry_path = "Unknown";
+
+        pgfid = &local->heal_pgfid;
+
+        /* The first key carrying the pgfid prefix wins. */
+        while (trav) {
+                if (!strncmp (PGFID_XATTR_KEY_PREFIX, trav->key,
+                              pgfid_prefix_len)) {
+                        pgfid_str = trav->key + pgfid_prefix_len;
+                        ret = gf_uuid_parse (pgfid_str, *pgfid);
+                        break;
+                }
+                trav = trav->next;
+        }
+
+        if (!ret && !gf_uuid_is_null (*pgfid)) {
+                if (!dict_lookup (reply.xdata, "glusterfs.ancestry.path",
+                                  &ancestry_path_data)) {
+                        ancestry_path = data_to_str (ancestry_path_data);
+                        /* Allocation free'd on local destroy */
+                        local->heal_ancestry_path =
+                                gf_strdup (ancestry_path);
+                }
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "Found pgfid (%s) for %s",
+                        uuid_utoa (*pgfid),
+                        ancestry_path);
+        }
+}
+
void
afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type,
int source, unsigned char *sources,
@@ -1788,6 +1848,8 @@ afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this,
gf_boolean_t *entry_selfheal)
{
afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
inode_t *inode = NULL;
int i = 0;
int valid_cnt = 0;
@@ -1796,6 +1858,7 @@ afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this,
int ret = -1;
priv = this->private;
+ local = frame->local;
inode = afr_inode_find (this, gfid);
if (!inode)
@@ -1813,6 +1876,10 @@ afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this,
if (replies[i].op_ret == -1)
continue;
+ if (gf_uuid_is_null(local->heal_pgfid))
+ _afr_set_heal_pgfid_from_reply (this,
+ frame->local, replies[i]);
+
/* The data segment of the changelog can be non-zero to indicate
* the directory needs a full heal. So the check below ensures
* it's not a directory before setting the data_selfheal boolean.
@@ -2073,6 +2140,7 @@ afr_selfheal_do (call_frame_t *frame, xlator_t *this, uuid_t gfid)
&data_selfheal,
&metadata_selfheal,
&entry_selfheal);
+
if (ret)
goto out;
@@ -2119,10 +2187,16 @@ int
afr_selfheal (xlator_t *this, uuid_t gfid)
{
int ret = -1;
- call_frame_t *frame = NULL;
- afr_local_t *local = NULL;
+ gf_boolean_t tried_parent = _gf_false;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ char *ancestry_path = "Unknown";
+ char *pgfid_str = NULL;
+ char *gfid_str = NULL;
+
+heal_gfid:
+ frame = afr_frame_create (this);
- frame = afr_frame_create (this);
if (!frame)
return ret;
@@ -2131,6 +2205,42 @@ afr_selfheal (xlator_t *this, uuid_t gfid)
ret = afr_selfheal_do (frame, this, gfid);
+ if (tried_parent == _gf_false && ret &&
+ !gf_uuid_is_null (local->heal_pgfid)) {
+ tried_parent = _gf_true;
+ pgfid_str = alloca (strlen (UUID0_STR) + 1);
+ gfid_str = alloca (strlen (UUID0_STR) + 1);
+ uuid_utoa_r (local->heal_pgfid, pgfid_str);
+ uuid_utoa_r (gfid, gfid_str);
+ if (local->heal_ancestry_path)
+ ancestry_path = local->heal_ancestry_path;
+ gf_log (this->name, GF_LOG_INFO,
+ "PGFID Healing - Heal failed for %s (%s), "
+ "but found parent gfid (%s), attempting to heal "
+ "parent directory by gfid.",
+ gfid_str,
+ ancestry_path,
+ pgfid_str);
+ ret = afr_selfheal (this, local->heal_pgfid);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "PGFID Healing - Healing of parent gfid "
+ "(%s) unsuccessful! Healing of %s (%s) "
+ "failed.",
+ pgfid_str,
+ gfid_str,
+ ancestry_path);
+ } else {
+ gf_log (this->name, GF_LOG_INFO,
+ "PGFID Healing - Healing of parent gfid %s "
+ "successful! Re-attempting heal of %s (%s).",
+ pgfid_str,
+ gfid_str,
+ ancestry_path);
+ goto heal_gfid;
+ }
+ }
+
if (frame)
AFR_STACK_DESTROY (frame);
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 13e82f9aad4..78c9f0a3ab9 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -688,7 +688,6 @@ afr_selfheal_entry_do_subvol (call_frame_t *frame, xlator_t *this,
xlator_t *subvol = NULL;
afr_private_t *priv = NULL;
gf_boolean_t mismatch = _gf_false;
- afr_local_t *iter_local = NULL;
afr_local_t *local = NULL;
loc_t loc = {0,};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index aa2af38e8cf..abd2f470131 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -825,6 +825,8 @@ typedef struct _afr_local {
gf_boolean_t need_full_crawl;
gf_boolean_t is_read_txn;
loc_t *unsplit_locs; /* Un-split targets */
+ uuid_t heal_pgfid; /* pgfid of file being healed */
+ char *heal_ancestry_path; /* Full path if avail */
} afr_local_t;
diff --git a/xlators/features/marker/src/marker.c b/xlators/features/marker/src/marker.c
index be98f2a1cca..9201f38f7ff 100644
--- a/xlators/features/marker/src/marker.c
+++ b/xlators/features/marker/src/marker.c
@@ -390,13 +390,6 @@ _is_quota_internal_xattr (dict_t *d, char *k, data_t *v, void *data)
if (fnmatch ("trusted.glusterfs.quota*", k, 0) == 0)
return _gf_true;
- /* It would be nice if posix filters pgfid xattrs. But since marker
- * also takes up responsibility to clean these up, adding the filtering
- * here (Check 'quota_xattr_cleaner')
- */
- if (fnmatch (PGFID_XATTR_KEY_PREFIX"*", k, 0) == 0)
- return _gf_true;
-
return _gf_false;
}
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index 3cbb947d6b2..eb0986ffb6a 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -212,7 +212,7 @@ posix_lookup (call_frame_t *frame, xlator_t *this,
}
if (priv->update_pgfid_nlinks) {
- if (!gf_uuid_is_null (loc->pargfid) && !IA_ISDIR (buf.ia_type)) {
+ if (!gf_uuid_is_null (loc->pargfid)) {
MAKE_PGFID_XATTR_KEY (pgfid_xattr_key,
PGFID_XATTR_KEY_PREFIX,
loc->pargfid);
@@ -2543,8 +2543,7 @@ posix_rename (call_frame_t *frame, xlator_t *this,
LOCK (&oldloc->inode->lock);
{
- if (!IA_ISDIR (oldloc->inode->ia_type)
- && priv->update_pgfid_nlinks) {
+ if (priv->update_pgfid_nlinks) {
MAKE_PGFID_XATTR_KEY (pgfid_xattr_key,
PGFID_XATTR_KEY_PREFIX,
oldloc->pargfid);
@@ -2610,8 +2609,7 @@ posix_rename (call_frame_t *frame, xlator_t *this,
P_MSG_SET_XDATA_FAIL, "failed to set "
GET_LINK_COUNT" for %s", real_newpath);
- if (!IA_ISDIR (oldloc->inode->ia_type)
- && priv->update_pgfid_nlinks) {
+ if (priv->update_pgfid_nlinks) {
MAKE_PGFID_XATTR_KEY (pgfid_xattr_key,
PGFID_XATTR_KEY_PREFIX,
newloc->pargfid);