summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRavishankar N <ravishankar@redhat.com>2017-09-20 12:16:06 +0530
committerPranith Kumar Karampuri <pkarampu@redhat.com>2017-10-09 06:23:08 +0000
commit20fa80057eb430fd72b4fa31b9b65598b8ec1265 (patch)
tree3e1dd8c0cb7a394ebcdc7004ccaddb935c67e4d6
parent938addeb7ec634e431c2c8c0a768a2a9ed056c0d (diff)
afr: heal gfid as a part of entry heal
Problem: If a brick crashes after an entry (file or dir) is created but before gfid is assigned, the good bricks will have pending entry heal xattrs but the heal won't complete because afr_selfheal_recreate_entry() tries to create the entry again and it fails with EEXIST. Fix: We could have fixed posx_mknod/mkdir etc to assign the gfid if the file already exists but the right thing to do seems to be to trigger a lookup on the bad brick and let it heal the gfid instead of winding an mknod/mkdir in the first place. Change-Id: I82f76665a7541f1893ef8d847b78af6466aff1ff BUG: 1493415 Signed-off-by: Ravishankar N <ravishankar@redhat.com>
-rw-r--r--tests/bugs/replicate/bug-1493415-gfid-heal.t68
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c78
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c41
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-name.c64
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h4
5 files changed, 188 insertions, 67 deletions
diff --git a/tests/bugs/replicate/bug-1493415-gfid-heal.t b/tests/bugs/replicate/bug-1493415-gfid-heal.t
new file mode 100644
index 00000000000..125c35a7a21
--- /dev/null
+++ b/tests/bugs/replicate/bug-1493415-gfid-heal.t
@@ -0,0 +1,68 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST $CLI volume set $V0 self-heal-daemon off
+
+# Create base entry in indices/xattrop
+echo "Data" > $M0/FILE
+
+#------------------------------------------------------------------------------#
+TEST touch $M0/f1
+gfid_f1=$(gf_get_gfid_xattr $B0/${V0}0/f1)
+gfid_str_f1=$(gf_gfid_xattr_to_str $gfid_f1)
+
+# Remove gfid xattr and .glusterfs hard link from 2nd brick. This simulates a
+# brick crash at the point where file got created but no xattrs were set.
+TEST setfattr -x trusted.gfid $B0/${V0}1/f1
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
+
+# Assume there were no pending xattrs on parent dir due to 1st brick crashing
+# too. Then name heal from client must heal the gfid.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST stat $M0/f1
+EXPECT "$gfid_f1" gf_get_gfid_xattr $B0/${V0}1/f1
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
+
+#------------------------------------------------------------------------------#
+TEST mkdir $M0/dir
+TEST touch $M0/dir/f2
+gfid_f2=$(gf_get_gfid_xattr $B0/${V0}0/dir/f2)
+gfid_str_f2=$(gf_gfid_xattr_to_str $gfid_f2)
+
+# Remove gfid xattr and .glusterfs hard link from 2nd brick. This simulates a
+# brick crash at the point where file got created but no xattrs were set.
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir/f2
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2
+
+#Now simulate setting of pending entry xattr on parent dir of 1st brick.
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/dir
+create_brick_xattrop_entry $B0/${V0}0 dir
+
+#Trigger entry-heal via shd
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+EXPECT "$gfid_f2" gf_get_gfid_xattr $B0/${V0}1/dir/f2
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2
+
+#------------------------------------------------------------------------------#
+cleanup;
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 55b978bf962..20e81dd43e0 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -20,6 +20,84 @@ void
afr_heal_synctask (xlator_t *this, afr_local_t *local);
int
+afr_lookup_and_heal_gfid (xlator_t *this, inode_t *parent, const char *name,
+ inode_t *inode, struct afr_reply *replies,
+ int source, void *gfid)
+{
+ afr_private_t *priv = NULL;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ unsigned char *wind_on = NULL;
+ ia_type_t ia_type = IA_INVAL;
+ dict_t *xdata = NULL;
+ loc_t loc = {0, };
+ int ret = 0;
+ int i = 0;
+
+ priv = this->private;
+ wind_on = alloca0 (priv->child_count);
+ ia_type = replies[source].poststat.ia_type;
+
+ /* gfid heal on those subvolumes that do not have gfid associated
+ * with the inode and update those replies.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret != 0)
+ continue;
+ if (!gf_uuid_is_null (replies[i].poststat.ia_gfid) ||
+ replies[i].poststat.ia_type != ia_type)
+ continue;
+
+ wind_on[i] = 1;
+ }
+
+ if (AFR_COUNT(wind_on, priv->child_count) == 0)
+ return 0;
+
+ xdata = dict_new ();
+ if (!xdata) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = dict_set_static_bin (xdata, "gfid-req", gfid, 16);
+ if (ret) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ frame = afr_frame_create (this);
+ if (!frame) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ local = frame->local;
+ loc.parent = inode_ref (parent);
+ gf_uuid_copy (loc.pargfid, parent->gfid);
+ loc.name = name;
+ loc.inode = inode_ref (inode);
+
+ AFR_ONLIST (wind_on, frame, afr_selfheal_discover_cbk, lookup,
+ &loc, xdata);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!wind_on[i])
+ continue;
+ afr_reply_wipe (&replies[i]);
+ afr_reply_copy (&replies[i], &local->replies[i]);
+ }
+out:
+ loc_wipe (&loc);
+ if (frame)
+ AFR_STACK_DESTROY (frame);
+ if (xdata)
+ dict_unref (xdata);
+
+ return ret;
+}
+
+int
afr_gfid_sbrain_source_from_src_brick (xlator_t *this,
struct afr_reply *replies,
char *src_brick)
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index afa3f9044e1..647dd71911b 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -64,7 +64,6 @@ afr_selfheal_entry_delete (xlator_t *this, inode_t *dir, const char *name,
return ret;
}
-
int
afr_selfheal_recreate_entry (call_frame_t *frame, int dst, int source,
unsigned char *sources, inode_t *dir,
@@ -164,7 +163,6 @@ out:
return ret;
}
-
static int
__afr_selfheal_heal_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
char *name, inode_t *inode, int source,
@@ -187,6 +185,14 @@ __afr_selfheal_heal_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
(replies[source].op_errno != ENOENT))
return -replies[source].op_errno;
+ if (replies[source].op_ret == 0) {
+ ret = afr_lookup_and_heal_gfid (this, fd->inode, name,
+ inode, replies, source,
+ &replies[source].poststat.ia_gfid);
+ if (ret)
+ return ret;
+ }
+
for (i = 0; i < priv->child_count; i++) {
if (!healed_sinks[i])
continue;
@@ -223,8 +229,12 @@ afr_selfheal_detect_gfid_and_type_mismatch (xlator_t *this,
int i = 0;
int ret = -1;
afr_private_t *priv = NULL;
+ void *gfid = NULL;
+ ia_type_t ia_type = IA_INVAL;
priv = this->private;
+ gfid = &replies[src_idx].poststat.ia_gfid;
+ ia_type = replies[src_idx].poststat.ia_type;
for (i = 0; i < priv->child_count; i++) {
if (i == src_idx)
@@ -236,8 +246,8 @@ afr_selfheal_detect_gfid_and_type_mismatch (xlator_t *this,
if (replies[i].op_ret != 0)
continue;
- if (gf_uuid_compare (replies[src_idx].poststat.ia_gfid,
- replies[i].poststat.ia_gfid)) {
+ if (gf_uuid_compare (gfid, replies[i].poststat.ia_gfid) &&
+ (ia_type == replies[i].poststat.ia_type)) {
ret = afr_gfid_split_brain_source (this, replies, inode,
pargfid, bname,
src_idx, i,
@@ -251,8 +261,7 @@ afr_selfheal_detect_gfid_and_type_mismatch (xlator_t *this,
return ret;
}
- if ((replies[src_idx].poststat.ia_type) !=
- (replies[i].poststat.ia_type)) {
+ if (ia_type != replies[i].poststat.ia_type) {
gf_msg (this->name, GF_LOG_ERROR, 0,
AFR_MSG_SPLIT_BRAIN, "Type mismatch detected "
"for <gfid:%s>/%s>, %s on %s and %s on %s. "
@@ -310,6 +319,12 @@ __afr_selfheal_merge_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
}
}
+ ret = afr_lookup_and_heal_gfid (this, fd->inode, name, inode, replies,
+ source,
+ &replies[source].poststat.ia_gfid);
+ if (ret)
+ return ret;
+
/* In case of type mismatch / unable to resolve gfid mismatch on the
* entry, return -1.*/
ret = afr_selfheal_detect_gfid_and_type_mismatch (this, replies, inode,
@@ -542,9 +557,19 @@ afr_selfheal_entry_dirent (call_frame_t *frame, xlator_t *this,
struct afr_reply *replies = NULL;
struct afr_reply *par_replies = NULL;
afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
priv = this->private;
+ xattr = dict_new ();
+ if (!xattr)
+ return -ENOMEM;
+ ret = dict_set_int32 (xattr, GF_GFIDLESS_LOOKUP, 1);
+ if (ret) {
+ dict_unref (xattr);
+ return -1;
+ }
+
sources = alloca0 (priv->child_count);
sinks = alloca0 (priv->child_count);
healed_sinks = alloca0 (priv->child_count);
@@ -576,7 +601,7 @@ afr_selfheal_entry_dirent (call_frame_t *frame, xlator_t *this,
inode = afr_selfheal_unlocked_lookup_on (frame, fd->inode, name,
replies, locked_on,
- NULL);
+ xattr);
if (!inode) {
ret = -ENOMEM;
goto unlock;
@@ -608,6 +633,8 @@ unlock:
afr_replies_wipe (replies, priv->child_count);
if (par_replies)
afr_replies_wipe (par_replies, priv->child_count);
+ if (xattr)
+ dict_unref (xattr);
return ret;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
index 2aad4c75bee..352d151207e 100644
--- a/xlators/cluster/afr/src/afr-self-heal-name.c
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
@@ -18,50 +18,18 @@ int
__afr_selfheal_assign_gfid (xlator_t *this, inode_t *parent, uuid_t pargfid,
const char *bname, inode_t *inode,
struct afr_reply *replies, void *gfid,
- unsigned char *locked_on,
+ unsigned char *locked_on, int source,
gf_boolean_t is_gfid_absent)
{
int ret = 0;
int up_count = 0;
int locked_count = 0;
- int i = 0;
afr_private_t *priv = NULL;
- dict_t *xdata = NULL;
- loc_t loc = {0, };
- call_frame_t *new_frame = NULL;
- afr_local_t *new_local = NULL;
- unsigned char *wind_on = NULL;
priv = this->private;
- wind_on = alloca0 (priv->child_count);
-
- new_frame = afr_frame_create (this);
- if (!new_frame) {
- ret = -ENOMEM;
- goto out;
- }
-
- new_local = new_frame->local;
gf_uuid_copy (parent->gfid, pargfid);
- xdata = dict_new ();
- if (!xdata) {
- ret = -ENOMEM;
- goto out;
- }
-
- ret = dict_set_static_bin (xdata, "gfid-req", gfid, 16);
- if (ret) {
- ret = -ENOMEM;
- goto out;
- }
-
- loc.parent = inode_ref (parent);
- loc.inode = inode_ref (inode);
- gf_uuid_copy (loc.pargfid, pargfid);
- loc.name = bname;
-
if (is_gfid_absent) {
/* Ensure all children of AFR are up before performing gfid heal, to
* guard against the possibility of gfid split brain. */
@@ -79,34 +47,10 @@ __afr_selfheal_assign_gfid (xlator_t *this, inode_t *parent, uuid_t pargfid,
}
}
- /* gfid heal on those subvolumes that do not have gfid associated
- * with the inode and update those replies.
- */
- for (i = 0; i < priv->child_count; i++) {
- if (replies[i].valid && replies[i].op_ret == 0 &&
- !gf_uuid_is_null (replies[i].poststat.ia_gfid) &&
- !gf_uuid_compare (replies[i].poststat.ia_gfid, gfid))
- continue;
- wind_on[i] = 1;
- }
-
- AFR_ONLIST (wind_on, new_frame, afr_selfheal_discover_cbk, lookup,
- &loc, xdata);
-
- for (i = 0; i < priv->child_count; i++) {
- if (!wind_on[i])
- continue;
- afr_reply_wipe (&replies[i]);
- afr_reply_copy (&replies[i], &new_local->replies[i]);
- }
+ afr_lookup_and_heal_gfid (this, parent, bname, inode, replies, source,
+ gfid);
out:
- loc_wipe (&loc);
- if (xdata)
- dict_unref (xdata);
- if (new_frame)
- AFR_STACK_DESTROY (new_frame);
-
return ret;
}
@@ -481,7 +425,7 @@ __afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent,
is_gfid_absent = (gfid_idx == -1) ? _gf_true : _gf_false;
ret = __afr_selfheal_assign_gfid (this, parent, pargfid, bname, inode,
- replies, gfid, locked_on,
+ replies, gfid, locked_on, source,
is_gfid_absent);
if (ret)
return ret;
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index b54da1facfd..8b0047df28f 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -110,6 +110,10 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode);
int
afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode);
+int
+afr_lookup_and_heal_gfid (xlator_t *this, inode_t *parent, const char *name,
+ inode_t *inode, struct afr_reply *replies, int source,
+ void *gfid);
int
afr_selfheal_inodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,