summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author karthik-us <ksubrahm@redhat.com> 2017-03-09 18:08:28 +0530
committer Pranith Kumar Karampuri <pkarampu@redhat.com> 2017-04-20 20:38:54 -0400
commit 799a2ff8299db6d6dc75f1533f4bd5a3bb72164d (patch)
tree 7a35badad1ad8cd42e6a1d8cff61d7522ff7bf03
parent 98dc1f08c114adea1f4133c12dff0d4c3d75b30d (diff)
cluster/afr: GFID split brain resolution with favorite-child-policy
Problem:
Currently the automatic split brain resolution with favorite child policy is not resolving the GFID split brains.

Fix:
When there is a GFID split brain and the favorite child policy is set to size/mtime/ctime/majority, based on the policy decide on the source and sinks. Delete the entry from the sinks and recreate it from the source. Mark the appropriate pending attributes and resolve the GFID split brain. When the heal takes place it will complete the pending heals and reset the attributes.

Change-Id: Ie30e5373f94ca6f276745d9c3ad662b8acca6946
BUG: 1430719
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://review.gluster.org/16878
Smoke: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Ravishankar N <ravishankar@redhat.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
-rw-r--r-- tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t 228
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-entry.c 196
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal.h 11
3 files changed, 390 insertions, 45 deletions
diff --git a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
new file mode 100644
index 00000000000..2f14f838e49
--- /dev/null
+++ b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
@@ -0,0 +1,228 @@
+#!/bin/bash
+
+#Regression test: a gfid split brain on a file must be resolved according to
+#the configured favorite-child-policy (mtime, ctime, size, majority), both by
+#the self-heal daemon and by client side heal.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+#Switch every healer off so that the split brains created below stay in place
+#until a scenario explicitly turns healing back on.
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+
+##### Healing with favorite-child-policy = mtime ######
+##### and self-heal-daemon ######
+
+TEST $CLI volume set $V0 favorite-child-policy mtime
+#Create f1 while only brick 1 is up ...
+TEST kill_brick $V0 $H0 $B0/${V0}0
+echo "Sink based on mtime" > $M0/f1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+#... and create it again, later, while only brick 0 is up.
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "Source based on mtime" > $M0/f1
+
+#Gfids of file f1 on bricks 0 & 1 should differ
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f1)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f1)
+TEST [ "$gfid_0" != "$gfid_1" ]
+
+TEST $CLI volume set $V0 self-heal-daemon on
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+#We know that first brick has the latest mtime
+#(the delimiter is quoted so that cut always receives a single space)
+LATEST_MTIME_MD5=$(md5sum $B0/${V0}0/f1 | cut -d' ' -f1)
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#gfid split-brain should be resolved: brick 1 must now carry brick 0's gfid
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f1)
+TEST [ "$gfid_0" == "$gfid_1" ]
+
+#... and brick 1 must now carry brick 0's data
+HEALED_MD5=$(md5sum $B0/${V0}1/f1 | cut -d' ' -f1)
+TEST [ "$LATEST_MTIME_MD5" == "$HEALED_MD5" ]
+
+TEST $CLI volume set $V0 self-heal-daemon off
+
+
+##### Healing with favorite-child-policy = ctime ######
+##### and self-heal-daemon ######
+
+#gfid split-brain resolution should work even when the granular-entry-heal is
+#enabled
+TEST $CLI volume heal $V0 granular-entry-heal enable
+
+TEST $CLI volume set $V0 favorite-child-policy ctime
+#Create f2 while only brick 0 is up ...
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "Sink based on ctime" > $M0/f2
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+#... and create it again, later, while only brick 1 is up.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+echo "Source based on ctime" > $M0/f2
+
+#Gfids of file f2 on bricks 0 & 1 should differ
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f2)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f2)
+TEST [ "$gfid_0" != "$gfid_1" ]
+
+TEST $CLI volume set $V0 self-heal-daemon on
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+#We know that second brick has the latest ctime
+#(the delimiter is quoted so that cut always receives a single space)
+LATEST_CTIME_MD5=$(md5sum $B0/${V0}1/f2 | cut -d' ' -f1)
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#gfid split-brain should be resolved: brick 0 must now carry brick 1's gfid
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f2)
+TEST [ "$gfid_0" == "$gfid_1" ]
+
+#... and brick 0 must now carry brick 1's data
+HEALED_MD5=$(md5sum $B0/${V0}0/f2 | cut -d' ' -f1)
+TEST [ "$LATEST_CTIME_MD5" == "$HEALED_MD5" ]
+
+
+#Add one more brick, and heal.
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+TEST $CLI volume set $V0 self-heal-daemon off
+
+
+##### Healing using favorite-child-policy = size #####
+##### and client side heal #####
+
+#The self-heal daemon stays off here; healing is left to the client.
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+
+#Set the quorum-type to none, and create a gfid split brain
+TEST $CLI volume set $V0 cluster.quorum-type none
+#Only brick 2 is up: it receives the smallest f3.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "Smallest file" > $M0/f3
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+#Only brick 0 is up: it receives the middle sized f3.
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo "Second smallest file" > $M0/f3
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+#Only brick 1 is up: it receives the biggest f3.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo "Biggest among the three files" > $M0/f3
+
+#Bring back the down bricks.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+#Gfids of file f3 on all the bricks should differ
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f3)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f3)
+gfid_2=$(gf_get_gfid_xattr $B0/${V0}2/f3)
+TEST [ "$gfid_0" != "$gfid_1" ]
+TEST [ "$gfid_0" != "$gfid_2" ]
+TEST [ "$gfid_1" != "$gfid_2" ]
+
+#We know that second brick has the bigger size file
+#(the delimiter is quoted so that cut always receives a single space)
+BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/f3 | cut -d' ' -f1)
+
+#Access the file from the mount to trigger the client side heal
+TEST ls $M0/f3
+TEST cat $M0/f3
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#gfid split-brain should be resolved: bricks 0 & 2 must carry brick 1's gfid
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f3)
+gfid_2=$(gf_get_gfid_xattr $B0/${V0}2/f3)
+TEST [ "$gfid_0" == "$gfid_1" ]
+TEST [ "$gfid_2" == "$gfid_1" ]
+
+#... and bricks 0 & 2 must carry brick 1's data
+HEALED_MD5_1=$(md5sum $B0/${V0}0/f3 | cut -d' ' -f1)
+HEALED_MD5_2=$(md5sum $B0/${V0}2/f3 | cut -d' ' -f1)
+TEST [ "$BIGGER_FILE_MD5" == "$HEALED_MD5_1" ]
+TEST [ "$BIGGER_FILE_MD5" == "$HEALED_MD5_2" ]
+
+
+##### Healing using favorite-child-policy = majority #####
+##### and client side heal #####
+
+#NOTE(review): nothing visible in this script sets favorite-child-policy to
+#majority before this scenario; the last value set above is ctime. Confirm
+#whether a "volume set $V0 favorite-child-policy majority" is missing here.
+
+#Only brick 2 is up: it receives its own copy of f4.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "Does not agree with bricks 0 & 1" > $M0/f4
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+#Bricks 0 & 1 are up together: they receive the same copy of f4.
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo "Agree on bricks 0 & 1" > $M0/f4
+
+#Gfids of file f4 on bricks 0 & 1 should be same and bricks 0 & 2 should differ
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f4)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f4)
+gfid_2=$(gf_get_gfid_xattr $B0/${V0}2/f4)
+TEST [ "$gfid_0" == "$gfid_1" ]
+TEST [ "$gfid_0" != "$gfid_2" ]
+
+#We know that first and second bricks agree with each other. Pick any one of
+#them as source
+#(the delimiter is quoted so that cut always receives a single space)
+MAJORITY_MD5=$(md5sum $B0/${V0}0/f4 | cut -d' ' -f1)
+
+#Bring back the down brick and heal.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+#Access the file from the mount to trigger the client side heal
+TEST ls $M0/f4
+TEST cat $M0/f4
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#gfid split-brain should be resolved: brick 2 must carry the majority gfid
+gfid_2=$(gf_get_gfid_xattr $B0/${V0}2/f4)
+TEST [ "$gfid_0" == "$gfid_2" ]
+
+#... and brick 2 must carry the majority data
+HEALED_MD5=$(md5sum $B0/${V0}2/f4 | cut -d' ' -f1)
+TEST [ "$MAJORITY_MD5" == "$HEALED_MD5" ]
+
+cleanup;
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 413425ccad9..82ae6432d7d 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -17,6 +17,105 @@
#include "syncop-utils.h"
#include "events.h"
+/*
+ * Pick the source for a gfid split brain by majority vote.
+ *
+ * @replies:     per-brick replies, child_count entries.
+ * @child_count: number of bricks in the replica set.
+ *
+ * Returns the index of the first usable reply whose gfid is carried by more
+ * than child_count / 2 usable replies, or -1 when no gfid has a majority.
+ * A reply is usable when it is valid and its op_ret is not -1.
+ *
+ * Only replies after index i are compared with reply i: the lowest-indexed
+ * member of a majority group sees every other member of that group, so a
+ * single forward scan is sufficient.
+ */
+int
+afr_selfheal_gfid_mismatch_by_majority (struct afr_reply *replies,
+                                        int child_count)
+{
+        int     i     = 0;
+        int     j     = 0;
+        int     votes = 0;
+
+        for (i = 0; i < child_count; i++) {
+                if (!replies[i].valid || replies[i].op_ret == -1)
+                        continue;
+
+                /* The candidate votes for itself. */
+                votes = 1;
+                for (j = i + 1; j < child_count; j++) {
+                        /* Apply the same filter as for the candidate: a
+                         * reply that is not usable must not cast a vote. */
+                        if (!replies[j].valid || replies[j].op_ret == -1)
+                                continue;
+
+                        if (!gf_uuid_compare (replies[i].poststat.ia_gfid,
+                                              replies[j].poststat.ia_gfid))
+                                votes++;
+
+                        if (votes > child_count / 2)
+                                return i;
+                }
+        }
+
+        return -1;
+}
+
+/*
+ * Choose the source brick for an entry whose gfid differs between bricks,
+ * according to the configured favorite child policy.
+ *
+ * @this:      the xlator; this->private supplies child_count, children and
+ *             fav_child_policy.
+ * @replies:   per-brick replies for the entry.
+ * @inode:     handed through to the size/mtime/ctime policy helpers.
+ * @pargfid:   gfid of the parent directory (used in the messages only).
+ * @bname:     name of the entry (used in the messages only).
+ * @src_idx:   brick the caller compared against.
+ * @child_idx: brick whose gfid differs from the one on src_idx.
+ * @locked_on: per-brick flags; resolution is attempted only when the flag is
+ *             set for every brick.
+ * @src:       out parameter, index of the chosen source or -1.
+ *
+ * Returns 0 when a source was chosen. Returns -1, after logging the mismatch
+ * and raising an EVENT_AFR_SPLIT_BRAIN event, when no source could be chosen.
+ */
+int
+afr_gfid_split_brain_source (xlator_t *this, struct afr_reply *replies,
+                             inode_t *inode, uuid_t pargfid, char *bname,
+                             int src_idx, int child_idx,
+                             unsigned char *locked_on, int *src)
+{
+        afr_private_t  *priv     = NULL;
+        char            g1[64]   = {0,};
+        char            g2[64]   = {0,};
+        int             up_count = 0;
+
+        priv = this->private;
+
+        /* *src is examined at 'out' even on the paths that never assign it
+         * (bricks down, policy without a case below), so give it a defined
+         * value here instead of depending on the caller to pre-set it. */
+        *src = -1;
+
+        up_count = AFR_COUNT (locked_on, priv->child_count);
+        if (up_count != priv->child_count) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        AFR_MSG_SPLIT_BRAIN,
+                        "All the bricks should be up to resolve the gfid split "
+                        "brain");
+                goto out;
+        }
+
+        switch (priv->fav_child_policy) {
+        case AFR_FAV_CHILD_BY_SIZE:
+                *src = afr_sh_fav_by_size (this, replies, inode);
+                break;
+        case AFR_FAV_CHILD_BY_MTIME:
+                *src = afr_sh_fav_by_mtime (this, replies, inode);
+                break;
+        case AFR_FAV_CHILD_BY_CTIME:
+                *src = afr_sh_fav_by_ctime (this, replies, inode);
+                break;
+        case AFR_FAV_CHILD_BY_MAJORITY:
+                /* With exactly two bricks differing gfids can never form a
+                 * majority, so the vote is not attempted. */
+                if (priv->child_count != 2)
+                        *src = afr_selfheal_gfid_mismatch_by_majority (replies,
+                                                        priv->child_count);
+                else
+                        *src = -1;
+
+                if (*src == -1) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                AFR_MSG_SPLIT_BRAIN, "No majority to resolve "
+                                "gfid split brain");
+                }
+                break;
+        default:
+                /* Any other policy value: *src stays -1 and the mismatch is
+                 * reported below. */
+                break;
+        }
+
+out:
+        if (*src == -1) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+                        "Gfid mismatch detected for <gfid:%s>/%s>, %s on %s and"
+                        " %s on %s. Skipping conservative merge on the file.",
+                        uuid_utoa (pargfid), bname,
+                        uuid_utoa_r (replies[child_idx].poststat.ia_gfid, g1),
+                        priv->children[child_idx]->name,
+                        uuid_utoa_r (replies[src_idx].poststat.ia_gfid, g2),
+                        priv->children[src_idx]->name);
+                gf_event (EVENT_AFR_SPLIT_BRAIN, "subvol=%s;type=gfid;file="
+                          "<gfid:%s>/%s>;count=2;child-%d=%s;gfid-%d=%s;"
+                          "child-%d=%s;gfid-%d=%s", this->name,
+                          uuid_utoa (pargfid), bname, child_idx,
+                          priv->children[child_idx]->name, child_idx,
+                          uuid_utoa_r (replies[child_idx].poststat.ia_gfid, g1),
+                          src_idx, priv->children[src_idx]->name, src_idx,
+                          uuid_utoa_r (replies[src_idx].poststat.ia_gfid, g2));
+                return -1;
+        }
+        return 0;
+}
+
static int
afr_selfheal_entry_delete (xlator_t *this, inode_t *dir, const char *name,
inode_t *inode, int child, struct afr_reply *replies)
@@ -206,13 +305,15 @@ __afr_selfheal_heal_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
static int
afr_selfheal_detect_gfid_and_type_mismatch (xlator_t *this,
struct afr_reply *replies,
- uuid_t pargfid, char *bname,
- int src_idx)
+ inode_t *inode,
+ uuid_t pargfid,
+ char *bname, int src_idx,
+ unsigned char *locked_on,
+ int *src)
{
- int i = 0;
- char g1[64] = {0,};
- char g2[64] = {0,};
- afr_private_t *priv = NULL;
+ int i = 0;
+ int ret = -1;
+ afr_private_t *priv = NULL;
priv = this->private;
@@ -227,46 +328,33 @@ afr_selfheal_detect_gfid_and_type_mismatch (xlator_t *this,
continue;
if (gf_uuid_compare (replies[src_idx].poststat.ia_gfid,
- replies[i].poststat.ia_gfid)) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- AFR_MSG_SPLIT_BRAIN, "Gfid mismatch "
- "detected for <gfid:%s>/%s>, %s on %s and %s on %s. "
- "Skipping conservative merge on the file.",
- uuid_utoa (pargfid), bname,
- uuid_utoa_r (replies[i].poststat.ia_gfid, g1),
- priv->children[i]->name,
- uuid_utoa_r (replies[src_idx].poststat.ia_gfid,
- g2), priv->children[src_idx]->name);
- gf_event (EVENT_AFR_SPLIT_BRAIN,
- "subvol=%s;type=gfid;file=<gfid:%s>/%s>;count=2;"
- "child-%d=%s;gfid-%d=%s;child-%d=%s;gfid-%d=%s",
- this->name, uuid_utoa (pargfid), bname, i,
- priv->children[i]->name, i,
- uuid_utoa_r (replies[i].poststat.ia_gfid, g1),
- src_idx, priv->children[src_idx]->name, src_idx,
- uuid_utoa_r (replies[src_idx].poststat.ia_gfid, g2));
- return -1;
+ replies[i].poststat.ia_gfid)) {
+ ret = afr_gfid_split_brain_source (this, replies, inode,
+ pargfid, bname,
+ src_idx, i,
+ locked_on, src);
+ return ret;
}
if ((replies[src_idx].poststat.ia_type) !=
(replies[i].poststat.ia_type)) {
gf_msg (this->name, GF_LOG_ERROR, 0,
- AFR_MSG_SPLIT_BRAIN, "Type mismatch "
- "detected for <gfid:%s>/%s>, %s on %s and %s on %s. "
+ AFR_MSG_SPLIT_BRAIN, "Type mismatch detected "
+ "for <gfid:%s>/%s>, %s on %s and %s on %s. "
"Skipping conservative merge on the file.",
uuid_utoa (pargfid), bname,
- gf_inode_type_to_str (replies[i].poststat.ia_type),
+ gf_inode_type_to_str (replies[i].poststat.ia_type),
priv->children[i]->name,
- gf_inode_type_to_str (replies[src_idx].poststat.ia_type),
+ gf_inode_type_to_str (replies[src_idx].poststat.ia_type),
priv->children[src_idx]->name);
- gf_event (EVENT_AFR_SPLIT_BRAIN,
- "subvol=%s;type=file;file=<gfid:%s>/%s>;count=2;"
- "child-%d=%s;type-%d=%s;child-%d=%s;type-%d=%s",
- this->name, uuid_utoa (pargfid), bname, i,
- priv->children[i]->name, i,
- gf_inode_type_to_str(replies[i].poststat.ia_type),
- src_idx, priv->children[src_idx]->name, src_idx,
- gf_inode_type_to_str(replies[src_idx].poststat.ia_type));
+ gf_event (EVENT_AFR_SPLIT_BRAIN, "subvol=%s;type=file;"
+ "file=<gfid:%s>/%s>;count=2;child-%d=%s;type-"
+ "%d=%s;child-%d=%s;type-%d=%s",
+ this->name, uuid_utoa (pargfid), bname, i,
+ priv->children[i]->name, i,
+ gf_inode_type_to_str(replies[i].poststat.ia_type),
+ src_idx, priv->children[src_idx]->name, src_idx,
+ gf_inode_type_to_str(replies[src_idx].poststat.ia_type));
return -1;
}
}
@@ -283,11 +371,12 @@ __afr_selfheal_merge_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
int ret = 0;
int i = 0;
int source = -1;
+ int src = -1;
afr_private_t *priv = NULL;
priv = this->private;
- for (i = 0; i < priv->child_count; i++) {
+ for (i = 0; i < priv->child_count; i++) {
if (replies[i].valid && replies[i].op_ret == 0) {
source = i;
break;
@@ -306,24 +395,41 @@ __afr_selfheal_merge_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
}
}
- /* In case of a gfid or type mismatch on the entry, return -1.*/
- ret = afr_selfheal_detect_gfid_and_type_mismatch (this, replies,
+ /* In case of type mismatch / unable to resolve gfid mismatch on the
+ * entry, return -1.*/
+ ret = afr_selfheal_detect_gfid_and_type_mismatch (this, replies, inode,
fd->inode->gfid,
- name, source);
+ name, source,
+ locked_on, &src);
if (ret < 0)
return ret;
+ if (src != -1) {
+ source = src;
+ for (i = 0; i < priv->child_count; i++) {
+ if (i != src && replies[i].valid &&
+ gf_uuid_compare (replies[src].poststat.ia_gfid,
+ replies[i].poststat.ia_gfid)) {
+ sources[i] = 0;
+ }
+ }
+ }
for (i = 0; i < priv->child_count; i++) {
if (i == source || !healed_sinks[i])
continue;
- if (replies[i].op_errno != ENOENT)
+ if (src != -1) {
+ if (!gf_uuid_compare (replies[src].poststat.ia_gfid,
+ replies[i].poststat.ia_gfid))
+ continue;
+ } else if (replies[i].op_errno != ENOENT) {
continue;
+ }
- ret = afr_selfheal_recreate_entry (frame, i, source, sources,
- fd->inode, name, inode,
- replies);
+ ret |= afr_selfheal_recreate_entry (frame, i, source, sources,
+ fd->inode, name, inode,
+ replies);
}
return ret;
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index 735e520070e..2e22ac2d7a1 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -319,4 +319,15 @@ afr_choose_source_by_policy (afr_private_t *priv, unsigned char *sources,
int
afr_selfheal_metadata_by_stbuf (xlator_t *this, struct iatt *stbuf);
+
+int
+afr_sh_fav_by_size (xlator_t *this, struct afr_reply *replies,
+ inode_t *inode);
+int
+afr_sh_fav_by_mtime (xlator_t *this, struct afr_reply *replies,
+ inode_t *inode);
+int
+afr_sh_fav_by_ctime (xlator_t *this, struct afr_reply *replies,
+ inode_t *inode);
+
#endif /* !_AFR_SELFHEAL_H */