summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--tests/bugs/replicate/bug-1180545.t48
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c20
2 files changed, 66 insertions, 2 deletions
diff --git a/tests/bugs/replicate/bug-1180545.t b/tests/bugs/replicate/bug-1180545.t
new file mode 100644
index 00000000000..748d5defa91
--- /dev/null
+++ b/tests/bugs/replicate/bug-1180545.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+#Create gfid split-brain of directory and check if conservative merge
+#completes successfully.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 cluster.heal-timeout 60
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+#Create files with alternate brick down. One file has gfid mismatch.
+TEST mkdir $M0/DIR
+
+TEST kill_brick $V0 $H0 $B0/brick1
+TEST touch $M0/DIR/FILE
+TEST touch $M0/DIR/file{1..5}
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST touch $M0/DIR/FILE
+TEST touch $M0/DIR/file{6..10}
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+#Trigger heal and verify number of entries in backend
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+
+EXPECT_WITHIN $HEAL_TIMEOUT '2' count_sh_entries $B0/brick0
+EXPECT_WITHIN $HEAL_TIMEOUT '2' count_sh_entries $B0/brick1
+#Two entries for DIR and two for FILE
+EXPECT_WITHIN $HEAL_TIMEOUT "4" afr_get_pending_heal_count $V0
+TEST diff <(ls $B0/brick0/DIR) <(ls $B0/brick1/DIR)
+cleanup
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index b78bfa99f20..28e332db740 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -503,6 +503,7 @@ afr_selfheal_entry_do_subvol (call_frame_t *frame, xlator_t *this,
call_frame_t *iter_frame = NULL;
xlator_t *subvol = NULL;
afr_private_t *priv = NULL;
+ gf_boolean_t mismatch = _gf_false;
priv = this->private;
subvol = priv->children[child];
@@ -532,6 +533,11 @@ afr_selfheal_entry_do_subvol (call_frame_t *frame, xlator_t *this,
entry->d_name);
AFR_STACK_RESET (iter_frame);
+ if (ret == -1) {
+ /* gfid or type mismatch. */
+ mismatch = _gf_true;
+ ret = 0;
+ }
if (ret)
break;
}
@@ -542,6 +548,9 @@ afr_selfheal_entry_do_subvol (call_frame_t *frame, xlator_t *this,
}
AFR_STACK_DESTROY (iter_frame);
+ if (mismatch == _gf_true)
+ /* undo pending will be skipped */
+ ret = -1;
return ret;
}
@@ -552,6 +561,7 @@ afr_selfheal_entry_do (call_frame_t *frame, xlator_t *this, fd_t *fd,
{
int i = 0;
afr_private_t *priv = NULL;
+ gf_boolean_t mismatch = _gf_false;
int ret = 0;
priv = this->private;
@@ -563,14 +573,20 @@ afr_selfheal_entry_do (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (i != source && !healed_sinks[i])
continue;
ret = afr_selfheal_entry_do_subvol (frame, this, fd, i);
+ if (ret == -1) {
+ /* gfid or type mismatch. */
+ mismatch = _gf_true;
+ continue;
+ }
if (ret)
break;
}
+ if (mismatch == _gf_true)
+ /* undo pending will be skipped */
+ ret = -1;
return ret;
}
-
-
static int
__afr_selfheal_entry (call_frame_t *frame, xlator_t *this, fd_t *fd,
unsigned char *locked_on)