cluster/afr: Send truncate on arbiter brick from SHD

Problem: In an arbiter volume configuration, SHD will not send any writes to the arbiter brick even if there is a data pending marker for the arbiter brick. If we have an arbiter setup on the geo-rep master and there are data pending markers for files on the arbiter brick, SHD will not record any data changelog for them during healing. While syncing the data from master to slave, if the arbiter brick is considered ACTIVE, there is a chance that the slave will miss some data. If the arbiter brick is newly added or replaced, there is a chance of the slave missing all the data during sync. Fix: If there is a data pending marker for the arbiter brick, send a truncate to the arbiter brick during heal, so that the truncate is recorded as a data transaction in the changelog. Change-Id: I3242ba6cea6da495c418ef860d9c3359c5459dec fixes: bz#1687687 Signed-off-by: karthik-us <ksubrahm@redhat.com>
author: karthik-us <ksubrahm@redhat.com> 2019-03-07 22:26:49 +0530
committer: karthik-us <ksubrahm@redhat.com> 2019-03-12 12:51:26 +0530
commit: a086c6473875ab94a4a08baf454eac567f575b06 (patch)
tree: b9782f5c93f5e86e36cfc0512460a57fb55b7433
parent: a00953ed212a7071b152c4afccd35b92fa5a682a (diff)
3 files changed, 52 insertions, 16 deletions
diff --git a/tests/bugs/replicate/bug-1686568-send-truncate-on-arbiter-from-shd.t b/tests/bugs/replicate/bug-1686568-send-truncate-on-arbiter-from-shd.t
new file mode 100644
index 00000000000..78581e99614
--- /dev/null
+++ b/tests/bugs/replicate/bug-1686568-send-truncate-on-arbiter-from-shd.t
@@ -0,0 +1,38 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+CHANGELOG_PATH_0="$B0/${V0}2/.glusterfs/changelogs"
+ROLLOVER_TIME=100
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 changelog.changelog on
+TEST $CLI volume set $V0 changelog.rollover-time $ROLLOVER_TIME
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST dd if=/dev/zero of=$M0/file1 bs=128K count=5
+
+TEST $CLI volume profile $V0 start
+TEST $CLI volume add-brick $V0 replica 3 arbiter 1 $H0:$B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+TEST $CLI volume profile $V0 info
+truncate_count=$($CLI volume profile $V0 info | grep TRUNCATE | awk '{count += $8} END {print count}')
+
+EXPECT "1" echo $truncate_count
+EXPECT "1" check_changelog_op ${CHANGELOG_PATH_0} "^ D "
+
+cleanup;
diff --git a/tests/volume.rc b/tests/volume.rc
index 261c6554d46..87ca9589305 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -879,5 +879,5 @@ function check_changelog_op {
         local clog_path=$1
         local op=$2
 
-        $PYTHON $(dirname $0)/../../utils/changelogparser.py ${clog_path}/CHANGELOG | grep $op | wc -l
+        $PYTHON $(dirname $0)/../../utils/changelogparser.py ${clog_path}/CHANGELOG | grep "$op" | wc -l
 }
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 9b296b9ad23..2aed60baa1a 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -382,17 +382,18 @@ __afr_selfheal_truncate_sinks(call_frame_t *frame, xlator_t *this, fd_t *fd,
 {
     afr_local_t *local = NULL;
     afr_private_t *priv = NULL;
-    unsigned char arbiter_sink_status = 0;
     int i = 0;
 
     local = frame->local;
     priv = this->private;
 
-    if (priv->arbiter_count) {
-        arbiter_sink_status = healed_sinks[ARBITER_BRICK_INDEX];
-        healed_sinks[ARBITER_BRICK_INDEX] = 0;
-    }
-
+    /* This will send truncate on the arbiter brick as well if it is marked as
+     * a sink. If changelog is enabled on the volume, it captures the truncate
+     * as a data transaction on the arbiter brick. This helps geo-rep to
+     * properly sync the data from master to slave if the arbiter is the
+     * ACTIVE brick during syncing and had some entries healed for data as
+     * part of self heal.
+     */
     AFR_ONLIST(healed_sinks, frame, afr_sh_generic_fop_cbk, ftruncate, fd, size,
                NULL);
 
@@ -403,8 +404,6 @@ __afr_selfheal_truncate_sinks(call_frame_t *frame, xlator_t *this, fd_t *fd,
             */
             healed_sinks[i] = 0;
 
-    if (arbiter_sink_status)
-        healed_sinks[ARBITER_BRICK_INDEX] = arbiter_sink_status;
     return 0;
 }
 
@@ -700,19 +699,18 @@ __afr_selfheal_data(call_frame_t *frame, xlator_t *this, fd_t *fd,
             goto unlock;
         }
 
-        if (priv->arbiter_count &&
-            AFR_COUNT(healed_sinks, priv->child_count) == 1 &&
-            healed_sinks[ARBITER_BRICK_INDEX]) {
-            is_arbiter_the_only_sink = _gf_true;
-            goto restore_time;
-        }
-
         ret = __afr_selfheal_truncate_sinks(
             frame, this, fd, healed_sinks,
             locked_replies[source].poststat.ia_size);
         if (ret < 0)
             goto unlock;
 
+        if (priv->arbiter_count &&
+            AFR_COUNT(healed_sinks, priv->child_count) == 1 &&
+            healed_sinks[ARBITER_BRICK_INDEX]) {
+            is_arbiter_the_only_sink = _gf_true;
+            goto restore_time;
+        }
         ret = 0;
     }
 unlock:
author	karthik-us <ksubrahm@redhat.com>	2019-03-07 22:26:49 +0530
committer	karthik-us <ksubrahm@redhat.com>	2019-03-12 12:51:26 +0530
commit	a086c6473875ab94a4a08baf454eac567f575b06 (patch)
tree	b9782f5c93f5e86e36cfc0512460a57fb55b7433
parent	a00953ed212a7071b152c4afccd35b92fa5a682a (diff)