summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPranith Kumar K <pkarampu@redhat.com>2020-02-04 18:42:33 +0530
committerPranith Kumar Karampuri <pkarampu@redhat.com>2020-03-13 13:20:37 +0000
commitb164a74884becef281b57ef93428bb740e3e342e (patch)
tree996aee140f4bd86adf2d18a311ff23dd08d0e176
parenteb916c057036db8289b41265797e5dce066d1512 (diff)
cluster/afr: Fixes for halo
Current implementation assumes that ping-event will come after connect event but that may not be the case in the cases where after socket connection fds need to be re-opened which would consume more time. So handle any order of the ping/child-up events. fixes: bz#1800583 Change-Id: I6bcdc0caa503bdc039ef2b4739fbf4afae121f05 Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
-rw-r--r--tests/afr.rc10
-rw-r--r--tests/basic/afr/halo.t61
-rw-r--r--tests/volume.rc2
-rw-r--r--xlators/cluster/afr/src/afr-common.c19
-rw-r--r--xlators/cluster/afr/src/afr.c4
-rw-r--r--xlators/cluster/afr/src/afr.h1
6 files changed, 91 insertions, 6 deletions
diff --git a/tests/afr.rc b/tests/afr.rc
index 35f352df78f..5fc7fa1898d 100644
--- a/tests/afr.rc
+++ b/tests/afr.rc
@@ -105,3 +105,13 @@ function get_quorum_type()
local repl_id="$3"
cat $m/.meta/graphs/active/$v-replicate-$repl_id/private|grep quorum-type|awk '{print $3}'
}
+
+function afr_private_key_value()
+{
+ local v=$1
+ local m=$2
+ local replica_id=$3
+ local key=$4
+#xargs at the end will strip leading spaces
+ grep -E "^${key} = " $m/.meta/graphs/active/${v}-replicate-${replica_id}/private | cut -f2 -d'=' | xargs
+}
diff --git a/tests/basic/afr/halo.t b/tests/basic/afr/halo.t
new file mode 100644
index 00000000000..3f61f5a0402
--- /dev/null
+++ b/tests/basic/afr/halo.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+#Tests that halo basic functionality works as expected
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+function get_up_child()
+{
+ if [ "1" == $(afr_private_key_value $V0 $M0 0 "child_up\[0\]") ];
+ then
+ echo 0
+ elif [ "1" == $(afr_private_key_value $V0 $M0 0 "child_up\[1\]") ]
+ then
+ echo 1
+ fi
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.halo-enabled yes
+TEST $CLI volume set $V0 cluster.halo-max-replicas 1
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+EXPECT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[0\]"
+EXPECT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[1\]"
+EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[0\]"
+EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[1\]"
+
+up_id=$(get_up_child)
+TEST [[ ! -z "$up_id" ]]
+
+down_id=$((1-up_id))
+
+TEST kill_brick $V0 $H0 $B0/${V0}${up_id}
+#As max-replicas is configured to be 1, down_child should be up now
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${down_id}\]"
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "child_up\[${down_id}\]"
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${up_id}\]"
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "child_up\[${up_id}\]"
+EXPECT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[${up_id}\]"
+EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[${down_id}\]"
+
+#Bring the brick back up and the state should be restored
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${up_id}\]"
+
+up_id=$(get_up_child)
+TEST [[ ! -z "$up_id" ]]
+down_id=$((1-up_id))
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${down_id}\]"
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "child_up\[${down_id}\]"
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${up_id}\]"
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "child_up\[${up_id}\]"
+EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[0\]"
+EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[1\]"
+
+cleanup;
diff --git a/tests/volume.rc b/tests/volume.rc
index 288d491de39..bc768c9434f 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -193,7 +193,7 @@ function afr_child_up_status_meta {
local mnt=$1
local repl=$2
local child=$3
- grep "child_up\[$child\]" $mnt/.meta/graphs/active/$repl/private | awk '{print $3}'
+ grep -E "^child_up\[$child\]" $mnt/.meta/graphs/active/$repl/private | awk '{print $3}'
}
function client_connected_status_meta {
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 5462f0c7f53..4ee83659c6e 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -5665,6 +5665,8 @@ afr_priv_dump(xlator_t *this)
GF_ATOMIC_GET(priv->pending_reads[i]));
sprintf(key, "child_latency[%d]", i);
gf_proc_dump_write(key, "%" PRId64, priv->child_latency[i]);
+ sprintf(key, "halo_child_up[%d]", i);
+ gf_proc_dump_write(key, "%d", priv->halo_child_up[i]);
}
gf_proc_dump_write("data_self_heal", "%d", priv->data_self_heal);
gf_proc_dump_write("metadata_self_heal", "%d", priv->metadata_self_heal);
@@ -5841,7 +5843,7 @@ find_best_down_child(xlator_t *this)
priv = this->private;
for (i = 0; i < priv->child_count; i++) {
- if (priv->child_up[i] && priv->child_latency[i] >= 0 &&
+ if (!priv->child_up[i] && priv->child_latency[i] >= 0 &&
priv->child_latency[i] < best_latency) {
best_child = i;
best_latency = priv->child_latency[i];
@@ -5913,7 +5915,9 @@ __afr_handle_ping_event(xlator_t *this, xlator_t *child_xlator, const int idx,
"), "
"marking child down.",
child_latency_msec, halo_max_latency_msec);
- *event = GF_EVENT_CHILD_DOWN;
+ if (priv->halo_child_up[idx]) {
+ *event = GF_EVENT_CHILD_DOWN;
+ }
}
} else if (child_latency_msec < halo_max_latency_msec &&
priv->child_up[idx] == 0) {
@@ -5925,7 +5929,9 @@ __afr_handle_ping_event(xlator_t *this, xlator_t *child_xlator, const int idx,
"), "
"marking child up.",
child_latency_msec, halo_max_latency_msec);
- *event = GF_EVENT_CHILD_UP;
+ if (priv->halo_child_up[idx]) {
+ *event = GF_EVENT_CHILD_UP;
+ }
} else {
gf_log(child_xlator->name, GF_LOG_INFO,
"Not marking child %d up, "
@@ -5992,7 +5998,10 @@ __afr_handle_child_up_event(xlator_t *this, xlator_t *child_xlator,
if (child_latency_msec < 0) {
/*set to INT64_MAX-1 so that it is found for best_down_child*/
- priv->child_latency[idx] = AFR_HALO_MAX_LATENCY;
+ priv->halo_child_up[idx] = 1;
+ if (priv->child_latency[idx] < 0) {
+ priv->child_latency[idx] = AFR_HALO_MAX_LATENCY;
+ }
}
/*
@@ -6081,6 +6090,7 @@ __afr_handle_child_down_event(xlator_t *this, xlator_t *child_xlator, int idx,
*/
if (child_latency_msec < 0) {
priv->child_latency[idx] = child_latency_msec;
+ priv->halo_child_up[idx] = 0;
}
priv->child_up[idx] = 0;
@@ -6661,6 +6671,7 @@ afr_priv_destroy(afr_private_t *priv)
GF_FREE(priv->pending_key);
GF_FREE(priv->children);
GF_FREE(priv->child_up);
+ GF_FREE(priv->halo_child_up);
GF_FREE(priv->child_latency);
LOCK_DESTROY(&priv->lock);
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index ec7aa226821..a38489d9932 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -556,8 +556,10 @@ init(xlator_t *this)
priv->child_latency = GF_MALLOC(sizeof(*priv->child_latency) * child_count,
gf_afr_mt_child_latency_t);
+ priv->halo_child_up = GF_CALLOC(sizeof(unsigned char), child_count,
+ gf_afr_mt_char);
- if (!priv->child_up || !priv->child_latency) {
+ if (!priv->child_up || !priv->child_latency || !priv->halo_child_up) {
ret = -ENOMEM;
goto out;
}
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 88456562610..7f50a27e6c9 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -191,6 +191,7 @@ typedef struct _afr_private {
struct list_head ta_onwireq;
unsigned char *child_up;
+ unsigned char *halo_child_up;
int64_t *child_latency;
unsigned char *local;