From 3f0658b54c92131ec468a66b9fe0c3ac86e42061 Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Fri, 30 Aug 2019 10:30:31 +0530 Subject: afr: wake up index healer threads (Backport of https://review.gluster.org/#/c/glusterfs/+/23288/) ...whenever shd is re-enabled after disabling or there is a change in `cluster.heal-timeout`, without needing to restart shd or waiting for the current `cluster.heal-timeout` seconds to expire. See BZ 1743988 for more details. Change-Id: Ia5ebd7c8e9f5b54cba3199c141fdd1af2f9b9bfe fixes: bz#1743988 Reported-by: Glen Kiessling Signed-off-by: Ravishankar N --- tests/bugs/replicate/bug-1744548-heal-timeout.t | 42 +++++++++++++++++++++++++ xlators/cluster/afr/src/afr-common.c | 6 ++-- xlators/cluster/afr/src/afr-self-heald.c | 14 ++++++--- xlators/cluster/afr/src/afr-self-heald.h | 3 -- xlators/cluster/afr/src/afr.c | 10 ++++++ xlators/cluster/afr/src/afr.h | 2 ++ 6 files changed, 66 insertions(+), 11 deletions(-) create mode 100644 tests/bugs/replicate/bug-1744548-heal-timeout.t diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t new file mode 100644 index 00000000000..3cb73bcad52 --- /dev/null +++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t @@ -0,0 +1,42 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +TEST glusterd; +TEST pidof glusterd; +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} +TEST $CLI volume heal $V0 disable +TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 +TEST ! $CLI volume heal $V0 + +# Enable shd and verify that index crawl is triggered immediately. +TEST $CLI volume profile $V0 start +TEST $CLI volume profile $V0 info clear +TEST $CLI volume heal $V0 enable +TEST $CLI volume heal $V0 +# Each brick does 3 opendirs, corresponding to dirty, xattrop and entry-changes +COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'` +TEST [ "$COUNT" == "333" ] + +# Check that a change in heal-timeout is honoured immediately. +TEST $CLI volume set $V0 cluster.heal-timeout 5 +sleep 10 +COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'` +# Two crawls must have happened. +TEST [ "$COUNT" == "666" ] + +# shd must not heal if it is disabled and heal-timeout is changed. +TEST $CLI volume heal $V0 disable +TEST $CLI volume profile $V0 info clear +TEST $CLI volume set $V0 cluster.heal-timeout 6 +sleep 6 +COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'` +TEST [ -z $COUNT ] +cleanup; diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 5d56efca777..5f8159304cf 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -5626,10 +5626,8 @@ afr_notify(xlator_t *this, int32_t event, void *data, void *data2) * b) Already heard from everyone, but we now got a child-up * event. */ - if (have_heard_from_all && priv->shd.iamshd) { - for (i = 0; i < priv->child_count; i++) - if (priv->child_up[i]) - afr_selfheal_childup(this, i); + if (have_heard_from_all) { + afr_selfheal_childup(this, priv); } } out: diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index 7a2f43c646a..824f8c0127c 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -1258,12 +1258,18 @@ out: return ret; } -int -afr_selfheal_childup(xlator_t *this, int subvol) +void +afr_selfheal_childup(xlator_t *this, afr_private_t *priv) { - afr_shd_index_healer_spawn(this, subvol); + int subvol = 0; - return 0; + if (!priv->shd.iamshd) + return; + for (subvol = 0; subvol < priv->child_count; subvol++) + if (priv->child_up[subvol]) + afr_shd_index_healer_spawn(this, subvol); + + return; } int diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h index 7de7c431460..19905394540 100644 --- a/xlators/cluster/afr/src/afr-self-heald.h +++ b/xlators/cluster/afr/src/afr-self-heald.h @@ -59,9 +59,6 @@ typedef struct { uint32_t halo_max_latency_msec; } afr_self_heald_t; -int -afr_selfheal_childup(xlator_t *this, int subvol); - int afr_selfheal_daemon_init(xlator_t *this); diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 0a5bab89bcf..0b27f507843 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -141,6 +141,7 @@ reconfigure(xlator_t *this, dict_t *options) afr_private_t *priv = NULL; xlator_t *read_subvol = NULL; int read_subvol_index = -1; + int timeout_old = 0; int ret = -1; int index = -1; char *qtype = NULL; @@ -150,6 +151,7 @@ reconfigure(xlator_t *this, dict_t *options) char *locking_scheme = NULL; gf_boolean_t consistent_io = _gf_false; gf_boolean_t choose_local_old = _gf_false; + gf_boolean_t enabled_old = _gf_false; priv = this->private; @@ -255,11 +257,13 @@ reconfigure(xlator_t *this, dict_t *options) GF_OPTION_RECONF("ensure-durability", priv->ensure_durability, options, bool, out); + enabled_old = priv->shd.enabled; GF_OPTION_RECONF("self-heal-daemon", priv->shd.enabled, options, bool, out); GF_OPTION_RECONF("iam-self-heal-daemon", priv->shd.iamshd, options, bool, out); + timeout_old = priv->shd.timeout; GF_OPTION_RECONF("heal-timeout", priv->shd.timeout, options, int32, out); GF_OPTION_RECONF("consistent-metadata", priv->consistent_metadata, options, @@ -283,6 +287,12 @@ reconfigure(xlator_t *this, dict_t *options) consistent_io = _gf_false; priv->consistent_io = consistent_io; + if (priv->shd.enabled) { + if ((priv->shd.enabled != enabled_old) || + (timeout_old != priv->shd.timeout)) + afr_selfheal_childup(this, priv); + } + ret = 0; out: return ret; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 88e327bb00e..600c7551801 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -1339,4 +1339,6 @@ afr_lookup_has_quorum(call_frame_t *frame, xlator_t *this, void afr_mark_new_entry_changelog(call_frame_t *frame, xlator_t *this); +void +afr_selfheal_childup(xlator_t *this, afr_private_t *priv); #endif /* __AFR_H__ */ -- cgit