summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Ravishankar N <ravishankar@redhat.com> 2015-08-24 07:16:44 +0000
committer Raghavendra Bhat <raghavendra@redhat.com> 2015-09-09 04:53:48 -0700
commit e6388d647c221e8041bc1d426fae2eaeef864aec (patch)
tree 3b88b52df28a02dc4149b084dc165241ddcbe210
parent 68a07320202efe24bb59937850e4bf24c6aad34c (diff)
afr: launch index heal on local subvols upon a child-up event
Backport of http://review.gluster.org/#/c/11912/

Problem:
When a replica's child goes down and comes up, the index heal is triggered only on the child that just came up. This does not serve the intended purpose as the list of files that need to be healed to this child is actually captured on the other child of the replica.

Fix:
Launch index-heal on all local children of the replica xlator which just received a child up. Note that afr_selfheal_childup() eventually calls afr_shd_index_healer() which will not run the heal on non-local children.

Change-Id: I524fda17c28864758b35679cfb232f81f8374571
BUG: 1256245
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: http://review.gluster.org/11994
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Reviewed-by: Raghavendra Bhat <raghavendra@redhat.com>
Tested-by: Raghavendra Bhat <raghavendra@redhat.com>
-rw-r--r-- xlators/cluster/afr/src/afr-common.c 28
1 files changed, 11 insertions, 17 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 5de6b52b274..3c3e65b4bfd 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -3614,7 +3614,6 @@ afr_notify (xlator_t *this, int32_t event,
int idx = -1;
int ret = -1;
int call_psh = 0;
- int up_child = -1;
dict_t *input = NULL;
dict_t *output = NULL;
gf_boolean_t had_quorum = _gf_false;
@@ -3675,7 +3674,6 @@ afr_notify (xlator_t *this, int32_t event,
priv->child_up[idx] = 1;
call_psh = 1;
- up_child = idx;
for (i = 0; i < priv->child_count; i++)
if (priv->child_up[i] == 1)
up_children++;
@@ -3811,22 +3809,18 @@ afr_notify (xlator_t *this, int32_t event,
if (propagate)
ret = default_notify (this, event, data);
- if (!had_heard_from_all && have_heard_from_all && priv->shd.iamshd) {
- /*
- * Since self-heal is supposed to be launched only after
- * the responses from all the bricks are collected,
- * launch self-heals now on all up subvols.
- */
- for (i = 0; i < priv->child_count; i++)
- if (priv->child_up[i])
- afr_selfheal_childup (this, i);
- } else if (have_heard_from_all && call_psh && priv->shd.iamshd) {
- /*
- * Already heard from everyone. Just launch heal on now up
- * subvolume.
+ if ((!had_heard_from_all) || call_psh) {
+ /* Launch self-heal on all local subvolumes if:
+ * a) We have_heard_from_all for the first time
+ * b) Already heard from everyone, but we now got a child-up
+ * event.
*/
- afr_selfheal_childup (this, up_child);
- }
+ if (have_heard_from_all && priv->shd.iamshd) {
+ for (i = 0; i < priv->child_count; i++)
+ if (priv->child_up[i])
+ afr_selfheal_childup (this, i);
+ }
+ }
out:
return ret;
}