From 971b386fb575f0a216983ea6cad09c42f9b0fb2f Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Fri, 13 Apr 2018 08:52:06 +0530 Subject: cluster/afr: Keep child-up until ping-event Problem: Suppose we have 2 bricks, brick-A and brick-B, with brick-A within halo-max-latency and brick-B above halo-max-latency, and both halo-min and halo-max replicas set to '1'. In this case, brick-A comes online and then its ping-latency is updated. When brick-B comes online, we have 2 up-bricks, so the code tries to find the brick with the worst latency to mark it down. Since brick-B just came online, its latency was always '0', so brick-A used to be marked offline and brick-B would eventually be the one to stay online even though brick-A is better suited. Fix: Treat the latency of a just-up child as AFR_HALO_MAX_LATENCY so that, until its ping-latency is known, the just-up brick is the one found as the worst child. Also initialize ping-latency to -1 and keep it negative until child-up. BUG: 1567881 fixes bz#1567881 Change-Id: I148262fe505468190f0eb99225d0f6d57cdb6f04 Signed-off-by: Pranith Kumar K --- xlators/cluster/afr/src/afr-common.c | 58 +++++++++++++++++++++--------------- xlators/cluster/afr/src/afr.c | 6 +++- xlators/cluster/afr/src/afr.h | 1 + 3 files changed, 40 insertions(+), 25 deletions(-) (limited to 'xlators/cluster') diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 21d0fee141f..01a5db54bdd 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -4903,14 +4903,36 @@ __afr_handle_ping_event (xlator_t *this, xlator_t *child_xlator, } } +static int64_t +afr_get_halo_latency (xlator_t *this) +{ + afr_private_t *priv = NULL; + int64_t halo_max_latency_msec = 0; + + priv = this->private; + + if (priv->shd.iamshd) { + halo_max_latency_msec = priv->shd.halo_max_latency_msec; + } else if (priv->nfsd.iamnfsd) { + halo_max_latency_msec = + priv->nfsd.halo_max_latency_msec; + } else { + halo_max_latency_msec = 
priv->halo_max_latency_msec; + } + gf_msg_debug (this->name, 0, "Using halo latency %ld", + halo_max_latency_msec); + return halo_max_latency_msec; +} + void __afr_handle_child_up_event (xlator_t *this, xlator_t *child_xlator, - const int idx, int64_t halo_max_latency_msec, + const int idx, int64_t child_latency_msec, int32_t *event, int32_t *call_psh, int32_t *up_child) { afr_private_t *priv = NULL; int up_children = 0; int worst_up_child = -1; + int64_t halo_max_latency_msec = afr_get_halo_latency (this); priv = this->private; @@ -4928,6 +4950,15 @@ __afr_handle_child_up_event (xlator_t *this, xlator_t *child_xlator, *call_psh = 1; *up_child = idx; up_children = __afr_get_up_children_count (priv); + /* + * If this is an _actual_ CHILD_UP event, we + * want to set the child_latency to MAX to indicate + * the child needs ping data to be available before doing child-up + */ + if (child_latency_msec < 0 && priv->halo_enabled) { + /*set to INT64_MAX-1 so that it is found for best_down_child*/ + priv->child_latency[idx] = AFR_HALO_MAX_LATENCY; + } /* * Handle the edge case where we exceed @@ -4950,6 +4981,7 @@ __afr_handle_child_up_event (xlator_t *this, xlator_t *child_xlator, up_children--; } } + if (up_children > priv->halo_max_replicas && !priv->shd.iamshd) { worst_up_child = find_worst_up_child (this); @@ -5052,28 +5084,6 @@ __afr_handle_child_down_event (xlator_t *this, xlator_t *child_xlator, priv->last_event[idx] = *event; } -static int64_t -afr_get_halo_latency (xlator_t *this) -{ - afr_private_t *priv = NULL; - int64_t halo_max_latency_msec = 0; - - priv = this->private; - - if (priv->shd.iamshd) { - halo_max_latency_msec = priv->shd.halo_max_latency_msec; - } else if (priv->nfsd.iamnfsd) { - halo_max_latency_msec = - priv->nfsd.halo_max_latency_msec; - } else { - halo_max_latency_msec = priv->halo_max_latency_msec; - } - gf_msg_debug (this->name, 0, "Using halo latency %ld", - halo_max_latency_msec); - return halo_max_latency_msec; -} - - int32_t afr_notify 
(xlator_t *this, int32_t event, void *data, void *data2) @@ -5184,7 +5194,7 @@ afr_notify (xlator_t *this, int32_t event, break; case GF_EVENT_CHILD_UP: __afr_handle_child_up_event (this, child_xlator, - idx, halo_max_latency_msec, &event, &call_psh, + idx, child_latency_msec, &event, &call_psh, &up_child); break; diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index ad5446191ec..cb4b1537984 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -534,6 +534,10 @@ init (xlator_t *this) ret = -ENOMEM; goto out; } + /*Initialize to -ve ping timeout so that they are not considered + * in child-up events until ping-event comes*/ + for (i = 0; i < child_count; i++) + priv->child_latency[i] = -1; priv->children = GF_CALLOC (sizeof (xlator_t *), child_count, gf_afr_mt_xlator_t); @@ -773,7 +777,7 @@ struct volume_options options[] = { { .key = {"halo-max-latency"}, .type = GF_OPTION_TYPE_INT, .min = 1, - .max = 99999, + .max = AFR_HALO_MAX_LATENCY, .default_value = "5", .op_version = {GD_OP_VERSION_3_11_0}, .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 129670517f3..b96be62a910 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -36,6 +36,7 @@ #define ARBITER_BRICK_INDEX 2 +#define AFR_HALO_MAX_LATENCY 99999 typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this); typedef int (*afr_read_txn_wind_t) (call_frame_t *frame, xlator_t *this, int subvol); -- cgit