summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Pranith Kumar K <pkarampu@redhat.com> 2018-04-13 08:52:06 +0530
committer Pranith Kumar Karampuri <pkarampu@redhat.com> 2018-04-25 05:47:48 +0000
commit 971b386fb575f0a216983ea6cad09c42f9b0fb2f (patch)
tree 08df2fa930207a37337ec5ae9042f79af8b37453
parent 93464ffd6dcbe2a2f91c0e35f933f814f523e9be (diff)
cluster/afr: Keep child-up until ping-event
Problem: Suppose we have 2 bricks, brick-A and brick-B, where brick-A is within halo-max-latency and brick-B exceeds halo-max-latency, and both halo-min and halo-max replicas are set to '1'. In this case, brick-A comes online and then its ping-latency is updated. When brick-B comes online, we have 2 up-bricks, so the code tries to find the brick with the worst latency to mark it down. Since brick-B just came online, it always had '0' latency, so brick-A used to be marked offline and brick-B would eventually be the one to stay online even when brick-A is more suited. Fix: Consider the latency of a just-up child as AFR_HALO_MAX_LATENCY so that, until its ping-latency is known, the just-up brick is the one found as the worst child. Also keep ping-latency as -1 until child-up during initialization. BUG: 1567881 fixes bz#1567881 Change-Id: I148262fe505468190f0eb99225d0f6d57cdb6f04 Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
-rw-r--r-- xlators/cluster/afr/src/afr-common.c 58
-rw-r--r-- xlators/cluster/afr/src/afr.c 6
-rw-r--r-- xlators/cluster/afr/src/afr.h 1
3 files changed, 40 insertions, 25 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 21d0fee141f..01a5db54bdd 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -4903,14 +4903,36 @@ __afr_handle_ping_event (xlator_t *this, xlator_t *child_xlator,
}
}
+static int64_t
+afr_get_halo_latency (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int64_t halo_max_latency_msec = 0;
+
+ priv = this->private;
+
+ if (priv->shd.iamshd) {
+ halo_max_latency_msec = priv->shd.halo_max_latency_msec;
+ } else if (priv->nfsd.iamnfsd) {
+ halo_max_latency_msec =
+ priv->nfsd.halo_max_latency_msec;
+ } else {
+ halo_max_latency_msec = priv->halo_max_latency_msec;
+ }
+ gf_msg_debug (this->name, 0, "Using halo latency %ld",
+ halo_max_latency_msec);
+ return halo_max_latency_msec;
+}
+
void
__afr_handle_child_up_event (xlator_t *this, xlator_t *child_xlator,
- const int idx, int64_t halo_max_latency_msec,
+ const int idx, int64_t child_latency_msec,
int32_t *event, int32_t *call_psh, int32_t *up_child)
{
afr_private_t *priv = NULL;
int up_children = 0;
int worst_up_child = -1;
+ int64_t halo_max_latency_msec = afr_get_halo_latency (this);
priv = this->private;
@@ -4928,6 +4950,15 @@ __afr_handle_child_up_event (xlator_t *this, xlator_t *child_xlator,
*call_psh = 1;
*up_child = idx;
up_children = __afr_get_up_children_count (priv);
+ /*
+ * If this is an _actual_ CHILD_UP event, we
+ * want to set the child_latency to MAX to indicate
+ * the child needs ping data to be available before doing child-up
+ */
+ if (child_latency_msec < 0 && priv->halo_enabled) {
+ /* set to AFR_HALO_MAX_LATENCY so that it is found for best_down_child */
+ priv->child_latency[idx] = AFR_HALO_MAX_LATENCY;
+ }
/*
* Handle the edge case where we exceed
@@ -4950,6 +4981,7 @@ __afr_handle_child_up_event (xlator_t *this, xlator_t *child_xlator,
up_children--;
}
}
+
if (up_children > priv->halo_max_replicas &&
!priv->shd.iamshd) {
worst_up_child = find_worst_up_child (this);
@@ -5052,28 +5084,6 @@ __afr_handle_child_down_event (xlator_t *this, xlator_t *child_xlator,
priv->last_event[idx] = *event;
}
-static int64_t
-afr_get_halo_latency (xlator_t *this)
-{
- afr_private_t *priv = NULL;
- int64_t halo_max_latency_msec = 0;
-
- priv = this->private;
-
- if (priv->shd.iamshd) {
- halo_max_latency_msec = priv->shd.halo_max_latency_msec;
- } else if (priv->nfsd.iamnfsd) {
- halo_max_latency_msec =
- priv->nfsd.halo_max_latency_msec;
- } else {
- halo_max_latency_msec = priv->halo_max_latency_msec;
- }
- gf_msg_debug (this->name, 0, "Using halo latency %ld",
- halo_max_latency_msec);
- return halo_max_latency_msec;
-}
-
-
int32_t
afr_notify (xlator_t *this, int32_t event,
void *data, void *data2)
@@ -5184,7 +5194,7 @@ afr_notify (xlator_t *this, int32_t event,
break;
case GF_EVENT_CHILD_UP:
__afr_handle_child_up_event (this, child_xlator,
- idx, halo_max_latency_msec, &event, &call_psh,
+ idx, child_latency_msec, &event, &call_psh,
&up_child);
break;
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index ad5446191ec..cb4b1537984 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -534,6 +534,10 @@ init (xlator_t *this)
ret = -ENOMEM;
goto out;
}
+ /* Initialize each child's ping latency to a negative value so that it is
+ * not considered in child-up events until a ping-event comes */
+ for (i = 0; i < child_count; i++)
+ priv->child_latency[i] = -1;
priv->children = GF_CALLOC (sizeof (xlator_t *), child_count,
gf_afr_mt_xlator_t);
@@ -773,7 +777,7 @@ struct volume_options options[] = {
{ .key = {"halo-max-latency"},
.type = GF_OPTION_TYPE_INT,
.min = 1,
- .max = 99999,
+ .max = AFR_HALO_MAX_LATENCY,
.default_value = "5",
.op_version = {GD_OP_VERSION_3_11_0},
.flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 129670517f3..b96be62a910 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -36,6 +36,7 @@
#define ARBITER_BRICK_INDEX 2
+#define AFR_HALO_MAX_LATENCY 99999
typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this);
typedef int (*afr_read_txn_wind_t) (call_frame_t *frame, xlator_t *this, int subvol);