summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Pranith Kumar K <pkarampu@redhat.com> 2015-02-01 15:03:46 +0530
committer Raghavendra Bhat <raghavendra@redhat.com> 2015-03-30 00:20:56 -0700
commit bd7f4451aef70c4c968d3ca4e5996ffc96cf64fa (patch)
tree b8a279f90338010e364eb68cc91387ba945a276e
parent d1eb4f520b35c1057c7cb3427a51dd6ae75cc61f (diff)
cluster/ec: Wait for all bricks to notify before notifying parent
Backport of http://review.gluster.org/9523

Once enough bricks are up, ec now waits for the remaining bricks to notify (or for the notify timer to expire) before sending CHILD_UP to the parent. This prevents I/O from starting while some bricks are still coming up, which could otherwise result in spurious self-heals.

BUG: 1188471
Change-Id: Iaea335d59431d8d85a236963a365f5c791fc7c49
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/9552
Reviewed-by: Xavier Hernandez <xhernandez@datalab.es>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Raghavendra Bhat <raghavendra@redhat.com>
-rw-r--r-- xlators/cluster/ec/src/ec.c 48
1 files changed, 34 insertions, 14 deletions
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index bb79f2cf02c..1d681d14c84 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -192,14 +192,21 @@ int32_t reconfigure(xlator_t * this, dict_t * options)
glusterfs_event_t
ec_get_event_from_state (ec_t *ec)
{
- int down_count = 0;
+ int down_count = 0;
- if (ec->xl_up_count >= ec->fragments)
- return GF_EVENT_CHILD_UP;
+ if (ec->xl_up_count >= ec->fragments) {
+ /* If ec is up but some subvolumes are yet to notify, give
+ * grace time for other subvols to notify to prevent start of
+ * I/O which may result in self-heals */
+ if (ec->timer && ec->xl_notify_count < ec->nodes)
+ return GF_EVENT_MAXVAL;
- down_count = ec->xl_notify_count - ec->xl_up_count;
- if (down_count > ec->redundancy)
- return GF_EVENT_CHILD_DOWN;
+ return GF_EVENT_CHILD_UP;
+ } else {
+ down_count = ec->xl_notify_count - ec->xl_up_count;
+ if (down_count > ec->redundancy)
+ return GF_EVENT_CHILD_DOWN;
+ }
return GF_EVENT_MAXVAL;
}
@@ -229,9 +236,10 @@ ec_down (xlator_t *this, ec_t *ec)
}
void
-ec_notify_down (void *data)
+ec_notify_cbk (void *data)
{
ec_t *ec = data;
+ glusterfs_event_t event = GF_EVENT_MAXVAL;
LOCK(&ec->lock);
{
@@ -246,26 +254,38 @@ ec_notify_down (void *data)
gf_timer_call_cancel (ec->xl->ctx, ec->timer);
ec->timer = NULL;
- if (GF_EVENT_MAXVAL == ec_get_event_from_state (ec)) {
- /* Change the state as if the bricks are down */
+ event = ec_get_event_from_state (ec);
+ /* If event is still MAXVAL then enough subvolumes didn't
+ * notify, treat it as CHILD_DOWN. */
+ if (event == GF_EVENT_MAXVAL) {
+ event = GF_EVENT_CHILD_DOWN;
ec->xl_notify = (1ULL << ec->nodes) - 1ULL;
ec->xl_notify_count = ec->nodes;
- default_notify (ec->xl, GF_EVENT_CHILD_DOWN, NULL);
+ } else if (event == GF_EVENT_CHILD_UP) {
+ /* Rest of the bricks are still not coming up,
+ * notify that ec is up. Files/directories will be
+ * healed as in when they come up. */
+ ec_up (ec->xl, ec);
}
+
+ /* CHILD_DOWN should not come here as no grace period is given
+ * for notifying CHILD_DOWN. */
+
+ default_notify (ec->xl, event, NULL);
}
unlock:
UNLOCK(&ec->lock);
}
void
-ec_launch_notify_child_down_timer (xlator_t *this, ec_t *ec)
+ec_launch_notify_timer (xlator_t *this, ec_t *ec)
{
struct timespec delay = {0, };
gf_log (this->name, GF_LOG_DEBUG, "Initiating child-down timer");
delay.tv_sec = 10;
delay.tv_nsec = 0;
- ec->timer = gf_timer_call_after (this->ctx, delay, ec_notify_down, ec);
+ ec->timer = gf_timer_call_after (this->ctx, delay, ec_notify_cbk, ec);
if (ec->timer == NULL) {
gf_log(this->name, GF_LOG_ERROR, "Cannot create timer "
"for delayed initialization");
@@ -315,10 +335,10 @@ notify (xlator_t *this, int32_t event, void *data, ...)
if (event == GF_EVENT_PARENT_UP) {
/*
- * Start a timer which sends CHILD_DOWN event to parent
+ * Start a timer which sends appropriate event to parent
* xlator to prevent the 'mount' syscall from hanging.
*/
- ec_launch_notify_child_down_timer (this, ec);
+ ec_launch_notify_timer (this, ec);
goto unlock;
}