diff options
author | Mohammed Rafi KC <rkavunga@redhat.com> | 2019-06-02 01:36:33 +0530 |
---|---|---|
committer | Pranith Kumar K <pkarampu@redhat.com> | 2019-06-08 17:50:10 +0530 |
commit | 4cfc5788af2488d173ac033850370c4f9ed7a05e (patch) | |
tree | 98a97731e3c76fca7c6f6c06dd2622fe3228adbe /xlators/cluster/ec/src/ec.c | |
parent | 9e0de2b634b888dd069e908b7745197d20fe7036 (diff) |
ec/fini: Fix race between xlator cleanup and ongoing async fop
Problem:
While we process a cleanup, there is a chance of a race between the
cleanup and async operations, for example ec_launch_replace_heal.
This can lead to invalid memory access.
Solution:
Just like we track ongoing heal fops, we can also track async fops
such as ec_launch_replace_heal, so that we can decide when it is safe
to send a PARENT_DOWN request.
Change-Id: I055391c5c6c34d58aef7336847f3b570cb831298
fixes: bz#1703948
Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
Diffstat (limited to 'xlators/cluster/ec/src/ec.c')
-rw-r--r-- | xlators/cluster/ec/src/ec.c | 37 |
1 files changed, 25 insertions, 12 deletions
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c index 95100951220..b7acc666afc 100644 --- a/xlators/cluster/ec/src/ec.c +++ b/xlators/cluster/ec/src/ec.c @@ -355,6 +355,7 @@ ec_notify_cbk(void *data) ec_t *ec = data; glusterfs_event_t event = GF_EVENT_MAXVAL; gf_boolean_t propagate = _gf_false; + gf_boolean_t launch_heal = _gf_false; LOCK(&ec->lock); { @@ -384,6 +385,11 @@ ec_notify_cbk(void *data) * still bricks DOWN, they will be healed when they * come up. */ ec_up(ec->xl, ec); + + if (ec->shd.iamshd && !ec->shutdown) { + launch_heal = _gf_true; + GF_ATOMIC_INC(ec->async_fop_count); + } } propagate = _gf_true; @@ -391,13 +397,12 @@ ec_notify_cbk(void *data) unlock: UNLOCK(&ec->lock); + if (launch_heal) { + /* We have just brought the volume UP, so we trigger + * a self-heal check on the root directory. */ + ec_launch_replace_heal(ec); + } if (propagate) { - if ((event == GF_EVENT_CHILD_UP) && ec->shd.iamshd) { - /* We have just brought the volume UP, so we trigger - * a self-heal check on the root directory. */ - ec_launch_replace_heal(ec); - } - default_notify(ec->xl, event, NULL); } } @@ -425,7 +430,7 @@ ec_disable_delays(ec_t *ec) { ec->shutdown = _gf_true; - return list_empty(&ec->pending_fops); + return __ec_is_last_fop(ec); } void @@ -603,7 +608,10 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2) if (event == GF_EVENT_CHILD_UP) { /* We need to trigger a selfheal if a brick changes * to UP state. 
*/ - needs_shd_check = ec_set_up_state(ec, mask, mask); + if (ec_set_up_state(ec, mask, mask) && ec->shd.iamshd && + !ec->shutdown) { + needs_shd_check = _gf_true; + } } else if (event == GF_EVENT_CHILD_DOWN) { ec_set_up_state(ec, mask, 0); } @@ -633,17 +641,21 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2) } } else { propagate = _gf_false; + needs_shd_check = _gf_false; + } + + if (needs_shd_check) { + GF_ATOMIC_INC(ec->async_fop_count); } } unlock: UNLOCK(&ec->lock); done: + if (needs_shd_check) { + ec_launch_replace_heal(ec); + } if (propagate) { - if (needs_shd_check && ec->shd.iamshd) { - ec_launch_replace_heal(ec); - } - error = default_notify(this, event, data); } @@ -705,6 +717,7 @@ init(xlator_t *this) ec->xl = this; LOCK_INIT(&ec->lock); + GF_ATOMIC_INIT(ec->async_fop_count, 0); INIT_LIST_HEAD(&ec->pending_fops); INIT_LIST_HEAD(&ec->heal_waiting); INIT_LIST_HEAD(&ec->healing); |