ec/fini: Fix race between xlator cleanup and ongoing async fop

Problem: While we process a cleanup, there is a chance of a race with ongoing async operations, for example ec_launch_replace_heal. This can lead to invalid memory access. Solution: Just as we track ongoing heal fops, we can also track fops like ec_launch_replace_heal, so that we can decide when to send a PARENT_DOWN request. Change-Id: I055391c5c6c34d58aef7336847f3b570cb831298 fixes: bz#1703948 Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
author: Mohammed Rafi KC <rkavunga@redhat.com> 2019-06-02 01:36:33 +0530
committer: Pranith Kumar K <pkarampu@redhat.com> 2019-06-08 17:50:10 +0530
commit: 4cfc5788af2488d173ac033850370c4f9ed7a05e (patch)
tree: 98a97731e3c76fca7c6f6c06dd2622fe3228adbe /xlators/cluster/ec/src/ec.c
parent: 9e0de2b634b888dd069e908b7745197d20fe7036 (diff)
1 files changed, 25 insertions, 12 deletions
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 95100951220..b7acc666afc 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -355,6 +355,7 @@ ec_notify_cbk(void *data)
     ec_t *ec = data;
     glusterfs_event_t event = GF_EVENT_MAXVAL;
     gf_boolean_t propagate = _gf_false;
+    gf_boolean_t launch_heal = _gf_false;
 
     LOCK(&ec->lock);
     {
@@ -384,6 +385,11 @@ ec_notify_cbk(void *data)
              * still bricks DOWN, they will be healed when they
              * come up. */
             ec_up(ec->xl, ec);
+
+            if (ec->shd.iamshd && !ec->shutdown) {
+                launch_heal = _gf_true;
+                GF_ATOMIC_INC(ec->async_fop_count);
+            }
         }
 
         propagate = _gf_true;
@@ -391,13 +397,12 @@ ec_notify_cbk(void *data)
 unlock:
     UNLOCK(&ec->lock);
 
+    if (launch_heal) {
+        /* We have just brought the volume UP, so we trigger
+         * a self-heal check on the root directory. */
+        ec_launch_replace_heal(ec);
+    }
     if (propagate) {
-        if ((event == GF_EVENT_CHILD_UP) && ec->shd.iamshd) {
-            /* We have just brought the volume UP, so we trigger
-             * a self-heal check on the root directory. */
-            ec_launch_replace_heal(ec);
-        }
-
         default_notify(ec->xl, event, NULL);
     }
 }
@@ -425,7 +430,7 @@ ec_disable_delays(ec_t *ec)
 {
     ec->shutdown = _gf_true;
 
-    return list_empty(&ec->pending_fops);
+    return __ec_is_last_fop(ec);
 }
 
 void
@@ -603,7 +608,10 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
         if (event == GF_EVENT_CHILD_UP) {
             /* We need to trigger a selfheal if a brick changes
              * to UP state. */
-            needs_shd_check = ec_set_up_state(ec, mask, mask);
+            if (ec_set_up_state(ec, mask, mask) && ec->shd.iamshd &&
+                !ec->shutdown) {
+                needs_shd_check = _gf_true;
+            }
         } else if (event == GF_EVENT_CHILD_DOWN) {
             ec_set_up_state(ec, mask, 0);
         }
@@ -633,17 +641,21 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
             }
         } else {
             propagate = _gf_false;
+            needs_shd_check = _gf_false;
+        }
+
+        if (needs_shd_check) {
+            GF_ATOMIC_INC(ec->async_fop_count);
         }
     }
 unlock:
     UNLOCK(&ec->lock);
 
 done:
+    if (needs_shd_check) {
+        ec_launch_replace_heal(ec);
+    }
     if (propagate) {
-        if (needs_shd_check && ec->shd.iamshd) {
-            ec_launch_replace_heal(ec);
-        }
-
         error = default_notify(this, event, data);
     }
 
@@ -705,6 +717,7 @@ init(xlator_t *this)
     ec->xl = this;
     LOCK_INIT(&ec->lock);
 
+    GF_ATOMIC_INIT(ec->async_fop_count, 0);
     INIT_LIST_HEAD(&ec->pending_fops);
     INIT_LIST_HEAD(&ec->heal_waiting);
     INIT_LIST_HEAD(&ec->healing);
author	Mohammed Rafi KC <rkavunga@redhat.com>	2019-06-02 01:36:33 +0530
committer	Pranith Kumar K <pkarampu@redhat.com>	2019-06-08 17:50:10 +0530
commit	4cfc5788af2488d173ac033850370c4f9ed7a05e (patch)
tree	98a97731e3c76fca7c6f6c06dd2622fe3228adbe /xlators/cluster/ec/src/ec.c
parent	9e0de2b634b888dd069e908b7745197d20fe7036 (diff)