From e2adc9dc66dc46519007790ecd7dd57642dff0fd Mon Sep 17 00:00:00 2001
From: Mohammed Rafi KC
Date: Mon, 29 Apr 2019 13:22:32 +0530
Subject: ec/shd: Cleanup self heal daemon resources during ec fini

We were not properly cleaning self-heal daemon resources
during ec fini. With shd multiplexing, it is absolutely
necessary to cleanup all the resources during ec fini.

Change-Id: Iae4f1bce7d8c2e1da51ac568700a51088f3cc7f2
fixes: bz#1703948
Signed-off-by: Mohammed Rafi KC
---
 xlators/cluster/afr/src/afr-self-heald.c |  5 +++
 xlators/cluster/ec/src/ec-heald.c        | 77 +++++++++++++++++++++++++++-----
 xlators/cluster/ec/src/ec-heald.h        |  3 ++
 xlators/cluster/ec/src/ec-messages.h     |  3 +-
 xlators/cluster/ec/src/ec.c              | 47 +++++++++++++++++++
 5 files changed, 122 insertions(+), 13 deletions(-)

diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index 29714f5a90b..e476ac62ec4 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -524,6 +524,11 @@ afr_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
     afr_private_t *priv = NULL;

     priv = this->private;
+
+    if (this->cleanup_starting) {
+        return -ENOTCONN;
+    }
+
     if (!priv->shd.enabled)
         return -EBUSY;

diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
index 67c8072a01e..985d4854363 100644
--- a/xlators/cluster/ec/src/ec-heald.c
+++ b/xlators/cluster/ec/src/ec-heald.c
@@ -71,6 +71,11 @@ disabled_loop:
             break;
     }

+    if (ec->shutdown) {
+        healer->running = _gf_false;
+        return -1;
+    }
+
     ret = healer->rerun;
     healer->rerun = 0;

@@ -241,9 +246,11 @@ ec_shd_index_sweep(struct subvol_healer *healer)
         goto out;
     }

+    _mask_cancellation();
     ret = syncop_mt_dir_scan(NULL, subvol, &loc, GF_CLIENT_PID_SELF_HEALD,
                              healer, ec_shd_index_heal, xdata,
                              ec->shd.max_threads, ec->shd.wait_qlength);
+    _unmask_cancellation();
 out:
     if (xdata)
         dict_unref(xdata);
@@ -263,6 +270,11 @@ ec_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
     int ret = 0;

     ec = this->private;
+
+    if (this->cleanup_starting) {
+        return -ENOTCONN;
+    }
+
     if (ec->xl_up_count <= ec->fragments) {
         return -ENOTCONN;
     }
@@ -305,11 +317,15 @@ ec_shd_full_sweep(struct subvol_healer *healer, inode_t *inode)
 {
     ec_t *ec = NULL;
     loc_t loc = {0};
+    int ret = -1;

     ec = healer->this->private;
     loc.inode = inode;
-    return syncop_ftw(ec->xl_list[healer->subvol], &loc,
-                      GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
+    _mask_cancellation();
+    ret = syncop_ftw(ec->xl_list[healer->subvol], &loc,
+                     GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
+    _unmask_cancellation();
+    return ret;
 }

 void *
@@ -317,13 +333,16 @@ ec_shd_index_healer(void *data)
 {
     struct subvol_healer *healer = NULL;
     xlator_t *this = NULL;
+    int run = 0;

     healer = data;
     THIS = this = healer->this;
     ec_t *ec = this->private;

     for (;;) {
-        ec_shd_healer_wait(healer);
+        run = ec_shd_healer_wait(healer);
+        if (run == -1)
+            break;

         if (ec->xl_up_count > ec->fragments) {
             gf_msg_debug(this->name, 0, "starting index sweep on subvol %s",
@@ -352,16 +371,12 @@ ec_shd_full_healer(void *data)

     rootloc.inode = this->itable->root;
     for (;;) {
-        pthread_mutex_lock(&healer->mutex);
-        {
-            run = __ec_shd_healer_wait(healer);
-            if (!run)
-                healer->running = _gf_false;
-        }
-        pthread_mutex_unlock(&healer->mutex);
-
-        if (!run)
+        run = ec_shd_healer_wait(healer);
+        if (run < 0) {
             break;
+        } else if (run == 0) {
+            continue;
+        }

         if (ec->xl_up_count > ec->fragments) {
             gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_FULL_SWEEP_START,
@@ -562,3 +577,41 @@ out:
         dict_del(output, this->name);
     return ret;
 }
+
+void
+ec_destroy_healer_object(xlator_t *this, struct subvol_healer *healer)
+{
+    if (!healer)
+        return;
+
+    pthread_cond_destroy(&healer->cond);
+    pthread_mutex_destroy(&healer->mutex);
+}
+
+void
+ec_selfheal_daemon_fini(xlator_t *this)
+{
+    struct subvol_healer *healer = NULL;
+    ec_self_heald_t *shd = NULL;
+    ec_t *priv = NULL;
+    int i = 0;
+
+    priv = this->private;
+    if (!priv)
+        return;
+
+    shd = &priv->shd;
+    if (!shd->iamshd)
+        return;
+
+    for (i = 0; i < priv->nodes; i++) {
+        healer = &shd->index_healers[i];
+        ec_destroy_healer_object(this, healer);
+
+        healer = &shd->full_healers[i];
+        ec_destroy_healer_object(this, healer);
+    }
+
+    GF_FREE(shd->index_healers);
+    GF_FREE(shd->full_healers);
+}
diff --git a/xlators/cluster/ec/src/ec-heald.h b/xlators/cluster/ec/src/ec-heald.h
index 2eda2a74f54..8184cf4147c 100644
--- a/xlators/cluster/ec/src/ec-heald.h
+++ b/xlators/cluster/ec/src/ec-heald.h
@@ -24,4 +24,7 @@ ec_selfheal_daemon_init(xlator_t *this);
 void
 ec_shd_index_healer_wake(ec_t *ec);

+void
+ec_selfheal_daemon_fini(xlator_t *this);
+
 #endif /* __EC_HEALD_H__ */
diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h
index 7c2880851a8..ce299bb61be 100644
--- a/xlators/cluster/ec/src/ec-messages.h
+++ b/xlators/cluster/ec/src/ec-messages.h
@@ -55,6 +55,7 @@ GLFS_MSGID(EC, EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL,
            EC_MSG_CONFIG_XATTR_INVALID, EC_MSG_EXTENSION, EC_MSG_EXTENSION_NONE,
            EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED,
            EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED,
-           EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED);
+           EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED,
+           EC_MSG_THREAD_CLEANUP_FAILED);

 #endif /* !_EC_MESSAGES_H_ */
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 9e37f5e40f6..4e9fac5ea39 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -428,6 +428,51 @@ ec_disable_delays(ec_t *ec)
     return list_empty(&ec->pending_fops);
 }

+void
+ec_cleanup_healer_object(ec_t *ec)
+{
+    struct subvol_healer *healer = NULL;
+    ec_self_heald_t *shd = NULL;
+    void *res = NULL;
+    int i = 0;
+    gf_boolean_t is_join = _gf_false;
+
+    shd = &ec->shd;
+    if (!shd->iamshd)
+        return;
+
+    for (i = 0; i < ec->nodes; i++) {
+        healer = &shd->index_healers[i];
+        pthread_mutex_lock(&healer->mutex);
+        {
+            healer->rerun = 1;
+            if (healer->running) {
+                pthread_cond_signal(&healer->cond);
+                is_join = _gf_true;
+            }
+        }
+        pthread_mutex_unlock(&healer->mutex);
+        if (is_join) {
+            pthread_join(healer->thread, &res);
+            is_join = _gf_false;
+        }
+
+        healer = &shd->full_healers[i];
+        pthread_mutex_lock(&healer->mutex);
+        {
+            healer->rerun = 1;
+            if (healer->running) {
+                pthread_cond_signal(&healer->cond);
+                is_join = _gf_true;
+            }
+        }
+        pthread_mutex_unlock(&healer->mutex);
+        if (is_join) {
+            pthread_join(healer->thread, &res);
+            is_join = _gf_false;
+        }
+    }
+}
 void
 ec_pending_fops_completed(ec_t *ec)
 {
@@ -544,6 +589,7 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
             /* If there aren't pending fops running after we have waken up
              * them, we immediately propagate the notification. */
             propagate = ec_disable_delays(ec);
+            ec_cleanup_healer_object(ec);
             goto unlock;
         }

@@ -760,6 +806,7 @@ failed:
 void
 fini(xlator_t *this)
 {
+    ec_selfheal_daemon_fini(this);
     __ec_destroy_private(this);
 }

--
cgit
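
The shutdown handshake the patch implements can be summarized as: set a shutdown/rerun flag under the healer's mutex, signal its condition variable, let the healer loop notice the flag and return -1 so the thread exits, pthread_join() the thread, and only then destroy the mutex and condition variable in fini. Below is a minimal, self-contained sketch of that pattern using plain pthreads; it is illustrative only, and the worker struct and the worker_wait()/worker_cleanup() names are made up for this example, not GlusterFS APIs.

/*
 * Sketch of the healer shutdown pattern: wake the parked thread,
 * let its wait loop report shutdown, join it, then destroy the
 * synchronization objects. Names here are hypothetical.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

struct worker {
    pthread_mutex_t mutex;
    pthread_cond_t cond;
    pthread_t thread;
    bool running;
    bool shutdown;
    int rerun;
};

/* Wait for work; return -1 once the owner has requested shutdown. */
static int
worker_wait(struct worker *w)
{
    int ret;

    pthread_mutex_lock(&w->mutex);
    while (!w->rerun && !w->shutdown)
        pthread_cond_wait(&w->cond, &w->mutex);
    if (w->shutdown) {
        w->running = false;
        ret = -1;
    } else {
        ret = w->rerun;
        w->rerun = 0;
    }
    pthread_mutex_unlock(&w->mutex);
    return ret;
}

static void *
worker_loop(void *data)
{
    struct worker *w = data;

    for (;;) {
        if (worker_wait(w) == -1)
            break;
        printf("doing one sweep\n"); /* stand-in for an index/full sweep */
    }
    return NULL;
}

/* Signal the thread, join it, then destroy its mutex and condvar. */
static void
worker_cleanup(struct worker *w)
{
    bool join = false;

    pthread_mutex_lock(&w->mutex);
    w->shutdown = true;
    if (w->running) {
        pthread_cond_signal(&w->cond);
        join = true;
    }
    pthread_mutex_unlock(&w->mutex);

    if (join)
        pthread_join(w->thread, NULL);

    pthread_cond_destroy(&w->cond);
    pthread_mutex_destroy(&w->mutex);
}

int
main(void)
{
    struct worker w = {.running = true};

    pthread_mutex_init(&w.mutex, NULL);
    pthread_cond_init(&w.cond, NULL);
    pthread_create(&w.thread, NULL, worker_loop, &w);

    sleep(1);           /* let the worker park in its wait loop */
    worker_cleanup(&w); /* orderly teardown, as done during ec fini */
    return 0;
}

The important ordering is join-before-destroy: with shd multiplexing, tearing down an ec instance while its healer threads are still parked in the condition wait would otherwise free the mutex and condvar out from under them, which is exactly what ec_cleanup_healer_object()/ec_selfheal_daemon_fini() avoid.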