From e2adc9dc66dc46519007790ecd7dd57642dff0fd Mon Sep 17 00:00:00 2001
From: Mohammed Rafi KC
Date: Mon, 29 Apr 2019 13:22:32 +0530
Subject: ec/shd: Cleanup self heal daemon resources during ec fini

We were not properly cleaning self-heal daemon resources
during ec fini. With shd multiplexing, it is absolutely
necessary to cleanup all the resources during ec fini.

Change-Id: Iae4f1bce7d8c2e1da51ac568700a51088f3cc7f2
fixes: bz#1703948
Signed-off-by: Mohammed Rafi KC
---
 xlators/cluster/afr/src/afr-self-heald.c |  5 +++
 xlators/cluster/ec/src/ec-heald.c        | 77 +++++++++++++++++++++++++++-----
 xlators/cluster/ec/src/ec-heald.h        |  3 ++
 xlators/cluster/ec/src/ec-messages.h     |  3 +-
 xlators/cluster/ec/src/ec.c              | 47 +++++++++++++++++++
 5 files changed, 122 insertions(+), 13 deletions(-)

diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index 29714f5a90b..e476ac62ec4 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -524,6 +524,11 @@ afr_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
     afr_private_t *priv = NULL;

     priv = this->private;
+
+    if (this->cleanup_starting) {
+        return -ENOTCONN;
+    }
+
     if (!priv->shd.enabled)
         return -EBUSY;

diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
index 67c8072a01e..985d4854363 100644
--- a/xlators/cluster/ec/src/ec-heald.c
+++ b/xlators/cluster/ec/src/ec-heald.c
@@ -71,6 +71,11 @@ disabled_loop:
             break;
     }

+    if (ec->shutdown) {
+        healer->running = _gf_false;
+        return -1;
+    }
+
     ret = healer->rerun;
     healer->rerun = 0;

@@ -241,9 +246,11 @@ ec_shd_index_sweep(struct subvol_healer *healer)
         goto out;
     }

+    _mask_cancellation();
     ret = syncop_mt_dir_scan(NULL, subvol, &loc, GF_CLIENT_PID_SELF_HEALD,
                              healer, ec_shd_index_heal, xdata,
                              ec->shd.max_threads, ec->shd.wait_qlength);
+    _unmask_cancellation();
 out:
     if (xdata)
         dict_unref(xdata);
@@ -263,6 +270,11 @@ ec_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
     int ret = 0;

     ec = this->private;
+
+    if (this->cleanup_starting) {
+        return -ENOTCONN;
+    }
+
     if (ec->xl_up_count <= ec->fragments) {
         return -ENOTCONN;
     }
@@ -305,11 +317,15 @@ ec_shd_full_sweep(struct subvol_healer *healer, inode_t *inode)
 {
     ec_t *ec = NULL;
     loc_t loc = {0};
+    int ret = -1;

     ec = healer->this->private;
     loc.inode = inode;
-    return syncop_ftw(ec->xl_list[healer->subvol], &loc,
-                      GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
+    _mask_cancellation();
+    ret = syncop_ftw(ec->xl_list[healer->subvol], &loc,
+                     GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
+    _unmask_cancellation();
+    return ret;
 }

 void *
@@ -317,13 +333,16 @@ ec_shd_index_healer(void *data)
 {
     struct subvol_healer *healer = NULL;
     xlator_t *this = NULL;
+    int run = 0;

     healer = data;
     THIS = this = healer->this;
     ec_t *ec = this->private;

     for (;;) {
-        ec_shd_healer_wait(healer);
+        run = ec_shd_healer_wait(healer);
+        if (run == -1)
+            break;

         if (ec->xl_up_count > ec->fragments) {
             gf_msg_debug(this->name, 0, "starting index sweep on subvol %s",
@@ -352,16 +371,12 @@ ec_shd_full_healer(void *data)

     rootloc.inode = this->itable->root;
     for (;;) {
-        pthread_mutex_lock(&healer->mutex);
-        {
-            run = __ec_shd_healer_wait(healer);
-            if (!run)
-                healer->running = _gf_false;
-        }
-        pthread_mutex_unlock(&healer->mutex);
-
-        if (!run)
+        run = ec_shd_healer_wait(healer);
+        if (run < 0) {
             break;
+        } else if (run == 0) {
+            continue;
+        }

         if (ec->xl_up_count > ec->fragments) {
             gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_FULL_SWEEP_START,
@@ -562,3 +577,41 @@ out:
         dict_del(output, this->name);
     return ret;
 }
+
+void
+ec_destroy_healer_object(xlator_t *this, struct subvol_healer *healer)
+{
+    if (!healer)
+        return;
+
+    pthread_cond_destroy(&healer->cond);
+    pthread_mutex_destroy(&healer->mutex);
+}
+
+void
+ec_selfheal_daemon_fini(xlator_t *this)
+{
+    struct subvol_healer *healer = NULL;
+    ec_self_heald_t *shd = NULL;
+    ec_t *priv = NULL;
+    int i = 0;
+
+    priv = this->private;
+    if (!priv)
+        return;
+
+    shd = &priv->shd;
+    if (!shd->iamshd)
+        return;
+
+    for (i = 0; i < priv->nodes; i++) {
+        healer = &shd->index_healers[i];
+        ec_destroy_healer_object(this, healer);
+
+        healer = &shd->full_healers[i];
+        ec_destroy_healer_object(this, healer);
+    }
+
+    GF_FREE(shd->index_healers);
+    GF_FREE(shd->full_healers);
+}
diff --git a/xlators/cluster/ec/src/ec-heald.h b/xlators/cluster/ec/src/ec-heald.h
index 2eda2a74f54..8184cf4147c 100644
--- a/xlators/cluster/ec/src/ec-heald.h
+++ b/xlators/cluster/ec/src/ec-heald.h
@@ -24,4 +24,7 @@ ec_selfheal_daemon_init(xlator_t *this);
 void
 ec_shd_index_healer_wake(ec_t *ec);

+void
+ec_selfheal_daemon_fini(xlator_t *this);
+
 #endif /* __EC_HEALD_H__ */
diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h
index 7c2880851a8..ce299bb61be 100644
--- a/xlators/cluster/ec/src/ec-messages.h
+++ b/xlators/cluster/ec/src/ec-messages.h
@@ -55,6 +55,7 @@ GLFS_MSGID(EC, EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL,
            EC_MSG_CONFIG_XATTR_INVALID, EC_MSG_EXTENSION, EC_MSG_EXTENSION_NONE,
            EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED,
            EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED,
-           EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED);
+           EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED,
+           EC_MSG_THREAD_CLEANUP_FAILED);

 #endif /* !_EC_MESSAGES_H_ */
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 9e37f5e40f6..4e9fac5ea39 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -428,6 +428,51 @@ ec_disable_delays(ec_t *ec)
     return list_empty(&ec->pending_fops);
 }

+void
+ec_cleanup_healer_object(ec_t *ec)
+{
+    struct subvol_healer *healer = NULL;
+    ec_self_heald_t *shd = NULL;
+    void *res = NULL;
+    int i = 0;
+    gf_boolean_t is_join = _gf_false;
+
+    shd = &ec->shd;
+    if (!shd->iamshd)
+        return;
+
+    for (i = 0; i < ec->nodes; i++) {
+        healer = &shd->index_healers[i];
+        pthread_mutex_lock(&healer->mutex);
+        {
+            healer->rerun = 1;
+            if (healer->running) {
+                pthread_cond_signal(&healer->cond);
+                is_join = _gf_true;
+            }
+        }
+        pthread_mutex_unlock(&healer->mutex);
+        if (is_join) {
+            pthread_join(healer->thread, &res);
+            is_join = _gf_false;
+        }
+
+        healer = &shd->full_healers[i];
+        pthread_mutex_lock(&healer->mutex);
+        {
+            healer->rerun = 1;
+            if (healer->running) {
+                pthread_cond_signal(&healer->cond);
+                is_join = _gf_true;
+            }
+        }
+        pthread_mutex_unlock(&healer->mutex);
+        if (is_join) {
+            pthread_join(healer->thread, &res);
+            is_join = _gf_false;
+        }
+    }
+}
 void
 ec_pending_fops_completed(ec_t *ec)
 {
@@ -544,6 +589,7 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
             /* If there aren't pending fops running after we have waken up
              * them, we immediately propagate the notification. */
             propagate = ec_disable_delays(ec);
+            ec_cleanup_healer_object(ec);
             goto unlock;
         }

@@ -760,6 +806,7 @@ failed:
 void
 fini(xlator_t *this)
 {
+    ec_selfheal_daemon_fini(this);
     __ec_destroy_private(this);
 }

--
cgit
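
The shutdown handshake the patch implements can be summarized as: set a shutdown/rerun flag under the healer's mutex, signal its condition variable, let the healer loop notice the flag and return -1 so the thread exits, pthread_join() the thread, and only then destroy the mutex and condition variable in fini. Below is a minimal, self-contained sketch of that pattern using plain pthreads; it is illustrative only, and the worker struct and the worker_wait()/worker_cleanup() names are made up for this example, not GlusterFS APIs.

/*
 * Sketch of the healer shutdown pattern: wake the parked thread,
 * let its wait loop report shutdown, join it, then destroy the
 * synchronization objects. Names here are hypothetical.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

struct worker {
    pthread_mutex_t mutex;
    pthread_cond_t cond;
    pthread_t thread;
    bool running;
    bool shutdown;
    int rerun;
};

/* Wait for work; return -1 once the owner has requested shutdown. */
static int
worker_wait(struct worker *w)
{
    int ret;

    pthread_mutex_lock(&w->mutex);
    while (!w->rerun && !w->shutdown)
        pthread_cond_wait(&w->cond, &w->mutex);
    if (w->shutdown) {
        w->running = false;
        ret = -1;
    } else {
        ret = w->rerun;
        w->rerun = 0;
    }
    pthread_mutex_unlock(&w->mutex);
    return ret;
}

static void *
worker_loop(void *data)
{
    struct worker *w = data;

    for (;;) {
        if (worker_wait(w) == -1)
            break;
        printf("doing one sweep\n"); /* stand-in for an index/full sweep */
    }
    return NULL;
}

/* Signal the thread, join it, then destroy its mutex and condvar. */
static void
worker_cleanup(struct worker *w)
{
    bool join = false;

    pthread_mutex_lock(&w->mutex);
    w->shutdown = true;
    if (w->running) {
        pthread_cond_signal(&w->cond);
        join = true;
    }
    pthread_mutex_unlock(&w->mutex);

    if (join)
        pthread_join(w->thread, NULL);

    pthread_cond_destroy(&w->cond);
    pthread_mutex_destroy(&w->mutex);
}

int
main(void)
{
    struct worker w = {.running = true};

    pthread_mutex_init(&w.mutex, NULL);
    pthread_cond_init(&w.cond, NULL);
    pthread_create(&w.thread, NULL, worker_loop, &w);

    sleep(1);           /* let the worker park in its wait loop */
    worker_cleanup(&w); /* orderly teardown, as done during ec fini */
    return 0;
}

The important ordering is join-before-destroy: with shd multiplexing, tearing down an ec instance while its healer threads are still parked in the condition wait would otherwise free the mutex and condvar out from under them, which is exactly what ec_cleanup_healer_object()/ec_selfheal_daemon_fini() avoid.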