From f138d3fa2237e7fa940ecf17153fd700350c4138 Mon Sep 17 00:00:00 2001 From: Mohit Agrawal Date: Tue, 16 Jul 2019 20:36:57 +0530 Subject: posix: In brick_mux brick is crashed while start/stop volume in loop Problem: In a brick_mux environment the brick sometimes crashes while a volume is stopped/started in a loop. The brick crashes in the janitor task while accessing priv: if posix priv is cleaned up before the janitor task is called, the janitor task crashes. Solution: To avoid the crash in a brick_mux environment, introduce a new flag janitor_task_stop in posix_private and, before sending the CHILD_DOWN event, wait for janitor_task_done to update the flag. Change-Id: Id9fa5d183a463b2b682774ab5cb9868357d139a4 fixes: bz#1730409 Signed-off-by: Mohit Agrawal --- xlators/storage/posix/src/posix-common.c | 40 +++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) (limited to 'xlators/storage/posix/src/posix-common.c') diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c index 2b39b5bde53..eee39fd000f 100644 --- a/xlators/storage/posix/src/posix-common.c +++ b/xlators/storage/posix/src/posix-common.c @@ -136,10 +136,15 @@ int32_t posix_notify(xlator_t *this, int32_t event, void *data, ...) { xlator_t *victim = data; + struct posix_private *priv = this->private; + int ret = 0; + struct timespec sleep_till = { + 0, + }; switch (event) { case GF_EVENT_PARENT_UP: { - /* Tell the parent that posix xlator is up */ + /* the parent that posix xlator is up */ default_notify(this, GF_EVENT_CHILD_UP, data); } break; @@ -148,6 +153,31 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) 
break; gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s", victim->name); + + if (priv->janitor) { + pthread_mutex_lock(&priv->janitor_mutex); + { + priv->janitor_task_stop = _gf_true; + ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, + priv->janitor); + if (!ret) { + clock_gettime(CLOCK_REALTIME, &sleep_till); + sleep_till.tv_sec += 1; + /* Wait to set janitor_task flag to _gf_false by + * janitor_task_done */ + while (priv->janitor_task_stop) { + (void)pthread_cond_timedwait(&priv->janitor_cond, + &priv->janitor_mutex, + &sleep_till); + clock_gettime(CLOCK_REALTIME, &sleep_till); + sleep_till.tv_sec += 1; + } + } + } + pthread_mutex_unlock(&priv->janitor_mutex); + GF_FREE(priv->janitor); + } + priv->janitor = NULL; default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data); } break; default: @@ -997,6 +1027,8 @@ posix_init(xlator_t *this) pthread_mutex_init(&_private->fsync_mutex, NULL); pthread_cond_init(&_private->fsync_cond, NULL); + pthread_mutex_init(&_private->janitor_mutex, NULL); + pthread_cond_init(&_private->janitor_cond, NULL); INIT_LIST_HEAD(&_private->fsyncs); ret = posix_spawn_ctx_janitor_thread(this); if (ret) @@ -1117,6 +1149,7 @@ posix_fini(xlator_t *this) (void)gf_thread_cleanup_xint(priv->disk_space_check); priv->disk_space_check = 0; } + if (priv->janitor) { /*TODO: Make sure the synctask is also complete */ ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor); @@ -1124,8 +1157,10 @@ posix_fini(xlator_t *this) gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TIMER_DELETE_FAILED, "Failed to delete janitor timer"); } + GF_FREE(priv->janitor); priv->janitor = NULL; } + if (priv->fsyncer) { (void)gf_thread_cleanup_xint(priv->fsyncer); priv->fsyncer = 0; @@ -1137,6 +1172,9 @@ posix_fini(xlator_t *this) GF_FREE(priv->base_path); LOCK_DESTROY(&priv->lock); pthread_mutex_destroy(&priv->fsync_mutex); + pthread_cond_destroy(&priv->fsync_cond); + pthread_mutex_destroy(&priv->janitor_mutex); + 
pthread_cond_destroy(&priv->janitor_cond); GF_FREE(priv->hostname); GF_FREE(priv->trash_path); GF_FREE(priv); -- cgit