From 0043c63f70776444f69667a4ef9596217ecb42b7 Mon Sep 17 00:00:00 2001 From: Mohit Agrawal Date: Mon, 12 Mar 2018 19:43:15 +0530 Subject: gluster: Sometimes Brick process is crashed at the time of stopping brick Problem: Sometimes the brick process crashes when a brick is stopped while brick mux is enabled. Solution: The brick process was crashing because rpc connections were not being cleaned up properly while brick mux is enabled. In this patch, after sending the GF_EVENT_CLEANUP notification to the xlator (server), the cleanup waits for all rpc client connections of that specific xlator to be destroyed. Once the rpc connections of all clients associated with that brick have been destroyed in server_rpc_notify, xlator_mem_cleanup is called for the brick xlator as well as all of its child xlators. To avoid races at the time of cleanup, two new flags are introduced in each xlator: cleanup_starting and call_cleanup. BUG: 1544090 Signed-off-by: Mohit Agrawal Note: All test cases were run in a separate build (https://review.gluster.org/#/c/19700/) with the same patch after forcefully enabling brick mux; all test cases passed. 
Change-Id: Ic4ab9c128df282d146cf1135640281fcb31997bf updates: bz#1544090 --- xlators/storage/posix/src/posix-common.c | 6 ++++-- xlators/storage/posix/src/posix-helpers.c | 14 ++++++++++---- 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'xlators/storage/posix') diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c index 507bfc20991..bcaad2703e9 100644 --- a/xlators/storage/posix/src/posix-common.c +++ b/xlators/storage/posix/src/posix-common.c @@ -1105,12 +1105,13 @@ posix_fini (xlator_t *this) struct posix_private *priv = this->private; if (!priv) return; - this->private = NULL; - if (priv->health_check) { + LOCK (&priv->lock); + if (priv->health_check_active) { priv->health_check_active = _gf_false; pthread_cancel (priv->health_check); priv->health_check = 0; } + UNLOCK (&priv->lock); if (priv->disk_space_check) { priv->disk_space_check_active = _gf_false; pthread_cancel (priv->disk_space_check); @@ -1135,6 +1136,7 @@ posix_fini (xlator_t *this) GF_FREE (priv->hostname); GF_FREE (priv->trash_path); GF_FREE (priv); + this->private = NULL; return; } diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c index 0ff94df944e..e9d379fda07 100644 --- a/xlators/storage/posix/src/posix-helpers.c +++ b/xlators/storage/posix/src/posix-helpers.c @@ -2001,6 +2001,12 @@ out: return NULL; abort: + LOCK (&priv->lock); + { + priv->health_check_active = _gf_false; + } + UNLOCK (&priv->lock); + /* health-check failed */ gf_msg (this->name, GF_LOG_EMERG, 0, P_MSG_HEALTHCHECK_FAILED, "health-check failed, going down"); @@ -2041,18 +2047,18 @@ abort: for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) { victim = (*trav_p)->xlator; - if (victim && - strcmp (victim->name, priv->base_path) == 0) { + if (!victim->call_cleanup && + strcmp (victim->name, priv->base_path) == 0) { victim_found = _gf_true; break; } } UNLOCK (&ctx->volfile_lock); - if (victim_found) { + if 
(victim_found && !victim->cleanup_starting) { gf_log (THIS->name, GF_LOG_INFO, "detaching not-only " " child %s", priv->base_path); + victim->cleanup_starting = 1; top->notify (top, GF_EVENT_CLEANUP, victim); - xlator_mem_cleanup (victim); } } -- cgit