From 04f84756e1baa5eff4560339700f82970eaa5d80 Mon Sep 17 00:00:00 2001
From: Mohit Agrawal
Date: Tue, 22 Jan 2019 12:57:50 +0530
Subject: core: heketi-cli is throwing error "target is busy"

Problem: While deleting a block hosting volume through heketi-cli, the
         cli throws the error "target is busy". The error is reported
         because the brick is not detached successfully, and the detach
         fails due to a race condition while cleaning up the xprt
         associated with the detached brick.

Solution: To avoid the xprt-specific race condition, introduce an
          atomic flag on rpc_transport.

Change-Id: Id4ff1fe8375a63be71fb3343f455190a1b8bb6d4
fixes: bz#1668190
Signed-off-by: Mohit Agrawal
---
 rpc/rpc-lib/src/rpc-transport.c      |  1 +
 rpc/rpc-lib/src/rpc-transport.h      |  1 +
 xlators/protocol/server/src/server.c | 17 +++++++++++++++++
 3 files changed, 19 insertions(+)

diff --git a/rpc/rpc-lib/src/rpc-transport.c b/rpc/rpc-lib/src/rpc-transport.c
index e6421fcfab5..f9cbdf133c7 100644
--- a/rpc/rpc-lib/src/rpc-transport.c
+++ b/rpc/rpc-lib/src/rpc-transport.c
@@ -365,6 +365,7 @@ rpc_transport_load(glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
     }
 
     INIT_LIST_HEAD(&trans->list);
+    GF_ATOMIC_INIT(trans->disconnect_progress, 0);
 
     return_trans = trans;
 
diff --git a/rpc/rpc-lib/src/rpc-transport.h b/rpc/rpc-lib/src/rpc-transport.h
index 7be1ba14a0c..9e75d1a2bbb 100644
--- a/rpc/rpc-lib/src/rpc-transport.h
+++ b/rpc/rpc-lib/src/rpc-transport.h
@@ -214,6 +214,7 @@ struct rpc_transport {
     gf_boolean_t connect_failed;
     char notify_poller_death;
     char poller_death_accept;
+    gf_atomic_t disconnect_progress;
 };
 
 struct rpc_transport_ops {
diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
index 50c6c30d63f..8b89e18d156 100644
--- a/xlators/protocol/server/src/server.c
+++ b/xlators/protocol/server/src/server.c
@@ -475,6 +475,10 @@ server_rpc_notify(rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, void *data)
             break;
         }
 
+        /* Set the disconnect_progress flag to 1 to avoid races
+           during brick detach while brick mux is enabled
+        */
+        GF_ATOMIC_INIT(trans->disconnect_progress, 1);
         /* transport has to be removed from the list upon disconnect
          * irrespective of whether lock self heal is off or on, since
          * new transport will be created upon reconnect.
@@ -1536,6 +1540,7 @@ server_notify(xlator_t *this, int32_t event, void *data, ...)
     glusterfs_ctx_t *ctx = NULL;
     gf_boolean_t xprt_found = _gf_false;
     uint64_t totxprt = 0;
+    uint64_t totdisconnect = 0;
 
     GF_VALIDATE_OR_GOTO(THIS->name, this, out);
     conf = this->private;
@@ -1609,6 +1614,10 @@ server_notify(xlator_t *this, int32_t event, void *data, ...)
                 if (!xprt->xl_private) {
                     continue;
                 }
+
+                if (GF_ATOMIC_GET(xprt->disconnect_progress))
+                    continue;
+
                 if (xprt->xl_private->bound_xl == data) {
                     totxprt++;
                 }
@@ -1635,14 +1644,22 @@ server_notify(xlator_t *this, int32_t event, void *data, ...)
                 if (!xprt->xl_private) {
                     continue;
                 }
+
+                if (GF_ATOMIC_GET(xprt->disconnect_progress))
+                    continue;
+
                 if (xprt->xl_private->bound_xl == data) {
                     gf_log(this->name, GF_LOG_INFO, "disconnecting %s",
                            xprt->peerinfo.identifier);
                     xprt_found = _gf_true;
+                    totdisconnect++;
                     rpc_transport_disconnect(xprt, _gf_false);
                 }
             }
 
+            if (totxprt > totdisconnect)
+                GF_ATOMIC_SUB(victim->xprtrefcnt, (totxprt - totdisconnect));
+
             pthread_mutex_unlock(&conf->mutex);
             if (this->ctx->active) {
                 top = this->ctx->active->first;
-- 
cgit
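
The sketch below distills the pattern the patch introduces: mark a transport
with an atomic "disconnect in progress" flag as soon as its disconnect
notification arrives, and have the brick-detach path skip such transports both
when counting them and when disconnecting them, correcting the reference count
for any gap between the two passes. It is a simplified illustration, not
GlusterFS code: C11 atomics stand in for the GF_ATOMIC_* macros, and the
transport, on_disconnect, and detach_brick names are hypothetical.

/* Minimal sketch of the atomic disconnect_progress flag, assuming C11. */
#include <stdatomic.h>
#include <stdio.h>

struct transport {
    const char *peer;
    int bound_to_victim;            /* stands in for xl_private->bound_xl == data */
    atomic_int disconnect_progress; /* stands in for trans->disconnect_progress */
};

/* Disconnect-notification path: mark the transport before it is torn down,
 * so the detach path below neither counts nor disconnects it again. */
static void on_disconnect(struct transport *t)
{
    atomic_store(&t->disconnect_progress, 1);
    /* ... unlink from the transport list, drop references, ... */
}

/* Detach path: first count the transports bound to the brick being detached,
 * then disconnect the same set, skipping any transport whose teardown is
 * already in progress. */
static void detach_brick(struct transport *xprts, int n, atomic_ulong *xprtrefcnt)
{
    unsigned long totxprt = 0, totdisconnect = 0;
    int i;

    for (i = 0; i < n; i++) {
        if (atomic_load(&xprts[i].disconnect_progress))
            continue; /* already disconnecting: do not count it */
        if (xprts[i].bound_to_victim)
            totxprt++;
    }

    /* the detach waits until this many transports have dropped their ref */
    atomic_store(xprtrefcnt, totxprt);

    for (i = 0; i < n; i++) {
        if (atomic_load(&xprts[i].disconnect_progress))
            continue;
        if (xprts[i].bound_to_victim) {
            printf("disconnecting %s\n", xprts[i].peer);
            totdisconnect++;
            on_disconnect(&xprts[i]);
        }
    }

    /* If a transport began disconnecting between the two passes, give back
     * the references that will never be dropped, so the detach does not
     * wait on a count that can never reach zero. */
    if (totxprt > totdisconnect)
        atomic_fetch_sub(xprtrefcnt, totxprt - totdisconnect);
}

Reading the hunks above, the likely failure mode without the flag is that a
transport already being torn down gets counted into totxprt but never drops
its reference through the detach path, so the brick stays pinned and the
volume delete surfaces as "target is busy" at the heketi-cli level; the flag
plus the final subtraction keep the reference count consistent.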