From fb917bf10b4783d5c669e81a5be1f902ca48cb84 Mon Sep 17 00:00:00 2001 From: Mohit Agrawal Date: Fri, 23 Nov 2018 09:39:43 +0530 Subject: [geo-rep]: Worker still ACTIVE after killing bricks Problem: In changelog xlator after destroying listener it call's unlink to delete changelog socket file but socket file reference is not cleaned up from process memory Solution: 1) To cleanup reference completely from process memory serialize transport cleanup for changelog and then unlink socket file 2) Brick xlator will notify GF_EVENT_PARENT_DOWN to next xlator only after cleanup all xprts Test: To test the same run below steps 1) Setup some volume and enable brick mux 2) kill anyone brick with gf_attach 3) check changelog socket for specific to killed brick in lsof, it should cleanup completely fixes: bz#1600145 Change-Id: Iba06cbf77d8a87b34a60fce50f6d8c0d427fa491 Signed-off-by: Mohit Agrawal --- xlators/features/changelog/src/changelog-rpc.c | 157 ++++++++++++++++++++----- 1 file changed, 126 insertions(+), 31 deletions(-) (limited to 'xlators/features/changelog/src/changelog-rpc.c') diff --git a/xlators/features/changelog/src/changelog-rpc.c b/xlators/features/changelog/src/changelog-rpc.c index 394fae44e3e..28974fe0999 100644 --- a/xlators/features/changelog/src/changelog-rpc.c +++ b/xlators/features/changelog/src/changelog-rpc.c @@ -43,9 +43,6 @@ changelog_cleanup_rpc_threads(xlator_t *this, changelog_priv_t *priv) /** terminate dispatcher thread(s) */ changelog_cleanup_dispatchers(this, priv, priv->nr_dispatchers); - /* TODO: what about pending and waiting connections? */ - changelog_ev_cleanup_connections(this, conn); - /* destroy locks */ ret = pthread_mutex_destroy(&conn->pending_lock); if (ret != 0) @@ -147,48 +144,146 @@ int changelog_rpcsvc_notify(rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, void *data) { + xlator_t *this = NULL; + rpc_transport_t *trans = NULL; + rpc_transport_t *xprt = NULL; + rpc_transport_t *xp_next = NULL; + changelog_priv_t *priv = NULL; + uint64_t listnercnt = 0; + uint64_t xprtcnt = 0; + uint64_t clntcnt = 0; + rpcsvc_listener_t *listener = NULL; + rpcsvc_listener_t *next = NULL; + gf_boolean_t listner_found = _gf_false; + socket_private_t *sockpriv = NULL; + + if (!xl || !data || !rpc) { + gf_msg_callingfn("changelog", GF_LOG_WARNING, 0, + CHANGELOG_MSG_RPCSVC_NOTIFY_FAILED, + "Calling rpc_notify without initializing"); + goto out; + } + + this = xl; + trans = data; + priv = this->private; + + if (!priv) { + gf_msg_callingfn("changelog", GF_LOG_WARNING, 0, + CHANGELOG_MSG_RPCSVC_NOTIFY_FAILED, + "Calling rpc_notify without priv initializing"); + goto out; + } + + if (event == RPCSVC_EVENT_ACCEPT) { + GF_ATOMIC_INC(priv->xprtcnt); + LOCK(&priv->lock); + { + list_add_tail(&trans->list, &priv->xprt_list); + } + UNLOCK(&priv->lock); + goto out; + } + + if (event == RPCSVC_EVENT_DISCONNECT) { + list_for_each_entry_safe(listener, next, &rpc->listeners, list) + { + if (listener && listener->trans) { + if (listener->trans == trans) { + listnercnt = GF_ATOMIC_DEC(priv->listnercnt); + listner_found = _gf_true; + rpcsvc_listener_destroy(listener); + } + } + } + + if (listnercnt > 0) { + goto out; + } + if (listner_found) { + LOCK(&priv->lock); + list_for_each_entry_safe(xprt, xp_next, &priv->xprt_list, list) + { + sockpriv = (socket_private_t *)(xprt->private); + gf_log("changelog", GF_LOG_INFO, + "Send disconnect" + " on socket %d", + sockpriv->sock); + rpc_transport_disconnect(xprt, _gf_false); + } + UNLOCK(&priv->lock); + goto out; + } + LOCK(&priv->lock); + { + list_del_init(&trans->list); + } + UNLOCK(&priv->lock); + + xprtcnt = GF_ATOMIC_DEC(priv->xprtcnt); + clntcnt = GF_ATOMIC_GET(priv->clntcnt); + if (!xprtcnt && !clntcnt) { + changelog_process_cleanup_event(this); + } + } + +out: return 0; } +void +changelog_process_cleanup_event(xlator_t *this) +{ + gf_boolean_t cleanup_notify = _gf_false; + changelog_priv_t *priv = NULL; + char sockfile[UNIX_PATH_MAX] = { + 0, + }; + + if (!this) + return; + priv = this->private; + if (!priv) + return; + + LOCK(&priv->lock); + { + cleanup_notify = priv->notify_down; + priv->notify_down = _gf_true; + } + UNLOCK(&priv->lock); + + if (priv->victim && !cleanup_notify) { + default_notify(this, GF_EVENT_PARENT_DOWN, priv->victim); + + if (priv->rpc) { + /* sockfile path could have been saved to avoid this */ + CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile, + UNIX_PATH_MAX); + sys_unlink(sockfile); + (void)rpcsvc_unregister_notify(priv->rpc, changelog_rpcsvc_notify, + this); + if (priv->rpc->rxpool) { + mem_pool_destroy(priv->rpc->rxpool); + priv->rpc->rxpool = NULL; + } + GF_FREE(priv->rpc); + priv->rpc = NULL; + } + } +} + void changelog_destroy_rpc_listner(xlator_t *this, changelog_priv_t *priv) { char sockfile[UNIX_PATH_MAX] = { 0, }; - changelog_clnt_t *c_clnt = &priv->connections; - changelog_rpc_clnt_t *crpc = NULL; - int nofconn = 0; /* sockfile path could have been saved to avoid this */ CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile, UNIX_PATH_MAX); changelog_rpc_server_destroy(this, priv->rpc, sockfile, changelog_rpcsvc_notify, changelog_programs); - - /* TODO Below approach is not perfect to wait for cleanup - all active connections without this code brick process - can be crash in case of brick multiplexing if any in-progress - request process on rpc by changelog xlator after - cleanup resources - */ - - if (c_clnt) { - do { - nofconn = 0; - LOCK(&c_clnt->active_lock); - list_for_each_entry(crpc, &c_clnt->active, list) { nofconn++; } - UNLOCK(&c_clnt->active_lock); - LOCK(&c_clnt->wait_lock); - list_for_each_entry(crpc, &c_clnt->waitq, list) { nofconn++; } - UNLOCK(&c_clnt->wait_lock); - pthread_mutex_lock(&c_clnt->pending_lock); - list_for_each_entry(crpc, &c_clnt->pending, list) { nofconn++; } - pthread_mutex_unlock(&c_clnt->pending_lock); - - } while (nofconn); /* Wait for all connection cleanup */ - } - - (void)changelog_cleanup_rpc_threads(this, priv); } rpcsvc_t * -- cgit