From e0335c32de133aafd88b888a0c20f4eb88bb9845 Mon Sep 17 00:00:00 2001 From: Milind Changire Date: Thu, 31 Aug 2017 11:37:32 +0530 Subject: rpc: destroy transport after client_t Problem: 1. Ref counting increment on the client_t object is done in rpcsvc_request_init() which is incorrect. 2. Ref not taken when delegating to grace_time_handler() Solution: 1. Only fop requests which require processing down the graph via stack 'frames' now ref count the request in get_frame_from_request() 2. Take ref on client_t object in server_rpc_notify() but avoid dropping in RPCSVC_EVENT_TRANSPORT_DESRTROY. Drop the ref unconditionally when exiting out of grace_time_handler(). Also, avoid dropping ref on client_t in RPCSVC_EVENT_TRANSPORT_DESTROY when ref mangement as been delegated to grace_time_handler() mainline: > BUG: 1481600 > Reported-by: Raghavendra G > Signed-off-by: Milind Changire > Reviewed-on: https://review.gluster.org/17982 > Tested-by: Raghavendra G > Reviewed-by: Raghavendra G > CentOS-regression: Gluster Build System > Smoke: Gluster Build System (cherry picked from commit 24b95089a18a6a40e7703cb344e92025d67f3086) Change-Id: Ic16246bebc7ea4490545b26564658f4b081675e4 BUG: 1487033 Reported-by: Raghavendra G Signed-off-by: Milind Changire Reviewed-on: https://review.gluster.org/18156 Smoke: Gluster Build System Reviewed-by: Raghavendra G Tested-by: Raghavendra G CentOS-regression: Gluster Build System --- rpc/rpc-lib/src/rpcsvc.c | 1 - xlators/protocol/server/src/server-handshake.c | 17 +++++- xlators/protocol/server/src/server-helpers.c | 4 ++ xlators/protocol/server/src/server.c | 73 ++++++++++++++++++-------- 4 files changed, 70 insertions(+), 25 deletions(-) diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c index fa44bb84e16..68e27ab9e3f 100644 --- a/rpc/rpc-lib/src/rpcsvc.c +++ b/rpc/rpc-lib/src/rpcsvc.c @@ -411,7 +411,6 @@ rpcsvc_request_init (rpcsvc_t *svc, rpc_transport_t *trans, req->progver = rpc_call_progver (callmsg); req->procnum = rpc_call_progproc (callmsg); req->trans = rpc_transport_ref (trans); - gf_client_ref (req->trans->xl_private); req->count = msg->count; req->msg[0] = progmsg; req->iobref = iobref_ref (msg->iobref); diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c index fa2f61315df..731e1c184ef 100644 --- a/xlators/protocol/server/src/server-handshake.c +++ b/xlators/protocol/server/src/server-handshake.c @@ -633,9 +633,24 @@ server_setvolume (rpcsvc_request_t *req) gf_msg_debug (this->name, 0, "Connected to %s", client->client_uid); cancelled = server_cancel_grace_timer (this, client); - if (cancelled)//Do gf_client_put on behalf of grace-timer-handler. + if (cancelled) { + /* If timer has been successfully cancelled then it means + * that the client has reconnected within grace period. + * Since we've bumped up the bind count with a gf_client_get() + * for this connect attempt, we need to drop the bind count + * for earlier connect, since grace timer handler couldn't + * drop it since the timer was cancelled. + */ gf_client_put (client, NULL); + /* We need to drop the ref count for this reconnected client + * since one ref was taken before delegating to the grace + * timer handler. Since grace timer handler was cancelled, + * it couldn't run and drop the ref either. + */ + gf_client_unref (client); + } + serv_ctx = server_ctx_get (client, client->this); if (serv_ctx == NULL) { gf_msg (this->name, GF_LOG_INFO, 0, diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c index 381fb6f515e..51eb4919952 100644 --- a/xlators/protocol/server/src/server-helpers.c +++ b/xlators/protocol/server/src/server-helpers.c @@ -490,6 +490,10 @@ get_frame_from_request (rpcsvc_request_t *req) } } + /* Add a ref for this fop */ + if (client) + gf_client_ref (client); + frame->root->uid = req->uid; frame->root->gid = req->gid; frame->root->pid = req->pid; diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c index ee900712f79..e47acb28637 100644 --- a/xlators/protocol/server/src/server.c +++ b/xlators/protocol/server/src/server.c @@ -73,10 +73,8 @@ grace_time_handler (void *data) if (cancelled) { /* - * client must not be destroyed in gf_client_put(), - * so take a ref. + * ref has already been taken in server_rpc_notify() */ - gf_client_ref (client); gf_client_put (client, &detached); if (detached) /* reconnection did not happen :-( */ @@ -543,13 +541,12 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, * new transport will be created upon reconnect. */ pthread_mutex_lock (&conf->mutex); + client = trans->xl_private; list_del_init (&trans->list); - rpc_transport_unref (trans); pthread_mutex_unlock (&conf->mutex); - client = trans->xl_private; if (!client) - break; + goto unref_transport; gf_msg (this->name, GF_LOG_INFO, 0, PS_MSG_CLIENT_DISCONNECTING, "disconnecting connection" @@ -582,18 +579,13 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, trans->myinfo.identifier, auth_path); } - gf_client_unref (client); - break; + + /* + * gf_client_unref will be done while handling + * RPC_EVENT_TRANSPORT_DESTROY + */ + goto unref_transport; } - trans->xl_private = NULL; - server_connection_cleanup (this, client, INTERNAL_LOCKS); - gf_event (EVENT_CLIENT_DISCONNECT, "client_uid=%s;" - "client_identifier=%s;server_identifier=%s;" - "brick_path=%s", - client->client_uid, - trans->peerinfo.identifier, - trans->myinfo.identifier, - auth_path); serv_ctx = server_ctx_get (client, this); @@ -601,7 +593,7 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, gf_msg (this->name, GF_LOG_INFO, 0, PS_MSG_SERVER_CTX_GET_FAILED, "server_ctx_get() failed"); - goto out; + goto unref_transport; } grace_ts.tv_sec = conf->grace_timeout; @@ -616,6 +608,12 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, "starting a grace timer for %s", client->client_uid); + /* ref to protect against client destruction + * in RPCSVC_EVENT_TRANSPORT_DESTROY while + * we are starting a grace timer + */ + gf_client_ref (client); + serv_ctx->grace_timer = gf_timer_call_after (this->ctx, grace_ts, @@ -624,13 +622,42 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, } } UNLOCK (&serv_ctx->fdtable_lock); + + ret = dict_get_str (this->options, "auth-path", &auth_path); + if (ret) { + gf_msg (this->name, GF_LOG_WARNING, 0, + PS_MSG_DICT_GET_FAILED, + "failed to get auth-path"); + auth_path = NULL; + } + + gf_event (EVENT_CLIENT_DISCONNECT, "client_uid=%s;" + "client_identifier=%s;server_identifier=%s;" + "brick_path=%s", + client->client_uid, + trans->peerinfo.identifier, + trans->myinfo.identifier, + auth_path); + +unref_transport: + /* rpc_transport_unref() causes a RPCSVC_EVENT_TRANSPORT_DESTROY + * to be called in blocking manner + * So no code should ideally be after this unref + */ + rpc_transport_unref (trans); + break; + case RPCSVC_EVENT_TRANSPORT_DESTROY: - /*- conn obj has been disassociated from trans on first - * disconnect. - * conn cleanup and destruction is handed over to - * grace_time_handler or the subsequent handler that 'owns' - * the conn. Nothing left to be done here. */ + client = trans->xl_private; + if (!client) + break; + + /* unref only for if (!client->lk_heal) */ + if (!conf->lk_heal) + gf_client_unref (client); + + trans->xl_private = NULL; break; default: break; -- cgit