From d6c1468b2779b6247e44b75276436021a3469a59 Mon Sep 17 00:00:00 2001 From: Krishnan Parthasarathi Date: Tue, 21 Jan 2014 23:41:07 +0530 Subject: rpc: transport may be destroyed while rpc isn't rpc_clnt object is destroyed after the corresponding transport object is destroyed. But rpc_clnt_reconnect, a timer driven function, refers to the transport object beyond its 'life'. Instead, using the embedded connection object prevents use after free problem wrt transport object. Also, access transport object under conn->lock. Change-Id: Iae28e8a657d02689963c510114ad7cb7e6764e62 BUG: 962619 Signed-off-by: Krishnan Parthasarathi Reviewed-on: http://review.gluster.org/6751 Tested-by: Gluster Build System Reviewed-by: Anand Avati --- rpc/rpc-lib/src/rpc-clnt.c | 161 ++++++++++++++++--------- rpc/rpc-lib/src/rpc-clnt.h | 1 + xlators/mgmt/glusterd/src/glusterd-rebalance.c | 4 +- xlators/protocol/client/src/client-handshake.c | 2 +- xlators/protocol/client/src/client.c | 2 +- 5 files changed, 106 insertions(+), 64 deletions(-) diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c index 4920edaa3..22513b789 100644 --- a/rpc/rpc-lib/src/rpc-clnt.c +++ b/rpc/rpc-lib/src/rpc-clnt.c @@ -137,6 +137,7 @@ out: static void call_bail (void *data) { + rpc_transport_t *trans = NULL; struct rpc_clnt *clnt = NULL; rpc_clnt_connection_t *conn = NULL; struct timeval current; @@ -147,12 +148,27 @@ call_bail (void *data) char frame_sent[256] = {0,}; struct timespec timeout = {0,}; struct iovec iov = {0,}; + char peerid[UNIX_PATH_MAX] = {0}; GF_VALIDATE_OR_GOTO ("client", data, out); clnt = data; conn = &clnt->conn; + pthread_mutex_lock (&conn->lock); + { + trans = conn->trans; + if (trans) { + strncpy (peerid, conn->trans->peerinfo.identifier, + sizeof (peerid)-1); + + } + } + pthread_mutex_unlock (&conn->lock); + /*rpc_clnt_connection_cleanup will be unwinding all saved frames, + * bailed or otherwise*/ + if (!trans) + goto out; gettimeofday (¤t, NULL); INIT_LIST_HEAD (&list); @@ -172,9 +188,9 @@ call_bail (void *data) (void *) clnt); if (conn->timer == NULL) { - gf_log (conn->trans->name, GF_LOG_WARNING, + gf_log (conn->name, GF_LOG_WARNING, "Cannot create bailout timer for %s", - conn->trans->peerinfo.identifier); + peerid); } } @@ -197,7 +213,7 @@ call_bail (void *data) 256 - strlen (frame_sent), ".%"GF_PRI_SUSECONDS, trav->saved_at.tv_usec); - gf_log (conn->trans->name, GF_LOG_ERROR, + gf_log (conn->name, GF_LOG_ERROR, "bailing out frame type(%s) op(%s(%d)) xid = 0x%x " "sent = %s. timeout = %d for %s", trav->rpcreq->prog->progname, @@ -205,7 +221,7 @@ call_bail (void *data) trav->rpcreq->prog->procnames[trav->rpcreq->procnum] : "--", trav->rpcreq->procnum, trav->rpcreq->xid, frame_sent, - conn->frame_timeout, conn->trans->peerinfo.identifier); + conn->frame_timeout, peerid); clnt = rpc_clnt_ref (clnt); trav->rpcreq->rpc_status = -1; @@ -357,7 +373,7 @@ saved_frames_unwind (struct saved_frames *saved_frames) if (!trav->rpcreq || !trav->rpcreq->prog) continue; - gf_log_callingfn (trav->rpcreq->conn->trans->name, + gf_log_callingfn (trav->rpcreq->conn->name, GF_LOG_ERROR, "forced unwinding frame type(%s) op(%s(%d)) " "called at %s (xid=0x%x)", @@ -394,7 +410,7 @@ saved_frames_destroy (struct saved_frames *frames) void -rpc_clnt_reconnect (void *trans_ptr) +rpc_clnt_reconnect (void *conn_ptr) { rpc_transport_t *trans = NULL; rpc_clnt_connection_t *conn = NULL; @@ -402,15 +418,16 @@ rpc_clnt_reconnect (void *trans_ptr) int32_t ret = 0; struct rpc_clnt *clnt = NULL; - trans = trans_ptr; - if (!trans || !trans->mydata) - return; - - conn = trans->mydata; + conn = conn_ptr; clnt = conn->rpc_clnt; pthread_mutex_lock (&conn->lock); { + trans = conn->trans; + if (!trans) { + pthread_mutex_unlock (&conn->lock); + return; + } if (conn->reconnect) gf_timer_call_cancel (clnt->ctx, conn->reconnect); @@ -420,16 +437,16 @@ rpc_clnt_reconnect (void *trans_ptr) ts.tv_sec = 3; ts.tv_nsec = 0; - gf_log (trans->name, GF_LOG_TRACE, + gf_log (conn->name, GF_LOG_TRACE, "attempting reconnect"); ret = rpc_transport_connect (trans, conn->config.remote_port); conn->reconnect = gf_timer_call_after (clnt->ctx, ts, rpc_clnt_reconnect, - trans); + conn); } else { - gf_log (trans->name, GF_LOG_TRACE, + gf_log (conn->name, GF_LOG_TRACE, "breaking reconnect chain"); } } @@ -457,7 +474,7 @@ rpc_clnt_fill_request_info (struct rpc_clnt *clnt, rpc_request_info_t *info) pthread_mutex_unlock (&clnt->conn.lock); if (ret == -1) { - gf_log (clnt->conn.trans->name, GF_LOG_CRITICAL, + gf_log (clnt->conn.name, GF_LOG_CRITICAL, "cannot lookup the saved " "frame corresponding to xid (%d)", info->xid); goto out; @@ -517,11 +534,9 @@ rpc_clnt_connection_cleanup (rpc_clnt_connection_t *conn) clnt = conn->rpc_clnt; - gf_log (conn->trans->name, GF_LOG_TRACE, - "cleaning up state in transport object %p", conn->trans); - pthread_mutex_lock (&conn->lock); { + saved_frames = conn->saved_frames; conn->saved_frames = saved_frames_new (); @@ -651,7 +666,7 @@ rpc_clnt_reply_init (rpc_clnt_connection_t *conn, rpc_transport_pollin_t *msg, ret = xdr_to_rpc_reply (msgbuf, msglen, &rpcmsg, &progmsg, req->verf.authdata); if (ret != 0) { - gf_log (conn->trans->name, GF_LOG_WARNING, + gf_log (conn->name, GF_LOG_WARNING, "RPC reply decoding failed"); goto out; } @@ -662,13 +677,13 @@ rpc_clnt_reply_init (rpc_clnt_connection_t *conn, rpc_transport_pollin_t *msg, goto out; } - gf_log (conn->trans->name, GF_LOG_TRACE, + gf_log (conn->name, GF_LOG_TRACE, "received rpc message (RPC XID: 0x%x" " Program: %s, ProgVers: %d, Proc: %d) from rpc-transport (%s)", saved_frame->rpcreq->xid, saved_frame->rpcreq->prog->progname, saved_frame->rpcreq->prog->progver, - saved_frame->rpcreq->procnum, conn->trans->name); + saved_frame->rpcreq->procnum, conn->name); out: if (ret != 0) { @@ -696,18 +711,18 @@ rpc_clnt_handle_cbk (struct rpc_clnt *clnt, rpc_transport_pollin_t *msg) clnt = rpc_clnt_ref (clnt); ret = xdr_to_rpc_call (msgbuf, msglen, &rpcmsg, &progmsg, NULL,NULL); if (ret == -1) { - gf_log (clnt->conn.trans->name, GF_LOG_WARNING, + gf_log (clnt->conn.name, GF_LOG_WARNING, "RPC call decoding failed"); goto out; } - gf_log (clnt->conn.trans->name, GF_LOG_TRACE, + gf_log (clnt->conn.name, GF_LOG_TRACE, "received rpc message (XID: 0x%lx, " "Ver: %ld, Program: %ld, ProgVers: %ld, Proc: %ld) " "from rpc-transport (%s)", rpc_call_xid (&rpcmsg), rpc_call_rpcvers (&rpcmsg), rpc_call_program (&rpcmsg), rpc_call_progver (&rpcmsg), rpc_call_progproc (&rpcmsg), - clnt->conn.trans->name); + clnt->conn.name); procnum = rpc_call_progproc (&rpcmsg); @@ -750,7 +765,7 @@ rpc_clnt_handle_reply (struct rpc_clnt *clnt, rpc_transport_pollin_t *pollin) xid = ntoh32 (*((uint32_t *)pollin->vector[0].iov_base)); saved_frame = lookup_frame (conn, xid); if (saved_frame == NULL) { - gf_log (conn->trans->name, GF_LOG_ERROR, + gf_log (conn->name, GF_LOG_ERROR, "cannot lookup the saved frame for reply with xid (%u)", xid); goto out; @@ -758,7 +773,7 @@ rpc_clnt_handle_reply (struct rpc_clnt *clnt, rpc_transport_pollin_t *pollin) req = saved_frame->rpcreq; if (req == NULL) { - gf_log (conn->trans->name, GF_LOG_ERROR, + gf_log (conn->name, GF_LOG_ERROR, "no request with frame for xid (%u)", xid); goto out; } @@ -766,7 +781,7 @@ rpc_clnt_handle_reply (struct rpc_clnt *clnt, rpc_transport_pollin_t *pollin) ret = rpc_clnt_reply_init (conn, pollin, req, saved_frame); if (ret != 0) { req->rpc_status = -1; - gf_log (conn->trans->name, GF_LOG_WARNING, + gf_log (conn->name, GF_LOG_WARNING, "initialising rpc reply failed"); } @@ -859,7 +874,7 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata, conn->reconnect = gf_timer_call_after (clnt->ctx, ts, rpc_clnt_reconnect, - conn->trans); + conn); } } pthread_mutex_unlock (&conn->lock); @@ -961,10 +976,17 @@ rpc_clnt_connection_init (struct rpc_clnt *clnt, glusterfs_ctx_t *ctx, { int ret = -1; rpc_clnt_connection_t *conn = NULL; + rpc_transport_t *trans = NULL; conn = &clnt->conn; pthread_mutex_init (&clnt->conn.lock, NULL); + conn->name = gf_strdup (name); + if (!conn->name) { + ret = -1; + goto out; + } + ret = dict_get_int32 (options, "frame-timeout", &conn->frame_timeout); if (ret >= 0) { @@ -975,25 +997,28 @@ rpc_clnt_connection_init (struct rpc_clnt *clnt, glusterfs_ctx_t *ctx, "defaulting frame-timeout to 30mins"); conn->frame_timeout = 1800; } + conn->rpc_clnt = clnt; - conn->trans = rpc_transport_load (ctx, options, name); - if (!conn->trans) { + trans = rpc_transport_load (ctx, options, name); + if (!trans) { gf_log (name, GF_LOG_WARNING, "loading of new rpc-transport" " failed"); ret = -1; goto out; } + rpc_transport_ref (trans); - rpc_transport_ref (conn->trans); - - conn->rpc_clnt = clnt; + pthread_mutex_lock (&conn->lock); + { + conn->trans = trans; + trans = NULL; + } + pthread_mutex_unlock (&conn->lock); ret = rpc_transport_register_notify (conn->trans, rpc_clnt_notify, conn); if (ret == -1) { gf_log (name, GF_LOG_WARNING, "registering notify failed"); - rpc_clnt_connection_cleanup (conn); - conn = NULL; goto out; } @@ -1001,13 +1026,26 @@ rpc_clnt_connection_init (struct rpc_clnt *clnt, glusterfs_ctx_t *ctx, if (!conn->saved_frames) { gf_log (name, GF_LOG_WARNING, "creation of saved_frames " "failed"); - rpc_clnt_connection_cleanup (conn); + ret = -1; goto out; } ret = 0; out: + if (ret) { + pthread_mutex_lock (&conn->lock); + { + trans = conn->trans; + conn->trans = NULL; + } + pthread_mutex_unlock (&conn->lock); + if (trans) + rpc_transport_unref (trans); + //conn cleanup needs to be done since we might have failed to + // register notification. + rpc_clnt_connection_cleanup (conn); + } return ret; } @@ -1079,7 +1117,7 @@ rpc_clnt_start (struct rpc_clnt *rpc) conn = &rpc->conn; - rpc_clnt_reconnect (conn->trans); + rpc_clnt_reconnect (conn); return 0; } @@ -1234,7 +1272,7 @@ rpc_clnt_record_build_record (struct rpc_clnt *clnt, int prognum, int progver, xid, au, &request, auth_data); if (ret == -1) { - gf_log (clnt->conn.trans->name, GF_LOG_WARNING, + gf_log (clnt->conn.name, GF_LOG_WARNING, "cannot build a rpc-request xid (%"PRIu64")", xid); goto out; } @@ -1257,7 +1295,7 @@ rpc_clnt_record_build_record (struct rpc_clnt *clnt, int prognum, int progver, hdrsize); if (!recordhdr.iov_base) { - gf_log (clnt->conn.trans->name, GF_LOG_ERROR, + gf_log (clnt->conn.name, GF_LOG_ERROR, "Failed to build record header"); iobuf_unref (request_iob); request_iob = NULL; @@ -1307,7 +1345,7 @@ rpc_clnt_record (struct rpc_clnt *clnt, call_frame_t *call_frame, au.lk_owner.lk_owner_len = 4; } - gf_log (clnt->conn.trans->name, GF_LOG_TRACE, "Auth Info: pid: %u, uid: %d" + gf_log (clnt->conn.name, GF_LOG_TRACE, "Auth Info: pid: %u, uid: %d" ", gid: %d, owner: %s", au.pid, au.uid, au.gid, lkowner_utoa (&call_frame->root->lk_owner)); @@ -1317,7 +1355,7 @@ rpc_clnt_record (struct rpc_clnt *clnt, call_frame_t *call_frame, callid, &au, rpchdr); if (!request_iob) { - gf_log (clnt->conn.trans->name, GF_LOG_WARNING, + gf_log (clnt->conn.name, GF_LOG_WARNING, "cannot build rpc-record"); goto out; } @@ -1353,7 +1391,7 @@ rpcclnt_cbk_program_register (struct rpc_clnt *clnt, pthread_mutex_unlock (&clnt->lock); if (already_registered) { - gf_log_callingfn (clnt->conn.trans->name, GF_LOG_DEBUG, + gf_log_callingfn (clnt->conn.name, GF_LOG_DEBUG, "already registered"); ret = 0; goto out; @@ -1377,14 +1415,14 @@ rpcclnt_cbk_program_register (struct rpc_clnt *clnt, pthread_mutex_unlock (&clnt->lock); ret = 0; - gf_log (clnt->conn.trans->name, GF_LOG_DEBUG, + gf_log (clnt->conn.name, GF_LOG_DEBUG, "New program registered: %s, Num: %d, Ver: %d", program->progname, program->prognum, program->progver); out: if (ret == -1) { - gf_log (clnt->conn.trans->name, GF_LOG_ERROR, + gf_log (clnt->conn.name, GF_LOG_ERROR, "Program registration failed:" " %s, Num: %d, Ver: %d", program->progname, program->prognum, program->progver); @@ -1419,10 +1457,6 @@ rpc_clnt_submit (struct rpc_clnt *rpc, rpc_clnt_prog_t *prog, conn = &rpc->conn; - if (conn->trans == NULL) { - goto out; - } - rpcreq = mem_get (rpc->reqpool); if (rpcreq == NULL) { goto out; @@ -1458,7 +1492,7 @@ rpc_clnt_submit (struct rpc_clnt *rpc, rpc_clnt_prog_t *prog, procnum, proglen, &rpchdr, callid); if (!request_iob) { - gf_log (conn->trans->name, GF_LOG_WARNING, + gf_log (conn->name, GF_LOG_WARNING, "cannot build rpc-record"); goto out; } @@ -1487,15 +1521,14 @@ rpc_clnt_submit (struct rpc_clnt *rpc, rpc_clnt_prog_t *prog, conn->config.remote_port); } - ret = rpc_transport_submit_request (rpc->conn.trans, - &req); + ret = rpc_transport_submit_request (conn->trans, &req); if (ret == -1) { - gf_log (conn->trans->name, GF_LOG_WARNING, + gf_log (conn->name, GF_LOG_WARNING, "failed to submit rpc-request " "(XID: 0x%x Program: %s, ProgVers: %d, " "Proc: %d) to rpc-transport (%s)", rpcreq->xid, rpcreq->prog->progname, rpcreq->prog->progver, - rpcreq->procnum, rpc->conn.trans->name); + rpcreq->procnum, conn->name); } if ((ret >= 0) && frame) { @@ -1506,7 +1539,7 @@ rpc_clnt_submit (struct rpc_clnt *rpc, rpc_clnt_prog_t *prog, "(XID: 0x%x Program: %s, ProgVers: %d, " "Proc: %d) to rpc-transport (%s)", rpcreq->xid, rpcreq->prog->progname, rpcreq->prog->progver, - rpcreq->procnum, rpc->conn.trans->name); + rpcreq->procnum, conn->name); } } pthread_mutex_unlock (&conn->lock); @@ -1554,11 +1587,14 @@ rpc_clnt_ref (struct rpc_clnt *rpc) static void rpc_clnt_trigger_destroy (struct rpc_clnt *rpc) { + rpc_clnt_connection_t *conn = NULL; + if (!rpc) return; + conn = &rpc->conn; rpc_clnt_disable (rpc); - rpc_transport_unref (rpc->conn.trans); + rpc_transport_unref (conn->trans); } static void @@ -1627,6 +1663,7 @@ void rpc_clnt_disable (struct rpc_clnt *rpc) { rpc_clnt_connection_t *conn = NULL; + rpc_transport_t *trans = NULL; if (!rpc) { goto out; @@ -1654,11 +1691,15 @@ rpc_clnt_disable (struct rpc_clnt *rpc) conn->ping_timer = NULL; conn->ping_started = 0; } + trans = conn->trans; + conn->trans = NULL; } pthread_mutex_unlock (&conn->lock); - rpc_transport_disconnect (rpc->conn.trans); + if (trans) { + rpc_transport_disconnect (trans); + } out: return; @@ -1670,7 +1711,7 @@ rpc_clnt_reconfig (struct rpc_clnt *rpc, struct rpc_clnt_config *config) { if (config->rpc_timeout) { if (config->rpc_timeout != rpc->conn.config.rpc_timeout) - gf_log (rpc->conn.trans->name, GF_LOG_INFO, + gf_log (rpc->conn.name, GF_LOG_INFO, "changing timeout to %d (from %d)", config->rpc_timeout, rpc->conn.config.rpc_timeout); @@ -1679,7 +1720,7 @@ rpc_clnt_reconfig (struct rpc_clnt *rpc, struct rpc_clnt_config *config) if (config->remote_port) { if (config->remote_port != rpc->conn.config.remote_port) - gf_log (rpc->conn.trans->name, GF_LOG_INFO, + gf_log (rpc->conn.name, GF_LOG_INFO, "changing port to %d (from %d)", config->remote_port, rpc->conn.config.remote_port); @@ -1691,13 +1732,13 @@ rpc_clnt_reconfig (struct rpc_clnt *rpc, struct rpc_clnt_config *config) if (rpc->conn.config.remote_host) { if (strcmp (rpc->conn.config.remote_host, config->remote_host)) - gf_log (rpc->conn.trans->name, GF_LOG_INFO, + gf_log (rpc->conn.name, GF_LOG_INFO, "changing hostname to %s (from %s)", config->remote_host, rpc->conn.config.remote_host); GF_FREE (rpc->conn.config.remote_host); } else { - gf_log (rpc->conn.trans->name, GF_LOG_INFO, + gf_log (rpc->conn.name, GF_LOG_INFO, "setting hostname to %s", config->remote_host); } diff --git a/rpc/rpc-lib/src/rpc-clnt.h b/rpc/rpc-lib/src/rpc-clnt.h index 2596c3508..bd4820605 100644 --- a/rpc/rpc-lib/src/rpc-clnt.h +++ b/rpc/rpc-lib/src/rpc-clnt.h @@ -144,6 +144,7 @@ struct rpc_clnt_connection { struct timeval last_sent; struct timeval last_received; int32_t ping_started; + char *name; }; typedef struct rpc_clnt_connection rpc_clnt_connection_t; diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index b274e3367..bdedf4c04 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -126,7 +126,7 @@ __glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, UNLOCK (&defrag->lock); gf_log ("", GF_LOG_DEBUG, "%s got RPC_CLNT_CONNECT", - rpc->conn.trans->name); + rpc->conn.name); break; } @@ -161,7 +161,7 @@ __glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, GF_FREE (defrag); gf_log ("", GF_LOG_DEBUG, "%s got RPC_CLNT_DISCONNECT", - rpc->conn.trans->name); + rpc->conn.name); break; } case RPC_CLNT_DESTROY: diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c index 7c8be42ed..85b0f757b 100644 --- a/xlators/protocol/client/src/client-handshake.c +++ b/xlators/protocol/client/src/client-handshake.c @@ -1458,7 +1458,7 @@ client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *m gf_log (this->name, GF_LOG_INFO, "Connected to %s, attached to remote volume '%s'.", - conf->rpc->conn.trans->peerinfo.identifier, + conf->rpc->conn.name, remote_subvol); rpc_clnt_set_connected (&conf->rpc->conn); diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c index b0a71d3f9..306e555ef 100644 --- a/xlators/protocol/client/src/client.c +++ b/xlators/protocol/client/src/client.c @@ -2205,7 +2205,7 @@ client_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, "will keep trying to connect to " "glusterd until brick's port is " "available", - conf->rpc->conn.trans->peerinfo.identifier); + conf->rpc->conn.name); if (conf->portmap_err_logged) conf->disconnect_err_logged = 1; -- cgit