From 5a57c1592a34ee6632ca1fb38e076dde381d1ae2 Mon Sep 17 00:00:00 2001 From: Jeff Darcy Date: Wed, 1 Feb 2017 22:00:32 -0500 Subject: socket: retry connect immediately if it fails Previously we relied on a complex dance of setting flags, shutting down the socket, tearing stuff down, getting an event, tearing more stuff down, and waiting for a higher-level retry. What we really need, in the case where we're just trying to connect prematurely e.g. to a brick that hasn't fully come up yet, is a simple retry of the connect(2) call. This was discovered by observing failures in ec-new-entry.t with multiplexing enabled, but probably fixes other random failures as well. Change-Id: Ibedb8942060bccc96b02272a333c3002c9b77d4c BUG: 1385758 Signed-off-by: Jeff Darcy Reviewed-on: https://review.gluster.org/16510 Smoke: Gluster Build System NetBSD-regression: NetBSD Build System CentOS-regression: Gluster Build System Reviewed-by: Shyamsundar Ranganathan --- rpc/rpc-transport/socket/src/socket.c | 38 +++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) (limited to 'rpc/rpc-transport') diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c index 4b1505c4eef..990571289c1 100644 --- a/rpc/rpc-transport/socket/src/socket.c +++ b/rpc/rpc-transport/socket/src/socket.c @@ -2944,6 +2944,33 @@ socket_fix_ssl_opts (rpc_transport_t *this, socket_private_t *priv, } } +/* + * If we might just be trying to connect prematurely, e.g. to a brick that's + * slow coming up, all we need is a simple retry. Don't worry about sleeping + * in some arbitrary thread. The connect(2) could already have the exact same + * effect, and we deal with it in that case so we can deal with it for sleep(3) + * as well. 
+ */ +static int +connect_loop (int sockfd, const struct sockaddr *addr, socklen_t addrlen) +{ /* Makes at most five connect(2) attempts on sockfd, sleeping one second between attempts; only an ENOENT failure is retried. */ + int ret; /* result of the most recent connect(2) call; this is what the caller gets back */ + int connect_fails = 0; /* number of ENOENT failures seen so far */ + + for (;;) { + ret = connect (sockfd, addr, addrlen); + if (ret >= 0) { /* connect succeeded: stop retrying */ + break; + } + if ((errno != ENOENT) || (++connect_fails >= 5)) { /* give up on any error other than ENOENT, or once the fifth ENOENT failure is counted */ + break; + } + sleep (1); /* pause one second before the next attempt */ + } + + return ret; /* no call intervenes between the last connect(2) and this return, so on failure errno is the one connect set */ +} + static int socket_connect (rpc_transport_t *this, int port) { @@ -3105,8 +3132,15 @@ socket_connect (rpc_transport_t *this, int port) } } - ret = connect (priv->sock, SA (&this->peerinfo.sockaddr), - this->peerinfo.sockaddr_len); + if (ign_enoent) { + ret = connect_loop (priv->sock, + SA (&this->peerinfo.sockaddr), + this->peerinfo.sockaddr_len); + } else { + ret = connect (priv->sock, + SA (&this->peerinfo.sockaddr), + this->peerinfo.sockaddr_len); + } if (ret == -1 && errno == ENOENT && ign_enoent) { gf_log (this->name, GF_LOG_WARNING, -- cgit