summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Prasanna Kumar Kalever <prasanna.kalever@redhat.com> 2016-04-27 19:12:19 +0530
committer: Raghavendra G <rgowdapp@redhat.com> 2016-05-04 02:25:31 -0700
commit: 89759de7e47d99eb1fca2763931b4f33ac765173 (patch)
tree: c9ed3003cc4b452a12961b45110928019279a62e
parent: 5ba82c0b9fa043aa4397d6744b079388ea360cc0 (diff)
glusterd: add defence mechanism to avoid brick port clashes
Intro: Currently glusterd maintains the portmap registry, which contains the ports that are free to use between 49152 and 65535. This registry is initialized once and is updated as and when glusterd sees that ports are in use. Glusterd first looks in the portmap registry for a port marked FREE, then checks whether that port is currently free using connect(), and then passes it to the brick process, which has to bind to it. Problem: There is a time gap between glusterd checking the port with connect() and the brick process actually binding to it. In this time gap another process may occupy the port, in which case the brick will fail to bind and exit. Case 1: To avoid a gluster client process occupying the port supplied by glusterd, we have separated the client port map range from the brick port map range; more at http://review.gluster.org/#/c/13998/ Case 2: (Handled by this patch) To avoid some other foreign process occupying the port supplied by glusterd, this patch implements a mechanism to return the EADDRINUSE error code to glusterd, upon which a new port is allocated and glusterd tries to restart the brick process with the newly allocated port. Note: In case of glusterd restarts, i.e. runner_run_nowait(), there is no way to handle Case 2, because runner_run_nowait() does not wait for the return/exit code of the executed command (the brick process). Hence, as of now, in such a case we cannot know with what error the brick has failed to connect. This patch also fixes runner_end() to perform some cleanup w.r.t. return values.
Change-Id: Iec52e7f5d87ce938d173f8ef16aa77fd573f2c5e BUG: 1322805 Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com> Reviewed-on: http://review.gluster.org/14043 Tested-by: Prasanna Kumar Kalever <pkalever@redhat.com> Reviewed-by: Atin Mukherjee <amukherj@redhat.com> Smoke: Gluster Build System <jenkins@build.gluster.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
-rw-r--r-- glusterfsd/src/glusterfsd.c 7
-rw-r--r-- libglusterfs/src/run.c 12
-rw-r--r-- rpc/rpc-lib/src/rpcsvc.c 61
-rw-r--r-- rpc/rpc-transport/socket/src/socket.c 4
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-messages.h 11
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-utils.c 22
-rw-r--r-- xlators/protocol/server/src/server.c 3
7 files changed, 71 insertions, 49 deletions
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
index 09ba9cd8086..81bc15b8b28 100644
--- a/glusterfsd/src/glusterfsd.c
+++ b/glusterfsd/src/glusterfsd.c
@@ -1316,7 +1316,7 @@ cleanup_and_exit (int signum)
trav = trav->next;
}
- exit(0);
+ exit(signum);
}
@@ -2196,6 +2196,7 @@ glusterfs_process_volfp (glusterfs_ctx_t *ctx, FILE *fp)
glusterfs_graph_t *graph = NULL;
int ret = -1;
xlator_t *trav = NULL;
+ int err = 0;
graph = glusterfs_graph_construct (fp);
if (!graph) {
@@ -2232,7 +2233,9 @@ out:
if (ret && !ctx->active) {
glusterfs_graph_destroy (graph);
/* there is some error in setting up the first graph itself */
- cleanup_and_exit (0);
+ err = -ret;
+ sys_write (ctx->daemon_pipe[1], (void *) &err, sizeof (err));
+ cleanup_and_exit (err);
}
return ret;
diff --git a/libglusterfs/src/run.c b/libglusterfs/src/run.c
index 70ebcc54fb6..c625a5b99de 100644
--- a/libglusterfs/src/run.c
+++ b/libglusterfs/src/run.c
@@ -339,13 +339,13 @@ int
runner_end_reuse (runner_t *runner)
{
int i = 0;
- int ret = -1;
+ int ret = 1;
int chstat = 0;
if (runner->chpid > 0) {
if (waitpid (runner->chpid, &chstat, 0) == runner->chpid) {
if (WIFEXITED(chstat)) {
- ret = -WEXITSTATUS(chstat);
+ ret = WEXITSTATUS(chstat);
} else {
ret = chstat;
}
@@ -359,7 +359,7 @@ runner_end_reuse (runner_t *runner)
}
}
- return ret;
+ return -ret;
}
int
@@ -388,8 +388,12 @@ runner_run_generic (runner_t *runner, int (*rfin)(runner_t *runner))
int ret = 0;
ret = runner_start (runner);
+ if (ret)
+ goto out;
+ ret = rfin (runner);
- return -(rfin (runner) || ret);
+out:
+ return ret;
}
int
diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c
index f4cff12762f..05d269609c5 100644
--- a/rpc/rpc-lib/src/rpcsvc.c
+++ b/rpc/rpc-lib/src/rpcsvc.c
@@ -1580,43 +1580,6 @@ rpcsvc_transport_peeraddr (rpc_transport_t *trans, char *addrstr, int addrlen,
sasize);
}
-
-rpc_transport_t *
-rpcsvc_transport_create (rpcsvc_t *svc, dict_t *options, char *name)
-{
- int ret = -1;
- rpc_transport_t *trans = NULL;
-
- trans = rpc_transport_load (svc->ctx, options, name);
- if (!trans) {
- gf_log (GF_RPCSVC, GF_LOG_WARNING, "cannot create listener, "
- "initing the transport failed");
- goto out;
- }
-
- ret = rpc_transport_listen (trans);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_WARNING,
- "listening on transport failed");
- goto out;
- }
-
- ret = rpc_transport_register_notify (trans, rpcsvc_notify, svc);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_WARNING, "registering notify failed");
- goto out;
- }
-
- ret = 0;
-out:
- if ((ret == -1) && (trans)) {
- rpc_transport_disconnect (trans);
- trans = NULL;
- }
-
- return trans;
-}
-
rpcsvc_listener_t *
rpcsvc_listener_alloc (rpcsvc_t *svc, rpc_transport_t *trans)
{
@@ -1654,9 +1617,23 @@ rpcsvc_create_listener (rpcsvc_t *svc, dict_t *options, char *name)
goto out;
}
- trans = rpcsvc_transport_create (svc, options, name);
+ trans = rpc_transport_load (svc->ctx, options, name);
if (!trans) {
- /* LOG TODO */
+ gf_log (GF_RPCSVC, GF_LOG_WARNING, "cannot create listener, "
+ "initing the transport failed");
+ goto out;
+ }
+
+ ret = rpc_transport_listen (trans);
+ if (ret == -EADDRINUSE || ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_WARNING,
+ "listening on transport failed");
+ goto out;
+ }
+
+ ret = rpc_transport_register_notify (trans, rpcsvc_notify, svc);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_WARNING, "registering notify failed");
goto out;
}
@@ -1759,7 +1736,11 @@ out:
GF_FREE (transport_name);
- return count;
+ if (count > 0) {
+ return count;
+ } else {
+ return ret;
+ }
}
diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
index 8301f79c5dc..8cdec00f642 100644
--- a/rpc/rpc-transport/socket/src/socket.c
+++ b/rpc/rpc-transport/socket/src/socket.c
@@ -878,6 +878,8 @@ __socket_server_bind (rpc_transport_t *this)
if (errno == EADDRINUSE) {
gf_log (this->name, GF_LOG_ERROR,
"Port is already in use");
+
+ ret = -EADDRINUSE;
}
}
@@ -3349,7 +3351,7 @@ socket_listen (rpc_transport_t *this)
ret = __socket_server_bind (this);
- if (ret == -1) {
+ if ((ret == -EADDRINUSE) || (ret == -1)) {
/* logged inside __socket_server_bind() */
sys_close (priv->sock);
priv->sock = -1;
diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h
index b49d87da1bf..61ba7bd3179 100644
--- a/xlators/mgmt/glusterd/src/glusterd-messages.h
+++ b/xlators/mgmt/glusterd/src/glusterd-messages.h
@@ -41,7 +41,7 @@
#define GLUSTERD_COMP_BASE GLFS_MSGID_GLUSTERD
-#define GLFS_NUM_MESSAGES 573
+#define GLFS_NUM_MESSAGES 575
#define GLFS_MSGID_END (GLUSTERD_COMP_BASE + GLFS_NUM_MESSAGES + 1)
/* Messaged with message IDs */
@@ -4641,6 +4641,15 @@
*/
#define GD_MSG_FILE_NOT_FOUND (GLUSTERD_COMP_BASE + 574)
+/*!
+ * @messageid 106575
+ * @diagnosis Brick failed to start with given port, hence it gets a fresh port
+ * on its own and try to restart the brick with a new port
+ * @recommendedaction Ensure the new port is not blocked by firewall
+ */
+
+#define GD_MSG_RETRY_WITH_NEW_PORT (GLUSTERD_COMP_BASE + 575)
+
/*------------*/
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
#endif /* !_GLUSTERD_MESSAGES_H_ */
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 5f04cba15db..c6d2dd52ebd 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -1808,6 +1808,8 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo,
writing the valgrind log to the same file.
*/
GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, exp_path);
+
+retry:
runinit (&runner);
if (priv->valgrind) {
@@ -1899,6 +1901,26 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo,
ret = runner_run (&runner);
synclock_lock (&priv->big_lock);
+ if (ret == -EADDRINUSE) {
+ /* retry after getting a new port */
+ gf_msg (this->name, GF_LOG_WARNING, -ret,
+ GD_MSG_SRC_BRICK_PORT_UNAVAIL,
+ "Port %d is used by other process", port);
+
+ port = pmap_registry_alloc (this);
+ if (!port) {
+ gf_msg (this->name, GF_LOG_CRITICAL, 0,
+ GD_MSG_NO_FREE_PORTS,
+ "Couldn't allocate a port");
+ ret = -1;
+ goto out;
+ }
+ gf_msg (this->name, GF_LOG_NOTICE, 0,
+ GD_MSG_RETRY_WITH_NEW_PORT,
+ "Retrying to start brick %s with new port %d",
+ brickinfo->path, port);
+ goto retry;
+ }
} else {
ret = runner_run_nowait (&runner);
}
diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
index 75b208c98c0..99874acae72 100644
--- a/xlators/protocol/server/src/server.c
+++ b/xlators/protocol/server/src/server.c
@@ -1059,7 +1059,8 @@ init (xlator_t *this)
gf_msg (this->name, GF_LOG_WARNING, 0,
PS_MSG_RPCSVC_LISTENER_CREATE_FAILED,
"creation of listener failed");
- ret = -1;
+ if (ret != -EADDRINUSE)
+ ret = -1;
goto out;
} else if (ret < total_transport) {
gf_msg (this->name, GF_LOG_ERROR, 0,