summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAtin Mukherjee <amukherj@redhat.com>2017-10-17 21:32:44 +0530
committerShyamsundar Ranganathan <srangana@redhat.com>2017-10-31 18:07:17 +0000
commit7fee47f4ae3b2f6052e36546c50f69ec31a058d0 (patch)
treed8a89463a96d6ea49aa8b9a8867d74323dc893a8
parent68b18972fb09be2be32b434f9525cd41e608ac11 (diff)
glusterd: clean up portmap on brick disconnect
GlusterD's portmap entry for a brick is cleaned up when the brick process initiates a PMAP_SIGNOUT event at shutdown. But if the brick process crashes or is killed with SIGKILL, this event is never initiated and glusterd ends up with a stale port. Since GlusterD's portmap traversal happens both ways, forward for allocation and backward for registry search, glusterd might end up running with a stale port for a brick, which eventually causes clients to fail to connect to the bricks. The solution is to clean up the port entry as part of the brick disconnect event if the process is down. Although this makes handling of the PMAP_SIGNOUT event redundant in most cases, it is a safeguard to keep glusterd from getting into stale port issues. This patch also needs to bring in the changes from change id I705f101739ab1647ff52a92820d478354407264a, which are needed for the compilation to go through. > mainline patch : https://review.gluster.org/#/c/18541/ > https://review.gluster.org/#/c/17129/ Change-Id: I04c5be6d11e772ee4de16caf56dbb37d5c944303 BUG: 1507749 Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
-rw-r--r--rpc/rpc-lib/src/protocol-common.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c25
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.c38
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.h3
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c3
5 files changed, 54 insertions, 17 deletions
diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h
index 8865baf759b..3f659c332d6 100644
--- a/rpc/rpc-lib/src/protocol-common.h
+++ b/rpc/rpc-lib/src/protocol-common.h
@@ -106,7 +106,7 @@ enum gf_pmap_port_type {
GF_PMAP_PORT_FREE = 0,
GF_PMAP_PORT_FOREIGN, /* it actually means, not sure who is using it, but it is in-use */
GF_PMAP_PORT_LEASED,
- GF_PMAP_PORT_NONE,
+ GF_PMAP_PORT_ANY,
GF_PMAP_PORT_BRICKSERVER, /* port used by brick process */
};
typedef enum gf_pmap_port_type gf_pmap_port_type_t;
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index b3e1ec3a362..6d006c063d8 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -5607,6 +5607,8 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
glusterd_brickinfo_t *brickinfo = NULL;
glusterd_volinfo_t *volinfo = NULL;
xlator_t *this = NULL;
+ int32_t pid = -1;
+ char pidfile[PATH_MAX] = {0};
brickid = mydata;
if (!brickid)
@@ -5707,6 +5709,29 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
"peer=%s;volume=%s;brick=%s",
brickinfo->hostname, volinfo->volname,
brickinfo->path);
+ /* In case of an abrupt shutdown of a brick PMAP_SIGNOUT
+ * event is not received by glusterd which can lead to a
+ * stale port entry in glusterd, so forcibly clean up
+ * the same if the process is not running
+ */
+ GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo,
+ brickinfo, conf);
+ if (!gf_is_service_running (pidfile, &pid)) {
+ ret = pmap_registry_remove (
+ THIS, brickinfo->port,
+ brickinfo->path,
+ GF_PMAP_PORT_BRICKSERVER,
+ NULL, _gf_true);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING,
+ GD_MSG_PMAP_REGISTRY_REMOVE_FAIL,
+ 0, "Failed to remove pmap "
+ "registry for port %d for "
+ "brick %s", brickinfo->port,
+ brickinfo->path);
+ ret = 0;
+ }
+ }
}
glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED);
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c
index aa34ce4900e..bbbd18b9ad9 100644
--- a/xlators/mgmt/glusterd/src/glusterd-pmap.c
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c
@@ -27,7 +27,7 @@
#include <netinet/in.h>
-int
+static int
pmap_port_isfree (int port)
{
struct sockaddr_in sin;
@@ -155,7 +155,7 @@ pmap_registry_search (xlator_t *this, const char *brickname,
return 0;
}
-int
+static int
pmap_registry_search_by_xprt (xlator_t *this, void *xprt,
gf_pmap_port_type_t type)
{
@@ -168,10 +168,12 @@ pmap_registry_search_by_xprt (xlator_t *this, void *xprt,
for (p = pmap->last_alloc; p >= pmap->base_port; p--) {
if (!pmap->ports[p].xprt)
continue;
- if (pmap->ports[p].xprt == xprt &&
- pmap->ports[p].type == type) {
+ if (pmap->ports[p].xprt == xprt) {
+ if (pmap->ports[p].type == type ||
+ type == GF_PMAP_PORT_ANY) {
port = p;
break;
+ }
}
}
@@ -179,7 +181,7 @@ pmap_registry_search_by_xprt (xlator_t *this, void *xprt,
}
-char *
+static char *
pmap_registry_search_by_port (xlator_t *this, int port)
{
struct pmap_registry *pmap = NULL;
@@ -237,7 +239,8 @@ pmap_assign_port (xlator_t *this, int old_port, const char *path)
if (old_port) {
ret = pmap_registry_remove (this, 0, path,
- GF_PMAP_PORT_BRICKSERVER, NULL);
+ GF_PMAP_PORT_BRICKSERVER, NULL,
+ _gf_false);
if (ret) {
gf_msg (this->name, GF_LOG_WARNING,
GD_MSG_PMAP_REGISTRY_REMOVE_FAIL, 0, "Failed to"
@@ -340,7 +343,8 @@ pmap_registry_extend (xlator_t *this, int port, const char *brickname)
int
pmap_registry_remove (xlator_t *this, int port, const char *brickname,
- gf_pmap_port_type_t type, void *xprt)
+ gf_pmap_port_type_t type, void *xprt,
+ gf_boolean_t brick_disconnect)
{
struct pmap_registry *pmap = NULL;
int p = 0;
@@ -387,11 +391,16 @@ remove:
* can delete the entire entry.
*/
if (!pmap->ports[p].xprt) {
- brick_str = pmap->ports[p].brickname;
- if (brick_str) {
- while (*brick_str != '\0') {
- if (*(brick_str++) != ' ') {
- goto out;
+ /* If the signout call is being triggered by brick disconnect
+ * then clean up all the bricks (in case of brick mux)
+ */
+ if (!brick_disconnect) {
+ brick_str = pmap->ports[p].brickname;
+ if (brick_str) {
+ while (*brick_str != '\0') {
+ if (*(brick_str++) != ' ') {
+ goto out;
+ }
}
}
}
@@ -542,14 +551,15 @@ __gluster_pmap_signout (rpcsvc_request_t *req)
goto fail;
}
rsp.op_ret = pmap_registry_remove (THIS, args.port, args.brick,
- GF_PMAP_PORT_BRICKSERVER, req->trans);
+ GF_PMAP_PORT_BRICKSERVER, req->trans,
+ _gf_false);
ret = glusterd_get_brickinfo (THIS, args.brick, args.port, &brickinfo);
if (args.rdma_port) {
snprintf(brick_path, PATH_MAX, "%s.rdma", args.brick);
rsp.op_ret = pmap_registry_remove (THIS, args.rdma_port,
brick_path, GF_PMAP_PORT_BRICKSERVER,
- req->trans);
+ req->trans, _gf_false);
}
/* Clean up the pidfile for this brick given glusterfsd doesn't clean it
* any more. This is required to ensure we don't end up with having
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.h b/xlators/mgmt/glusterd/src/glusterd-pmap.h
index 9965a9577b5..253b4ccc85d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-pmap.h
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.h
@@ -42,7 +42,8 @@ int pmap_registry_bind (xlator_t *this, int port, const char *brickname,
gf_pmap_port_type_t type, void *xprt);
int pmap_registry_extend (xlator_t *this, int port, const char *brickname);
int pmap_registry_remove (xlator_t *this, int port, const char *brickname,
- gf_pmap_port_type_t type, void *xprt);
+ gf_pmap_port_type_t type, void *xprt,
+ gf_boolean_t brick_disconnect);
int pmap_registry_search (xlator_t *this, const char *brickname,
gf_pmap_port_type_t type, gf_boolean_t destroy);
struct pmap_registry *pmap_registry_get (xlator_t *this);
diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
index a5738919c79..c4661d6be4d 100644
--- a/xlators/mgmt/glusterd/src/glusterd.c
+++ b/xlators/mgmt/glusterd/src/glusterd.c
@@ -424,7 +424,8 @@ glusterd_rpcsvc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
pthread_mutex_lock (&priv->xprt_lock);
list_del (&xprt->list);
pthread_mutex_unlock (&priv->xprt_lock);
- pmap_registry_remove (this, 0, NULL, GF_PMAP_PORT_NONE, xprt);
+ pmap_registry_remove (this, 0, NULL, GF_PMAP_PORT_ANY, xprt,
+ _gf_false);
break;
}