Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-pmap.c')
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-pmap.c  666
1 file changed, 666 insertions(+), 0 deletions(-)
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c
new file mode 100644
index 00000000000..16ac628ab82
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c
@@ -0,0 +1,666 @@
+/*
+ Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/compat-errno.h>
+
+#include "glusterd.h"
+#include "glusterd-utils.h"
+
+#include "portmap-xdr.h"
+#include "xdr-generic.h"
+#include "protocol-common.h"
+#include "glusterd-messages.h"
+#include "rpcsvc.h"
+
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <ctype.h> /* isspace() */
+
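+/* Check whether a TCP port is free on this host by trying to bind an IPv4
+ * socket to it. Returns 1 if the bind succeeds (port is free), 0 if it
+ * fails (port is in use), and -1 if the probe socket cannot be created.
+ */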
+static int
+pmap_port_isfree(int port)
+{
+ struct sockaddr_in sin;
+ int sock = -1;
+ int ret = 0;
+
+ memset(&sin, 0, sizeof(sin));
+    sin.sin_family = AF_INET;
+ sin.sin_port = hton16(port);
+
+ sock = socket(PF_INET, SOCK_STREAM, 0);
+ if (sock == -1)
+ return -1;
+
+ ret = bind(sock, (struct sockaddr *)&sin, sizeof(sin));
+ sys_close(sock);
+
+ return (ret == 0) ? 1 : 0;
+}
+
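+/* Allocate and initialize the portmap registry. Every port in the
+ * configured [base_port, max_port] range is probed once and marked either
+ * FREE or FOREIGN (already in use by some other process). Returns NULL on
+ * allocation failure.
+ */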
+static struct pmap_registry *
+pmap_registry_new(xlator_t *this)
+{
+ struct pmap_registry *pmap = NULL;
+ int i = 0;
+
+ pmap = CALLOC(sizeof(*pmap), 1);
+ if (!pmap)
+ return NULL;
+
+ pmap->base_port = pmap->last_alloc = ((glusterd_conf_t *)(this->private))
+ ->base_port;
+ pmap->max_port = ((glusterd_conf_t *)(this->private))->max_port;
+ for (i = pmap->base_port; i <= pmap->max_port; i++) {
+ if (pmap_port_isfree(i))
+ pmap->ports[i].type = GF_PMAP_PORT_FREE;
+ else
+ pmap->ports[i].type = GF_PMAP_PORT_FOREIGN;
+ }
+
+ return pmap;
+}
+
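+/* Return this glusterd's portmap registry, creating it lazily on first use
+ * and caching it in the glusterd private context.
+ */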
+struct pmap_registry *
+pmap_registry_get(xlator_t *this)
+{
+ glusterd_conf_t *priv = NULL;
+ struct pmap_registry *pmap = NULL;
+
+ priv = this->private;
+
+ pmap = priv->pmap;
+ if (!pmap) {
+ pmap = pmap_registry_new(this);
+ if (!pmap)
+ return NULL;
+ priv->pmap = pmap;
+ }
+
+ return pmap;
+}
+
+/*
+ * The "destroy" argument avoids a double search in pmap_registry_remove - one
+ * to find the entry in the table, and the other to find the particular
+ * brickname within that entry (which might cover multiple bricks). We do the
+ * actual deletion here by "whiting out" the brick name with spaces. It's up
+ * to pmap_registry_remove to figure out what to do from there.
+ */
+int
+pmap_registry_search(xlator_t *this, const char *brickname,
+ gf_pmap_port_type_t type, gf_boolean_t destroy)
+{
+ struct pmap_registry *pmap = NULL;
+ int p = 0;
+ char *brck = NULL;
+ size_t i;
+
+ pmap = pmap_registry_get(this);
+
+ for (p = pmap->last_alloc; p >= pmap->base_port; p--) {
+ if (!pmap->ports[p].brickname || pmap->ports[p].type != type)
+ continue;
+
+ brck = pmap->ports[p].brickname;
+ for (;;) {
+ for (i = 0; brck[i] && !isspace(brck[i]); ++i)
+ ;
+ if (i == 0 && brck[i] == '\0')
+ break;
+
+ if (strncmp(brck, brickname, i) == 0) {
+ /*
+ * Without this check, we'd break when brck
+ * is merely a substring of brickname.
+ */
+ if (brickname[i] == '\0') {
+ if (destroy)
+ do {
+ *(brck++) = ' ';
+ } while (--i);
+ return p;
+ }
+ }
+
+ brck += i;
+
+ /*
+ * Skip over *any* amount of whitespace, including
+ * none (if we're already at the end of the string).
+ */
+ while (isspace(*brck))
+ ++brck;
+            /*
+             * We're either at the end of the string (which the length
+             * check above will catch on the next iteration) or at the
+             * next non-whitespace substring (which strncmp itself will
+             * handle).
+             */
+ }
+ }
+
+ return 0;
+}
+
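+/* Find the port whose registry entry is associated with the given RPC
+ * transport and, unless GF_PMAP_PORT_ANY is passed, with the given port
+ * type. Returns the port number, or 0 if no entry matches.
+ */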
+static int
+pmap_registry_search_by_xprt(xlator_t *this, void *xprt,
+ gf_pmap_port_type_t type)
+{
+ struct pmap_registry *pmap = NULL;
+ int p = 0;
+ int port = 0;
+
+ pmap = pmap_registry_get(this);
+
+ for (p = pmap->last_alloc; p >= pmap->base_port; p--) {
+ if (!pmap->ports[p].xprt)
+ continue;
+ if (pmap->ports[p].xprt == xprt) {
+ if (pmap->ports[p].type == type || type == GF_PMAP_PORT_ANY) {
+ port = p;
+ break;
+ }
+ }
+ }
+
+ return port;
+}
+
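+/* Return the brick name(s) registered on the given port, or NULL if the
+ * port is out of range or is not a brick server port.
+ */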
+static char *
+pmap_registry_search_by_port(xlator_t *this, int port)
+{
+ struct pmap_registry *pmap = NULL;
+ char *brickname = NULL;
+ int max_port = 0;
+
+ max_port = ((glusterd_conf_t *)(this->private))->max_port;
+    if (port < 0 || port > max_port)
+ goto out;
+
+ pmap = pmap_registry_get(this);
+
+ if (pmap->ports[port].type == GF_PMAP_PORT_BRICKSERVER)
+ brickname = pmap->ports[port].brickname;
+
+out:
+ return brickname;
+}
+
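+/* Allocate the lowest port in the configured range that the registry
+ * considers available and that is actually bindable right now, mark it
+ * LEASED and return it. Returns 0 if no port could be allocated.
+ */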
+int
+pmap_registry_alloc(xlator_t *this)
+{
+ struct pmap_registry *pmap = NULL;
+ int p = 0;
+ int port = 0;
+
+ pmap = pmap_registry_get(this);
+
+ for (p = pmap->base_port; p <= pmap->max_port; p++) {
+        /* A port marked GF_PMAP_PORT_FOREIGN may have been released by the
+         * foreign process in the meantime, so re-probe it as well. */
+ if ((pmap->ports[p].type == GF_PMAP_PORT_FREE) ||
+ (pmap->ports[p].type == GF_PMAP_PORT_FOREIGN)) {
+ if (pmap_port_isfree(p)) {
+ pmap->ports[p].type = GF_PMAP_PORT_LEASED;
+ port = p;
+ break;
+ }
+ }
+ }
+
+ if (port > pmap->last_alloc)
+ pmap->last_alloc = port;
+
+ return port;
+}
+
+/* pmap_assign_port does a pmap_registry_remove followed by a
+ * pmap_registry_alloc; the removal ensures the brick's old port does not
+ * linger in the registry as a stale entry.
+ */
+int
+pmap_assign_port(xlator_t *this, int old_port, const char *path)
+{
+ int ret = -1;
+ int new_port = 0;
+
+ if (old_port) {
+ ret = pmap_registry_remove(this, 0, path, GF_PMAP_PORT_BRICKSERVER,
+ NULL, _gf_false);
+ if (ret) {
+            gf_msg(this->name, GF_LOG_WARNING, 0,
+                   GD_MSG_PMAP_REGISTRY_REMOVE_FAIL,
+                   "Failed to remove pmap registry for older signin "
+                   "for path %s",
+                   path);
+ }
+ }
+ new_port = pmap_registry_alloc(this);
+ return new_port;
+}
+
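+/* Record a brick sign-in on 'port': append the brick name to any bricks
+ * already registered on the port, and remember the port type and the RPC
+ * transport of the signing-in process. A sign-in against a port that is
+ * marked FREE is treated as a no-op (see the comment below). Always
+ * returns 0.
+ */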
+int
+pmap_registry_bind(xlator_t *this, int port, const char *brickname,
+ gf_pmap_port_type_t type, void *xprt)
+{
+ struct pmap_registry *pmap = NULL;
+ int p = 0;
+
+ pmap = pmap_registry_get(this);
+
+    if (port < 0 || port > pmap->max_port)
+ goto out;
+
+ p = port;
+ if (pmap->ports[p].type == GF_PMAP_PORT_FREE) {
+        /* Because of a race in the volume-start code path (friend
+         * handshaking on volumes with quorum enabled), glusterd may start a
+         * brick, get a disconnect, and then immediately try to start the
+         * same brick instance again based on another friend-update request.
+         * Even if that very first brick process never comes up, a sign-in
+         * event still gets sent, leaving two duplicate portmap entries for
+         * the same brick. Since brick start marks the previous port as
+         * free, it is safer to treat a sign-in as a no-op when the
+         * corresponding port type is already marked free.
+         */
+ goto out;
+ }
+    if (pmap->ports[p].brickname) {
+        char *tmp = pmap->ports[p].brickname;
+        if (asprintf(&pmap->ports[p].brickname, "%s %s", tmp, brickname) < 0) {
+            /* keep the existing list intact if the append fails */
+            pmap->ports[p].brickname = tmp;
+        } else {
+            free(tmp);
+        }
+ } else {
+ pmap->ports[p].brickname = strdup(brickname);
+ }
+ pmap->ports[p].type = type;
+ pmap->ports[p].xprt = xprt;
+
+ gf_msg("pmap", GF_LOG_INFO, 0, GD_MSG_BRICK_ADD,
+ "adding brick %s on port %d", brickname, port);
+
+ if (pmap->last_alloc < p)
+ pmap->last_alloc = p;
+out:
+ return 0;
+}
+
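+/* Add 'brickname' to the set of bricks registered on 'port', unless it is
+ * already present as a whole word in the entry. Only LEASED and
+ * BRICKSERVER ports can be extended. Returns 0 on success, -1 on error.
+ */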
+int
+pmap_registry_extend(xlator_t *this, int port, const char *brickname)
+{
+ struct pmap_registry *pmap = NULL;
+ char *old_bn;
+ char *new_bn;
+ size_t bn_len;
+ char *entry;
+ int found = 0;
+
+ pmap = pmap_registry_get(this);
+
+ if (port > pmap->max_port) {
+ return -1;
+ }
+
+ switch (pmap->ports[port].type) {
+ case GF_PMAP_PORT_LEASED:
+ case GF_PMAP_PORT_BRICKSERVER:
+ break;
+ default:
+ return -1;
+ }
+
+ old_bn = pmap->ports[port].brickname;
+ if (old_bn) {
+ bn_len = strlen(brickname);
+ entry = strstr(old_bn, brickname);
+ while (entry) {
+ found = 1;
+ if ((entry != old_bn) && (entry[-1] != ' ')) {
+ found = 0;
+ }
+ if ((entry[bn_len] != ' ') && (entry[bn_len] != '\0')) {
+ found = 0;
+ }
+ if (found) {
+ return 0;
+ }
+ entry = strstr(entry + bn_len, brickname);
+ }
+        if (asprintf(&new_bn, "%s %s", old_bn, brickname) < 0) {
+            return -1;
+        }
+ } else {
+ new_bn = strdup(brickname);
+ }
+
+ if (!new_bn) {
+ return -1;
+ }
+
+ pmap->ports[port].brickname = new_bn;
+ free(old_bn);
+
+ return 0;
+}
+
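+/* Remove a brick registration, looked up either by brick name or by RPC
+ * transport. The entry is freed and the port marked FREE only when no
+ * brick name and no transport remain on it, or unconditionally when the
+ * removal is driven by a brick disconnect. Always returns 0.
+ */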
+int
+pmap_registry_remove(xlator_t *this, int port, const char *brickname,
+ gf_pmap_port_type_t type, void *xprt,
+ gf_boolean_t brick_disconnect)
+{
+ struct pmap_registry *pmap = NULL;
+ int p = 0;
+ glusterd_conf_t *priv = NULL;
+ char *brick_str;
+
+ priv = this->private;
+ pmap = priv->pmap;
+ if (!pmap)
+ goto out;
+
+ if (port) {
+ if (port > pmap->max_port)
+ goto out;
+ }
+
+ if (brickname) {
+ p = pmap_registry_search(this, brickname, type, _gf_true);
+ if (p)
+ goto remove;
+ }
+
+ if (xprt) {
+ p = pmap_registry_search_by_xprt(this, xprt, type);
+ if (p)
+ goto remove;
+ }
+
+ goto out;
+remove:
+ gf_msg("pmap", GF_LOG_INFO, 0, GD_MSG_BRICK_REMOVE,
+ "removing brick %s on port %d", brickname, p);
+
+ if (xprt && (xprt == pmap->ports[p].xprt)) {
+ pmap->ports[p].xprt = NULL;
+ }
+
+ /*
+ * This is where we garbage-collect. If all of the brick names have
+ * been "whited out" by pmap_registry_search(...,destroy=_gf_true) and
+ * there's no xprt either, then we have nothing left worth saving and
+ * can delete the entire entry.
+ */
+ if (brick_disconnect || !pmap->ports[p].xprt) {
+        /* If the signout is being triggered by a brick disconnect, clean
+         * up all the bricks on this port (brick multiplexing case).
+         */
+ if (!brick_disconnect) {
+ brick_str = pmap->ports[p].brickname;
+ if (brick_str) {
+ while (*brick_str != '\0') {
+ if (*(brick_str++) != ' ') {
+ goto out;
+ }
+ }
+ }
+ }
+ free(pmap->ports[p].brickname);
+ pmap->ports[p].brickname = NULL;
+ pmap->ports[p].type = GF_PMAP_PORT_FREE;
+ }
+
+out:
+ return 0;
+}
+
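+/* PORTBYBRICK RPC handler: given a brick path, reply with the port the
+ * brick signed in on, or op_ret = -1 if the brick is not registered.
+ */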
+int
+__gluster_pmap_portbybrick(rpcsvc_request_t *req)
+{
+ pmap_port_by_brick_req args = {
+ 0,
+ };
+ pmap_port_by_brick_rsp rsp = {
+ 0,
+ };
+ char *brick = NULL;
+ int port = 0;
+ int ret = -1;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+
+ ret = xdr_to_generic(req->msg[0], &args,
+ (xdrproc_t)xdr_pmap_port_by_brick_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
+ goto fail;
+ }
+
+ brick = args.brick;
+
+ port = pmap_registry_search(this, brick, GF_PMAP_PORT_BRICKSERVER,
+ _gf_false);
+
+ if (!port)
+ rsp.op_ret = -1;
+
+ rsp.port = port;
+
+fail:
+ glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_pmap_port_by_brick_rsp);
+ free(args.brick); // malloced by xdr
+
+ return 0;
+}
+
+int
+gluster_pmap_portbybrick(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, __gluster_pmap_portbybrick);
+}
+
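+/* BRICKBYPORT RPC handler: given a port, reply with the brick name(s)
+ * registered on it, or op_ret = -1 and an empty string if nothing is
+ * registered there.
+ */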
+int
+__gluster_pmap_brickbyport(rpcsvc_request_t *req)
+{
+ pmap_brick_by_port_req args = {
+ 0,
+ };
+ pmap_brick_by_port_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+
+ ret = xdr_to_generic(req->msg[0], &args,
+ (xdrproc_t)xdr_pmap_brick_by_port_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
+ goto fail;
+ }
+
+ rsp.brick = pmap_registry_search_by_port(THIS, args.port);
+ if (!rsp.brick) {
+ rsp.op_ret = -1;
+ rsp.brick = "";
+ }
+fail:
+
+ glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_pmap_brick_by_port_rsp);
+
+ return 0;
+}
+
+int
+gluster_pmap_brickbyport(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, __gluster_pmap_brickbyport);
+}
+
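+/* SIGNIN RPC handler: a starting brick process registers its brick path
+ * and port. The port is bound to the brick and to the request's transport,
+ * and the matching brickinfo (if found) is marked port_registered.
+ */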
+int
+__gluster_pmap_signin(rpcsvc_request_t *req)
+{
+ pmap_signin_req args = {
+ 0,
+ };
+ pmap_signin_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+
+ ret = xdr_to_generic(req->msg[0], &args, (xdrproc_t)xdr_pmap_signin_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
+ goto fail;
+ }
+
+ rsp.op_ret = pmap_registry_bind(THIS, args.port, args.brick,
+ GF_PMAP_PORT_BRICKSERVER, req->trans);
+
+ ret = glusterd_get_brickinfo(THIS, args.brick, args.port, &brickinfo);
+ /* Update portmap status in brickinfo */
+ if (brickinfo)
+ brickinfo->port_registered = _gf_true;
+
+fail:
+ glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_pmap_signin_rsp);
+ free(args.brick); // malloced by xdr
+
+ return 0;
+}
+
+int
+gluster_pmap_signin(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, __gluster_pmap_signin);
+}
+
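+/* SIGNOUT RPC handler: a brick process deregisters its port (and rdma
+ * port, if any) on shutdown. Besides updating the registry, this unlinks
+ * the brick's pidfile, marks the brick stopped, and detaches it from its
+ * brick process, so glusterd state stays correct even when a brick is
+ * killed from the backend.
+ */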
+int
+__gluster_pmap_signout(rpcsvc_request_t *req)
+{
+ pmap_signout_req args = {
+ 0,
+ };
+ pmap_signout_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ char pidfile[PATH_MAX] = {0};
+ char brick_path[PATH_MAX] = {
+ 0,
+ };
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, fail);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, fail);
+
+ ret = xdr_to_generic(req->msg[0], &args, (xdrproc_t)xdr_pmap_signout_req);
+ if (ret < 0) {
+        /* failed to decode the request */
+ req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
+ goto fail;
+ }
+ rsp.op_ret = pmap_registry_remove(THIS, args.port, args.brick,
+ GF_PMAP_PORT_BRICKSERVER, req->trans,
+ _gf_false);
+
+ ret = glusterd_get_brickinfo(THIS, args.brick, args.port, &brickinfo);
+ if (args.rdma_port) {
+ snprintf(brick_path, PATH_MAX, "%s.rdma", args.brick);
+ rsp.op_ret = pmap_registry_remove(THIS, args.rdma_port, brick_path,
+ GF_PMAP_PORT_BRICKSERVER, req->trans,
+ _gf_false);
+ }
+ /* Update portmap status on brickinfo */
+ if (brickinfo)
+ brickinfo->port_registered = _gf_false;
+
+    /* Clean up the pidfile for this brick, since glusterfsd no longer
+     * cleans it up itself. This ensures we are not left with stale pid
+     * files when a brick is killed from the backend.
+     */
+ ret = glusterd_get_volinfo_from_brick(args.brick, &volinfo);
+ if (!ret) {
+ if (volinfo && brickinfo) {
+ GLUSTERD_GET_BRICK_PIDFILE(pidfile, volinfo, brickinfo, conf);
+ sys_unlink(pidfile);
+
+ /* Setting the brick status to GF_BRICK_STOPPED to
+ * ensure correct brick status is maintained on the
+ * glusterd end when a brick is killed from the
+ * backend */
+ brickinfo->status = GF_BRICK_STOPPED;
+
+ /* Remove brick from brick process if not already
+ * removed in the brick op phase. This situation would
+ * arise when the brick is killed explicitly from the
+ * backend */
+ ret = glusterd_brick_process_remove_brick(brickinfo, NULL);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "Couldn't remove "
+ "brick %s:%s from brick process",
+ brickinfo->hostname, brickinfo->path);
+ /* Ignore 'ret' here since the brick might
+ * have already been deleted in brick op phase
+ */
+ ret = 0;
+ }
+ }
+ }
+
+fail:
+ glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_pmap_signout_rsp);
+ free(args.brick); // malloced by xdr
+
+ return 0;
+}
+
+int
+gluster_pmap_signout(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, __gluster_pmap_signout);
+}
+
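+/* Actor table and program definition for the Gluster Portmap RPC
+ * program. */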
+static rpcsvc_actor_t gluster_pmap_actors[GF_PMAP_MAXVALUE] = {
+ [GF_PMAP_NULL] = {"NULL", NULL, NULL, GF_PMAP_NULL, DRC_NA, 0},
+ [GF_PMAP_PORTBYBRICK] = {"PORTBYBRICK", gluster_pmap_portbybrick, NULL,
+ GF_PMAP_PORTBYBRICK, DRC_NA, 0},
+ [GF_PMAP_BRICKBYPORT] = {"BRICKBYPORT", gluster_pmap_brickbyport, NULL,
+ GF_PMAP_BRICKBYPORT, DRC_NA, 0},
+ [GF_PMAP_SIGNIN] = {"SIGNIN", gluster_pmap_signin, NULL, GF_PMAP_SIGNIN,
+ DRC_NA, 0},
+ [GF_PMAP_SIGNOUT] = {"SIGNOUT", gluster_pmap_signout, NULL, GF_PMAP_SIGNOUT,
+ DRC_NA, 0},
+};
+
+struct rpcsvc_program gluster_pmap_prog = {
+ .progname = "Gluster Portmap",
+ .prognum = GLUSTER_PMAP_PROGRAM,
+ .progver = GLUSTER_PMAP_VERSION,
+ .actors = gluster_pmap_actors,
+ .numactors = GF_PMAP_MAXVALUE,
+};