summaryrefslogtreecommitdiffstats
path: root/xlators/nfs
diff options
context:
space:
mode:
author Shehjar Tikoo <shehjart@gluster.com> 2010-03-31 07:27:02 +0000
committer Anand V. Avati <avati@dev.gluster.com> 2010-03-31 07:43:58 -0700
commit 8b2949db0d56bdf5842abcb72437cc7dccd884df (patch)
tree 04c51fdd0d850ab8454c2286391afa8141a74279 /xlators/nfs
parent eff83c8dae2a9f6d52ae2d8b069190a13f92deaf (diff)
nfs: Add RPCv2 service
Signed-off-by: Shehjar Tikoo <shehjart@gluster.com> Signed-off-by: Anand V. Avati <avati@dev.gluster.com> BUG: 399 (NFS translator with Mount v3 and NFS v3 support) URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=399
Diffstat (limited to 'xlators/nfs')
-rw-r--r-- xlators/nfs/lib/src/Makefile.am 4
-rw-r--r-- xlators/nfs/lib/src/auth-null.c 71
-rw-r--r-- xlators/nfs/lib/src/auth-unix.c 91
-rw-r--r-- xlators/nfs/lib/src/rpc-socket.c 358
-rw-r--r-- xlators/nfs/lib/src/rpc-socket.h 65
-rw-r--r-- xlators/nfs/lib/src/rpcsvc-auth.c 391
-rw-r--r-- xlators/nfs/lib/src/rpcsvc.c 2743
-rw-r--r-- xlators/nfs/lib/src/rpcsvc.h 715
8 files changed, 4436 insertions, 2 deletions
diff --git a/xlators/nfs/lib/src/Makefile.am b/xlators/nfs/lib/src/Makefile.am
index d0221f4940b..a6090bb2896 100644
--- a/xlators/nfs/lib/src/Makefile.am
+++ b/xlators/nfs/lib/src/Makefile.am
@@ -1,10 +1,10 @@
lib_LTLIBRARIES = librpcsvc.la
librpcsvc_la_LDFLAGS = -module -avoidversion
-librpcsvc_la_SOURCES = msg-nfs3.c xdr-nfs3.c xdr-rpc.c
+librpcsvc_la_SOURCES = msg-nfs3.c xdr-nfs3.c xdr-rpc.c auth-unix.c rpcsvc-auth.c rpcsvc.c auth-null.c rpc-socket.c
librpcsvc_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = xdr-rpc.h msg-nfs3.h xdr-common.h xdr-nfs3.h
+noinst_HEADERS = xdr-rpc.h msg-nfs3.h xdr-common.h xdr-nfs3.h rpc-socket.h rpcsvc.h
AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
-I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
diff --git a/xlators/nfs/lib/src/auth-null.c b/xlators/nfs/lib/src/auth-null.c
new file mode 100644
index 00000000000..b162db11247
--- /dev/null
+++ b/xlators/nfs/lib/src/auth-null.c
@@ -0,0 +1,71 @@
+/*
+ Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "rpcsvc.h"
+#include "list.h"
+#include "dict.h"
+
+
+/* Per-request setup for AUTH_NULL.
+ *
+ * Zeroes the credential and verifier byte buffers of @req and sets both of
+ * their lengths to 0. @priv is not used.
+ *
+ * Returns 0 on success, -1 when @req is NULL.
+ */
+int
+auth_null_request_init (rpcsvc_request_t *req, void *priv)
+{
+        int     status = -1;
+
+        if (req != NULL) {
+                /* Credential first, then verifier; both end up empty. */
+                memset (req->cred.authdata, 0, RPCSVC_MAX_AUTH_BYTES);
+                req->cred.datalen = 0;
+
+                memset (req->verf.authdata, 0, RPCSVC_MAX_AUTH_BYTES);
+                req->verf.datalen = 0;
+
+                status = 0;
+        }
+
+        return status;
+}
+/* AUTH_NULL authentication hook. Neither @req nor @priv is examined; the
+ * function unconditionally returns RPCSVC_AUTH_ACCEPT.
+ */
+int auth_null_authenticate (rpcsvc_request_t *req, void *priv)
+{
+ /* Always succeed. */
+ return RPCSVC_AUTH_ACCEPT;
+}
+/* Operations table for AUTH_NULL: no per-connection hook, plus the
+ * request-init and authenticate hooks defined earlier in this file.
+ */
+rpcsvc_auth_ops_t auth_null_ops = {
+ .conn_init = NULL,
+ .request_init = auth_null_request_init,
+ .authenticate = auth_null_authenticate
+};
+/* Scheme descriptor for AUTH_NULL: display name, flavour number, the
+ * operations table above, and no private data.
+ */
+rpcsvc_auth_t rpcsvc_auth_null = {
+ .authname = "AUTH_NULL",
+ .authnum = AUTH_NULL,
+ .authops = &auth_null_ops,
+ .authprivate = NULL
+};
+
+/* Initer for AUTH_NULL. @svc and @options are not used; the address of the
+ * file-level rpcsvc_auth_null descriptor is returned.
+ */
+rpcsvc_auth_t *
+rpcsvc_auth_null_init (rpcsvc_t *svc, dict_t *options)
+{
+ return &rpcsvc_auth_null;
+}
+
diff --git a/xlators/nfs/lib/src/auth-unix.c b/xlators/nfs/lib/src/auth-unix.c
new file mode 100644
index 00000000000..0eaf0686654
--- /dev/null
+++ b/xlators/nfs/lib/src/auth-unix.c
@@ -0,0 +1,91 @@
+/*
+ Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "rpcsvc.h"
+#include "list.h"
+#include "dict.h"
+#include "xdr-rpc.h"
+
+
+/* Per-request setup for AUTH_UNIX.
+ *
+ * Only the verifier of @req is touched: its bytes are zeroed, its length is
+ * set to 0 and its flavour to AUTH_NULL. The credential is left as is.
+ * @priv is not used.
+ *
+ * Returns 0 on success, -1 when @req is NULL.
+ */
+int
+auth_unix_request_init (rpcsvc_request_t *req, void *priv)
+{
+        if (req == NULL)
+                return -1;
+
+        req->verf.flavour = AUTH_NULL;
+        req->verf.datalen = 0;
+        memset (req->verf.authdata, 0, RPCSVC_MAX_AUTH_BYTES);
+
+        return 0;
+}
+/* Authenticate an AUTH_UNIX request.
+ *
+ * The credential bytes held in req->cred are decoded with
+ * xdr_to_auth_unix_cred(); on success the uid, gid and auxiliary group count
+ * from the decoded parameters are stored in @req. @priv is not used.
+ *
+ * Returns RPCSVC_AUTH_ACCEPT on success, RPCSVC_AUTH_REJECT when @req is
+ * NULL or the decoder returns -1.
+ *
+ * NOTE(review): machname and req->auxgids are handed to the decoder as
+ * output buffers; presumably it bounds what it writes - confirm.
+ */
+int auth_unix_authenticate (rpcsvc_request_t *req, void *priv)
+{
+ int ret = RPCSVC_AUTH_REJECT;
+ struct authunix_parms aup;
+ char machname[MAX_MACHINE_NAME];
+
+ if (!req)
+ return ret;
+
+ ret = xdr_to_auth_unix_cred (req->cred.authdata, req->cred.datalen,
+ &aup, machname, req->auxgids);
+ if (ret == -1) {
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ req->uid = aup.aup_uid;
+ req->gid = aup.aup_gid;
+ req->auxgidcount = aup.aup_len;
+
+ gf_log (GF_RPCSVC, GF_LOG_TRACE, "Auth Info: machine name: %s, uid: %d"
+ ", gid: %d", machname, req->uid, req->gid);
+ ret = RPCSVC_AUTH_ACCEPT;
+err:
+ return ret;
+}
+/* Operations table for AUTH_UNIX: no per-connection hook, plus the
+ * request-init and authenticate hooks defined earlier in this file.
+ */
+rpcsvc_auth_ops_t auth_unix_ops = {
+ .conn_init = NULL,
+ .request_init = auth_unix_request_init,
+ .authenticate = auth_unix_authenticate
+};
+/* Scheme descriptor for AUTH_UNIX: display name, flavour number, the
+ * operations table above, and no private data.
+ */
+rpcsvc_auth_t rpcsvc_auth_unix = {
+ .authname = "AUTH_UNIX",
+ .authnum = AUTH_UNIX,
+ .authops = &auth_unix_ops,
+ .authprivate = NULL
+};
+
+/* Initer for AUTH_UNIX. @svc and @options are not used; the address of the
+ * file-level rpcsvc_auth_unix descriptor is returned.
+ */
+rpcsvc_auth_t *
+rpcsvc_auth_unix_init (rpcsvc_t *svc, dict_t *options)
+{
+ return &rpcsvc_auth_unix;
+}
+
diff --git a/xlators/nfs/lib/src/rpc-socket.c b/xlators/nfs/lib/src/rpc-socket.c
new file mode 100644
index 00000000000..01f114a8530
--- /dev/null
+++ b/xlators/nfs/lib/src/rpc-socket.c
@@ -0,0 +1,358 @@
+/*
+ Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "rpc-socket.h"
+#include "rpcsvc.h"
+#include "dict.h"
+#include "logging.h"
+#include "byte-order.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/socket.h>
+#include <netdb.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+
+/* Resolve the local address a listening socket should bind to.
+ *
+ * @listenport is formatted as a decimal service string and passed, together
+ * with @listenhost, to getaddrinfo() with a passive, stream-socket hint for
+ * family @addrfam. The first result is copied into @addr and its length is
+ * stored in @addr_len.
+ *
+ * Returns 0 on success, -1 if getaddrinfo() fails (the reason is logged).
+ */
+static int
+rpcsvc_socket_server_get_local_socket (int addrfam, char *listenhost,
+                                       uint16_t listenport,
+                                       struct sockaddr *addr,
+                                       socklen_t *addr_len)
+{
+        struct addrinfo         *found = 0;
+        struct addrinfo         hints;
+        char                    service[NI_MAXSERV];
+        int                     gaierr = 0;
+
+        memset (service, 0, sizeof (service));
+        sprintf (service, "%d", listenport);
+
+        memset (&hints, 0, sizeof (hints));
+        hints.ai_family = addrfam;
+        addr->sa_family = hints.ai_family;
+        hints.ai_socktype = SOCK_STREAM;
+        hints.ai_flags = AI_ADDRCONFIG | AI_PASSIVE;
+
+        gaierr = getaddrinfo (listenhost, service, &hints, &found);
+        if (gaierr != 0) {
+                gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR,
+                        "getaddrinfo failed for host %s, service %s (%s)",
+                        listenhost, service, gai_strerror (gaierr));
+                return -1;
+        }
+
+        /* Only the first entry of the result list is used. */
+        memcpy (addr, found->ai_addr, found->ai_addrlen);
+        *addr_len = found->ai_addrlen;
+        freeaddrinfo (found);
+
+        return 0;
+}
+
+/* Create a non-blocking TCP listening socket.
+ *
+ * The bind address is resolved from @addrfam, @listenhost and @listenport by
+ * rpcsvc_socket_server_get_local_socket(). The new socket is switched to
+ * O_NONBLOCK, gets SO_REUSEADDR, is bound to that address and put into
+ * listen() with a backlog of 10.
+ *
+ * Returns the listening descriptor, or -1 on failure. Once the socket has
+ * been created every failure path closes it before returning.
+ */
+int
+rpcsvc_socket_listen (int addrfam, char *listenhost, uint16_t listenport)
+{
+ int sock = -1;
+ struct sockaddr_storage sockaddr;
+ socklen_t sockaddr_len;
+ int flags = 0;
+ int ret = -1;
+ int opt = 1;
+
+ ret = rpcsvc_socket_server_get_local_socket (addrfam, listenhost,
+ listenport,SA (&sockaddr),
+ &sockaddr_len);
+
+ if (ret == -1)
+ return ret;
+
+ sock = socket (SA (&sockaddr)->sa_family, SOCK_STREAM, 0);
+ if (sock == -1) {
+ gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR, "socket creation failed"
+ " (%s)", strerror (errno));
+ goto err;
+ }
+
+ flags = fcntl (sock, F_GETFL);
+ if (flags == -1) {
+ gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR, "cannot get socket flags"
+ " (%s)", strerror(errno));
+ goto close_err;
+ }
+
+ ret = fcntl (sock, F_SETFL, flags | O_NONBLOCK);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR, "cannot set socket "
+ "non-blocking (%s)", strerror (errno));
+ goto close_err;
+ }
+
+ ret = setsockopt (sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof (opt));
+ if (ret == -1) {
+ gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR, "setsockopt() for "
+ "SO_REUSEADDR failed (%s)", strerror (errno));
+ goto close_err;
+ }
+
+ ret = bind (sock, (struct sockaddr *)&sockaddr, sockaddr_len);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR, "binding socket failed:"
+ " %s", strerror (errno));
+ if (errno == EADDRINUSE)
+ gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR, "Port is already"
+ " in use");
+ goto close_err;
+ }
+
+ ret = listen (sock, 10);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR, "could not listen on"
+ " socket (%s)", strerror (errno));
+ goto close_err;
+ }
+
+ return sock;
+ /* Error exits: close_err releases the socket, err only returns -1. */
+close_err:
+ close (sock);
+ sock = -1;
+
+err:
+ return sock;
+}
+
+/* Accept one connection on @listenfd and return it as a non-blocking socket.
+ *
+ * A failure of accept() or of either fcntl() call yields -1; in the fcntl()
+ * cases the new socket is closed first. When TCP_NODELAY is defined it is
+ * set on the new socket; a failure there is only logged and the socket is
+ * still returned.
+ */
+int
+rpcsvc_socket_accept (int listenfd)
+{
+ int new_sock = -1;
+ struct sockaddr_storage new_sockaddr = {0, };
+ socklen_t addrlen = sizeof (new_sockaddr);
+ int flags = 0;
+ int ret = -1;
+ int on = 1;
+
+ new_sock = accept (listenfd, SA (&new_sockaddr), &addrlen);
+ if (new_sock == -1) {
+ gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR,"accept on socket failed");
+ goto err;
+ }
+
+ flags = fcntl (new_sock, F_GETFL);
+ if (flags == -1) {
+ gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR, "cannot get socket flags"
+ " (%s)", strerror(errno));
+ goto close_err;
+ }
+
+ ret = fcntl (new_sock, F_SETFL, flags | O_NONBLOCK);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR, "cannot set socket "
+ "non-blocking (%s)", strerror (errno));
+ goto close_err;
+ }
+ /* Best effort: a TCP_NODELAY failure is logged but not fatal. */
+#ifdef TCP_NODELAY
+ ret = setsockopt(new_sock, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
+ if (ret == -1) {
+ gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR, "cannot set no-delay "
+ " socket option");
+ }
+#endif
+
+ return new_sock;
+
+close_err:
+ close (new_sock);
+ new_sock = -1;
+
+err:
+ return new_sock;
+}
+
+/* Read up to @readsize bytes from @sockfd into @readaddr.
+ *
+ * read() is called repeatedly until the requested amount has arrived, the
+ * peer signals end-of-file (read() returns 0) or read() fails. A failure
+ * with errno EAGAIN ends the loop and the bytes gathered so far are
+ * returned; any other failure makes the function return -1.
+ *
+ * Returns the number of bytes read (possibly fewer than @readsize), or -1
+ * when @readaddr is NULL or on a non-EAGAIN error.
+ */
+ssize_t
+rpcsvc_socket_read (int sockfd, char *readaddr, size_t readsize)
+{
+        ssize_t         total = 0;
+        ssize_t         got = -1;
+        char            *cursor = readaddr;
+        size_t          remaining = readsize;
+
+        if (cursor == NULL)
+                return -1;
+
+        for (; remaining > 0; remaining -= got, cursor += got, total += got) {
+                got = read (sockfd, cursor, remaining);
+                if (got == 0)
+                        return total;
+                if (got == -1)
+                        return (errno == EAGAIN) ? total : -1;
+        }
+
+        return total;
+}
+
+
+/* Write up to @size bytes from @buffer to @sockfd.
+ *
+ * write() is called repeatedly until everything has been sent, write()
+ * returns 0, or it fails. A failure with errno EAGAIN ends the loop and the
+ * byte count sent so far is returned; any other failure makes the function
+ * return -1.
+ *
+ * The result of write() is kept in an ssize_t so that the -1 error value is
+ * compared as a signed quantity rather than through an implicit conversion
+ * to size_t.
+ *
+ * Returns the number of bytes written (possibly fewer than @size), or -1
+ * when @buffer is NULL or on a non-EAGAIN error.
+ */
+ssize_t
+rpcsvc_socket_write (int sockfd, char *buffer, size_t size)
+{
+        ssize_t         writelen = -1;
+        ssize_t         written = 0;
+
+        if (!buffer)
+                return -1;
+
+        while (size > 0) {
+                writelen = write (sockfd, buffer, size);
+                if (writelen == -1) {
+                        /* EAGAIN: report the partial count, not an error. */
+                        if (errno != EAGAIN)
+                                written = -1;
+                        break;
+                } else if (writelen == 0)
+                        break;
+
+                written += writelen;
+                size -= (size_t)writelen;
+                buffer += writelen;
+        }
+
+        return written;
+}
+
+
+/* Look up the host name of the peer connected on @sockfd.
+ *
+ * The peer address is fetched with getpeername() into a sockaddr_storage,
+ * which is large enough for every address family (a plain struct sockaddr
+ * cannot hold an IPv6 address), and then resolved with getnameinfo() into
+ * @hostname, a buffer of @hostlen bytes.
+ *
+ * Returns 0 on success, EAI_FAIL when @hostname is NULL or getpeername()
+ * fails, otherwise the error code returned by getnameinfo().
+ */
+int
+rpcsvc_socket_peername (int sockfd, char *hostname, int hostlen)
+{
+        struct sockaddr_storage ss;
+        socklen_t               sl = sizeof (ss);
+        int                     ret = EAI_FAIL;
+
+        if (!hostname)
+                return ret;
+
+        memset (&ss, 0, sizeof (ss));
+        ret = getpeername (sockfd, SA (&ss), &sl);
+        if (ret == -1) {
+                gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR, "Failed to get peer name:"
+                        " %s", strerror (errno));
+                ret = EAI_FAIL;
+                goto err;
+        }
+
+        ret = getnameinfo (SA (&ss), sl, hostname, hostlen, NULL, 0, 0);
+
+err:
+        return ret;
+}
+
+
+/* Fetch the address of the peer connected on @sockfd.
+ *
+ * When @returnsa is non-NULL the raw address is stored there (@sasize is the
+ * size of that buffer); otherwise a local sockaddr_storage is used, which is
+ * large enough for every address family. When @addrstr is non-NULL the
+ * numeric form of the address is written into it (@addrlen bytes available).
+ *
+ * Returns 0 on success, EAI_FAIL if getpeername() fails, otherwise the error
+ * code returned by getnameinfo().
+ */
+int
+rpcsvc_socket_peeraddr (int sockfd, char *addrstr, int addrlen,
+                        struct sockaddr *returnsa, socklen_t sasize)
+{
+        struct sockaddr_storage ss;
+        struct sockaddr         *peer = NULL;
+        socklen_t               bufsize = 0;
+        socklen_t               peerlen = 0;
+        int                     ret = EAI_FAIL;
+
+        if (returnsa) {
+                peer = returnsa;
+                bufsize = sasize;
+        } else {
+                memset (&ss, 0, sizeof (ss));
+                peer = SA (&ss);
+                bufsize = sizeof (ss);
+        }
+
+        peerlen = bufsize;
+        ret = getpeername (sockfd, peer, &peerlen);
+        if (ret == -1) {
+                gf_log (GF_RPCSVC_SOCK, GF_LOG_ERROR, "Failed to get peer addr:"
+                        " %s", strerror (errno));
+                ret = EAI_FAIL;
+                goto err;
+        }
+
+        /* If caller did not specify a string into which the address can be
+         * stored, dont bother getting it.
+         */
+        if (!addrstr) {
+                ret = 0;
+                goto err;
+        }
+
+        /* getpeername() reports the full address length even when it had to
+         * truncate; never let getnameinfo() read past the buffer.
+         */
+        if (peerlen > bufsize)
+                peerlen = bufsize;
+
+        ret = getnameinfo (peer, peerlen, addrstr, addrlen, NULL, 0,
+                           NI_NUMERICHOST);
+
+err:
+        return ret;
+}
+
+/* Switch on the TCP_CORK and/or TCP_NOPUSH socket option on @sockfd,
+ * whichever of the two the platform headers define.
+ *
+ * Returns the result of the last setsockopt() call made, or -1 when neither
+ * option macro is defined.
+ */
+int
+rpcsvc_socket_block_tx (int sockfd)
+{
+ int ret = -1;
+ int on = 1;
+
+#ifdef TCP_CORK
+ ret = setsockopt(sockfd, IPPROTO_TCP, TCP_CORK, &on, sizeof(on));
+#endif
+
+#ifdef TCP_NOPUSH
+ ret = setsockopt(sockfd, IPPROTO_TCP, TCP_NOPUSH, &on, sizeof(on));
+#endif
+
+ return ret;
+}
+
+/* Switch off the TCP_CORK and/or TCP_NOPUSH socket option on @sockfd,
+ * whichever of the two the platform headers define.
+ *
+ * Returns the result of the last setsockopt() call made, or -1 when neither
+ * option macro is defined.
+ */
+int
+rpcsvc_socket_unblock_tx (int sockfd)
+{
+ int ret = -1;
+ int off = 0;
+
+#ifdef TCP_CORK
+ ret = setsockopt(sockfd, IPPROTO_TCP, TCP_CORK, &off, sizeof(off));
+#endif
+
+#ifdef TCP_NOPUSH
+ ret = setsockopt(sockfd, IPPROTO_TCP, TCP_NOPUSH, &off, sizeof(off));
+#endif
+ return ret;
+}
+
diff --git a/xlators/nfs/lib/src/rpc-socket.h b/xlators/nfs/lib/src/rpc-socket.h
new file mode 100644
index 00000000000..3a50c97a98d
--- /dev/null
+++ b/xlators/nfs/lib/src/rpc-socket.h
@@ -0,0 +1,65 @@
+/*
+ Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _RPCSVC_SOCKET_H_
+#define _RPCSVC_SOCKET_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "rpcsvc.h"
+#include "dict.h"
+#include "logging.h"
+#include "byte-order.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/socket.h>
+#include <netdb.h>
+
+/* Cast any socket-address pointer expression to struct sockaddr *. The
+ * argument is parenthesized so that expressions such as SA (base + 1) cast
+ * the whole expression rather than only its first operand.
+ */
+#define SA(ptr) ((struct sockaddr *)(ptr))
+
+/* Log domain used by the socket helpers. */
+#define GF_RPCSVC_SOCK "rpc-socket"
+
+/* Create a non-blocking listening TCP socket; returns the fd or -1. */
+extern int
+rpcsvc_socket_listen (int addrfam, char *listenhost, uint16_t listenport);
+
+/* Accept a connection on @listenfd; returns a non-blocking fd or -1. */
+extern int
+rpcsvc_socket_accept (int listenfd);
+
+/* Read up to @readsize bytes; returns the byte count or -1 on error. */
+extern ssize_t
+rpcsvc_socket_read (int sockfd, char *readaddr, size_t readsize);
+
+/* Write up to @size bytes; returns the byte count or -1 on error. */
+extern ssize_t
+rpcsvc_socket_write (int sockfd, char *buffer, size_t size);
+
+/* Resolve the peer's host name into @hostname; returns 0 or an EAI_* code. */
+extern int
+rpcsvc_socket_peername (int sockfd, char *hostname, int hostlen);
+
+/* Fetch the peer's address, raw into @returnsa and/or numeric into @addrstr;
+ * returns 0 or an EAI_* code.
+ */
+extern int
+rpcsvc_socket_peeraddr (int sockfd, char *addrstr, int addrlen,
+                        struct sockaddr *returnsa, socklen_t sasize);
+
+/* Set TCP_CORK / TCP_NOPUSH on @sockfd where available. */
+extern int
+rpcsvc_socket_block_tx (int sockfd);
+
+/* Clear TCP_CORK / TCP_NOPUSH on @sockfd where available. */
+extern int
+rpcsvc_socket_unblock_tx (int sockfd);
+#endif
diff --git a/xlators/nfs/lib/src/rpcsvc-auth.c b/xlators/nfs/lib/src/rpcsvc-auth.c
new file mode 100644
index 00000000000..38697965bbd
--- /dev/null
+++ b/xlators/nfs/lib/src/rpcsvc-auth.c
@@ -0,0 +1,391 @@
+/*
+ Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include "rpcsvc.h"
+#include "logging.h"
+#include "dict.h"
+
+extern rpcsvc_auth_t *
+rpcsvc_auth_null_init (rpcsvc_t *svc, dict_t *options);
+
+extern rpcsvc_auth_t *
+rpcsvc_auth_unix_init (rpcsvc_t *svc, dict_t *options);
+/* Append a new auth-scheme entry to @list.
+ *
+ * A zeroed rpcsvc_auth_list node is allocated, @init is stored as its init
+ * callback, @idfier is copied into its name field and the node is linked at
+ * the tail of @list.
+ *
+ * Returns 0 on success, -1 when any argument is NULL or allocation fails.
+ *
+ * NOTE(review): the copy of @idfier uses strcpy() with no length check;
+ * confirm the name field is large enough for every identifier passed in.
+ */
+int
+rpcsvc_auth_add_initer (struct list_head *list, char *idfier,
+ rpcsvc_auth_initer_t init)
+{
+ struct rpcsvc_auth_list *new = NULL;
+
+ if ((!list) || (!init) || (!idfier))
+ return -1;
+
+ new = CALLOC (1, sizeof (*new));
+ if (!new) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Memory allocation failed");
+ return -1;
+ }
+
+ new->init = init;
+ strcpy (new->name, idfier);
+ INIT_LIST_HEAD (&new->authlist);
+ list_add_tail (&new->authlist, list);
+ return 0;
+}
+
+
+
+/* Register the built-in authentication schemes on svc->authschemes:
+ * "auth-unix" first, then "auth-null".
+ *
+ * Returns 0 when both were added. Returns -1 when @svc is NULL or as soon
+ * as one registration fails, so that the caller can actually see the
+ * failure.
+ */
+int
+rpcsvc_auth_add_initers (rpcsvc_t *svc)
+{
+        int             ret = -1;
+
+        if (!svc)
+                return -1;
+
+        ret = rpcsvc_auth_add_initer (&svc->authschemes, "auth-unix",
+                                      (rpcsvc_auth_initer_t)
+                                      rpcsvc_auth_unix_init);
+        if (ret == -1) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to add AUTH_UNIX");
+                goto err;
+        }
+
+        ret = rpcsvc_auth_add_initer (&svc->authschemes, "auth-null",
+                                      (rpcsvc_auth_initer_t)
+                                      rpcsvc_auth_null_init);
+        if (ret == -1) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to add AUTH_NULL");
+                goto err;
+        }
+
+        ret = 0;
+err:
+        return ret;
+}
+
+/* Run the init callback of one registered scheme and mark it enabled.
+ *
+ * The callback stored in @authitem is called with @svc and @options; the
+ * descriptor it returns is saved in authitem->auth and authitem->enable is
+ * set to 1.
+ *
+ * Returns 0 on success, -1 when an argument is NULL, no callback is set, or
+ * the callback returns NULL.
+ */
+int
+rpcsvc_auth_init_auth (rpcsvc_t *svc, dict_t *options,
+ struct rpcsvc_auth_list *authitem)
+{
+ int ret = -1;
+
+ if ((!svc) || (!options) || (!authitem))
+ return -1;
+
+ if (!authitem->init) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "No init function defined");
+ ret = -1;
+ goto err;
+ }
+
+ authitem->auth = authitem->init (svc, options);
+ if (!authitem->auth) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Registration of auth failed:"
+ " %s", authitem->name);
+ ret = -1;
+ goto err;
+ }
+
+ authitem->enable = 1;
+ gf_log (GF_RPCSVC, GF_LOG_TRACE, "Authentication enabled: %s",
+ authitem->auth->authname);
+
+ ret = 0;
+err:
+ return ret;
+}
+
+
+/* Initialise every auth scheme registered on svc->authschemes.
+ *
+ * Before the schemes are initialised, the general options
+ * "rpc-auth.auth-null" and "rpc-auth.auth-unix" are defaulted to "on" when
+ * the user did not set them. The defaults are stored with dict_set_str():
+ * the value is a string literal, so it must not be handed to the dict as a
+ * dynamically allocated string that the dict would later try to free.
+ *
+ * Returns 0 on success or when no scheme is registered; -1 when @svc is
+ * NULL, a default cannot be stored, or a scheme fails to initialise.
+ */
+int
+rpcsvc_auth_init_auths (rpcsvc_t *svc, dict_t *options)
+{
+        int                     ret = -1;
+        struct rpcsvc_auth_list *auth = NULL;
+        struct rpcsvc_auth_list *tmp = NULL;
+
+        if (!svc)
+                return -1;
+
+        if (list_empty (&svc->authschemes)) {
+                gf_log (GF_RPCSVC, GF_LOG_WARNING, "No authentication!");
+                ret = 0;
+                goto err;
+        }
+
+        /* If auth null and sys are not disabled by the user, we must enable
+         * it by default. This is a globally default rule, the user is still
+         * allowed to disable the two for particular subvolumes.
+         */
+        if (!dict_get (options, "rpc-auth.auth-null")) {
+                ret = dict_set_str (options, "rpc-auth.auth-null", "on");
+                if (ret < 0) {
+                        gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to enable "
+                                "AUTH_NULL by default");
+                        ret = -1;
+                        goto err;
+                }
+        }
+
+        if (!dict_get (options, "rpc-auth.auth-unix")) {
+                ret = dict_set_str (options, "rpc-auth.auth-unix", "on");
+                if (ret < 0) {
+                        gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to enable "
+                                "AUTH_UNIX by default");
+                        ret = -1;
+                        goto err;
+                }
+        }
+
+        list_for_each_entry_safe (auth, tmp, &svc->authschemes, authlist) {
+                ret = rpcsvc_auth_init_auth (svc, options, auth);
+                if (ret == -1)
+                        goto err;
+        }
+
+        ret = 0;
+err:
+        return ret;
+
+}
+/* Set up authentication for @svc: first the built-in initers are
+ * registered, then every registered scheme is initialised with @options.
+ *
+ * Returns 0 on success, -1 when an argument is NULL or one of the two steps
+ * reports -1.
+ */
+int
+rpcsvc_auth_init (rpcsvc_t *svc, dict_t *options)
+{
+ int ret = -1;
+
+ if ((!svc) || (!options))
+ return -1;
+
+ ret = rpcsvc_auth_add_initers (svc);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to add initers");
+ goto out;
+ }
+
+ ret = rpcsvc_auth_init_auths (svc, options);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to init auth schemes");
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+
+/* Find the enabled auth scheme whose flavour number equals the credential
+ * flavour of @req.
+ *
+ * Returns the matching scheme descriptor, or NULL when @req is NULL, no
+ * scheme is registered on the request's service, or none matches.
+ */
+rpcsvc_auth_t *
+__rpcsvc_auth_get_handler (rpcsvc_request_t *req)
+{
+        struct rpcsvc_auth_list *entry = NULL;
+        struct rpcsvc_auth_list *next = NULL;
+        rpcsvc_t                *service = NULL;
+
+        if (req == NULL)
+                return NULL;
+
+        service = rpcsvc_request_service (req);
+        if (list_empty (&service->authschemes)) {
+                gf_log (GF_RPCSVC, GF_LOG_WARNING, "No authentication!");
+                return NULL;
+        }
+
+        list_for_each_entry_safe (entry, next, &service->authschemes,
+                                  authlist) {
+                /* Disabled entries are skipped before entry->auth is read. */
+                if (entry->enable &&
+                    (entry->auth->authnum == req->cred.flavour))
+                        return entry->auth;
+        }
+
+        return NULL;
+}
+/* Return the auth scheme to use for @req.
+ *
+ * The scheme matching the request's credential flavour is looked up first.
+ * If there is none, the credential and verifier flavours stored in @req are
+ * overwritten with AUTH_NULL and the lookup is repeated, so the result can
+ * still be NULL if AUTH_NULL is not available either.
+ *
+ * NOTE(review): @req is dereferenced for the trace message without a NULL
+ * check; callers visible in this file check it beforehand.
+ */
+rpcsvc_auth_t *
+rpcsvc_auth_get_handler (rpcsvc_request_t *req)
+{
+ rpcsvc_auth_t *auth = NULL;
+
+ auth = __rpcsvc_auth_get_handler (req);
+ if (auth)
+ goto ret;
+
+ gf_log (GF_RPCSVC, GF_LOG_TRACE, "No auth handler: %d",
+ req->cred.flavour);
+
+ /* The requested scheme was not available so fall back to the one
+ * scheme that will always be present.
+ */
+ req->cred.flavour = AUTH_NULL;
+ req->verf.flavour = AUTH_NULL;
+ auth = __rpcsvc_auth_get_handler (req);
+ret:
+ return auth;
+}
+
+
+/* Run the per-request initialisation hook of the auth scheme selected for
+ * @req.
+ *
+ * The hook is optional: it is called only when the scheme provides one,
+ * and its result becomes the return value. A scheme without a hook counts
+ * as success.
+ *
+ * Returns 0 on success (or when there is no hook), -1 when @req is NULL or
+ * no scheme is available, otherwise whatever the hook returns.
+ */
+int
+rpcsvc_auth_request_init (rpcsvc_request_t *req)
+{
+        int                     ret = -1;
+        rpcsvc_auth_t           *auth = NULL;
+
+        if (!req)
+                return -1;
+
+        auth = rpcsvc_auth_get_handler (req);
+        if (!auth)
+                goto err;
+        ret = 0;
+        gf_log (GF_RPCSVC, GF_LOG_TRACE, "Auth handler: %s", auth->authname);
+        /* Call the hook only if it exists; calling it when the pointer is
+         * NULL would jump through a NULL function pointer.
+         */
+        if (auth->authops->request_init)
+                ret = auth->authops->request_init (req, auth->authprivate);
+
+err:
+        return ret;
+}
+
+/* Authenticate @req with the scheme matching its credential flavour.
+ *
+ * The request is rejected with AUTH_TOOWEAK when the program's minimum
+ * auth value is greater than the request's credential flavour. Otherwise
+ * the scheme's authenticate hook, if any, decides the result.
+ *
+ * Returns the hook's result; RPCSVC_AUTH_REJECT when @req is NULL, the
+ * flavour is too weak, no scheme is found, or the scheme has no hook.
+ */
+int
+rpcsvc_authenticate (rpcsvc_request_t *req)
+{
+ int ret = RPCSVC_AUTH_REJECT;
+ rpcsvc_auth_t *auth = NULL;
+ int minauth = 0;
+
+ if (!req)
+ return ret;
+
+ minauth = rpcsvc_request_prog_minauth (req);
+ if (minauth > rpcsvc_request_cred_flavour (req)) {
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Auth too weak");
+ rpcsvc_request_set_autherr (req, AUTH_TOOWEAK);
+ goto err;
+ }
+
+ auth = rpcsvc_auth_get_handler (req);
+ if (!auth) {
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "No auth handler found");
+ goto err;
+ }
+
+ if (auth->authops->authenticate)
+ ret = auth->authops->authenticate (req, auth->authprivate);
+
+err:
+ return ret;
+}
+
+/* Fill @autharr with the flavour numbers of the auth schemes permitted for
+ * @volname.
+ *
+ * For every registered scheme two options are looked up in svc->options:
+ * the general "rpc-auth.<scheme>" and the volume-specific
+ * "rpc-auth.<scheme>.<volname>". The general result starts as REJECT and
+ * the specific one as DONTCARE; each changes only if its option exists and
+ * parses as a boolean. The two are merged by
+ * rpcsvc_combine_gen_spec_volume_checks() and the scheme's flavour number
+ * is recorded when the merged result is ACCEPT.
+ *
+ * @autharr is zeroed first and at most @arrlen entries are written.
+ *
+ * Returns the number of entries written, 0 when no scheme is registered,
+ * and -1 on a NULL argument or when asprintf() fails.
+ */
+int
+rpcsvc_auth_array (rpcsvc_t *svc, char *volname, int *autharr, int arrlen)
+{
+ int count = 0;
+ int gen = RPCSVC_AUTH_REJECT;
+ int spec = RPCSVC_AUTH_REJECT;
+ int final = RPCSVC_AUTH_REJECT;
+ char *srchstr = NULL;
+ char *valstr = NULL;
+ gf_boolean_t boolval = _gf_false;
+ int ret = 0;
+
+ struct rpcsvc_auth_list *auth = NULL;
+ struct rpcsvc_auth_list *tmp = NULL;
+
+ if ((!svc) || (!autharr) || (!volname))
+ return -1;
+
+ memset (autharr, 0, arrlen * sizeof(int));
+ if (list_empty (&svc->authschemes)) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "No authentication!");
+ goto err;
+ }
+
+ list_for_each_entry_safe (auth, tmp, &svc->authschemes, authlist) {
+ if (count >= arrlen)
+ break;
+
+ gen = asprintf (&srchstr, "rpc-auth.%s", auth->name);
+ if (gen == -1) {
+ count = -1;
+ goto err;
+ }
+
+ gen = RPCSVC_AUTH_REJECT;
+ if (dict_get (svc->options, srchstr)) {
+ ret = dict_get_str (svc->options, srchstr, &valstr);
+ if (ret == 0) {
+ ret = gf_string2boolean (valstr, &boolval);
+ if (ret == 0) {
+ if (boolval == _gf_true)
+ gen = RPCSVC_AUTH_ACCEPT;
+ } else
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Faile"
+ "d to read auth val");
+ } else
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Faile"
+ "d to read auth val");
+ }
+
+ FREE (srchstr);
+ spec = asprintf (&srchstr, "rpc-auth.%s.%s", auth->name,
+ volname);
+ if (spec == -1) {
+ count = -1;
+ goto err;
+ }
+
+ spec = RPCSVC_AUTH_DONTCARE;
+ if (dict_get (svc->options, srchstr)) {
+ ret = dict_get_str (svc->options, srchstr, &valstr);
+ if (ret == 0) {
+ ret = gf_string2boolean (valstr, &boolval);
+ if (ret == 0) {
+ if (boolval == _gf_true)
+ spec = RPCSVC_AUTH_ACCEPT;
+ else
+ spec = RPCSVC_AUTH_REJECT;
+ } else
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Faile"
+ "d to read auth val");
+ } else
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Faile"
+ "d to read auth val");
+ }
+
+ FREE (srchstr);
+ final = rpcsvc_combine_gen_spec_volume_checks (gen, spec);
+ if (final == RPCSVC_AUTH_ACCEPT) {
+ autharr[count] = auth->auth->authnum;
+ ++count;
+ }
+ }
+
+err:
+ return count;
+}
+
+
+/* Return the auxiliary group ids carried by an AUTH_UNIX request.
+ *
+ * The number of ids is stored in @arrlen. NULL is returned when an argument
+ * is NULL, the request's credential flavour is not AUTH_UNIX (in which case
+ * @arrlen is left untouched), or the request carries no auxiliary groups.
+ */
+gid_t *
+rpcsvc_auth_unix_auxgids (rpcsvc_request_t *req, int *arrlen)
+{
+        if ((req == NULL) || (arrlen == NULL))
+                return NULL;
+
+        if (req->cred.flavour != AUTH_UNIX)
+                return NULL;
+
+        *arrlen = req->auxgidcount;
+
+        return (*arrlen == 0) ? NULL : &req->auxgids[0];
+}
+
diff --git a/xlators/nfs/lib/src/rpcsvc.c b/xlators/nfs/lib/src/rpcsvc.c
new file mode 100644
index 00000000000..e76ee16087a
--- /dev/null
+++ b/xlators/nfs/lib/src/rpcsvc.c
@@ -0,0 +1,2743 @@
+/*
+ Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "rpcsvc.h"
+#include "rpc-socket.h"
+#include "dict.h"
+#include "logging.h"
+#include "byte-order.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+#include "list.h"
+#include "xdr-rpc.h"
+#include "iobuf.h"
+#include "globals.h"
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <rpc/rpc.h>
+#include <rpc/pmap_clnt.h>
+#include <arpa/inet.h>
+#include <rpc/xdr.h>
+#include <fnmatch.h>
+#include <stdarg.h>
+#include <stdio.h>
+
+
+/* Take a request object from the connection's rxpool and zero it.
+ * @request is left NULL when the pool has nothing to give, so callers must
+ * check it; the memset is skipped in that case instead of writing through a
+ * NULL pointer.
+ */
+#define rpcsvc_alloc_request(con, request)                              \
+        do {                                                            \
+                (request) = (rpcsvc_request_t *) mem_get ((con)->rxpool); \
+                if (request)                                            \
+                        memset ((request), 0, sizeof (rpcsvc_request_t)); \
+        } while (0)
+
+/* The generic event handler for every stage.
+ *
+ * Thread entry point: @arg is the rpcsvc_stage_t of the stage. The function
+ * runs event_dispatch() on the stage's event pool and returns NULL when
+ * that call returns, or immediately when @arg is NULL.
+ */
+void *
+rpcsvc_stage_proc (void *arg)
+{
+ rpcsvc_stage_t *stg = (rpcsvc_stage_t *)arg;
+
+ if (!stg)
+ return NULL;
+
+ event_dispatch (stg->eventpool);
+ return NULL;
+}
+
+
+/* Create a stage: an event pool plus a thread that dispatches its events.
+ *
+ * The event pool size is svc->memfactor * RPCSVC_EVENTPOOL_SIZE_MULT. The
+ * thread is started with a stack of RPCSVC_THREAD_STACK_SIZE bytes when the
+ * platform accepts that size, otherwise with the default stack size.
+ *
+ * stg->svc is filled in before the thread is created so that the new thread
+ * never observes a half-initialised stage, and the thread attribute object
+ * is destroyed once it is no longer needed.
+ *
+ * Returns the new stage, or NULL when @svc is NULL or any step fails.
+ *
+ * NOTE(review): the event pool is not released when thread creation fails;
+ * no destroy call for it is visible from here.
+ */
+rpcsvc_stage_t *
+rpcsvc_stage_init (rpcsvc_t *svc)
+{
+        rpcsvc_stage_t          *stg = NULL;
+        int                     ret = -1;
+        size_t                  stacksize = RPCSVC_THREAD_STACK_SIZE;
+        pthread_attr_t          stgattr;
+        unsigned int            eventpoolsize = 0;
+
+        if (!svc)
+                return NULL;
+
+        stg = CALLOC (1, sizeof(*stg));
+        if (!stg)
+                return NULL;
+
+        stg->svc = svc;
+
+        eventpoolsize = svc->memfactor * RPCSVC_EVENTPOOL_SIZE_MULT;
+        gf_log (GF_RPCSVC, GF_LOG_TRACE, "event pool size: %u", eventpoolsize);
+        stg->eventpool = event_pool_new (eventpoolsize);
+        if (!stg->eventpool) {
+                ret = -1;
+                goto free_stg;
+        }
+
+        ret = pthread_attr_init (&stgattr);
+        if (ret != 0) {
+                ret = -1;
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Stage creation failed");
+                goto free_stg;
+        }
+
+        ret = pthread_attr_setstacksize (&stgattr, stacksize);
+        if (ret == EINVAL)
+                gf_log (GF_RPCSVC, GF_LOG_WARNING,
+                        "Using default thread stack size");
+
+        ret = pthread_create (&stg->tid, &stgattr, rpcsvc_stage_proc,
+                              (void *)stg);
+        /* The attribute object is only needed for the create call. */
+        pthread_attr_destroy (&stgattr);
+        if (ret != 0) {
+                ret = -1;
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Stage creation failed");
+                goto free_stg;
+        }
+
+        ret = 0;
+free_stg:
+        if (ret == -1) {
+                FREE (stg);
+                stg = NULL;
+        }
+
+        return stg;
+}
+
+/* Initialise the tunables of @svc. Only memfactor is set, to
+ * RPCSVC_DEFAULT_MEMFACTOR; @options is not consulted. Always returns 0.
+ */
+int
+rpcsvc_init_options (rpcsvc_t *svc, dict_t *options)
+{
+ svc->memfactor = RPCSVC_DEFAULT_MEMFACTOR;
+ return 0;
+}
+
+
+/* The global RPC service initializer.
+ * Starts up the stages and then waits for RPC program registrations
+ * to come in.
+ *
+ * Allocates the service object, initialises its lock and lists, applies the
+ * option defaults, sets up authentication and creates the default stage.
+ * @options and @ctx are stored in the service before the stage thread is
+ * started.
+ *
+ * Returns the new service, or NULL when an argument is NULL or any step
+ * fails. On failure the auth-scheme list entries registered so far, the
+ * lock and the service object itself are released.
+ */
+rpcsvc_t *
+rpcsvc_init (glusterfs_ctx_t *ctx, dict_t *options)
+{
+        rpcsvc_t                *svc = NULL;
+        struct rpcsvc_auth_list *auth = NULL;
+        struct rpcsvc_auth_list *tmp = NULL;
+        int                     ret = -1;
+
+        if ((!ctx) || (!options))
+                return NULL;
+
+        svc = CALLOC (1, sizeof (*svc));
+        if (!svc)
+                return NULL;
+
+        pthread_mutex_init (&svc->rpclock, NULL);
+        INIT_LIST_HEAD (&svc->stages);
+        INIT_LIST_HEAD (&svc->authschemes);
+        svc->options = options;
+        svc->ctx = ctx;
+
+        ret = rpcsvc_init_options (svc, options);
+        if (ret == -1) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to init options");
+                goto free_svc;
+        }
+
+        ret = rpcsvc_auth_init (svc, options);
+        if (ret == -1) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to init "
+                        "authentication");
+                goto free_svc;
+        }
+
+        ret = -1;
+        svc->defaultstage = rpcsvc_stage_init (svc);
+        if (!svc->defaultstage) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR,"RPC service init failed.");
+                goto free_svc;
+        }
+        gf_log (GF_RPCSVC, GF_LOG_DEBUG, "RPC service inited.");
+
+        ret = 0;
+free_svc:
+        if (ret == -1) {
+                /* Drop whatever auth initers were registered before the
+                 * failure; the descriptors they point to are not owned by
+                 * the list entries.
+                 */
+                list_for_each_entry_safe (auth, tmp, &svc->authschemes,
+                                          authlist) {
+                        list_del (&auth->authlist);
+                        FREE (auth);
+                }
+                pthread_mutex_destroy (&svc->rpclock);
+                FREE (svc);
+                svc = NULL;
+        }
+
+        return svc;
+}
+
+
+/* Once multi-threaded support is complete, we'll be able to round-robin
+ * the various incoming connections over the many available stages. This
+ * function selects one from among all the stages.
+ *
+ * For now the service's default stage is always returned; NULL is returned
+ * when @rpcservice is NULL.
+ */
+rpcsvc_stage_t *
+rpcsvc_select_stage (rpcsvc_t *rpcservice)
+{
+ if (!rpcservice)
+ return NULL;
+
+ return rpcservice->defaultstage;
+}
+
+
+/* Test whether @clstr matches any entry of a comma-separated pattern list.
+ *
+ * The list is the string stored in @options under the key @pattern. Each
+ * entry is matched against @clstr with fnmatch(), ignoring case.
+ *
+ * The list is tokenised on a private copy: strtok_r() writes NUL bytes into
+ * its input, and doing that to the string owned by the dict would cut the
+ * option down to its first entry for every later lookup.
+ *
+ * Returns 0 when an entry matches; -1 when there is no match, an argument
+ * is NULL, the key is absent, or memory cannot be allocated.
+ */
+int
+rpcsvc_conn_peer_check_search (dict_t *options, char *pattern, char *clstr)
+{
+        int                     ret = -1;
+        char                    *addrtok = NULL;
+        char                    *addrstr = NULL;
+        char                    *addrcopy = NULL;
+        char                    *svptr = NULL;
+
+        if ((!options) || (!pattern) || (!clstr))
+                return -1;
+
+        if (!dict_get (options, pattern))
+                return -1;
+
+        ret = dict_get_str (options, pattern, &addrstr);
+        if ((ret < 0) || (!addrstr))
+                return -1;
+
+        addrcopy = strdup (addrstr);
+        if (!addrcopy) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Memory allocation failed");
+                return -1;
+        }
+
+        ret = -1;
+        addrtok = strtok_r (addrcopy, ",", &svptr);
+        while (addrtok) {
+                if (fnmatch (addrtok, clstr, FNM_CASEFOLD) == 0) {
+                        ret = 0;
+                        break;
+                }
+
+                addrtok = strtok_r (NULL, ",", &svptr);
+        }
+
+        free (addrcopy);
+        return ret;
+}
+
+/* Check @clstr against the address allow list in @options.
+ *
+ * With a non-NULL @volname the option key is
+ * "rpc-auth.addr.<volname>.allow", otherwise the general
+ * "rpc-auth.addr.allow".
+ *
+ * Returns RPCSVC_AUTH_ACCEPT when rpcsvc_conn_peer_check_search() reports a
+ * match; RPCSVC_AUTH_DONTCARE when there is no match, @options or @clstr is
+ * NULL, or asprintf() fails.
+ */
+int
+rpcsvc_conn_peer_check_allow (dict_t *options, char *volname, char *clstr)
+{
+ int ret = RPCSVC_AUTH_DONTCARE;
+ char *srchstr = NULL;
+ char globalrule[] = "rpc-auth.addr.allow";
+
+ if ((!options) || (!clstr))
+ return ret;
+
+ /* If volname is NULL, then we're searching for the general rule to
+ * determine the current address in clstr is allowed or not for all
+ * subvolumes.
+ */
+ if (volname) {
+ ret = asprintf (&srchstr, "rpc-auth.addr.%s.allow", volname);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ ret = RPCSVC_AUTH_DONTCARE;
+ goto out;
+ }
+ } else
+ srchstr = globalrule;
+
+ ret = rpcsvc_conn_peer_check_search (options, srchstr, clstr);
+ if (volname)
+ FREE (srchstr);
+
+ if (ret == 0)
+ ret = RPCSVC_AUTH_ACCEPT;
+ else
+ ret = RPCSVC_AUTH_DONTCARE;
+out:
+ return ret;
+}
+/* Check @clstr against the address reject list in @options.
+ *
+ * With a non-NULL @volname the option key is
+ * "rpc-auth.addr.<volname>.reject", otherwise the general
+ * "rpc-auth.addr.reject".
+ *
+ * Returns RPCSVC_AUTH_REJECT when rpcsvc_conn_peer_check_search() reports a
+ * match or when asprintf() fails; RPCSVC_AUTH_DONTCARE when there is no
+ * match or when @options or @clstr is NULL.
+ */
+int
+rpcsvc_conn_peer_check_reject (dict_t *options, char *volname, char *clstr)
+{
+ int ret = RPCSVC_AUTH_DONTCARE;
+ char *srchstr = NULL;
+ char generalrule[] = "rpc-auth.addr.reject";
+
+ if ((!options) || (!clstr))
+ return ret;
+
+ if (volname) {
+ ret = asprintf (&srchstr, "rpc-auth.addr.%s.reject", volname);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ ret = RPCSVC_AUTH_REJECT;
+ goto out;
+ }
+ } else
+ srchstr = generalrule;
+
+ ret = rpcsvc_conn_peer_check_search (options, srchstr, clstr);
+ if (volname)
+ FREE (srchstr);
+
+ if (ret == 0)
+ ret = RPCSVC_AUTH_REJECT;
+ else
+ ret = RPCSVC_AUTH_DONTCARE;
+out:
+ return ret;
+}
+
+
+/* Merge the outcome of the allow-rule test and the reject-rule test into
+ * one verdict for the connection.
+ *
+ * A - Allow, D - Dont care (no rule given), R - Reject.
+ * The allow test never yields R and the reject test never yields A.
+ *
+ * | @allow | @reject | Result |
+ * |    A   |    R    |   R    |
+ * |    D   |    D    |   D    |
+ * |    A   |    D    |   A    |
+ * |    D   |    R    |   R    |
+ *
+ * Any combination outside the table is rejected.
+ */
+int
+rpcsvc_combine_allow_reject_volume_check (int allow, int reject)
+{
+        /* Only a "dont care" from the reject test lets the allow test
+         * decide; everything else is a rejection.
+         */
+        if (reject != RPCSVC_AUTH_DONTCARE)
+                return RPCSVC_AUTH_REJECT;
+
+        /* Neither rule was given: leave it to the general rule. */
+        if (allow == RPCSVC_AUTH_DONTCARE)
+                return RPCSVC_AUTH_DONTCARE;
+
+        if (allow == RPCSVC_AUTH_ACCEPT)
+                return RPCSVC_AUTH_ACCEPT;
+
+        return RPCSVC_AUTH_REJECT;
+}
+
+
+/* Merge the verdict of the general rule (@gen) with the verdict of the more
+ * specific rule (@spec) to get the final permission for a client address.
+ * A definite specific verdict always wins, otherwise the general one stands.
+ *
+ * | @gen | @spec | Result |
+ * |  A   |   A   |   A    |
+ * |  A   |   R   |   R    |
+ * |  A   |   D   |   A    |
+ * |  D   |   A   |   A    |
+ * |  D   |   R   |   R    |
+ * |  D   |   D   |   D    |
+ * |  R   |   A   |   A    |
+ * |  R   |   D   |   R    |
+ * |  R   |   R   |   R    |
+ *
+ * Values outside of A, R and D on either side result in R.
+ */
+int
+rpcsvc_combine_gen_spec_addr_checks (int gen, int spec)
+{
+        int     genknown = 0;
+
+        genknown = (gen == RPCSVC_AUTH_ACCEPT) ||
+                   (gen == RPCSVC_AUTH_DONTCARE) ||
+                   (gen == RPCSVC_AUTH_REJECT);
+        if (!genknown)
+                return RPCSVC_AUTH_REJECT;
+
+        if (spec == RPCSVC_AUTH_ACCEPT)
+                return RPCSVC_AUTH_ACCEPT;
+
+        if (spec == RPCSVC_AUTH_REJECT)
+                return RPCSVC_AUTH_REJECT;
+
+        /* No specific opinion, so the general verdict is passed through. */
+        if (spec == RPCSVC_AUTH_DONTCARE)
+                return gen;
+
+        return RPCSVC_AUTH_REJECT;
+}
+
+
+
+/* Merge the verdict of the general rule (@gen) with the verdict of the
+ * volume specific rule (@spec) for a connection coming in for a volume.
+ * Unlike the address variant, this one never returns dont care: when no
+ * rule says anything the connection is intentionally rejected.
+ *
+ * | @gen | @spec | Result |
+ * |  A   |   A   |   A    |
+ * |  A   |   R   |   R    |
+ * |  A   |   D   |   A    |
+ * |  D   |   A   |   A    |
+ * |  D   |   R   |   R    |
+ * |  D   |   D   |   R    |, special case, intentionally disallowed.
+ * |  R   |   A   |   A    |
+ * |  R   |   D   |   R    |
+ * |  R   |   R   |   R    |
+ *
+ * Values outside of A, R and D on either side result in R.
+ */
+int
+rpcsvc_combine_gen_spec_volume_checks (int gen, int spec)
+{
+        int     genknown = 0;
+
+        genknown = (gen == RPCSVC_AUTH_ACCEPT) ||
+                   (gen == RPCSVC_AUTH_DONTCARE) ||
+                   (gen == RPCSVC_AUTH_REJECT);
+        if (!genknown)
+                return RPCSVC_AUTH_REJECT;
+
+        /* An explicit specific accept over-rides any general verdict. */
+        if (spec == RPCSVC_AUTH_ACCEPT)
+                return RPCSVC_AUTH_ACCEPT;
+
+        /* Without a specific opinion only a general accept lets it in. */
+        if ((spec == RPCSVC_AUTH_DONTCARE) && (gen == RPCSVC_AUTH_ACCEPT))
+                return RPCSVC_AUTH_ACCEPT;
+
+        /* Specific reject, no rule at all, or an unexpected value. */
+        return RPCSVC_AUTH_REJECT;
+}
+
+
+/* Run the allow and reject rules against the peer string obtained through
+ * rpcsvc_conn_peername and combine both verdicts.
+ *
+ * Returns RPCSVC_AUTH_REJECT for a NULL @conn or when the peer string cannot
+ * be obtained, otherwise the result of
+ * rpcsvc_combine_allow_reject_volume_check.
+ */
+int
+rpcsvc_conn_peer_check_name (dict_t *options, char *volname,
+                             rpcsvc_conn_t *conn)
+{
+        char    peer[RPCSVC_PEER_STRLEN];
+        int     allowres = RPCSVC_AUTH_REJECT;
+        int     rejectres = RPCSVC_AUTH_REJECT;
+        int     gaierr = 0;
+
+        if (conn == NULL)
+                return RPCSVC_AUTH_REJECT;
+
+        gaierr = rpcsvc_conn_peername (conn, peer, RPCSVC_PEER_STRLEN);
+        if (gaierr != 0) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to get remote addr: "
+                        "%s", gai_strerror (gaierr));
+                return RPCSVC_AUTH_REJECT;
+        }
+
+        allowres = rpcsvc_conn_peer_check_allow (options, volname, peer);
+        rejectres = rpcsvc_conn_peer_check_reject (options, volname, peer);
+
+        return rpcsvc_combine_allow_reject_volume_check (allowres, rejectres);
+}
+
+
+/* Run the allow and reject rules against the peer string obtained through
+ * rpcsvc_conn_peeraddr and combine both verdicts.
+ *
+ * Returns RPCSVC_AUTH_REJECT for a NULL @conn or when the peer string cannot
+ * be obtained, otherwise the result of
+ * rpcsvc_combine_allow_reject_volume_check.
+ */
+int
+rpcsvc_conn_peer_check_addr (dict_t *options, char *volname,rpcsvc_conn_t *conn)
+{
+        char    peer[RPCSVC_PEER_STRLEN];
+        int     allowres = RPCSVC_AUTH_DONTCARE;
+        int     rejectres = RPCSVC_AUTH_REJECT;
+        int     gaierr = 0;
+
+        if (conn == NULL)
+                return RPCSVC_AUTH_REJECT;
+
+        /* Only the string form is wanted here, not the sockaddr. */
+        gaierr = rpcsvc_conn_peeraddr (conn, peer, RPCSVC_PEER_STRLEN, NULL,
+                                       0);
+        if (gaierr != 0) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to get remote addr: "
+                        "%s", gai_strerror (gaierr));
+                return RPCSVC_AUTH_REJECT;
+        }
+
+        allowres = rpcsvc_conn_peer_check_allow (options, volname, peer);
+        rejectres = rpcsvc_conn_peer_check_reject (options, volname, peer);
+
+        return rpcsvc_combine_allow_reject_volume_check (allowres, rejectres);
+}
+
+
+/* Apply the rules specific to @volname to the peer of @conn.
+ *
+ * The address based check is always performed. The name based check is
+ * performed as well unless the option "rpc-auth.addr.namelookup" is present
+ * and parses to false; name lookup is enabled by default. When both checks
+ * run, they are merged with rpcsvc_combine_gen_spec_addr_checks, the address
+ * verdict being the general one and the name verdict the specific one.
+ *
+ * Returns RPCSVC_AUTH_REJECT when any argument is NULL.
+ */
+int
+rpcsvc_conn_check_volume_specific (dict_t *options, char *volname,
+                                   rpcsvc_conn_t *conn)
+{
+        gf_boolean_t    uselookup = _gf_true;
+        char            *optval = NULL;
+        int             byname = RPCSVC_AUTH_REJECT;
+        int             byaddr = RPCSVC_AUTH_REJECT;
+
+        if ((options == NULL) || (volname == NULL) || (conn == NULL))
+                return RPCSVC_AUTH_REJECT;
+
+        if (dict_get (options, "rpc-auth.addr.namelookup") &&
+            (dict_get_str (options, "rpc-auth.addr.namelookup",
+                           &optval) == 0))
+                gf_string2boolean (optval, &uselookup);
+
+        if (!uselookup)
+                return rpcsvc_conn_peer_check_addr (options, volname, conn);
+
+        /* Two separate checks are needed because rules with addresses can
+         * be network addresses, which are general, while names can be
+         * specific and over-ride the network address rules.
+         */
+        byname = rpcsvc_conn_peer_check_name (options, volname, conn);
+        byaddr = rpcsvc_conn_peer_check_addr (options, volname, conn);
+
+        return rpcsvc_combine_gen_spec_addr_checks (byaddr, byname);
+}
+
+
+/* Apply the general rules, i.e. those not tied to any volume, to the peer
+ * of @conn. The checks are run with a NULL volume name.
+ *
+ * The address based check is always performed. The name based check is
+ * performed as well unless the option "rpc-auth.addr.namelookup" is present
+ * and parses to false; name lookup is enabled by default. When both checks
+ * run, they are merged with rpcsvc_combine_gen_spec_addr_checks, the address
+ * verdict being the general one and the name verdict the specific one.
+ *
+ * Returns RPCSVC_AUTH_REJECT when @options or @conn is NULL.
+ */
+int
+rpcsvc_conn_check_volume_general (dict_t *options, rpcsvc_conn_t *conn)
+{
+        gf_boolean_t    uselookup = _gf_true;
+        char            *optval = NULL;
+        int             byname = RPCSVC_AUTH_REJECT;
+        int             byaddr = RPCSVC_AUTH_REJECT;
+
+        if ((options == NULL) || (conn == NULL))
+                return RPCSVC_AUTH_REJECT;
+
+        if (dict_get (options, "rpc-auth.addr.namelookup") &&
+            (dict_get_str (options, "rpc-auth.addr.namelookup",
+                           &optval) == 0))
+                gf_string2boolean (optval, &uselookup);
+
+        if (!uselookup)
+                return rpcsvc_conn_peer_check_addr (options, NULL, conn);
+
+        /* Two separate checks are needed because rules with addresses can
+         * be network addresses, which are general, while names can be
+         * specific and over-ride the network address rules.
+         */
+        byname = rpcsvc_conn_peer_check_name (options, NULL, conn);
+        byaddr = rpcsvc_conn_peer_check_addr (options, NULL, conn);
+
+        return rpcsvc_combine_gen_spec_addr_checks (byaddr, byname);
+}
+
+/* Decide whether the peer of @conn may access @volname by evaluating the
+ * general rules and the volume specific rules and merging both with
+ * rpcsvc_combine_gen_spec_volume_checks.
+ *
+ * Returns RPCSVC_AUTH_REJECT when any argument is NULL.
+ */
+int
+rpcsvc_conn_peer_check (dict_t *options, char *volname, rpcsvc_conn_t *conn)
+{
+        int     genres = RPCSVC_AUTH_REJECT;
+        int     volres = RPCSVC_AUTH_REJECT;
+
+        if ((options == NULL) || (volname == NULL) || (conn == NULL))
+                return RPCSVC_AUTH_REJECT;
+
+        genres = rpcsvc_conn_check_volume_general (options, conn);
+        volres = rpcsvc_conn_check_volume_specific (options, volname, conn);
+
+        return rpcsvc_combine_gen_spec_volume_checks (genres, volres);
+}
+
+
+/* Return the value of the allow rule that applies to @volname.
+ *
+ * The volume specific key "rpc-auth.addr.<volname>.allow" is preferred. If
+ * it is not present in @options, the general key "rpc-auth.addr.allow" is
+ * looked up instead.
+ *
+ * Returns the string handed back by dict_get_str, or NULL when an argument
+ * is NULL, the key cannot be built, or the lookup fails.
+ * NOTE(review): the returned string presumably belongs to the dict and must
+ * not be freed by the caller - confirm against dict_get_str.
+ */
+char *
+rpcsvc_volume_allowed (dict_t *options, char *volname)
+{
+        char    globalrule[] = "rpc-auth.addr.allow";
+        char    *srchstr = NULL;
+        char    *addrstr = NULL;
+        int     ret = -1;
+
+        if ((!options) || (!volname))
+                return NULL;
+
+        ret = asprintf (&srchstr, "rpc-auth.addr.%s.allow", volname);
+        if (ret == -1) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+                return NULL;
+        }
+
+        if (dict_get (options, srchstr))
+                ret = dict_get_str (options, srchstr, &addrstr);
+        else
+                ret = dict_get_str (options, globalrule, &addrstr);
+
+        /* The key is only needed for the lookups above. It used to be
+         * leaked whenever the volume specific rule was found.
+         */
+        FREE (srchstr);
+
+        if (ret != 0)
+                addrstr = NULL;
+
+        return addrstr;
+}
+
+
+/* Allocate a connection object for @sockfd and set up its core members: the
+ * owning program, the empty transmission list and the tx and rx memory
+ * pools, both sized as RPCSVC_POOLCOUNT_MULT * svc->memfactor.
+ *
+ * The connection starts out in the disconnected state with a reference count
+ * of zero. Returns NULL if any allocation fails; anything allocated up to
+ * that point is released again.
+ */
+rpcsvc_conn_t *
+rpcsvc_conn_init (rpcsvc_t *svc, rpcsvc_program_t *prog, int sockfd)
+{
+        rpcsvc_conn_t   *newconn = NULL;
+        unsigned int    nelems = 0;
+
+        newconn = CALLOC (1, sizeof(*newconn));
+        if (newconn == NULL) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "memory allocation failed");
+                return NULL;
+        }
+
+        newconn->sockfd = sockfd;
+        newconn->program = (void *)prog;
+        INIT_LIST_HEAD (&newconn->txbufs);
+        nelems = RPCSVC_POOLCOUNT_MULT * svc->memfactor;
+
+        gf_log (GF_RPCSVC, GF_LOG_TRACE, "tx pool: %d", nelems);
+        newconn->txpool = mem_pool_new (rpcsvc_txbuf_t, nelems);
+        if (newconn->txpool == NULL) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "mem pool allocation failed");
+                FREE (newconn);
+                return NULL;
+        }
+
+        gf_log (GF_RPCSVC, GF_LOG_TRACE, "rx pool: %d", nelems);
+        newconn->rxpool = mem_pool_new (rpcsvc_request_t, nelems);
+        if (newconn->rxpool == NULL) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "mem pool allocation failed");
+                mem_pool_destroy (newconn->txpool);
+                FREE (newconn);
+                return NULL;
+        }
+
+        /* A connection cannot be considered connected until its user
+         * decides it is ready for use. It is possible that it has to be
+         * freed soon after, and that free will not happen unless the state
+         * is disconnected.
+         */
+        newconn->connstate = RPCSVC_CONNSTATE_DISCONNECTED;
+        pthread_mutex_init (&newconn->connlock, NULL);
+        newconn->connref = 0;
+        gf_log (GF_RPCSVC, GF_LOG_DEBUG, "New connection inited: sockfd: %d",
+                sockfd);
+
+        return newconn;
+}
+
+
+/* Release everything owned by @conn and free the connection object itself.
+ *
+ * The tx and rx pools are destroyed first, then the owning program is given
+ * the chance to clean up through its conn_destroy callback, if it has one.
+ * Callers in this file invoke this only after the reference count dropped
+ * to zero and with connlock released. A NULL @conn is ignored.
+ */
+void
+rpcsvc_conn_destroy (rpcsvc_conn_t *conn)
+{
+        if (!conn)
+                return;
+
+        mem_pool_destroy (conn->txpool);
+        mem_pool_destroy (conn->rxpool);
+
+        if ((conn->program) && (conn->program->conn_destroy))
+                conn->program->conn_destroy (conn->program->private, conn);
+
+        /* Pairs with the pthread_mutex_init done in rpcsvc_conn_init. */
+        pthread_mutex_destroy (&conn->connlock);
+
+        /* Need to destroy record state, txlists etc. */
+        FREE (conn);
+        gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Connection destroyed");
+}
+
+
+/* Drop one reference on @conn and return the resulting count. No locking is
+ * done here; the callers in this file hold connlock around the call.
+ */
+int
+__rpcsvc_conn_unref (rpcsvc_conn_t *conn)
+{
+        conn->connref -= 1;
+
+        return conn->connref;
+}
+
+
+/* Take @conn out of service: unregister its socket from the event pool of
+ * its stage (when it has one), mark an active connection as disconnected
+ * and close the socket. No locking is done here. A NULL @conn is ignored.
+ */
+void
+__rpcsvc_conn_deinit (rpcsvc_conn_t *conn)
+{
+        if (conn == NULL)
+                return;
+
+        if ((conn->stage != NULL) && (conn->stage->eventpool != NULL))
+                event_unregister (conn->stage->eventpool, conn->sockfd,
+                                  conn->eventidx);
+
+        if (rpcsvc_conn_check_active (conn)) {
+                gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Connection de-activated:"
+                        " sockfd: %d", conn->sockfd);
+                conn->connstate = RPCSVC_CONNSTATE_DISCONNECTED;
+        }
+
+        /* Nothing left to close if the descriptor was already reset. */
+        if (conn->sockfd == -1)
+                return;
+
+        close (conn->sockfd);
+        conn->sockfd = -1;
+}
+
+
+/* Locked variant of __rpcsvc_conn_deinit which also drops one reference.
+ * The connection is destroyed when that was the last reference. A NULL
+ * @conn is ignored.
+ */
+void
+rpcsvc_conn_deinit (rpcsvc_conn_t *conn)
+{
+        int     remaining = 0;
+
+        if (conn == NULL)
+                return;
+
+        pthread_mutex_lock (&conn->connlock);
+        __rpcsvc_conn_deinit (conn);
+        remaining = __rpcsvc_conn_unref (conn);
+        pthread_mutex_unlock (&conn->connlock);
+
+        if (remaining != 0)
+                return;
+
+        rpcsvc_conn_destroy (conn);
+}
+
+
+/* Drop one reference on @conn under connlock and destroy the connection
+ * when the count reaches zero. A NULL @conn is ignored.
+ */
+void
+rpcsvc_conn_unref (rpcsvc_conn_t *conn)
+{
+        int     remaining = 0;
+
+        if (conn == NULL)
+                return;
+
+        pthread_mutex_lock (&conn->connlock);
+        remaining = __rpcsvc_conn_unref (conn);
+        pthread_mutex_unlock (&conn->connlock);
+
+        if (remaining != 0)
+                return;
+
+        rpcsvc_conn_destroy (conn);
+}
+
+
+/* Return the result of rpcsvc_conn_check_active for @conn, evaluated while
+ * holding connlock. Returns 0 for a NULL @conn.
+ */
+int
+rpcsvc_conn_active (rpcsvc_conn_t *conn)
+{
+        int     isactive = 0;
+
+        if (conn == NULL)
+                return 0;
+
+        pthread_mutex_lock (&conn->connlock);
+        isactive = rpcsvc_conn_check_active (conn);
+        pthread_mutex_unlock (&conn->connlock);
+
+        return isactive;
+}
+
+
+
+/* Take one more reference on @conn under connlock. A NULL @conn is
+ * ignored.
+ */
+void
+rpcsvc_conn_ref (rpcsvc_conn_t *conn)
+{
+        if (conn == NULL)
+                return;
+
+        pthread_mutex_lock (&conn->connlock);
+        conn->connref += 1;
+        pthread_mutex_unlock (&conn->connlock);
+}
+
+
+/* Mark @conn as connected and take a reference for it. This is done without
+ * taking connlock. A NULL @conn is ignored.
+ */
+void
+rpcsvc_conn_state_init (rpcsvc_conn_t *conn)
+{
+        if (conn == NULL)
+                return;
+
+        conn->connref += 1;
+        conn->connstate = RPCSVC_CONNSTATE_CONNECTED;
+}
+
+/* Build a rpcsvc_conn_t with the aim of listening on it. The listening
+ * socket is created from the address family, host and port of @newprog.
+ *
+ * Returns the connection in connected state, or NULL if @newprog is NULL,
+ * the socket cannot be created or the connection cannot be initialized. The
+ * socket is closed again in the last case.
+ */
+rpcsvc_conn_t *
+rpcsvc_conn_listen_init (rpcsvc_t *svc, rpcsvc_program_t *newprog)
+{
+        rpcsvc_conn_t   *listener = NULL;
+        int             lsock = -1;
+
+        if (newprog == NULL)
+                return NULL;
+
+        lsock = rpcsvc_socket_listen (newprog->progaddrfamily,
+                                      newprog->proghost, newprog->progport);
+        if (lsock == -1)
+                return NULL;
+
+        listener = rpcsvc_conn_init (svc, newprog, lsock);
+        if (listener == NULL) {
+                close (lsock);
+                return NULL;
+        }
+
+        rpcsvc_conn_state_init (listener);
+
+        return listener;
+}
+
+/* Reset the record state @rs so that the next thing to be read is a
+ * fragment header, and attach a fresh iobuf from @pool to receive the
+ * fragment data. A NULL @rs is ignored.
+ *
+ * If no iobuf can be had, an error is logged and both activeiob and
+ * fragcurrent are left NULL instead of dereferencing the NULL iobuf.
+ */
+void
+rpcsvc_record_init (rpcsvc_record_state_t *rs, struct iobuf_pool *pool)
+{
+        if (!rs)
+                return;
+
+        rs->state = RPCSVC_READ_FRAGHDR;
+        rs->vecstate = 0;
+        rs->remainingfraghdr = RPCSVC_FRAGHDR_SIZE;
+        rs->remainingfrag = 0;
+        rs->fragsize = 0;
+        rs->recordsize = 0;
+        rs->islastfrag = 0;
+
+        /* If the rs preserves a ref to the iob used by the previous request,
+         * we must unref it here to prevent memory leak.
+         * If program actor wanted to keep that memory around, it should've
+         * refd it on entry into the actor.
+         */
+        if (rs->activeiob)
+                iobuf_unref (rs->activeiob);
+
+        if (rs->vectoriob) {
+                iobuf_unref (rs->vectoriob);
+                rs->vectoriob = NULL;
+        }
+
+        rs->activeiob = iobuf_get (pool);
+        if (rs->activeiob)
+                rs->fragcurrent = iobuf_ptr (rs->activeiob);
+        else {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to get iobuf");
+                rs->fragcurrent = NULL;
+        }
+
+        memset (rs->fragheader, 0, RPCSVC_FRAGHDR_SIZE);
+        rs->hdrcurrent = &rs->fragheader[0];
+
+}
+
+
+/* Decide whether the source port of the peer of @conn is acceptable for
+ * access to @volname.
+ *
+ * Connections from a privileged port (below 1024) are always accepted.
+ * For other ports, two options from svc->options are consulted, both
+ * disabled by default:
+ *   "rpc-auth.ports.insecure"            - the general setting
+ *   "rpc-auth.ports.<volname>.insecure"  - the volume specific setting
+ * The two are merged with rpcsvc_combine_gen_spec_volume_checks, so a volume
+ * specific setting over-rides the general one.
+ *
+ * Returns RPCSVC_AUTH_ACCEPT or RPCSVC_AUTH_REJECT. NULL arguments and
+ * failures to determine the peer address or to build the option key result
+ * in RPCSVC_AUTH_REJECT.
+ */
+int
+rpcsvc_conn_privport_check (rpcsvc_t *svc, char *volname, rpcsvc_conn_t *conn)
+{
+        struct sockaddr_in      sa;
+        int                     ret = RPCSVC_AUTH_REJECT;
+        socklen_t               sasize = sizeof (sa);
+        char                    *srchstr = NULL;
+        char                    *valstr = NULL;
+        int                     globalinsecure = RPCSVC_AUTH_REJECT;
+        int                     exportinsecure = RPCSVC_AUTH_DONTCARE;
+        uint16_t                port = 0;
+        gf_boolean_t            insecure = _gf_false;
+
+        if ((!svc) || (!volname) || (!conn))
+                return ret;
+
+        ret = rpcsvc_conn_peeraddr (conn, NULL, 0, (struct sockaddr *)&sa,
+                                    sasize);
+        if (ret != 0) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to get peer addr: %s",
+                        gai_strerror (ret));
+                ret = RPCSVC_AUTH_REJECT;
+                goto err;
+        }
+
+        port = ntohs (sa.sin_port);
+        gf_log (GF_RPCSVC, GF_LOG_TRACE, "Client port: %d", (int)port);
+        /* If the port is already a privileged one, dont bother with checking
+         * options. Privileged ports are those below 1024; port 1024 itself
+         * can be bound by any user.
+         */
+        if (port < 1024) {
+                ret = RPCSVC_AUTH_ACCEPT;
+                goto err;
+        }
+
+        /* Disabled by default */
+        if ((dict_get (svc->options, "rpc-auth.ports.insecure"))) {
+                ret = dict_get_str (svc->options, "rpc-auth.ports.insecure"
+                                    , &valstr);
+                if (ret == 0) {
+                        ret = gf_string2boolean (valstr, &insecure);
+                        if (ret == 0) {
+                                if (insecure == _gf_true)
+                                        globalinsecure = RPCSVC_AUTH_ACCEPT;
+                        } else
+                                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to"
+                                        " read rpc-auth.ports.insecure value");
+                } else
+                        gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to"
+                                " read rpc-auth.ports.insecure value");
+        }
+
+        /* Disabled by default */
+        ret = asprintf (&srchstr, "rpc-auth.ports.%s.insecure", volname);
+        if (ret == -1) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+                ret = RPCSVC_AUTH_REJECT;
+                goto err;
+        }
+
+        if (dict_get (svc->options, srchstr)) {
+                valstr = NULL;
+                ret = dict_get_str (svc->options, srchstr, &valstr);
+                if (ret == 0) {
+                        /* Parse the option's value, not the key name. */
+                        ret = gf_string2boolean (valstr, &insecure);
+                        if (ret == 0) {
+                                if (insecure == _gf_true)
+                                        exportinsecure = RPCSVC_AUTH_ACCEPT;
+                                else
+                                        exportinsecure = RPCSVC_AUTH_REJECT;
+                        } else
+                                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to"
+                                        " read rpc-auth.ports.insecure value");
+                } else
+                        gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to"
+                                " read rpc-auth.ports.insecure value");
+        }
+
+        /* The key was only needed for the lookup above. */
+        FREE (srchstr);
+
+        ret = rpcsvc_combine_gen_spec_volume_checks (globalinsecure,
+                                                     exportinsecure);
+        if (ret == RPCSVC_AUTH_ACCEPT)
+                gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Unprivileged port allowed");
+        else
+                gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Unprivileged port not"
+                        " allowed");
+
+err:
+        return ret;
+}
+
+/* Accept a connection on @listenfd and build a rpcsvc_conn_t for it.
+ *
+ * The new connection gets its record state initialized from the iobuf pool
+ * of the service context, is marked connected and is announced to @destprog
+ * through its conn_init callback, if it has one.
+ *
+ * Returns NULL when @svc or @destprog is NULL, when the accept fails, or
+ * when the connection object cannot be set up. In the last case the
+ * accepted socket is closed again.
+ */
+rpcsvc_conn_t *
+rpcsvc_conn_accept_init (rpcsvc_t *svc, int listenfd,
+                         rpcsvc_program_t *destprog)
+{
+        rpcsvc_conn_t   *newconn = NULL;
+        int             sock = -1;
+
+        if ((!svc) || (!destprog))
+                return NULL;
+
+        /* Nothing to clean up if there is no socket; in particular, do not
+         * call close on an invalid descriptor.
+         */
+        sock = rpcsvc_socket_accept (listenfd);
+        if (sock == -1)
+                return NULL;
+
+        newconn = rpcsvc_conn_init (svc, destprog, sock);
+        if (!newconn) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to init conn object");
+                close (sock);
+                return NULL;
+        }
+
+        rpcsvc_record_init (&newconn->rstate, svc->ctx->iobuf_pool);
+        rpcsvc_conn_state_init (newconn);
+        if (destprog->conn_init)
+                destprog->conn_init (destprog->private, newconn);
+
+        return newconn;
+}
+
+
+/* Once a connection has been created, it must be associated with a stage so
+ * that the selected stage handles the events on this connection. The caller
+ * chooses the @handler to be executed in the context of the stage and the
+ * @data handed to it.
+ * NOTE(review): the trailing 1, 0 passed to event_register presumably
+ * select poll-in but not poll-out - confirm against event_register.
+ *
+ * Returns 0 on success and -1 if @stg or @conn is NULL or the registration
+ * fails.
+ */
+int
+rpcsvc_stage_conn_associate (rpcsvc_stage_t *stg, rpcsvc_conn_t *conn,
+                             event_handler_t handler, void *data)
+{
+        if ((stg == NULL) || (conn == NULL))
+                return -1;
+
+        conn->stage = stg;
+        conn->eventidx = event_register (stg->eventpool, conn->sockfd,
+                                         handler, data, 1, 0);
+
+        return (conn->eventidx == -1) ? -1 : 0;
+}
+
+
+/* Depending on the state of @rs, return the size of the next read request:
+ * the remaining fragment header bytes, the remaining fragment bytes, or
+ * RPCSVC_CONN_READ when in neither state. A NULL @rs yields -1 converted to
+ * size_t.
+ */
+size_t
+rpcsvc_record_read_size (rpcsvc_record_state_t *rs)
+{
+        if (rs == NULL)
+                return -1;
+
+        if (rpcsvc_record_readfraghdr (rs))
+                return rs->remainingfraghdr;
+
+        if (rpcsvc_record_readfrag (rs))
+                return rs->remainingfrag;
+
+        return RPCSVC_CONN_READ;
+}
+
+
+/* Read the 32-bit fragment header stored at @fraghdr in network byte order
+ * and return it in host byte order. Returns 0 for a NULL @fraghdr.
+ */
+uint32_t
+rpcsvc_record_extract_fraghdr (char *fraghdr)
+{
+        uint32_t        netval = 0;
+
+        if (fraghdr == NULL)
+                return 0;
+
+        /* memcpy because the buffer need not be aligned for a uint32_t. */
+        memcpy (&netval, fraghdr, sizeof (netval));
+
+        return ntohl (netval);
+}
+
+
+/* Called once the complete fragment header is available in rs->fragheader.
+ * Decodes the fragment size and the last-fragment flag from it, sets up the
+ * number of bytes still to be read for the fragment (only
+ * RPCSVC_BARERPC_MSGSZ for a vectored record) and moves the state on to
+ * RPCSVC_READ_FRAG.
+ *
+ * Returns @dataread less the header bytes that were still outstanding.
+ */
+ssize_t
+rpcsvc_record_read_complete_fraghdr (rpcsvc_record_state_t *rs,ssize_t dataread)
+{
+        uint32_t        hdrleft = 0;
+        uint32_t        hdrval = 0;
+
+        hdrleft = rs->remainingfraghdr;
+        hdrval = rpcsvc_record_extract_fraghdr (&rs->fragheader[0]);
+
+        rs->fragsize = RPCSVC_FRAGSIZE (hdrval);
+        gf_log (GF_RPCSVC, GF_LOG_TRACE, "Received fragment size: %d",
+                rs->fragsize);
+
+        if (!rpcsvc_record_vectored (rs)) {
+                gf_log (GF_RPCSVC, GF_LOG_TRACE, "Regular RPC header,"
+                        " remaining: %d", rs->fragsize);
+                rs->remainingfrag = rs->fragsize;
+        } else {
+                gf_log (GF_RPCSVC, GF_LOG_TRACE, "Vectored RPC header,"
+                        " remaining: %d", RPCSVC_BARERPC_MSGSZ);
+                rs->remainingfrag = RPCSVC_BARERPC_MSGSZ;
+        }
+
+        rs->state = RPCSVC_READ_FRAG;
+        rs->remainingfraghdr -= hdrleft;
+        rs->islastfrag = RPCSVC_LASTFRAG (hdrval);
+
+        return dataread - hdrleft;
+}
+
+
+/* Called when fewer bytes than the outstanding part of the fragment header
+ * were read. All of @dataread is consumed as header bytes and the rest of
+ * the header is awaited. Always returns 0, as no bytes are left over.
+ */
+ssize_t
+rpcsvc_record_read_partial_fraghdr (rpcsvc_record_state_t *rs, ssize_t dataread)
+{
+        rs->remainingfraghdr -= dataread;
+        rpcsvc_record_update_currenthdr (rs, dataread);
+
+        gf_log (GF_RPCSVC, GF_LOG_TRACE, "Fragment header remaining: %d",
+                rs->remainingfraghdr);
+
+        return 0;
+}
+
+
+/* Account for @dataread freshly read bytes while the record state is
+ * reading a fragment header.
+ *
+ * Returns the number of bytes left over after the header took its share,
+ * or -1 if @rs is NULL, @dataread is not positive, or the record state is
+ * inconsistent with reading a fragment header.
+ */
+ssize_t
+rpcsvc_record_update_fraghdr (rpcsvc_record_state_t *rs, ssize_t dataread)
+{
+        if ((rs == NULL) || (dataread <= 0))
+                return -1;
+
+        /* We are not supposed to get here unless the state says that a
+         * fragment header is being processed.
+         */
+        if (!rpcsvc_record_readfraghdr(rs)) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "record state inconsistent"
+                        ": request to update frag header when state is not"
+                        "RPCSVC_READ_FRAGHDR");
+                return -1;
+        }
+
+        /* Had the header been read completely, the state checked above
+         * would have been a different one.
+         */
+        if (rs->remainingfraghdr == 0) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "record state inconsistent"
+                        ": request to update frag header when frag header"
+                        "remaining is 0.");
+                return -1;
+        }
+
+        /* Less than the outstanding header bytes: consume it all. */
+        if (!(dataread >= rs->remainingfraghdr))
+                return rpcsvc_record_read_partial_fraghdr (rs, dataread);
+
+        /* The full header is here now, and may be even more. */
+        return rpcsvc_record_read_complete_fraghdr (rs, dataread);
+}
+
+/* Called when the bytes read cover everything that was outstanding for the
+ * current fragment. Switches the state back to reading the header of the
+ * next fragment and adds the fragment size to the record size.
+ *
+ * Returns @dataread less the fragment bytes that were still outstanding.
+ */
+ssize_t
+rpcsvc_record_read_complete_frag (rpcsvc_record_state_t *rs, ssize_t dataread)
+{
+        uint32_t        fragleft;
+
+        fragleft = rs->remainingfrag;
+
+        /* The fragment is complete, next comes a fragment header. */
+        rs->state = RPCSVC_READ_FRAGHDR;
+
+        /* This leaves nothing remaining for this fragment. */
+        rs->remainingfrag -= fragleft;
+
+        /* fragsize is what the fragment header announced. */
+        rs->recordsize += rs->fragsize;
+        gf_log (GF_RPCSVC, GF_LOG_TRACE, "Fragment remaining: %d",
+                rs->remainingfrag);
+
+        return dataread - fragleft;
+}
+
+
+/* Called when fewer bytes than outstanding for the current fragment were
+ * read. All of @dataread is taken as fragment data. Always returns 0, as
+ * the whole buffer is consumed.
+ */
+ssize_t
+rpcsvc_record_read_partial_frag (rpcsvc_record_state_t *rs, ssize_t dataread)
+{
+        rs->remainingfrag -= dataread;
+        rpcsvc_record_update_currentfrag (rs, dataread);
+
+        gf_log (GF_RPCSVC, GF_LOG_TRACE, "Fragment remaining: %d",
+                rs->remainingfrag);
+
+        return 0;
+}
+
+
+/* Account for @dataread freshly read bytes while the record state is
+ * reading fragment data.
+ *
+ * Returns the number of bytes left over after the fragment took its share,
+ * or -1 if @rs is NULL, @dataread is not positive, or the record state is
+ * inconsistent with reading a fragment.
+ */
+ssize_t
+rpcsvc_record_update_frag (rpcsvc_record_state_t *rs, ssize_t dataread)
+{
+        if ((rs == NULL) || (dataread <= 0))
+                return -1;
+
+        if (!rpcsvc_record_readfrag (rs)) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "record state inconsistent"
+                        ": request to update fragment when record state is not"
+                        "RPCSVC_READ_FRAG.");
+                return -1;
+        }
+
+        if (rs->remainingfrag == 0) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "record state inconsistent"
+                        ": request to update fragment when there is no fragment"
+                        " data remaining to be read.");
+                return -1;
+        }
+
+        /* Not enough to finish the fragment: take whatever came in. */
+        if (!(dataread >= rs->remainingfrag))
+                return rpcsvc_record_read_partial_frag (rs, dataread);
+
+        /* At least as much data as the current fragment requires. */
+        return rpcsvc_record_read_complete_frag (rs, dataread);
+}
+
+
+/* Find the actor of the program attached to @conn that serves the procedure
+ * requested in @req.
+ *
+ * Returns the actor, or NULL when there is none. Whenever @req is not NULL,
+ * req->rpc_err is set to the RPC level outcome: SUCCESS, PROG_UNAVAIL,
+ * PROG_MISMATCH, PROC_UNAVAIL or SYSTEM_ERR.
+ *
+ * This needs to change to returning errors, since RPC specific error
+ * messages must be returned when some of the pointers below are NULL.
+ */
+rpcsvc_actor_t *
+rpcsvc_program_actor (rpcsvc_conn_t *conn, rpcsvc_request_t *req)
+{
+        rpcsvc_program_t        *prog = NULL;
+        rpcsvc_actor_t          *found = NULL;
+        int                     rpcstatus = SYSTEM_ERR;
+
+        if ((conn == NULL) || (req == NULL))
+                goto done;
+
+        prog = (rpcsvc_program_t *)conn->program;
+        if (prog == NULL)
+                goto done;
+
+        if (req->prognum != prog->prognum) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "RPC program not available");
+                rpcstatus = PROG_UNAVAIL;
+                goto done;
+        }
+
+        if (prog->actors == NULL) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "RPC System error");
+                rpcstatus = SYSTEM_ERR;
+                goto done;
+        }
+
+        if (req->progver != prog->progver) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "RPC program version not"
+                        " available");
+                rpcstatus = PROG_MISMATCH;
+                goto done;
+        }
+
+        if ((req->procnum < 0) || (req->procnum >= prog->numactors)) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "RPC Program procedure not"
+                        " available");
+                rpcstatus = PROC_UNAVAIL;
+                goto done;
+        }
+
+        /* A slot in the table without a handler counts as unavailable. */
+        if (prog->actors[req->procnum].actor == NULL) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "RPC Program procedure not"
+                        " available");
+                rpcstatus = PROC_UNAVAIL;
+                goto done;
+        }
+
+        found = &prog->actors[req->procnum];
+        rpcstatus = SUCCESS;
+        gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Actor found: %s - %s",
+                prog->progname, found->procname);
+done:
+        if (req != NULL)
+                req->rpc_err = rpcstatus;
+
+        return found;
+}
+
+
+/* Get a transmission buffer from the tx pool of @conn and fill it in with
+ * the vector @msg, the backing @iob and @iobref and the @txflags. The
+ * buffer is not put on any list here.
+ *
+ * No reference is taken on @iob; if one was required, it must have been
+ * taken already. Returns NULL when the pool cannot supply a buffer.
+ */
+rpcsvc_txbuf_t *
+rpcsvc_init_txbuf (rpcsvc_conn_t *conn, struct iovec msg, struct iobuf *iob,
+                   struct iobref *iobref, int txflags)
+{
+        rpcsvc_txbuf_t  *newtxb = NULL;
+
+        newtxb = (rpcsvc_txbuf_t *) mem_get(conn->txpool);
+        if (newtxb == NULL) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to get txbuf");
+                return NULL;
+        }
+
+        memset (newtxb, 0, sizeof (*newtxb));
+        INIT_LIST_HEAD (&newtxb->txlist);
+
+        newtxb->buf = msg;
+        newtxb->iob = iob;
+        newtxb->iobref = iobref;
+        newtxb->offset = 0;
+        newtxb->txbehave = txflags;
+
+        return newtxb;
+}
+
+
+/* Wrap the vector @msg, backed by @iob, into a transmission buffer and add
+ * it to the tail of the transmission list of @conn. No locking is done
+ * here.
+ *
+ * Returns 0 on success and -1 if @conn, msg.iov_base or @iob is NULL or no
+ * transmission buffer could be had.
+ */
+int
+rpcsvc_conn_append_txlist (rpcsvc_conn_t *conn, struct iovec msg,
+                           struct iobuf *iob, int txflags)
+{
+        rpcsvc_txbuf_t  *newtxb = NULL;
+
+        if ((conn == NULL) || (msg.iov_base == NULL) || (iob == NULL))
+                return -1;
+
+        newtxb = rpcsvc_init_txbuf (conn, msg, iob, NULL, txflags);
+        if (newtxb == NULL)
+                return -1;
+
+        list_add_tail (&newtxb->txlist, &conn->txbufs);
+
+        return 0;
+}
+
+
+/* Set the most significant bit of the fragment header value *@fragsize. */
+void
+rpcsvc_set_lastfrag (uint32_t *fragsize) {
+        *fragsize = (*fragsize) | 0x80000000U;
+}
+
+/* Store @size in network byte order into the sizeof (uint32_t) bytes that
+ * start at @haddr.
+ */
+void
+rpcsvc_set_frag_header_size (uint32_t size, char *haddr)
+{
+        uint32_t        netsize = 0;
+
+        netsize = htonl (size);
+        memcpy (haddr, &netsize, sizeof (netsize));
+}
+
+/* Write a fragment header for a fragment of @size bytes at @haddr, with the
+ * bit set by rpcsvc_set_lastfrag turned on.
+ */
+void
+rpcsvc_set_last_frag_header_size (uint32_t size, char *haddr)
+{
+        uint32_t        hdrval = size;
+
+        rpcsvc_set_lastfrag (&hdrval);
+        rpcsvc_set_frag_header_size (hdrval, haddr);
+}
+
+
+/* Given the RPC reply structure and the size of the payload handed by the
+ * RPC program, encode the RPC record header into the buffer pointed to by
+ * @recordstart, which is @rlen bytes long.
+ *
+ * The buffer receives the fragment header followed by the XDR encoded
+ * reply. The fragment header carries the length of the encoded reply plus
+ * @payload, and marks the fragment as the last one of the record.
+ *
+ * Returns a vector covering the fragment header and the encoded reply, but
+ * not the payload. On failure the returned vector has a NULL iov_base.
+ */
+struct iovec
+rpcsvc_record_build_header (char *recordstart, size_t rlen,
+                            struct rpc_msg reply, size_t payload)
+{
+        struct iovec    replyhdr = {0, 0};
+        struct iovec    txrecord = {0, 0};
+        size_t          fraglen = 0;
+        int             ret = -1;
+
+        /* The buffer must have room for the fragment header and at least
+         * something of the reply behind it.
+         */
+        if ((!recordstart) || (rlen <= RPCSVC_FRAGHDR_SIZE)) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Record buffer too small");
+                goto err;
+        }
+
+        /* After leaving aside the bytes for the fragment header, lets
+         * encode the RPC reply structure into the buffer given to us. Only
+         * the space behind the fragment header is available to the encoder.
+         */
+        ret = rpc_reply_to_xdr (&reply,(recordstart + RPCSVC_FRAGHDR_SIZE),
+                                rlen - RPCSVC_FRAGHDR_SIZE, &replyhdr);
+        if (ret == -1) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to create RPC reply");
+                goto err;
+        }
+
+        fraglen = payload + replyhdr.iov_len;
+        gf_log (GF_RPCSVC, GF_LOG_TRACE, "Reply fraglen %zu, payload: %zu, "
+                "rpc hdr: %zu", fraglen, payload, replyhdr.iov_len);
+
+        /* The fragment header has 31 bits for the length; the top bit is
+         * the last-fragment flag. A longer fragment cannot be described.
+         */
+        if (fraglen > 0x7fffffffU) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Reply too large for a"
+                        " single fragment");
+                goto err;
+        }
+
+        /* Since we're not spreading RPC records over multiple fragments
+         * we just set this fragment as the first and last fragment for this
+         * record.
+         */
+        rpcsvc_set_last_frag_header_size (fraglen, recordstart);
+
+        /* Even though the RPC record starts at recordstart+RPCSVC_FRAGHDR_SIZE
+         * we need to transmit the record with the fragment header, which starts
+         * at recordstart.
+         */
+        txrecord.iov_base = recordstart;
+
+        /* Remember, this is only the vec for the RPC header and does not
+         * include the payload above. We needed the payload only to calculate
+         * the size of the full fragment. This size is sent in the fragment
+         * header.
+         */
+        txrecord.iov_len = RPCSVC_FRAGHDR_SIZE + replyhdr.iov_len;
+
+err:
+        return txrecord;
+}
+
+
+/* Queue one reply record on the transmission list of @conn and ask the
+ * event pool to watch the socket for writability so that the list gets
+ * flushed.
+ *
+ * @hdr/@hdriob hold the fragment header plus the encoded RPC reply and are
+ * mandatory. @msgvec/@msgiob hold the program payload; @msgiob may be NULL
+ * when there is no payload, e.g. for an RPC error reply.
+ *
+ * Either both buffers are queued or none is. Returns 0 on success and -1
+ * if an argument is missing, the connection is not active or a buffer could
+ * not be queued. No reference is taken or dropped on the iobufs here.
+ */
+int
+rpcsvc_conn_submit (rpcsvc_conn_t *conn, struct iovec hdr,
+                    struct iobuf *hdriob, struct iovec msgvec,
+                    struct iobuf *msgiob)
+{
+        int             ret = -1;
+        rpcsvc_txbuf_t  *hdrtxb = NULL;
+
+        if ((!conn) || (!hdr.iov_base) || (!hdriob))
+                return -1;
+
+        gf_log (GF_RPCSVC, GF_LOG_TRACE, "Tx Header: %zu, payload: %zu",
+                hdr.iov_len, msgvec.iov_len);
+        /* Now that we have both the RPC and Program buffers in xdr format
+         * lets hand it to the transmission layer.
+         */
+        pthread_mutex_lock (&conn->connlock);
+        {
+                if (!rpcsvc_conn_check_active (conn)) {
+                        gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Connection inactive");
+                        goto unlock_err;
+                }
+
+                ret = rpcsvc_conn_append_txlist (conn, hdr, hdriob,
+                                                 RPCSVC_TXB_FIRST);
+                if (ret == -1) {
+                        gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to append "
+                                "header to transmission list");
+                        goto unlock_err;
+                }
+
+                /* It is possible that this RPC reply is an error reply. In that
+                 * case we might not have been handed a payload.
+                 */
+                if (msgiob)
+                        ret = rpcsvc_conn_append_txlist (conn, msgvec, msgiob,
+                                                         RPCSVC_TXB_LAST);
+                if (ret == -1) {
+                        gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to append"
+                                " payload to transmission list");
+                        /* Take the header back off the list. The caller is
+                         * told the submit failed and releases the header
+                         * iobuf, so a queued txbuf would point at released
+                         * memory. The header is the tail entry since it was
+                         * appended just above under connlock.
+                         * NOTE(review): relies on every other user of
+                         * conn->txbufs holding connlock - confirm.
+                         */
+                        hdrtxb = list_entry (conn->txbufs.prev,
+                                             rpcsvc_txbuf_t, txlist);
+                        list_del (&hdrtxb->txlist);
+                        mem_put (conn->txpool, hdrtxb);
+                        goto unlock_err;
+                }
+        }
+unlock_err:
+        pthread_mutex_unlock (&conn->connlock);
+
+        if (ret == -1)
+                goto err;
+
+        /* Tell event pool, we're interested in poll_out to trigger flush
+         * of our tx buffers.
+         */
+        conn->eventidx = event_select_on (conn->stage->eventpool, conn->sockfd,
+                                          conn->eventidx, -1, 1);
+        ret = 0;
+err:
+
+        return ret;
+}
+
+
+/* Fill the RPC reply structure @reply for the request @req.
+ *
+ * The reply starts out as an empty reply carrying the xid of the request
+ * and is then completed as a denied or an accepted reply depending on
+ * req->rpc_stat. Any other rpc_stat value is logged as an error.
+ *
+ * Returns -1 if @req or @reply is NULL and 0 otherwise, even for an invalid
+ * rpc_stat.
+ */
+int
+rpcsvc_fill_reply (rpcsvc_request_t *req, struct rpc_msg *reply)
+{
+        rpcsvc_program_t        *program = NULL;
+
+        if ((req == NULL) || (reply == NULL))
+                return -1;
+
+        program = rpcsvc_request_program (req);
+        rpc_fill_empty_reply (reply, req->xid);
+
+        switch (req->rpc_stat) {
+        case MSG_DENIED:
+                rpc_fill_denied_reply (reply, req->rpc_err, req->auth_err);
+                break;
+
+        case MSG_ACCEPTED:
+                rpc_fill_accepted_reply (reply, req->rpc_err,
+                                         program->proglowvers,
+                                         program->proghighvers,
+                                         req->verf.flavour,
+                                         req->verf.datalen,
+                                         req->verf.authdata);
+                break;
+
+        default:
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Invalid rpc_stat value");
+                break;
+        }
+
+        return 0;
+}
+
+
+/* Given a request and the size of the reply payload, build the reply and
+ * encode it as a record header into a freshly obtained iobuf. On success
+ * the vector pointed to by @recbuf describes the encoded header.
+ *
+ * @payload is the length of the buffer holding the payload of the RPC
+ * program. It is 0 if an RPC error reply is being constructed. The only
+ * reason it is needed here is that the length of the payload has to be
+ * accounted for in the RPC fragment header.
+ *
+ * Returns the iobuf holding the record header, or NULL on failure.
+ */
+struct iobuf *
+rpcsvc_record_build_record (rpcsvc_request_t *req, size_t payload,
+                            struct iovec *recbuf)
+{
+        struct rpc_msg          rpcreply;
+        struct iovec            hdrvec = {0, };
+        struct iobuf            *hdriob = NULL;
+        rpcsvc_t                *service = NULL;
+        size_t                  bufsize = 0;
+
+        if ((req == NULL) || (req->conn == NULL) || (recbuf == NULL))
+                return NULL;
+
+        /* First, get hold of a buffer which the RPC layer can use. */
+        service = rpcsvc_conn_rpcsvc (req->conn);
+        hdriob = iobuf_get (service->ctx->iobuf_pool);
+        bufsize = iobpool_pagesize ((struct iobuf_pool *)
+                                    service->ctx->iobuf_pool);
+        if (hdriob == NULL) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to get iobuf");
+                return NULL;
+        }
+
+        /* Fill the rpc structure and XDR it into the buffer got above. */
+        rpcsvc_fill_reply (req, &rpcreply);
+        hdrvec = rpcsvc_record_build_header (iobuf_ptr (hdriob), bufsize,
+                                             rpcreply, payload);
+        if (hdrvec.iov_base == NULL) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to build record "
+                        " header");
+                iobuf_unref (hdriob);
+                recbuf->iov_base = NULL;
+                return NULL;
+        }
+
+        recbuf->iov_base = hdrvec.iov_base;
+        recbuf->iov_len = hdrvec.iov_len;
+
+        return hdriob;
+}
+
+
+/*
+ * The function to submit a program message to the RPC service.
+ * This message is added to the transmission queue of the
+ * conn.
+ *
+ * Program callers are not expected to use the msgvec->iov_base
+ * address for anything else.
+ * Nor are they expected to free it once this function returns.
+ * Once the transmission of the buffer is completed by the RPC service,
+ * the memory area as referenced through @msg will be unrefed.
+ * If a higher layer does not want anything to do with this iobuf
+ * after this function returns, it should call unref on it. For keeping
+ * it around till the transmission is actually complete, rpcsvc also refs it.
+ *
+ * @msg can be NULL if an RPC-only message is being sent, which happens when
+ * an RPC error reply is being sent.
+ *
+ * The req structure is returned to the rx pool of the conn on every path
+ * past the initial argument check, so it must not be used by the caller
+ * once this function returns.
+ *
+ * If this function returns an error by returning -1, the
+ * higher layer programs should assume that a disconnection happened
+ * and should know that the conn memory area as well as the req structure
+ * has been freed internally.
+ *
+ * For now, this function assumes that a submit is always called
+ * to send a new record. Later, if there is a situation where different
+ * buffers for the same record come from different sources, then we'll
+ * need to change this code to account for multiple submit calls adding
+ * the buffers into a single record.
+ */
+
+int
+rpcsvc_submit_generic (rpcsvc_request_t *req, struct iovec msgvec,
+                       struct iobuf *msg)
+{
+        int                     ret = -1;
+        struct iobuf            *replyiob = NULL;
+        struct iovec            recordhdr = {0, };
+        rpcsvc_conn_t           *conn = NULL;
+        int                     dounref = 0;
+
+        if ((!req) || (!req->conn))
+                return -1;
+
+        conn = req->conn;
+        gf_log (GF_RPCSVC, GF_LOG_TRACE, "Tx message: %zu", msgvec.iov_len);
+        /* Build the buffer containing the encoded RPC reply. */
+        replyiob = rpcsvc_record_build_record (req, msgvec.iov_len, &recordhdr);
+        if (!replyiob) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR,"Reply record creation failed");
+                goto disconnect_exit;
+        }
+
+        /* Must ref the iobuf got from higher layer so that the higher layer
+         * can rest assured that it can unref it and leave the final freeing
+         * of the buffer to us. Note msg can be NULL if an RPC-only message
+         * was being sent. Happens when an RPC error reply is being sent.
+         */
+        if (msg)
+                iobuf_ref (msg);
+        ret = rpcsvc_conn_submit (conn, recordhdr, replyiob, msgvec, msg);
+
+        if (ret == -1) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to submit message");
+                iobuf_unref (replyiob);
+                /* The payload never made it onto the transmission list,
+                 * so the ref taken for the transmission must be dropped.
+                 */
+                if (msg)
+                        iobuf_unref (msg);
+        }
+
+disconnect_exit:
+        /* Note that a unref is called everytime a reply is sent. This is in
+         * response to the ref that is performed on the conn when a request is
+         * handed to the RPC program.
+         *
+         * The catch, however, is that if the reply is an rpc error, we must
+         * not unref. This is because the ref only contains
+         * references for the actors to which the request was handed plus one
+         * reference maintained by the RPC layer. By unrefing for a case where
+         * no actor was called, we will be losing the ref held for the RPC
+         * layer.
+         *
+         * The request has to be inspected before it goes back to the pool,
+         * and it has to go back to the pool before the unref, since the
+         * unref may destroy the conn together with its pools.
+         */
+        dounref = (rpcsvc_request_accepted (req)) &&
+                  (rpcsvc_request_accepted_success (req));
+        mem_put (conn->rxpool, req);
+
+        if (dounref)
+                rpcsvc_conn_unref (conn);
+
+        return ret;
+}
+
+
+/*
+ * Queue one payload vector on the tx list of @req.
+ *
+ * A tx buffer is created for @msgvec, references are taken on @iob and
+ * @iobref when they are given, and msgvec.iov_len is added to
+ * req->payloadsize. A non-zero @finalvector marks the buffer as the last
+ * vector of the record.
+ *
+ * Returns 0 on success, -1 on invalid arguments or if the tx buffer could
+ * not be initialised.
+ */
+int
+rpcsvc_request_attach_vector (rpcsvc_request_t *req, struct iovec msgvec,
+                              struct iobuf *iob, struct iobref *iobref,
+                              int finalvector)
+{
+        rpcsvc_txbuf_t  *newbuf = NULL;
+        int             behaviour = 0;
+
+        if (!(req && msgvec.iov_base))
+                return -1;
+
+        gf_log (GF_RPCSVC, GF_LOG_TRACE, "Tx Vector: %ld", msgvec.iov_len);
+
+        /* Only the "last vector" flag is under the caller's control, the
+         * first vector of a record is always the RPC header.
+         */
+        behaviour = finalvector ? RPCSVC_TXB_LAST : 0;
+
+        newbuf = rpcsvc_init_txbuf (req->conn, msgvec, iob, iobref, behaviour);
+        if (newbuf == NULL) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Could not init tx buf");
+                return -1;
+        }
+
+        if (iob != NULL)
+                iobuf_ref (iob);
+        if (iobref != NULL)
+                iobref_ref (iobref);
+
+        req->payloadsize += msgvec.iov_len;
+        list_add_tail (&newbuf->txlist, &req->txlist);
+
+        return 0;
+}
+
+
+/*
+ * Queue the vectors attached to @req, preceded by a freshly built RPC reply
+ * header, on the connection's tx list and request poll-out events for the
+ * socket.
+ *
+ * The request is returned to conn->rxpool and one reference on the
+ * connection is dropped before returning, whether or not queuing succeeded.
+ *
+ * Returns 0 on success and -1 on invalid arguments or if the reply header
+ * or its tx buffer could not be created.
+ */
+int
+rpcsvc_submit_vectors (rpcsvc_request_t *req)
+{
+        int                     ret = -1;
+        struct iobuf            *replyiob = NULL;
+        struct iovec            recordhdr = {0, };
+        rpcsvc_txbuf_t          *rpctxb = NULL;
+        rpcsvc_conn_t           *conn = NULL;
+
+        if ((!req) || (!req->conn))
+                return -1;
+
+        conn = req->conn;
+
+        /* Build the buffer containing the encoded RPC reply. */
+        replyiob = rpcsvc_record_build_record (req, req->payloadsize,
+                                               &recordhdr);
+        if (!replyiob) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR,"Reply record creation failed");
+                goto disconnect_exit;
+        }
+
+        rpctxb = rpcsvc_init_txbuf (conn, recordhdr, replyiob, NULL,
+                                    RPCSVC_TXB_FIRST);
+        if (!rpctxb) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to create tx buf");
+                goto disconnect_exit;
+        }
+
+        pthread_mutex_lock (&conn->connlock);
+        {
+                /* Move the payload vectors over first, then put the RPC
+                 * header in front of them.
+                 */
+                list_splice_init (&req->txlist, &conn->txbufs);
+                list_add (&rpctxb->txlist, &conn->txbufs);
+        }
+        pthread_mutex_unlock (&conn->connlock);
+
+        ret = 0;
+        conn->eventidx = event_select_on (conn->stage->eventpool,
+                                          conn->sockfd, conn->eventidx, -1, 1);
+disconnect_exit:
+        if ((ret == -1) && (replyiob))
+                iobuf_unref (replyiob);
+
+        /* NOTE(review): on failure the tx buffers still linked on
+         * req->txlist, and the references taken for them in
+         * rpcsvc_request_attach_vector (), are not released here.
+         */
+
+        /* Give the request back while the connection is still guaranteed
+         * to be alive: the unref below may drop the last reference, after
+         * which conn->rxpool must not be dereferenced.
+         */
+        mem_put (conn->rxpool, req);
+
+        /* Note that a unref is called everytime a reply is sent. This is in
+         * response to the ref that is performed on the conn when a request is
+         * handed to the RPC program.
+         */
+        rpcsvc_conn_unref (conn);
+
+        return ret;
+}
+
+
+/*
+ * Send an RPC-only reply for @req, i.e. a reply that carries no program
+ * payload. The request is expected to already hold the RPC error numbers
+ * that should go out.
+ *
+ * Returns -1 if @req is NULL, otherwise the result of
+ * rpcsvc_submit_generic ().
+ */
+int
+rpcsvc_error_reply (rpcsvc_request_t *req)
+{
+        struct iovec    emptyvec = {0, };
+
+        return req ? rpcsvc_submit_generic (req, emptyvec, NULL) : -1;
+}
+
+
+/*
+ * Fill @req from the decoded call message @callmsg.
+ *
+ * @progmsg describes the program payload of the call and is stored in
+ * req->msg. The request starts out as MSG_DENIED.
+ *
+ * Returns @req, or NULL if any of @conn, @callmsg or @req is NULL.
+ */
+rpcsvc_request_t *
+rpcsvc_request_init (rpcsvc_conn_t *conn, struct rpc_msg *callmsg,
+                     struct iovec progmsg, rpcsvc_request_t *req)
+{
+        if (!(conn && callmsg && req))
+                return NULL;
+
+        /* Where the request came from and what it carries. */
+        req->conn = conn;
+        req->recordiob = conn->rstate.activeiob;
+        req->msg = progmsg;
+        req->payloadsize = 0;
+        INIT_LIST_HEAD (&req->txlist);
+
+        /* Every request is considered denied until proven otherwise. */
+        req->rpc_stat = MSG_DENIED;
+
+        /* Call identification. */
+        req->xid = rpc_call_xid (callmsg);
+        req->prognum = rpc_call_program (callmsg);
+        req->progver = rpc_call_progver (callmsg);
+        req->procnum = rpc_call_progproc (callmsg);
+
+        /* The auth data bytes were already copied into the request while
+         * decoding, only the meta-data about them is missing.
+         */
+        req->cred.flavour = rpc_call_cred_flavour (callmsg);
+        req->cred.datalen = rpc_call_cred_len (callmsg);
+        req->verf.flavour = rpc_call_verf_flavour (callmsg);
+        req->verf.datalen = rpc_call_verf_len (callmsg);
+
+        /* AUTH */
+        rpcsvc_auth_request_init (req);
+
+        return req;
+}
+
+
+/*
+ * Allocate a request for the record held in conn->rstate.activeiob, decode
+ * the RPC call into it and run the RPC version and authentication checks.
+ *
+ * Returns the request, marked MSG_ACCEPTED, on success. On any failure an
+ * RPC error reply is attempted through rpcsvc_error_reply () and NULL is
+ * returned.
+ */
+rpcsvc_request_t *
+rpcsvc_request_create (rpcsvc_conn_t *conn)
+{
+        char                    *msgbuf = NULL;
+        struct rpc_msg          rpcmsg;
+        struct iovec            progmsg;        /* RPC Program payload */
+        rpcsvc_request_t        *req = NULL;
+        int                     ret = -1;
+
+        if (!conn)
+                return NULL;
+
+        /* We need to allocate the request before actually calling
+         * rpcsvc_request_init on the request so that we can fill the auth
+         * data directly into the request structure from the message iobuf.
+         * This avoids a need to keep a temp buffer into which the auth data
+         * would've been copied otherwise.
+         */
+        rpcsvc_alloc_request (conn, req);
+        if (!req) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to alloc request");
+                goto err;
+        }
+
+        msgbuf = iobuf_ptr (conn->rstate.activeiob);
+        ret = xdr_to_rpc_call (msgbuf, conn->rstate.recordsize, &rpcmsg,
+                               &progmsg, req->cred.authdata,req->verf.authdata);
+
+        if (ret == -1) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "RPC call decoding failed");
+                rpcsvc_request_seterr (req, GARBAGE_ARGS);
+                /* rpcsvc_request_init () has not run yet, so the request is
+                 * not tied to its connection. rpcsvc_submit_generic ()
+                 * rejects a request without a connection, which would drop
+                 * the error reply and never release the request.
+                 */
+                req->conn = conn;
+                goto err;
+        }
+
+        ret = -1;
+        rpcsvc_request_init (conn, &rpcmsg, progmsg, req);
+
+        gf_log (GF_RPCSVC, GF_LOG_DEBUG, "RPC XID: %lx, Ver: %ld, Program: %ld,"
+                " ProgVers: %ld, Proc: %ld", rpc_call_xid (&rpcmsg),
+                rpc_call_rpcvers (&rpcmsg), rpc_call_program (&rpcmsg),
+                rpc_call_progver (&rpcmsg), rpc_call_progproc (&rpcmsg));
+
+        if (rpc_call_rpcvers (&rpcmsg) != 2) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "RPC version not supported");
+                rpcsvc_request_seterr (req, RPC_MISMATCH);
+                goto err;
+        }
+
+        ret = rpcsvc_authenticate (req);
+        if (ret == RPCSVC_AUTH_REJECT) {
+                /* No need to set auth_err, that is the responsibility of
+                 * the authentication handler since only that knows what exact
+                 * error happened.
+                 */
+                rpcsvc_request_seterr (req, AUTH_ERROR);
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed authentication");
+                ret = -1;
+                goto err;
+        }
+
+        /* Decoding, the RPC version check and authentication did not reject
+         * the call, so it is considered accepted from here on.
+         */
+        req->rpc_stat = MSG_ACCEPTED;
+        ret = 0;
+err:
+        if (ret == -1) {
+                /* The reply path owns the request from here on, the caller
+                 * only needs to know that no request is available.
+                 */
+                ret = rpcsvc_error_reply (req);
+                req = NULL;
+        }
+
+        return req;
+}
+
+
+/*
+ * Turn the record assembled on @conn into a request and hand it to the
+ * actor of the addressed program.
+ *
+ * A reference is taken on the connection before the actor is called. If no
+ * actor ran, or the actor returned RPCSVC_ACTOR_ERROR, an RPC error reply
+ * is sent instead.
+ *
+ * Returns -1 if @conn is NULL or no request could be created, 0 otherwise.
+ */
+int
+rpcsvc_handle_rpc_call (rpcsvc_conn_t *conn)
+{
+        rpcsvc_actor_t          *handler = NULL;
+        rpcsvc_request_t        *request = NULL;
+        int                     status = -1;
+
+        if (!conn)
+                return -1;
+
+        request = rpcsvc_request_create (conn);
+        if (!request)
+                return -1;
+
+        if (rpcsvc_request_accepted (request)) {
+                handler = rpcsvc_program_actor (conn, request);
+                if ((handler) && (handler->actor)) {
+                        rpcsvc_conn_ref (conn);
+                        status = handler->actor (request);
+                }
+        }
+
+        if (status == RPCSVC_ACTOR_ERROR)
+                rpcsvc_error_reply (request);
+
+        /* The outcome of the call is not propagated any further, whatever
+         * had to be reported went out as a reply.
+         */
+        return 0;
+}
+
+#define rpc_call_cred_addr(rs) (iobuf_ptr ((rs)->activeiob) + RPCSVC_BARERPC_MSGSZ - 4)
+
+/*
+ * Return, in host byte order, the credential length field of the RPC call
+ * being assembled in rs->activeiob. The field is the 32-bit word located 4
+ * bytes before offset RPCSVC_BARERPC_MSGSZ of that buffer.
+ *
+ * NOTE(review): the value comes straight from the network and is not
+ * range-checked here; confirm that callers bound it before using it as a
+ * read size or buffer offset.
+ */
+uint32_t
+rpcsvc_call_credlen (rpcsvc_record_state_t *rs)
+{
+        char            *credaddr = NULL;
+        uint32_t        credlen_nw = 0;
+
+        /* Position to the start of the credential length field. */
+        credaddr = rpc_call_cred_addr (rs);
+
+        /* Copy the word out instead of dereferencing a casted pointer: the
+         * buffer is a byte array and carries no alignment or type
+         * guarantee for a uint32_t access.
+         */
+        memcpy (&credlen_nw, credaddr, sizeof (credlen_nw));
+
+        return ntohl (credlen_nw);
+}
+
+/*
+ * Return, in host byte order, the verifier length field of the RPC call
+ * being assembled in rs->activeiob. The field is found by skipping the
+ * credential length word and the credential bytes that follow it.
+ *
+ * NOTE(review): the credential length used as an offset is taken from the
+ * network without a bound check in this function; confirm that it cannot
+ * point past the data read into the buffer.
+ */
+uint32_t
+rpcsvc_call_verflen (rpcsvc_record_state_t *rs)
+{
+        char            *verfaddr = NULL;
+        uint32_t        verflen_nw = 0;
+        uint32_t        credlen = 0;
+
+        /* Position to the start of the verifier length field. */
+        credlen = rpcsvc_call_credlen (rs);
+        verfaddr = (rpc_call_cred_addr (rs) + 4 + credlen);
+
+        /* credlen is supplied by the peer, so verfaddr is not guaranteed to
+         * be 4-byte aligned. Copy the word out rather than dereferencing a
+         * casted pointer.
+         */
+        memcpy (&verflen_nw, verfaddr, sizeof (verflen_nw));
+
+        return ntohl (verflen_nw);
+}
+
+
+/*
+ * Account for the verifier bytes of a vectored call by adding the verifier
+ * length to rs->recordsize. Does nothing when @rs is NULL.
+ */
+void
+rpcsvc_update_vectored_verf (rpcsvc_record_state_t *rs)
+{
+        if (rs)
+                rs->recordsize += rpcsvc_call_verflen (rs);
+}
+
+
+/*
+ * Create the request for a vectored call and ask the actor's vector_sizer
+ * how many bytes of procedure header have to be read next.
+ *
+ * On success the record state moves to RPCSVC_VECTOR_READPROCHDR and the
+ * request is remembered in conn->vectoredreq. On failure the record state
+ * stays at RPCSVC_VECTOR_IGNORE, with remainingfrag covering the rest of
+ * the fragment, and an RPC error reply is sent when a request exists.
+ */
+void
+rpcsvc_handle_vectored_prep_rpc_call (rpcsvc_conn_t *conn)
+{
+        rpcsvc_actor_t          *actor = NULL;
+        rpcsvc_request_t        *req = NULL;
+        rpcsvc_record_state_t   *rs = NULL;
+        int                     ret = -1;
+        ssize_t                 remfrag = RPCSVC_ACTOR_ERROR;
+        int                     newbuf = 0;
+
+        if (!conn)
+                return;
+
+        rs = &conn->rstate;
+
+        /* In case one of the steps below fails, we need to make sure that the
+         * remaining frag in the kernel's buffers are read-out so that the
+         * requests that follow can be served.
+         */
+        rs->remainingfrag = rs->fragsize - rs->recordsize;
+        rs->vecstate = RPCSVC_VECTOR_IGNORE;
+        req = rpcsvc_request_create (conn);
+        if (!req)
+                return;
+
+        /* ret is still -1 for the two checks below, so both of them end in
+         * an error reply.
+         */
+        if (!rpcsvc_request_accepted (req))
+                goto err_reply;
+
+        actor = rpcsvc_program_actor (conn, req);
+        if (!actor)
+                goto err_reply;
+
+        if (!actor->vector_sizer) {
+                ret = -1;
+                rpcsvc_request_seterr (req, PROC_UNAVAIL);
+                goto err_reply;
+        }
+
+        /* Hold the connection for the duration of the sizer call only. */
+        rpcsvc_conn_ref (conn);
+        ret = actor->vector_sizer (req, &remfrag, &newbuf);
+        rpcsvc_conn_unref (conn);
+
+        if (ret == RPCSVC_ACTOR_ERROR) {
+                ret = -1;
+                rpcsvc_request_seterr (req, SYSTEM_ERR);
+                goto err_reply;
+        }
+
+        rs->remainingfrag = remfrag;
+        rs->vecstate = RPCSVC_VECTOR_READPROCHDR;
+        gf_log (GF_RPCSVC, GF_LOG_TRACE, "Vectored RPC proc header remaining:"
+                " %d", rs->remainingfrag);
+        conn->vectoredreq = req;
+
+        /* Store the reference to the current frag pointer. This is where the
+         * proc header will be read into.
+         */
+        req->msg.iov_base = rs->fragcurrent;
+        req->msg.iov_len = rs->remainingfrag;
+        ret = 0;
+
+err_reply:
+        /* No need to propagate the error beyond this function since the
+         * reply is queued here.
+         */
+        if (ret == -1)
+                rpcsvc_error_reply (req);
+
+        return;
+}
+
+
+/*
+ * Called after the 8 bytes holding the verifier flavour and length of a
+ * vectored call were read. Adds those 8 bytes to the record size and then
+ * either schedules the read of the verifier body or, when the verifier is
+ * empty, goes straight to preparing the call.
+ */
+void
+rpcsvc_update_vectored_verfsz (rpcsvc_conn_t *conn)
+{
+        rpcsvc_record_state_t   *state = NULL;
+        uint32_t                verfbytes = 0;
+
+        if (!conn)
+                return;
+
+        state = &conn->rstate;
+        verfbytes = rpcsvc_call_verflen (state);
+        state->recordsize += 8;
+
+        if (verfbytes == 0) {
+                gf_log (GF_RPCSVC, GF_LOG_TRACE, "Vectored RPC preparing call");
+                rpcsvc_handle_vectored_prep_rpc_call (conn);
+                return;
+        }
+
+        state->remainingfrag = verfbytes;
+        gf_log (GF_RPCSVC, GF_LOG_TRACE, "Vectored RPC verf remaining: "
+                " %d", state->remainingfrag);
+        state->vecstate = RPCSVC_VECTOR_READVERF;
+}
+
+
+/*
+ * Called after the credential body of a vectored call was read. Adds the
+ * credential length to the record size and schedules the read of the 8
+ * bytes that hold the verifier flavour and the verifier length.
+ */
+void
+rpcsvc_update_vectored_cred (rpcsvc_record_state_t *rs)
+{
+        if (!rs)
+                return;
+
+        rs->recordsize += rpcsvc_call_credlen (rs);
+
+        /* Next up: verf flavour and verf len, one 32-bit word each. */
+        rs->vecstate = RPCSVC_VECTOR_READVERFSZ;
+        rs->remainingfrag = (2 * sizeof (uint32_t));
+        gf_log (GF_RPCSVC, GF_LOG_TRACE, "Vectored RPC verfsz remaining: %d",
+                rs->remainingfrag);
+}
+
+/*
+ * Called after the bare RPC message of a vectored call was read. Sets the
+ * record size to RPCSVC_BARERPC_MSGSZ and schedules the next read: the
+ * credential body when there is one, otherwise the 8 bytes of verifier
+ * flavour and length.
+ */
+void
+rpcsvc_update_vectored_barerpc (rpcsvc_record_state_t *rs)
+{
+        uint32_t        credbytes = 0;
+
+        if (!rs)
+                return;
+
+        credbytes = rpcsvc_call_credlen (rs);
+        rs->recordsize = RPCSVC_BARERPC_MSGSZ;
+
+        if (credbytes != 0) {
+                rs->remainingfrag = credbytes;
+                gf_log (GF_RPCSVC, GF_LOG_TRACE, "Vectored RPC cred remaining: "
+                        "%d", rs->remainingfrag);
+                rs->vecstate = RPCSVC_VECTOR_READCRED;
+                return;
+        }
+
+        rs->remainingfrag = 8;
+        gf_log (GF_RPCSVC, GF_LOG_TRACE, "Vectored RPC verfsz remaining"
+                ": %d", rs->remainingfrag);
+        rs->vecstate = RPCSVC_VECTOR_READVERFSZ;
+}
+
+
+/*
+ * Called when the bytes requested for the procedure header of the vectored
+ * request in conn->vectoredreq have been read. The actor's vector_sizer is
+ * asked again how much more must be read and whether that data goes into a
+ * new buffer. In the latter case a fresh iobuf becomes the read target and
+ * the record state moves to RPCSVC_VECTOR_READVEC.
+ *
+ * On failure an RPC error reply is sent for the request.
+ *
+ * NOTE(review): after an error reply the request has been handed to the
+ * reply path while conn->vectoredreq still points at it and the vector
+ * state is left unchanged; confirm how the state machine recovers.
+ */
+void
+rpcsvc_handle_vectored_rpc_call (rpcsvc_conn_t *conn)
+{
+        rpcsvc_actor_t          *actor = NULL;
+        rpcsvc_request_t        *req = NULL;
+        rpcsvc_record_state_t   *rs = NULL;
+        rpcsvc_t                *svc = NULL;
+        int                     ret = -1;
+        ssize_t                 remfrag = -1;
+        int                     newbuf = 0;
+
+        if (!conn)
+                return;
+
+        rs = &conn->rstate;
+
+        req = conn->vectoredreq;
+        svc = rpcsvc_conn_rpcsvc (conn);
+
+        if (!req)
+                return;
+
+        /* ret is still -1 here, so a missing actor ends in an error reply. */
+        actor = rpcsvc_program_actor (conn, req);
+        if (!actor)
+                goto err_reply;
+
+        if (!actor->vector_sizer) {
+                ret = -1;
+                rpcsvc_request_seterr (req, PROC_UNAVAIL);
+                goto err_reply;
+        }
+
+        /* Number of bytes read since req->msg.iov_base was recorded. */
+        req->msg.iov_len = (unsigned long)((long)rs->fragcurrent - (long)req->msg.iov_base);
+        rpcsvc_conn_ref (conn);
+        ret = actor->vector_sizer (req, &remfrag, &newbuf);
+        rpcsvc_conn_unref (conn);
+        if (ret == RPCSVC_ACTOR_ERROR) {
+                ret = -1;
+                rpcsvc_request_seterr (req, SYSTEM_ERR);
+                goto err_reply;
+        }
+
+        if (newbuf) {
+                rs->vectoriob = iobuf_get (svc->ctx->iobuf_pool);
+                if (!rs->vectoriob) {
+                        /* Without a buffer there is nowhere to read the
+                         * vector into; fail the request instead of
+                         * dereferencing a NULL iobuf below.
+                         */
+                        gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to get iobuf"
+                                " for vectored read");
+                        ret = -1;
+                        rpcsvc_request_seterr (req, SYSTEM_ERR);
+                        goto err_reply;
+                }
+
+                rs->fragcurrent = iobuf_ptr (rs->vectoriob);
+                rs->vecstate = RPCSVC_VECTOR_READVEC;
+                rs->remainingfrag = remfrag;
+                gf_log (GF_RPCSVC, GF_LOG_TRACE, "Vectored RPC buf remaining:"
+                        " %d", rs->remainingfrag);
+        } else {
+                rs->remainingfrag = remfrag;
+                gf_log (GF_RPCSVC, GF_LOG_TRACE, "Vectored RPC proc remaining:"
+                        " %d", rs->remainingfrag);
+        }
+
+        ret = 0;
+err_reply:
+        /* No need to propagate the error beyond this function since the
+         * reply is queued here.
+         */
+        if (ret == -1)
+                rpcsvc_error_reply (req);
+
+        return;
+}
+
+
+
+/*
+ * Hand the vectored request in conn->vectoredreq, together with the iobuf
+ * that received the vector data, to the actor's vector_actor.
+ *
+ * A reference is taken on the connection before the actor is called. If
+ * the actor has no vectored handler, PROC_UNAVAIL is set on the request.
+ * An RPC error reply is sent whenever the outcome equals
+ * RPCSVC_ACTOR_ERROR.
+ */
+void
+rpcsvc_record_vectored_call_actor (rpcsvc_conn_t *conn)
+{
+        rpcsvc_actor_t          *actor = NULL;
+        rpcsvc_request_t        *req = NULL;
+        rpcsvc_record_state_t   *rs = NULL;
+        int                     ret = -1;
+
+        if (!conn)
+                return;
+
+        rs = &conn->rstate;
+        req = conn->vectoredreq;
+
+        if (!req)
+                return;
+
+        /* ret keeps its initial value when no actor is found. */
+        actor = rpcsvc_program_actor (conn, req);
+        if (!actor)
+                goto err_reply;
+
+        if (actor->vector_actor) {
+                rpcsvc_conn_ref (conn);
+                ret = actor->vector_actor (req, rs->vectoriob);
+        } else {
+                rpcsvc_request_seterr (req, PROC_UNAVAIL);
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "No vectored handler present");
+                ret = RPCSVC_ACTOR_ERROR;
+        }
+
+err_reply:
+        /* No need to propagate the error beyond this function since the
+         * reply is queued here.
+         */
+        if (ret == RPCSVC_ACTOR_ERROR)
+                rpcsvc_error_reply (req);
+
+        return;
+}
+
+
+
+/*
+ * Advance the vectored-read state machine of @conn by one step. Exactly
+ * one handler runs, chosen by the state the record is currently in.
+ *
+ * Always returns 0.
+ */
+ssize_t
+rpcsvc_update_vectored_state (rpcsvc_conn_t *conn)
+{
+        rpcsvc_record_state_t   *state = NULL;
+        rpcsvc_t                *service = NULL;
+
+        if (!conn)
+                return 0;
+
+        /* On the first step the activeiob holds exactly the first
+         * RPCSVC_BARERPC_MSGSZ bytes, which is what is needed to determine
+         * the program and actor. Each following step works out how many
+         * more bytes are needed from the network for the next part of the
+         * RPC message, starting with the credentials.
+         */
+        state = &conn->rstate;
+
+        if (rpcsvc_record_vectored_baremsg (state)) {
+                rpcsvc_update_vectored_barerpc (state);
+        } else if (rpcsvc_record_vectored_cred (state)) {
+                rpcsvc_update_vectored_cred (state);
+        } else if (rpcsvc_record_vectored_verfsz (state)) {
+                rpcsvc_update_vectored_verfsz (conn);
+        } else if (rpcsvc_record_vectored_verfread (state)) {
+                rpcsvc_update_vectored_verf (state);
+                gf_log (GF_RPCSVC, GF_LOG_TRACE, "Vectored RPC preparing call");
+                rpcsvc_handle_vectored_prep_rpc_call (conn);
+        } else if (rpcsvc_record_vectored_readprochdr (state)) {
+                rpcsvc_handle_vectored_rpc_call (conn);
+        } else if (rpcsvc_record_vectored_ignore (state)) {
+                /* The ignored bytes are consumed, start a new record. */
+                service = rpcsvc_conn_rpcsvc (conn);
+                rpcsvc_record_init (state, service->ctx->iobuf_pool);
+        } else if (rpcsvc_record_vectored_readvec (state)) {
+                /* The vector is in, call the actor and start a new record. */
+                service = rpcsvc_conn_rpcsvc (conn);
+                gf_log (GF_RPCSVC, GF_LOG_TRACE, "Vectored RPC vector read");
+                rpcsvc_record_vectored_call_actor (conn);
+                rpcsvc_record_init (state, service->ctx->iobuf_pool);
+        }
+
+        return 0;
+}
+
+
+ssize_t
+rpcsvc_record_read_partial_frag (rpcsvc_record_state_t *rs, ssize_t dataread);
+
+/*
+ * Account for @dataread bytes read into the current part of a vectored
+ * fragment.
+ *
+ * The bookkeeping is the same as for a regular partial fragment read,
+ * because remainingfrag in the record state already holds the size of the
+ * part being read, so the work is delegated to
+ * rpcsvc_record_read_partial_frag ().
+ *
+ * Returns @dataread unchanged if @conn is NULL, otherwise the value
+ * returned by rpcsvc_record_read_partial_frag ().
+ */
+ssize_t
+rpcsvc_update_vectored_msg (rpcsvc_conn_t *conn, ssize_t dataread)
+{
+        return conn ? rpcsvc_record_read_partial_frag (&conn->rstate, dataread)
+                    : dataread;
+}
+
+/* FIX: This first version of vectored reading assumes, for the sake of
+ * simplicity, that dataread is always equal to RPCSVC_BARERPC_MSGSZ, on the
+ * belief that no poll_in ever reads more bytes than needed.
+ *
+ * Handles @dataread bytes that were read for a vectored fragment: the read
+ * is accounted for and, once the part being read is complete, the vectored
+ * state machine is advanced.
+ *
+ * Since the poll_in handler uses the remainingfrag field to determine how
+ * much to read from the network, that field is what limits each read to
+ * the part of the RPC message that is needed next, starting with the
+ * RPCSVC_BARERPC_MSGSZ bytes that identify the (program, actor) to call.
+ *
+ * Returns @dataread unchanged if @conn is NULL, otherwise the number
+ * returned by the last helper that ran.
+ */
+ssize_t
+rpcsvc_handle_vectored_frag (rpcsvc_conn_t *conn, ssize_t dataread)
+{
+        ssize_t         leftover = dataread;
+
+        if (!conn)
+                return leftover;
+
+        leftover = rpcsvc_update_vectored_msg (conn, leftover);
+        if (conn->rstate.remainingfrag != 0)
+                return leftover;
+
+        gf_log (GF_RPCSVC, GF_LOG_TRACE, "Vectored frag complete");
+        return rpcsvc_update_vectored_state (conn);
+}
+
+
+/*
+ * Feed @dataread freshly read bytes into the record state of @conn and,
+ * once a full record has arrived, dispatch the RPC call and reset the
+ * record state.
+ *
+ * On entry, fragcurrent points to the start of the area into which the
+ * @dataread bytes were read.
+ *
+ * Returns -1 if @conn is NULL, 0 otherwise.
+ */
+int
+rpcsvc_record_update_state (rpcsvc_conn_t *conn, ssize_t dataread)
+{
+        rpcsvc_record_state_t   *state = NULL;
+        rpcsvc_t                *service = NULL;
+
+        if (!conn)
+                return -1;
+
+        state = &conn->rstate;
+
+        if (rpcsvc_record_readfraghdr(state))
+                dataread = rpcsvc_record_update_fraghdr (state, dataread);
+
+        /* The frag size field is the minimum needed to trigger the vectored
+         * handler. The fragsize member stays set to that size until the
+         * request is completely extracted from the network, so the same
+         * check keeps identifying a vectored fragment even when reading it
+         * takes several rounds of network IO.
+         */
+        if ((rpcsvc_record_readfrag(state)) && (dataread > 0)) {
+                if (rpcsvc_record_vectored (state)) {
+                        gf_log (GF_RPCSVC, GF_LOG_TRACE, "Vectored frag");
+                        dataread = rpcsvc_handle_vectored_frag (conn, dataread);
+                } else {
+                        gf_log (GF_RPCSVC, GF_LOG_TRACE, "Regular frag");
+                        dataread = rpcsvc_record_update_frag (state, dataread);
+                }
+        }
+
+        /* Bytes left over at this point should not happen, since a read
+         * never asks for more than the current fragment needs.
+         */
+        if (dataread > 0) {
+                gf_log (GF_RPCSVC, GF_LOG_TRACE, "Data Left: %ld", dataread);
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Unwanted data read from "
+                        " connection.");
+        }
+
+        /* Waiting for a new fragment header right after the last fragment
+         * of a record means the record is complete: translate the XDR
+         * formatted buffer in activeiob and make the upcall to the protocol
+         * actor.
+         */
+        if ((rpcsvc_record_readfraghdr(state)) && (state->islastfrag)) {
+                gf_log (GF_RPCSVC, GF_LOG_TRACE, "Full Record Received.");
+                rpcsvc_handle_rpc_call (conn);
+                service = rpcsvc_conn_rpcsvc (conn);
+                rpcsvc_record_init (state, service->ctx->iobuf_pool);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Return the address the next read should store data at: the current
+ * position in the fragment header while a header is being read, the
+ * current position in the fragment while a fragment is being read, NULL
+ * in any other state.
+ */
+char *
+rpcsvc_record_read_addr (rpcsvc_record_state_t *rs)
+{
+        char    *target = NULL;
+
+        if (rpcsvc_record_readfraghdr (rs)) {
+                target = rpcsvc_record_currenthdr_addr (rs);
+        } else if (rpcsvc_record_readfrag (rs)) {
+                target = rpcsvc_record_currentfrag_addr (rs);
+        }
+
+        return target;
+}
+
+
+/*
+ * Read from the socket of @conn whatever the record state asks for next
+ * and feed the bytes into the record state machine.
+ *
+ * Returns the result of rpcsvc_record_update_state () when data was read.
+ * Returns -1 if there is no read address or read size, or if the socket
+ * read did not return any data.
+ */
+int
+rpcsvc_conn_data_poll_in (rpcsvc_conn_t *conn)
+{
+        ssize_t         dataread = -1;
+        size_t          readsize = 0;
+        char            *readaddr = NULL;
+
+        readaddr = rpcsvc_record_read_addr (&conn->rstate);
+        if (readaddr == NULL)
+                return -1;
+
+        readsize = rpcsvc_record_read_size (&conn->rstate);
+        if (readsize == (size_t)-1)
+                return -1;
+
+        dataread = rpcsvc_socket_read (conn->sockfd, readaddr, readsize);
+        gf_log (GF_RPCSVC, GF_LOG_TRACE, "conn: 0x%lx, readsize: %ld, dataread: %ld",
+                (long)conn, readsize, dataread);
+
+        if (dataread <= 0)
+                return -1;
+
+        return rpcsvc_record_update_state (conn, dataread);
+}
+
+
+/*
+ * Handle an error event on @conn by de-initialising the connection.
+ * Always returns 0.
+ */
+int
+rpcsvc_conn_data_poll_err (rpcsvc_conn_t *conn)
+{
+        gf_log (GF_RPCSVC, GF_LOG_TRACE, "Received error event");
+
+        rpcsvc_conn_deinit (conn);
+
+        return 0;
+}
+
+
+/*
+ * Transmit the buffers queued on conn->txbufs, in list order.
+ *
+ * Each buffer is written starting at its recorded offset. A buffer that
+ * was sent completely is unlinked, its iobuf and iobref are unref'd and
+ * the tx buffer goes back to conn->txpool. Transmission stops at the first
+ * write that returns -1. Once the list is empty, event_select_on () is
+ * called with a 0 poll-out argument.
+ *
+ * rpcsvc_conn_data_poll_out () calls this with conn->connlock held.
+ *
+ * Returns -1 if @conn is NULL, 0 otherwise; write errors are not reported
+ * to the caller.
+ *
+ * NOTE(review): a write that returns a short count without sending the
+ * whole buffer is retried immediately via tx_remaining; confirm that
+ * rpcsvc_socket_write () cannot keep returning 0 on a socket that is not
+ * writable, since that would spin here.
+ */
+int
+__rpcsvc_conn_data_poll_out (rpcsvc_conn_t *conn)
+{
+        rpcsvc_txbuf_t          *txbuf = NULL;
+        rpcsvc_txbuf_t          *tmp = NULL;
+        ssize_t                 written = -1;
+        char                    *writeaddr = NULL;
+        size_t                  writesize = 0;
+
+        if (!conn)
+                return -1;
+
+        /* Attempt transmission of each of the pending buffers */
+        list_for_each_entry_safe (txbuf, tmp, &conn->txbufs, txlist) {
+tx_remaining:
+                /* Convert to char * before adding the offset, arithmetic on
+                 * a void pointer is not defined by ISO C.
+                 */
+                writeaddr = (char *)txbuf->buf.iov_base + txbuf->offset;
+                writesize = (txbuf->buf.iov_len - txbuf->offset);
+
+                if (txbuf->txbehave & RPCSVC_TXB_FIRST) {
+                        gf_log (GF_RPCSVC, GF_LOG_TRACE, "First Tx Buf");
+                        rpcsvc_socket_block_tx (conn->sockfd);
+                }
+
+                written = rpcsvc_socket_write (conn->sockfd, writeaddr,
+                                               writesize);
+                if (txbuf->txbehave & RPCSVC_TXB_LAST) {
+                        gf_log (GF_RPCSVC, GF_LOG_TRACE, "Last Tx Buf");
+                        rpcsvc_socket_unblock_tx (conn->sockfd);
+                }
+                gf_log (GF_RPCSVC, GF_LOG_TRACE, "conn: 0x%lx, Tx request: %ld,"
+                        " Tx sent: %ld", (long)conn, (long)writesize,
+                        (long)written);
+
+                /* There was an error transmitting this buffer */
+                if (written == -1)
+                        break;
+
+                if (written >= 0)
+                        txbuf->offset += written;
+
+                /* If the current buffer has been completely transmitted,
+                 * delete it from the list and move on to the next buffer.
+                 */
+                if (txbuf->offset == txbuf->buf.iov_len) {
+                        /* It doesnt matter who ref'ed this iobuf, rpcsvc for
+                         * its own header or a RPC program.
+                         */
+                        if (txbuf->iob)
+                                iobuf_unref (txbuf->iob);
+                        if (txbuf->iobref)
+                                iobref_unref (txbuf->iobref);
+
+                        list_del (&txbuf->txlist);
+                        mem_put (conn->txpool, txbuf);
+                } else
+                        /* If the current buffer is incompletely tx'd, do not
+                         * go to the head of the loop, since that moves us to
+                         * the next buffer.
+                         */
+                        goto tx_remaining;
+        }
+
+        /* If we've broken out of the loop above then we must unblock
+         * the transmission now.
+         */
+        rpcsvc_socket_unblock_tx (conn->sockfd);
+        if (list_empty (&conn->txbufs))
+                conn->eventidx = event_select_on (conn->stage->eventpool,
+                                                  conn->sockfd, conn->eventidx,
+                                                  -1, 0);
+
+        return 0;
+}
+
+
+/*
+ * Handle a poll-out event on @conn: transmit the pending tx buffers while
+ * holding conn->connlock.
+ *
+ * Returns -1 if @conn is NULL, 0 otherwise.
+ */
+int
+rpcsvc_conn_data_poll_out (rpcsvc_conn_t *conn)
+{
+        if (conn == NULL)
+                return -1;
+
+        pthread_mutex_lock (&conn->connlock);
+        __rpcsvc_conn_data_poll_out (conn);
+        pthread_mutex_unlock (&conn->connlock);
+
+        return 0;
+}
+
+
+/*
+ * Event handler for a data connection; @data is the rpcsvc_conn_t the
+ * events belong to.
+ *
+ * Poll-out is served first. A poll-err event de-initialises the connection
+ * and ends the handler. Poll-in is served last, and a -1 left behind by
+ * the last handler that ran is treated like an error event.
+ *
+ * Always returns 0.
+ */
+int
+rpcsvc_conn_data_handler (int fd, int idx, void *data, int poll_in, int poll_out
+                          , int poll_err)
+{
+        rpcsvc_conn_t   *conn = (rpcsvc_conn_t *)data;
+        int             status = 0;
+
+        if (conn == NULL)
+                return 0;
+
+        if (poll_out)
+                status = rpcsvc_conn_data_poll_out (conn);
+
+        if (poll_err) {
+                rpcsvc_conn_data_poll_err (conn);
+                return 0;
+        }
+
+        if (poll_in)
+                status = rpcsvc_conn_data_poll_in (conn);
+
+        if (status == -1)
+                rpcsvc_conn_data_poll_err (conn);
+
+        return 0;
+}
+
+
+/*
+ * Event handler for a listening connection; @data is the listening
+ * rpcsvc_conn_t. On poll-in a new connection is accepted on @fd for the
+ * listener's program and its events are associated with a stage, with
+ * rpcsvc_conn_data_handler as the handler.
+ *
+ * Returns 0 when there is nothing to accept or the new connection was set
+ * up, -1 if accepting, stage selection or stage association failed. In the
+ * latter two cases the new connection is unref'd.
+ */
+int
+rpcsvc_conn_listening_handler (int fd, int idx, void *data, int poll_in,
+                               int poll_out, int poll_err)
+{
+        rpcsvc_conn_t           *listener = NULL;
+        rpcsvc_conn_t           *accepted = NULL;
+        rpcsvc_stage_t          *stage = NULL;
+        rpcsvc_program_t        *prog = NULL;
+        rpcsvc_t                *svc = NULL;
+
+        if (!poll_in)
+                return 0;
+
+        listener = (rpcsvc_conn_t *)data;
+        prog = (rpcsvc_program_t *)listener->program;
+        svc = rpcsvc_conn_rpcsvc (listener);
+
+        accepted = rpcsvc_conn_accept_init (svc, fd, prog);
+        if (accepted == NULL) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "failed to accept connection");
+                return -1;
+        }
+
+        stage = rpcsvc_select_stage (svc);
+        if (stage == NULL) {
+                rpcsvc_conn_unref (accepted);
+                return -1;
+        }
+
+        /* The accepted connection delivers its events through a stage from
+         * now on.
+         */
+        if (rpcsvc_stage_conn_associate (stage, accepted,
+                                         rpcsvc_conn_data_handler,
+                                         accepted) == -1) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "could not associated stage "
+                        " with new connection");
+                rpcsvc_conn_unref (accepted);
+                return -1;
+        }
+
+        gf_log (GF_RPCSVC, GF_LOG_DEBUG, "New Connection: Program %s, Num: %d,"
+                " Ver: %d, Port: %d", prog->progname, prog->prognum,
+                prog->progver, prog->progport);
+
+        return 0;
+}
+
+
+/*
+ * Register the program with the local portmapper service, using the
+ * program number, version and port of @newprog over TCP.
+ *
+ * Returns 0 on success, -1 if @newprog is NULL or pmap_set () fails.
+ */
+int
+rpcsvc_program_register_portmap (rpcsvc_program_t *newprog)
+{
+        int     registered = 0;
+
+        if (newprog == NULL)
+                return -1;
+
+        registered = pmap_set (newprog->prognum, newprog->progver,
+                               IPPROTO_TCP, newprog->progport);
+        if (registered)
+                return 0;
+
+        gf_log (GF_RPCSVC, GF_LOG_ERROR, "Could not register with"
+                " portmap");
+        return -1;
+}
+
+
+/*
+ * Remove the portmapper registration for the program number and version of
+ * @prog.
+ *
+ * Returns 0 on success, -1 if @prog is NULL or pmap_unset () fails.
+ */
+int
+rpcsvc_program_unregister_portmap (rpcsvc_program_t *prog)
+{
+        int     removed = 0;
+
+        if (prog == NULL)
+                return -1;
+
+        removed = pmap_unset (prog->prognum, prog->progver);
+        if (removed)
+                return 0;
+
+        gf_log (GF_RPCSVC, GF_LOG_ERROR, "Could not unregister with"
+                " portmap");
+        return -1;
+}
+
+
+/*
+ * Create a listening connection for @newprog and associate its events with
+ * stage @stg, using rpcsvc_conn_listening_handler as the handler.
+ *
+ * Returns 0 on success, -1 on invalid arguments or if the listening
+ * connection could not be created or associated with the stage.
+ */
+int
+rpcsvc_stage_program_register (rpcsvc_stage_t *stg, rpcsvc_program_t *newprog)
+{
+        rpcsvc_conn_t           *newconn = NULL;
+        rpcsvc_t                *svc = NULL;
+
+        if ((!stg) || (!newprog))
+                return -1;
+
+        svc = rpcsvc_stage_service (stg);
+        /* Create a listening socket */
+        newconn = rpcsvc_conn_listen_init (svc, newprog);
+        if (!newconn) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "could not create listening"
+                        " connection");
+                return -1;
+        }
+
+        if ((rpcsvc_stage_conn_associate (stg, newconn,
+                                          rpcsvc_conn_listening_handler,
+                                          newconn)) == -1) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR,"could not associate stage with"
+                        " listening connection");
+                /* Nobody else knows about the connection yet, so give it up
+                 * here instead of leaking it. This mirrors what
+                 * rpcsvc_conn_listening_handler () does for an accepted
+                 * connection that could not be associated with a stage.
+                 * NOTE(review): assumes rpcsvc_conn_listen_init () hands
+                 * back a connection holding one reference - confirm.
+                 */
+                rpcsvc_conn_unref (newconn);
+                return -1;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Register @program with the RPC service @svc: a private copy of the
+ * program description is made, a listening connection is set up for it on
+ * a stage chosen by rpcsvc_select_stage (), and the program is announced
+ * to the portmapper.
+ *
+ * Returns 0 on success, -1 if @svc is NULL, the program has no actors,
+ * memory could not be allocated or one of the registration steps failed.
+ */
+int
+rpcsvc_program_register (rpcsvc_t *svc, rpcsvc_program_t program)
+{
+        rpcsvc_program_t        *newprog = NULL;
+        rpcsvc_stage_t          *selectedstage = NULL;
+        int                     ret = -1;
+        int                     listening = 0;
+
+        if (!svc)
+                return -1;
+
+        /* Validate before allocating, so that the failure is reported with
+         * the caller's program details rather than with a zeroed copy.
+         */
+        if (!program.actors) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Program registration failed:"
+                        " %s, Num: %d, Ver: %d, Port: %d", program.progname,
+                        program.prognum, program.progver, program.progport);
+                return -1;
+        }
+
+        newprog = CALLOC (1, sizeof(*newprog));
+        if (!newprog)
+                return -1;
+
+        memcpy (newprog, &program, sizeof (program));
+        selectedstage = rpcsvc_select_stage (svc);
+
+        ret = rpcsvc_stage_program_register (selectedstage, newprog);
+        if (ret == -1) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "stage registration of program"
+                        " failed");
+                goto free_prog;
+        }
+        listening = 1;
+
+        ret = rpcsvc_program_register_portmap (newprog);
+        if (ret == -1) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "portmap registration of"
+                        " program failed");
+                goto free_prog;
+        }
+
+        ret = 0;
+        gf_log (GF_RPCSVC, GF_LOG_DEBUG, "New program registered: %s, Num: %d,"
+                " Ver: %d, Port: %d", newprog->progname, newprog->prognum,
+                newprog->progver, newprog->progport);
+
+free_prog:
+        if (ret == -1) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Program registration failed:"
+                        " %s, Num: %d, Ver: %d, Port: %d", newprog->progname,
+                        newprog->prognum, newprog->progver, newprog->progport);
+                /* Once the listening connection has been registered with a
+                 * stage, its handler reads the program through the
+                 * connection on every incoming connection. There is no
+                 * tear-down of the listener here, so the copy must stay
+                 * alive; freeing it would leave the listener with a
+                 * dangling pointer.
+                 * NOTE(review): presumes rpcsvc_conn_listen_init () stores
+                 * the pointer it is given rather than a copy - confirm.
+                 */
+                if (!listening)
+                        FREE (newprog);
+        }
+
+        return ret;
+}
+
+/* Submit a program reply that carries a payload.
+ *
+ * The generic submit is also used for RPC error replies, which have no
+ * payload, and therefore accepts an empty msgvec and a NULL msg. RPC
+ * programs are expected to always come with a payload, so both are checked
+ * here before the generic submit is called.
+ *
+ * Returns -1 if any argument is missing, otherwise the result of
+ * rpcsvc_submit_generic ().
+ */
+int
+rpcsvc_submit_message (rpcsvc_request_t *req, struct iovec msgvec,
+                       struct iobuf *msg)
+{
+        if (!(req && req->conn && msg && msgvec.iov_base))
+                return -1;
+
+        return rpcsvc_submit_generic (req, msgvec, msg);
+}
+
+
+/*
+ * Unregister @prog from the portmapper.
+ *
+ * Returns 0 on success, -1 if @svc is NULL or the portmap unregistration
+ * failed.
+ */
+int
+rpcsvc_program_unregister (rpcsvc_t *svc, rpcsvc_program_t prog)
+{
+        if (svc == NULL)
+                return -1;
+
+        /* TODO: De-init the listening connection for this program. */
+        if (rpcsvc_program_unregister_portmap (&prog) == -1) {
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "portmap unregistration of"
+                        " program failed");
+                gf_log (GF_RPCSVC, GF_LOG_ERROR, "Program unregistration failed"
+                        ": %s, Num: %d, Ver: %d, Port: %d", prog.progname,
+                        prog.prognum, prog.progver, prog.progport);
+                return -1;
+        }
+
+        gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Program unregistered: %s, Num: %d,"
+                " Ver: %d, Port: %d", prog.progname, prog.prognum,
+                prog.progver, prog.progport);
+
+        return 0;
+}
+
+
+/*
+ * Look up the peer name of the socket of @conn; @hostname and @hostlen are
+ * passed through to rpcsvc_socket_peername ().
+ *
+ * Returns -1 if @conn is NULL, otherwise the result of
+ * rpcsvc_socket_peername ().
+ */
+int
+rpcsvc_conn_peername (rpcsvc_conn_t *conn, char *hostname, int hostlen)
+{
+        return conn ? rpcsvc_socket_peername (conn->sockfd, hostname, hostlen)
+                    : -1;
+}
+
+
+/*
+ * Look up the peer address of the socket of @conn; all other arguments
+ * are passed through to rpcsvc_socket_peeraddr ().
+ *
+ * Returns -1 if @conn is NULL, otherwise the result of
+ * rpcsvc_socket_peeraddr ().
+ */
+int
+rpcsvc_conn_peeraddr (rpcsvc_conn_t *conn, char *addrstr, int addrlen,
+                      struct sockaddr *sa, socklen_t sasize)
+{
+        if (conn == NULL)
+                return -1;
+
+        return rpcsvc_socket_peeraddr (conn->sockfd, addrstr, addrlen, sa,
+                                       sasize);
+}
+
diff --git a/xlators/nfs/lib/src/rpcsvc.h b/xlators/nfs/lib/src/rpcsvc.h
new file mode 100644
index 00000000000..2746288f82b
--- /dev/null
+++ b/xlators/nfs/lib/src/rpcsvc.h
@@ -0,0 +1,715 @@
+/*
+ Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _RPCSVC_H
+#define _RPCSVC_H
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "event.h"
+#include "transport.h"
+#include "logging.h"
+#include "dict.h"
+#include "mem-pool.h"
+#include "list.h"
+#include "iobuf.h"
+#include "xdr-rpc.h"
+#include "glusterfs.h"
+
+#include <pthread.h>
+#include <sys/uio.h>
+
+/* Name passed as the first argument of gf_log () by the RPC service code. */
+#define GF_RPCSVC "rpc-service"
+/* 1024 * GF_UNIT_KB, as a size_t. NOTE(review): presumably the stack size for
+ * stage threads - confirm against the thread creation code.
+ */
+#define RPCSVC_THREAD_STACK_SIZE ((size_t)(1024 * GF_UNIT_KB))
+
+/* Defaults and multipliers used for sizing. NOTE(review): the code consuming
+ * these is not part of this header; presumably they scale the event pool and
+ * memory pools by the service's memfactor - confirm in rpcsvc.c.
+ */
+#define RPCSVC_DEFAULT_MEMFACTOR 15
+#define RPCSVC_EVENTPOOL_SIZE_MULT 1024
+#define RPCSVC_POOLCOUNT_MULT 35
+/* Both are 128 * GF_UNIT_KB. NOTE(review): presumably the per-read size on a
+ * connection and the iobuf page size respectively - confirm.
+ */
+#define RPCSVC_CONN_READ (128 * GF_UNIT_KB)
+#define RPCSVC_PAGE_SIZE (128 * GF_UNIT_KB)
+
+/* Defines for RPC record and fragment assembly */
+
+#define RPCSVC_FRAGHDR_SIZE 4 /* 4-byte RPC fragment header size */
+
+/* Given the 32-bit fragment header value, evaluates to non-zero if this
+ * fragment is the last fragment for the RPC record being assembled.
+ * The RPC Record Marking standard defines a 32-bit value as the fragment
+ * header, with the MSB signifying whether the fragment is the last
+ * fragment for the record being assembled.
+ *
+ * The argument is parenthesized so that a compound expression such as
+ * (a | b) is masked as a whole rather than only its last operand.
+ */
+#define RPCSVC_LASTFRAG(fraghdr) ((uint32_t)((fraghdr) & 0x80000000U))
+
+/* Given the 32-bit fragment header value, extracts the low 31 bits, which
+ * contain the fragment size.
+ */
+#define RPCSVC_FRAGSIZE(fraghdr) ((uint32_t)((fraghdr) & 0x7fffffffU))
+
+/* RPC Record States: the values compared against the record state's "state"
+ * field by rpcsvc_record_readfraghdr () and rpcsvc_record_readfrag ().
+ */
+#define RPCSVC_READ_FRAGHDR 1
+#define RPCSVC_READ_FRAG 2
+/* Fragment size threshold in bytes. A fragment larger than this (see
+ * rpcsvc_record_vectored ()) will be handed over to the
+ * vectored actor so that it can allocate its buffers the way it wants.
+ * In our RPC layer, we assume that vectored RPC requests/records are never
+ * spread over multiple RPC fragments since that prevents us from determining
+ * whether the record should be handled in RPC layer completely or handed to
+ * the vectored handler.
+ */
+#define RPCSVC_VECTORED_FRAGSZ 4096
+/* Values compared against the record state's "vecstate" field by the
+ * rpcsvc_record_vectored_* () predicates below.
+ */
+#define RPCSVC_VECTOR_READCRED 1003
+#define RPCSVC_VECTOR_READVERFSZ 1004
+#define RPCSVC_VECTOR_READVERF 1005
+#define RPCSVC_VECTOR_IGNORE 1006
+#define RPCSVC_VECTOR_READVEC 1007
+#define RPCSVC_VECTOR_READPROCHDR 1008
+
+/* True while a fragment is being read (state is RPCSVC_READ_FRAG) and vecstate
+ * is still 0, i.e. none of the RPCSVC_VECTOR_* values has been set.
+ */
+#define rpcsvc_record_vectored_baremsg(rs) (((rs)->state == RPCSVC_READ_FRAG) && (rs)->vecstate == 0)
+#define rpcsvc_record_vectored_cred(rs) ((rs)->vecstate == RPCSVC_VECTOR_READCRED)
+#define rpcsvc_record_vectored_verfsz(rs) ((rs)->vecstate == RPCSVC_VECTOR_READVERFSZ)
+#define rpcsvc_record_vectored_verfread(rs) ((rs)->vecstate == RPCSVC_VECTOR_READVERF)
+#define rpcsvc_record_vectored_ignore(rs) ((rs)->vecstate == RPCSVC_VECTOR_IGNORE)
+#define rpcsvc_record_vectored_readvec(rs) ((rs)->vecstate == RPCSVC_VECTOR_READVEC)
+#define rpcsvc_record_vectored_readprochdr(rs) ((rs)->vecstate == RPCSVC_VECTOR_READPROCHDR)
+/* True when the current fragment size exceeds RPCSVC_VECTORED_FRAGSZ. */
+#define rpcsvc_record_vectored(rs) ((rs)->fragsize > RPCSVC_VECTORED_FRAGSZ)
+/* Includes bytes up to and including the credential length field. The credlen
+ * will be followed by @credlen bytes of credential data which will have to be
+ * read separately by the vectored reader. After the credentials comes the
+ * verifier which will also have to be read separately including the 8 bytes of
+ * verf flavour and verflen.
+ */
+#define RPCSVC_BARERPC_MSGSZ 32
+#define rpcsvc_record_readfraghdr(rs) ((rs)->state == RPCSVC_READ_FRAGHDR)
+#define rpcsvc_record_readfrag(rs) ((rs)->state == RPCSVC_READ_FRAG)
+
+/* Yields the untyped svc reference stored in the stage that owns @conn. */
+#define rpcsvc_conn_rpcsvc(conn) ((conn)->stage->svc)
+/* NOTE(review): presumably the lowest and highest RPC protocol versions
+ * accepted by this service - confirm where these are used.
+ */
+#define RPCSVC_LOWVERS 2
+#define RPCSVC_HIGHVERS 2
+
+typedef struct rpc_svc_program rpcsvc_program_t;
+/* A Stage is the event handler thread together with
+ * the connections being served by this thread.
+ * It is called a stage because all the actors, i.e, protocol actors,
+ * defined by higher level users of the RPC layer, are executed here.
+ */
+typedef struct rpc_svc_stage_context {
+ pthread_t tid; /* NOTE(review): presumably the id of the stage's event handler thread - confirm */
+ struct event_pool *eventpool; /* Per-stage event-pool */
+ /* Kept as void * because rpcsvc_t is declared later in this header;
+ * rpcsvc_stage_service () casts it back to rpcsvc_t *.
+ */
+ void *svc; /* Ref to the rpcsvc_t */
+} rpcsvc_stage_t;
+
+
+/* RPC Records and Fragments assembly state.
+ * This is per-connection state that is used to determine
+ * how much data has come in, how much more needs to be read
+ * and where it needs to be read.
+ *
+ * All this state is then used to re-assemble network buffers into
+ * RPC fragments, which are then re-assembled into RPC records.
+ *
+ * See RFC 1831: "RPC: Remote Procedure Call Protocol Specification Version 2",
+ * particularly the section on Record Marking Standard.
+ */
+typedef struct rpcsvc_record_state {
+
+ /* Pending messages storage
+ * This memory area is currently being used to assemble
+ * the latest RPC record.
+ *
+ * Note that this buffer contains the data other than the
+ * fragment headers received from the network. This is so that we can
+ * directly pass this buffer to higher layers without requiring to
+ * perform memory copies and marshalling of data.
+ */
+ struct iobuf *activeiob;
+
+ /* NOTE(review): not documented in the original; presumably the iobuf
+ * that receives data while a record is being read in vectored mode -
+ * confirm in rpcsvc.c.
+ */
+ struct iobuf *vectoriob;
+ /* The pointer into activeiob memory, into which will go the
+ * contents from the next read from the network.
+ * Advanced by rpcsvc_record_update_currentfrag ().
+ */
+ char *fragcurrent;
+
+ /* Size of the currently incomplete RPC fragment.
+ * This is filled in when the fragment header comes in.
+ * Even though only the 31 least significant bits are used from the
+ * fragment header, we use a 32 bit variable to store the size.
+ */
+ uint32_t fragsize;
+
+ /* The fragment header is always read in here so that
+ * the RPC messages contained in a RPC records can be processed
+ * separately without copying them out of the activeiob above.
+ */
+ char fragheader[RPCSVC_FRAGHDR_SIZE];
+ /* Advanced by rpcsvc_record_update_currenthdr ().
+ * NOTE(review): presumably points into fragheader at the position
+ * where the next header bytes will be stored - confirm.
+ */
+ char *hdrcurrent;
+
+ /* Bytes remaining to come in for the current fragment. */
+ uint32_t remainingfrag;
+
+ /* It is possible for the frag header to be split over separate
+ * read calls, so we need to keep track of how much is left.
+ */
+ uint32_t remainingfraghdr;
+
+ /* Record size, the total size of the RPC record, i.e. the total
+ * of all fragment sizes received till now. Does not include the size
+ * of a partial fragment which is continuing to be assembled right now.
+ */
+ int recordsize;
+
+ /* Current state of the record, compared against RPCSVC_READ_FRAGHDR
+ * and RPCSVC_READ_FRAG.
+ */
+ int state;
+
+ /* Current state of the vectored reading process, compared against 0
+ * and the RPCSVC_VECTOR_* values.
+ */
+ int vecstate;
+
+ /* Set to non-zero when the currently partial or complete fragment is
+ * the last fragment being received for the current RPC record.
+ */
+ uint32_t islastfrag;
+
+} rpcsvc_record_state_t;
+
+
+/* Values compared against the connstate field of a connection. */
+#define RPCSVC_CONNSTATE_CONNECTED 1
+#define RPCSVC_CONNSTATE_DISCONNECTED 2
+
+/* True when the connection's connstate is RPCSVC_CONNSTATE_CONNECTED. */
+#define rpcsvc_conn_check_active(conn) ((conn)->connstate==RPCSVC_CONNSTATE_CONNECTED)
+
+typedef struct rpcsvc_request rpcsvc_request_t;
+/* Contains the state for each connection that is used for transmitting and
+ * receiving RPC messages.
+ *
+ * There is also an eventidx because each connection's fd is added to the event
+ * pool of the stage to which a connection belongs.
+ * Anything that can be accessed by a RPC program must be synced through
+ * connlock.
+ */
+typedef struct rpc_conn_state {
+
+ /* Transport or connection state */
+
+ /* Once we start working on RDMA support, this TCP specific state will
+ * have to be abstracted away.
+ */
+ int sockfd;
+ int eventidx;
+ int windowsize;
+
+ /* Reference to the stage which is handling this
+ * connection.
+ */
+ rpcsvc_stage_t *stage;
+
+ /* RPC Records and Fragments assembly state.
+ * All incoming data is staged here before being
+ * called a full RPC message.
+ */
+ rpcsvc_record_state_t rstate;
+
+ /* It is possible that a client disconnects while
+ * the higher layer RPC service is busy in a call.
+ * In this case, we cannot just free the conn
+ * structure, since the higher layer service could
+ * still have a reference to it.
+ * The refcount avoids freeing until all references
+ * have been given up, although the connection is close()d at the first
+ * call to unref.
+ */
+ int connref;
+ pthread_mutex_t connlock;
+ /* Compared against RPCSVC_CONNSTATE_CONNECTED by
+ * rpcsvc_conn_check_active ().
+ */
+ int connstate;
+
+ /* The program that is listening for requests on this connection. */
+ rpcsvc_program_t *program;
+
+ /* List of buffers awaiting transmission */
+ /* Accesses to txbufs between multiple threads calling
+ * rpcsvc_submit is synced through connlock. Prefer spinlock over
+ * mutex because this is a low overhead op that needs simple
+ * appending to the tx list.
+ * NOTE(review): connlock above is declared as a pthread_mutex_t, so
+ * the spinlock remark states a preference, not what is declared here.
+ */
+ struct list_head txbufs;
+
+ /* Mem pool for the txbufs above. */
+ struct mem_pool *txpool;
+
+ /* Memory pool for rpcsvc_request_t */
+ struct mem_pool *rxpool;
+
+ /* The request which hasn't yet been handed to the RPC program because
+ * this request is being treated as a vector request and so needs some
+ * more data to be got from the network.
+ */
+ rpcsvc_request_t *vectoredreq;
+} rpcsvc_conn_t;
+
+
+/* Capacity in bytes of the authdata buffer below. RFC 1831 bounds the opaque
+ * body of an RPC credential or verifier at 400 bytes.
+ */
+#define RPCSVC_MAX_AUTH_BYTES 400
+/* One piece of RPC authentication data. struct rpcsvc_request holds two of
+ * these: the credentials (cred) and the verifier (verf).
+ */
+typedef struct rpcsvc_auth_data {
+ int flavour;
+ /* NOTE(review): presumably the number of valid bytes in authdata -
+ * confirm in the auth code.
+ */
+ int datalen;
+ char authdata[RPCSVC_MAX_AUTH_BYTES];
+} rpcsvc_auth_data_t;
+
+/* Yields the flavour of an rpcsvc_auth_data_t passed by value (not pointer). */
+#define rpcsvc_auth_flavour(au) ((au).flavour)
+
+/* The container for the RPC call handed up to an actor.
+ * Dynamically allocated. Lives till the call reply is completely
+ * transmitted.
+ * */
+struct rpcsvc_request {
+ /* Connection over which this request came. */
+ rpcsvc_conn_t *conn;
+
+ /* The identifier for the call from client.
+ * Needed to pair the reply with the call.
+ */
+ uint32_t xid;
+
+ /* NOTE(review): presumably the program number, program version and
+ * procedure number taken from the RPC call header - confirm in
+ * rpcsvc.c.
+ */
+ int prognum;
+
+ int progver;
+
+ int procnum;
+ /* Uid and gid filled by the rpc-auth module during the authentication
+ * phase.
+ */
+ uid_t uid;
+ gid_t gid;
+
+ /* Might want to move this to AUTH_UNIX specific state since this array
+ * is not available for every authentication scheme.
+ */
+ gid_t auxgids[NGRPS];
+ int auxgidcount;
+
+
+ /* The RPC message payload, contains the data required
+ * by the program actors. This is the buffer that will need to
+ * be de-xdred by the actor.
+ */
+ struct iovec msg;
+
+ /* The full message buffer allocated to store the RPC headers.
+ * This buffer is ref'd when allocated by RPC svc and unref'd after
+ * the buffer is handed to the actor. That means if the actor or any
+ * higher layer wants to keep this buffer around, they too must ref it
+ * right after entering the program actor.
+ */
+ struct iobuf *recordiob;
+
+ /* Status of the RPC call, whether it was accepted or denied. */
+ int rpc_stat;
+
+ /* In case, the call was denied, the RPC error is stored here
+ * till the reply is sent.
+ */
+ int rpc_err;
+
+ /* In case the failure happened because of an authentication problem,
+ * this value needs to be assigned the correct auth error number.
+ */
+ int auth_err;
+
+ /* There can be cases of RPC requests where the reply needs to
+ * be built from multiple sources. For eg. where even the NFS reply can
+ * contain a payload, as in the NFSv3 read reply. Here the RPC header,
+ * NFS header and the read data are brought together separately from
+ * different buffers, so we need to stage the buffers temporarily here
+ * before all of them get added to the connection's transmission list.
+ */
+ struct list_head txlist;
+
+ /* While the reply record is being built, this variable keeps track
+ * of how many bytes have been added to the record.
+ */
+ size_t payloadsize;
+
+ /* The credentials extracted from the rpc request */
+ rpcsvc_auth_data_t cred;
+
+ /* The verifier extracted from the rpc request. In request side
+ * processing this contains the verifier sent by the client, on reply
+ * side processing, it is filled with the verifier that will be
+ * sent to the client.
+ */
+ rpcsvc_auth_data_t verf;
+
+ /* Container for a RPC program wanting to store a temp
+ * request-specific item.
+ */
+ void *private;
+
+};
+
+/* Accessors for struct rpcsvc_request and the objects reachable from it.
+ *
+ * Every macro parenthesizes its arguments and its whole expansion, so that
+ * pointer expressions such as (req + 1) can be passed and the result can be
+ * used inside a larger expression without precedence surprises.
+ */
+
+/* The program listening on the connection that delivered @req, and that
+ * program's private state.
+ */
+#define rpcsvc_request_program(req) ((rpcsvc_program_t *)((req)->conn->program))
+#define rpcsvc_request_program_private(req) (((rpcsvc_program_t *)((req)->conn->program))->private)
+
+/* Navigation: request -> connection -> stage -> service. */
+#define rpcsvc_request_conn(req) ((req)->conn)
+#define rpcsvc_conn_stage(conn) ((conn)->stage)
+#define rpcsvc_stage_service(stg) ((rpcsvc_t *)((stg)->svc))
+#define rpcsvc_request_service(req) (rpcsvc_stage_service(rpcsvc_conn_stage(rpcsvc_request_conn(req))))
+
+/* Status of the call as recorded in the request. */
+#define rpcsvc_request_accepted(req) ((req)->rpc_stat == MSG_ACCEPTED)
+#define rpcsvc_request_accepted_success(req) ((req)->rpc_err == SUCCESS)
+
+/* Authentication related fields. */
+#define rpcsvc_request_uid(req) ((req)->uid)
+#define rpcsvc_request_gid(req) ((req)->gid)
+#define rpcsvc_request_prog_minauth(req) (rpcsvc_request_program(req)->min_auth)
+#define rpcsvc_request_cred_flavour(req) (rpcsvc_auth_flavour((req)->cred))
+#define rpcsvc_request_verf_flavour(req) (rpcsvc_auth_flavour((req)->verf))
+
+/* Program-private item and call identifier. */
+#define rpcsvc_request_private(req) ((req)->private)
+#define rpcsvc_request_xid(req) ((req)->xid)
+#define rpcsvc_request_set_private(req,prv) ((req)->private = (void *)(prv))
+
+/* The iobuf holding the request record, and ref/unref helpers for it. */
+#define rpcsvc_request_record_iob(rq) ((rq)->recordiob)
+#define rpcsvc_request_record_ref(req) (iobuf_ref ((req)->recordiob))
+#define rpcsvc_request_record_unref(req) (iobuf_unref ((req)->recordiob))
+
+
+/* Return values expected from program actors, see below. */
+#define RPCSVC_ACTOR_SUCCESS 0
+#define RPCSVC_ACTOR_ERROR (-1)
+
+/* Functor for every type of protocol actor
+ * must be defined like this.
+ *
+ * See the request structure for info on how to handle the request
+ * in the program actor.
+ *
+ * On successful sanity checks inside the actor, it should return
+ * RPCSVC_ACTOR_SUCCESS.
+ * On an error, on which the RPC layer is expected to return a reply, the actor
+ * should return RPCSVC_ACTOR_ERROR.
+ *
+ */
+typedef int (*rpcsvc_actor) (rpcsvc_request_t *req);
+/* Vectored counterparts of the actor, see the vector_actor and vector_sizer
+ * members of the actor descriptor.
+ * NOTE(review): presumably the sizer reports through @readsize how many bytes
+ * to read next and through @newiob whether a new iobuf is needed, and the
+ * vector actor receives the iobuf that was filled - confirm in rpcsvc.c.
+ */
+typedef int (*rpcsvc_vector_actor) (rpcsvc_request_t *req, struct iobuf *iob);
+typedef int (*rpcsvc_vector_sizer) (rpcsvc_request_t *req, ssize_t *readsize,
+ int *newiob);
+
+/* Every protocol actor will also need to specify the function the RPC layer
+ * will use to serialize or encode the message into XDR format just before
+ * transmitting on the connection.
+ */
+typedef void *(*rpcsvc_encode_reply) (void *msg);
+
+/* Once the reply has been transmitted, the message will have to be
+ * de-allocated, so every actor will need to provide a function that
+ * deallocates the message it had allocated as a response.
+ */
+typedef void (*rpcsvc_deallocate_reply) (void *msg);
+
+
+/* Size of the fixed name buffers used by the actor, program and
+ * authentication descriptors in this header.
+ */
+#define RPCSVC_NAME_MAX 32
+/* The descriptor for each procedure/actor that runs
+ * over the RPC service.
+ */
+typedef struct rpc_svc_actor_desc {
+ char procname[RPCSVC_NAME_MAX];
+ int procnum;
+ rpcsvc_actor actor;
+
+ /* Handler for cases where the RPC requests fragments are large enough
+ * to benefit from being decoded into aligned memory addresses. While
+ * decoding the request in a non-vectored manner, due to the nature of
+ * the XDR scheme, RPC cannot guarantee memory aligned addresses for
+ * the resulting message-specific structures. Allowing a specialized
+ * handler for letting the RPC program read the data from the network
+ * directly into its aligned buffers.
+ */
+ rpcsvc_vector_actor vector_actor;
+ rpcsvc_vector_sizer vector_sizer;
+
+} rpcsvc_actor_t;
+
+/* Callback type of the conn_init and conn_destroy members of a program. It
+ * receives the program's private state and the connection concerned.
+ */
+typedef int (*rpcsvc_conn_notify_fn) (void *progpriv, rpcsvc_conn_t *conn);
+
+/* Describes a program and its version along with the function pointers
+ * required to handle the procedures/actors of each program/version.
+ * Never changed ever by any thread so no need for a lock.
+ */
+struct rpc_svc_program {
+ char progname[RPCSVC_NAME_MAX];
+ int prognum;
+ int progver;
+ uint16_t progport; /* Registered with portmap */
+ int progaddrfamily; /* AF_INET or AF_INET6 */
+ char *proghost; /* Bind host, can be NULL */
+ rpcsvc_actor_t *actors; /* All procedure handlers */
+ int numactors; /* Num actors in actor array */
+ int proghighvers; /* Highest ver for program
+ supported by the system. */
+ int proglowvers; /* Lowest ver */
+
+ /* Program specific state handed to actors */
+ void *private;
+
+ /* This upcall is made when a connection's refcount reaches 0 and the
+ * connection is about to be destroyed. We want to let the RPC program
+ * know that it should also now free any state it is maintaining
+ * for this connection.
+ */
+ rpcsvc_conn_notify_fn conn_destroy;
+
+ /* Used to tell RPC program to init the state it needs to associate
+ * with the new connection.
+ */
+ rpcsvc_conn_notify_fn conn_init;
+
+ /* An integer that identifies the min auth strength that is required
+ * by this protocol, for eg. MOUNT3 needs AUTH_UNIX at least.
+ * See RFC 1813, Section 5.2.1.
+ */
+ int min_auth;
+};
+
+
+/* Contains global state required for all the RPC services.
+ */
+typedef struct rpc_svc_state {
+
+ /* Contains the list of rpcsvc_stage_t
+ * list of (program, version) handlers.
+ * other options.
+ */
+
+ /* At this point, lock is not used to protect anything. Later, it'll
+ * be used for protecting stages.
+ */
+ pthread_mutex_t rpclock;
+
+ /* This is the first stage that is inited, so that any RPC based
+ * services that do not need multi-threaded support can just use the
+ * service right away. This is not added to the stages list
+ * declared later.
+ * This is also the stage over which all service listeners are run.
+ */
+ rpcsvc_stage_t *defaultstage;
+
+ /* When we have multi-threaded RPC support, we'll use this to link
+ * to the multiple Stages.
+ */
+ struct list_head stages; /* All stages */
+
+ /* NOTE(review): presumably a scaling factor for pool sizes that
+ * defaults to RPCSVC_DEFAULT_MEMFACTOR - confirm in rpcsvc_init ().
+ */
+ unsigned int memfactor;
+
+ /* List of the authentication schemes available. */
+ struct list_head authschemes;
+
+ /* Reference to the options */
+ dict_t *options;
+
+ /* Allow insecure ports. */
+ int allow_insecure;
+
+ /* NOTE(review): presumably the ctx handed to rpcsvc_init () - confirm. */
+ glusterfs_ctx_t *ctx;
+} rpcsvc_t;
+
+
+/* All users of RPC services should use this API to register their
+ * procedure handlers.
+ */
+extern int
+rpcsvc_program_register (rpcsvc_t *svc, rpcsvc_program_t program);
+
+/* Unregisters @program from portmap. Returns 0 on success and -1 if @svc is
+ * NULL or the portmap unregistration fails.
+ */
+extern int
+rpcsvc_program_unregister (rpcsvc_t *svc, rpcsvc_program_t program);
+
+/* Inits the global RPC service data structures.
+ * Called in main.
+ */
+extern rpcsvc_t *
+rpcsvc_init (glusterfs_ctx_t *ctx, dict_t *options);
+
+
+/* Submit entry point for RPC programs, which are expected to pass a payload.
+ * Returns -1 without submitting anything if @req, the request's connection,
+ * @iob or msg.iov_base is NULL; otherwise returns the result of
+ * rpcsvc_submit_generic ().
+ */
+extern int
+rpcsvc_submit_message (rpcsvc_request_t * req, struct iovec msg,
+ struct iobuf *iob);
+
+/* Generic submit. According to the comment on rpcsvc_submit_message () in
+ * rpcsvc.c it is also used for RPC error replies, which carry no payload, so
+ * the vector base and @msg can be NULL here.
+ */
+int
+rpcsvc_submit_generic (rpcsvc_request_t *req, struct iovec msgvec,
+ struct iobuf *msg);
+/* Current write positions of the record state @rs: the address at which the
+ * next fragment bytes and the next fragment header bytes are stored.
+ */
+#define rpcsvc_record_currentfrag_addr(rs) ((rs)->fragcurrent)
+#define rpcsvc_record_currenthdr_addr(rs) ((rs)->hdrcurrent)
+
+/* Advance the fragment write position of @rs by @size bytes.
+ * The definition ends at "while (0)" with no trailing line continuation, so
+ * that whatever line follows the macro can never be spliced into it.
+ */
+#define rpcsvc_record_update_currentfrag(rs, size)              \
+        do {                                                    \
+                (rs)->fragcurrent += (size);                    \
+        } while (0)
+
+/* Advance the fragment header write position of @rs by @size bytes. */
+#define rpcsvc_record_update_currenthdr(rs, size)               \
+        do {                                                    \
+                (rs)->hdrcurrent += (size);                     \
+        } while (0)
+
+
+/* These are used to differentiate between multiple txbufs which form
+ * a single RPC record. For eg, one purpose we use these for is to
+ * prevent dividing a RPC record over multiple TCP segments. Multiple
+ * TCP segments are possible for a single RPC record because we generally do not
+ * have control over how the kernel's TCP segments the buffers when putting
+ * them on the wire. So, on Linux, we use these to set TCP_CORK to create
+ * a single TCP segment from multiple txbufs that are part of the same RPC
+ * record. This improves network performance by reducing tiny message
+ * transmissions.
+ */
+#define RPCSVC_TXB_FIRST 0x1
+#define RPCSVC_TXB_LAST 0x2
+
+/* The list of buffers appended to a connection's pending
+ * transmission list.
+ */
+typedef struct rpcsvc_txbuf {
+ struct list_head txlist;
+ /* The iobuf which contains the full message to be transmitted */
+ struct iobuf *iob;
+
+ /* For vectored messages from an RPC program, we need to be able to
+ * maintain a ref to an iobuf which we do not have access to directly
+ * except through the iobref which in turn could've been passed to
+ * the RPC program by a higher layer.
+ *
+ * So either the iob is defined or iobref is defined for a reply,
+ * never both.
+ */
+ struct iobref *iobref;
+ /* In order to handle non-blocking writes, we'll need to keep track of
+ * how much data from an iobuf has been written and where the next
+ * transmission needs to start from. buf.iov_base points to the base of
+ * the iobuf, buf.iov_len is the size of iobuf being used for the
+ * message from the total size in the iobuf.
+ */
+ struct iovec buf;
+ /* offset is the point from where the next transmission for this buffer
+ * should start.
+ */
+ size_t offset;
+
+ /* This is a special field that tells us what kind of transmission
+ * behaviour to provide to a particular buffer.
+ * See the RPCSVC_TXB_* defines for more info.
+ */
+ int txbehave;
+} rpcsvc_txbuf_t;
+
+extern int
+rpcsvc_error_reply (rpcsvc_request_t *req);
+
+/* NOTE(review): presumably the buffer size callers use for peer name and
+ * address strings - confirm at the call sites.
+ */
+#define RPCSVC_PEER_STRLEN 1024
+/* NOTE(review): presumably the possible results of the peer and auth checks
+ * declared below - confirm in rpcsvc.c.
+ */
+#define RPCSVC_AUTH_ACCEPT 1
+#define RPCSVC_AUTH_REJECT 2
+#define RPCSVC_AUTH_DONTCARE 3
+
+/* Returns -1 if @conn is NULL, otherwise the result of
+ * rpcsvc_socket_peername () for the connection's socket fd.
+ */
+extern int
+rpcsvc_conn_peername (rpcsvc_conn_t *conn, char *hostname, int hostlen);
+
+/* Returns -1 if @conn is NULL, otherwise the result of
+ * rpcsvc_socket_peeraddr () for the connection's socket fd.
+ */
+extern int
+rpcsvc_conn_peeraddr (rpcsvc_conn_t *conn, char *addrstr, int addrlen,
+ struct sockaddr *returnsa, socklen_t sasize);
+
+extern int
+rpcsvc_conn_peer_check (dict_t *options, char *volname, rpcsvc_conn_t *conn);
+
+extern int
+rpcsvc_conn_privport_check (rpcsvc_t *svc, char *volname, rpcsvc_conn_t *conn);
+#define rpcsvc_request_seterr(req, err) (req)->rpc_err = err
+#define rpcsvc_request_set_autherr(req, err) (req)->auth_err = err
+
+extern void
+rpcsvc_conn_deinit (rpcsvc_conn_t *conn);
+/* Take and drop a reference on a connection, see the comment on the connref
+ * member of rpcsvc_conn_t.
+ */
+extern void rpcsvc_conn_ref (rpcsvc_conn_t *conn);
+extern void rpcsvc_conn_unref (rpcsvc_conn_t *conn);
+
+/* NOTE(review): presumably these two are used together to build a reply from
+ * several buffers staged on the request's txlist (see struct rpcsvc_request)
+ * and then submit them - confirm in rpcsvc.c.
+ */
+extern int rpcsvc_submit_vectors (rpcsvc_request_t *req);
+
+extern int rpcsvc_request_attach_vector (rpcsvc_request_t *req,
+ struct iovec msgvec, struct iobuf *iob,
+ struct iobref *ioref, int finalvector);
+
+
+/* Callback types of the members of rpcsvc_auth_ops_t below. Each receives the
+ * connection or request concerned plus an opaque @priv pointer.
+ * NOTE(review): @priv is presumably the authprivate member of rpcsvc_auth_t -
+ * confirm in rpcsvc-auth.c.
+ */
+typedef int (*auth_init_conn) (rpcsvc_conn_t *conn, void *priv);
+typedef int (*auth_init_request) (rpcsvc_request_t *req, void *priv);
+typedef int (*auth_request_authenticate) (rpcsvc_request_t *req, void *priv);
+
+/* This structure needs to be registered by every authentication scheme.
+ * Our authentication schemes are stored per connection because
+ * each connection will end up using a different authentication scheme.
+ */
+typedef struct rpcsvc_auth_ops {
+ auth_init_conn conn_init;
+ auth_init_request request_init;
+ auth_request_authenticate authenticate;
+} rpcsvc_auth_ops_t;
+
+/* Descriptor of one authentication flavour: its name, its number, its
+ * operations and an opaque private pointer.
+ */
+typedef struct rpcsvc_auth_flavour_desc {
+ char authname[RPCSVC_NAME_MAX];
+ int authnum;
+ rpcsvc_auth_ops_t *authops;
+ void *authprivate;
+} rpcsvc_auth_t;
+
+/* Initializer of an authentication scheme, stored in the init member below.
+ * NOTE(review): the void * result presumably points to the scheme's
+ * rpcsvc_auth_t - confirm in rpcsvc-auth.c.
+ */
+typedef void * (*rpcsvc_auth_initer_t) (rpcsvc_t *svc, dict_t *options);
+
+/* List node describing one authentication scheme. The service keeps a list
+ * of available schemes in the authschemes member of rpcsvc_t.
+ */
+struct rpcsvc_auth_list {
+ struct list_head authlist;
+ rpcsvc_auth_initer_t init;
+ /* Should be the name with which we identify the auth scheme given
+ * in the volfile options.
+ * This should be different from the authname in rpcsvc_auth_t
+ * in a way that makes it easier to specify this scheme in the volfile.
+ * This is because the technical names of the schemes can be a bit
+ * arcane.
+ */
+ char name[RPCSVC_NAME_MAX];
+ rpcsvc_auth_t *auth;
+ /* NOTE(review): presumably non-zero when the scheme is enabled through
+ * the options - confirm in rpcsvc-auth.c.
+ */
+ int enable;
+};
+
+/* Authentication entry points. NOTE(review): their definitions live in
+ * rpcsvc-auth.c and are not visible here; see that file for the exact
+ * contracts.
+ */
+extern int
+rpcsvc_auth_request_init (rpcsvc_request_t *req);
+
+extern int
+rpcsvc_auth_init (rpcsvc_t *svc, dict_t *options);
+
+extern int
+rpcsvc_auth_conn_init (rpcsvc_conn_t *conn);
+
+extern int
+rpcsvc_authenticate (rpcsvc_request_t *req);
+
+extern int
+rpcsvc_auth_array (rpcsvc_t *svc, char *volname, int *autharr, int arrlen);
+
+/* If the request has been sent using AUTH_UNIX, this function returns the
+ * auxiliary gids as an array, otherwise, it returns NULL.
+ * Move to auth-unix specific source file when we need to modularize the
+ * authentication code even further to support more auth schemes.
+ */
+extern gid_t *
+rpcsvc_auth_unix_auxgids (rpcsvc_request_t *req, int *arrlen);
+
+extern int
+rpcsvc_combine_gen_spec_volume_checks (int gen, int spec);
+
+extern char *
+rpcsvc_volume_allowed (dict_t *options, char *volname);
+#endif
+#endif