summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorShreyas Siravara <sshreyas@fb.com>2015-10-22 12:29:04 -0700
committerShreyas Siravara <sshreyas@fb.com>2017-08-30 01:23:51 +0000
commitc87639ad04a3c1a0d1fccbf426ad040d36505693 (patch)
tree022b92bdb059b8ded93c508914f4743b2137c2bf
parentd5bc1267359cf78a5e5d65bd70f51e41239f5e0e (diff)
gNFSd: Auto re-register NFS/Mount programs with rpcbind periodically
Summary: Every once in a while rpcbind crashes and the NFS endpoints go bye-bye. This diff makes it such that we should almost never encounter the case where we have NFS up and rpcbind down causing bad endpoints and hanging mounts for our customers. Test Plan: Added prove tests + tested on dev server Reviewers: dph, moox, rwareing Reviewed By: rwareing Differential Revision: https://phabricator.fb.com/D2571724 Tasks: 8803558 Change-Id: I35acb2d731185a7b20020cb57bdd4d879e978df4 Signature: t1:2571724:1445555327:3276a4dcc4da71346b09d4aeb46c69dddcc7c5ba Reviewed-on: https://review.gluster.org/17961 Smoke: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Shreyas Siravara <sshreyas@fb.com> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
-rw-r--r--rpc/rpc-lib/src/rpcsvc.c14
-rw-r--r--rpc/rpc-lib/src/rpcsvc.h2
-rwxr-xr-x[-rw-r--r--]tests/basic/accept-v6v4.t26
-rw-r--r--xlators/nfs/server/src/nfs.c77
-rw-r--r--xlators/nfs/server/src/nfs.h2
5 files changed, 112 insertions, 9 deletions
diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c
index 87a918ce8cb..617e3cc76ed 100644
--- a/rpc/rpc-lib/src/rpcsvc.c
+++ b/rpc/rpc-lib/src/rpcsvc.c
@@ -1371,7 +1371,7 @@ rpcsvc_error_reply (rpcsvc_request_t *req)
#ifdef IPV6_DEFAULT
int
-rpcsvc_program_register_rpcbind6 (rpcsvc_program_t *newprog, uint32_t port)
+rpcsvc_program_register_rpcbind6 (rpcsvc_program_t *newprog, uint32_t port, gf_boolean_t unregister_first)
{
const int IP_BUF_LEN = 64;
char addr_buf[IP_BUF_LEN];
@@ -1405,11 +1405,13 @@ rpcsvc_program_register_rpcbind6 (rpcsvc_program_t *newprog, uint32_t port)
goto out;
}
- /* Force the unregistration of the program first.
- * This call may fail if nothing has been registered,
- * which is fine.
- */
- rpcsvc_program_unregister_rpcbind6 (newprog);
+ if (unregister_first) {
+ /* Force the unregistration of the program first.
+ * This call may fail if nothing has been registered,
+ * which is fine.
+ */
+ rpcsvc_program_unregister_rpcbind6 (newprog);
+ }
success = rpcb_set (newprog->prognum, newprog->progver, nc, nb);
if (!success) {
diff --git a/rpc/rpc-lib/src/rpcsvc.h b/rpc/rpc-lib/src/rpcsvc.h
index 33141256cf3..9a2257a6087 100644
--- a/rpc/rpc-lib/src/rpcsvc.h
+++ b/rpc/rpc-lib/src/rpcsvc.h
@@ -439,7 +439,7 @@ rpcsvc_program_register_portmap (rpcsvc_program_t *newprog, uint32_t port);
#ifdef IPV6_DEFAULT
extern int
-rpcsvc_program_register_rpcbind6 (rpcsvc_program_t *newprog, uint32_t port);
+rpcsvc_program_register_rpcbind6 (rpcsvc_program_t *newprog, uint32_t port, gf_boolean_t unregister_first);
extern int
rpcsvc_program_unregister_rpcbind6 (rpcsvc_program_t *newprog);
#endif
diff --git a/tests/basic/accept-v6v4.t b/tests/basic/accept-v6v4.t
index 7128c12c6be..ce3a1bae7f9 100644..100755
--- a/tests/basic/accept-v6v4.t
+++ b/tests/basic/accept-v6v4.t
@@ -119,4 +119,30 @@ EXPECT "Y" check_ip_port $V6 $NFSD_PORT "v4"
EXPECT "Y" check_nfs $V6 "v6"
EXPECT "Y" check_nfs $V4 "v4"
+# Test a rpcbind crash
+pkill -9 rpcbind && service rpcbind start
+sleep 15
+
+# Test that the port re-registered
+rpcinfo=$(rpcinfo -s | grep nfs | grep -v nfs_acl)
+
+function check_rpcinfo {
+ support=$1
+ type=$2
+
+ if [ ! $support ]; then
+ echo "Y"
+ return
+ fi
+
+ if [ "$type" == "v6" ]; then
+ echo $(echo $rpcinfo | grep tcp6 && echo "Y" || echo "N")
+ else
+ echo $(echo $rpcinfo | grep tcp && echo "Y" || echo "N")
+ fi
+}
+
+EXPECT "Y" check_rpcinfo $IPV4_SUPPORT "v4"
+EXPECT "Y" check_rpcinfo $IPV6_SUPPORT "v6"
+
cleanup;
diff --git a/xlators/nfs/server/src/nfs.c b/xlators/nfs/server/src/nfs.c
index f2e202d0510..e94cb03b771 100644
--- a/xlators/nfs/server/src/nfs.c
+++ b/xlators/nfs/server/src/nfs.c
@@ -33,6 +33,7 @@
#include "syscall.h"
#include "rpcsvc.h"
#include "nfs-messages.h"
+#include "syncop.h"
#define OPT_SERVER_AUX_GIDS "nfs.server-aux-gids"
#define OPT_SERVER_GID_CACHE_TIMEOUT "nfs.server.aux-gid-timeout"
@@ -205,7 +206,8 @@ nfs_program_register_portmap_all (struct nfs_state *nfs)
prog->progport = nfs->override_portnum;
(void) rpcsvc_program_register_portmap (prog, prog->progport);
#ifdef IPV6_DEFAULT
- (void) rpcsvc_program_register_rpcbind6 (prog, prog->progport);
+ (void) rpcsvc_program_register_rpcbind6 (prog, prog->progport,
+ TRUE);
#endif
}
@@ -288,6 +290,55 @@ nfs_deinit_versions (struct list_head *versions, xlator_t *this)
return 0;
}
+/**
+ * rpcbind_register_prog
+ *
+ * Best-effort registration of a single program with rpcbind: over portmap
+ * always and, when built with IPV6_DEFAULT, over rpcbind6 without
+ * unregistering first. A NULL @prog is ignored.
+ *
+ * The program is normally already registered, in which case these calls are
+ * expected to fail. The results are deliberately discarded either way.
+ */
+void rpcbind_register_prog (rpcsvc_program_t *prog)
+{
+        if (prog) {
+                (void) rpcsvc_program_register_portmap (prog, prog->progport);
+#ifdef IPV6_DEFAULT
+                (void) rpcsvc_program_register_rpcbind6 (prog, prog->progport,
+                                                         FALSE);
+#endif
+        }
+}
+
+/**
+ * rpcbind_autoregister_task
+ *
+ * Synctask body that tries to keep NFS registered with rpcbind by
+ * re-registering the program of every entry in nfs->versions. It is
+ * best effort: the outcome of each registration is ignored.
+ *
+ * @arg: the struct nfs_state whose versions list is walked.
+ *
+ * Always returns 0.
+ */
+int rpcbind_autoregister_task (void *arg)
+{
+        struct nfs_state       *nfs     = arg;
+        struct nfs_initer_list *version = NULL;
+        struct nfs_initer_list *tmp     = NULL;
+
+        list_for_each_entry_safe (version, tmp, &nfs->versions, list) {
+                rpcbind_register_prog (version->program);
+        }
+
+        return 0;
+}
+
+/**
+ * nfs_janitor
+ *
+ * Thread body for the NFS janitor. Every 10 seconds it runs
+ * rpcbind_autoregister_task as a synctask so the NFS programs are
+ * re-registered with rpcbind. The loop never exits.
+ *
+ * @arg: the struct nfs_state to service.
+ */
+void *nfs_janitor (void *arg)
+{
+        struct nfs_state   *nfs           = arg;
+        const unsigned int  interval_secs = 10;
+        int                 ret           = 0;
+
+        while (_gf_true) {
+                /* Best effort: a failed round is reported and retried on the
+                 * next tick. */
+                ret = synctask_new (nfs->this->ctx->env,
+                                    rpcbind_autoregister_task, NULL, NULL,
+                                    nfs);
+                if (ret != 0) {
+                        gf_log (GF_NFS, GF_LOG_WARNING,
+                                "Unable to run rpcbind register task");
+                }
+                sleep (interval_secs);
+        }
+
+        /* Not reached; keeps the function well-formed for its return type. */
+        return NULL;
+}
+
int
nfs_init_versions (struct nfs_state *nfs, xlator_t *this)
{
@@ -344,7 +395,8 @@ nfs_init_versions (struct nfs_state *nfs, xlator_t *this)
}
#ifdef IPV6_DEFAULT
ret = rpcsvc_program_register_rpcbind6 (prog,
- prog->progport);
+ prog->progport,
+ TRUE);
if (ret == -1) {
gf_msg (GF_NFS, GF_LOG_ERROR, 0,
NFS_MSG_PGM_REG_FAIL,
@@ -362,6 +414,18 @@ err:
return ret;
}
+/**
+ * nfs_janitor_init
+ *
+ * Start the janitor thread (nfs_janitor) for @nfs and store its handle in
+ * nfs->janitor_thread.
+ *
+ * Returns 0 when the thread was created. Otherwise logs a warning carrying
+ * the pthread_create error and returns -1.
+ */
+int
+nfs_janitor_init (struct nfs_state *nfs)
+{
+        int err = 0;
+
+        err = pthread_create (&nfs->janitor_thread, NULL, nfs_janitor, nfs);
+        if (err == 0) {
+                return 0;
+        }
+
+        gf_log (GF_NFS, GF_LOG_WARNING,
+                "Unable to start rpcbind register thread! Error=%s",
+                strerror (err));
+        return -1;
+}
int
nfs_add_all_initiators (struct nfs_state *nfs)
@@ -773,6 +837,8 @@ nfs_init_state (xlator_t *this)
return NULL;
}
+ nfs->this = this;
+
nfs->memfactor = GF_NFS_DEFAULT_MEMFACTOR;
if (dict_get (this->options, "nfs.mem-factor")) {
ret = dict_get_str (this->options, "nfs.mem-factor",
@@ -1558,6 +1624,13 @@ init (xlator_t *this) {
return (-1);
}
+ ret = nfs_janitor_init (nfs);
+ if (ret) {
+ gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_INIT_FAIL,
+ "Failed to initialize janitor");
+ return (-1);
+ }
+
gf_msg (GF_NFS, GF_LOG_INFO, 0, NFS_MSG_STARTED,
"NFS service started");
return (0); /* SUCCESS */
diff --git a/xlators/nfs/server/src/nfs.h b/xlators/nfs/server/src/nfs.h
index 9bcc88f5548..4f5faf29f6b 100644
--- a/xlators/nfs/server/src/nfs.h
+++ b/xlators/nfs/server/src/nfs.h
@@ -96,6 +96,8 @@ struct nfs_state {
uint32_t server_aux_gids_max_age;
gid_cache_t gid_cache;
uint32_t generation;
+ pthread_t janitor_thread;
+ xlator_t *this;
gf_boolean_t register_portmap;
char *rpc_statd;
char *rpc_statd_pid_file;