diff options
| author | Shreyas Siravara <sshreyas@fb.com> | 2015-10-22 12:29:04 -0700 |
|---|---|---|
| committer | Shreyas Siravara <sshreyas@fb.com> | 2017-08-30 01:23:51 +0000 |
| commit | c87639ad04a3c1a0d1fccbf426ad040d36505693 (patch) | |
| tree | 022b92bdb059b8ded93c508914f4743b2137c2bf | |
| parent | d5bc1267359cf78a5e5d65bd70f51e41239f5e0e (diff) | |
gNFSd: Auto re-register NFS/Mount programs with rpcbind periodically
Summary:
Every once in a while rpcbind crashes and the NFS endpoints disappear
from its registry. This diff periodically re-registers the NFS/Mount
programs with rpcbind, so that we should almost never encounter the case
where we have NFS up and rpcbind down, causing bad endpoints and hanging
mounts for our customers.
Test Plan: Added prove tests + tested on dev server
Reviewers: dph, moox, rwareing
Reviewed By: rwareing
Differential Revision: https://phabricator.fb.com/D2571724
Tasks: 8803558
Change-Id: I35acb2d731185a7b20020cb57bdd4d879e978df4
Signature: t1:2571724:1445555327:3276a4dcc4da71346b09d4aeb46c69dddcc7c5ba
Reviewed-on: https://review.gluster.org/17961
Smoke: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Shreyas Siravara <sshreyas@fb.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
| -rw-r--r-- | rpc/rpc-lib/src/rpcsvc.c | 14 | ||||
| -rw-r--r-- | rpc/rpc-lib/src/rpcsvc.h | 2 | ||||
| -rwxr-xr-x[-rw-r--r--] | tests/basic/accept-v6v4.t | 26 | ||||
| -rw-r--r-- | xlators/nfs/server/src/nfs.c | 77 | ||||
| -rw-r--r-- | xlators/nfs/server/src/nfs.h | 2 |
5 files changed, 112 insertions, 9 deletions
diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c index 87a918ce8cb..617e3cc76ed 100644 --- a/rpc/rpc-lib/src/rpcsvc.c +++ b/rpc/rpc-lib/src/rpcsvc.c @@ -1371,7 +1371,7 @@ rpcsvc_error_reply (rpcsvc_request_t *req) #ifdef IPV6_DEFAULT int -rpcsvc_program_register_rpcbind6 (rpcsvc_program_t *newprog, uint32_t port) +rpcsvc_program_register_rpcbind6 (rpcsvc_program_t *newprog, uint32_t port, gf_boolean_t unregister_first) { const int IP_BUF_LEN = 64; char addr_buf[IP_BUF_LEN]; @@ -1405,11 +1405,13 @@ rpcsvc_program_register_rpcbind6 (rpcsvc_program_t *newprog, uint32_t port) goto out; } - /* Force the unregistration of the program first. - * This call may fail if nothing has been registered, - * which is fine. - */ - rpcsvc_program_unregister_rpcbind6 (newprog); + if (unregister_first) { + /* Force the unregistration of the program first. + * This call may fail if nothing has been registered, + * which is fine. + */ + rpcsvc_program_unregister_rpcbind6 (newprog); + } success = rpcb_set (newprog->prognum, newprog->progver, nc, nb); if (!success) { diff --git a/rpc/rpc-lib/src/rpcsvc.h b/rpc/rpc-lib/src/rpcsvc.h index 33141256cf3..9a2257a6087 100644 --- a/rpc/rpc-lib/src/rpcsvc.h +++ b/rpc/rpc-lib/src/rpcsvc.h @@ -439,7 +439,7 @@ rpcsvc_program_register_portmap (rpcsvc_program_t *newprog, uint32_t port); #ifdef IPV6_DEFAULT extern int -rpcsvc_program_register_rpcbind6 (rpcsvc_program_t *newprog, uint32_t port); +rpcsvc_program_register_rpcbind6 (rpcsvc_program_t *newprog, uint32_t port, gf_boolean_t unregister_first); extern int rpcsvc_program_unregister_rpcbind6 (rpcsvc_program_t *newprog); #endif diff --git a/tests/basic/accept-v6v4.t b/tests/basic/accept-v6v4.t index 7128c12c6be..ce3a1bae7f9 100644..100755 --- a/tests/basic/accept-v6v4.t +++ b/tests/basic/accept-v6v4.t @@ -119,4 +119,30 @@ EXPECT "Y" check_ip_port $V6 $NFSD_PORT "v4" EXPECT "Y" check_nfs $V6 "v6" EXPECT "Y" check_nfs $V4 "v4" +# Test a rpcbind crash +pkill -9 rpcbind && service 
rpcbind start +sleep 15 + +# Test that the port re-registered +rpcinfo=$(rpcinfo -s | grep nfs | grep -v nfs_acl) + +function check_rpcinfo { + support=$1 + type=$2 + + if [ ! $support ]; then + echo "Y" + return + fi + + if [ "$type" == "v6" ]; then + echo $(echo $rpcinfo | grep tcp6 && echo "Y" || echo "N") + else + echo $(echo $rpcinfo | grep tcp && echo "Y" || echo "N") + fi +} + +EXPECT "Y" check_rpcinfo $IPV4_SUPPORT "v4" +EXPECT "Y" check_rpcinfo $IPV6_SUPPORT "v6" + cleanup; diff --git a/xlators/nfs/server/src/nfs.c b/xlators/nfs/server/src/nfs.c index f2e202d0510..e94cb03b771 100644 --- a/xlators/nfs/server/src/nfs.c +++ b/xlators/nfs/server/src/nfs.c @@ -33,6 +33,7 @@ #include "syscall.h" #include "rpcsvc.h" #include "nfs-messages.h" +#include "syncop.h" #define OPT_SERVER_AUX_GIDS "nfs.server-aux-gids" #define OPT_SERVER_GID_CACHE_TIMEOUT "nfs.server.aux-gid-timeout" @@ -205,7 +206,8 @@ nfs_program_register_portmap_all (struct nfs_state *nfs) prog->progport = nfs->override_portnum; (void) rpcsvc_program_register_portmap (prog, prog->progport); #ifdef IPV6_DEFAULT - (void) rpcsvc_program_register_rpcbind6 (prog, prog->progport); + (void) rpcsvc_program_register_rpcbind6 (prog, prog->progport, + TRUE); #endif } @@ -288,6 +290,55 @@ nfs_deinit_versions (struct list_head *versions, xlator_t *this) return 0; } +void rpcbind_register_prog (rpcsvc_program_t *prog) +{ + if (!prog) { + return; + } + + /* + * Attempt to register the program with rpcbind. In 99.9% of cases, + * This call will most likely *always* fail, since the program should already + * be registered. We don't care if this call fails since it is best effort. + */ + rpcsvc_program_register_portmap (prog, prog->progport); +#ifdef IPV6_DEFAULT + rpcsvc_program_register_rpcbind6 (prog, prog->progport, FALSE); +#endif +} + +/** + * rpcbind_autoregister_task + * + * The purpose of this task is to attempt to ensure that NFS stays + * registered with rpcbind. 
The thread is "best effort", and as a + * result we do not care what the result of the call is. + */ +int rpcbind_autoregister_task (void *arg) +{ + struct nfs_state *nfs = arg; + struct nfs_initer_list *version = NULL; + struct nfs_initer_list *tmp = NULL; + rpcsvc_program_t *prog = NULL; + struct list_head *versions = &nfs->versions; + + list_for_each_entry_safe (version, tmp, versions, list) { + rpcbind_register_prog (version->program); + } + + return 0; +} + +void *nfs_janitor (void *arg) +{ + struct nfs_state *nfs = arg; + while (_gf_true) { + synctask_new (nfs->this->ctx->env, rpcbind_autoregister_task, + NULL, NULL, nfs); + sleep (10); + } +} + int nfs_init_versions (struct nfs_state *nfs, xlator_t *this) { @@ -344,7 +395,8 @@ nfs_init_versions (struct nfs_state *nfs, xlator_t *this) } #ifdef IPV6_DEFAULT ret = rpcsvc_program_register_rpcbind6 (prog, - prog->progport); + prog->progport, + TRUE); if (ret == -1) { gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_PGM_REG_FAIL, @@ -362,6 +414,18 @@ err: return ret; } +int +nfs_janitor_init (struct nfs_state *nfs) +{ + int ret = pthread_create (&nfs->janitor_thread, NULL, nfs_janitor, nfs); + if (ret != 0) { + gf_log (GF_NFS, GF_LOG_WARNING, + "Unable to start rpcbind register thread! 
Error=%s", + strerror (ret)); + return -1; + } + return 0; +} int nfs_add_all_initiators (struct nfs_state *nfs) @@ -773,6 +837,8 @@ nfs_init_state (xlator_t *this) return NULL; } + nfs->this = this; + nfs->memfactor = GF_NFS_DEFAULT_MEMFACTOR; if (dict_get (this->options, "nfs.mem-factor")) { ret = dict_get_str (this->options, "nfs.mem-factor", @@ -1558,6 +1624,13 @@ init (xlator_t *this) { return (-1); } + ret = nfs_janitor_init (nfs); + if (ret) { + gf_msg (GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_INIT_FAIL, + "Failed to initialize janitor"); + return (-1); + } + gf_msg (GF_NFS, GF_LOG_INFO, 0, NFS_MSG_STARTED, "NFS service started"); return (0); /* SUCCESS */ diff --git a/xlators/nfs/server/src/nfs.h b/xlators/nfs/server/src/nfs.h index 9bcc88f5548..4f5faf29f6b 100644 --- a/xlators/nfs/server/src/nfs.h +++ b/xlators/nfs/server/src/nfs.h @@ -96,6 +96,8 @@ struct nfs_state { uint32_t server_aux_gids_max_age; gid_cache_t gid_cache; uint32_t generation; + pthread_t janitor_thread; + xlator_t *this; gf_boolean_t register_portmap; char *rpc_statd; char *rpc_statd_pid_file; |
