diff options
| author | Niels de Vos <ndevos@redhat.com> | 2017-06-21 16:25:33 +0200 | 
|---|---|---|
| committer | Niels de Vos <ndevos@redhat.com> | 2017-07-09 09:12:58 +0000 | 
| commit | fafe1491ead527ba1024c521013aa90d2ee2b355 (patch) | |
| tree | a4237d7f2e08b27391badaa0db6e18c420791519 | |
| parent | 26241777bf59c7d64c582ce09e557bc2dc97dabb (diff) | |
nfs/nlm: handle reconnect for non-NLM4_LOCK requests
When a reply to an NLM procedure gets stuck, the NFS-client will resend
the request. This can happen through a re-connect in case the connection
was terminated (long delay in the reply to the initial request). Once
that happens, not all NLM procedures are handled correctly.
Testing this is difficult and time-consuming. There still may be
problems with certain operations, but this definitely makes it behave
much better than before.
The problem occurred due to a bug in EC; change-id I18a782903ba
addressed the root cause.
Change-Id: I23b385568e27232951fa3fbd7198a0e5d775a8c2
BUG: 1467313
Signed-off-by: Niels de Vos <ndevos@redhat.com>
Reviewed-on: https://review.gluster.org/17698
Smoke: Gluster Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
| -rw-r--r-- | xlators/nfs/server/src/nlm4.c | 101 | 
1 files changed, 79 insertions, 22 deletions
diff --git a/xlators/nfs/server/src/nlm4.c b/xlators/nfs/server/src/nlm4.c index 65642c8db0c..fab9e62cb3a 100644 --- a/xlators/nfs/server/src/nlm4.c +++ b/xlators/nfs/server/src/nlm4.c @@ -919,38 +919,20 @@ nlm4svc_send_granted_cbk (struct rpc_req *req, struct iovec *iov, int count,          return 0;  } -void -nlm4svc_send_granted (nfs3_call_state_t *cs); +static int +nlm_handle_connect (struct rpc_clnt *rpc_clnt, nfs3_call_state_t *cs);  int  nlm_rpcclnt_notify (struct rpc_clnt *rpc_clnt, void *mydata,                      rpc_clnt_event_t fn, void *data)  { -        int                ret         = 0; -        char              *caller_name = NULL;          nfs3_call_state_t *cs          = NULL;          cs = mydata;          switch (fn) {          case RPC_CLNT_CONNECT: -                if (!cs->req) { -                        gf_msg (GF_NLM, GF_LOG_ERROR, EINVAL, -                                NFS_MSG_RPC_CLNT_ERROR, "Spurious notify?!"); -                        goto err; -                } - -                caller_name = cs->args.nlm4_lockargs.alock.caller_name; -                ret = nlm_set_rpc_clnt (rpc_clnt, caller_name); -                if (ret == -1) { -                        gf_msg (GF_NLM, GF_LOG_ERROR, 0, -                                NFS_MSG_RPC_CLNT_ERROR, "Failed to set " -                                "rpc clnt"); -                        goto err; -                } -                nlm4svc_send_granted (cs); -                rpc_clnt_unref (rpc_clnt); - +                nlm_handle_connect (rpc_clnt, cs);                  break;          case RPC_CLNT_MSG: @@ -963,7 +945,6 @@ nlm_rpcclnt_notify (struct rpc_clnt *rpc_clnt, void *mydata,                  break;          } - err:          return 0;  } @@ -2368,6 +2349,82 @@ nlm4svc_sm_notify (struct nlm_sm_status *status)          nlm_cleanup_fds (status->mon_name);  } + +/* RPC_CLNT_CONNECT gets called on (re)connects and should be able to handle + * different NLM requests. 
*/ +static int +nlm_handle_connect (struct rpc_clnt *rpc_clnt, nfs3_call_state_t *cs) +{ +        int                 ret         = -1; +        int                 nlm_proc    = NLM4_NULL; +        struct nlm4_lock   *alock       = NULL; +        char               *caller_name = NULL; + +        if (!cs || !cs->req) { +                gf_msg (GF_NLM, GF_LOG_ERROR, EINVAL, NFS_MSG_RPC_CLNT_ERROR, +                        "Spurious notify?!"); +                goto out; +        } + +        /* NLM4_* actions from nlm4.h */ +        if (cs->req->prognum == NLM_PROGRAM) { +                nlm_proc = cs->req->procnum; +        } else { +                /* hmm, cs->req has not been filled completely */ +                if (cs->resume_fn == nlm4_lock_fd_resume) +                        nlm_proc = NLM4_LOCK; +                else if (cs->resume_fn == nlm4_cancel_fd_resume) +                        nlm_proc = NLM4_CANCEL; +                else if (cs->resume_fn == nlm4_unlock_fd_resume) +                        nlm_proc = NLM4_UNLOCK; +                else { +                        gf_msg (GF_NLM, GF_LOG_ERROR, 0, +                                NFS_MSG_RPC_CLNT_ERROR, "(re)connect with an " +                                "unexpected NLM4 procedure (%d)", nlm_proc); +                        goto out; +                } +        } + +        switch (nlm_proc) { +        case NLM4_LOCK: +                alock = &cs->args.nlm4_lockargs.alock; +                caller_name = alock->caller_name; + +                ret = nlm_set_rpc_clnt (rpc_clnt, caller_name); +                if (ret == -1) { +                        gf_msg (GF_NLM, GF_LOG_ERROR, 0, +                                NFS_MSG_RPC_CLNT_ERROR, "Failed to set " +                                "rpc clnt"); +                        goto out; +                } + +                /* extra ref taken with nlm_set_rpc_clnt() */ +                rpc_clnt_unref (rpc_clnt); + +                nlm4svc_send_granted 
(cs); +                break; + +        case NLM4_CANCEL: +                /* alock = &cs->args.nlm4_cancargs.alock; */ +                ret = nlm4svc_cancel (cs->req); +                break; + +        case NLM4_UNLOCK: +                /* alock = &cs->args.nlm4_unlockargs.alock; */ +                ret = nlm4svc_unlock (cs->req); +                break; + +        default: +                gf_msg (GF_NLM, GF_LOG_ERROR, 0, NFS_MSG_RPC_CLNT_ERROR, +                        "(re)connect with an unexpected NLM4 procedure " +                        "(%d)", nlm_proc); +        } + +out: +        return ret; +} + +  rpcsvc_actor_t  nlm4svc_actors[NLM4_PROC_COUNT] = {          /* 0 */          {"NULL",       NLM4_NULL,         nlm4svc_null,      NULL, 0, DRC_IDEMPOTENT},  | 
