diff options
Diffstat (limited to 'xlators')
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 3 | ||||
| -rw-r--r-- | xlators/mount/fuse/src/fuse-bridge.c | 6 | ||||
| -rw-r--r-- | xlators/protocol/client/src/client-handshake.c | 481 | ||||
| -rw-r--r-- | xlators/protocol/client/src/client-lk.c | 1 | ||||
| -rw-r--r-- | xlators/protocol/client/src/client-mem-types.h | 1 | ||||
| -rw-r--r-- | xlators/protocol/client/src/client.c | 165 | ||||
| -rw-r--r-- | xlators/protocol/client/src/client.h | 16 | ||||
| -rw-r--r-- | xlators/protocol/client/src/client3_1-fops.c | 16 | ||||
| -rw-r--r-- | xlators/protocol/server/src/server-handshake.c | 78 | ||||
| -rw-r--r-- | xlators/protocol/server/src/server-helpers.c | 48 | ||||
| -rw-r--r-- | xlators/protocol/server/src/server-helpers.h | 6 | ||||
| -rw-r--r-- | xlators/protocol/server/src/server-mem-types.h | 1 | ||||
| -rw-r--r-- | xlators/protocol/server/src/server.c | 96 | ||||
| -rw-r--r-- | xlators/protocol/server/src/server.h | 5 | 
14 files changed, 876 insertions, 47 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 923271f0a2e..ac5378b1acb 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -203,6 +203,9 @@ static struct volopt_map_entry glusterd_volopt_map[] = {          {VKEY_FEATURES_LIMIT_USAGE,              "features/quota",            "limit-set", NULL, NO_DOC, 0},          {"features.quota-timeout",               "features/quota",            "timeout", "0", DOC, 0},          {"server.statedump-path",                "protocol/server",           "statedump-path", NULL, NO_DOC, 0}, +        {"client.lk-heal",                       "protocol/client",           "lk-heal", NULL, DOC, 0}, +        {"client.grace-timeout",                 "protocol/client",           "grace-timeout", NULL, DOC, 0}, +        {"server.grace-timeout",                 "protocol/server",           "grace-timeout", NULL, DOC, 0},          {NULL,                                                                }  }; diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index e644290e42d..e44aca1d0c1 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -3066,13 +3066,19 @@ static int  fuse_setlk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                  int32_t op_ret, int32_t op_errno, struct gf_flock *lock)  { +        uint32_t      op    = 0;          fuse_state_t *state = NULL;          state = frame->root->state; +        op    = state->finh->opcode;          if (op_ret == 0) {                  gf_log ("glusterfs-fuse", GF_LOG_TRACE,                          "%"PRIu64": ERR => 0", frame->root->unique); +                fd_lk_insert_and_merge (state->fd, +                                        (op == FUSE_SETLK) ? F_SETLK : F_SETLKW, +                                        &state->lk_lock); +                  send_fuse_err (this, state->finh, 0);          } else {                  if (op_errno == ENOSYS) { diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c index 91cda6d0c45..1896e6b6391 100644 --- a/xlators/protocol/client/src/client-handshake.c +++ b/xlators/protocol/client/src/client-handshake.c @@ -22,6 +22,7 @@  #include "config.h"  #endif +#include "fd-lk.h"  #include "client.h"  #include "xlator.h"  #include "defaults.h" @@ -39,6 +40,18 @@ extern rpc_clnt_prog_t clnt_pmap_prog;  int client_ping_cbk (struct rpc_req *req, struct iovec *iov, int count,                       void *myframe); +int client_set_lk_version_cbk (struct rpc_req *req, struct iovec *iov, +                               int count, void *myframe); + +int client_set_lk_version (xlator_t *this); + +typedef struct client_fd_lk_local { +        int             ref; +        gf_boolean_t    error; +        gf_lock_t       lock; +        clnt_fd_ctx_t *fdctx; +}clnt_fd_lk_local_t; +  /* Handshake */  void @@ -391,6 +404,411 @@ client_notify_parents_child_up (xlator_t *this)  }  int +client_set_lk_version_cbk (struct rpc_req *req, struct iovec *iov, +                           int count, void *myframe) +{ +        int32_t           ret    = -1; +        call_frame_t     *fr     = NULL; +        gf_set_lk_ver_rsp rsp    = {0,}; + +        fr = (call_frame_t *) myframe; +        GF_VALIDATE_OR_GOTO ("client", fr, out); + +        if (req->rpc_status == -1) { +                gf_log (fr->this->name, GF_LOG_WARNING, +                        "received RPC status error"); +                goto out; +        } + +        ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_set_lk_ver_rsp); +        if (ret < 0) +                gf_log (fr->this->name, GF_LOG_WARNING, +                        "xdr decoding failed"); +        else +                gf_log (fr->this->name, GF_LOG_DEBUG, +                        "Server lk version = %d", rsp.lk_ver); + +        ret = 0; +out: +        if (fr) +                STACK_DESTROY (fr->root); + +        return ret; +} + +int +client_set_lk_version (xlator_t *this) +{ +        int                 ret      = -1; +        clnt_conf_t        *conf     = NULL; +        call_frame_t       *frame    = NULL; +        gf_set_lk_ver_req   req      = {0, }; + +        conf = (clnt_conf_t *) this->private; + +        req.lk_ver = client_get_lk_ver (conf); +        req.uid    = this->ctx->process_uuid; + +        gf_log (this->name, GF_LOG_DEBUG, "Sending SET_LK_VERSION"); + +        frame = create_frame (this, this->ctx->pool); +        if (!frame) +                goto out; + +        ret = client_submit_request (this, &req, frame, +                                     conf->handshake, +                                     GF_HNDSK_SET_LK_VER, +                                     client_set_lk_version_cbk, +                                     NULL, NULL, 0, NULL, 0, NULL, +                                     (xdrproc_t)xdr_gf_set_lk_ver_req); +out: +        if (ret < 0) +                gf_log (this->name, GF_LOG_WARNING, +                        "Failed to send SET_LK_VERSION to server"); + +        return ret; +} + +int +client_fd_lk_list_empty (fd_lk_ctx_t *lk_ctx) +{ +        int  ret = 1; + +        GF_VALIDATE_OR_GOTO ("client", lk_ctx, out); + +        LOCK (&lk_ctx->lock); +        { +                ret = list_empty (&lk_ctx->lk_list); +        } +        UNLOCK (&lk_ctx->lock); +out: +        return ret; +} + +int +client_fd_lk_count (fd_lk_ctx_t *lk_ctx) +{ +        int               count     = 0; +        fd_lk_ctx_node_t *fd_lk   = NULL; + +        GF_VALIDATE_OR_GOTO ("client", lk_ctx, err); + +        LOCK (&lk_ctx->lock); +        { +                list_for_each_entry (fd_lk, &lk_ctx->lk_list, next) +                        count++; +        } +        UNLOCK (&lk_ctx->lock); + +        return count; +err: +        return -1; +} + +clnt_fd_lk_local_t * +clnt_fd_lk_local_ref (xlator_t *this, clnt_fd_lk_local_t *local) +{ +        GF_VALIDATE_OR_GOTO (this->name, local, out); + +        LOCK (&local->lock); +        { +                local->ref++; +        } +        UNLOCK (&local->lock); +out: +        return local; +} + +int +clnt_fd_lk_local_unref (xlator_t *this, clnt_fd_lk_local_t *local) +{ +        int   ref = -1; + +        GF_VALIDATE_OR_GOTO (this->name, local, out); + +        LOCK (&local->lock); +        { +                ref = --local->ref; +        } +        UNLOCK (&local->lock); + +        if (ref == 0) { +                LOCK_DESTROY (&local->lock); +                GF_FREE (local); +        } +        ref = 0; +out: +        return ref; +} + +clnt_fd_lk_local_t * +clnt_fd_lk_local_create (clnt_fd_ctx_t *fdctx) +{ +        clnt_fd_lk_local_t *local = NULL; + +        local = GF_CALLOC (1, sizeof (clnt_fd_lk_local_t), +                           gf_client_mt_clnt_fd_lk_local_t); +        if (!local) +                goto out; + +        local->ref    = 1; +        local->error  = _gf_false; +        local->fdctx = fdctx; + +        LOCK_INIT (&local->lock); +out: +        return local; +} + +void +clnt_mark_fd_bad (clnt_conf_t *conf, clnt_fd_ctx_t *fdctx) +{ +        pthread_mutex_lock (&conf->lock); +        { +                fdctx->remote_fd = -1; +        } +        pthread_mutex_unlock (&conf->lock); +} + +// call decrement_reopen_fd_count +int +clnt_release_reopen_fd_cbk (struct rpc_req *req, struct iovec *iov, +                            int count, void *myframe) +{ +        xlator_t       *this   = NULL; +        call_frame_t   *frame  = NULL; +        clnt_conf_t    *conf   = NULL; +        clnt_fd_ctx_t  *fdctx  = NULL; + +        frame  = myframe; +        this   = frame->this; +        fdctx  = (clnt_fd_ctx_t *) frame->local; +        conf   = (clnt_conf_t *) this->private; + +        clnt_mark_fd_bad (conf, fdctx); + +        decrement_reopen_fd_count (this, conf); + +        frame->local = NULL; +        STACK_DESTROY (frame->root); + +        return 0; +} + +int +clnt_release_reopen_fd (xlator_t *this, clnt_fd_ctx_t *fdctx) +{ +        int               ret     = -1; +        clnt_conf_t      *conf    = NULL; +        call_frame_t     *frame   = NULL; +        gfs3_release_req  req     = {{0,},}; + +        conf = (clnt_conf_t *) this->private; + +        frame  = create_frame (THIS, THIS->ctx->pool); +        if (!frame) +                goto out; + +        frame->local = (void *) fdctx; +        req.fd       = fdctx->remote_fd; + +        ret    = client_submit_request (this, &req, frame, conf->fops, +                                        GFS3_OP_RELEASE, +                                        clnt_release_reopen_fd_cbk, NULL, +                                        NULL, 0, NULL, 0, NULL, +                                        (xdrproc_t)xdr_gfs3_releasedir_req); +out: +        if (ret) { +                decrement_reopen_fd_count (this, conf); +                clnt_mark_fd_bad (conf, fdctx); +                if (frame) { +                        frame->local = NULL; +                        STACK_DESTROY (frame->root); +                } +        } + +        return 0; +} + +int +clnt_fd_lk_local_mark_error (xlator_t *this, +                             clnt_fd_lk_local_t *local) +{ +        gf_boolean_t  error = _gf_false; + +        LOCK (&local->lock); +        { +                error        = local->error; +                local->error = _gf_true; +        } +        UNLOCK (&local->lock); + +        if (error) +                clnt_release_reopen_fd (this, local->fdctx); + +        return 0; +} + +// Also, I think in reopen_cbk, the fdctx is added to +// saved_fd list.. avoid that, may cause a problem +// Reason: While the locks on the fd are reacquired, a release +// fop may be received by the client-protocol translator +// which will free the fdctx datastructure. +int +client_reacquire_lock_cbk (struct rpc_req *req, struct iovec *iov, +                           int count, void *myframe) +{ +        int32_t             ret        = -1; +        xlator_t           *this       = NULL; +        gf_common_rsp       rsp        = {0,}; +        call_frame_t       *frame      = NULL; +        clnt_fd_lk_local_t *local      = NULL; + +        frame = (call_frame_t *) myframe; +        this  = frame->this; +        local = (clnt_fd_lk_local_t *) frame->local; + +        if (req->rpc_status == -1) { +                gf_log ("client", GF_LOG_WARNING, +                        "request failed at rpc"); +                goto out; +        } + +        ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_common_rsp); +        if (ret < 0) { +                gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed"); +                goto out; +        } + +        if (rsp.op_ret == -1) { +                gf_log (this->name, GF_LOG_ERROR, "lock request failed"); +                ret = -1; +                goto out; +        } + +        // TODO: Add more info to log. +        gf_log (this->name, GF_LOG_DEBUG, "Reacquired lock"); + +        ret = 0; +out: +        if (ret < 0) +                clnt_fd_lk_local_mark_error (this, local); + +        (void) clnt_fd_lk_local_unref (this, local); +        frame->local = NULL; +        STACK_DESTROY (frame->root); + +        return ret; +} + +int +_client_reacquire_lock (xlator_t *this, clnt_fd_ctx_t *fdctx) +{ +        int32_t             ret       = -1; +        int32_t             gf_cmd    = 0; +        int32_t             gf_type   = 0; +        gfs3_lk_req         req       = {{0,},}; +        struct gf_flock     flock     = {0,}; +        fd_lk_ctx_t        *lk_ctx    = NULL; +        clnt_fd_lk_local_t *local     = NULL; +        fd_lk_ctx_node_t   *fd_lk     = NULL; +        call_frame_t       *frame     = NULL; +        clnt_conf_t        *conf      = NULL; + +        conf   = (clnt_conf_t *) this->private; +        lk_ctx = fdctx->lk_ctx; + +        local = clnt_fd_lk_local_create (fdctx); +        if (!local) { +                clnt_release_reopen_fd (this, fdctx); +                goto out; +        } + +        list_for_each_entry (fd_lk, &lk_ctx->lk_list, next) { +                memcpy (&flock, &fd_lk->user_flock, +                        sizeof (struct gf_flock)); + +                ret = client_cmd_to_gf_cmd (fd_lk->cmd, &gf_cmd); +                if (ret) { +                        gf_log (this->name, GF_LOG_WARNING, +                                "client_cmd_to_gf_cmd failed, " +                                "aborting reacquiring of locks"); +                        break; +                } + +                gf_type   = client_type_to_gf_type (flock.l_type); +                req.fd    = fdctx->remote_fd; +                req.cmd   = gf_cmd; +                req.type  = gf_type; +                (void) gf_proto_flock_from_flock (&req.flock, +                                                  &flock); + +                memcpy (req.gfid, fdctx->inode->gfid, 16); + +                frame = create_frame (THIS, THIS->ctx->pool); +                if (!frame) { +                        ret = -1; +                        break; +                } + +                frame->local          = clnt_fd_lk_local_ref (this, local); +                frame->root->lk_owner = fd_lk->user_flock.l_owner; + +                ret = client_submit_request (this, &req, frame, +                                             conf->fops, GFS3_OP_LK, +                                             client_reacquire_lock_cbk, +                                             NULL, NULL, 0, NULL, 0, NULL, +                                             (xdrproc_t)xdr_gfs3_lk_req); +                if (ret) +                        break; + +                ret   = 0; +                frame = NULL; +        } + +        if (ret) { +                clnt_fd_lk_local_mark_error (this, local); + +                if (frame) { +                        if (frame->local) { +                                clnt_fd_lk_local_unref (this, frame->local); +                                frame->local = NULL; +                        } +                        STACK_DESTROY (frame->root); +                } +        } +        if (local) +                (void) clnt_fd_lk_local_unref (this, local); +out: +        return ret; +} + +int +client_reacquire_lock (xlator_t *this, clnt_fd_ctx_t *fdctx) +{ +        int32_t          ret       = -1; +        fd_lk_ctx_t     *lk_ctx    = NULL; + +        if (client_fd_lk_list_empty (fdctx->lk_ctx)) { +                gf_log (this->name, GF_LOG_WARNING, +                        "fd lock list is empty"); +                decrement_reopen_fd_count (this, (clnt_conf_t *)this->private); +                ret = 0; +                goto out; +        } + +        lk_ctx = fdctx->lk_ctx; + +        LOCK (&lk_ctx->lock); +        { +                ret = _client_reacquire_lock (this, fdctx); +        } +        UNLOCK (&lk_ctx->lock); +out: +        return ret; +} + +int  client3_1_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count,                        void           *myframe)  { @@ -402,11 +820,13 @@ client3_1_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count,          clnt_conf_t   *conf                  = NULL;          clnt_fd_ctx_t *fdctx                 = NULL;          call_frame_t  *frame                 = NULL; +        xlator_t      *this                  = NULL;          frame = myframe;          if (!frame || !frame->this)                  goto out; +        this  = frame->this;          local = frame->local;          conf  = frame->this->private; @@ -454,7 +874,7 @@ client3_1_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count,                  fdctx->remote_fd = rsp.fd;                  if (!fdctx->released) {                          list_add_tail (&fdctx->sfd_pos, &conf->saved_fds); -                        if (!list_empty (&fdctx->lock_list)) +                        if (!client_fd_lk_list_empty (fdctx->lk_ctx))                                  attempt_lock_recovery = _gf_true;                          fdctx = NULL;                  } @@ -463,31 +883,27 @@ client3_1_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count,          ret = 0; -        attempt_lock_recovery = _gf_false; /* temporarily */ - -        if (attempt_lock_recovery) { -                ret = client_attempt_lock_recovery (frame->this, local->fdctx); -                if (ret < 0) { -                        gf_log (frame->this->name, GF_LOG_DEBUG, -                                "lock recovery not attempted on fd"); -                } else { -                        gf_log (frame->this->name, GF_LOG_INFO, -                                "need to attempt lock recovery on %"PRIu64 -                                " open fds", fd_count); -                } +        if (conf->lk_heal && attempt_lock_recovery) { +                /* Delay decrement the reopen fd count untill all the +                   locks corresponding to this fd are acquired.*/ +                gf_log (frame->this->name, GF_LOG_WARNING, "acquiring locks on " +                        "%s", local->loc.path); +                ret = client_reacquire_lock (frame->this, local->fdctx);          } else {                  fd_count = decrement_reopen_fd_count (frame->this, conf);          }  out:          if (fdctx) -                client_fdctx_destroy (frame->this, fdctx); +                client_fdctx_destroy (this, fdctx);          if ((ret < 0) && frame && frame->this && conf)                  decrement_reopen_fd_count (frame->this, conf); -        frame->local = NULL; -        STACK_DESTROY (frame->root); +        if (frame) { +                frame->local = NULL; +                STACK_DESTROY (frame->root); +        }          client_local_wipe (local); @@ -792,7 +1208,8 @@ client_post_handshake (call_frame_t *frame, xlator_t *this)                  }          } else {                  gf_log (this->name, GF_LOG_DEBUG, -                        "no open fds - notifying all parents child up"); +                        "no fds to open - notifying all parents child up"); +                client_set_lk_version (this);                  client_notify_parents_child_up (this);          }  out: @@ -814,6 +1231,7 @@ client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *m          int32_t               op_ret        = 0;          int32_t               op_errno      = 0;          gf_boolean_t          auth_fail     = _gf_false; +        uint32_t              lk_ver        = 0;          frame = myframe;          this  = frame->this; @@ -895,6 +1313,15 @@ client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *m                  goto out;          } +        ret = dict_get_uint32 (reply, "clnt-lk-version", &lk_ver); +        if (ret) { +                gf_log (this->name, GF_LOG_WARNING, +                        "failed to find key 'clnt-lk-version' in the options"); +                goto out; +        } + +        gf_log (this->name, GF_LOG_INFO, "clnt-lk-version = %d, " +                "server-lk-version = %d", client_get_lk_ver (conf), lk_ver);          /* TODO: currently setpeer path is broken */          /*          if (process_uuid && req->conn && @@ -930,8 +1357,15 @@ client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *m          conf->need_different_port = 0; -        /* TODO: more to test */ -        client_post_handshake (frame, frame->this); +        if (lk_ver != client_get_lk_ver (conf)) { +                client_mark_fd_bad (this); +                client_post_handshake (frame, frame->this); +        } else { +                /*TODO: Traverse the saved fd list, and send +                  release to the server on fd's that were closed +                  during grace period */ +                ; +        }  out:          if (auth_fail) { @@ -1043,6 +1477,14 @@ client_setvolume (xlator_t *this, struct rpc_clnt *rpc)                                  "failed to set 'volfile-checksum'");          } +        ret = dict_set_int16 (options, "clnt-lk-version", +                              client_get_lk_ver (conf)); +        if (ret < 0) { +                gf_log (this->name, GF_LOG_WARNING, +                        "failed to set clnt-lk-version(%"PRIu32") in handshake msg", +                        client_get_lk_ver (conf)); +        } +          req.dict.dict_len = dict_serialized_length (options);          if (req.dict.dict_len < 0) {                  gf_log (this->name, GF_LOG_ERROR, @@ -1366,6 +1808,7 @@ char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = {          [GF_HNDSK_SETVOLUME]    = "SETVOLUME",          [GF_HNDSK_GETSPEC]      = "GETSPEC",          [GF_HNDSK_PING]         = "PING", +        [GF_HNDSK_SET_LK_VER]   = "SET_LK_VER"  };  rpc_clnt_prog_t clnt_handshake_prog = { diff --git a/xlators/protocol/client/src/client-lk.c b/xlators/protocol/client/src/client-lk.c index 842e3ec5b62..e99fe774de6 100644 --- a/xlators/protocol/client/src/client-lk.c +++ b/xlators/protocol/client/src/client-lk.c @@ -608,6 +608,7 @@ decrement_reopen_fd_count (xlator_t *this, clnt_conf_t *conf)          if (fd_count == 0) {                  gf_log (this->name, GF_LOG_INFO,                          "last fd open'd/lock-self-heal'd - notifying CHILD-UP"); +                client_set_lk_version (this);                  client_notify_parents_child_up (this);          } diff --git a/xlators/protocol/client/src/client-mem-types.h b/xlators/protocol/client/src/client-mem-types.h index c2aa690b1c5..6bc7daad271 100644 --- a/xlators/protocol/client/src/client-mem-types.h +++ b/xlators/protocol/client/src/client-mem-types.h @@ -29,6 +29,7 @@ enum gf_client_mem_types_ {          gf_client_mt_clnt_req_buf_t,          gf_client_mt_clnt_fdctx_t,          gf_client_mt_clnt_lock_t, +        gf_client_mt_clnt_fd_lk_local_t,          gf_client_mt_end,  };  #endif /* __CLIENT_MEM_TYPES_H__ */ diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c index 229e0191725..8955e237dee 100644 --- a/xlators/protocol/client/src/client.c +++ b/xlators/protocol/client/src/client.c @@ -40,6 +40,81 @@ int client_handshake (xlator_t *this, struct rpc_clnt *rpc);  void client_start_ping (void *data);  int client_init_rpc (xlator_t *this);  int client_destroy_rpc (xlator_t *this); +int client_mark_fd_bad (xlator_t *this); + +int32_t +client_type_to_gf_type (short l_type) +{ +        int32_t  gf_type; + +        switch (l_type) { +        case F_RDLCK: +                gf_type = GF_LK_F_RDLCK; +                break; +        case F_WRLCK: +                gf_type = GF_LK_F_WRLCK; +                break; +        case F_UNLCK: +                gf_type = GF_LK_F_UNLCK; +                break; +        } + +        return gf_type; +} + +uint32_t +client_get_lk_ver (clnt_conf_t *conf) +{ +        uint32_t  lk_ver = 0; + +        GF_VALIDATE_OR_GOTO ("client", conf, out); + +        pthread_mutex_lock (&conf->lock); +        { +                lk_ver = conf->lk_version; +        } +        pthread_mutex_unlock (&conf->lock); +out: +        return lk_ver; +} + +void +client_grace_timeout (void *data) +{ +        int               ver  = 0; +        xlator_t         *this = NULL; +        struct clnt_conf *conf = NULL; +        struct rpc_clnt  *rpc  = NULL; + +        GF_VALIDATE_OR_GOTO ("client", data, out); + +        this = THIS; + +        rpc = (struct rpc_clnt *) data; + +        conf = (struct clnt_conf *) this->private; + +        pthread_mutex_lock (&conf->lock); +        { +                ver = ++conf->lk_version; +                /* ver == 0 is a special value used by server +                   to notify client that this is a fresh connect.*/ +                if (ver == 0) +                        ver = ++conf->lk_version; + +                gf_timer_call_cancel (this->ctx, conf->grace_timer); +                conf->grace_timer = NULL; +        } +        pthread_mutex_unlock (&conf->lock); + +        gf_log (this->name, GF_LOG_WARNING, +                "client grace timer expired, updating " +                "the lk-version to %d", ver); + +        client_mark_fd_bad (this); +out: +        return; +}  int  client_submit_request (xlator_t *this, void *req, call_frame_t *frame, @@ -828,7 +903,6 @@ out:  } -  int32_t  client_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)  { @@ -1455,7 +1529,6 @@ out:  	return 0;  } -  int32_t  client_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,             struct gf_flock *lock) @@ -1841,7 +1914,7 @@ out:  } - int +int  client_mark_fd_bad (xlator_t *this)  {          clnt_conf_t            *conf = NULL; @@ -1908,11 +1981,42 @@ client_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,                                  conf->last_sent_event = GF_EVENT_CHILD_UP;                          }                  } + +                /* Cancel grace timer if set */ +                pthread_mutex_lock (&conf->lock); +                { +                        if (conf->grace_timer) { +                                gf_log (this->name, GF_LOG_WARNING, +                                        "Cancelling the grace timer"); + +                                gf_timer_call_cancel (this->ctx, +                                                      conf->grace_timer); +                                conf->grace_timer = NULL; +                        } +                } +                pthread_mutex_unlock (&conf->lock); +                  break;          }          case RPC_CLNT_DISCONNECT: +                /* client_mark_fd_bad (this); */ -                client_mark_fd_bad (this); +                pthread_mutex_lock (&conf->lock); +                { +                        if (conf->grace_timer) { +                                gf_log (this->name, GF_LOG_DEBUG, +                                        "Client grace timer is already set"); +                        } else { +                                gf_log (this->name, GF_LOG_WARNING, +                                        "Registering a grace timer"); +                                conf->grace_timer = +                                        gf_timer_call_after (this->ctx, +                                                             conf->grace_tv, +                                                             client_grace_timeout, +                                                             conf->rpc); +                        } +                } +                pthread_mutex_unlock (&conf->lock);                  if (!conf->skip_notify) {                          if (conf->connected) @@ -2107,6 +2211,40 @@ out:  int +client_init_grace_timer (xlator_t *this, dict_t *options, +                         clnt_conf_t *conf) +{ +        char     *lk_heal        = NULL; +        int32_t   ret            = -1; +        int32_t   grace_timeout  = -1; + +        GF_VALIDATE_OR_GOTO ("client", this, out); +        GF_VALIDATE_OR_GOTO (this->name, options, out); +        GF_VALIDATE_OR_GOTO (this->name, conf, out); + +        conf->lk_heal = _gf_true; + +        ret = dict_get_str (options, "lk-heal", &lk_heal); +        if (!ret) +                gf_string2boolean (lk_heal, &conf->lk_heal); + +        ret = dict_get_int32 (options, "grace-timeout", &grace_timeout); +        if (!ret) +                conf->grace_tv.tv_sec = grace_timeout; +        else +                conf->grace_tv.tv_sec = 10; + +        conf->grace_tv.tv_usec  = 0; + +        gf_log (this->name, GF_LOG_INFO, "lk-heal = %s", +                (conf->lk_heal) ? "on" : "off"); + +        ret = 0; +out: +        return ret; +} + +int  reconfigure (xlator_t *this, dict_t *options)  {  	clnt_conf_t *conf              = NULL; @@ -2153,6 +2291,10 @@ reconfigure (xlator_t *this, dict_t *options)                  }          } +        ret = client_init_grace_timer (this, options, conf); +        if (ret) +                goto out; +          ret = 0;  out:  	return ret; @@ -2186,6 +2328,14 @@ init (xlator_t *this)          pthread_mutex_init (&conf->lock, NULL);          INIT_LIST_HEAD (&conf->saved_fds); +        /* Initialize parameters for lock self healing*/ +        conf->lk_version  = 1; +        conf->grace_timer = NULL; + +        ret = client_init_grace_timer (this, this->options, conf); +        if (ret) +                goto out; +          LOCK_INIT (&conf->rec_lock);          conf->last_sent_event = -1; /* To start with we don't have any events */ @@ -2207,7 +2357,6 @@ init (xlator_t *this)                  goto out;          } -          ret = client_init_rpc (this);  out:          if (ret) @@ -2409,5 +2558,11 @@ struct volume_options options[] = {          { .key   = {"client-bind-insecure"},            .type  = GF_OPTION_TYPE_BOOL          }, +        { .key   = {"lk-heal"}, +          .type  = GF_OPTION_TYPE_STR +        }, +        { .key   = {"grace-timeout"}, +          .type  = GF_OPTION_TYPE_INT +        },          { .key   = {NULL} },  }; diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h index 2dda451c9cb..00addf34ceb 100644 --- a/xlators/protocol/client/src/client.h +++ b/xlators/protocol/client/src/client.h @@ -29,6 +29,7 @@  #include "client-mem-types.h"  #include "protocol-common.h"  #include "glusterfs3.h" +#include "fd-lk.h"  /* FIXME: Needs to be defined in a common file */  #define CLIENT_CMD_CONNECT    "trusted.glusterfs.client-connect" @@ -91,6 +92,12 @@ typedef struct clnt_conf {          char                   need_different_port; /* flag used to change the                                                         portmap path in case of                                                         'tcp,rdma' on server */ +        gf_boolean_t           lk_heal; +        uint16_t               lk_version; /* this variable is used to distinguish +                                              client-server transaction while +                                              performing lock healing */ +        struct timeval         grace_tv; +        gf_timer_t            *grace_timer;  } clnt_conf_t;  typedef struct _client_fd_ctx { @@ -105,7 +112,7 @@ typedef struct _client_fd_ctx {          char              released;          int32_t           flags;          int32_t           wbflags; - +        fd_lk_ctx_t      *lk_ctx;          pthread_mutex_t   mutex;          struct list_head  lock_list;     /* List of all granted locks on this fd */  } clnt_fd_ctx_t; @@ -211,4 +218,11 @@ int32_t client_dump_locks (char *name, inode_t *inode,                             dict_t *dict);  int client_fdctx_destroy (xlator_t *this, clnt_fd_ctx_t *fdctx); +uint32_t client_get_lk_ver (clnt_conf_t *conf); + +int32_t client_type_to_gf_type (short l_type); + +int client_mark_fd_bad (xlator_t *this); + +int client_set_lk_version (xlator_t *this);  #endif /* !_CLIENT_H */ diff --git a/xlators/protocol/client/src/client3_1-fops.c b/xlators/protocol/client/src/client3_1-fops.c index 76d4fb0d691..4d6d57528f3 100644 --- a/xlators/protocol/client/src/client3_1-fops.c +++ b/xlators/protocol/client/src/client3_1-fops.c @@ -351,6 +351,7 @@ client3_1_open_cbk (struct rpc_req *req, struct iovec *iov, int count,                  fdctx->inode     = inode_ref (fd->inode);                  fdctx->flags     = local->flags;                  fdctx->wbflags   = local->wbflags; +                fdctx->lk_ctx = fd_lk_ctx_ref (fd->lk_ctx);                  INIT_LIST_HEAD (&fdctx->sfd_pos);                  INIT_LIST_HEAD (&fdctx->lock_list); @@ -2279,17 +2280,30 @@ client3_1_releasedir_cbk (struct rpc_req *req, struct iovec *iov, int count,  int  client_fdctx_destroy (xlator_t *this, clnt_fd_ctx_t *fdctx)  { +        clnt_conf_t  *conf = NULL;          call_frame_t *fr = NULL;          int32_t       ret = -1; +        fd_lk_ctx_t  *lk_ctx = NULL;          if (!fdctx)                  goto out; +        conf = (clnt_conf_t *) this->private; +          if (fdctx->remote_fd == -1) {                  gf_log (this->name, GF_LOG_DEBUG, "not a valid fd");                  goto out;          } +        pthread_mutex_lock (&conf->lock); +        { +                lk_ctx = fdctx->lk_ctx; +                fdctx->lk_ctx = NULL; +        } +        pthread_mutex_unlock (&conf->lock); + +        fd_lk_ctx_unref (lk_ctx); +          fr = create_frame (this, this->ctx->pool);          if (fdctx->is_dir) { @@ -4466,7 +4480,6 @@ unwind:          return 0;  } -  int32_t  client3_1_lk (call_frame_t *frame, xlator_t *this,                void *data) @@ -4523,6 +4536,7 @@ client3_1_lk (call_frame_t *frame, xlator_t *this,          req.cmd   = gf_cmd;          req.type  = gf_type;          gf_proto_flock_from_flock (&req.flock, args->flock); +          memcpy (req.gfid, args->fd->inode->gfid, 16);          ret = client_submit_request (this, &req, frame, conf->fops, GFS3_OP_LK, diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c index 2c8cf059be3..374f5a49ae7 100644 --- a/xlators/protocol/server/src/server-handshake.c +++ b/xlators/protocol/server/src/server-handshake.c @@ -354,6 +354,7 @@ server_setvolume (rpcsvc_request_t *req)          int32_t              op_errno      = EINVAL;          int32_t              fop_version   = 0;          int32_t              mgmt_version  = 0; +        uint32_t             lk_version    = 0;          char                *buf           = NULL;          params = dict_new (); @@ -408,8 +409,33 @@ server_setvolume (rpcsvc_request_t *req)                  goto fail;          } +        /*lk_verion :: [1..2^31-1]*/ +        ret = dict_get_uint32 (params, "clnt-lk-version", &lk_version); +        if (ret < 0) { +                ret = dict_set_str (reply, "ERROR", +                                    "lock state verison not supplied"); +                if (ret < 0) +                        gf_log (this->name, GF_LOG_DEBUG, +                                "failed to set error msg"); + +                op_ret = -1; +                op_errno = EINVAL; +                goto fail; +        }          conn = server_connection_get (this, process_uuid); +        if (!conn) { +                op_ret = -1; +                op_errno = ENOMEM; +                goto fail; +        } + +        server_cancel_conn_timer (this, conn); +        if (conn->lk_version != 0 && +            conn->lk_version != lk_version) { +                (void) server_connection_cleanup (this, conn); +        } +          if (req->trans->xl_private != conn)                  req->trans->xl_private = conn; @@ -595,6 +621,12 @@ server_setvolume (rpcsvc_request_t *req)                  gf_log (this->name, GF_LOG_DEBUG,                          "failed to set 'process-uuid'"); +        ret = dict_set_uint32 (reply, "clnt-lk-version", +                               conn->lk_version); +        if (ret) +                gf_log (this->name, GF_LOG_WARNING, +                        "failed to set 'clnt-lk-version'"); +          ret = dict_set_uint64 (reply, "transport-ptr",                                 ((uint64_t) (long) req->trans));          if (ret) @@ -663,12 +695,50 @@ server_ping (rpcsvc_request_t *req)          return 0;  } +int +server_set_lk_version (rpcsvc_request_t *req) +{ +        int                     op_ret          = -1; +        int                     op_errno        = EINVAL; +        gf_set_lk_ver_req       args            = {0, }; +        gf_set_lk_ver_rsp       rsp             = {0,}; +        server_connection_t     *conn           = NULL; +        xlator_t                *this           = NULL; + +        this = req->svc->mydata; +        //TODO: Decide on an appropriate errno for the error-path +        //below +        if (!this) +                goto fail; + +        if (!xdr_to_generic (req->msg[0], &args, +                             (xdrproc_t)xdr_gf_set_lk_ver_req)) { +                //failed to decode msg; +                req->rpc_err = GARBAGE_ARGS; +                goto fail; +        } + +        conn = server_connection_get (this, args.uid); +        conn->lk_version = args.lk_ver; +        server_connection_put (this, conn); + +        rsp.lk_ver   = args.lk_ver; + +        op_ret = 0; +fail: +        rsp.op_ret   = op_ret; +        rsp.op_errno = op_errno; +        server_submit_reply (NULL, req, &rsp, NULL, 0, NULL, +                             (xdrproc_t)xdr_gf_set_lk_ver_rsp); +        return 0; +}  rpcsvc_actor_t gluster_handshake_actors[] = { -        [GF_HNDSK_NULL]      = {"NULL",      GF_HNDSK_NULL,      server_null, NULL, NULL, 0}, -        [GF_HNDSK_SETVOLUME] = {"SETVOLUME", GF_HNDSK_SETVOLUME, server_setvolume, NULL, NULL, 0}, -        [GF_HNDSK_GETSPEC]   = {"GETSPEC",   GF_HNDSK_GETSPEC,   server_getspec, NULL, NULL, 0}, -        [GF_HNDSK_PING]      = {"PING",      GF_HNDSK_PING,      server_ping, NULL, NULL, 0}, +        [GF_HNDSK_NULL]       = {"NULL",      GF_HNDSK_NULL,      server_null, NULL, NULL, 0}, +        [GF_HNDSK_SETVOLUME]  = {"SETVOLUME", GF_HNDSK_SETVOLUME, server_setvolume, NULL, NULL, 0}, +        [GF_HNDSK_GETSPEC]    = {"GETSPEC",   GF_HNDSK_GETSPEC,   server_getspec, NULL, NULL, 0}, +        [GF_HNDSK_PING]       = {"PING",      GF_HNDSK_PING,      server_ping, NULL, NULL, 0}, +        [GF_HNDSK_SET_LK_VER] = {"SET_LK_VER", GF_HNDSK_SET_LK_VER, server_set_lk_version, NULL, NULL },  }; diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c index 4980424d350..9de1082dc94 100644 --- a/xlators/protocol/server/src/server-helpers.c +++ b/xlators/protocol/server/src/server-helpers.c @@ -774,6 +774,7 @@ server_connection_t *  server_connection_get (xlator_t *this, const char *id)  {          server_connection_t *conn = NULL; +        server_connection_t *trav = NULL;          server_conf_t       *conf = NULL;          GF_VALIDATE_OR_GOTO ("server", this, out); @@ -783,20 +784,29 @@ server_connection_get (xlator_t *this, const char *id)          pthread_mutex_lock (&conf->mutex);          { +                list_for_each_entry (trav, &conf->conns, list) { +                        if (!strncmp (trav->id, id, strlen (id))) { +                                conn = trav; +                                conn->ref++; +                                goto unlock; +                        } +                } +                  conn = (void *) GF_CALLOC (1, sizeof (*conn),                                             gf_server_mt_conn_t);                  if (!conn)                          goto unlock;                  conn->id = gf_strdup (id); +                /*'0' denotes uninitialised lock state*/ +                conn->lk_version = 0; +                conn->ref++;                  conn->fdtable = gf_fd_fdtable_alloc ();                  conn->ltable  = gf_lock_table_new ();                  conn->this    = this;                  pthread_mutex_init (&conn->lock, NULL); -                  list_add (&conn->list, &conf->conns); -                conn->ref++;          }  unlock:          pthread_mutex_unlock (&conf->mutex); @@ -982,6 +992,17 @@ out:          return ret;  } +void +put_server_conn_state (xlator_t *this, rpc_transport_t *xprt) +{ +        GF_VALIDATE_OR_GOTO ("server", this, out); +        GF_VALIDATE_OR_GOTO ("server", xprt, out); + +        xprt->xl_private = NULL; +out: +        return; +} +  server_connection_t *  get_server_conn_state (xlator_t *this, rpc_transport_t *xprt)  { @@ -1497,3 +1518,26 @@ gf_server_check_setxattr_cmd (call_frame_t *frame, dict_t *dict)          return 0;  } + +void +server_cancel_conn_timer (xlator_t *this, server_connection_t *conn) +{ +        if (!this || !conn) { +                gf_log (THIS->name, GF_LOG_ERROR, "Invalid arguments to " +                        "cancel connection timer"); +                return; +        } + +        pthread_mutex_lock (&conn->lock); +        { +                if (!conn->timer) +                        goto unlock; + +                gf_timer_call_cancel (this->ctx, conn->timer); +                conn->timer = NULL; +        } +unlock: +        pthread_mutex_unlock (&conn->lock); + +        return; +} diff --git a/xlators/protocol/server/src/server-helpers.h b/xlators/protocol/server/src/server-helpers.h index 844c98c27bf..99ba7e546b4 100644 --- a/xlators/protocol/server/src/server-helpers.h +++ b/xlators/protocol/server/src/server-helpers.h @@ -68,6 +68,12 @@ server_print_request (call_frame_t *frame);  call_frame_t *  get_frame_from_request (rpcsvc_request_t *req); +void +server_cancel_conn_timer (xlator_t *this, server_connection_t *conn); + +void +put_server_conn_state (xlator_t *this, rpc_transport_t *xprt); +  server_connection_t *  get_server_conn_state (xlator_t *this, rpc_transport_t *xptr); diff --git a/xlators/protocol/server/src/server-mem-types.h b/xlators/protocol/server/src/server-mem-types.h index 88bae8cb45d..5438ed6db1a 100644 --- a/xlators/protocol/server/src/server-mem-types.h +++ b/xlators/protocol/server/src/server-mem-types.h @@ -33,6 +33,7 @@ enum gf_server_mem_types_ {          gf_server_mt_dirent_rsp_t,          gf_server_mt_rsp_buf_t,          gf_server_mt_volfile_ctx_t, +        gf_server_mt_timer_data_t,          gf_server_mt_end,  };  #endif /* __SERVER_MEM_TYPES_H__ */ diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c index b0697bb7b9d..b45b77baae0 100644 --- a/xlators/protocol/server/src/server.c +++ b/xlators/protocol/server/src/server.c @@ -36,6 +36,26 @@  #include "authenticate.h"  #include "rpcsvc.h" +void +grace_time_handler (void *data) +{ +        server_connection_t     *conn = NULL; +        xlator_t                *this = NULL; + +        conn = data; +        this = conn->this; + +        GF_VALIDATE_OR_GOTO (THIS->name, conn, out); +        GF_VALIDATE_OR_GOTO (THIS->name, this, out); + +        gf_log (this->name, GF_LOG_INFO, "grace timer expired"); + +        server_cancel_conn_timer (this, conn); +        server_connection_put (this, conn); +out: +        return; +} +  struct iobuf *  gfs_serialize_reply (rpcsvc_request_t *req, void *arg, struct iovec *outmsg,                       xdrproc_t xdrproc) @@ -554,11 +574,10 @@ int  server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,                     void *data)  { -        xlator_t            *this = NULL; -        rpc_transport_t     *xprt = NULL; -        server_connection_t *conn = NULL; -        server_conf_t       *conf = NULL; - +        xlator_t            *this       = NULL; +        rpc_transport_t     *xprt       = NULL; +        server_connection_t *conn       = NULL; +        server_conf_t       *conf       = NULL;          if (!xl || !data) {                  gf_log_callingfn ("server", GF_LOG_WARNING, @@ -589,20 +608,37 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,          }          case RPCSVC_EVENT_DISCONNECT:                  conn = get_server_conn_state (this, xprt); -                if (conn) -                        server_connection_cleanup (this, conn); - -                gf_log (this->name, GF_LOG_INFO, -                        "disconnected connection from %s", -                        xprt->peerinfo.identifier); +                if (!conn) +                        break; +                put_server_conn_state (this, xprt); +                gf_log (this->name, GF_LOG_INFO, "disconnecting connection" +                        "from %s", xprt->peerinfo.identifier);                  list_del (&xprt->list); +                pthread_mutex_lock (&conn->lock); +                { +                        if (conn->timer) +                                goto unlock; + +                        gf_log (this->name, GF_LOG_INFO, "starting a grace " +                                "timer for %s", xprt->name); + +                        conn->timer = gf_timer_call_after (this->ctx, +                                                           conf->grace_tv, +                                                           grace_time_handler, +                                                           conn); +                } +        unlock: +                pthread_mutex_unlock (&conn->lock); +                  break;          case RPCSVC_EVENT_TRANSPORT_DESTROY: -                conn = get_server_conn_state (this, xprt); -                if (conn) -                        server_connection_put (this, conn); +                /*- conn obj has been disassociated from xprt on first +                 *  disconnect. +                 *  conn cleanup and destruction is handed over to +                 *  grace_time_handler or the subsequent handler that 'owns' +                 *  the conn. Nothing left to be done here. */                  break;          default:                  break; @@ -668,6 +704,30 @@ _copy_auth_opt (dict_t *unused,  int +server_init_grace_timer (xlator_t *this, dict_t *options, +                         server_conf_t *conf) +{ +        int32_t   ret            = -1; +        int32_t   grace_timeout  = -1; + +        GF_VALIDATE_OR_GOTO ("server", this, out); +        GF_VALIDATE_OR_GOTO (this->name, options, out); +        GF_VALIDATE_OR_GOTO (this->name, conf, out); + +        ret = dict_get_int32 (options, "grace-timeout", &grace_timeout); +        if (!ret) +                conf->grace_tv.tv_sec = grace_timeout; +        else +                conf->grace_tv.tv_sec = 10; + +        conf->grace_tv.tv_usec  = 0; + +        ret = 0; +out: +        return ret; +} + +int  reconfigure (xlator_t *this, dict_t *options)  { @@ -761,6 +821,7 @@ reconfigure (xlator_t *this, dict_t *options)                                          "Reconfigure not found for transport" );                  }          } +        ret = server_init_grace_timer (this, options, conf);  out:          gf_log ("", GF_LOG_DEBUG, "returning %d", ret); @@ -797,6 +858,10 @@ init (xlator_t *this)          INIT_LIST_HEAD (&conf->xprt_list);          pthread_mutex_init (&conf->mutex, NULL); +        ret = server_init_grace_timer (this, this->options, conf); +        if (ret) +                goto out; +          ret = server_build_config (this, conf);          if (ret)                  goto out; @@ -1032,5 +1097,8 @@ struct volume_options options[] = {            .type          = GF_OPTION_TYPE_PATH,            .default_value = "/tmp"          }, +        {.key  = {"grace-timeout"}, +         .type = GF_OPTION_TYPE_INT, +        },          { .key   = {NULL} },  }; diff --git a/xlators/protocol/server/src/server.h b/xlators/protocol/server/src/server.h index 92785c5a9d6..091a02ccba2 100644 --- a/xlators/protocol/server/src/server.h +++ b/xlators/protocol/server/src/server.h @@ -28,6 +28,7 @@  #include "protocol-common.h"  #include "server-mem-types.h"  #include "glusterfs3.h" +#include "timer.h"  #define DEFAULT_BLOCK_SIZE         4194304   /* 4MB */  #define DEFAULT_VOLUME_FILE_PATH   CONFDIR "/glusterfs.vol" @@ -60,8 +61,10 @@ struct _server_connection {          pthread_mutex_t     lock;          fdtable_t          *fdtable;          struct _lock_table *ltable; +        gf_timer_t         *timer;          xlator_t           *bound_xl;          xlator_t           *this; +        uint32_t           lk_version;  };  typedef struct _server_connection server_connection_t; @@ -92,7 +95,7 @@ struct server_conf {          gf_boolean_t            trace;          char                   *conf_dir;          struct _volfile_ctx    *volfile; - +        struct timeval          grace_tv;          dict_t                 *auth_modules;          pthread_mutex_t         mutex;          struct list_head        conns;  | 
