diff options
| -rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot.c | 150 | ||||
| -rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot.h | 3 | ||||
| -rw-r--r-- | xlators/features/bit-rot/src/stub/bit-rot-common.h | 5 | ||||
| -rw-r--r-- | xlators/features/bit-rot/src/stub/bit-rot-stub.c | 700 | ||||
| -rw-r--r-- | xlators/features/bit-rot/src/stub/bit-rot-stub.h | 3 | 
5 files changed, 435 insertions, 426 deletions
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c index b9adbd6647c..2652f02b4ea 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot.c @@ -485,94 +485,72 @@ br_log_object_path (xlator_t *this, char *op,  }  static void -br_send_dummy_write (xlator_t *this, fd_t *fd, br_child_t *child, -                     dict_t *xdata) +br_trigger_sign (xlator_t *this, br_child_t *child, +                 inode_t *linked_inode, loc_t *loc, gf_boolean_t need_reopen)  { -        struct iovec   iov    = {0, }; -        struct iobref *iobref = NULL; -        struct iobuf  *iobuf  = NULL; -        char          *msg    = NULL; -        size_t         size   = 0; -        int32_t        ret    = -1; - -        GF_VALIDATE_OR_GOTO ("bit-rot", this, out); -        GF_VALIDATE_OR_GOTO (this->name, fd, out); -        GF_VALIDATE_OR_GOTO (this->name, child, out); - -        msg = gf_strdup ("GLUSTERFS"); -        if (!msg) -                goto out; +        fd_t     *fd   = NULL; +        int32_t   ret  = -1; +        uint32_t  val  = 0; +        dict_t   *dict = NULL; +        pid_t     pid  = GF_CLIENT_PID_BITD; -        size = strlen (msg); +        syncopctx_setfspid (&pid); -        iov.iov_base = msg; -        iov.iov_len = size; +        val = (need_reopen == _gf_true) ? BR_OBJECT_REOPEN : BR_OBJECT_RESIGN; -        iobref = iobref_new (); -        if (!iobref) -                goto free_msg; +        dict = dict_new (); +        if (!dict) +                goto out; -        iobuf = iobuf_get2 (this->ctx->iobuf_pool, size); -        if (!iobuf) -                goto free_iobref; +        ret = dict_set_uint32 (dict, BR_REOPEN_SIGN_HINT_KEY, val); +        if (ret) +                goto cleanup_dict; -        iobref_add (iobref, iobuf); +        ret = -1; +        fd = fd_create (linked_inode, 0); +        if (!fd) { +                gf_log (this->name, GF_LOG_ERROR, "Failed to create fd " +                        "[GFID %s]", uuid_utoa (linked_inode->gfid)); +                goto cleanup_dict; +        } -        iov_unload (iobuf_ptr (iobuf), &iov, 1);  /* FIXME!!! */ +        ret = syncop_open (child->xl, loc, O_RDWR, fd, NULL, NULL); +	if (ret) { +                br_log_object (this, "open", linked_inode->gfid, -ret); +                goto unref_fd; +	} -        iov.iov_base = iobuf_ptr (iobuf); -        iov.iov_len = size; +        fd_bind (fd); -        ret = syncop_writev (child->xl, fd, &iov, 1, 0, iobref, 0, xdata, NULL); -        if (ret <= 0) { -                ret = -1; -                gf_log (this->name, GF_LOG_ERROR, -                        "dummy write failed (%s)", strerror (errno)); -                goto free_iobuf; -        } +        ret = syncop_fsetxattr (child->xl, fd, dict, 0, NULL, NULL); +        if (ret) +                br_log_object (this, "fsetxattr", linked_inode->gfid, -ret); -        /* iobref_unbref() takes care of iobuf unref */ -        ret = 0; +        /* passthough: fd_unref() */ - free_iobuf: -        iobuf_unref (iobuf); - free_iobref: -        iobref_unref (iobref); - free_msg: -        GF_FREE (msg); + unref_fd: +        fd_unref (fd); + cleanup_dict: +        dict_unref (dict);   out: -        return; +        if (ret) { +                gf_log (this->name, GF_LOG_WARNING, +                        "Could not trigger signingd for %s (reopen hint: %d)", +                        uuid_utoa (linked_inode->gfid), val); +        }  }  static void -br_object_handle_reopen (xlator_t *this, -                         br_object_t *object, inode_t *linked_inode) +br_object_resign (xlator_t *this, +                  br_object_t *object, inode_t *linked_inode)  { -        int32_t  ret  = -1; -        dict_t  *dict = NULL; -        loc_t    loc  = {0, }; - -        /** -         * Here dict is purposefully not checked for NULL, because at any cost -         * sending a re-open should not be missed. This re-open is an indication -         * for the stub to properly mark inode's status. -         */ -        dict = dict_new (); -        if (dict) { -                /* TODO: Make it a #define */ -                ret = dict_set_int32 (dict, "br-fd-reopen", 1); -                if (ret) -                        gf_log (this->name, GF_LOG_WARNING, -                                "Object reopen would trigger versioning."); -        } +        loc_t loc = {0, };          loc.inode = inode_ref (linked_inode);          gf_uuid_copy (loc.gfid, linked_inode->gfid); -        br_trigger_sign (this, object->child, linked_inode, &loc, dict); +        br_trigger_sign (this, object->child, linked_inode, &loc, _gf_false); -        if (dict) -                dict_unref (dict);          loc_wipe (&loc);  } @@ -618,7 +596,7 @@ static inline int32_t br_sign_object (br_object_t *object)           * actual signing of the object.           */          if (sign_info == BR_SIGN_REOPEN_WAIT) { -                br_object_handle_reopen (this, object, linked_inode); +                br_object_resign (this, object, linked_inode);                  goto unref_inode;          } @@ -903,41 +881,7 @@ out:          return need_sign;  } -void -br_trigger_sign (xlator_t *this, br_child_t *child, inode_t *linked_inode, -                 loc_t *loc, dict_t *xdata) -{ -        fd_t      *fd = NULL; -        int32_t    ret = -1; -        pid_t      pid = GF_CLIENT_PID_BITD; - -        syncopctx_setfspid (&pid); - -        fd = fd_create (linked_inode, 0); -        if (!fd) { -                gf_log (this->name, GF_LOG_ERROR, -                        "Failed to create fd [GFID %s]", -                        uuid_utoa (linked_inode->gfid)); -                goto out; -        } - -        ret = syncop_open (child->xl, loc, O_RDWR, fd, NULL, NULL); -	if (ret) { -                br_log_object (this, "open", linked_inode->gfid, -ret); -		fd_unref (fd); -		fd = NULL; -	} else { -		fd_bind (fd); -	} - -        if (fd) { -                br_send_dummy_write (this, fd, child, xdata); -                syncop_close (fd); -        } -out: -        return; -}  int32_t  br_prepare_loc (xlator_t *this, br_child_t *child, loc_t *parent, @@ -1076,7 +1020,7 @@ bitd_oneshot_crawl (xlator_t *subvol,          gf_log (this->name, GF_LOG_INFO,                  "Triggering signing for %s [GFID: %s | Brick: %s]",                  loc.path, uuid_utoa (linked_inode->gfid), child->brick_path); -        br_trigger_sign (this, child, linked_inode, &loc, NULL); +        br_trigger_sign (this, child, linked_inode, &loc, _gf_true);          ret = 0; diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h index 1705f715f0c..6543be763d6 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot.h @@ -182,8 +182,5 @@ br_prepare_loc (xlator_t *, br_child_t *, loc_t *, gf_dirent_t *, loc_t *);  gf_boolean_t  bitd_is_bad_file (xlator_t *, br_child_t *, loc_t *, fd_t *); -void -br_trigger_sign (xlator_t *this, br_child_t *child, inode_t *linked_inode, -                 loc_t *loc, dict_t *xdata);  #endif /* __BIT_ROT_H__ */ diff --git a/xlators/features/bit-rot/src/stub/bit-rot-common.h b/xlators/features/bit-rot/src/stub/bit-rot-common.h index 7fd584e5970..a8285d2b560 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-common.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-common.h @@ -119,6 +119,11 @@ typedef enum {  /* BitRot stub start time (virtual xattr) */  #define GLUSTERFS_GET_BR_STUB_INIT_TIME  "trusted.glusterfs.bit-rot.stub-init" +/* signing/reopen hint */ +#define BR_OBJECT_RESIGN 0 +#define BR_OBJECT_REOPEN  1 +#define BR_REOPEN_SIGN_HINT_KEY  "trusted.glusterfs.bit-rot.reopen-hint" +  static inline int  br_is_signature_type_valid (int8_t signaturetype)  { diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c index 93db072f671..524c235b549 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c @@ -255,7 +255,7 @@ br_stub_fill_local (br_stub_local_t *local,          local->fopstub = stub;          local->versioningtype = versioningtype;          local->u.context.version = memversion; -        if (fd && !local->u.context.fd) +        if (fd)                  local->u.context.fd = fd_ref (fd);          if (inode)                  local->u.context.inode = inode_ref (inode); @@ -279,6 +279,126 @@ br_stub_cleanup_local (br_stub_local_t *local)          memset (local->u.context.gfid, '\0', sizeof (uuid_t));  } +static int +br_stub_need_versioning (xlator_t *this, +                         fd_t *fd, gf_boolean_t *versioning, +                         gf_boolean_t *modified, br_stub_inode_ctx_t **ctx) +{ +        int32_t ret      = -1; +        uint64_t ctx_addr = 0; +        br_stub_inode_ctx_t *c = NULL; + +        *versioning = _gf_false; +        *modified = _gf_false; + +        ret = br_stub_get_inode_ctx (this, fd->inode, &ctx_addr); +        if (ret < 0) { +                gf_log (this->name, GF_LOG_ERROR, "failed to get the inode " +                        "context for the inode %s", +                        uuid_utoa (fd->inode->gfid)); +                goto error_return; +        } + +        c = (br_stub_inode_ctx_t *) (long) ctx_addr; + +        LOCK (&fd->inode->lock); +        { +                if (__br_stub_is_inode_dirty (c)) +                        *versioning = _gf_true; +                if (__br_stub_is_inode_modified (c)) +                        *modified = _gf_true; +        } +        UNLOCK (&fd->inode->lock); + +        if (ctx) +                *ctx = c; +        return 0; + + error_return: +        return -1; +} + +static int32_t +br_stub_anon_fd_ctx (xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx) +{ +        int32_t  ret = -1; +        br_stub_fd_t *br_stub_fd = NULL; + +        br_stub_fd = br_stub_fd_ctx_get (this, fd); +        if (!br_stub_fd) { +                ret = br_stub_add_fd_to_inode (this, fd, ctx); +                if (ret) { +                        gf_log (this->name, GF_LOG_ERROR, "failed to " +                                "add fd to the inode (gfid: %s)", +                                uuid_utoa (fd->inode->gfid)); +                        goto out; +                } +        } + +        ret = 0; + +out: +        return ret; +} + +static int +br_stub_versioning_prep (call_frame_t *frame, +                         xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx) +{ +        int32_t          ret = -1; +        br_stub_local_t *local   = NULL; + +        local = br_stub_alloc_local (this); +        if (!local) { +                gf_log (this->name, GF_LOG_ERROR, "local allocation failed " +                        "(gfid: %s)", uuid_utoa (fd->inode->gfid)); +                goto error_return; +        } + +        if (fd_is_anonymous (fd)) { +                ret = br_stub_anon_fd_ctx (this, fd, ctx); +                if (ret) +                        goto free_local; +        } + +        frame->local = local; + +        return 0; + + free_local: +        br_stub_dealloc_local (local); + error_return: +        return -1; +} + +static int +br_stub_mark_inode_modified (xlator_t *this, br_stub_local_t *local) +{ +        fd_t                *fd       = NULL; +        int32_t              ret      = 0; +        uint64_t             ctx_addr = 0; +        br_stub_inode_ctx_t *ctx      = NULL; + +        fd = local->u.context.fd; + +        ret = br_stub_get_inode_ctx (this, fd->inode, &ctx_addr); +        if (ret < 0) +                goto error_return; + +        ctx = (br_stub_inode_ctx_t *) (long) ctx_addr; + +        LOCK (&fd->inode->lock); +        { +                __br_stub_set_inode_modified (ctx); +        } +        UNLOCK (&fd->inode->lock); + +        return 0; + + error_return: +        return -1; +} +  /**   * callback for inode/fd versioning   */ @@ -379,7 +499,6 @@ br_stub_fd_versioning (xlator_t *this, call_frame_t *frame,                              fd->inode, fd->inode->gfid,                              versioningtype, memversion); -        frame->local = local;          STACK_WIND (frame, callback,                      FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetxattr,                      fd, dict, flags, xdata); @@ -409,7 +528,6 @@ br_stub_perform_incversioning (xlator_t *this,          writeback_version = __br_stub_writeback_version (ctx); -        /* inode requires writeback to disk */          op_errno = ENOMEM;          dict = dict_new ();          if (!dict) @@ -518,54 +636,160 @@ br_stub_prepare_signature (xlator_t *this, dict_t *dict,          return -1;  } -int -br_stub_fsetxattr (call_frame_t *frame, xlator_t *this, -                   fd_t *fd, dict_t *dict, int flags, dict_t *xdata) +static void +br_stub_handle_object_signature (call_frame_t *frame, +                                 xlator_t *this, fd_t *fd, dict_t *dict, +                                 br_isignature_t *sign, dict_t *xdata)  { -        int32_t          ret  = 0; -        br_isignature_t *sign = NULL; -        gf_boolean_t     xref = _gf_false; +        int32_t      ret  = -1; +        gf_boolean_t xref = _gf_false; -        if (!IA_ISREG (fd->inode->ia_type)) -                goto wind; -        ret = dict_get_bin (dict, GLUSTERFS_SET_OBJECT_SIGNATURE, -                            (void **) &sign); -        if (ret < 0) -                goto wind;          if (frame->root->pid != GF_CLIENT_PID_BITD) -                goto unwind; +                goto dofop;          ret = br_stub_prepare_signature (this, dict, fd->inode, sign);          if (ret) -                goto unwind; +                goto dofop;          dict_del (dict, GLUSTERFS_SET_OBJECT_SIGNATURE); +        ret = -1;          if (!xdata) {                  xdata = dict_new ();                  if (!xdata) -                        goto unwind; +                        goto dofop;          } else {                  dict_ref (xdata);          }          xref = _gf_true;          ret = dict_set_int32 (xdata, GLUSTERFS_DURABLE_OP, 0); + + dofop: +        if (ret) +                STACK_UNWIND_STRICT (fsetxattr, frame, -1, EINVAL, NULL); +        else { +                gf_log (this->name, GF_LOG_DEBUG, "SIGNED VERSION: %lu", +                        sign->signedversion); + +                STACK_WIND (frame, default_setxattr_cbk, FIRST_CHILD (this), +                            FIRST_CHILD (this)->fops->fsetxattr, fd, dict, 0, +                            xdata); +        } + +        if (xref) +                dict_unref (xdata); +} + +int32_t +br_stub_fsetxattr_resume (call_frame_t *frame, void *cookie, xlator_t *this, +                          int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ +        int32_t ret = -1; +        br_stub_local_t *local = NULL; + +        local = frame->local; +        frame->local = NULL; + +        ret = br_stub_mark_inode_modified (this, local); +        if (ret) { +                op_ret = -1; +                op_errno = EINVAL; +        } + +        STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata); + +        br_stub_cleanup_local (local); +        br_stub_dealloc_local (local); + +        return 0; +} + +static void +br_stub_handle_object_reopen (call_frame_t *frame, +                              xlator_t *this, fd_t *fd, uint32_t val) +{ +        int32_t              ret         = -1; +        int32_t              op_ret      = -1; +        int32_t              op_errno    = EINVAL; +        call_stub_t         *stub        = NULL; +        gf_boolean_t         inc_version = _gf_false; +        gf_boolean_t         modified    = _gf_false; +        br_stub_inode_ctx_t *ctx         = NULL; +        br_stub_local_t     *local       = NULL; + +        ret = br_stub_need_versioning (this, fd, &inc_version, &modified, &ctx);          if (ret)                  goto unwind; -        gf_log (this->name, GF_LOG_DEBUG, "SIGNED VERSION: %lu", -                sign->signedversion); +        LOCK (&fd->inode->lock); +        { +                (void) __br_stub_inode_sign_state (ctx, GF_FOP_FSETXATTR, fd); +        } +        UNLOCK (&fd->inode->lock); + +        if ((val == BR_OBJECT_RESIGN) || !inc_version) { +                op_ret = op_errno = 0; +                goto unwind; +        } + +        ret = br_stub_versioning_prep (frame, this, fd, ctx); +        if (ret) +                goto unwind; +        local = frame->local; + +        stub = fop_fsetxattr_cbk_stub (frame, br_stub_fsetxattr_resume, +                                       0, 0, NULL); +        if (!stub) { +                gf_log (this->name, GF_LOG_ERROR, "failed to allocate stub for " +                        "fsetxattr fop (gfid: %s), unwinding", +                        uuid_utoa (fd->inode->gfid)); +                goto cleanup_local; +        } + +        (void) br_stub_perform_incversioning (this, frame, stub, fd, ctx); +        return; + + cleanup_local: +        br_stub_cleanup_local (local); +        br_stub_dealloc_local (local); + + unwind: +        frame->local = NULL; +        STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, NULL); +} + +int +br_stub_fsetxattr (call_frame_t *frame, xlator_t *this, +                   fd_t *fd, dict_t *dict, int flags, dict_t *xdata) +{ +        int32_t          ret  = 0; +        uint32_t val = 0; +        br_isignature_t *sign = NULL; + +        if (!IA_ISREG (fd->inode->ia_type)) +                goto wind; + +        /* object signature request */ +        ret = dict_get_bin (dict, GLUSTERFS_SET_OBJECT_SIGNATURE, +                            (void **) &sign); +        if (!ret) { +                br_stub_handle_object_signature (frame, this, +                                                 fd, dict, sign, xdata); +                goto done; +        } + +        /* object reopen request */ +        ret = dict_get_uint32 (dict, BR_REOPEN_SIGN_HINT_KEY, &val); +        if (!ret) { +                br_stub_handle_object_reopen (frame, this, fd, val); +                goto done; +        } +   wind:          STACK_WIND (frame, default_setxattr_cbk,                      FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetxattr, fd,                      dict, flags, xdata); -        goto done; - - unwind: -        STACK_UNWIND_STRICT (setxattr, frame, -1, EINVAL, NULL);   done: -        if (xref) -                dict_unref (xdata);          return 0;  } @@ -812,110 +1036,29 @@ br_stub_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                      int32_t op_ret, int32_t op_errno, struct iatt *prebuf,                      struct iatt *postbuf, dict_t *xdata)  { -        int32_t              ret         = 0; -        uint64_t             ctx_addr    = 0; -        br_stub_inode_ctx_t *ctx         = NULL; -        br_stub_local_t     *local       = NULL; +        int32_t          ret   = 0; +        br_stub_local_t *local = NULL; -        if (frame->local) { -                local = frame->local; -                frame->local = NULL; -        } +        local = frame->local; +        frame->local = NULL;          if (op_ret < 0)                  goto unwind; -        ret = br_stub_get_inode_ctx (this, local->u.context.fd->inode, -                                     &ctx_addr); -        if (ret < 0) -                goto unwind; - -        ctx = (br_stub_inode_ctx_t *) (long) ctx_addr; - -        /* Mark the flag to indicate the inode has been modified */ -        LOCK (&local->u.context.fd->inode->lock); -        { -                if (!__br_stub_is_inode_modified (ctx)) -                        __br_stub_set_inode_modified (ctx); +        ret = br_stub_mark_inode_modified (this, local); +        if (ret) { +                op_ret = -1; +                op_errno = EINVAL;          } -        UNLOCK (&local->u.context.fd->inode->lock); -  unwind: -        STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, -                             xdata); +        STACK_UNWIND_STRICT (writev, frame, +                             op_ret, op_errno, prebuf, postbuf, xdata); +          br_stub_cleanup_local (local);          br_stub_dealloc_local (local); -        return 0; -} - -/** - * Ongoing version is increased only for the first modify operation. - * First modify version means the first write or truncate call coming on the - * first fd in the list of inodes. - * For anonymous fds open would not have come, so check if its the first write - * by doing both inode dirty check and ensuring list of fds is empty - */ -static inline gf_boolean_t -br_stub_inc_version (xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx) -{ -        gf_boolean_t inc_version = _gf_false; - -        GF_VALIDATE_OR_GOTO (this->name, fd, out); -        GF_VALIDATE_OR_GOTO (this->name, ctx, out); - -        LOCK (&fd->inode->lock); -        { -                if (__br_stub_is_inode_dirty (ctx)) -                        inc_version = _gf_true; -        } -        UNLOCK (&fd->inode->lock); - -out: -        return inc_version; -} - -/** - * Since NFS does not do open, writes from NFS are sent over an anonymous - * fd. It means each write fop might come on a different anonymous fd and - * will lead to very large number of notifications being sent. It might - * affect the perfromance as, there will too many sign requests. - * To avoid that whenever the last fd released from an inode (logical release) - * is an anonymous fd the release notification is sent with a flag being set - * __br_stub_anon_release (ctx); - * BitD checks for the flag and if set, it will send a dummy write request - * (again on an anonymous fd) instead of triggering sign. - * Bit-rot-stub should identify such dummy writes and should send success to - * them instead of winding them downwards. - */ -gf_boolean_t -br_stub_dummy_write (call_frame_t *frame) -{ -        return (frame->root->pid == GF_CLIENT_PID_BITD) -                        ? _gf_true : _gf_false; -} - -int32_t -br_stub_anon_fd_ctx (xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx) -{ -        int32_t  ret = -1; -        br_stub_fd_t *br_stub_fd = NULL; -        br_stub_fd = br_stub_fd_ctx_get (this, fd); -        if (!br_stub_fd) { -                ret = br_stub_add_fd_to_inode (this, fd, ctx); -                if (ret) { -                        gf_log (this->name, GF_LOG_ERROR, "failed to " -                                "add fd to the inode (gfid: %s)", -                                uuid_utoa (fd->inode->gfid)); -                        goto out; -                } -        } - -        ret = 0; - -out: -        return ret; +        return 0;  }  int32_t @@ -923,107 +1066,71 @@ br_stub_writev_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,                         struct iovec *vector, int32_t count, off_t offset,                         uint32_t flags, struct iobref *iobref, dict_t *xdata)  { -        if (frame->root->pid == GF_CLIENT_PID_BITD) -                br_stub_writev_cbk (frame, NULL, this, vector->iov_len, 0, -                                    NULL, NULL, NULL); -        else -                STACK_WIND (frame, br_stub_writev_cbk, FIRST_CHILD(this), -                            FIRST_CHILD(this)->fops->writev, fd, vector, count, -                            offset, flags, iobref, xdata); +        STACK_WIND (frame, br_stub_writev_cbk, FIRST_CHILD(this), +                    FIRST_CHILD(this)->fops->writev, fd, vector, count, +                    offset, flags, iobref, xdata);          return 0;  }  /** -   TODO: If possible add pictorial represention of below comment. - -   Before sending writev on the ANONYMOUS FD, increase the ongoing -   version first. This brings anonymous fd write closer to the regular -   fd write by having the ongoing version increased before doing the -   write (In regular fd, after open the ongoing version is incremented). -   Do following steps to handle writes on anonymous fds: -   1) Increase the on-disk ongoing version -   2) Once versioning is successfully done send write operation. If versioning -      fails, then fail the write fop. -   3) In writev_cbk do below things: -      a) Increase in-memory version -      b) set the fd context (so that br_stub_release is invoked) -      c) add the fd to the list of fds maintained in the inode context of -         bitrot-stub. -      d) Mark inode as non dirty -      e) Mard inode as modified (in the inode context) -**/ + * This is probably the most crucial part about the whole versioning thing. + * There's absolutely no differentiation as such between an anonymous fd + * and a regular fd except the fd context initialization. Object versioning + * is perfomed when the inode is dirty. Parallel write operations are no + * special with each write performing object versioning followed by marking + * the inode as non-dirty (synced). This is followed by the actual operation + * (writev() in this case) which on a success marks the inode as modified. + * This prevents signing of objects that have not been modified. + */  int32_t  br_stub_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,                  struct iovec *vector, int32_t count, off_t offset,                  uint32_t flags, struct iobref *iobref, dict_t *xdata)  { -        br_stub_local_t     *local       = NULL;          call_stub_t         *stub        = NULL;          int32_t              op_ret      = -1;          int32_t              op_errno    = EINVAL;          gf_boolean_t         inc_version = _gf_false; +        gf_boolean_t         modified    = _gf_false;          br_stub_inode_ctx_t *ctx         = NULL; -        uint64_t             ctx_addr    = 0;          int32_t              ret         = -1; +        fop_writev_cbk_t     cbk         = default_writev_cbk; +        br_stub_local_t     *local       = NULL;          GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind);          GF_VALIDATE_OR_GOTO (this->name, frame, unwind);          GF_VALIDATE_OR_GOTO (this->name, fd, unwind); -        local = br_stub_alloc_local (this); -        if (!local) { -                gf_log (this->name, GF_LOG_ERROR, "local allocation failed " -                        "(gfid: %s)", uuid_utoa (fd->inode->gfid)); -                op_ret = -1; -                op_errno = ENOMEM; -                goto unwind; -        } - -        local->u.context.fd = fd_ref (fd); -        frame->local = local; - -        ret = br_stub_get_inode_ctx (this, fd->inode, &ctx_addr); -        if (ret < 0) { -                gf_log (this->name, GF_LOG_ERROR, "failed to get the inode " -                        "context for the inode %s", -                        uuid_utoa (fd->inode->gfid)); +        ret = br_stub_need_versioning (this, fd, &inc_version, &modified, &ctx); +        if (ret)                  goto unwind; -        } -        ctx = (br_stub_inode_ctx_t *) (long) ctx_addr; -        if (fd_is_anonymous (fd)) { -                ret = br_stub_anon_fd_ctx (this, fd, ctx); -                if (ret) -                        goto unwind; -        } - -        /* TODO: Better to do a dummy fsetxattr instead of write. Keep write -           simple */ -        if (br_stub_dummy_write (frame)) { -                LOCK (&fd->inode->lock); -                { -                        (void) __br_stub_inode_sign_state -                                             (ctx, GF_FOP_WRITE, fd); -                } -                UNLOCK (&fd->inode->lock); - -                if (xdata && dict_get (xdata, "br-fd-reopen")) { -                        op_ret = vector->iov_len; -                        op_errno = 0; -                        goto unwind; -                } -        } +        /** +         * The inode is not dirty and also witnessed atleast one successful +         * modification operation. Therefore, subsequent operations need not +         * perform any special tracking. +         */ +        if (!inc_version && modified) +                goto wind;          /** -         * Check whether this is the first write on this inode since the last -         * sign notification has been sent. If so, do versioning. Otherwise -         * go ahead with the fop. +         * okay.. so, either the inode needs versioning or the modification +         * needs to be tracked. ->cbk is set to the appropriate callback +         * routine for this. +         * NOTE: ->local needs to be deallocated on failures from here on.           */ -        inc_version = br_stub_inc_version (this, fd, ctx); -        if (!inc_version) +        ret = br_stub_versioning_prep (frame, this, fd, ctx); +        if (ret) +                goto unwind; + +        local = frame->local; +        if (!inc_version) { +                br_stub_fill_local (local, NULL, fd, fd->inode, +                                    fd->inode->gfid, BR_STUB_NO_VERSIONING, 0); +                cbk = br_stub_writev_cbk;                  goto wind; +        } -        /* Create the stub for the write fop */          stub = fop_writev_stub (frame, br_stub_writev_resume, fd, vector, count,                                  offset, flags, iobref, xdata); @@ -1031,24 +1138,27 @@ br_stub_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,                  gf_log (this->name, GF_LOG_ERROR, "failed to allocate stub for "                          "write fop (gfid: %s), unwinding",                          uuid_utoa (fd->inode->gfid)); -                goto unwind; +                goto cleanup_local;          }          /* Perform Versioning */          return br_stub_perform_incversioning (this, frame, stub, fd, ctx); -wind: -        STACK_WIND (frame, br_stub_writev_cbk, FIRST_CHILD(this), -                    FIRST_CHILD(this)->fops->writev, fd, vector, count, offset, -                    flags, iobref, xdata); + wind: +        STACK_WIND (frame, cbk, FIRST_CHILD(this), +                    FIRST_CHILD(this)->fops->writev, +                    fd, vector, count, offset, flags, iobref, xdata);          return 0; -unwind: + cleanup_local: +        br_stub_cleanup_local (local); +        br_stub_dealloc_local (local); + + unwind:          frame->local = NULL;          STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, NULL, NULL,                               NULL); -        br_stub_cleanup_local (local); -        br_stub_dealloc_local (local); +          return 0;  } @@ -1057,40 +1167,28 @@ br_stub_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                         int32_t op_ret, int32_t op_errno, struct iatt *prebuf,                         struct iatt *postbuf, dict_t *xdata)  { -        int32_t              ret         = 0; -        uint64_t             ctx_addr    = 0; -        br_stub_inode_ctx_t *ctx         = NULL; -        br_stub_local_t     *local       = NULL; +        int32_t          ret   = -1; +        br_stub_local_t *local = NULL; -        if (frame->local) { -                local = frame->local; -                frame->local = NULL; -        } +        local = frame->local; +        frame->local = NULL;          if (op_ret < 0)                  goto unwind; -        ret = br_stub_get_inode_ctx (this, local->u.context.fd->inode, -                                     &ctx_addr); -        if (ret < 0) -                goto unwind; - -        ctx = (br_stub_inode_ctx_t *) (long) ctx_addr; - -        /* Mark the flag to indicate the inode has been modified */ -        LOCK (&local->u.context.fd->inode->lock); -        { -                if (!__br_stub_is_inode_modified (ctx)) -                        __br_stub_set_inode_modified (ctx); +        ret = br_stub_mark_inode_modified (this, local); +        if (ret) { +                op_ret = -1; +                op_errno = EINVAL;          } -        UNLOCK (&local->u.context.fd->inode->lock); -  unwind: -        STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf, postbuf, -                             xdata); +        STACK_UNWIND_STRICT (ftruncate, frame, +                             op_ret, op_errno, prebuf, postbuf, xdata); +          br_stub_cleanup_local (local);          br_stub_dealloc_local (local); +          return 0;  } @@ -1103,6 +1201,7 @@ br_stub_ftruncate_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,          return 0;  } +/* c.f. br_stub_writev() for explanation */  int32_t  br_stub_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,                     off_t offset, dict_t *xdata) @@ -1112,72 +1211,59 @@ br_stub_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,          int32_t              op_ret      = -1;          int32_t              op_errno    = EINVAL;          gf_boolean_t         inc_version = _gf_false; +        gf_boolean_t         modified    = _gf_false;          br_stub_inode_ctx_t *ctx         = NULL; -        uint64_t             ctx_addr    = 0;          int32_t              ret         = -1; +        fop_ftruncate_cbk_t  cbk         = default_ftruncate_cbk;          GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind);          GF_VALIDATE_OR_GOTO (this->name, frame, unwind);          GF_VALIDATE_OR_GOTO (this->name, fd, unwind); -        local = br_stub_alloc_local (this); -        if (!local) { -                gf_log (this->name, GF_LOG_ERROR, "local allocation failed " -                        "(gfid: %s)", uuid_utoa (fd->inode->gfid)); -                op_ret = -1; -                op_errno = ENOMEM; +        ret = br_stub_need_versioning (this, fd, &inc_version, &modified, &ctx); +        if (ret)                  goto unwind; -        } -        local->u.context.fd = fd_ref (fd); -        frame->local = local; +        if (!inc_version && modified) +                goto wind; -        ret = br_stub_get_inode_ctx (this, fd->inode, &ctx_addr); -        if (ret < 0) { -                gf_log (this->name, GF_LOG_ERROR, "failed to get the inode " -                        "context for the inode %s", -                        uuid_utoa (fd->inode->gfid)); +        ret = br_stub_versioning_prep (frame, this, fd, ctx); +        if (ret)                  goto unwind; -        } -        ctx = (br_stub_inode_ctx_t *) (long) ctx_addr; -        if (fd_is_anonymous (fd)) { -                ret = br_stub_anon_fd_ctx (this, fd, ctx); -                if (ret) -                        goto unwind; -        } - -        /** -         * c.f. br_stub_writev() -         */ -        inc_version = br_stub_inc_version (this, fd, ctx); -        if (!inc_version) +        local = frame->local; +        if (!inc_version) { +                br_stub_fill_local (local, NULL, fd, fd->inode, +                                    fd->inode->gfid, BR_STUB_NO_VERSIONING, 0); +                cbk = br_stub_ftruncate_cbk;                  goto wind; +        } -        /* Create the stub for the ftruncate fop */          stub = fop_ftruncate_stub (frame, br_stub_ftruncate_resume, fd, offset,                                     xdata);          if (!stub) {                  gf_log (this->name, GF_LOG_ERROR, "failed to allocate stub for "                          "ftruncate fop (gfid: %s), unwinding",                          uuid_utoa (fd->inode->gfid)); -                goto unwind; +                goto cleanup_local;          } -        /* Perform Versioning */          return br_stub_perform_incversioning (this, frame, stub, fd, ctx); -wind: -        STACK_WIND (frame, br_stub_ftruncate_cbk, FIRST_CHILD(this), + wind: +        STACK_WIND (frame, cbk, FIRST_CHILD(this),                      FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);          return 0; -unwind: + cleanup_local: +        br_stub_cleanup_local (local); +        br_stub_dealloc_local (local); + + unwind:          frame->local = NULL;          STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, NULL, NULL,                               NULL); -        br_stub_cleanup_local (local); -        br_stub_dealloc_local (local); +          return 0;  } @@ -1186,34 +1272,20 @@ br_stub_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                        int32_t op_ret, int32_t op_errno, struct iatt *prebuf,                        struct iatt *postbuf, dict_t *xdata)  { -        int32_t              ret         = 0; -        uint64_t             ctx_addr    = 0; -        br_stub_inode_ctx_t *ctx         = NULL; -        br_stub_local_t     *local       = NULL; +        int32_t          ret   = 0; +        br_stub_local_t *local = NULL; -        if (frame->local) { -                local = frame->local; -                frame->local = NULL; -        } +        local = frame->local; +        frame->local = NULL;          if (op_ret < 0)                  goto unwind; -        ret = br_stub_get_inode_ctx (this, local->u.context.fd->inode, -                                     &ctx_addr); -        if (ret < 0) -                goto unwind; - -        ctx = (br_stub_inode_ctx_t *) (long) ctx_addr; - -        /* Mark the flag to indicate the inode has been modified */ -        LOCK (&local->u.context.fd->inode->lock); -        { -                if (!__br_stub_is_inode_modified (ctx)) -                        __br_stub_set_inode_modified (ctx); +        ret = br_stub_mark_inode_modified (this, local); +        if (ret) { +                op_ret = -1; +                op_errno = EINVAL;          } -        UNLOCK (&local->u.context.fd->inode->lock); -  unwind:          STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf, postbuf, @@ -1243,6 +1315,8 @@ br_stub_truncate_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,   * on an anonymous fd. The fd will be valid till the completion of the   * truncate call. It guarantees that release on this anonymous fd will happen   * after the truncate call and notification is sent after the truncate call. + * + * c.f. br_writev_cbk() for explanation   */  int32_t  br_stub_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, @@ -1253,10 +1327,11 @@ br_stub_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,          int32_t              op_ret      = -1;          int32_t              op_errno    = EINVAL;          gf_boolean_t         inc_version = _gf_false; +        gf_boolean_t         modified    = _gf_false;          br_stub_inode_ctx_t *ctx         = NULL; -        uint64_t             ctx_addr    = 0;          int32_t              ret         = -1;          fd_t                *fd          = NULL; +        fop_truncate_cbk_t   cbk         = default_truncate_cbk;          GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind);          GF_VALIDATE_OR_GOTO (this->name, frame, unwind); @@ -1270,63 +1345,50 @@ br_stub_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,                  goto unwind;          } -        local = br_stub_alloc_local (this); -        if (!local) { -                gf_log (this->name, GF_LOG_ERROR, "local allocation failed " -                        "(gfid: %s)", uuid_utoa (loc->inode->gfid)); -                op_ret = -1; -                op_errno = ENOMEM; +        ret = br_stub_need_versioning (this, fd, &inc_version, &modified, &ctx); +        if (ret)                  goto unwind; -        } - -        local->u.context.fd = fd; -        frame->local = local; -        ret = br_stub_get_inode_ctx (this, loc->inode, &ctx_addr); -        if (ret < 0) { -                gf_log (this->name, GF_LOG_ERROR, "failed to get the inode " -                        "context for the inode %s", -                        uuid_utoa (fd->inode->gfid)); -                goto unwind; -        } +        if (!inc_version && modified) +                goto wind; -        ctx = (br_stub_inode_ctx_t *) (long) ctx_addr; -        ret = br_stub_anon_fd_ctx (this, local->u.context.fd, ctx); +        ret = br_stub_versioning_prep (frame, this, fd, ctx);          if (ret)                  goto unwind; -        /** -         * c.f. br_stub_writev() -         */ -        inc_version = br_stub_inc_version (this, fd, ctx); -        if (!inc_version) +        local = frame->local; +        if (!inc_version) { +                br_stub_fill_local (local, NULL, fd, fd->inode, +                                    fd->inode->gfid, BR_STUB_NO_VERSIONING, 0); +                cbk = br_stub_truncate_cbk;                  goto wind; +        } -        /* Create the stub for the truncate fop */          stub = fop_truncate_stub (frame, br_stub_truncate_resume, loc, offset,                                    xdata);          if (!stub) {                  gf_log (this->name, GF_LOG_ERROR, "failed to allocate stub for "                          "truncate fop (gfid: %s), unwinding",                          uuid_utoa (fd->inode->gfid)); -                goto unwind; +                goto cleanup_local;          } -        /* Perform Versioning */ -        return br_stub_perform_incversioning (this, frame, stub, -                                              local->u.context.fd, ctx); +        return br_stub_perform_incversioning (this, frame, stub, fd, ctx); -wind: -        STACK_WIND (frame, br_stub_truncate_cbk, FIRST_CHILD(this), + wind: +        STACK_WIND (frame, cbk, FIRST_CHILD(this),                      FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);          return 0; -unwind: + cleanup_local: +        br_stub_cleanup_local (local); +        br_stub_dealloc_local (local); + + unwind:          frame->local = NULL;          STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, NULL, NULL,                               NULL); -        br_stub_cleanup_local (local); -        br_stub_dealloc_local (local); +          return 0;  } @@ -1810,7 +1872,7 @@ __br_stub_inode_sign_state (br_stub_inode_ctx_t *ctx,          switch (fop) { -        case GF_FOP_WRITE: +        case GF_FOP_FSETXATTR:                  sign_info = ctx->info_sign = BR_SIGN_QUICK;                  break; diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.h b/xlators/features/bit-rot/src/stub/bit-rot-stub.h index 69e212bb81f..e1e7b383f42 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.h @@ -66,7 +66,8 @@ typedef struct br_stub_local {          } u;  } br_stub_local_t; -#define BR_STUB_INCREMENTAL_VERSIONING (1<<1) +#define BR_STUB_NO_VERSIONING (1 << 0) +#define BR_STUB_INCREMENTAL_VERSIONING (1 << 1)  typedef struct br_stub_private {          gf_boolean_t go;  | 
