diff options
Diffstat (limited to 'xlators')
20 files changed, 1318 insertions, 16 deletions
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 5f2d3096f66..c1ec69a5505 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -2609,3 +2609,285 @@ out:
         return 0;
 }
 
+
+
+/* {{{ zerofill */
+
+/*
+ * Unwind the zerofill reply to the caller at most once: main_frame is
+ * claimed and cleared under frame->lock, so a later call finds it NULL
+ * and does nothing.
+ */
+static int
+afr_zerofill_unwind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *local            = NULL;
+        call_frame_t    *main_frame       = NULL;
+
+        local = frame->local;
+
+        LOCK (&frame->lock);
+        {
+                if (local->transaction.main_frame) {
+                        main_frame = local->transaction.main_frame;
+                }
+                local->transaction.main_frame = NULL;
+        }
+        UNLOCK (&frame->lock);
+
+        if (main_frame) {
+                AFR_STACK_UNWIND (zerofill, main_frame, local->op_ret,
+                                  local->op_errno,
+                                  &local->cont.zerofill.prebuf,
+                                  &local->cont.zerofill.postbuf,
+                                  NULL);
+        }
+        return 0;
+}
+
+/*
+ * Per-child reply; @cookie carries the child index.  A failed reply is
+ * reported through afr_transaction_fop_failed().  The pre/post iatts are
+ * taken from the first successful child and overwritten by the read
+ * child's when it succeeds.  The caller is unwound early once
+ * priv->wait_count successes are in and the read child has replied; the
+ * transaction resumes when afr_frame_return() reaches zero.
+ */
+static int
+afr_zerofill_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                     struct iatt *postbuf, dict_t *xdata)
+{
+        afr_local_t       *local             = NULL;
+        afr_private_t     *priv              = NULL;
+        int                child_index       = (long) cookie;
+        int                call_count        = -1;
+        int                need_unwind       = 0;
+        int                read_child        = 0;
+
+        local = frame->local;
+        priv  = this->private;
+
+        read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+
+        LOCK (&frame->lock);
+        {
+                if (child_index == read_child) {
+                        local->read_child_returned = _gf_true;
+                }
+
+                if (afr_fop_failed (op_ret, op_errno)) {
+                        afr_transaction_fop_failed (frame, this, child_index);
+                }
+
+                if (op_ret != -1) {
+                        if (local->success_count == 0) {
+                                local->op_ret = op_ret;
+                                local->cont.zerofill.prebuf  = *prebuf;
+                                local->cont.zerofill.postbuf = *postbuf;
+                        }
+
+                        if (child_index == read_child) {
+                                local->cont.zerofill.prebuf  = *prebuf;
+                                local->cont.zerofill.postbuf = *postbuf;
+                        }
+
+                        local->success_count++;
+
+                        if ((local->success_count >= priv->wait_count)
+                            && local->read_child_returned) {
+                                need_unwind = 1;
+                        }
+                }
+                local->op_errno = op_errno;
+        }
+        UNLOCK (&frame->lock);
+
+        if (need_unwind) {
+                local->transaction.unwind (frame, this);
+        }
+        call_count = afr_frame_return (frame);
+
+        if (call_count == 0) {
+                local->transaction.resume (frame, this);
+        }
+
+        return 0;
+}
+
+/*
+ * Wind zerofill to every child flagged in transaction.pre_op, or resume
+ * the transaction straight away when no child is flagged.
+ */
+static int
+afr_zerofill_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t    *local         = NULL;
+        afr_private_t  *priv          = NULL;
+        int             call_count    = -1;
+        int             i             = 0;
+
+        local = frame->local;
+        priv = this->private;
+
+        call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+                                                     priv->child_count);
+
+        if (call_count == 0) {
+                local->transaction.resume (frame, this);
+                return 0;
+        }
+
+        local->call_count = call_count;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (local->transaction.pre_op[i]) {
+                        STACK_WIND_COOKIE (frame, afr_zerofill_wind_cbk,
+                                           (void *) (long) i,
+                                           priv->children[i],
+                                           priv->children[i]->fops->zerofill,
+                                           local->fd,
+                                           local->cont.zerofill.offset,
+                                           local->cont.zerofill.len,
+                                           NULL);
+
+                        if (!--call_count)
+                                break;
+                }
+        }
+
+        return 0;
+}
+
+/*
+ * Transaction completion: unwind (a no-op if the early unwind already
+ * happened) and destroy the transaction frame.
+ */
+static int
+afr_zerofill_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = NULL;
+
+        local = frame->local;
+
+        local->transaction.unwind (frame, this);
+
+        AFR_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+/*
+ * Hand @frame's local over to a copied transaction frame and start an
+ * AFR_DATA_TRANSACTION.  If the frame copy or the transaction start
+ * fails, the original frame is unwound here with the error.
+ */
+static int
+afr_do_zerofill(call_frame_t *frame, xlator_t *this)
+{
+        call_frame_t  *transaction_frame = NULL;
+        afr_local_t   *local             = NULL;
+        int            op_ret            = -1;
+        int            op_errno          = 0;
+
+        local = frame->local;
+
+        transaction_frame = copy_frame (frame);
+        if (!transaction_frame) {
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        transaction_frame->local = local;
+        frame->local = NULL;
+
+        local->op = GF_FOP_ZEROFILL;
+
+        local->transaction.fop    = afr_zerofill_wind;
+        local->transaction.done   = afr_zerofill_done;
+        local->transaction.unwind = afr_zerofill_unwind;
+
+        local->transaction.main_frame = frame;
+
+        local->transaction.start   = local->cont.zerofill.offset;
+        local->transaction.len     = 0;
+
+        op_ret = afr_transaction (transaction_frame, this,
+                                  AFR_DATA_TRANSACTION);
+        if (op_ret < 0) {
+                op_errno = -op_ret;
+                goto out;
+        }
+
+        op_ret = 0;
+out:
+        if (op_ret < 0) {
+                if (transaction_frame) {
+                        AFR_STACK_DESTROY (transaction_frame);
+                }
+                AFR_STACK_UNWIND (zerofill, frame, op_ret, op_errno, NULL,
+                                  NULL, NULL);
+        }
+
+        return 0;
+}
+
+/*
+ * zerofill fop entry point: refuse split-brained files (EIO), apply the
+ * quorum check, set up the local and run the transaction.
+ */
+int
+afr_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+              size_t len, dict_t *xdata)
+{
+        afr_private_t   *priv               = NULL;
+        afr_local_t     *local              = NULL;
+        call_frame_t    *transaction_frame  = NULL;
+        int              ret                = -1;
+        int              op_errno           = EINVAL;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        priv = this->private;
+
+        if (afr_is_split_brain (this, fd->inode)) {
+                op_errno = EIO;
+                goto out;
+        }
+        QUORUM_CHECK(zerofill, out);
+
+        AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+        local = frame->local;
+
+        ret = afr_local_init (local, priv, &op_errno);
+        if (ret < 0) {
+                goto out;
+        }
+        local->cont.zerofill.offset  = offset;
+        local->cont.zerofill.len = len;
+
+        local->fd = fd_ref (fd);
+
+        afr_open_fd_fix (fd, this);
+
+        afr_do_zerofill(frame, this);
+
+        ret = 0;
+out:
+        if (ret < 0) {
+                if (transaction_frame) {
+                        AFR_STACK_DESTROY (transaction_frame);
+                }
+                AFR_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL,
+                                  NULL, NULL);
+        }
+
+        return 0;
+}
+
+/* }}} */
+
+
diff --git a/xlators/cluster/afr/src/afr-inode-write.h b/xlators/cluster/afr/src/afr-inode-write.h
index 883faae6cb5..8e93ca44aaa 100644
--- a/xlators/cluster/afr/src/afr-inode-write.h
+++ b/xlators/cluster/afr/src/afr-inode-write.h
@@ -75,4 +75,8 @@ afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
 int
 afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
                off_t offset, size_t len, dict_t *xdata);
+
+int
+afr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+             size_t len, dict_t *xdata);
 #endif /* __INODE_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index aa8d002209c..c724eb2ae42 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -499,6 +499,7 @@ struct xlator_fops fops = {
         .fentrylk    = afr_fentrylk,
 	.fallocate   = afr_fallocate,
 	.discard     = afr_discard,
+        .zerofill    = afr_zerofill,
 
         /* inode read */
         .access      = afr_access,
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 12dce541cf3..21064db58d9 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -710,6 +710,14 @@ typedef struct _afr_local {
 			size_t len;
 		} discard;
 
+                struct {
+                        off_t offset;
+                        size_t len;
+                        struct iatt prebuf;
+                        struct iatt postbuf;
+                } zerofill;
+
+
         } cont;
 
         struct {
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index da8923e9b24..8c3449f0b3b 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -695,6 +695,8 @@ int32_t dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
 		      int32_t mode, off_t offset, size_t len, dict_t *xdata);
 int32_t dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd,
 		    off_t offset, size_t len, dict_t *xdata);
+int32_t dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                    off_t offset, size_t len, dict_t *xdata);
 
 int32_t dht_init (xlator_t *this);
 void    dht_fini (xlator_t *this);
diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c
index 9bcd84ae15a..26db8a533c1 100644
--- a/xlators/cluster/dht/src/dht-inode-write.c
+++ b/xlators/cluster/dht/src/dht-inode-write.c
@@ -21,6 +21,7 @@ int dht_truncate2 (xlator_t *this, call_frame_t *frame, int ret);
 int dht_setattr2 (xlator_t *this, call_frame_t *frame, int ret);
 int dht_fallocate2(xlator_t *this, call_frame_t *frame, int op_ret);
 int dht_discard2(xlator_t *this, call_frame_t *frame, int op_ret);
+int dht_zerofill2(xlator_t *this, call_frame_t *frame, int op_ret);
 
 int
 dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
@@ -624,6 +625,156 @@ err:
         return 0;
 }
 
+/*
+ * Reply handler for zerofill.  A failure other than ENOENT is passed
+ * straight back.  A reply to the second attempt (call_cnt != 1) is
+ * unwound, after merging iatts when local->stbuf has blocks.  A reply
+ * to the first attempt is checked for the migration phase flags and may
+ * be handed to the rebalance checks or re-issued via dht_zerofill2().
+ */
+int
+dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                int op_ret, int op_errno, struct iatt *prebuf,
+                struct iatt *postbuf, dict_t *xdata)
+{
+        dht_local_t  *local = NULL;
+        call_frame_t *prev  = NULL;
+        int           ret   = -1;
+
+        GF_VALIDATE_OR_GOTO ("dht", frame, err);
+        GF_VALIDATE_OR_GOTO ("dht", this, out);
+        GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+        GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+        local = frame->local;
+        prev = cookie;
+
+        if ((op_ret == -1) && (op_errno != ENOENT)) {
+                local->op_errno = op_errno;
+                local->op_ret = -1;
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "subvolume %s returned -1 (%s)",
+                        prev->this->name, strerror (op_errno));
+                goto out;
+        }
+
+        if (local->call_cnt != 1) {
+                if (local->stbuf.ia_blocks) {
+                        dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+                        dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+                }
+                goto out;
+        }
+        local->rebalance.target_op_fn = dht_zerofill2;
+        /* Phase 2 of migration */
+        if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+                ret = dht_rebalance_complete_check (this, frame);
+                if (!ret)
+                        return 0;
+        }
+
+        /* Check if the rebalance phase1 is true */
+        if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+                dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+                dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+                ret = fd_ctx_get (local->fd, this, NULL);
+                if (!ret) {
+                        dht_zerofill2 (this, frame, 0);
+                        return 0;
+                }
+                ret = dht_rebalance_in_progress_check (this, frame);
+                if (!ret)
+                        return 0;
+        }
+
+out:
+        DHT_STRIP_PHASE1_FLAGS (postbuf);
+        DHT_STRIP_PHASE1_FLAGS (prebuf);
+        DHT_STACK_UNWIND (zerofill, frame, op_ret, op_errno,
+                          prebuf, postbuf, xdata);
+err:
+        return 0;
+}
+
+/*
+ * Second attempt: wind zerofill to the subvolume stored in the fd ctx,
+ * falling back to local->cached_subvol when none is found there.
+ */
+int
+dht_zerofill2(xlator_t *this, call_frame_t *frame, int op_ret)
+{
+        dht_local_t  *local          = NULL;
+        xlator_t     *subvol         = NULL;
+        uint64_t      tmp_subvol     = 0;
+        int           ret            = -1;
+
+        local = frame->local;
+
+        if (local->fd)
+                ret = fd_ctx_get (local->fd, this, &tmp_subvol);
+        if (!ret)
+                subvol = (xlator_t *)(long)tmp_subvol;
+
+        if (!subvol)
+                subvol = local->cached_subvol;
+
+        local->call_cnt = 2; /* This is the second attempt */
+
+        STACK_WIND(frame, dht_zerofill_cbk, subvol, subvol->fops->zerofill,
+                   local->fd, local->rebalance.offset, local->rebalance.size,
+                   NULL);
+
+        return 0;
+}
+
+/*
+ * zerofill fop entry point: keep offset/len in local->rebalance (read
+ * back by dht_zerofill2()) and wind to the cached subvolume.
+ */
+int
+dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+            size_t len, dict_t *xdata)
+{
+        xlator_t     *subvol       = NULL;
+        int           op_errno     = -1;
+        dht_local_t  *local        = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        local = dht_local_init (frame, NULL, fd, GF_FOP_ZEROFILL);
+        if (!local) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        local->rebalance.offset = offset;
+        local->rebalance.size = len;
+
+        local->call_cnt = 1;
+        subvol = local->cached_subvol;
+        if (!subvol) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "no cached subvolume for fd=%p", fd);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        STACK_WIND (frame, dht_zerofill_cbk, subvol, subvol->fops->zerofill,
+                    fd, offset, len, xdata);
+
+        return 0;
+
+err:
+        op_errno = (op_errno == -1) ? errno : op_errno;
+        DHT_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+
+        return 0;
+}
+
+
+
 /* handle cases of migration here for 'setattr()' calls */
 int
 dht_file_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c
index 0349b63a91f..fc0ca2f7735 100644
--- a/xlators/cluster/dht/src/dht.c
+++ b/xlators/cluster/dht/src/dht.c
@@ -72,6 +72,7 @@ struct xlator_fops fops = {
         .fsetattr    = dht_fsetattr,
 	.fallocate   = dht_fallocate,
 	.discard     = dht_discard,
+        .zerofill    = dht_zerofill,
 };
 
 struct xlator_dumpops dumpops = {
diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c
index 8faaec19673..d366b352c5b 100644
--- a/xlators/cluster/stripe/src/stripe.c
+++ b/xlators/cluster/stripe/src/stripe.c
@@ -4107,6 +4107,190 @@ err:
 }
 
+/*
+ * Reply handler for one per-stripe zerofill.  The reply is folded into
+ * the originating frame's local (mlocal): block counts are summed, the
+ * largest corrected file size is kept and the first failure is latched.
+ * Once every wound call has replied and the winding loop has finished
+ * (mlocal->unwind), the originating frame is unwound.  The per-stripe
+ * frame is destroyed on every path.
+ *
+ * NOTE(review): mlocal is shared by all per-stripe frames but is updated
+ * under this frame's own lock - confirm that this is sufficient.
+ */
 int32_t
+stripe_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                   struct iatt *postbuf, dict_t *xdata)
+{
+        int32_t         callcnt    = 0;
+        stripe_local_t *local      = NULL;
+        stripe_local_t *mlocal     = NULL;
+        call_frame_t   *prev       = NULL;
+        call_frame_t   *mframe     = NULL;
+
+        if (!this || !frame || !frame->local || !cookie) {
+                gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+                goto out;
+        }
+
+        prev  = cookie;
+        local = frame->local;
+        mframe = local->orig_frame;
+        mlocal = mframe->local;
+
+        LOCK(&frame->lock);
+        {
+                callcnt = ++mlocal->call_count;
+
+                if (op_ret == 0) {
+                        mlocal->post_buf = *postbuf;
+                        mlocal->pre_buf = *prebuf;
+
+                        mlocal->prebuf_blocks  += prebuf->ia_blocks;
+                        mlocal->postbuf_blocks += postbuf->ia_blocks;
+
+                        correct_file_size(prebuf, mlocal->fctx, prev);
+                        correct_file_size(postbuf, mlocal->fctx, prev);
+
+                        if (mlocal->prebuf_size < prebuf->ia_size)
+                                mlocal->prebuf_size = prebuf->ia_size;
+                        if (mlocal->postbuf_size < postbuf->ia_size)
+                                mlocal->postbuf_size = postbuf->ia_size;
+                }
+
+                /* return the first failure */
+                if (mlocal->op_ret == 0) {
+                        mlocal->op_ret = op_ret;
+                        mlocal->op_errno = op_errno;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
+                mlocal->pre_buf.ia_size = mlocal->prebuf_size;
+                mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
+                mlocal->post_buf.ia_size = mlocal->postbuf_size;
+                mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
+
+                STRIPE_STACK_UNWIND (zerofill, mframe, mlocal->op_ret,
+                                     mlocal->op_errno, &mlocal->pre_buf,
+                                     &mlocal->post_buf, NULL);
+        }
+out:
+        STRIPE_STACK_DESTROY(frame);
+        return 0;
+}
+
+/*
+ * zerofill fop entry point: split the range [offset, offset + len) on
+ * stripe boundaries and wind one zerofill per piece, each on its own
+ * copied frame, to the subvolume that holds that stripe.
+ */
+int32_t
+stripe_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+               size_t len, dict_t *xdata)
+{
+        stripe_local_t   *local            = NULL;
+        stripe_fd_ctx_t  *fctx             = NULL;
+        int32_t           op_errno         = 1;
+        int32_t           idx              = 0;
+        off_t             offset_offset    = 0;
+        size_t            remaining_size   = 0;
+        off_t             fill_size        = 0;
+        uint64_t          stripe_size      = 0;
+        uint64_t          tmp_fctx         = 0;
+        off_t             dest_offset      = 0;
+        call_frame_t     *fframe           = NULL;
+        stripe_local_t   *flocal           = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+        VALIDATE_OR_GOTO (fd->inode, err);
+
+        inode_ctx_get (fd->inode, this, &tmp_fctx);
+        if (!tmp_fctx) {
+                op_errno = EINVAL;
+                goto err;
+        }
+        fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+        stripe_size = fctx->stripe_size;
+
+        STRIPE_VALIDATE_FCTX (fctx, err);
+
+        remaining_size = len;
+
+        local = mem_get0 (this->local_pool);
+        if (!local) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+        frame->local = local;
+        local->stripe_size = stripe_size;
+        local->fctx = fctx;
+
+        if (!stripe_size) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "Wrong stripe size for the file");
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        while (1) {
+                fframe = copy_frame(frame);
+                if (!fframe) {
+                        op_errno = ENOMEM;
+                        goto err;
+                }
+                flocal = mem_get0(this->local_pool);
+                if (!flocal) {
+                        op_errno = ENOMEM;
+                        goto err;
+                }
+                flocal->orig_frame = frame;
+                fframe->local = flocal;
+
+                idx = (((offset + offset_offset) /
+                        local->stripe_size) % fctx->stripe_count);
+
+                fill_size = (local->stripe_size -
+                             ((offset + offset_offset) % local->stripe_size));
+                if (fill_size > remaining_size)
+                        fill_size = remaining_size;
+
+                remaining_size -= fill_size;
+
+                local->wind_count++;
+                if (remaining_size == 0)
+                        local->unwind = 1;
+
+                dest_offset = offset + offset_offset;
+                if (fctx->stripe_coalesce)
+                        dest_offset = coalesced_offset(dest_offset,
+                                                       local->stripe_size,
+                                                       fctx->stripe_count);
+
+                STACK_WIND(fframe, stripe_zerofill_cbk, fctx->xl_array[idx],
+                           fctx->xl_array[idx]->fops->zerofill, fd,
+                           dest_offset, fill_size, xdata);
+                offset_offset += fill_size;
+                if (remaining_size == 0)
+                        break;
+        }
+
+        return 0;
+err:
+        /* fframe, when set here, was copied but never wound.
+         * NOTE(review): pieces wound on earlier iterations still reply to
+         * @frame after this unwind - confirm that this is handled. */
+        if (fframe)
+                STRIPE_STACK_DESTROY(fframe);
+
+        STRIPE_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+        return 0;
+}
+
+int32_t
 stripe_release (xlator_t *this, fd_t *fd)
 {
 	return 0;
@@ -5558,6 +5742,7 @@ struct xlator_fops fops = {
         .readdirp       = stripe_readdirp,
 	.fallocate	= stripe_fallocate,
 	.discard	= stripe_discard,
+        .zerofill       = stripe_zerofill,
 };
 
 struct xlator_cbks cbks = {
diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c
index 4f27a2e418f..7fb697ae45d 100644
--- a/xlators/debug/io-stats/src/io-stats.c
+++ b/xlators/debug/io-stats/src/io-stats.c
@@ -1746,6 +1746,16 @@
io_stats_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
 	return 0;
 }
 
+int
+io_stats_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                     struct iatt *postbuf, dict_t *xdata)
+{
+        UPDATE_PROFILE_STATS(frame, ZEROFILL);
+        STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, prebuf, postbuf,
+                            xdata);
+        return 0;
+}
 
 int
 io_stats_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
@@ -2441,6 +2451,18 @@ io_stats_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
 	return 0;
 }
 
+int
+io_stats_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+                 size_t len, dict_t *xdata)
+{
+        START_FOP_LATENCY(frame);
+
+        STACK_WIND(frame, io_stats_zerofill_cbk, FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+
+        return 0;
+}
+
 
 int
 io_stats_lk (call_frame_t *frame, xlator_t *this,
@@ -2870,6 +2892,7 @@ struct xlator_fops fops = {
         .fsetattr    = io_stats_fsetattr,
 	.fallocate   = io_stats_fallocate,
 	.discard     = io_stats_discard,
+        .zerofill    = io_stats_zerofill,
 };
 
 struct xlator_cbks cbks = {
diff --git a/xlators/features/marker/src/marker.c b/xlators/features/marker/src/marker.c
index 59152db43c7..6a2c856913e 100644
--- a/xlators/features/marker/src/marker.c
+++ b/xlators/features/marker/src/marker.c
@@ -1953,6 +1953,85 @@ err:
         return 0;
 }
 
+/*
+ * Reply handler for zerofill.  The reply is unwound first; then, on
+ * success and only if a local was attached to the frame, the quota
+ * and/or xtime updates are started for the enabled features.
+ */
+int32_t
+marker_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                   struct iatt *postbuf, dict_t *xdata)
+{
+        marker_local_t     *local   = NULL;
+        marker_conf_t      *priv    = NULL;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_TRACE, "%s occurred during zerofill",
+                        strerror (op_errno));
+        }
+
+        local = (marker_local_t *) frame->local;
+
+        frame->local = NULL;
+
+        STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, prebuf,
+                             postbuf, xdata);
+
+        if (op_ret == -1 || local == NULL)
+                goto out;
+
+        priv = this->private;
+
+        if (priv->feature_enabled & GF_QUOTA)
+                mq_initiate_quota_txn (this, &local->loc);
+
+        if (priv->feature_enabled & GF_XTIME)
+                marker_xtime_update_marks (this, local);
+out:
+        marker_local_unref (local);
+
+        return 0;
+}
+
+/*
+ * zerofill fop entry point.  With no marker feature enabled the call is
+ * passed straight down; otherwise a local is allocated, initialised via
+ * MARKER_INIT_LOCAL and given the inode's loc before winding.
+ */
+int32_t
+marker_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+               size_t len, dict_t *xdata)
+{
+        int32_t          ret   = 0;
+        marker_local_t  *local = NULL;
+        marker_conf_t   *priv  = NULL;
+
+        priv = this->private;
+
+        if (priv->feature_enabled == 0)
+                goto wind;
+
+        local = mem_get0 (this->local_pool);
+        if (!local)
+                goto err;
+
+        MARKER_INIT_LOCAL (frame, local);
+
+        ret = marker_inode_loc_fill (fd->inode, &local->loc);
+
+        if (ret == -1)
+                goto err;
+wind:
+        STACK_WIND (frame, marker_zerofill_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+        return 0;
+err:
+        STACK_UNWIND_STRICT (zerofill, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+        return 0;
+}
+
 
 /* when a call from the special client is received on
  * key trusted.glusterfs.volume-mark with value "RESET"
@@ -2778,6 +2857,7 @@ struct xlator_fops fops = {
         .readdirp    = marker_readdirp,
 	.fallocate   = marker_fallocate,
 	.discard     = marker_discard,
+        .zerofill    = marker_zerofill,
 };
 
 struct xlator_cbks cbks = {
diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c
index 68ca4c56515..201777b380e
100644 --- a/xlators/performance/io-cache/src/io-cache.c +++ b/xlators/performance/io-cache/src/io-cache.c @@ -1449,6 +1449,31 @@ ioc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,         return 0;  } +static int32_t +ioc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +                int32_t op_ret, int32_t op_errno, struct iatt *pre, +                struct iatt *post, dict_t *xdata) +{ +        STACK_UNWIND_STRICT(zerofill, frame, op_ret, +                            op_errno, pre, post, xdata); +        return 0; +} + +static int32_t +ioc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +            size_t len, dict_t *xdata) +{ +        uint64_t ioc_inode = 0; + +        inode_ctx_get (fd->inode, this, &ioc_inode); + +        if (ioc_inode) +                ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); + +        STACK_WIND(frame, ioc_zerofill_cbk, FIRST_CHILD(this), +                   FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata); +       return 0; +}  int32_t @@ -2077,6 +2102,7 @@ struct xlator_fops fops = {          .readdirp    = ioc_readdirp,  	.discard     = ioc_discard, +        .zerofill    = ioc_zerofill,  }; diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c index a5fcd0300d0..bbcf4ed26ca 100644 --- a/xlators/performance/io-threads/src/io-threads.c +++ b/xlators/performance/io-threads/src/io-threads.c @@ -309,6 +309,7 @@ iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub)          case GF_FOP_RCHECKSUM:  	case GF_FOP_FALLOCATE:  	case GF_FOP_DISCARD: +        case GF_FOP_ZEROFILL:                  pri = IOT_PRI_LO;                  break; @@ -2510,6 +2511,55 @@ out:  }  int +iot_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +                  int32_t op_ret, int32_t op_errno, +                  struct iatt *preop, struct iatt *postop, dict_t *xdata) +{ +        
STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, preop, postop, +                             xdata); +        return 0; +} + +int +iot_zerofill_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd, +                     off_t offset, size_t len, dict_t *xdata) +{ +        STACK_WIND (frame, iot_zerofill_cbk, FIRST_CHILD (this), +                    FIRST_CHILD (this)->fops->zerofill, fd, offset, len, xdata); +        return 0; +} + +int +iot_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +            size_t len, dict_t *xdata) +{ +        call_stub_t     *stub     = NULL; +        int              ret      = -1; + +        stub = fop_zerofill_stub(frame, iot_zerofill_wrapper, fd, +                                 offset, len, xdata); +        if (!stub) { +                gf_log (this->name, GF_LOG_ERROR, "cannot create zerofill stub" +                        "(out of memory)"); +                ret = -ENOMEM; +                goto out; +        } + +        ret = iot_schedule (frame, this, stub); + +out: +        if (ret < 0) { +                STACK_UNWIND_STRICT (zerofill, frame, -1, -ret, NULL, NULL, +                                     NULL); +                if (stub != NULL) { +                        call_stub_destroy (stub); +                } +        } +        return 0; +} + + +int  __iot_workers_scale (iot_conf_t *conf)  {          int       scale = 0; @@ -2840,6 +2890,7 @@ struct xlator_fops fops = {          .rchecksum   = iot_rchecksum,  	.fallocate   = iot_fallocate,  	.discard     = iot_discard, +        .zerofill    = iot_zerofill,  };  struct xlator_cbks cbks; diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c index 36d81887c7b..3a5b7a5d1ae 100644 --- a/xlators/performance/md-cache/src/md-cache.c +++ b/xlators/performance/md-cache/src/md-cache.c @@ -2098,6 +2098,46 @@ int mdc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,  }  int 
+mdc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +                int32_t op_ret, int32_t op_errno, +                struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) +{ +        mdc_local_t  *local = NULL; + +        local = frame->local; + +        if (op_ret != 0) +                goto out; + +        if (!local) +                goto out; + +        mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf); + +out: +        MDC_STACK_UNWIND(zerofill, frame, op_ret, op_errno, prebuf, postbuf, +                         xdata); + +        return 0; +} + +int mdc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +                size_t len, dict_t *xdata) +{ +        mdc_local_t *local; + +        local = mdc_local_get(frame); +        local->fd = fd_ref(fd); + +        STACK_WIND(frame, mdc_zerofill_cbk, FIRST_CHILD(this), +                   FIRST_CHILD(this)->fops->zerofill, fd, offset, len, +                   xdata); + +        return 0; +} + + +int  mdc_forget (xlator_t *this, inode_t *inode)  {          mdc_inode_wipe (this, inode); @@ -2229,6 +2269,7 @@ struct xlator_fops fops = {  	.readdir     = mdc_readdir,  	.fallocate   = mdc_fallocate,  	.discard     = mdc_discard, +        .zerofill    = mdc_zerofill,  }; diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c index df4027509a9..7e5b5727872 100644 --- a/xlators/performance/open-behind/src/open-behind.c +++ b/xlators/performance/open-behind/src/open-behind.c @@ -720,6 +720,26 @@ err:  }  int +ob_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +           size_t len, dict_t *xdata) +{ +        call_stub_t *stub; + +        stub = fop_zerofill_stub(frame, default_zerofill_resume, fd, +                                 offset, len, xdata); +        if (!stub) +                goto err; + +        open_and_resume(this, fd, stub); + +        return 0; +err: +        
STACK_UNWIND_STRICT(zerofill, frame, -1, ENOMEM, NULL, NULL, NULL); +        return 0; +} + + +int  ob_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,  	   dict_t *xdata)  { @@ -946,6 +966,7 @@ struct xlator_fops fops = {  	.fsetattr    = ob_fsetattr,  	.fallocate   = ob_fallocate,  	.discard     = ob_discard, +        .zerofill    = ob_zerofill,  	.unlink      = ob_unlink,  	.rename      = ob_rename,  	.lk          = ob_lk, diff --git a/xlators/performance/read-ahead/src/read-ahead.c b/xlators/performance/read-ahead/src/read-ahead.c index 241fa477fda..069ab1f1a91 100644 --- a/xlators/performance/read-ahead/src/read-ahead.c +++ b/xlators/performance/read-ahead/src/read-ahead.c @@ -993,6 +993,57 @@ unwind:  }  int +ra_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +               int32_t op_ret, int32_t op_errno, struct iatt *prebuf, +               struct iatt *postbuf, dict_t *xdata) +{ +        GF_ASSERT (frame); + +        STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, prebuf, +                             postbuf, xdata); +        return 0; +} + +static int +ra_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +             size_t len, dict_t *xdata) +{ +        ra_file_t *file    = NULL; +        fd_t      *iter_fd = NULL; +        inode_t   *inode   = NULL; +        uint64_t  tmp_file = 0; +        int32_t   op_errno = EINVAL; + +        GF_ASSERT (frame); +        GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); +        GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); + +        inode = fd->inode; + +        LOCK (&inode->lock); +        { +                list_for_each_entry (iter_fd, &inode->fd_list, inode_list) { +                        fd_ctx_get (iter_fd, this, &tmp_file); +                        file = (ra_file_t *)(long)tmp_file; +                        if (!file) +                                continue; + +                        flush_region(frame, file, offset, len, 
1); +                } +        } +        UNLOCK (&inode->lock); + +        STACK_WIND (frame, ra_zerofill_cbk, FIRST_CHILD (this), +                    FIRST_CHILD (this)->fops->zerofill, fd, +                    offset, len, xdata); +        return 0; + +unwind: +        STACK_UNWIND_STRICT (zerofill, frame, -1, op_errno, NULL, NULL, NULL); +        return 0; +} + +int  ra_priv_dump (xlator_t *this)  {          ra_conf_t       *conf                           = NULL; @@ -1173,6 +1224,7 @@ struct xlator_fops fops = {          .ftruncate   = ra_ftruncate,          .fstat       = ra_fstat,  	.discard     = ra_discard, +        .zerofill    = ra_zerofill,  };  struct xlator_cbks cbks = { diff --git a/xlators/protocol/client/src/client-rpc-fops.c b/xlators/protocol/client/src/client-rpc-fops.c index 38f4391145d..6355450c393 100644 --- a/xlators/protocol/client/src/client-rpc-fops.c +++ b/xlators/protocol/client/src/client-rpc-fops.c @@ -2048,6 +2048,62 @@ out:  }  int +client3_3_zerofill_cbk(struct rpc_req *req, struct iovec *iov, int count, +                      void *myframe) +{ +        call_frame_t    *frame         = NULL; +        gfs3_zerofill_rsp rsp          = {0,}; +        struct iatt      prestat       = {0,}; +        struct iatt      poststat      = {0,}; +        int              ret           = 0; +        xlator_t *this                 = NULL; +        dict_t  *xdata                 = NULL; + +        this = THIS; + +        frame = myframe; + +        if (-1 == req->rpc_status) { +                rsp.op_ret   = -1; +                rsp.op_errno = ENOTCONN; +                goto out; +        } +        ret = xdr_to_generic(*iov, &rsp, (xdrproc_t) xdr_gfs3_zerofill_rsp); +        if (ret < 0) { +                gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed"); +                rsp.op_ret   = -1; +                rsp.op_errno = EINVAL; +                goto out; +        } + +        if (-1 != rsp.op_ret) { +                gf_stat_to_iatt 
(&rsp.statpre, &prestat); +                gf_stat_to_iatt (&rsp.statpost, &poststat); +        } + +        GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val), +                                      (rsp.xdata.xdata_len), ret, +                                      rsp.op_errno, out); + +out: +        if (rsp.op_ret == -1) { +                gf_log (this->name, GF_LOG_WARNING, +                        "remote operation failed: %s", +                        strerror (gf_error_to_errno (rsp.op_errno))); +        } +        CLIENT_STACK_UNWIND (zerofill, frame, rsp.op_ret, +                             gf_error_to_errno (rsp.op_errno), &prestat, +                             &poststat, xdata); + +        free (rsp.xdata.xdata_val); + +        if (xdata) +                dict_unref (xdata); + +        return 0; +} + +int  client3_3_setattr_cbk (struct rpc_req *req, struct iovec *iov, int count,                         void *myframe)  { @@ -5987,6 +6043,50 @@ unwind:          return 0;  } +int32_t +client3_3_zerofill(call_frame_t *frame, xlator_t *this, void *data) +{ +        clnt_args_t       *args        = NULL; +        int64_t            remote_fd   = -1; +        clnt_conf_t       *conf        = NULL; +        gfs3_zerofill_req   req        = {{0},}; +        int                op_errno    = ESTALE; +        int                ret         = 0; + +        if (!frame || !this || !data) +                goto unwind; + +        args = data; +        conf = this->private; + +        CLIENT_GET_REMOTE_FD (this, args->fd, DEFAULT_REMOTE_FD, +                              remote_fd, op_errno, unwind); + +        req.fd = remote_fd; +        req.offset = args->offset; +        req.size = args->size; +        memcpy(req.gfid, args->fd->inode->gfid, 16); + +        GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val), +                                    req.xdata.xdata_len, op_errno, unwind); + +        ret = client_submit_request(this, &req, 
frame, conf->fops, +                                    GFS3_OP_ZEROFILL, client3_3_zerofill_cbk, +                                    NULL, NULL, 0, NULL, 0, NULL, +                                    (xdrproc_t) xdr_gfs3_zerofill_req); +        if (ret) +                gf_log (this->name, GF_LOG_WARNING, "failed to send the fop"); + +        GF_FREE (req.xdata.xdata_val); + +        return 0; +unwind: +        CLIENT_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL); +        GF_FREE (req.xdata.xdata_val); + +        return 0; +} +  /* Table Specific to FOPS */ @@ -6034,6 +6134,7 @@ rpc_clnt_procedure_t clnt3_3_fop_actors[GF_FOP_MAXVALUE] = {          [GF_FOP_READDIRP]    = { "READDIRP",    client3_3_readdirp },  	[GF_FOP_FALLOCATE]   = { "FALLOCATE",	client3_3_fallocate },  	[GF_FOP_DISCARD]     = { "DISCARD",	client3_3_discard }, +        [GF_FOP_ZEROFILL]    = { "ZEROFILL",    client3_3_zerofill},          [GF_FOP_RELEASE]     = { "RELEASE",     client3_3_release },          [GF_FOP_RELEASEDIR]  = { "RELEASEDIR",  client3_3_releasedir },          [GF_FOP_GETSPEC]     = { "GETSPEC",     client3_getspec }, @@ -6088,6 +6189,8 @@ char *clnt3_3_fop_names[GFS3_OP_MAXVALUE] = {          [GFS3_OP_FREMOVEXATTR] = "FREMOVEXATTR",  	[GFS3_OP_FALLOCATE]   = "FALLOCATE",  	[GFS3_OP_DISCARD]     = "DISCARD", +        [GFS3_OP_ZEROFILL]    = "ZEROFILL", +  };  rpc_clnt_prog_t clnt3_3_fop_prog = { diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c index 7703c6e8fba..1f7d13ea452 100644 --- a/xlators/protocol/client/src/client.c +++ b/xlators/protocol/client/src/client.c @@ -2031,6 +2031,42 @@ out:  }  int32_t +client_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +               size_t len, dict_t *xdata) +{ +        int          ret              = -1; +        clnt_conf_t *conf             = NULL; +        rpc_clnt_procedure_t *proc    = NULL; +        clnt_args_t  args             = {0,}; + +        
conf = this->private; +        if (!conf || !conf->fops) +                goto out; + +        args.fd = fd; +        args.offset = offset; +        args.size = len; +        args.xdata = xdata; + +        proc = &conf->fops->proctable[GF_FOP_ZEROFILL]; +        if (!proc) { +                gf_log (this->name, GF_LOG_ERROR, +                        "rpc procedure not found for %s", +                        gf_fop_list[GF_FOP_ZEROFILL]); +                goto out; +        } +        if (proc->fn) +                ret = proc->fn (frame, this, &args); +out: +        if (ret) +                STACK_UNWIND_STRICT(zerofill, frame, -1, ENOTCONN, +                                    NULL, NULL, NULL); + +        return 0; +} + + +int32_t  client_getspec (call_frame_t *frame, xlator_t *this, const char *key,                  int32_t flags)  { @@ -2749,6 +2785,7 @@ struct xlator_fops fops = {          .fsetattr    = client_fsetattr,  	.fallocate   = client_fallocate,  	.discard     = client_discard, +        .zerofill    = client_zerofill,          .getspec     = client_getspec,  }; diff --git a/xlators/protocol/server/src/server-rpc-fops.c b/xlators/protocol/server/src/server-rpc-fops.c index 59e808b2f14..d2da9aa769c 100644 --- a/xlators/protocol/server/src/server-rpc-fops.c +++ b/xlators/protocol/server/src/server-rpc-fops.c @@ -1993,6 +1993,46 @@ out:          return 0;  } +int +server_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +                   int32_t op_ret, int32_t op_errno, +                   struct iatt *statpre, struct iatt *statpost, dict_t *xdata) +{ +        gfs3_zerofill_rsp  rsp    = {0,}; +        server_state_t    *state  = NULL; +        rpcsvc_request_t  *req    = NULL; + +        req = frame->local; +        state  = CALL_STATE (frame); + +        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&rsp.xdata.xdata_val), +                                    rsp.xdata.xdata_len, op_errno, out); + +        if (op_ret) { +                
gf_log (this->name, GF_LOG_INFO, +                        "%"PRId64": ZEROFILL %"PRId64" (%s) ==> (%s)", +                        frame->root->unique, state->resolve.fd_no, +                        uuid_utoa (state->resolve.gfid), +                        strerror (op_errno)); +                goto out; +        } + +        gf_stat_from_iatt (&rsp.statpre, statpre); +        gf_stat_from_iatt (&rsp.statpost, statpost); + +out: +        rsp.op_ret    = op_ret; +        rsp.op_errno  = gf_errno_to_error (op_errno); + +        server_submit_reply(frame, req, &rsp, NULL, 0, NULL, +                            (xdrproc_t) xdr_gfs3_zerofill_rsp); + +        GF_FREE (rsp.xdata.xdata_val); + +        return 0; +} + +  /* Resume function section */  int @@ -3019,6 +3059,28 @@ err:          return 0;  } +int +server_zerofill_resume (call_frame_t *frame, xlator_t *bound_xl) +{ +        server_state_t *state = NULL; + +        state = CALL_STATE (frame); + +        if (state->resolve.op_ret != 0) +                goto err; + +        STACK_WIND (frame, server_zerofill_cbk, +                    bound_xl, bound_xl->fops->zerofill, +                    state->fd, state->offset, state->size, state->xdata); +        return 0; +err: +        server_zerofill_cbk(frame, NULL, frame->this, state->resolve.op_ret, +                           state->resolve.op_errno, NULL, NULL, NULL); + +        return 0; +} + +  /* Fop section */ @@ -3322,6 +3384,65 @@ out:  int +server3_3_zerofill(rpcsvc_request_t *req) +{ +        server_state_t       *state      = NULL; +        call_frame_t         *frame      = NULL; +        gfs3_zerofill_req     args       = {{0},}; +        int                   ret        = -1; +        int                   op_errno   = 0; + +        if (!req) +                return ret; + +        ret = xdr_to_generic (req->msg[0], &args, +                              (xdrproc_t)xdr_gfs3_zerofill_req); +        if (ret < 0) { +                /* failed to decode msg */ +      
          req->rpc_err = GARBAGE_ARGS; +                goto out; +        } + +        frame = get_frame_from_request (req); +        if (!frame) { +                /* something wrong, mostly insufficient memory*/ +                req->rpc_err = GARBAGE_ARGS; /* TODO */ +                goto out; +        } +        frame->root->op = GF_FOP_ZEROFILL; + +        state = CALL_STATE (frame); +        if (!frame->root->client->bound_xl) { +                /* auth failure, request on subvolume without setvolume */ +                req->rpc_err = GARBAGE_ARGS; +                goto out; +        } + +        state->resolve.type   = RESOLVE_MUST; +        state->resolve.fd_no  = args.fd; + +        state->offset = args.offset; +        state->size = args.size; +        memcpy(state->resolve.gfid, args.gfid, 16); + +        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl, state->xdata, +                                      (args.xdata.xdata_val), +                                      (args.xdata.xdata_len), ret, +                                      op_errno, out); + +        ret = 0; +        resolve_and_resume (frame, server_zerofill_resume); + +out: +        free (args.xdata.xdata_val); + +        if (op_errno) +                req->rpc_err = GARBAGE_ARGS; + +        return ret; +} + +int  server3_3_readlink (rpcsvc_request_t *req)  {          server_state_t    *state                 = NULL; @@ -6040,6 +6161,7 @@ rpcsvc_actor_t glusterfs3_3_fop_actors[] = {          [GFS3_OP_FREMOVEXATTR] = {"FREMOVEXATTR", GFS3_OP_FREMOVEXATTR, server3_3_fremovexattr, NULL, 0, DRC_NA},          [GFS3_OP_FALLOCATE]    = {"FALLOCATE",    GFS3_OP_FALLOCATE,    server3_3_fallocate,    NULL, 0, DRC_NA},          [GFS3_OP_DISCARD]      = {"DISCARD",      GFS3_OP_DISCARD,      server3_3_discard,      NULL, 0, DRC_NA}, +        [GFS3_OP_ZEROFILL]    =  {"ZEROFILL",     GFS3_OP_ZEROFILL,     server3_3_zerofill,     NULL, 0, DRC_NA},  }; diff --git 
a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index 93ece2474f9..fb45c7a6746 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -24,6 +24,7 @@  #include <ftw.h>  #include <sys/stat.h>  #include <signal.h> +#include <sys/uio.h>  #ifndef GF_BSD_HOST_OS  #include <alloca.h> @@ -616,6 +617,166 @@ out:          return ret;  } +char* +_page_aligned_alloc (size_t size, char **aligned_buf) +{ +        char            *alloc_buf = NULL; +        char            *buf = NULL; + +        alloc_buf = GF_CALLOC (1, (size + ALIGN_SIZE), gf_posix_mt_char); +        if (!alloc_buf) +                goto out; +        /* page aligned buffer */ +        buf = GF_ALIGN_BUF (alloc_buf, ALIGN_SIZE); +        *aligned_buf = buf; +out: +        return alloc_buf; +} + +static int32_t +_posix_do_zerofill(int fd, off_t offset, size_t len, int o_direct) +{ +        size_t              num_vect            = 0; +        int32_t             num_loop            = 1; +        int32_t             idx                 = 0; +        int32_t             op_ret              = -1; +        int32_t             vect_size           = VECTOR_SIZE; +        size_t              remain              = 0; +        size_t              extra               = 0; +        struct iovec       *vector              = NULL; +        char               *iov_base            = NULL; +        char               *alloc_buf           = NULL; + +        if (len == 0) +                return 0; +        if (len < VECTOR_SIZE) +                vect_size = len; + +        num_vect = len / (vect_size); +        remain = len % vect_size ; +        if (num_vect > MAX_NO_VECT) { +                extra = num_vect % MAX_NO_VECT; +                num_loop = num_vect / MAX_NO_VECT; +                num_vect = MAX_NO_VECT; +        } + +        vector = GF_CALLOC (num_vect, sizeof(struct iovec), +                             gf_common_mt_iovec); +        if (!vector) +          
        return -1; +        if (o_direct) { +                alloc_buf = _page_aligned_alloc(vect_size, &iov_base); +                if (!alloc_buf) { +                        gf_log ("_posix_do_zerofill", GF_LOG_DEBUG, +                                 "memory alloc failed, vect_size %d: %s", +                                  vect_size, strerror(errno)); +                        GF_FREE(vector); +                        return -1; +                } +        } else { +                iov_base = GF_CALLOC (vect_size, sizeof(char), +                                        gf_common_mt_char); +                if (!iov_base) { +                        GF_FREE(vector); +                        return -1; +                 } +        } + +        for (idx = 0; idx < num_vect; idx++) { +                vector[idx].iov_base = iov_base; +                vector[idx].iov_len  = vect_size; +        } +        if (lseek(fd, offset, SEEK_SET) < 0) goto err; /* op_ret is still -1 here: never zero-fill at a stale file position */ +        for (idx = 0; idx < num_loop; idx++) { +                op_ret = writev(fd, vector, num_vect); +                if (op_ret < 0) +                        goto err; +        } +        if (extra) { +                op_ret = writev(fd, vector, extra); +                if (op_ret < 0) +                        goto err; +        } +        if (remain) { +                vector[0].iov_len = remain; +                op_ret = writev(fd, vector , 1); +                if (op_ret < 0) +                        goto err; +        } +err: +        if (o_direct) +                GF_FREE(alloc_buf); +        else +                GF_FREE(iov_base); +        GF_FREE(vector); +        return op_ret; +} + +static int32_t +posix_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, +                  off_t offset, size_t len, struct iatt *statpre, +                  struct iatt *statpost) +{ +        struct posix_fd *pfd       = NULL; +        int32_t          ret       = -1; + +        DECLARE_OLD_FS_ID_VAR; + +        SET_FS_ID 
(frame->root->uid, frame->root->gid); + +        VALIDATE_OR_GOTO (frame, out); +        VALIDATE_OR_GOTO (this, out); +        VALIDATE_OR_GOTO (fd, out); + +        ret = posix_fd_ctx_get (fd, this, &pfd); +        if (ret < 0) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "pfd is NULL from fd=%p", fd); +                goto out; +        } + +        ret = posix_fdstat (this, pfd->fd, statpre); +        if (ret == -1) { +                ret = -errno; +                gf_log (this->name, GF_LOG_ERROR, +                        "pre-operation fstat failed on fd = %p: %s", fd, +                        strerror (errno)); +                goto out; +        } +        ret = _posix_do_zerofill(pfd->fd, offset, len, pfd->flags & O_DIRECT); +        if (ret < 0) { +                ret = -errno; +                gf_log(this->name, GF_LOG_ERROR, +                       "zerofill failed on fd %d length %zu %s", +                        pfd->fd, len, strerror(errno)); +                goto out; +        } +        if (pfd->flags & (O_SYNC|O_DSYNC)) { +                ret = fsync (pfd->fd); +                if (ret) { +                        ret = -errno; /* capture errno before logging can overwrite it */ +                        gf_log (this->name, GF_LOG_ERROR, +                                "fsync() in zerofill on fd %d failed: %s", +                                pfd->fd, strerror (-ret)); +                        goto out; +                } +        } + +        ret = posix_fdstat (this, pfd->fd, statpost); +        if (ret == -1) { +                ret = -errno; +                gf_log (this->name, GF_LOG_ERROR, +                        "post operation fstat failed on fd=%p: %s", fd, +                        strerror (errno)); +                goto out; +        } + +out: +        SET_TO_OLD_FS_ID (); + +        return ret; +} +  static int32_t  _posix_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size,  		off_t offset, size_t len, dict_t *xdata) @@ -664,6 +825,28 @@ 
err:  } +static int32_t +posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +                size_t len, dict_t *xdata) +{ +        int32_t ret                      =  0; +        struct  iatt statpre             = {0,}; +        struct  iatt statpost            = {0,}; + +        ret = posix_do_zerofill(frame, this, fd, offset, len, +                                 &statpre, &statpost); +        if (ret < 0) +                goto err; + +        STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, NULL); +        return 0; + +err: +        STACK_UNWIND_STRICT(zerofill, frame, -1, -ret, NULL, NULL, NULL); +        return 0; + +} +  int32_t  posix_opendir (call_frame_t *frame, xlator_t *this,                 loc_t *loc, fd_t *fd, dict_t *xdata) @@ -2117,22 +2300,6 @@ err:          return op_ret;  } -char* -_page_aligned_alloc (size_t size, char **aligned_buf) -{ -        char            *alloc_buf = NULL; -        char            *buf = NULL; - -        alloc_buf = GF_CALLOC (1, (size + ALIGN_SIZE), gf_posix_mt_char); -        if (!alloc_buf) -                goto out; -        /* page aligned buffer */ -        buf = GF_ALIGN_BUF (alloc_buf, ALIGN_SIZE); -        *aligned_buf = buf; -out: -        return alloc_buf; -} -  int32_t  __posix_writev (int fd, struct iovec *vector, int count, off_t startoff,                  int odirect) @@ -4938,6 +5105,7 @@ struct xlator_fops fops = {          .fsetattr    = posix_fsetattr,  	.fallocate   = _posix_fallocate,  	.discard     = posix_discard, +        .zerofill    = posix_zerofill,  };  struct xlator_cbks cbks = { diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h index a957e18768c..3121db2717e 100644 --- a/xlators/storage/posix/src/posix.h +++ b/xlators/storage/posix/src/posix.h @@ -50,6 +50,8 @@  #include "posix-aio.h"  #endif +#define VECTOR_SIZE (64 * 1024) /* vector size 64KB; parenthesized so it expands safely inside larger expressions */ +#define MAX_NO_VECT 1024  /**   * posix_fd - internal structure common to 
file and directory fd's   */  | 
