From 63160cb952fe7716a3313ce5ee32f890fe4d7a0c Mon Sep 17 00:00:00 2001
From: Sunil Kumar Acharya
Date: Wed, 14 Jun 2017 16:28:40 +0530
Subject: cluster/ec: Implement DISCARD FOP for EC

Updates #254

This code change implements DISCARD FOP support for EC.

BUG: 1461018
Change-Id: I09a9cb2aa9d91ec27add4f422dc9074af5b8b2db
Signed-off-by: Sunil Kumar Acharya
---
 xlators/cluster/ec/src/ec-common.h      |   3 +-
 xlators/cluster/ec/src/ec-fops.h        |   4 +
 xlators/cluster/ec/src/ec-helpers.h     |   5 +-
 xlators/cluster/ec/src/ec-inode-write.c | 371 ++++++++++++++++++++++++++++----
 xlators/cluster/ec/src/ec.c             |   3 +-
 5 files changed, 338 insertions(+), 48 deletions(-)

(limited to 'xlators/cluster/ec')

diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
index 2744c6372a1..e3468333cc4 100644
--- a/xlators/cluster/ec/src/ec-common.h
+++ b/xlators/cluster/ec/src/ec-common.h
@@ -85,6 +85,8 @@ void ec_update_good(ec_fop_data_t *fop, uintptr_t good);
 
 void ec_fop_set_error(ec_fop_data_t *fop, int32_t error);
 
+void __ec_fop_set_error(ec_fop_data_t *fop, int32_t error);
+
 ec_cbk_data_t *
 ec_fop_prepare_answer(ec_fop_data_t *fop, gf_boolean_t ro);
 
@@ -133,5 +135,4 @@ ec_heal_inspect (call_frame_t *frame, ec_t *ec,
                  ec_heal_need_t *need_heal);
 int32_t
 ec_get_heal_info (xlator_t *this, loc_t *loc, dict_t **dict);
-
 #endif /* __EC_COMMON_H__ */
diff --git a/xlators/cluster/ec/src/ec-fops.h b/xlators/cluster/ec/src/ec-fops.h
index fab22d8240d..4a926cf4802 100644
--- a/xlators/cluster/ec/src/ec-fops.h
+++ b/xlators/cluster/ec/src/ec-fops.h
@@ -172,6 +172,10 @@ void ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target,
                   int32_t minimum, fop_fallocate_cbk_t func, void *data, fd_t *fd,
                   int32_t mode, off_t offset, size_t len, dict_t *xdata);
 
+void ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target,
+                int32_t minimum, fop_discard_cbk_t func, void *data, fd_t *fd,
+                off_t offset, size_t len, dict_t *xdata);
+
 void ec_truncate(call_frame_t * frame, xlator_t * this, uintptr_t target,
                  int32_t minimum, fop_truncate_cbk_t func, void *data,
                  loc_t * loc, off_t offset, dict_t * xdata);
diff --git a/xlators/cluster/ec/src/ec-helpers.h b/xlators/cluster/ec/src/ec-helpers.h
index cfd7daaa5c2..a8f153a395d 100644
--- a/xlators/cluster/ec/src/ec-helpers.h
+++ b/xlators/cluster/ec/src/ec-helpers.h
@@ -178,8 +178,5 @@ ec_is_data_fop (glusterfs_fop_t fop);
 int32_t
 ec_launch_replace_heal (ec_t *ec);
 
-/*
-gf_boolean_t
-ec_is_metadata_fop (glusterfs_fop_t fop);
-*/
+
 #endif /* __EC_HELPERS_H__ */
diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c
index e6a67cf67bc..ae5120226e3 100644
--- a/xlators/cluster/ec/src/ec-inode-write.c
+++ b/xlators/cluster/ec/src/ec-inode-write.c
@@ -19,6 +19,97 @@
 #include "ec-method.h"
 #include "ec-fops.h"
 
+int32_t
+ec_update_writev_cbk (call_frame_t *frame, void *cookie,
+                      xlator_t *this, int32_t op_ret, int32_t op_errno,
+                      struct iatt *prebuf, struct iatt *postbuf,
+                      dict_t *xdata)
+{
+    ec_fop_data_t *fop = cookie;
+    ec_cbk_data_t *cbk = NULL;
+    ec_fop_data_t *parent = fop->parent;
+    int i = 0;
+
+    ec_trace("UPDATE_WRITEV_CBK", cookie, "ret=%d, errno=%d, parent-fop=%s",
+             op_ret, op_errno, ec_fop_name (parent->id));
+
+    if (op_ret < 0) {
+        ec_fop_set_error (parent, op_errno);
+        goto out;
+    }
+    cbk = ec_cbk_data_allocate (parent->frame, this, parent,
+                                parent->id, 0, op_ret, op_errno);
+    if (!cbk) {
+        ec_fop_set_error (parent, ENOMEM);
+        goto out;
+    }
+
+    if (xdata)
+        cbk->xdata = dict_ref (xdata);
+
+    if (prebuf)
+        cbk->iatt[i++] = *prebuf;
+
+    if (postbuf)
+        cbk->iatt[i++] = *postbuf;
+
+    LOCK (&parent->lock);
+    {
+        parent->good &= fop->good;
+
+        if (gf_bits_count (parent->good) < parent->minimum) {
+            __ec_fop_set_error (parent, EIO);
+        } else if (fop->error == 0 && parent->answer == NULL) {
+            parent->answer = cbk;
+        }
+    }
+    UNLOCK (&parent->lock);
+out:
+    return 0;
+}
+
+int32_t ec_update_write(ec_fop_data_t *fop, uintptr_t mask, off_t offset,
+                        size_t size)
+{
+    struct iobref *iobref = NULL;
+    struct iobuf *iobuf = NULL;
+    struct iovec vector;
+    int32_t err = -ENOMEM;
+
+    iobref = iobref_new();
+    if (iobref == NULL) {
+        goto out;
+    }
+    iobuf = iobuf_get(fop->xl->ctx->iobuf_pool);
+    if (iobuf == NULL) {
+        goto out;
+    }
+    err = iobref_add(iobref, iobuf);
+    if (err != 0) {
+        goto out;
+    }
+
+    vector.iov_base = iobuf->ptr;
+    vector.iov_len = size;
+    memset(vector.iov_base, 0, vector.iov_len);
+
+    ec_writev(fop->frame, fop->xl, mask, fop->minimum,
+              ec_update_writev_cbk, NULL, fop->fd, &vector, 1,
+              offset, 0, iobref, NULL);
+
+    err = 0;
+
+out:
+    if (iobuf != NULL) {
+        iobuf_unref(iobuf);
+    }
+    if (iobref != NULL) {
+        iobref_unref(iobref);
+    }
+
+    return err;
+}
+
 int
 ec_inode_write_cbk (call_frame_t *frame, xlator_t *this, void *cookie,
                     int op_ret, int op_errno, struct iatt *prestat,
@@ -1034,62 +1125,258 @@ out:
     }
 }
 
-int32_t
-ec_truncate_writev_cbk (call_frame_t *frame, void *cookie,
-                        xlator_t *this, int32_t op_ret, int32_t op_errno,
-                        struct iatt *prebuf, struct iatt *postbuf,
-                        dict_t *xdata)
+/*********************************************************************
+ *
+ * File Operation : Discard
+ *
+ *********************************************************************/
+/* Write zeros over the part of the range that is not discarded on the
+ * bricks: all of it when fop->size is 0, else only its head and tail. */
+void ec_update_discard_write(ec_fop_data_t *fop, uintptr_t mask)
 {
-    ec_fop_data_t *fop = cookie;
+    ec_t *ec = fop->xl->private;
+    off_t off_head = 0;
+    off_t off_tail = 0;
+    size_t size_head = 0;
+    size_t size_tail = 0;
+    int error = 0;
+
+    off_head = fop->offset * ec->fragments - fop->int32;
+    if (fop->size == 0) {
+        error = ec_update_write (fop, mask, off_head, fop->user_size);
+    } else {
+        size_head = fop->int32;
+        size_tail = (fop->user_size - fop->int32) % ec->stripe_size;
+        off_tail = off_head + fop->user_size - size_tail;
+        if (size_head) {
+            error = ec_update_write (fop, mask, off_head, size_head);
+            /* Stop only if the head write failed; otherwise the tail
+             * must still be zeroed. */
+            if (error) {
+                goto out;
+            }
+        }
+        if (size_tail) {
+            error = ec_update_write (fop, mask, off_tail, size_tail);
+        }
+    }
+out:
+    if (error)
+        ec_fop_set_error (fop, -error);
+}
 
-    fop->parent->good &= fop->good;
-    ec_trace("TRUNCATE_WRITEV_CBK", cookie, "ret=%d, errno=%d",
-             op_ret, op_errno);
-    return 0;
+void ec_discard_adjust_offset_size(ec_fop_data_t *fop)
+{
+    ec_t *ec = fop->xl->private;
+
+    fop->user_size = fop->size;
+    /* If discard length covers atleast a fragment on brick, we will
+     * perform discard operation(when fop->size is non-zero) else we just
+     * write zeros.
+     */
+    fop->int32 = ec_adjust_offset_up(ec, &fop->offset, _gf_true);
+    if (fop->size < fop->int32) {
+        fop->size = 0;
+    } else {
+        fop->size -= fop->int32;
+        ec_adjust_size_down(ec, &fop->size, _gf_true);
+    }
 }
 
-int32_t ec_truncate_write(ec_fop_data_t * fop, uintptr_t mask)
+int32_t ec_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                       struct iatt *postbuf, dict_t *xdata)
 {
-    ec_t * ec = fop->xl->private;
-    struct iobref * iobref = NULL;
-    struct iobuf * iobuf = NULL;
-    struct iovec vector;
-    int32_t err = -ENOMEM;
+    return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno,
+                               prebuf, postbuf, xdata);
+}
 
-    iobref = iobref_new();
-    if (iobref == NULL) {
-        goto out;
-    }
-    iobuf = iobuf_get(fop->xl->ctx->iobuf_pool);
-    if (iobuf == NULL) {
-        goto out;
+void ec_wind_discard(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+    ec_trace("WIND", fop, "idx=%d", idx);
+
+    STACK_WIND_COOKIE(fop->frame, ec_discard_cbk, (void *)(uintptr_t)idx,
+                      ec->xl_list[idx], ec->xl_list[idx]->fops->discard,
+                      fop->fd, fop->offset, fop->size, fop->xdata);
+}
+
+int32_t ec_manager_discard(ec_fop_data_t *fop, int32_t state)
+{
+    ec_cbk_data_t *cbk = NULL;
+    off_t fl_start = 0;
+    size_t fl_size = 0;
+
+
+    switch (state) {
+    case EC_STATE_INIT:
+        if ((fop->size <= 0) || (fop->offset < 0)) {
+            ec_fop_set_error(fop, EINVAL);
+            return EC_STATE_REPORT;
+        }
+        /* Because of the head/tail writes, "discard" happens on the remaining
+         * regions, but we need to compute region including head/tail writes
+         * so compute them separately*/
+        fl_start = fop->offset;
+        fl_size = fop->size;
+        fl_size += ec_adjust_offset_down (fop->xl->private, &fl_start,
+                                          _gf_true);
+        ec_adjust_size_up (fop->xl->private, &fl_size, _gf_true);
+
+        ec_discard_adjust_offset_size(fop);
+
+        /* Fall through */
+
+    case EC_STATE_LOCK:
+        ec_lock_prepare_fd(fop, fop->fd,
+                           EC_UPDATE_DATA | EC_UPDATE_META |
+                           EC_QUERY_INFO, fl_start, fl_size);
+        ec_lock(fop);
+
+        return EC_STATE_DISPATCH;
+
+    case EC_STATE_DISPATCH:
+
+        /* Dispatch discard fop only if we have whole fragment
+         * to deallocate */
+        if (fop->size) {
+            ec_dispatch_all(fop);
+            return EC_STATE_DELAYED_START;
+        } else {
+            /*Assume discard to have succeeded on mask*/
+            fop->good = fop->mask;
+        }
+
+        /* Fall through */
+
+    case EC_STATE_DELAYED_START:
+
+        if (fop->size) {
+            if (fop->answer && fop->answer->op_ret == 0)
+                ec_update_discard_write (fop, fop->answer->mask);
+        } else {
+            ec_update_discard_write (fop, fop->mask);
+        }
+
+        return EC_STATE_PREPARE_ANSWER;
+
+    case EC_STATE_PREPARE_ANSWER:
+        cbk = ec_fop_prepare_answer(fop, _gf_false);
+        if (cbk != NULL) {
+            ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2,
+                            cbk->count);
+
+            /* This shouldn't fail because we have the inode locked. */
+            GF_ASSERT(ec_get_inode_size(fop, fop->locks[0].lock->loc.inode,
+                                        &cbk->iatt[0].ia_size));
+
+            cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
+        }
+        return EC_STATE_REPORT;
+
+    case EC_STATE_REPORT:
+        cbk = fop->answer;
+
+        GF_ASSERT(cbk != NULL);
+
+        if (fop->cbks.discard != NULL) {
+            fop->cbks.discard(fop->req_frame, fop, fop->xl, cbk->op_ret,
+                              cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
+                              cbk->xdata);
+        }
+
+        return EC_STATE_LOCK_REUSE;
+
+    case -EC_STATE_INIT:
+    case -EC_STATE_LOCK:
+    case -EC_STATE_DISPATCH:
+    case -EC_STATE_DELAYED_START:
+    case -EC_STATE_PREPARE_ANSWER:
+    case -EC_STATE_REPORT:
+        GF_ASSERT(fop->error != 0);
+
+        if (fop->cbks.discard != NULL) {
+            fop->cbks.discard(fop->req_frame, fop, fop->xl, -1,
+                              fop->error, NULL, NULL, NULL);
+        }
+
+        return EC_STATE_LOCK_REUSE;
+
+    case -EC_STATE_LOCK_REUSE:
+    case EC_STATE_LOCK_REUSE:
+        ec_lock_reuse(fop);
+
+        return EC_STATE_UNLOCK;
+
+    case -EC_STATE_UNLOCK:
+    case EC_STATE_UNLOCK:
+        ec_unlock(fop);
+
+        return EC_STATE_END;
+
+    default:
+        gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+                EC_MSG_UNHANDLED_STATE,
+                "Unhandled state %d for %s",
+                state, ec_fop_name(fop->id));
+
+        return EC_STATE_END;
     }
-    err = iobref_add(iobref, iobuf);
-    if (err != 0) {
+}
+
+void ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target,
+                int32_t minimum, fop_discard_cbk_t func, void *data, fd_t *fd,
+                off_t offset, size_t len, dict_t *xdata)
+{
+    ec_cbk_t callback = { .discard = func };
+    ec_fop_data_t *fop = NULL;
+    int32_t error = ENOMEM;
+
+    gf_msg_trace ("ec", 0, "EC(DISCARD) %p", frame);
+
+    VALIDATE_OR_GOTO(this, out);
+    GF_VALIDATE_OR_GOTO(this->name, frame, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    fop = ec_fop_data_allocate(frame, this, GF_FOP_DISCARD, 0, target,
+                               minimum, ec_wind_discard, ec_manager_discard,
+                               callback, data);
+    if (fop == NULL) {
         goto out;
     }
 
-    vector.iov_base = iobuf->ptr;
-    vector.iov_len = fop->offset * ec->fragments - fop->user_size;
-    memset(vector.iov_base, 0, vector.iov_len);
+    fop->use_fd = 1;
+    fop->offset = offset;
+    fop->size = len;
 
-    iobuf_unref (iobuf);
-    iobuf = NULL;
+    if (fd != NULL) {
+        fop->fd = fd_ref(fd);
+    }
 
-    ec_writev(fop->frame, fop->xl, mask, fop->minimum, ec_truncate_writev_cbk,
-              NULL, fop->fd, &vector, 1, fop->user_size, 0, iobref, NULL);
+    if (xdata != NULL) {
+        fop->xdata = dict_ref(xdata);
+    }
 
-    err = 0;
+    error = 0;
 
 out:
-    if (iobuf != NULL) {
-        iobuf_unref(iobuf);
-    }
-    if (iobref != NULL) {
-        iobref_unref(iobref);
+    if (fop != NULL) {
+        ec_manager(fop, error);
+    } else {
+        func(frame, NULL, this, -1, error, NULL, NULL, NULL);
     }
+}
 
-    return err;
+/*********************************************************************
+ *
+ * File Operation : truncate
+ *
+ *********************************************************************/
+
+int32_t ec_update_truncate_write (ec_fop_data_t *fop, uintptr_t mask)
+{
+    ec_t *ec = fop->xl->private;
+    size_t size = fop->offset * ec->fragments - fop->user_size;
+    return ec_update_write (fop, mask, fop->user_size, size);
 }
 
 int32_t ec_truncate_open_cbk(call_frame_t * frame, void * cookie,
@@ -1102,9 +1389,9 @@ int32_t ec_truncate_open_cbk(call_frame_t * frame, void * cookie,
     fop->parent->good &= fop->good;
     if (op_ret >= 0) {
         fd_bind (fd);
-        err = ec_truncate_write(fop->parent, fop->answer->mask);
+        err = ec_update_truncate_write (fop->parent, fop->answer->mask);
         if (err != 0) {
-            fop->error = -err;
+            ec_fop_set_error (fop->parent, -err);
         }
     }
 
@@ -1125,7 +1412,7 @@ int32_t ec_truncate_clean(ec_fop_data_t * fop)
 
         return 0;
     } else {
-        return ec_truncate_write(fop, fop->answer->mask);
+        return ec_update_truncate_write (fop, fop->answer->mask);
     }
 }
 
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 856d60c00c9..09c5fa83eb9 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -729,7 +729,8 @@ int32_t ec_gf_create(call_frame_t * frame, xlator_t * this, loc_t * loc,
 int32_t ec_gf_discard(call_frame_t * frame, xlator_t * this, fd_t * fd,
                       off_t offset, size_t len, dict_t * xdata)
 {
-    default_discard_failure_cbk(frame, ENOTSUP);
+    ec_discard(frame, this, -1, EC_MINIMUM_MIN, default_discard_cbk,
+               NULL, fd, offset, len, xdata);
 
     return 0;
 }
--
cgit