From 7b3971ad0152eb1bb89a982333970118a6bd4922 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Wed, 4 Sep 2019 12:06:34 +0530 Subject: cluster/ec: Fail fsync/flush for files on update size/version failure Problem: If update size/version is not successful on the file, updates on the same stripe could lead to data corruption if the earlier unaligned write was not successful on all the bricks. The application won't have any knowledge of this because update size/version happens in the background. Fix: Fail fsync/flush on fds that were opened before update-size-version went bad. fixes: bz#1748836 Change-Id: I9d323eddcda703bd27d55f340c4079d76e06e492 Signed-off-by: Pranith Kumar K --- xlators/cluster/ec/src/ec-common.c | 23 ++++++++++++++++++ xlators/cluster/ec/src/ec-generic.c | 47 ++++++++++++++++++++++++++++++++++++ xlators/cluster/ec/src/ec-helpers.c | 7 ++++++ xlators/cluster/ec/src/ec-messages.h | 2 +- xlators/cluster/ec/src/ec-types.h | 2 ++ 5 files changed, 80 insertions(+), 1 deletion(-) (limited to 'xlators/cluster') diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c index 1c829a09e1f..e243b8ba5d9 100644 --- a/xlators/cluster/ec/src/ec-common.c +++ b/xlators/cluster/ec/src/ec-common.c @@ -2255,6 +2255,23 @@ ec_unlock_lock(ec_lock_link_t *link) } } +void +ec_inode_bad_inc(inode_t *inode, xlator_t *xl) +{ + ec_inode_t *ctx = NULL; + + LOCK(&inode->lock); + { + ctx = __ec_inode_get(inode, xl); + if (ctx == NULL) { + goto unlock; + } + ctx->bad_version++; + } +unlock: + UNLOCK(&inode->lock); +} + int32_t ec_update_size_version_done(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xattr, @@ -2270,6 +2287,12 @@ ec_update_size_version_done(call_frame_t *frame, void *cookie, xlator_t *this, ctx = lock->ctx; if (op_ret < 0) { + if (link->lock->fd == NULL) { + ec_inode_bad_inc(link->lock->loc.inode, this); + } else { + ec_inode_bad_inc(link->lock->fd->inode, this); + } + gf_msg(fop->xl->name, 
fop_log_level(fop->id, op_errno), op_errno, EC_MSG_SIZE_VERS_UPDATE_FAIL, "Failed to update version and size. %s", ec_msg_str(fop)); diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c index 6e7bd4bc7b2..4cd47a83b84 100644 --- a/xlators/cluster/ec/src/ec-generic.c +++ b/xlators/cluster/ec/src/ec-generic.c @@ -147,6 +147,37 @@ ec_manager_flush(ec_fop_data_t *fop, int32_t state) } } +static int32_t +ec_validate_fd(fd_t *fd, xlator_t *xl) +{ + uint64_t iversion = 0; + uint64_t fversion = 0; + ec_inode_t *inode_ctx = NULL; + ec_fd_t *fd_ctx = NULL; + + LOCK(&fd->lock); + { + fd_ctx = __ec_fd_get(fd, xl); + if (fd_ctx) { + fversion = fd_ctx->bad_version; + } + } + UNLOCK(&fd->lock); + + LOCK(&fd->inode->lock); + { + inode_ctx = __ec_inode_get(fd->inode, xl); + if (inode_ctx) { + iversion = inode_ctx->bad_version; + } + } + UNLOCK(&fd->inode->lock); + if (fversion < iversion) { + return EBADF; + } + return 0; +} + void ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, uint32_t fop_flags, fop_flush_cbk_t func, void *data, fd_t *fd, @@ -162,6 +193,14 @@ ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, GF_VALIDATE_OR_GOTO(this->name, frame, out); GF_VALIDATE_OR_GOTO(this->name, this->private, out); + error = ec_validate_fd(fd, this); + if (error) { + gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD, + "Failing %s on %s", gf_fop_list[GF_FOP_FLUSH], + fd->inode ? uuid_utoa(fd->inode->gfid) : ""); + goto out; + } + fop = ec_fop_data_allocate(frame, this, GF_FOP_FLUSH, 0, target, fop_flags, ec_wind_flush, ec_manager_flush, callback, data); if (fop == NULL) { @@ -378,6 +417,14 @@ ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, GF_VALIDATE_OR_GOTO(this->name, frame, out); GF_VALIDATE_OR_GOTO(this->name, this->private, out); + error = ec_validate_fd(fd, this); + if (error) { + gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD, + "Failing %s on %s", gf_fop_list[GF_FOP_FSYNC], + fd->inode ? 
uuid_utoa(fd->inode->gfid) : ""); + goto out; + } + fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNC, 0, target, fop_flags, ec_wind_fsync, ec_manager_fsync, callback, data); if (fop == NULL) { diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c index 43f6e3b69d2..baac001d169 100644 --- a/xlators/cluster/ec/src/ec-helpers.c +++ b/xlators/cluster/ec/src/ec-helpers.c @@ -753,6 +753,7 @@ __ec_fd_get(fd_t *fd, xlator_t *xl) { int i = 0; ec_fd_t *ctx = NULL; + ec_inode_t *ictx = NULL; uint64_t value = 0; ec_t *ec = xl->private; @@ -775,6 +776,12 @@ __ec_fd_get(fd_t *fd, xlator_t *xl) GF_FREE(ctx); return NULL; } + /* Only referring bad-version so no need for lock + * */ + ictx = __ec_inode_get(fd->inode, xl); + if (ictx) { + ctx->bad_version = ictx->bad_version; + } } } else { ctx = (ec_fd_t *)(uintptr_t)value; diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h index ce299bb61be..72e98f11286 100644 --- a/xlators/cluster/ec/src/ec-messages.h +++ b/xlators/cluster/ec/src/ec-messages.h @@ -56,6 +56,6 @@ GLFS_MSGID(EC, EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL, EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED, EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED, EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED, - EC_MSG_THREAD_CLEANUP_FAILED); + EC_MSG_THREAD_CLEANUP_FAILED, EC_MSG_FD_BAD); #endif /* !_EC_MESSAGES_H_ */ diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h index 4dbf4a3a0aa..b93a07aba40 100644 --- a/xlators/cluster/ec/src/ec-types.h +++ b/xlators/cluster/ec/src/ec-types.h @@ -150,6 +150,7 @@ struct _ec_fd { loc_t loc; uintptr_t open; int32_t flags; + uint64_t bad_version; ec_fd_status_t fd_status[0]; }; @@ -180,6 +181,7 @@ struct _ec_inode { uint64_t dirty[2]; struct list_head heal; ec_stripe_list_t stripe_cache; + uint64_t bad_version; }; typedef int32_t (*fop_heal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t, -- cgit