From c36ced27f2bae003ddafe0d5a408424ba57af243 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 3 Dec 2012 15:50:39 -0500 Subject: afr: make flush non-transactional Flush is historically a transaction to ensure all previous writes were complete. This is no longer required as write-behind has learned to make flush a barrier operation (re: conversation w/ Avati). Flush taking a full file lock causes VMs running on afr volumes to stall when a migration occurs and self-heal is in progress. Make afr_flush() a non-transactional operation. BUG: 874045 Change-Id: Ie287b79e7f300df88aca6030e2d80311772746bf Signed-off-by: Brian Foster Reviewed-on: https://code.engineering.redhat.com/gerrit/1912 Reviewed-by: Vijay Bellur Tested-by: Vijay Bellur --- xlators/cluster/afr/src/afr-common.c | 167 ++++++------------------------ xlators/cluster/afr/src/afr-transaction.c | 3 - xlators/cluster/afr/src/afr-transaction.h | 4 + 3 files changed, 38 insertions(+), 136 deletions(-) diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 98aba9fd492..d653a3ec73b 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -2308,137 +2308,45 @@ afr_fd_ctx_set (xlator_t *this, fd_t *fd) /* {{{ flush */ int -afr_flush_unwind (call_frame_t *frame, xlator_t *this) +afr_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { afr_local_t * local = NULL; - call_frame_t *main_frame = NULL; - - local = frame->local; - - LOCK (&frame->lock); - { - if (local->transaction.main_frame) - main_frame = local->transaction.main_frame; - local->transaction.main_frame = NULL; - } - UNLOCK (&frame->lock); - - if (main_frame) { - AFR_STACK_UNWIND (flush, main_frame, - local->op_ret, local->op_errno, - NULL); - } - - return 0; -} - - -int -afr_flush_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - afr_local_t * local = NULL; - afr_private_t * priv = NULL; int call_count = -1; - int child_index = (long) cookie; - int need_unwind = 0; local = frame->local; - priv = this->private; LOCK (&frame->lock); { - if (afr_fop_failed (op_ret, op_errno)) - afr_transaction_fop_failed (frame, this, child_index); - if (op_ret != -1) { if (local->success_count == 0) { local->op_ret = op_ret; } local->success_count++; - - if (local->success_count == priv->wait_count) { - need_unwind = 1; - } } local->op_errno = op_errno; } UNLOCK (&frame->lock); - if (need_unwind) - afr_flush_unwind (frame, this); - - call_count = afr_frame_return (frame); - - if (call_count == 0) { - local->transaction.resume (frame, this); - } - - return 0; -} - - -int -afr_flush_wind (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int i = 0; - int call_count = -1; - - local = frame->local; - priv = this->private; - - call_count = afr_up_children_count (local->child_up, priv->child_count); + call_count = afr_frame_return (frame); - if (call_count == 0) { - local->transaction.resume (frame, this); - return 0; - } - - local->call_count = call_count; - - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND_COOKIE (frame, afr_flush_wind_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->flush, - local->fd, NULL); - - if (!--call_count) - break; - } - } - - return 0; -} - - -int -afr_flush_done (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - - local = frame->local; - - local->transaction.unwind (frame, this); - - AFR_STACK_DESTROY (frame); + if (call_count == 0) + AFR_STACK_UNWIND(flush, frame, local->op_ret, + local->op_errno, NULL); return 0; } - int afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { afr_private_t *priv = NULL; afr_local_t *local = NULL; - call_frame_t *transaction_frame = NULL; int ret = -1; int op_errno = 0; + int call_count = -1; + int i = 0; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); @@ -2446,47 +2354,40 @@ afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) priv = this->private; - transaction_frame = copy_frame (frame); - if (!transaction_frame) { - op_errno = ENOMEM; - goto out; - } - - AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out); - local = transaction_frame->local; - - ret = afr_local_init (local, priv, &op_errno); - if (ret < 0) - goto out; + AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out); + local = frame->local; - local->op = GF_FOP_FLUSH; + ret = afr_local_init(local, priv, &op_errno); + if (ret < 0) + goto out; - local->transaction.fop = afr_flush_wind; - local->transaction.done = afr_flush_done; - local->transaction.unwind = afr_flush_unwind; + local->fd = fd_ref(fd); + call_count = local->call_count; - local->fd = fd_ref (fd); + /* + * Ideally we should synchronize flush against completion of writing + * the delayed changelog, but for now we just push it out first... + */ + afr_delayed_changelog_wake_up(this, fd); - local->transaction.main_frame = frame; - local->transaction.start = 0; - local->transaction.len = 0; + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_flush_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->flush, + local->fd, NULL); - ret = afr_open_fd_fix (transaction_frame, this, _gf_false); - if (ret) { - op_errno = -ret; - goto out; - } - afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); + if (!--call_count) + break; + } + } + ret = 0; - ret = 0; out: - if (ret < 0) { - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); - - AFR_STACK_UNWIND (flush, frame, -1, op_errno, NULL); - } + if (ret < 0) + AFR_STACK_UNWIND(flush, frame, -1, op_errno, NULL); return 0; } diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 33b87780dda..b29e11fafc4 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -1284,9 +1284,6 @@ out: void afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd); -void -afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd); - void afr_delayed_changelog_wake_up_cbk (void *data) { diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h index 69a8c1b1659..0f1f054059e 100644 --- a/xlators/cluster/afr/src/afr-transaction.h +++ b/xlators/cluster/afr/src/afr-transaction.h @@ -27,4 +27,8 @@ int afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending); void afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this); + +void +afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd); + #endif /* __TRANSACTION_H__ */ -- cgit