From 8909c28c1173e10fd2f10706bd8a0f2ca5b5d685 Mon Sep 17 00:00:00 2001 From: Anand Avati Date: Wed, 27 Mar 2013 19:55:58 -0700 Subject: cluster/afr: fsync() guarantees POST-OP completion AFR now provides a stronger guarantee that fsync() returns only after completely finishing all the deferred/delayed POST-OP on that open file. To achieve this we make a stub out of the returning fsync and register it with the "delayed" frame in afr_delayed_changelog_wake_resume(). The delayed frame, after getting woken up and finishing the POST-OP will call_resume() the registered stub (which UNWINDs the fsync) at the time of frame destruction. This provides a guarantee that an application's (or FUSE) fsync() returns only after finishing up all the previous transactions, including delayed POST-OPs and UNLOCK. Change-Id: Iaa955457e2f25088a144fde37ad0444277b5cf49 BUG: 927146 Signed-off-by: Anand Avati Reviewed-on: http://review.gluster.org/4737 Tested-by: Gluster Build System Reviewed-by: Pranith Kumar Karampuri --- xlators/cluster/afr/src/afr-common.c | 29 +++++++++++++++++++++++------ xlators/cluster/afr/src/afr-transaction.c | 30 ++++++++++++++++++++++++++---- xlators/cluster/afr/src/afr.h | 7 +++++++ 3 files changed, 56 insertions(+), 10 deletions(-) (limited to 'xlators/cluster') diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 5b96a7789..c55adc1ee 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -852,6 +852,11 @@ afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this) loc_wipe (&local->transaction.new_parent_loc); GF_FREE (local->transaction.postop_piggybacked); + + if (local->transaction.resume_stub) { + call_resume (local->transaction.resume_stub); + local->transaction.resume_stub = NULL; + } } @@ -2643,6 +2648,7 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int call_count = -1; int child_index = (long) cookie; int read_child = 0; + call_stub_t *stub = NULL; 
local = frame->local; @@ -2677,18 +2683,29 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, call_count = afr_frame_return (frame); if (call_count == 0) { + /* Make a stub out of the frame, and register it + with the waking up post-op. When the call-stub resumes, + we are guaranteed that there was no post-op pending + (i.e changelogs were unset in the server). This is an + essential "guarantee", that fsync() returns only after + completely finishing EVERYTHING, including the delayed + post-op. This guarantee is expected by FUSE graph switching + for example. + */ + stub = fop_fsync_cbk_stub (frame, default_fsync_cbk, op_ret, + op_errno, prebuf, postbuf, xdata); + if (!stub) { + AFR_STACK_UNWIND (fsync, frame, -1, ENOMEM, 0, 0, 0); + return 0; + } + /* If no new unstable writes happened between the time we cleared the unstable write witness flag in afr_fsync and now, calling afr_delayed_changelog_wake_up() should wake up and skip over the fsync phase and go straight to afr_changelog_post_op_now() */ - afr_delayed_changelog_wake_up (this, local->fd); - - AFR_STACK_UNWIND (fsync, frame, local->op_ret, local->op_errno, - &local->cont.fsync.prebuf, - &local->cont.fsync.postbuf, - NULL); + afr_delayed_changelog_wake_resume (this, local->fd, stub); } return 0; diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 76697f06b..f632a2b70 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -1339,7 +1339,8 @@ out: void -afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd); +afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd, + call_stub_t *stub); void afr_delayed_changelog_wake_up_cbk (void *data) @@ -1579,12 +1580,14 @@ afr_changelog_post_op_safe (call_frame_t *frame, xlator_t *this) void -afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd) +afr_delayed_changelog_post_op 
(xlator_t *this, call_frame_t *frame, fd_t *fd, + call_stub_t *stub) { afr_fd_ctx_t *fd_ctx = NULL; call_frame_t *prev_frame = NULL; struct timeval delta = {0, }; afr_private_t *priv = NULL; + afr_local_t *local = NULL; priv = this->private; @@ -1613,7 +1616,11 @@ unlock: pthread_mutex_unlock (&fd_ctx->delay_lock); if (prev_frame) { + local = prev_frame->local; + local->transaction.resume_stub = stub; afr_changelog_post_op_safe (prev_frame, this); + } else { + call_resume (stub); } } @@ -1626,16 +1633,31 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this) local = frame->local; if (is_afr_delayed_changelog_post_op_needed (frame, this)) - afr_delayed_changelog_post_op (this, frame, local->fd); + afr_delayed_changelog_post_op (this, frame, local->fd, NULL); else afr_changelog_post_op_safe (frame, this); } + +/* Wake up the sleeping/delayed post-op, and also register + a stub to have it resumed after this transaction + completely finishes. + + The @stub gets saved in @local and gets resumed in + afr_local_cleanup() +*/ +void +afr_delayed_changelog_wake_resume (xlator_t *this, fd_t *fd, call_stub_t *stub) +{ + afr_delayed_changelog_post_op (this, NULL, fd, stub); +} + + void afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd) { - afr_delayed_changelog_post_op (this, NULL, fd); + afr_delayed_changelog_post_op (this, NULL, fd, NULL); } diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 5d9f752b9..878dbd7ba 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -673,6 +673,10 @@ typedef struct _afr_local { */ int *postop_piggybacked; + /* stub to resume on destruction + of the transaction frame */ + call_stub_t *resume_stub; + int32_t **txn_changelog;//changelog after pre+post ops unsigned char *pre_op; @@ -1102,4 +1106,7 @@ afr_fd_report_unstable_write (xlator_t *this, fd_t *fd); gf_boolean_t afr_fd_has_witnessed_unstable_write (xlator_t *this, fd_t *fd); +void +afr_delayed_changelog_wake_resume 
(xlator_t *this, fd_t *fd, call_stub_t *stub); + #endif /* __AFR_H__ */ -- cgit