From 864ac6b7b3d69b5f2cc0fafe4b12d861da3a633c Mon Sep 17 00:00:00 2001
From: Pranith Kumar K
Date: Tue, 2 Apr 2013 00:24:45 +0530
Subject: cluster/afr: prevent piggyback on stale pre_op

Here are the logs of a file on which we saw EIO because of a size mismatch:

[root@lizzie ~]# grep 38f18204 /var/log/glusterfs/mnt-x-.log
Reporting Unstable write for 38f18204-2840-408e-ae65-c01f4106b8c4 for offset: 0, len: 7680
Cleared unstable write flag for 38f18204-2840-408e-ae65-c01f4106b8c4: offset 0 length 7680
Reporting Unstable write for 38f18204-2840-408e-ae65-c01f4106b8c4 for offset: 7680, len: 71680
Reporting Unstable write for 38f18204-2840-408e-ae65-c01f4106b8c4 for offset: 79360, len: 15716
fsync completed on 38f18204-2840-408e-ae65-c01f4106b8c4 for offset 0 length 7680 with changelog status: -1 -1

According to these logs, fsync did not happen after the writev with offset: 79360, len: 15716, which is the reason for this problem.

In total, 3 writes came in. Let's call them w1, w2, w3.

w1 does pre_op, so the pre_op_done[0] and pre_op_done[1] counts become 1 and 1. Then is_piggyback_post_op() is called for w1 and it returns *false*. w1's fsync is fired.

Now w2 and w3 come and see that pre_op_done[0] and pre_op_done[1] are both 1, so pre_op_piggyback[0] and pre_op_piggyback[1] are both incremented twice, once by w2 and once more by w3, and become 2, 2 ------- Step-A

Now the fsync of w1 is complete, and it goes ahead with the post op and decrements pre_op_done[0], pre_op_done[1] to 0, 0.

Now the w2 and w3 writevs complete, and is_piggyback_post_op() will return *true* for both w2 and w3. So fsync is not fired for either w2 or w3.

This patch prevents Step-A from happening.
Change-Id: I8b6af1f1875b2cf5f718caa3c16ee7ff3dc96b5c BUG: 927146 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.org/4752 Tested-by: Gluster Build System Reviewed-by: Jeff Darcy --- xlators/cluster/afr/src/afr-transaction.c | 35 ++----------------------------- 1 file changed, 2 insertions(+), 33 deletions(-) (limited to 'xlators/cluster/afr/src/afr-transaction.c') diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index e766b65c..50c15c34 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -146,36 +146,6 @@ out: return; } - -static void -__mark_pre_op_undone_on_fd (call_frame_t *frame, xlator_t *this, int child_index) -{ - afr_local_t *local = NULL; - afr_fd_ctx_t *fd_ctx = NULL; - - local = frame->local; - - if (!local->fd) - return; - - fd_ctx = afr_fd_ctx_get (local->fd, this); - - if (!fd_ctx) - goto out; - - LOCK (&local->fd->lock); - { - if (local->transaction.type == AFR_DATA_TRANSACTION) { - GF_ASSERT (fd_ctx->pre_op_done[child_index]); - fd_ctx->pre_op_done[child_index]--; - } - } - UNLOCK (&local->fd->lock); -out: - return; -} - - static void __mark_non_participant_children (int32_t *pending[], int child_count, unsigned char *participants, @@ -691,9 +661,6 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this) afr_changelog_post_op_cbk (frame, (void *)(long)i, this, 1, 0, xattr[i], NULL); } else { - if (!piggyback) - __mark_pre_op_undone_on_fd (frame, this, - i); STACK_WIND_COOKIE (frame, afr_changelog_post_op_cbk, (void *) (long) i, @@ -1392,6 +1359,8 @@ is_piggyback_post_op (call_frame_t *frame, fd_t *fd) if necesssary */ piggyback = _gf_false; + GF_ASSERT (fdctx->pre_op_done[i]); + fdctx->pre_op_done[i]--; } } } -- cgit