From 1df75bc835a75f17fa0fcd3722ef68f05f5f3200 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Mon, 30 Jul 2012 21:01:21 +0530 Subject: cluster/afr: Handle child_up & fd not opened case in xaction RCA: When an fd is opened while a brick is down, afr issues an open on the other brick after that brick comes back up. That open can fail for a number of reasons (ENOENT, etc.). While the system is in that state, the inodelk/entrylk and pre-op happen only on the bricks that are up and, for fd-fops, have the fd opened. Post-op should treat only the bricks where both the pre-op and the fop succeeded as successes, and the rest as failures. The code currently marks only the children that are down as failures, rather than both the children that are down and those where the fd is not opened. This makes the changelog appear as success on a subvolume where we did not perform any fop, leading to no pending changelog despite differences in data/metadata for regular files. Fix: Mark non-participants of the fop as failures. Participation is tracked in transaction.pre_op[]. Tests: Simulated the scenario by loading err-gen on top of one of the client xlators so that it always fails all fops. Performed fops and confirmed that the changelog recorded pending fops against the brick with err-gen loaded. Also tested the brick-down case by performing entry/metadata/data operations to confirm they still work as expected. 
Change-Id: I41905936126b19abba56ca581c0301a894507e1a BUG: 844987 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.com/3776 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/cluster/afr/src/afr-transaction.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index e0729fbf6dc..b21e14a4b36 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -175,16 +175,16 @@ out: static void -__mark_down_children (int32_t *pending[], int child_count, - unsigned char *child_up, afr_transaction_type type) +__mark_non_participant_children (int32_t *pending[], int child_count, + unsigned char *participants, + afr_transaction_type type) { int i = 0; int j = 0; + j = afr_index_for_transaction_type (type); for (i = 0; i < child_count; i++) { - j = afr_index_for_transaction_type (type); - - if (!child_up[i]) + if (!participants[i]) pending[i][j] = 0; } } @@ -511,8 +511,9 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this) local = frame->local; int_lock = &local->internal_lock; - __mark_down_children (local->pending, priv->child_count, - local->child_up, local->transaction.type); + __mark_non_participant_children (local->pending, priv->child_count, + local->transaction.pre_op, + local->transaction.type); if (local->fd) afr_transaction_rm_stale_children (frame, this, -- cgit