From a6d313d12c98cf533c6bbb10f491dd2ec48ca89c Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Wed, 19 Apr 2017 16:40:05 +0530 Subject: afr: don't do a post-op on a brick if op failed Problem: In afr-v2, self-blaming xattrs are not there by design. But if the FOP failed on a brick due to an error other than ENOTCONN (or even due to ENOTCONN, but we regained connection before postop was wound), we wind the post-op also on the failed brick, leading to setting self-blaming xattrs on that brick. This can lead to undesired results like healing of files in split-brain etc. Fix: If a fop failed on a brick on which pre-op was successful, do not perform post-op on it. This also produces the desired effect of not resetting the dirty xattr on the brick, which is how it should be because if the fop failed on a brick, there is no reason to clear the dirty bit which actually serves as an indication of the failure. > Reviewed-on: https://review.gluster.org/16976 > Smoke: Gluster Build System > NetBSD-regression: NetBSD Build System > CentOS-regression: Gluster Build System > Reviewed-by: Pranith Kumar Karampuri Change-Id: I5f1caf4d1b39f36cf8093ccef940118638caa9c4 BUG: 1443319 Signed-off-by: Ravishankar N Reviewed-on: https://review.gluster.org/17082 Smoke: Gluster Build System NetBSD-regression: NetBSD Build System CentOS-regression: Gluster Build System Reviewed-by: Pranith Kumar Karampuri --- xlators/cluster/afr/src/afr-transaction.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'xlators') diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 8178fc0d18b..83e25f3a122 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -588,11 +588,17 @@ afr_locked_nodes_get (afr_transaction_type type, afr_internal_lock_t *int_lock) int afr_changelog_call_count (afr_transaction_type type, unsigned char *pre_op_subvols, + unsigned char *failed_subvols, unsigned int child_count) { + int i = 0; int call_count = 0; - call_count = AFR_COUNT(pre_op_subvols, child_count); + for (i = 0; i < child_count; i++) { + if (pre_op_subvols[i] && !failed_subvols[i]) { + call_count++; + } + } if (type == AFR_ENTRY_RENAME_TRANSACTION) call_count *= 2; @@ -1244,6 +1250,7 @@ afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr, call_count = afr_changelog_call_count (local->transaction.type, local->transaction.pre_op, + local->transaction.failed_subvols, priv->child_count); if (call_count == 0) { @@ -1257,7 +1264,8 @@ afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr, local->transaction.changelog_resume = changelog_resume; for (i = 0; i < priv->child_count; i++) { - if (!local->transaction.pre_op[i]) + if (!local->transaction.pre_op[i] || + local->transaction.failed_subvols[i]) continue; switch (local->transaction.type) { -- cgit