From c30bd3ca40fe2235ab46f3cd1e76ed64ba0cd8e4 Mon Sep 17 00:00:00 2001 From: karthik-us Date: Thu, 21 May 2020 15:18:59 +0530 Subject: cluster/afr: Prioritize ENOSPC over other errors Problem: In a replicate/arbiter volume if file creations or writes fail on quorum number of bricks and on one brick it is due to ENOSPC and on another brick it fails for a different reason, it may fail with errors other than ENOSPC in some cases. Fix: Prioritize ENOSPC over other lesser priority errors and do not set op_errno in posix_gfid_set if op_ret is 0 to avoid receiving any error_no which can be misinterpreted by __afr_dir_write_finalize(). Also removing the function afr_has_arbiter_fop_cbk_quorum() which might consider a successful reply from a single brick as quorum success in some cases, whereas we always need fop to be successful on quorum number of bricks in arbiter configuration. Change-Id: I106e267f8b9451f681022f1cccb410d9bc824c08 Fixes: #1254 Signed-off-by: karthik-us (cherry picked from commit fa63b45ca5edf172b1b89b28b5db3c5129cc57b6) --- xlators/cluster/afr/src/afr-common.c | 4 ++- xlators/cluster/afr/src/afr-transaction.c | 48 ++----------------------------- 2 files changed, 5 insertions(+), 47 deletions(-) (limited to 'xlators/cluster') diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 8959a9c503d..ecad217d0c4 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -2471,7 +2471,7 @@ error: * others in that they must be given higher priority while * returning to the user. 
* - * The hierarchy is ENODATA > ENOENT > ESTALE > others + * The hierarchy is ENODATA > ENOENT > ESTALE > ENOSPC > others */ int @@ -2483,6 +2483,8 @@ afr_higher_errno(int32_t old_errno, int32_t new_errno) return ENOENT; if (old_errno == ESTALE || new_errno == ESTALE) return ESTALE; + if (old_errno == ENOSPC || new_errno == ENOSPC) + return ENOSPC; return new_errno; } diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index bdc4bfc0b10..87d68d40161 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -521,42 +521,6 @@ afr_compute_pre_op_sources(call_frame_t *frame, xlator_t *this) local->transaction.pre_op_sources[j] = 0; } -gf_boolean_t -afr_has_arbiter_fop_cbk_quorum(call_frame_t *frame) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - xlator_t *this = NULL; - gf_boolean_t fop_failed = _gf_false; - unsigned char *pre_op_sources = NULL; - int i = 0; - - local = frame->local; - this = frame->this; - priv = this->private; - pre_op_sources = local->transaction.pre_op_sources; - - /* If the fop failed on the brick, it is not a source. 
*/ - for (i = 0; i < priv->child_count; i++) - if (local->transaction.failed_subvols[i]) - pre_op_sources[i] = 0; - - switch (AFR_COUNT(pre_op_sources, priv->child_count)) { - case 1: - if (pre_op_sources[ARBITER_BRICK_INDEX]) - fop_failed = _gf_true; - break; - case 0: - fop_failed = _gf_true; - break; - } - - if (fop_failed) - return _gf_false; - - return _gf_true; -} - void afr_txn_arbitrate_fop(call_frame_t *frame, xlator_t *this) { @@ -971,12 +935,8 @@ afr_need_dirty_marking(call_frame_t *frame, xlator_t *this) priv->child_count) return _gf_false; - if (priv->arbiter_count) { - if (!afr_has_arbiter_fop_cbk_quorum(frame)) - need_dirty = _gf_true; - } else if (!afr_has_fop_cbk_quorum(frame)) { + if (!afr_has_fop_cbk_quorum(frame)) need_dirty = _gf_true; - } return need_dirty; } @@ -1026,12 +986,8 @@ afr_handle_quorum(call_frame_t *frame, xlator_t *this) * no split-brain with the fix. The problem is eliminated completely. */ - if (priv->arbiter_count) { - if (afr_has_arbiter_fop_cbk_quorum(frame)) - return; - } else if (afr_has_fop_cbk_quorum(frame)) { + if (afr_has_fop_cbk_quorum(frame)) return; - } if (afr_need_dirty_marking(frame, this)) goto set_response; -- cgit