From 985a1d15db910e012ddc1dcdc2e333cc28a9968b Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Fri, 18 May 2018 15:38:29 +0530 Subject: afr: fix bug-1363721.t failure Problem: In the .t, when the only good brick was brought down, writes on the fd were still succeeding on the bad bricks. The inflight split-brain check was marking the write as a failure but since the write succeeded on all the bad bricks, afr_txn_nothing_failed() was set to true and we were unwinding writev with success to DHT and then catching the failure in post-op in the background. Fix: Don't wind the FOP phase if the write_subvol (which is populated with readable subvols obtained in pre-op cbk) does not have at least 1 good brick which was up when the transaction started. Note: This fix is not related to brick multiplexing. I ran the .t 10 times with this fix and brick-mux enabled without any failures. Change-Id: I915c9c366aa32cd342b1565827ca2d83cb02ae85 updates: bz#1577672 Signed-off-by: Ravishankar N --- xlators/cluster/afr/src/afr-common.c | 14 ++++++++++++ xlators/cluster/afr/src/afr-transaction.c | 38 +++++++++++++++++++++++++++++++ xlators/cluster/afr/src/afr.h | 3 +++ 3 files changed, 55 insertions(+) (limited to 'xlators/cluster/afr') diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index c313294961f..ad037cd7060 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -6631,6 +6631,20 @@ out: return ret; } +uint64_t +afr_write_subvol_get (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + uint64_t write_subvol = 0; + + local = frame->local; + LOCK(&local->inode->lock); + write_subvol = local->inode_ctx->write_subvol; + UNLOCK (&local->inode->lock); + + return write_subvol; +} + int afr_write_subvol_set (call_frame_t *frame, xlator_t *this) { diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index b4d3062fc2b..9c0963781e3 100644 --- 
a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -167,6 +167,34 @@ afr_changelog_has_quorum (afr_local_t *local, xlator_t *this) return _gf_false; } + +gf_boolean_t +afr_is_write_subvol_valid (call_frame_t *frame, xlator_t *this) +{ + int i = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + uint64_t write_subvol = 0; + unsigned char *writable = NULL; + uint16_t datamap = 0; + + local = frame->local; + priv = this->private; + writable = alloca0 (priv->child_count); + + write_subvol = afr_write_subvol_get (frame, this); + datamap = (write_subvol & 0x00000000ffff0000) >> 16; + for (i = 0; i < priv->child_count; i++) { + if (datamap & (1 << i)) + writable[i] = 1; + + if (writable[i] && !local->transaction.failed_subvols[i]) + return _gf_true; + } + + return _gf_false; +} + int afr_transaction_fop (call_frame_t *frame, xlator_t *this) { @@ -189,6 +217,16 @@ afr_transaction_fop (call_frame_t *frame, xlator_t *this) afr_transaction_resume (frame, this); return 0; } + + /* Fail the FOP with EIO unless at least one writable brick is up. */ + if (local->transaction.type == AFR_DATA_TRANSACTION && + !afr_is_write_subvol_valid (frame, this)) { + local->op_ret = -1; + local->op_errno = EIO; + afr_transaction_resume (frame, this); + return 0; + } + local->call_count = call_count; for (i = 0; i < priv->child_count; i++) { if (local->transaction.pre_op[i] && !failed_subvols[i]) { diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index c76b0c1c485..68087e0ea20 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -1218,6 +1218,9 @@ afr_is_symmetric_error (call_frame_t *frame, xlator_t *this); int __afr_inode_ctx_get (xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx); +uint64_t +afr_write_subvol_get (call_frame_t *frame, xlator_t *this); + int afr_write_subvol_set (call_frame_t *frame, xlator_t *this); -- cgit