summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRavishankar N <ravishankar@redhat.com>2018-07-06 15:26:59 +0530
committerRavishankar N <ravishankar@redhat.com>2018-07-09 15:33:07 +0530
commit83076bc29535810dd596595198e260970847cff6 (patch)
tree07d228958cbeb7511a7f69a6bdb3e8e7885f9609
parenta4d67a1e0572a3bdb353c03eb3cff9646c6e2a5d (diff)
afr: fix bug-1363721.t failure
Backport of https://review.gluster.org/#/c/20036/ Note: We need to update the inode context's write_subvol even in the case of compound FOPs. This change is not present in master and 4.1 because compound FOPs were removed from those branches. Problem: In the .t, when the only good brick was brought down, writes on the fd were still succeeding on the bad bricks. The in-flight split-brain check was marking the write as a failure, but since the write succeeded on all the bad bricks, afr_txn_nothing_failed() evaluated to true and we were unwinding writev with success to DHT and then catching the failure in post-op in the background. Fix: Don't wind the FOP phase if the write_subvol (which is populated with the readable subvols obtained in the pre-op cbk) does not have at least one good brick that was up when the transaction started. Change-Id: I4a1fef4569609c31cffeaef591a64c10870e8d0b BUG: 1598720 Signed-off-by: Ravishankar N <ravishankar@redhat.com>
-rw-r--r--tests/bugs/replicate/bug-1363721.t10
-rw-r--r--xlators/cluster/afr/src/afr-common.c14
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c54
-rw-r--r--xlators/cluster/afr/src/afr.h3
4 files changed, 78 insertions, 3 deletions
diff --git a/tests/bugs/replicate/bug-1363721.t b/tests/bugs/replicate/bug-1363721.t
index 580d8b0..0ed34d8 100644
--- a/tests/bugs/replicate/bug-1363721.t
+++ b/tests/bugs/replicate/bug-1363721.t
@@ -18,6 +18,10 @@ function size_increased {
fi
}
+# Print "N" while /proc/<pid> exists as a directory and "Y" once it does not.
+# $1 is the pid to probe.
+function has_write_failed {
+        local proc_dir=/proc/$1
+
+        if [ -d $proc_dir ]; then
+                echo "N"
+        else
+                echo "Y"
+        fi
+}
TEST glusterd
TEST pidof glusterd
TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
@@ -67,8 +71,10 @@ sleep 3
# Now kill the second brick
kill_brick $V0 $H0 $B0/${V0}2
-# At this point the write should have been failed. But make sure that the second
-# brick is never an accused.
+# At this point the write should have failed.
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "Y" has_write_failed $dd_pid
+
+# Also make sure that the second brick is never an accused.
md5sum_2=$(md5sum $B0/${V0}2/file1 | awk '{print $1}')
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index eb266dd..2e63030 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -6508,6 +6508,20 @@ out:
return ret;
}
+/*
+ * Return the write_subvol value stored in the inode context that hangs off
+ * frame->local. The value is read while holding the lock of local->inode.
+ * The 'this' argument is not used here.
+ */
+uint64_t
+afr_write_subvol_get (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = frame->local;
+        uint64_t subvol_map = 0;
+
+        LOCK (&local->inode->lock);
+        {
+                subvol_map = local->inode_ctx->write_subvol;
+        }
+        UNLOCK (&local->inode->lock);
+
+        return subvol_map;
+}
+
+
int
afr_write_subvol_set (call_frame_t *frame, xlator_t *this)
{
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 9888624..6f0c5d5 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -160,6 +160,34 @@ afr_changelog_has_quorum (afr_local_t *local, xlator_t *this)
return _gf_false;
}
+
+/*
+ * Decide whether the current transaction still has a usable write subvolume.
+ *
+ * Bits 16-31 of the value returned by afr_write_subvol_get() are taken as a
+ * per-child bitmap (datamap). Returns _gf_true as soon as a child is found
+ * whose datamap bit is set and which is not marked in
+ * local->transaction.failed_subvols[]; returns _gf_false if no such child
+ * exists.
+ */
+gf_boolean_t
+afr_is_write_subvol_valid (call_frame_t *frame, xlator_t *this)
+{
+        int i = 0;
+        afr_local_t *local = NULL;
+        afr_private_t *priv = NULL;
+        uint64_t write_subvol = 0;
+        uint16_t datamap = 0;
+        const int datamap_bits = (int) (sizeof (datamap) * 8);
+
+        local = frame->local;
+        priv = this->private;
+
+        write_subvol = afr_write_subvol_get (frame, this);
+        datamap = (write_subvol & 0x00000000ffff0000) >> 16;
+
+        /* The bitmap is tested directly, so no per-call scratch array is
+         * needed. Children beyond the width of datamap can never have a bit
+         * set; stopping there also keeps the shift below well-defined. */
+        for (i = 0; i < priv->child_count && i < datamap_bits; i++) {
+                if ((datamap & (1U << i)) &&
+                    !local->transaction.failed_subvols[i])
+                        return _gf_true;
+        }
+
+        return _gf_false;
+}
+
+
int
__afr_txn_write_fop (call_frame_t *frame, xlator_t *this)
{
@@ -185,8 +213,16 @@ __afr_txn_write_fop (call_frame_t *frame, xlator_t *this)
return 0;
}
- local->call_count = call_count;
+ /* Fail unless at least one writable brick is up. */
+ if (local->transaction.type == AFR_DATA_TRANSACTION &&
+ !afr_is_write_subvol_valid (frame, this)) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+ local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
if (local->transaction.pre_op[i] && !failed_subvols[i]) {
local->transaction.wind (frame, this, i);
@@ -1291,15 +1327,31 @@ afr_pre_op_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
void *data, dict_t *xdata)
{
afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
call_frame_t *fop_frame = NULL;
default_args_cbk_t *write_args_cbk = NULL;
compound_args_cbk_t *args_cbk = data;
int call_count = -1;
int child_index = -1;
+ int i = 0;
+ int ret = 0;
local = frame->local;
+ priv = this->private;
child_index = (long) cookie;
+ if (local->transaction.type == AFR_DATA_TRANSACTION &&
+ !local->transaction.inherited) {
+ ret = afr_write_subvol_set (frame, this);
+ if (ret) {
+ /*act as if operation failed on all subvols*/
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ for (i = 0; i < priv->child_count; i++)
+ local->transaction.failed_subvols[i] = 1;
+ }
+ }
+
if (local->pre_op_compat)
afr_changelog_pre_op_update (frame, this);
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 53791b0..91825aa 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -1273,6 +1273,9 @@ afr_is_symmetric_error (call_frame_t *frame, xlator_t *this);
int
__afr_inode_ctx_get (xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx);
+uint64_t
+afr_write_subvol_get (call_frame_t *frame, xlator_t *this);
+
int
afr_write_subvol_set (call_frame_t *frame, xlator_t *this);