summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr/src/afr-common.c
diff options
context:
space:
mode:
authorkarthik-us <ksubrahm@redhat.com>2017-08-16 17:26:48 +0530
committerShyamsundar Ranganathan <srangana@redhat.com>2017-11-27 18:13:32 +0000
commit2475bfb4d25b20c20bc806771857a2de443835a2 (patch)
treec8d3fe9d34e845372aa136c2c72b5a1c1e09d80e /xlators/cluster/afr/src/afr-common.c
parentae88dc134a89fa4af4ff2356abbf32f6f220dd53 (diff)
cluster/afr: Fix for arbiter becoming source
Problem: When eager-lock is on, and two writes happen in parallel on a FD we were observing the following behaviour: - First write fails on one data brick - Since the post-op is not yet happened, the inode refresh will get both the data bricks as readable and set it in the inode context - In flight split brain check see both the data bricks as readable and allows the second write - Second write fails on the other data brick - Now the post-op happens and marks both the data bricks as bad and arbiter will become source for healing Fix: Adding one more variable called write_suvol in inode context and it will have the in memory representation of the writable subvols. Inode refresh will not update this value and its lifetime is pre-op through unlock in the afr transaction. Initially the pre-op will set this value same as read_subvol in inode context and then in the in flight split brain check we will use this value instead of read_subvol. After all the checks we will update the value of this and set the read_subvol same as this to avoid having incorrect value in that. Change-Id: I2ef6904524ab91af861d59690974bbc529ab1af3 BUG: 1516313 Signed-off-by: karthik-us <ksubrahm@redhat.com> (cherry picked from commit 19f9bcff4aada589d4321356c2670ed283f02c03)
Diffstat (limited to 'xlators/cluster/afr/src/afr-common.c')
-rw-r--r--xlators/cluster/afr/src/afr-common.c76
1 files changed, 75 insertions, 1 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index e0468a4006c..2702ed31de0 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -147,6 +147,7 @@ __afr_inode_ctx_get (xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx)
}
tmp_ctx->spb_choice = -1;
tmp_ctx->read_subvol = 0;
+ tmp_ctx->write_subvol = 0;
} else {
tmp_ctx = (afr_inode_ctx_t *) ctx_int;
}
@@ -214,7 +215,7 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,
if (ret < 0)
return ret;
- val = ctx->read_subvol;
+ val = ctx->write_subvol;
metadatamap_old = metadatamap = (val & 0x000000000000ffff);
datamap_old = datamap = (val & 0x00000000ffff0000) >> 16;
@@ -275,6 +276,7 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,
(((uint64_t) datamap) << 16) |
(((uint64_t) event) << 32);
+ ctx->write_subvol = val;
ctx->read_subvol = val;
return ret;
@@ -6562,3 +6564,75 @@ afr_serialize_xattrs_with_delimiter (call_frame_t *frame, xlator_t *this,
out:
return ret;
}
+
+int
+afr_write_subvol_set (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_inode_ctx_t *ctx = NULL;
+ uint64_t val = 0;
+ uint64_t val1 = 0;
+ int ret = -1;
+
+ local = frame->local;
+ LOCK(&local->inode->lock);
+ {
+ ret = __afr_inode_ctx_get (this, local->inode, &ctx);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_DICT_GET_FAILED,
+ "ERROR GETTING INODE CTX");
+ UNLOCK(&local->inode->lock);
+ return ret;
+ }
+
+ val = ctx->write_subvol;
+ /*
+ * We need to set the value of write_subvol to read_subvol in 2
+ * cases:
+ * 1. Initially when the value is 0. i.e., it's the first lock
+ * request.
+ * 2. If it's a metadata transaction. If metadata transactions
+ * comes in between data transactions and we have a brick
+ * disconnect, the next metadata transaction won't get the
+ * latest value of readables, since we do resetting of
+ * write_subvol in unlock code path only if it's a data
+ * transaction. To handle those scenarios we need to set the
+ * value of write_subvol to read_subvol in case of metadata
+ * transactions.
+ */
+ if (val == 0 ||
+ local->transaction.type == AFR_METADATA_TRANSACTION) {
+ val1 = ctx->read_subvol;
+ ctx->write_subvol = val1;
+ }
+ }
+ UNLOCK (&local->inode->lock);
+
+ return 0;
+}
+
+int
+afr_write_subvol_reset (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+
+ local = frame->local;
+ LOCK(&local->inode->lock);
+ {
+ ret = __afr_inode_ctx_get (this, local->inode, &ctx);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_DICT_GET_FAILED,
+ "ERROR GETTING INODE CTX");
+ UNLOCK(&local->inode->lock);
+ return ret;
+ }
+ ctx->write_subvol = 0;
+ }
+ UNLOCK(&local->inode->lock);
+
+ return 0;
+}