summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRavishankar N <ravishankar@redhat.com>2018-07-09 15:10:04 +0530
committerjiffin tony Thottan <jthottan@redhat.com>2018-07-11 14:04:25 +0000
commitc530352ff2f5dc96631154f2b9a193d99cedd9bc (patch)
tree0a09cfbc4198c5e2830ad00344df6ea3178345e8
parentc1758216a39232e5592e0e34999fed48420fc5ac (diff)
afr: don't update readables if inode refresh failed on all children
Backport of: https://review.gluster.org/#/c/20029/ 3.12 still supports quorum-reads, hence modified afr_inode_refresh_done() to support that. If inode refresh failed on all children of afr due to ENOENT (say file migrated by dht), it resets the readables to zero. Any inflight txn which then later comes on the inode fails with EIO because no readable children present for the inode. Fix: Don't update readables when inode refresh fails on *all* children of afr. In that way any inflight txns will either proceed with its own inode refresh if needed and fail it with the right errno or use the old value of readables and continue with the txn. Also, add quorum checks to the beginning of afr_transaction(). Otherwise, we seem to be winding the lock and checking for quorum only in pre-op pahse. Note: This should ideally fix BZ 1329505 since the stop gap fix for it is has been reverted at https://review.gluster.org/#/c/20028. Change-Id: I82990769f01be918a073fec83fc67ba4b3be24b1 BUG: 1599247 Signed-off-by: Ravishankar N <ravishankar@redhat.com>
-rw-r--r--xlators/cluster/afr/src/afr-common.c58
-rw-r--r--xlators/cluster/afr/src/afr-open.c2
-rw-r--r--xlators/cluster/afr/src/afr-read-txn.c6
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c7
4 files changed, 52 insertions, 21 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 2e63030189a..0ffd59d65e2 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1145,7 +1145,7 @@ afr_inode_refresh_err (call_frame_t *frame, xlator_t *this)
err = afr_final_errno (local, priv);
ret:
- return -err;
+ return err;
}
gf_boolean_t
@@ -1195,31 +1195,31 @@ afr_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
if (ret == -EIO || (local->is_read_txn && !event_generation)) {
/* No readable subvolume even after refresh ==> splitbrain.*/
if (!priv->fav_child_policy) {
- err = -EIO;
+ err = EIO;
goto refresh_done;
}
read_subvol = afr_sh_get_fav_by_policy (this, local->replies,
inode, NULL);
if (read_subvol == -1) {
- err = -EIO;
+ err = EIO;
goto refresh_done;
}
heal_frame = copy_frame (frame);
if (!heal_frame) {
- err = -EIO;
+ err = EIO;
goto refresh_done;
}
heal_frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
heal_local = AFR_FRAME_INIT (heal_frame, op_errno);
if (!heal_local) {
- err = -EIO;
+ err = EIO;
AFR_STACK_DESTROY (heal_frame);
goto refresh_done;
}
heal_local->xdata_req = dict_new();
if (!heal_local->xdata_req) {
- err = -EIO;
+ err = EIO;
AFR_STACK_DESTROY (heal_frame);
goto refresh_done;
}
@@ -1239,29 +1239,54 @@ refresh_done:
return 0;
}
+static void
+afr_fill_success_replies (afr_local_t *local, afr_private_t *priv,
+ unsigned char *replies)
+{
+ int i = 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].valid && local->replies[i].op_ret == 0)
+ replies[i] = 1;
+ }
+}
+
int
afr_inode_refresh_done (call_frame_t *frame, xlator_t *this, int error)
{
call_frame_t *heal_frame = NULL;
afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
gf_boolean_t start_heal = _gf_false;
afr_local_t *heal_local = NULL;
+ unsigned char *success_replies = NULL;
int op_errno = ENOMEM;
int ret = 0;
- int err = 0;
if (error != 0) {
- err = error;
goto refresh_done;
}
local = frame->local;
+ priv = this->private;
+ success_replies = alloca0 (priv->child_count);
+ afr_fill_success_replies (local, priv, success_replies);
+
+ if (local->op == GF_FOP_LOOKUP ||
+ (local->is_read_txn && !priv->quorum_reads))
+ goto interpret;
+ if (!afr_has_quorum (success_replies, this)) {
+ error = afr_final_errno (frame->local, this->private);
+ if (!error)
+ error = afr_quorum_errno(priv);
+ goto refresh_done;
+ }
+
+interpret:
ret = afr_replies_interpret (frame, this, local->refreshinode,
&start_heal);
- err = afr_inode_refresh_err (frame, this);
-
if (ret && afr_selfheal_enabled (this) && start_heal) {
heal_frame = copy_frame (frame);
if (!heal_frame)
@@ -1281,7 +1306,7 @@ afr_inode_refresh_done (call_frame_t *frame, xlator_t *this, int error)
}
refresh_done:
- afr_txn_refresh_done (frame, this, err);
+ afr_txn_refresh_done (frame, this, error);
return 0;
}
@@ -1295,7 +1320,7 @@ afr_inode_refresh_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int call_child = (long) cookie;
int8_t need_heal = 1;
int call_count = 0;
- GF_UNUSED int ret = 0;
+ int ret = 0;
local = frame->local;
local->replies[call_child].valid = 1;
@@ -1318,7 +1343,8 @@ afr_inode_refresh_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
afr_set_need_heal (this, local);
- afr_inode_refresh_done (frame, this, 0);
+ ret = afr_inode_refresh_err (frame, this);
+ afr_inode_refresh_done (frame, this, ret);
}
}
@@ -2786,7 +2812,7 @@ afr_discover_done (call_frame_t *frame, xlator_t *this)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int i = -1;
+ int i = -1;
int op_errno = 0;
int spb_choice = -1;
int read_subvol = -1;
@@ -2904,7 +2930,7 @@ afr_discover_do (call_frame_t *frame, xlator_t *this, int err)
priv = this->private;
if (err) {
- local->op_errno = -err;
+ local->op_errno = err;
ret = -1;
goto out;
}
@@ -3019,7 +3045,7 @@ afr_lookup_do (call_frame_t *frame, xlator_t *this, int err)
priv = this->private;
if (err < 0) {
- local->op_errno = -err;
+ local->op_errno = err;
ret = -1;
goto out;
}
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
index 6c625ccf7da..70dc4928044 100644
--- a/xlators/cluster/afr/src/afr-open.c
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -123,7 +123,7 @@ afr_open_continue (call_frame_t *frame, xlator_t *this, int err)
priv = this->private;
if (err) {
- AFR_STACK_UNWIND (open, frame, -1, -err, NULL, NULL);
+ AFR_STACK_UNWIND (open, frame, -1, err, NULL, NULL);
} else {
local->call_count = AFR_COUNT (local->child_up,
priv->child_count);
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
index 50e8040d33e..3e930a9db37 100644
--- a/xlators/cluster/afr/src/afr-read-txn.c
+++ b/xlators/cluster/afr/src/afr-read-txn.c
@@ -68,7 +68,7 @@ afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
read_subvol = afr_read_subvol_select_by_policy (inode, this,
local->readable, NULL);
if (read_subvol == -1) {
- err = -EIO;
+ err = EIO;
goto readfn;
}
@@ -87,7 +87,7 @@ readfn:
}
if (read_subvol == -1) {
- AFR_SET_ERROR_AND_CHECK_SPLIT_BRAIN (-1, -err);
+ AFR_SET_ERROR_AND_CHECK_SPLIT_BRAIN (-1, err);
}
local->readfn (frame, this, read_subvol);
@@ -196,7 +196,7 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,
if (priv->quorum_reads &&
priv->quorum_count && !afr_has_quorum (priv->child_up, this)) {
local->op_ret = -1;
- local->op_errno = ENOTCONN;
+ local->op_errno = afr_quorum_errno(priv);
read_subvol = -1;
goto read;
}
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 6f0c5d58f62..b590a1f5d51 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -2677,7 +2677,7 @@ afr_write_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
afr_local_t *local = frame->local;
if (err) {
- AFR_SET_ERROR_AND_CHECK_SPLIT_BRAIN(-1, -err);
+ AFR_SET_ERROR_AND_CHECK_SPLIT_BRAIN(-1, err);
goto fail;
}
@@ -2703,6 +2703,11 @@ afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)
local->transaction.resume = afr_transaction_resume;
local->transaction.type = type;
+ if (priv->quorum_count && !afr_has_quorum (local->child_up, this)) {
+ ret = -afr_quorum_errno(priv);
+ goto out;
+ }
+
if (!afr_is_consistent_io_possible (local, priv, &ret)) {
ret = -ret; /*op_errno to ret conversion*/
goto out;