From 4cc44e3f128bf0eb90e5634c634ff5eabeaeda60 Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Tue, 16 Jun 2020 10:47:47 +0530 Subject: afr: more quorum checks in lookup and new entry marking Problem: See github issue for details. Fix: -In lookup if the entry exists in 2 out of 3 bricks, don't fail the lookup with ENOENT just because there is an entrylk on the parent. Consider quorum before deciding. -If entry FOP does not succeed on quorum no. of bricks, do not perform new entry mark. Fixes: #1303 Change-Id: I56df8c89ad53b29fa450c7930a7b7ccec9f4a6c5 Signed-off-by: Ravishankar N (cherry picked from commit c4a6748f25d2c1ab3ebcf89952278ebf94c8d371) --- ...20-mark-dirty-for-entry-txn-on-quorum-failure.t | 2 -- xlators/cluster/afr/src/afr-common.c | 24 ++++++++++++---------- xlators/cluster/afr/src/afr-dir-write.c | 8 ++++++++ xlators/cluster/afr/src/afr.h | 4 ++++ 4 files changed, 25 insertions(+), 13 deletions(-) diff --git a/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t index 26f90497d6f..49c4dea4e9c 100644 --- a/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t +++ b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t @@ -53,8 +53,6 @@ TEST ! ls $B0/${V0}1/file$i TEST ls $B0/${V0}2/file$i dirty=$(get_hex_xattr trusted.afr.dirty $B0/${V0}2) TEST [ "$dirty" != "000000000000000000000000" ] -EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file$i -EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file$i TEST $CLI volume set $V0 self-heal-daemon on EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index de4c306a0f4..8959a9c503d 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -1236,7 +1236,7 @@ refresh_done: return 0; } -static void +void afr_fill_success_replies(afr_local_t *local, afr_private_t *priv, unsigned char *replies) { @@ -2295,6 +2295,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) 0, }; gf_boolean_t locked_entry = _gf_false; + gf_boolean_t in_flight_create = _gf_false; gf_boolean_t can_interpret = _gf_true; inode_t *parent = NULL; ia_type_t ia_type = IA_INVAL; @@ -2338,17 +2339,12 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) if (!replies[i].valid) continue; - if (locked_entry && replies[i].op_ret == -1 && - replies[i].op_errno == ENOENT) { - /* Second, check entry is still - "underway" in creation */ - local->op_ret = -1; - local->op_errno = ENOENT; - goto error; - } - - if (replies[i].op_ret == -1) + if (replies[i].op_ret == -1) { + if (locked_entry && replies[i].op_errno == ENOENT) { + in_flight_create = _gf_true; + } continue; + } if (read_subvol == -1 || !readable[read_subvol]) { read_subvol = i; @@ -2358,6 +2354,12 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) } } + if (in_flight_create && !afr_has_quorum(success_replies, this, NULL)) { + local->op_ret = -1; + local->op_errno = ENOENT; + goto error; + } + if (read_subvol == -1) goto error; /* We now have a read_subvol, which is readable[] (if there diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c index 84e2a344624..416c19d688f 100644 --- a/xlators/cluster/afr/src/afr-dir-write.c +++ b/xlators/cluster/afr/src/afr-dir-write.c @@ -349,6 +349,7 @@ afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this) afr_private_t *priv = NULL; int pre_op_count = 0; int failed_count = 0; + unsigned char *success_replies = NULL; local = frame->local; priv = this->private; @@ -364,9 +365,16 @@ afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this) failed_count = AFR_COUNT(local->transaction.failed_subvols, priv->child_count); + /* FOP succeeded on all bricks. */ if (pre_op_count == priv->child_count && !failed_count) return; + /* FOP did not suceed on quorum no. of bricks. */ + success_replies = alloca0(priv->child_count); + afr_fill_success_replies(local, priv, success_replies); + if (!afr_has_quorum(success_replies, this, NULL)) + return; + if (priv->thin_arbiter_count) { /*Mark new entry using ta file*/ local->is_new_entry = _gf_true; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 600c7551801..e777b10ee2b 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -1341,4 +1341,8 @@ afr_mark_new_entry_changelog(call_frame_t *frame, xlator_t *this); void afr_selfheal_childup(xlator_t *this, afr_private_t *priv); + +void +afr_fill_success_replies(afr_local_t *local, afr_private_t *priv, + unsigned char *replies); #endif /* __AFR_H__ */ -- cgit