summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr/src/afr-lk-common.c
diff options
context:
space:
mode:
authorAnand Avati <avati@redhat.com>2013-01-25 19:22:55 -0800
committerAnand Avati <avati@redhat.com>2013-01-26 00:20:03 -0800
commit1939519979b6c4da110f08434fe63d0b03332d52 (patch)
treebf5a91507513474fdfcde7e2e3bfe2760d0d8166 /xlators/cluster/afr/src/afr-lk-common.c
parentd8d138300b18fc2291a8a73059ac1d888141d6db (diff)
replicate: fix lock counting in blocking lock path
As of http://review.gluster.org/2828, the blocking lock code path's condition for checking completion of locking atempt is broken. The condition - if ((child_index == priv->child_count) || ...) and if ((child_index == priv->child_count) && ...) which is retained to check completion of blocking lock attempts for DATA/METADATA transaction will _always_ fail because a few lines above we have - child_index = cookie % priv->child_count; So child_index will never equal priv->child_count. This leaves the correctness at the mercy of the next part of the conditional - .. (int_lock->lock_count == int_lock->lk_expected_count) .. This "works" as long as no server went down during the transaction. If a server goes down in the middle of the transaction, then this condition also fails, and the code wraps around and starts a blocking lock attempt loop all the way again from from the first server. This results in double locks getting acquired on those servers, and eventually the second condition gets hit (first condition is _never_ hit) and we come out of locking phase. During unlock phase we perform only one unlock per server leaving the other lock "leaked" forever. Change-Id: I7189cdf3f70901b04647516fe1d1e189f36cc8dd BUG: 765564 Signed-off-by: Anand Avati <avati@redhat.com> Reviewed-on: http://review.gluster.org/4433 Reviewed-by: Krishnan Parthasarathi <kparthas@redhat.com> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com>
Diffstat (limited to 'xlators/cluster/afr/src/afr-lk-common.c')
-rw-r--r--xlators/cluster/afr/src/afr-lk-common.c9
1 files changed, 4 insertions, 5 deletions
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index ef183d985cb..802f91f5fa7 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -913,6 +913,8 @@ afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_errno = op_errno;
int_lock->lock_op_errno = op_errno;
}
+
+ int_lock->lk_attempted_count++;
}
UNLOCK (&frame->lock);
@@ -1072,7 +1074,7 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
child_index++;
}
- if ((child_index == priv->child_count) &&
+ if ((int_lock->lk_expected_count == int_lock->lk_attempted_count) &&
((afr_is_entrylk (int_lock, local->transaction.type) &&
int_lock->entrylk_lock_count == 0) ||
(int_lock->lock_count == 0))){
@@ -1091,10 +1093,7 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
}
- if ((child_index == priv->child_count) ||
- (int_lock->entrylk_lock_count == int_lock->lk_expected_count) ||
- (int_lock->lock_count == int_lock->lk_expected_count)) {
-
+ if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
/* we're done locking */
gf_log (this->name, GF_LOG_DEBUG,