diff options
author | Pranith Kumar K <pkarampu@redhat.com> | 2019-04-04 15:31:56 +0530 |
---|---|---|
committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2019-04-15 06:02:22 +0000 |
commit | 8239efc76b56f1f4ce16bab263f7b355d8205820 (patch) | |
tree | 4e73e82e017bcdcb6a1bec02ad85a721aebca7dd /xlators/cluster | |
parent | 798aadbe51a9a02dd98a0f861cc239ecf7c8ed57 (diff) |
cluster/afr: Remove local from owners_list on failure of lock-acquisition
When eager-lock lock acquisition fails because of say network failures, the
local is not being removed from owners_list, this leads to accumulation of
waiting frames and the application will hang because the waiting frames are
under the assumption that another transaction is in the process of acquiring
lock because owner-list is not empty. Handled this case as well in this patch.
Added asserts to make it easier to find these problems in future.
fixes bz#1696599
Change-Id: I3101393265e9827755725b1f2d94a93d8709e923
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Diffstat (limited to 'xlators/cluster')
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 8 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-lk-common.c | 1 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 19 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 4 |
4 files changed, 14 insertions, 18 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 9adab10c399..d671b564f47 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -5770,6 +5770,10 @@ afr_transaction_local_init(afr_local_t *local, xlator_t *this) afr_private_t *priv = NULL; priv = this->private; + INIT_LIST_HEAD(&local->transaction.wait_list); + INIT_LIST_HEAD(&local->transaction.owner_list); + INIT_LIST_HEAD(&local->ta_waitq); + INIT_LIST_HEAD(&local->ta_onwireq); ret = afr_internal_lock_init(&local->internal_lock, priv->child_count); if (ret < 0) goto out; @@ -5807,10 +5811,6 @@ afr_transaction_local_init(afr_local_t *local, xlator_t *this) goto out; ret = 0; - INIT_LIST_HEAD(&local->transaction.wait_list); - INIT_LIST_HEAD(&local->transaction.owner_list); - INIT_LIST_HEAD(&local->ta_waitq); - INIT_LIST_HEAD(&local->ta_onwireq); out: return ret; } diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c index 4091671fa3f..bc8eabe0f43 100644 --- a/xlators/cluster/afr/src/afr-lk-common.c +++ b/xlators/cluster/afr/src/afr-lk-common.c @@ -397,7 +397,6 @@ afr_unlock_now(call_frame_t *frame, xlator_t *this) int_lock->lk_call_count = call_count; if (!call_count) { - GF_ASSERT(!local->transaction.do_eager_unlock); gf_msg_trace(this->name, 0, "No internal locks unlocked"); int_lock->lock_cbk(frame, this); goto out; diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index defea0eb95c..8c59fd252aa 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -372,6 +372,8 @@ afr_transaction_done(call_frame_t *frame, xlator_t *this) } local->transaction.unwind(frame, this); + GF_ASSERT(list_empty(&local->transaction.owner_list)); + GF_ASSERT(list_empty(&local->transaction.wait_list)); AFR_STACK_DESTROY(frame); return 0; @@ -393,7 +395,7 @@ afr_lock_fail_shared(afr_local_t *local, struct list_head *list) } static void -afr_handle_lock_acquire_failure(afr_local_t *local, gf_boolean_t locked) +afr_handle_lock_acquire_failure(afr_local_t *local) { struct list_head shared; afr_lock_t *lock = NULL; @@ -414,13 +416,8 @@ afr_handle_lock_acquire_failure(afr_local_t *local, gf_boolean_t locked) afr_lock_fail_shared(local, &shared); local->transaction.do_eager_unlock = _gf_true; out: - if (locked) { - local->internal_lock.lock_cbk = afr_transaction_done; - afr_unlock(local->transaction.frame, local->transaction.frame->this); - } else { - afr_transaction_done(local->transaction.frame, - local->transaction.frame->this); - } + local->internal_lock.lock_cbk = afr_transaction_done; + afr_unlock(local->transaction.frame, local->transaction.frame->this); } call_frame_t * @@ -619,7 +616,7 @@ afr_transaction_perform_fop(call_frame_t *frame, xlator_t *this) failure_count = AFR_COUNT(local->transaction.failed_subvols, priv->child_count); if (failure_count == priv->child_count) { - afr_handle_lock_acquire_failure(local, _gf_true); + afr_handle_lock_acquire_failure(local); return 0; } else { lock = &local->inode_ctx->lock[local->transaction.type]; @@ -2092,7 +2089,7 @@ err: local->op_ret = -1; local->op_errno = op_errno; - afr_handle_lock_acquire_failure(local, _gf_true); + afr_handle_lock_acquire_failure(local); if (xdata_req) dict_unref(xdata_req); @@ -2361,7 +2358,7 @@ afr_internal_lock_finish(call_frame_t *frame, xlator_t *this) } else { lock = &local->inode_ctx->lock[local->transaction.type]; if (local->internal_lock.lock_op_ret < 0) { - afr_handle_lock_acquire_failure(local, _gf_false); + afr_handle_lock_acquire_failure(local); } else { lock->event_generation = local->event_generation; afr_changelog_pre_op(frame, this); diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index a044b2f0dc3..33e78c15c6c 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -1089,8 +1089,8 @@ afr_cleanup_fd_ctx(xlator_t *this, fd_t *fd); #define AFR_FRAME_INIT(frame, op_errno) \ ({ \ frame->local = mem_get0(THIS->local_pool); \ - if (afr_local_init(frame->local, THIS->private, &op_errno)) { \ - afr_local_cleanup(frame->local, THIS); \ + if (afr_local_init(frame->local, frame->this->private, &op_errno)) { \ + afr_local_cleanup(frame->local, frame->this); \ mem_put(frame->local); \ frame->local = NULL; \ }; \ |