summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr
diff options
context:
space:
mode:
authorPranith Kumar K <pkarampu@redhat.com>2019-04-04 15:31:56 +0530
committerShyamsundar Ranganathan <srangana@redhat.com>2019-04-16 11:29:03 +0000
commit7ec3a8527f2fb341fc1f6e54ded36b157e4904fe (patch)
tree0c16a7755de080529481173f7e39adf8519fe966 /xlators/cluster/afr
parentd2de3f66393d4f737e21eedab4200563a60f8bcc (diff)
cluster/afr: Remove local from owners_list on failure of lock-acquisition
When eager-lock lock acquisition fails because of say network failures, the local is not being removed from owners_list, this leads to accumulation of waiting frames and the application will hang because the waiting frames are under the assumption that another transaction is in the process of acquiring lock because owner-list is not empty. Handled this case as well in this patch. Added asserts to make it easier to find these problems in future. fixes bz#1699731 Change-Id: I3101393265e9827755725b1f2d94a93d8709e923 Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Diffstat (limited to 'xlators/cluster/afr')
-rw-r--r--xlators/cluster/afr/src/afr-common.c8
-rw-r--r--xlators/cluster/afr/src/afr-lk-common.c1
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c19
-rw-r--r--xlators/cluster/afr/src/afr.h4
4 files changed, 14 insertions, 18 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 56f4833b36c..8e8d7eb7473 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -5773,6 +5773,10 @@ afr_transaction_local_init(afr_local_t *local, xlator_t *this)
afr_private_t *priv = NULL;
priv = this->private;
+ INIT_LIST_HEAD(&local->transaction.wait_list);
+ INIT_LIST_HEAD(&local->transaction.owner_list);
+ INIT_LIST_HEAD(&local->ta_waitq);
+ INIT_LIST_HEAD(&local->ta_onwireq);
ret = afr_internal_lock_init(&local->internal_lock, priv->child_count);
if (ret < 0)
goto out;
@@ -5810,10 +5814,6 @@ afr_transaction_local_init(afr_local_t *local, xlator_t *this)
goto out;
ret = 0;
- INIT_LIST_HEAD(&local->transaction.wait_list);
- INIT_LIST_HEAD(&local->transaction.owner_list);
- INIT_LIST_HEAD(&local->ta_waitq);
- INIT_LIST_HEAD(&local->ta_onwireq);
out:
return ret;
}
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index 4091671fa3f..bc8eabe0f43 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -397,7 +397,6 @@ afr_unlock_now(call_frame_t *frame, xlator_t *this)
int_lock->lk_call_count = call_count;
if (!call_count) {
- GF_ASSERT(!local->transaction.do_eager_unlock);
gf_msg_trace(this->name, 0, "No internal locks unlocked");
int_lock->lock_cbk(frame, this);
goto out;
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 229820ba26a..15f3a7efa91 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -372,6 +372,8 @@ afr_transaction_done(call_frame_t *frame, xlator_t *this)
}
local->transaction.unwind(frame, this);
+ GF_ASSERT(list_empty(&local->transaction.owner_list));
+ GF_ASSERT(list_empty(&local->transaction.wait_list));
AFR_STACK_DESTROY(frame);
return 0;
@@ -393,7 +395,7 @@ afr_lock_fail_shared(afr_local_t *local, struct list_head *list)
}
static void
-afr_handle_lock_acquire_failure(afr_local_t *local, gf_boolean_t locked)
+afr_handle_lock_acquire_failure(afr_local_t *local)
{
struct list_head shared;
afr_lock_t *lock = NULL;
@@ -414,13 +416,8 @@ afr_handle_lock_acquire_failure(afr_local_t *local, gf_boolean_t locked)
afr_lock_fail_shared(local, &shared);
local->transaction.do_eager_unlock = _gf_true;
out:
- if (locked) {
- local->internal_lock.lock_cbk = afr_transaction_done;
- afr_unlock(local->transaction.frame, local->transaction.frame->this);
- } else {
- afr_transaction_done(local->transaction.frame,
- local->transaction.frame->this);
- }
+ local->internal_lock.lock_cbk = afr_transaction_done;
+ afr_unlock(local->transaction.frame, local->transaction.frame->this);
}
call_frame_t *
@@ -619,7 +616,7 @@ afr_transaction_perform_fop(call_frame_t *frame, xlator_t *this)
failure_count = AFR_COUNT(local->transaction.failed_subvols,
priv->child_count);
if (failure_count == priv->child_count) {
- afr_handle_lock_acquire_failure(local, _gf_true);
+ afr_handle_lock_acquire_failure(local);
return 0;
} else {
lock = &local->inode_ctx->lock[local->transaction.type];
@@ -2092,7 +2089,7 @@ err:
local->op_ret = -1;
local->op_errno = op_errno;
- afr_handle_lock_acquire_failure(local, _gf_true);
+ afr_handle_lock_acquire_failure(local);
if (xdata_req)
dict_unref(xdata_req);
@@ -2361,7 +2358,7 @@ afr_internal_lock_finish(call_frame_t *frame, xlator_t *this)
} else {
lock = &local->inode_ctx->lock[local->transaction.type];
if (local->internal_lock.lock_op_ret < 0) {
- afr_handle_lock_acquire_failure(local, _gf_false);
+ afr_handle_lock_acquire_failure(local);
} else {
lock->event_generation = local->event_generation;
afr_changelog_pre_op(frame, this);
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index b76788b693d..6f91c9172f1 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -1092,8 +1092,8 @@ afr_cleanup_fd_ctx(xlator_t *this, fd_t *fd);
#define AFR_FRAME_INIT(frame, op_errno) \
({ \
frame->local = mem_get0(THIS->local_pool); \
- if (afr_local_init(frame->local, THIS->private, &op_errno)) { \
- afr_local_cleanup(frame->local, THIS); \
+ if (afr_local_init(frame->local, frame->this->private, &op_errno)) { \
+ afr_local_cleanup(frame->local, frame->this); \
mem_put(frame->local); \
frame->local = NULL; \
}; \