diff options
| author | Pranith Kumar K <pkarampu@redhat.com> | 2019-08-20 13:27:24 +0530 | 
|---|---|---|
| committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2019-09-12 06:36:47 +0000 | 
| commit | 6362999974cc4b55c4c42929b22a2f2f53321699 (patch) | |
| tree | 8aa5377d797b1cd82c4d9a0c79853ac39d9e10a8 | |
| parent | 880e20ca967d0001e0b6d2780bece8b635a090eb (diff) | |
cluster/ec: Mark release only when it is acquired
Problem:
Mount-1                                Mount-2
1)Tries to acquire lock on 'dir1'   1)Tries to acquire lock on 'dir1'
2)Lock is granted on brick-0        2)Lock gets EAGAIN on brick-0 and
				      leads to blocking lock on brick-0
3)Gets a lock-contention            3) Doesn't matter what happens on mount-2
  notification, marks lock->release    from here on.
  to true.
4)New fop comes on 'dir1' which will
  be put in frozen list as lock->release
  is set to true.
5) Lock acquisition from step-2 fails because
3 bricks went down in 4+2 setup.
Fop on mount-1 which is put in frozen list will hang because no codepath will
move it from frozen list to any other list and the lock will not be retried.
Fix:
Don't set lock->release to true if lock is not acquired at the time of
lock-contention-notification
fixes: bz#1743573
Change-Id: Ie6630db8735ccf372cc54b873a3a3aed7a6082b7
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
| -rw-r--r-- | xlators/cluster/ec/src/ec-common.c | 20 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec-types.h | 1 | 
2 files changed, 19 insertions, 2 deletions
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c index dea987ef319..9045a336c56 100644 --- a/xlators/cluster/ec/src/ec-common.c +++ b/xlators/cluster/ec/src/ec-common.c @@ -1867,6 +1867,10 @@ ec_lock_acquired(ec_lock_link_t *link)      LOCK(&lock->loc.inode->lock);      lock->acquired = _gf_true; +    if (lock->contention) { +        lock->release = _gf_true; +        lock->contention = _gf_false; +    }      ec_lock_update_fd(lock, fop);      ec_lock_wake_shared(lock, &list); @@ -1892,15 +1896,20 @@ ec_locked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,      ec_lock_link_t *link = NULL;      ec_lock_t *lock = NULL; +    link = fop->data; +    lock = link->lock;      if (op_ret >= 0) { -        link = fop->data; -        lock = link->lock;          lock->mask = lock->good_mask = fop->good;          lock->healing = 0;          ec_lock_acquired(link);          ec_lock(fop->parent);      } else { +        LOCK(&lock->loc.inode->lock); +        { +            lock->contention = _gf_false; +        } +        UNLOCK(&lock->loc.inode->lock);          gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_PREOP_LOCK_FAILED,                 "Failed to complete preop lock");      } @@ -2547,6 +2556,13 @@ ec_lock_release(ec_t *ec, inode_t *inode)      gf_msg_debug(ec->xl->name, 0, "Releasing inode %p due to lock contention",                   inode); +    if (!lock->acquired) { +        /* This happens if some bricks already got the lock while inodelk is in +         * progress.  Set release to true after lock is acquired*/ +        lock->contention = _gf_true; +        goto done; +    } +      /* The lock is not marked to be released, so the frozen list should be       * empty. */      GF_ASSERT(list_empty(&lock->frozen)); diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h index 9c790380d4d..2568b6b3223 100644 --- a/xlators/cluster/ec/src/ec-types.h +++ b/xlators/cluster/ec/src/ec-types.h @@ -267,6 +267,7 @@ struct _ec_lock {      uint32_t refs_pending;  /* Refs assigned to fops being prepared */      uint32_t waiting_flags; /*Track xattrop/dirty marking*/      gf_boolean_t acquired; +    gf_boolean_t contention;      gf_boolean_t unlock_now;      gf_boolean_t release;      gf_boolean_t query;  | 
