diff options
| author | Krutika Dhananjay <kdhananj@redhat.com> | 2014-04-24 16:37:05 +0530 | 
|---|---|---|
| committer | Niels de Vos <ndevos@redhat.com> | 2014-05-08 06:00:56 -0700 | 
| commit | d8014f53c22a7da2d7b38e7d3215aa83e3e51d0d (patch) | |
| tree | a5230832fc109449a2f86577fadb57748a4e049c | |
| parent | 45a0322066513259e61c7a4b1b1ed1a0bd3a0827 (diff) | |
features/locks: Remove stale entrylk objects from 'blocked_locks' list
        Backport of http://review.gluster.org/7560
* In the event of a DISCONNECT from a client, as part of cleanup,
  entrylk objects are not removed from the blocked_locks list before
  being unref'd and freed, causing the brick process to crash at
  some point when the (now) stale object is accessed again in the list.
* Also during cleanup, it is pointless to try and grant lock to a
  previously blocked entrylk (say L1) as part of releasing another
  conflicting lock (L2), (which is a side-effect of L1 not being
  deleted from blocked_locks list before grant_blocked_entry_locks()
  in cleanup) if L1 is also associated with the DISCONNECTing client.
  This patch fixes the problem.
Change-Id: Ie077f8eeb61c5505f047a8fdaac67db32e5d4270
BUG: 1089470
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: http://review.gluster.org/7576
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Niels de Vos <ndevos@redhat.com>
| -rw-r--r-- | xlators/features/locks/src/entrylk.c | 52 | 
1 file changed, 48 insertions, 4 deletions
diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c index dc86512bef0..8496d9d8dce 100644 --- a/xlators/features/locks/src/entrylk.c +++ b/xlators/features/locks/src/entrylk.c @@ -715,15 +715,16 @@ pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)          pl_inode_t *pinode = NULL;          struct list_head released; +        struct list_head unwind;          INIT_LIST_HEAD (&released); +        INIT_LIST_HEAD (&unwind);  	pthread_mutex_lock (&ctx->lock);          {                  list_for_each_entry_safe (l, tmp, &ctx->entrylk_lockers,  					  client_list) {                          list_del_init (&l->client_list); -			list_add_tail (&l->client_list, &released);  			pl_entrylk_log_cleanup (l); @@ -731,25 +732,68 @@ pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)  			pthread_mutex_lock (&pinode->mutex);  			{ -				list_del_init (&l->domain_list); +                        /* If the entrylk object is part of granted list but not +                         * blocked list, then perform the following actions: +                         * i.   delete the object from granted list; +                         * ii.  grant other locks (from other clients) that may +                         *      have been blocked on this entrylk; and +                         * iii. unref the object. +                         * +                         * If the entrylk object (L1) is part of both granted +                         * and blocked lists, then this means that a parallel +                         * unlock on another entrylk (L2 say) may have 'granted' +                         * L1 and added it to 'granted' list in +                         * __grant_blocked_entry_locks() (although using the +                         * 'blocked_locks' member). 
In that case, the cleanup +                         * codepath must try and grant other overlapping +                         * blocked entrylks from other clients, now that L1 is +                         * out of their way and then unref L1 in the end, and +                         * leave it to the other thread (the one executing +                         * unlock codepath) to unwind L1's frame, delete it from +                         * blocked_locks list, and perform the last unref on L1. +                         * +                         * If the entrylk object (L1) is part of blocked list +                         * only, the cleanup code path must: +                         * i.   delete it from the blocked_locks list inside +                         *      this critical section, +                         * ii.  unwind its frame with EAGAIN, +                         * iii. try and grant blocked entry locks from other +                         *      clients that were otherwise grantable, but were +                         *      blocked to avoid leaving L1 to starve forever. +                         * iv.  unref the object. 
+                         */ +                                if (!list_empty (&l->domain_list)) { +                                        list_del_init (&l->domain_list); +                                        list_add_tail (&l->client_list, +                                                       &released); +                                } else { +                                        list_del_init (&l->blocked_locks); +                                        list_add_tail (&l->client_list, +                                                       &unwind); +                                }                          }  			pthread_mutex_unlock (&pinode->mutex);                  }  	}          pthread_mutex_unlock (&ctx->lock); -        list_for_each_entry_safe (l, tmp, &released, client_list) { +        list_for_each_entry_safe (l, tmp, &unwind, client_list) {                  list_del_init (&l->client_list);  		if (l->frame)  			STACK_UNWIND_STRICT (entrylk, l->frame, -1, EAGAIN,  					     NULL); +                list_add_tail (&l->client_list, &released); +        } + +        list_for_each_entry_safe (l, tmp, &released, client_list) { +                list_del_init (&l->client_list);  		pinode = l->pinode;  		dom = get_domain (pinode, l->volume); -		grant_blocked_inode_locks (this, pinode, dom); +		grant_blocked_entry_locks (this, pinode, dom);  		pthread_mutex_lock (&pinode->mutex);  		{  | 
