From 5888a89fa8950be38ed3c5b000a37013f6656031 Mon Sep 17 00:00:00 2001 From: Krutika Dhananjay Date: Thu, 5 Jun 2014 09:22:34 +0530 Subject: features/locks: Clean up logging of cleanup in DISCONNECT codepath Backport of http://review.gluster.org/7981 Now, gfid is printed as opposed to path in cleanup messages. Also, refkeeper update is eliminated in inodelk and entrylk. Instead, the patch ensures inode and pl_inode are kept alive as long as there is atleast one lock (granted/blocked) on an inode. Also, every inode is unref'd appropriately on a DISCONNECT from the lock-owning client. Change-Id: I234db688ad0d314f4936a16cc5af70a3bd071970 BUG: 1104915 Signed-off-by: Krutika Dhananjay Reviewed-on: http://review.gluster.org/8042 Tested-by: Gluster Build System Reviewed-by: Pranith Kumar Karampuri Reviewed-by: Niels de Vos --- xlators/features/locks/src/common.c | 17 +------ xlators/features/locks/src/entrylk.c | 89 ++++++++++++++++++++++++------------ xlators/features/locks/src/inodelk.c | 85 +++++++++++++++++++++++----------- xlators/features/locks/src/locks.h | 4 ++ 4 files changed, 126 insertions(+), 69 deletions(-) (limited to 'xlators/features/locks/src') diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c index f6c71c1cf86..e0a5f7c0533 100644 --- a/xlators/features/locks/src/common.c +++ b/xlators/features/locks/src/common.c @@ -116,21 +116,7 @@ fd_from_fdnum (posix_lock_t *lock) int __pl_inode_is_empty (pl_inode_t *pl_inode) { - pl_dom_list_t *dom = NULL; - int is_empty = 1; - - if (!list_empty (&pl_inode->ext_list)) - is_empty = 0; - - list_for_each_entry (dom, &pl_inode->dom_list, inode_list) { - if (!list_empty (&dom->entrylk_list)) - is_empty = 0; - - if (!list_empty (&dom->inodelk_list)) - is_empty = 0; - } - - return is_empty; + return (list_empty (&pl_inode->ext_list)); } void @@ -451,6 +437,7 @@ pl_inode_get (xlator_t *this, inode_t *inode) INIT_LIST_HEAD (&pl_inode->reservelk_list); INIT_LIST_HEAD (&pl_inode->blocked_reservelks); INIT_LIST_HEAD (&pl_inode->blocked_calls); + uuid_copy (pl_inode->gfid, inode->gfid); __inode_ctx_put (inode, this, (uint64_t)(long)(pl_inode)); } diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c index 8496d9d8dce..c00a7124658 100644 --- a/xlators/features/locks/src/entrylk.c +++ b/xlators/features/locks/src/entrylk.c @@ -517,18 +517,19 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this, dict_t *xdata) { - int32_t op_ret = -1; - int32_t op_errno = 0; - int ret = -1; - char unwind = 1; - GF_UNUSED int dict_ret = -1; - pl_inode_t *pinode = NULL; - pl_entry_lock_t *reqlock = NULL; - pl_entry_lock_t *unlocked = NULL; - pl_dom_list_t *dom = NULL; - char *conn_id = NULL; - pl_ctx_t *ctx = NULL; - int nonblock = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + int ret = -1; + char unwind = 1; + GF_UNUSED int dict_ret = -1; + pl_inode_t *pinode = NULL; + pl_entry_lock_t *reqlock = NULL; + pl_entry_lock_t *unlocked = NULL; + pl_dom_list_t *dom = NULL; + char *conn_id = NULL; + pl_ctx_t *ctx = NULL; + int nonblock = 0; + gf_boolean_t need_inode_unref = _gf_false; if (xdata) dict_ret = dict_get_str (xdata, "connection-id", &conn_id); @@ -564,6 +565,25 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this, goto unwind; } + /* Ideally, AFTER a successful lock (both blocking and non-blocking) or + * an unsuccessful blocking lock operation, the inode needs to be ref'd. + * + * But doing so might give room to a race where the lock-requesting + * client could send a DISCONNECT just before this thread refs the inode + * after the locking is done, and the epoll thread could unref the inode + * in cleanup which means the inode's refcount would come down to 0, and + * the call to pl_forget() at this point destroys @pinode. Now when + * the io-thread executing this function tries to access pinode, + * it could crash on account of illegal memory access. + * + * To get around this problem, the inode is ref'd once even before + * adding the lock into client_list as a precautionary measure. + * This way even if there are DISCONNECTs, there will always be 1 extra + * ref on the inode, so @pinode is still alive until after the + * current stack unwinds. + */ + pinode->inode = inode_ref (inode); + switch (cmd) { case ENTRYLK_LOCK_NB: nonblock = 1; @@ -593,6 +613,13 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this, } else { __pl_entrylk_unref (reqlock); } + + /* For all but the case where a non-blocking lock + * attempt fails, the extra ref taken before the switch + * block must be negated. + */ + if ((ret == -EAGAIN) && (nonblock)) + need_inode_unref = _gf_true; } pthread_mutex_unlock (&pinode->mutex); if (ctx) @@ -604,6 +631,12 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this, pthread_mutex_lock (&ctx->lock); pthread_mutex_lock (&pinode->mutex); { + /* Irrespective of whether unlock succeeds or not, + * the extra inode ref that was done before the switch + * block must be negated. Towards this, + * @need_inode_unref flag is set unconditionally here. + */ + need_inode_unref = _gf_true; unlocked = __unlock_entrylk (dom, reqlock); if (unlocked) { list_del_init (&unlocked->client_list); @@ -623,13 +656,22 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this, break; default: + inode_unref (pinode->inode); gf_log (this->name, GF_LOG_ERROR, "Unexpected case in entrylk (cmd=%d). Please file" "a bug report at http://bugs.gluster.com", cmd); goto out; } + if (need_inode_unref) + inode_unref (pinode->inode); + + /* The following (extra) unref corresponds to the ref that + * was done at the time the lock was granted. + */ + if ((cmd == ENTRYLK_UNLOCK) && (op_ret == 0)) + inode_unref (pinode->inode); + out: - pl_update_refkeeper (this, inode); if (unwind) { entrylk_trace_out (this, frame, volume, fd, loc, basename, @@ -684,24 +726,14 @@ static void pl_entrylk_log_cleanup (pl_entry_lock_t *lock) { pl_inode_t *pinode = NULL; - char *path = NULL; - char *file = NULL; pinode = lock->pinode; - inode_path (pinode->refkeeper, NULL, &path); - - if (path) - file = path; - else - file = uuid_utoa (pinode->refkeeper->gfid); - - gf_log (THIS->name, GF_LOG_WARNING, - "releasing lock on %s held by " - "{client=%p, pid=%"PRId64" lk-owner=%s}", - file, lock->client, (uint64_t) lock->client_pid, - lkowner_utoa (&lock->owner)); - GF_FREE (path); + gf_log (THIS->name, GF_LOG_WARNING, + "releasing lock on %s held by " + "{client=%p, pid=%"PRId64" lk-owner=%s}", + uuid_utoa (pinode->gfid), lock->client, + (uint64_t) lock->client_pid, lkowner_utoa (&lock->owner)); } @@ -800,6 +832,7 @@ pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx) __pl_entrylk_unref (l); } pthread_mutex_unlock (&pinode->mutex); + inode_unref (pinode->inode); } return 0; diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c index 4bc5d80c866..8866cf759dc 100644 --- a/xlators/features/locks/src/inodelk.c +++ b/xlators/features/locks/src/inodelk.c @@ -383,24 +383,13 @@ static void pl_inodelk_log_cleanup (pl_inode_lock_t *lock) { pl_inode_t *pl_inode = NULL; - char *path = NULL; - char *file = NULL; pl_inode = lock->pl_inode; - inode_path (pl_inode->refkeeper, NULL, &path); - - if (path) - file = path; - else - file = uuid_utoa (pl_inode->refkeeper->gfid); - - gf_log (THIS->name, GF_LOG_WARNING, - "releasing lock on %s held by " - "{client=%p, pid=%"PRId64" lk-owner=%s}", - file, lock->client, (uint64_t) lock->client_pid, - lkowner_utoa (&lock->owner)); - GF_FREE (path); + gf_log (THIS->name, GF_LOG_WARNING, "releasing lock on %s held by " + "{client=%p, pid=%"PRId64" lk-owner=%s}", + uuid_utoa (pl_inode->gfid), lock->client, + (uint64_t) lock->client_pid, lkowner_utoa (&lock->owner)); } @@ -501,6 +490,7 @@ pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx) __pl_inodelk_unref (l); } pthread_mutex_unlock (&pl_inode->mutex); + inode_unref (pl_inode->inode); } return 0; @@ -509,13 +499,36 @@ pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx) static int pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, - pl_inode_lock_t *lock, int can_block, pl_dom_list_t *dom) + pl_inode_lock_t *lock, int can_block, pl_dom_list_t *dom, + inode_t *inode) { - int ret = -EINVAL; - pl_inode_lock_t *retlock = NULL; - gf_boolean_t unref = _gf_true; + int ret = -EINVAL; + pl_inode_lock_t *retlock = NULL; + gf_boolean_t unref = _gf_true; + gf_boolean_t need_inode_unref = _gf_false; + short fl_type; lock->pl_inode = pl_inode; + fl_type = lock->fl_type; + + /* Ideally, AFTER a successful lock (both blocking and non-blocking) or + * an unsuccessful blocking lock operation, the inode needs to be ref'd. + * + * But doing so might give room to a race where the lock-requesting + * client could send a DISCONNECT just before this thread refs the inode + * after the locking is done, and the epoll thread could unref the inode + * in cleanup which means the inode's refcount would come down to 0, and + * the call to pl_forget() at this point destroys @pl_inode. Now when + * the io-thread executing this function tries to access pl_inode, + * it could crash on account of illegal memory access. + * + * To get around this problem, the inode is ref'd once even before + * adding the lock into client_list as a precautionary measure. + * This way even if there are DISCONNECTs, there will always be 1 extra + * ref on the inode, so @pl_inode is still alive until after the + * current stack unwinds. + */ + pl_inode->inode = inode_ref (inode); if (ctx) pthread_mutex_lock (&ctx->lock); @@ -542,12 +555,24 @@ pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, lock->user_flock.l_len); if (can_block) unref = _gf_false; + /* For all but the case where a non-blocking + * lock attempt fails, the extra ref taken at + * the start of this function must be negated. + */ + else + need_inode_unref = _gf_true; } if (ctx && (!ret || can_block)) list_add_tail (&lock->client_list, &ctx->inodelk_lockers); } else { + /* Irrespective of whether unlock succeeds or not, + * the extra inode ref that was done at the start of + * this function must be negated. Towards this, + * @need_inode_unref flag is set unconditionally here. + */ + need_inode_unref = _gf_true; retlock = __inode_unlock_lock (this, lock, dom); if (!retlock) { gf_log (this->name, GF_LOG_DEBUG, @@ -568,7 +593,16 @@ out: if (ctx) pthread_mutex_unlock (&ctx->lock); - grant_blocked_inode_locks (this, pl_inode, dom); + if (need_inode_unref) + inode_unref (pl_inode->inode); + + /* The following (extra) unref corresponds to the ref that + * was done at the time the lock was granted. + */ + if ((fl_type == F_UNLCK) && (ret == 0)) { + inode_unref (pl_inode->inode); + grant_blocked_inode_locks (this, pl_inode, dom); + } return ret; } @@ -707,7 +741,7 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this, } reqlock = new_inode_lock (flock, frame->root->client, frame->root->pid, - frame, this, volume, conn_id); + frame, this, dom->domain, conn_id); if (!reqlock) { op_ret = -1; @@ -725,7 +759,7 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this, case F_SETLK: memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock)); ret = pl_inode_setlk (this, ctx, pinode, reqlock, can_block, - dom); + dom, inode); if (ret < 0) { if ((can_block) && (F_UNLCK != flock->l_type)) { @@ -751,10 +785,9 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this, op_ret = 0; unwind: - if ((inode != NULL) && (flock !=NULL)) { - pl_update_refkeeper (this, inode); - pl_trace_out (this, frame, fd, loc, cmd, flock, op_ret, op_errno, volume); - } + if (flock != NULL) + pl_trace_out (this, frame, fd, loc, cmd, flock, op_ret, + op_errno, volume); STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, NULL); out: diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h index 8c2a6f867ee..f761b3d4a00 100644 --- a/xlators/features/locks/src/locks.h +++ b/xlators/features/locks/src/locks.h @@ -146,6 +146,10 @@ struct __pl_inode { inode_t *refkeeper; /* hold refs on an inode while locks are held to prevent pruning */ + uuid_t gfid; /* placeholder for gfid of the inode */ + inode_t *inode; /* pointer to be used for ref and unref + of inode_t as long as there are + locks on it */ }; typedef struct __pl_inode pl_inode_t; -- cgit