diff options
Diffstat (limited to 'xlators/features/locks/src/common.c')
| -rw-r--r-- | xlators/features/locks/src/common.c | 1365 |
1 files changed, 990 insertions, 375 deletions
diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c index 0c57b3b41..b3309580d 100644 --- a/xlators/features/locks/src/common.c +++ b/xlators/features/locks/src/common.c @@ -1,22 +1,12 @@ /* - Copyright (c) 2006, 2007, 2008 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #include <unistd.h> #include <fcntl.h> #include <limits.h> @@ -35,29 +25,30 @@ #include "common-utils.h" #include "locks.h" +#include "common.h" static int __is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock); static void __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock); +static int +pl_send_prelock_unlock (xlator_t *this, pl_inode_t *pl_inode, + posix_lock_t *old_lock); static pl_dom_list_t * -allocate_domain(const char *volume) +__allocate_domain (const char *volume) { pl_dom_list_t *dom = NULL; - dom = CALLOC (1, sizeof (*dom)); + dom = GF_CALLOC (1, sizeof (*dom), + gf_locks_mt_pl_dom_list_t); if (!dom) - return NULL; + goto out; - - dom->domain = strdup(volume); - if (!dom->domain) { - gf_log ("posix-locks", GF_LOG_TRACE, - "Out of Memory"); - return NULL; - } + dom->domain = gf_strdup(volume); + if (!dom->domain) + goto out; gf_log ("posix-locks", GF_LOG_TRACE, "New domain allocated: %s", dom->domain); @@ -68,6 +59,12 @@ allocate_domain(const char *volume) INIT_LIST_HEAD (&dom->inodelk_list); INIT_LIST_HEAD (&dom->blocked_inodelks); +out: + if (dom && (NULL == dom->domain)) { + GF_FREE (dom); + dom = NULL; + } + return dom; } @@ -79,88 +76,426 @@ get_domain (pl_inode_t *pl_inode, const char *volume) { pl_dom_list_t *dom = NULL; + GF_VALIDATE_OR_GOTO ("posix-locks", pl_inode, out); + GF_VALIDATE_OR_GOTO ("posix-locks", volume, out); + + pthread_mutex_lock (&pl_inode->mutex); + { + list_for_each_entry (dom, &pl_inode->dom_list, inode_list) { + if (strcmp (dom->domain, volume) == 0) + goto unlock; + } + + dom = __allocate_domain (volume); + if (dom) + list_add (&dom->inode_list, &pl_inode->dom_list); + } +unlock: + pthread_mutex_unlock (&pl_inode->mutex); + if (dom) { + gf_log ("posix-locks", GF_LOG_TRACE, "Domain %s found", volume); + } else { + gf_log ("posix-locks", GF_LOG_TRACE, "Domain %s not found", volume); + } +out: + return dom; +} + +unsigned long +fd_to_fdnum (fd_t *fd) +{ + return ((unsigned long) fd); +} + +fd_t * +fd_from_fdnum (posix_lock_t *lock) +{ + return ((fd_t *) lock->fd_num); +} + +int +__pl_inode_is_empty (pl_inode_t *pl_inode) +{ + pl_dom_list_t *dom = NULL; + int is_empty = 1; + + if (!list_empty (&pl_inode->ext_list)) + is_empty = 0; + list_for_each_entry (dom, &pl_inode->dom_list, inode_list) { - if (strcmp (dom->domain, volume) == 0) - goto found; + if (!list_empty (&dom->entrylk_list)) + is_empty = 0; + if (!list_empty (&dom->inodelk_list)) + is_empty = 0; + } + return is_empty; +} + +void +pl_print_locker (char *str, int size, xlator_t *this, call_frame_t *frame) +{ + snprintf (str, size, "Pid=%llu, lk-owner=%s, Client=%p, Frame=%llu", + (unsigned long long) frame->root->pid, + lkowner_utoa (&frame->root->lk_owner), + frame->root->client, + (unsigned long long) frame->root->unique); +} + + +void +pl_print_lockee (char *str, int size, fd_t *fd, loc_t *loc) +{ + inode_t *inode = NULL; + char *ipath = NULL; + int ret = 0; + + if (fd) + inode = fd->inode; + if (loc) + inode = loc->inode; + + if (!inode) { + snprintf (str, size, "<nul>"); + return; } - dom = allocate_domain(volume); + if (loc && loc->path) { + ipath = gf_strdup (loc->path); + } else { + ret = inode_path (inode, NULL, &ipath); + if (ret <= 0) + ipath = NULL; + } - if (dom) - list_add (&dom->inode_list, &pl_inode->dom_list); -found: + snprintf (str, size, "gfid=%s, fd=%p, path=%s", + uuid_utoa (inode->gfid), fd, + ipath ? ipath : "<nul>"); - return dom; + GF_FREE (ipath); } -pl_inode_t * -pl_inode_get (xlator_t *this, inode_t *inode) + +void +pl_print_lock (char *str, int size, int cmd, + struct gf_flock *flock, gf_lkowner_t *owner) { - uint64_t tmp_pl_inode = 0; - pl_inode_t *pl_inode = NULL; - mode_t st_mode = 0; - int ret = 0; + char *cmd_str = NULL; + char *type_str = NULL; + + switch (cmd) { +#if F_GETLK != F_GETLK64 + case F_GETLK64: +#endif + case F_GETLK: + cmd_str = "GETLK"; + break; + +#if F_SETLK != F_SETLK64 + case F_SETLK64: +#endif + case F_SETLK: + cmd_str = "SETLK"; + break; + +#if F_SETLKW != F_SETLKW64 + case F_SETLKW64: +#endif + case F_SETLKW: + cmd_str = "SETLKW"; + break; - ret = inode_ctx_get (inode, this,&tmp_pl_inode); - if (ret == 0) { - pl_inode = (pl_inode_t *)(long)tmp_pl_inode; - goto out; + default: + cmd_str = "UNKNOWN"; + break; } - pl_inode = CALLOC (1, sizeof (*pl_inode)); - if (!pl_inode) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - goto out; - } - gf_log (this->name, GF_LOG_TRACE, - "Allocating new pl inode"); + switch (flock->l_type) { + case F_RDLCK: + type_str = "READ"; + break; + case F_WRLCK: + type_str = "WRITE"; + break; + case F_UNLCK: + type_str = "UNLOCK"; + break; + default: + type_str = "UNKNOWN"; + break; + } - st_mode = inode->st_mode; - if ((st_mode & S_ISGID) && !(st_mode & S_IXGRP)) - pl_inode->mandatory = 1; + snprintf (str, size, "lock=FCNTL, cmd=%s, type=%s, " + "start=%llu, len=%llu, pid=%llu, lk-owner=%s", + cmd_str, type_str, (unsigned long long) flock->l_start, + (unsigned long long) flock->l_len, + (unsigned long long) flock->l_pid, + lkowner_utoa (owner)); +} - pthread_mutex_init (&pl_inode->mutex, NULL); +void +pl_trace_in (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, + int cmd, struct gf_flock *flock, const char *domain) +{ + posix_locks_private_t *priv = NULL; + char pl_locker[256]; + char pl_lockee[256]; + char pl_lock[256]; + + priv = this->private; + + if (!priv->trace) + return; + + pl_print_locker (pl_locker, 256, this, frame); + pl_print_lockee (pl_lockee, 256, fd, loc); + if (domain) + pl_print_inodelk (pl_lock, 256, cmd, flock, domain); + else + pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner); + + gf_log (this->name, GF_LOG_INFO, + "[REQUEST] Locker = {%s} Lockee = {%s} Lock = {%s}", + pl_locker, pl_lockee, pl_lock); +} - INIT_LIST_HEAD (&pl_inode->dom_list); - INIT_LIST_HEAD (&pl_inode->ext_list); - INIT_LIST_HEAD (&pl_inode->rw_list); - ret = inode_ctx_put (inode, this, (uint64_t)(long)(pl_inode)); +void +pl_print_verdict (char *str, int size, int op_ret, int op_errno) +{ + char *verdict = NULL; + + if (op_ret == 0) { + verdict = "GRANTED"; + } else { + switch (op_errno) { + case EAGAIN: + verdict = "TRYAGAIN"; + break; + default: + verdict = strerror (op_errno); + } + } -out: - return pl_inode; + snprintf (str, size, "%s", verdict); +} + + +void +pl_trace_out (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, + int cmd, struct gf_flock *flock, int op_ret, int op_errno, const char *domain) + +{ + posix_locks_private_t *priv = NULL; + char pl_locker[256]; + char pl_lockee[256]; + char pl_lock[256]; + char verdict[32]; + + priv = this->private; + + if (!priv->trace) + return; + + pl_print_locker (pl_locker, 256, this, frame); + pl_print_lockee (pl_lockee, 256, fd, loc); + if (domain) + pl_print_inodelk (pl_lock, 256, cmd, flock, domain); + else + pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner); + + pl_print_verdict (verdict, 32, op_ret, op_errno); + + gf_log (this->name, GF_LOG_INFO, + "[%s] Locker = {%s} Lockee = {%s} Lock = {%s}", + verdict, pl_locker, pl_lockee, pl_lock); +} + + +void +pl_trace_block (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, + int cmd, struct gf_flock *flock, const char *domain) + +{ + posix_locks_private_t *priv = NULL; + char pl_locker[256]; + char pl_lockee[256]; + char pl_lock[256]; + + priv = this->private; + + if (!priv->trace) + return; + + pl_print_locker (pl_locker, 256, this, frame); + pl_print_lockee (pl_lockee, 256, fd, loc); + if (domain) + pl_print_inodelk (pl_lock, 256, cmd, flock, domain); + else + pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner); + + gf_log (this->name, GF_LOG_INFO, + "[BLOCKED] Locker = {%s} Lockee = {%s} Lock = {%s}", + pl_locker, pl_lockee, pl_lock); +} + + +void +pl_trace_flush (xlator_t *this, call_frame_t *frame, fd_t *fd) +{ + posix_locks_private_t *priv = NULL; + char pl_locker[256]; + char pl_lockee[256]; + pl_inode_t *pl_inode = NULL; + + priv = this->private; + + if (!priv->trace) + return; + + pl_inode = pl_inode_get (this, fd->inode); + + if (pl_inode && __pl_inode_is_empty (pl_inode)) + return; + + pl_print_locker (pl_locker, 256, this, frame); + pl_print_lockee (pl_lockee, 256, fd, NULL); + + gf_log (this->name, GF_LOG_INFO, + "[FLUSH] Locker = {%s} Lockee = {%s}", + pl_locker, pl_lockee); +} + +void +pl_trace_release (xlator_t *this, fd_t *fd) +{ + posix_locks_private_t *priv = NULL; + char pl_lockee[256]; + + priv = this->private; + + if (!priv->trace) + return; + + pl_print_lockee (pl_lockee, 256, fd, NULL); + + gf_log (this->name, GF_LOG_INFO, + "[RELEASE] Lockee = {%s}", pl_lockee); +} + + +void +pl_update_refkeeper (xlator_t *this, inode_t *inode) +{ + pl_inode_t *pl_inode = NULL; + int is_empty = 0; + int need_unref = 0; + int need_ref = 0; + + pl_inode = pl_inode_get (this, inode); + + pthread_mutex_lock (&pl_inode->mutex); + { + is_empty = __pl_inode_is_empty (pl_inode); + + if (is_empty && pl_inode->refkeeper) { + need_unref = 1; + pl_inode->refkeeper = NULL; + } + + if (!is_empty && !pl_inode->refkeeper) { + need_ref = 1; + pl_inode->refkeeper = inode; + } + } + pthread_mutex_unlock (&pl_inode->mutex); + + if (need_unref) + inode_unref (inode); + + if (need_ref) + inode_ref (inode); +} + + +pl_inode_t * +pl_inode_get (xlator_t *this, inode_t *inode) +{ + uint64_t tmp_pl_inode = 0; + pl_inode_t *pl_inode = NULL; + int ret = 0; + + LOCK (&inode->lock); + { + ret = __inode_ctx_get (inode, this, &tmp_pl_inode); + if (ret == 0) { + pl_inode = (pl_inode_t *)(long)tmp_pl_inode; + goto unlock; + } + pl_inode = GF_CALLOC (1, sizeof (*pl_inode), + gf_locks_mt_pl_inode_t); + if (!pl_inode) { + goto unlock; + } + + gf_log (this->name, GF_LOG_TRACE, + "Allocating new pl inode"); + + pthread_mutex_init (&pl_inode->mutex, NULL); + + INIT_LIST_HEAD (&pl_inode->dom_list); + INIT_LIST_HEAD (&pl_inode->ext_list); + INIT_LIST_HEAD (&pl_inode->rw_list); + INIT_LIST_HEAD (&pl_inode->reservelk_list); + INIT_LIST_HEAD (&pl_inode->blocked_reservelks); + INIT_LIST_HEAD (&pl_inode->blocked_calls); + + __inode_ctx_put (inode, this, (uint64_t)(long)(pl_inode)); + } +unlock: + UNLOCK (&inode->lock); + + return pl_inode; } /* Create a new posix_lock_t */ posix_lock_t * -new_posix_lock (struct flock *flock, transport_t *transport, pid_t client_pid) +new_posix_lock (struct gf_flock *flock, client_t *client, pid_t client_pid, + gf_lkowner_t *owner, fd_t *fd) { - posix_lock_t *lock = NULL; + posix_lock_t *lock = NULL; - lock = CALLOC (1, sizeof (posix_lock_t)); - if (!lock) { - return NULL; - } + GF_VALIDATE_OR_GOTO ("posix-locks", flock, out); + GF_VALIDATE_OR_GOTO ("posix-locks", client, out); + GF_VALIDATE_OR_GOTO ("posix-locks", fd, out); - lock->fl_start = flock->l_start; - lock->fl_type = flock->l_type; + lock = GF_CALLOC (1, sizeof (posix_lock_t), + gf_locks_mt_posix_lock_t); + if (!lock) { + goto out; + } - if (flock->l_len == 0) - lock->fl_end = LLONG_MAX; - else - lock->fl_end = flock->l_start + flock->l_len - 1; + lock->fl_start = flock->l_start; + lock->fl_type = flock->l_type; - lock->transport = transport; - lock->client_pid = client_pid; + if (flock->l_len == 0) + lock->fl_end = LLONG_MAX; + else + lock->fl_end = flock->l_start + flock->l_len - 1; - INIT_LIST_HEAD (&lock->list); + lock->client = client; + lock->fd_num = fd_to_fdnum (fd); + lock->fd = fd; + lock->client_pid = client_pid; + lock->owner = *owner; - return lock; + INIT_LIST_HEAD (&lock->list); + +out: + return lock; } @@ -168,7 +503,7 @@ new_posix_lock (struct flock *flock, transport_t *transport, pid_t client_pid) void __delete_lock (pl_inode_t *pl_inode, posix_lock_t *lock) { - list_del_init (&lock->list); + list_del_init (&lock->list); } @@ -176,32 +511,37 @@ __delete_lock (pl_inode_t *pl_inode, posix_lock_t *lock) void __destroy_lock (posix_lock_t *lock) { - free (lock); + GF_FREE (lock); } -/* Convert a posix_lock to a struct flock */ +/* Convert a posix_lock to a struct gf_flock */ void -posix_lock_to_flock (posix_lock_t *lock, struct flock *flock) +posix_lock_to_flock (posix_lock_t *lock, struct gf_flock *flock) { - flock->l_pid = lock->client_pid; - flock->l_type = lock->fl_type; - flock->l_start = lock->fl_start; - - if (lock->fl_end == LLONG_MAX) - flock->l_len = 0; - else - flock->l_len = lock->fl_end - lock->fl_start + 1; + flock->l_pid = lock->client_pid; + flock->l_type = lock->fl_type; + flock->l_start = lock->fl_start; + flock->l_owner = lock->owner; + + if (lock->fl_end == LLONG_MAX) + flock->l_len = 0; + else + flock->l_len = lock->fl_end - lock->fl_start + 1; } - /* Insert the lock into the inode's lock list */ static void __insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock) { - list_add_tail (&lock->list, &pl_inode->ext_list); + if (lock->blocked) + gettimeofday (&lock->blkd_time, NULL); + else + gettimeofday (&lock->granted_time, NULL); - return; + list_add_tail (&lock->list, &pl_inode->ext_list); + + return; } @@ -209,14 +549,14 @@ __insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock) int locks_overlap (posix_lock_t *l1, posix_lock_t *l2) { - /* - Note: - FUSE always gives us absolute offsets, so no need to worry - about SEEK_CUR or SEEK_END - */ - - return ((l1->fl_end >= l2->fl_start) && - (l2->fl_end >= l1->fl_start)); + /* + Note: + FUSE always gives us absolute offsets, so no need to worry + about SEEK_CUR or SEEK_END + */ + + return ((l1->fl_end >= l2->fl_start) && + (l2->fl_end >= l1->fl_start)); } @@ -224,8 +564,10 @@ locks_overlap (posix_lock_t *l1, posix_lock_t *l2) int same_owner (posix_lock_t *l1, posix_lock_t *l2) { - return ((l1->client_pid == l2->client_pid) && - (l1->transport == l2->transport)); + + return (is_same_lkowner (&l1->owner, &l2->owner) && + (l1->client == l2->client)); + } @@ -233,15 +575,15 @@ same_owner (posix_lock_t *l1, posix_lock_t *l2) void __delete_unlck_locks (pl_inode_t *pl_inode) { - posix_lock_t *l = NULL; - posix_lock_t *tmp = NULL; - - list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) { - if (l->fl_type == F_UNLCK) { - __delete_lock (pl_inode, l); - __destroy_lock (l); - } - } + posix_lock_t *l = NULL; + posix_lock_t *tmp = NULL; + + list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) { + if (l->fl_type == F_UNLCK) { + __delete_lock (pl_inode, l); + __destroy_lock (l); + } + } } @@ -249,105 +591,179 @@ __delete_unlck_locks (pl_inode_t *pl_inode) static posix_lock_t * add_locks (posix_lock_t *l1, posix_lock_t *l2) { - posix_lock_t *sum = NULL; + posix_lock_t *sum = NULL; - sum = CALLOC (1, sizeof (posix_lock_t)); - if (!sum) - return NULL; + sum = GF_CALLOC (1, sizeof (posix_lock_t), + gf_locks_mt_posix_lock_t); + if (!sum) + return NULL; - sum->fl_start = min (l1->fl_start, l2->fl_start); - sum->fl_end = max (l1->fl_end, l2->fl_end); + sum->fl_start = min (l1->fl_start, l2->fl_start); + sum->fl_end = max (l1->fl_end, l2->fl_end); - return sum; + return sum; } /* Subtract two locks */ struct _values { - posix_lock_t *locks[3]; + posix_lock_t *locks[3]; }; /* {big} must always be contained inside {small} */ static struct _values subtract_locks (posix_lock_t *big, posix_lock_t *small) { - struct _values v = { .locks = {0, 0, 0} }; - - if ((big->fl_start == small->fl_start) && - (big->fl_end == small->fl_end)) { - /* both edges coincide with big */ - v.locks[0] = CALLOC (1, sizeof (posix_lock_t)); - ERR_ABORT (v.locks[0]); - memcpy (v.locks[0], big, sizeof (posix_lock_t)); - v.locks[0]->fl_type = small->fl_type; - } - else if ((small->fl_start > big->fl_start) && - (small->fl_end < big->fl_end)) { - /* both edges lie inside big */ - v.locks[0] = CALLOC (1, sizeof (posix_lock_t)); - ERR_ABORT (v.locks[0]); - v.locks[1] = CALLOC (1, sizeof (posix_lock_t)); - ERR_ABORT (v.locks[1]); - v.locks[2] = CALLOC (1, sizeof (posix_lock_t)); - ERR_ABORT (v.locks[2]); - - memcpy (v.locks[0], big, sizeof (posix_lock_t)); - v.locks[0]->fl_end = small->fl_start - 1; - - memcpy (v.locks[1], small, sizeof (posix_lock_t)); - memcpy (v.locks[2], big, sizeof (posix_lock_t)); - v.locks[2]->fl_start = small->fl_end + 1; - } - /* one edge coincides with big */ - else if (small->fl_start == big->fl_start) { - v.locks[0] = CALLOC (1, sizeof (posix_lock_t)); - ERR_ABORT (v.locks[0]); - v.locks[1] = CALLOC (1, sizeof (posix_lock_t)); - ERR_ABORT (v.locks[1]); - - memcpy (v.locks[0], big, sizeof (posix_lock_t)); - v.locks[0]->fl_start = small->fl_end + 1; - - memcpy (v.locks[1], small, sizeof (posix_lock_t)); - } - else if (small->fl_end == big->fl_end) { - v.locks[0] = CALLOC (1, sizeof (posix_lock_t)); - ERR_ABORT (v.locks[0]); - v.locks[1] = CALLOC (1, sizeof (posix_lock_t)); - ERR_ABORT (v.locks[1]); - - memcpy (v.locks[0], big, sizeof (posix_lock_t)); - v.locks[0]->fl_end = small->fl_start - 1; - - memcpy (v.locks[1], small, sizeof (posix_lock_t)); - } - else { - gf_log ("posix-locks", GF_LOG_ERROR, - "Unexpected case in subtract_locks. Please send " - "a bug report to gluster-devel@nongnu.org"); - } - - return v; + + struct _values v = { .locks = {0, 0, 0} }; + + if ((big->fl_start == small->fl_start) && + (big->fl_end == small->fl_end)) { + /* both edges coincide with big */ + v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t), + gf_locks_mt_posix_lock_t); + if (!v.locks[0]) + goto out; + memcpy (v.locks[0], big, sizeof (posix_lock_t)); + v.locks[0]->fl_type = small->fl_type; + goto done; + } + + if ((small->fl_start > big->fl_start) && + (small->fl_end < big->fl_end)) { + /* both edges lie inside big */ + v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t), + gf_locks_mt_posix_lock_t); + if (!v.locks[0]) + goto out; + + v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t), + gf_locks_mt_posix_lock_t); + if (!v.locks[1]) + goto out; + + v.locks[2] = GF_CALLOC (1, sizeof (posix_lock_t), + gf_locks_mt_posix_lock_t); + if (!v.locks[1]) + goto out; + + memcpy (v.locks[0], big, sizeof (posix_lock_t)); + v.locks[0]->fl_end = small->fl_start - 1; + + memcpy (v.locks[1], small, sizeof (posix_lock_t)); + + memcpy (v.locks[2], big, sizeof (posix_lock_t)); + v.locks[2]->fl_start = small->fl_end + 1; + goto done; + + } + + /* one edge coincides with big */ + if (small->fl_start == big->fl_start) { + v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t), + gf_locks_mt_posix_lock_t); + if (!v.locks[0]) + goto out; + + v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t), + gf_locks_mt_posix_lock_t); + if (!v.locks[1]) + goto out; + + memcpy (v.locks[0], big, sizeof (posix_lock_t)); + v.locks[0]->fl_start = small->fl_end + 1; + + memcpy (v.locks[1], small, sizeof (posix_lock_t)); + goto done; + } + + if (small->fl_end == big->fl_end) { + v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t), + gf_locks_mt_posix_lock_t); + if (!v.locks[0]) + goto out; + + v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t), + gf_locks_mt_posix_lock_t); + if (!v.locks[1]) + goto out; + + memcpy (v.locks[0], big, sizeof (posix_lock_t)); + v.locks[0]->fl_end = small->fl_start - 1; + + memcpy (v.locks[1], small, sizeof (posix_lock_t)); + goto done; + } + + GF_ASSERT (0); + gf_log ("posix-locks", GF_LOG_ERROR, "Unexpected case in subtract_locks"); + +out: + if (v.locks[0]) { + GF_FREE (v.locks[0]); + v.locks[0] = NULL; + } + if (v.locks[1]) { + GF_FREE (v.locks[1]); + v.locks[1] = NULL; + } + if (v.locks[2]) { + GF_FREE (v.locks[2]); + v.locks[2] = NULL; + } + +done: + return v; +} + +static posix_lock_t * +first_conflicting_overlap (pl_inode_t *pl_inode, posix_lock_t *lock) +{ + posix_lock_t *l = NULL; + posix_lock_t *conf = NULL; + + pthread_mutex_lock (&pl_inode->mutex); + { + list_for_each_entry (l, &pl_inode->ext_list, list) { + if (l->blocked) + continue; + + if (locks_overlap (l, lock)) { + if (same_owner (l, lock)) + continue; + + if ((l->fl_type == F_WRLCK) || + (lock->fl_type == F_WRLCK)) { + conf = l; + goto unlock; + } + } + } + } +unlock: + pthread_mutex_unlock (&pl_inode->mutex); + + return conf; } /* - Start searching from {begin}, and return the first lock that - conflicts, NULL if no conflict - If {begin} is NULL, then start from the beginning of the list + Start searching from {begin}, and return the first lock that + conflicts, NULL if no conflict + If {begin} is NULL, then start from the beginning of the list */ static posix_lock_t * first_overlap (pl_inode_t *pl_inode, posix_lock_t *lock) { - posix_lock_t *l = NULL; + posix_lock_t *l = NULL; - list_for_each_entry (l, &pl_inode->ext_list, list) { - if (l->blocked) - continue; + list_for_each_entry (l, &pl_inode->ext_list, list) { + if (l->blocked) + continue; - if (locks_overlap (l, lock)) - return l; - } + if (locks_overlap (l, lock)) + return l; + } - return NULL; + return NULL; } @@ -356,21 +772,21 @@ first_overlap (pl_inode_t *pl_inode, posix_lock_t *lock) static int __is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock) { - posix_lock_t *l = NULL; - int ret = 1; - - list_for_each_entry (l, &pl_inode->ext_list, list) { - if (!l->blocked && locks_overlap (lock, l)) { - if (((l->fl_type == F_WRLCK) - || (lock->fl_type == F_WRLCK)) - && (lock->fl_type != F_UNLCK) - && !same_owner (l, lock)) { - ret = 0; - break; - } - } - } - return ret; + posix_lock_t *l = NULL; + int ret = 1; + + list_for_each_entry (l, &pl_inode->ext_list, list) { + if (!l->blocked && locks_overlap (lock, l)) { + if (((l->fl_type == F_WRLCK) + || (lock->fl_type == F_WRLCK)) + && (lock->fl_type != F_UNLCK) + && !same_owner (l, lock)) { + ret = 0; + break; + } + } + } + return ret; } @@ -380,227 +796,426 @@ extern void do_blocked_rw (pl_inode_t *); static void __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock) { - posix_lock_t *conf = NULL; - posix_lock_t *t = NULL; - posix_lock_t *sum = NULL; - int i = 0; - struct _values v = { .locks = {0, 0, 0} }; + posix_lock_t *conf = NULL; + posix_lock_t *t = NULL; + posix_lock_t *sum = NULL; + int i = 0; + struct _values v = { .locks = {0, 0, 0} }; + + list_for_each_entry_safe (conf, t, &pl_inode->ext_list, list) { + if (conf->blocked) + continue; + if (!locks_overlap (conf, lock)) + continue; + + if (same_owner (conf, lock)) { + if (conf->fl_type == lock->fl_type) { + sum = add_locks (lock, conf); + + sum->fl_type = lock->fl_type; + sum->client = lock->client; + sum->fd_num = lock->fd_num; + sum->client_pid = lock->client_pid; + sum->owner = lock->owner; + + __delete_lock (pl_inode, conf); + __destroy_lock (conf); + + __destroy_lock (lock); + INIT_LIST_HEAD (&sum->list); + posix_lock_to_flock (sum, &sum->user_flock); + __insert_and_merge (pl_inode, sum); + + return; + } else { + sum = add_locks (lock, conf); + + sum->fl_type = conf->fl_type; + sum->client = conf->client; + sum->fd_num = conf->fd_num; + sum->client_pid = conf->client_pid; + sum->owner = conf->owner; + + v = subtract_locks (sum, lock); + + __delete_lock (pl_inode, conf); + __destroy_lock (conf); + + __delete_lock (pl_inode, lock); + __destroy_lock (lock); + + __destroy_lock (sum); + + for (i = 0; i < 3; i++) { + if (!v.locks[i]) + continue; + + INIT_LIST_HEAD (&v.locks[i]->list); + posix_lock_to_flock (v.locks[i], + &v.locks[i]->user_flock); + __insert_and_merge (pl_inode, + v.locks[i]); + } + + __delete_unlck_locks (pl_inode); + return; + } + } + + if (lock->fl_type == F_UNLCK) { + continue; + } + + if ((conf->fl_type == F_RDLCK) && (lock->fl_type == F_RDLCK)) { + __insert_lock (pl_inode, lock); + return; + } + } + + /* no conflicts, so just insert */ + if (lock->fl_type != F_UNLCK) { + __insert_lock (pl_inode, lock); + } else { + __destroy_lock (lock); + } +} + + +void +__grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, struct list_head *granted) +{ + struct list_head tmp_list; + posix_lock_t *l = NULL; + posix_lock_t *tmp = NULL; + posix_lock_t *conf = NULL; + + INIT_LIST_HEAD (&tmp_list); + + list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) { + if (l->blocked) { + conf = first_overlap (pl_inode, l); + if (conf) + continue; + + l->blocked = 0; + list_move_tail (&l->list, &tmp_list); + } + } - list_for_each_entry_safe (conf, t, &pl_inode->ext_list, list) { - if (!locks_overlap (conf, lock)) - continue; + list_for_each_entry_safe (l, tmp, &tmp_list, list) { + list_del_init (&l->list); - if (same_owner (conf, lock)) { - if (conf->fl_type == lock->fl_type) { - sum = add_locks (lock, conf); + if (__is_lock_grantable (pl_inode, l)) { + conf = GF_CALLOC (1, sizeof (*conf), + gf_locks_mt_posix_lock_t); - sum->fl_type = lock->fl_type; - sum->transport = lock->transport; - sum->client_pid = lock->client_pid; + if (!conf) { + l->blocked = 1; + __insert_lock (pl_inode, l); + continue; + } - __delete_lock (pl_inode, conf); - __destroy_lock (conf); + conf->frame = l->frame; + l->frame = NULL; - __destroy_lock (lock); - __insert_and_merge (pl_inode, sum); + posix_lock_to_flock (l, &conf->user_flock); - return; - } else { - sum = add_locks (lock, conf); + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Granted", + l->fl_type == F_UNLCK ? "Unlock" : "Lock", + l->client_pid, lkowner_utoa (&l->owner), + l->user_flock.l_start, + l->user_flock.l_len); - sum->fl_type = conf->fl_type; - sum->transport = conf->transport; - sum->client_pid = conf->client_pid; + __insert_and_merge (pl_inode, l); - v = subtract_locks (sum, lock); + list_add (&conf->list, granted); + } else { + l->blocked = 1; + __insert_lock (pl_inode, l); + } + } +} - __delete_lock (pl_inode, conf); - __destroy_lock (conf); - __delete_lock (pl_inode, lock); - __destroy_lock (lock); +void +grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode) +{ + struct list_head granted_list; + posix_lock_t *tmp = NULL; + posix_lock_t *lock = NULL; - __destroy_lock (sum); + INIT_LIST_HEAD (&granted_list); - for (i = 0; i < 3; i++) { - if (!v.locks[i]) - continue; + pthread_mutex_lock (&pl_inode->mutex); + { + __grant_blocked_locks (this, pl_inode, &granted_list); + } + pthread_mutex_unlock (&pl_inode->mutex); - if (v.locks[i]->fl_type == F_UNLCK) { - __destroy_lock (v.locks[i]); - continue; - } - __insert_and_merge (pl_inode, - v.locks[i]); - } + list_for_each_entry_safe (lock, tmp, &granted_list, list) { + list_del_init (&lock->list); - __delete_unlck_locks (pl_inode); - return; - } - } + pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW, + &lock->user_flock, 0, 0, NULL); - if (lock->fl_type == F_UNLCK) { - continue; - } + STACK_UNWIND_STRICT (lk, lock->frame, 0, 0, + &lock->user_flock, NULL); - if ((conf->fl_type == F_RDLCK) && (lock->fl_type == F_RDLCK)) { - __insert_lock (pl_inode, lock); - return; - } - } + GF_FREE (lock); + } - /* no conflicts, so just insert */ - if (lock->fl_type != F_UNLCK) { - __insert_lock (pl_inode, lock); - } else { - __destroy_lock (lock); - } + return; } - -void -__grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, struct list_head *granted) +static int +pl_send_prelock_unlock (xlator_t *this, pl_inode_t *pl_inode, + posix_lock_t *old_lock) { - struct list_head tmp_list; - posix_lock_t *l = NULL; - posix_lock_t *tmp = NULL; - posix_lock_t *conf = NULL; + struct gf_flock flock = {0,}; + posix_lock_t *unlock_lock = NULL; + + struct list_head granted_list; + posix_lock_t *tmp = NULL; + posix_lock_t *lock = NULL; + + int ret = -1; - INIT_LIST_HEAD (&tmp_list); + INIT_LIST_HEAD (&granted_list); - list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) { - if (l->blocked) { - conf = first_overlap (pl_inode, l); - if (conf) - continue; + flock.l_type = F_UNLCK; + flock.l_whence = old_lock->user_flock.l_whence; + flock.l_start = old_lock->user_flock.l_start; + flock.l_len = old_lock->user_flock.l_len; - l->blocked = 0; - list_move_tail (&l->list, &tmp_list); - } - } - list_for_each_entry_safe (l, tmp, &tmp_list, list) { - list_del_init (&l->list); + unlock_lock = new_posix_lock (&flock, old_lock->client, + old_lock->client_pid, &old_lock->owner, + old_lock->fd); + GF_VALIDATE_OR_GOTO (this->name, unlock_lock, out); + ret = 0; - if (__is_lock_grantable (pl_inode, l)) { - conf = CALLOC (1, sizeof (*conf)); + __insert_and_merge (pl_inode, unlock_lock); - if (!conf) { - l->blocked = 1; - __insert_lock (pl_inode, l); - continue; - } + __grant_blocked_locks (this, pl_inode, &granted_list); - conf->frame = l->frame; - l->frame = NULL; + list_for_each_entry_safe (lock, tmp, &granted_list, list) { + list_del_init (&lock->list); - posix_lock_to_flock (l, &conf->user_flock); + pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW, + &lock->user_flock, 0, 0, NULL); - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) %"PRId64" - %"PRId64" => Granted", - l->fl_type == F_UNLCK ? "Unlock" : "Lock", - l->client_pid, - l->user_flock.l_start, - l->user_flock.l_len); + STACK_UNWIND_STRICT (lk, lock->frame, 0, 0, + &lock->user_flock, NULL); - __insert_and_merge (pl_inode, l); + GF_FREE (lock); + } - list_add (&conf->list, granted); - } else { - l->blocked = 1; - __insert_lock (pl_inode, l); - } - } +out: + return ret; } +int +pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, + int can_block) +{ + int ret = 0; + + errno = 0; + + pthread_mutex_lock (&pl_inode->mutex); + { + /* Send unlock before the actual lock to + prevent lock upgrade / downgrade + problems only if: + - it is a blocking call + - it has other conflicting locks + */ + + if (can_block && + !(__is_lock_grantable (pl_inode, lock))) { + ret = pl_send_prelock_unlock (this, pl_inode, + lock); + if (ret) + gf_log (this->name, GF_LOG_DEBUG, + "Could not send pre-lock " + "unlock"); + } + + if (__is_lock_grantable (pl_inode, lock)) { + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => OK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lkowner_utoa (&lock->owner), + lock->user_flock.l_start, + lock->user_flock.l_len); + __insert_and_merge (pl_inode, lock); + } else if (can_block) { + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lkowner_utoa (&lock->owner), + lock->user_flock.l_start, + lock->user_flock.l_len); + lock->blocked = 1; + __insert_lock (pl_inode, lock); + ret = -1; + } else { + gf_log (this->name, GF_LOG_TRACE, + "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => NOK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lkowner_utoa (&lock->owner), + lock->user_flock.l_start, + lock->user_flock.l_len); + errno = EAGAIN; + ret = -1; + } + } + pthread_mutex_unlock (&pl_inode->mutex); -void -grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode) + grant_blocked_locks (this, pl_inode); + + do_blocked_rw (pl_inode); + + return ret; +} + + +posix_lock_t * +pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock) { - struct list_head granted_list; - posix_lock_t *tmp = NULL; - posix_lock_t *lock = NULL; + posix_lock_t *conf = NULL; - INIT_LIST_HEAD (&granted_list); + conf = first_conflicting_overlap (pl_inode, lock); - pthread_mutex_lock (&pl_inode->mutex); - { - __grant_blocked_locks (this, pl_inode, &granted_list); - } - pthread_mutex_unlock (&pl_inode->mutex); + if (conf == NULL) { + lock->fl_type = F_UNLCK; + return lock; + } - list_for_each_entry_safe (lock, tmp, &granted_list, list) { - list_del_init (&lock->list); + return conf; +} - STACK_UNWIND (lock->frame, 0, 0, &lock->user_flock); - FREE (lock); - } +struct _lock_table * +pl_lock_table_new (void) +{ + struct _lock_table *new = NULL; - return; + new = GF_CALLOC (1, sizeof (struct _lock_table), gf_common_mt_lock_table); + if (new == NULL) { + goto out; + } + INIT_LIST_HEAD (&new->entrylk_lockers); + INIT_LIST_HEAD (&new->inodelk_lockers); + LOCK_INIT (&new->lock); +out: + return new; } int -pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, - int can_block) -{ - int ret = 0; - - errno = 0; - - pthread_mutex_lock (&pl_inode->mutex); - { - if (__is_lock_grantable (pl_inode, lock)) { - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) %"PRId64" - %"PRId64" => OK", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", - lock->client_pid, - lock->user_flock.l_start, - lock->user_flock.l_len); - __insert_and_merge (pl_inode, lock); - } else if (can_block) { - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) %"PRId64" - %"PRId64" => Blocked", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", - lock->client_pid, - lock->user_flock.l_start, - lock->user_flock.l_len); - lock->blocked = 1; - __insert_lock (pl_inode, lock); - ret = -1; - } else { - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) %"PRId64" - %"PRId64" => NOK", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", - lock->client_pid, - lock->user_flock.l_start, - lock->user_flock.l_len); - errno = EAGAIN; - ret = -1; - } - } - pthread_mutex_unlock (&pl_inode->mutex); - - grant_blocked_locks (this, pl_inode); - - do_blocked_rw (pl_inode); - - return ret; -} +pl_add_locker (struct _lock_table *table, const char *volume, + loc_t *loc, fd_t *fd, pid_t pid, gf_lkowner_t *owner, + glusterfs_fop_t type) +{ + int32_t ret = -1; + struct _locker *new = NULL; + GF_VALIDATE_OR_GOTO ("lock-table", table, out); + GF_VALIDATE_OR_GOTO ("lock-table", volume, out); -posix_lock_t * -pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock) + new = GF_CALLOC (1, sizeof (struct _locker), gf_common_mt_locker); + if (new == NULL) { + goto out; + } + INIT_LIST_HEAD (&new->lockers); + + new->volume = gf_strdup (volume); + + if (fd == NULL) { + loc_copy (&new->loc, loc); + } else { + new->fd = fd_ref (fd); + } + + new->pid = pid; + new->owner = *owner; + + LOCK (&table->lock); + { + if (type == GF_FOP_ENTRYLK) + list_add_tail (&new->lockers, &table->entrylk_lockers); + else + list_add_tail (&new->lockers, &table->inodelk_lockers); + } + UNLOCK (&table->lock); +out: + return ret; +} + +int +pl_del_locker (struct _lock_table *table, const char *volume, + loc_t *loc, fd_t *fd, gf_lkowner_t *owner, glusterfs_fop_t type) { - posix_lock_t *conf = NULL; + struct _locker *locker = NULL; + struct _locker *tmp = NULL; + int32_t ret = -1; + struct list_head *head = NULL; + struct list_head del; + + GF_VALIDATE_OR_GOTO ("lock-table", table, out); + GF_VALIDATE_OR_GOTO ("lock-table", volume, out); + + INIT_LIST_HEAD (&del); + + LOCK (&table->lock); + { + if (type == GF_FOP_ENTRYLK) { + head = &table->entrylk_lockers; + } else { + head = &table->inodelk_lockers; + } + + list_for_each_entry_safe (locker, tmp, head, lockers) { + if (!is_same_lkowner (&locker->owner, owner) || + strcmp (locker->volume, volume)) + continue; + + /* + * It is possible for inodelk lock to come on anon-fd + * and inodelk unlock to come on normal fd in case of + * client re-opens. So don't check for fds to be equal. + */ + if (locker->fd && fd) + list_move_tail (&locker->lockers, &del); + else if (locker->loc.inode && loc && + (locker->loc.inode == loc->inode)) + list_move_tail (&locker->lockers, &del); + } + } + UNLOCK (&table->lock); - conf = first_overlap (pl_inode, lock); + tmp = NULL; + locker = NULL; - if (conf == NULL) { - lock->fl_type = F_UNLCK; - return lock; - } + list_for_each_entry_safe (locker, tmp, &del, lockers) { + list_del_init (&locker->lockers); + if (locker->fd) + fd_unref (locker->fd); + else + loc_wipe (&locker->loc); + + GF_FREE (locker->volume); + GF_FREE (locker); + } + + ret = 0; +out: + return ret; - return conf; } + |
