diff options
Diffstat (limited to 'xlators/features/locks/src/common.c')
| -rw-r--r-- | xlators/features/locks/src/common.c | 1966 |
1 files changed, 1228 insertions, 738 deletions
diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c index f6c71c1cf86..a2c6be93e03 100644 --- a/xlators/features/locks/src/common.c +++ b/xlators/features/locks/src/common.c @@ -1,5 +1,5 @@ /* - Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + Copyright (c) 2006-2012, 2015-2016 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. This file is licensed to you under your choice of the GNU Lesser @@ -12,737 +12,775 @@ #include <limits.h> #include <pthread.h> -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "glusterfs.h" -#include "compat.h" -#include "xlator.h" -#include "inode.h" -#include "logging.h" -#include "common-utils.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/compat.h> +#include <glusterfs/logging.h> +#include <glusterfs/syncop.h> #include "locks.h" #include "common.h" - static int -__is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock); +__is_lock_grantable(pl_inode_t *pl_inode, posix_lock_t *lock); static void -__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock); +__insert_and_merge(pl_inode_t *pl_inode, posix_lock_t *lock); static int -pl_send_prelock_unlock (xlator_t *this, pl_inode_t *pl_inode, - posix_lock_t *old_lock); +pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode, + posix_lock_t *old_lock); static pl_dom_list_t * -__allocate_domain (const char *volume) +__allocate_domain(const char *volume) { - pl_dom_list_t *dom = NULL; + pl_dom_list_t *dom = NULL; - dom = GF_CALLOC (1, sizeof (*dom), - gf_locks_mt_pl_dom_list_t); - if (!dom) - goto out; + dom = GF_CALLOC(1, sizeof(*dom), gf_locks_mt_pl_dom_list_t); + if (!dom) + goto out; - dom->domain = gf_strdup(volume); - if (!dom->domain) - goto out; + dom->domain = gf_strdup(volume); + if (!dom->domain) + goto out; - gf_log ("posix-locks", GF_LOG_TRACE, - "New domain allocated: %s", dom->domain); + gf_log("posix-locks", GF_LOG_TRACE, "New domain allocated: %s", + dom->domain); - INIT_LIST_HEAD (&dom->inode_list); - INIT_LIST_HEAD (&dom->entrylk_list); - INIT_LIST_HEAD (&dom->blocked_entrylks); - INIT_LIST_HEAD (&dom->inodelk_list); - INIT_LIST_HEAD (&dom->blocked_inodelks); + INIT_LIST_HEAD(&dom->inode_list); + INIT_LIST_HEAD(&dom->entrylk_list); + INIT_LIST_HEAD(&dom->blocked_entrylks); + INIT_LIST_HEAD(&dom->inodelk_list); + INIT_LIST_HEAD(&dom->blocked_inodelks); out: - if (dom && (NULL == dom->domain)) { - GF_FREE (dom); - dom = NULL; - } + if (dom && (NULL == dom->domain)) { + GF_FREE(dom); + dom = NULL; + } - return dom; + return dom; } /* Returns domain for the lock. If domain is not present, * allocates a domain and returns it */ pl_dom_list_t * -get_domain (pl_inode_t *pl_inode, const char *volume) +get_domain(pl_inode_t *pl_inode, const char *volume) { - pl_dom_list_t *dom = NULL; + pl_dom_list_t *dom = NULL; - GF_VALIDATE_OR_GOTO ("posix-locks", pl_inode, out); - GF_VALIDATE_OR_GOTO ("posix-locks", volume, out); + GF_VALIDATE_OR_GOTO("posix-locks", pl_inode, out); + GF_VALIDATE_OR_GOTO("posix-locks", volume, out); - pthread_mutex_lock (&pl_inode->mutex); + pthread_mutex_lock(&pl_inode->mutex); + { + list_for_each_entry(dom, &pl_inode->dom_list, inode_list) { - list_for_each_entry (dom, &pl_inode->dom_list, inode_list) { - if (strcmp (dom->domain, volume) == 0) - goto unlock; - } - - dom = __allocate_domain (volume); - if (dom) - list_add (&dom->inode_list, &pl_inode->dom_list); + if (strcmp(dom->domain, volume) == 0) + goto unlock; } + + dom = __allocate_domain(volume); + if (dom) + list_add(&dom->inode_list, &pl_inode->dom_list); + } unlock: - pthread_mutex_unlock (&pl_inode->mutex); - if (dom) { - gf_log ("posix-locks", GF_LOG_TRACE, "Domain %s found", volume); - } else { - gf_log ("posix-locks", GF_LOG_TRACE, "Domain %s not found", volume); - } + pthread_mutex_unlock(&pl_inode->mutex); + if (dom) { + gf_log("posix-locks", GF_LOG_TRACE, "Domain %s found", volume); + } else { + gf_log("posix-locks", GF_LOG_TRACE, "Domain %s not found", volume); + } out: - return dom; + return dom; } unsigned long -fd_to_fdnum (fd_t *fd) +fd_to_fdnum(fd_t *fd) { - return ((unsigned long) fd); + return ((unsigned long)fd); } fd_t * -fd_from_fdnum (posix_lock_t *lock) +fd_from_fdnum(posix_lock_t *lock) { - return ((fd_t *) lock->fd_num); + return ((fd_t *)lock->fd_num); } int -__pl_inode_is_empty (pl_inode_t *pl_inode) +__pl_inode_is_empty(pl_inode_t *pl_inode) { - pl_dom_list_t *dom = NULL; - int is_empty = 1; - - if (!list_empty (&pl_inode->ext_list)) - is_empty = 0; - - list_for_each_entry (dom, &pl_inode->dom_list, inode_list) { - if (!list_empty (&dom->entrylk_list)) - is_empty = 0; - - if (!list_empty (&dom->inodelk_list)) - is_empty = 0; - } - - return is_empty; + return (list_empty(&pl_inode->ext_list)); } void -pl_print_locker (char *str, int size, xlator_t *this, call_frame_t *frame) +pl_print_locker(char *str, int size, xlator_t *this, call_frame_t *frame) { - snprintf (str, size, "Pid=%llu, lk-owner=%s, Client=%p, Frame=%llu", - (unsigned long long) frame->root->pid, - lkowner_utoa (&frame->root->lk_owner), - frame->root->client, - (unsigned long long) frame->root->unique); + snprintf(str, size, "Pid=%llu, lk-owner=%s, Client=%p, Frame=%llu", + (unsigned long long)frame->root->pid, + lkowner_utoa(&frame->root->lk_owner), frame->root->client, + (unsigned long long)frame->root->unique); } - void -pl_print_lockee (char *str, int size, fd_t *fd, loc_t *loc) +pl_print_lockee(char *str, int size, fd_t *fd, loc_t *loc) { - inode_t *inode = NULL; - char *ipath = NULL; - int ret = 0; + inode_t *inode = NULL; + char *ipath = NULL; + int ret = 0; - if (fd) - inode = fd->inode; - if (loc) - inode = loc->inode; + if (fd) + inode = fd->inode; + if (loc) + inode = loc->inode; - if (!inode) { - snprintf (str, size, "<nul>"); - return; - } + if (!inode) { + snprintf(str, size, "<nul>"); + return; + } - if (loc && loc->path) { - ipath = gf_strdup (loc->path); - } else { - ret = inode_path (inode, NULL, &ipath); - if (ret <= 0) - ipath = NULL; - } + if (loc && loc->path) { + ipath = gf_strdup(loc->path); + } else { + ret = inode_path(inode, NULL, &ipath); + if (ret <= 0) + ipath = NULL; + } - snprintf (str, size, "gfid=%s, fd=%p, path=%s", - uuid_utoa (inode->gfid), fd, - ipath ? ipath : "<nul>"); + snprintf(str, size, "gfid=%s, fd=%p, path=%s", uuid_utoa(inode->gfid), fd, + ipath ? ipath : "<nul>"); - GF_FREE (ipath); + GF_FREE(ipath); } - void -pl_print_lock (char *str, int size, int cmd, - struct gf_flock *flock, gf_lkowner_t *owner) +pl_print_lock(char *str, int size, int cmd, struct gf_flock *flock, + gf_lkowner_t *owner) { - char *cmd_str = NULL; - char *type_str = NULL; + char *cmd_str = NULL; + char *type_str = NULL; - switch (cmd) { + switch (cmd) { #if F_GETLK != F_GETLK64 case F_GETLK64: #endif case F_GETLK: - cmd_str = "GETLK"; - break; + cmd_str = "GETLK"; + break; #if F_SETLK != F_SETLK64 case F_SETLK64: #endif case F_SETLK: - cmd_str = "SETLK"; - break; + cmd_str = "SETLK"; + break; #if F_SETLKW != F_SETLKW64 case F_SETLKW64: #endif case F_SETLKW: - cmd_str = "SETLKW"; - break; + cmd_str = "SETLKW"; + break; default: - cmd_str = "UNKNOWN"; - break; - } + cmd_str = "UNKNOWN"; + break; + } - switch (flock->l_type) { + switch (flock->l_type) { case F_RDLCK: - type_str = "READ"; - break; + type_str = "READ"; + break; case F_WRLCK: - type_str = "WRITE"; - break; + type_str = "WRITE"; + break; case F_UNLCK: - type_str = "UNLOCK"; - break; + type_str = "UNLOCK"; + break; default: - type_str = "UNKNOWN"; - break; - } - - snprintf (str, size, "lock=FCNTL, cmd=%s, type=%s, " - "start=%llu, len=%llu, pid=%llu, lk-owner=%s", - cmd_str, type_str, (unsigned long long) flock->l_start, - (unsigned long long) flock->l_len, - (unsigned long long) flock->l_pid, - lkowner_utoa (owner)); + type_str = "UNKNOWN"; + break; + } + + snprintf(str, size, + "lock=FCNTL, cmd=%s, type=%s, " + "start=%llu, len=%llu, pid=%llu, lk-owner=%s", + cmd_str, type_str, (unsigned long long)flock->l_start, + (unsigned long long)flock->l_len, (unsigned long long)flock->l_pid, + lkowner_utoa(owner)); } - void -pl_trace_in (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, - int cmd, struct gf_flock *flock, const char *domain) +pl_trace_in(xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, int cmd, + struct gf_flock *flock, const char *domain) { - posix_locks_private_t *priv = NULL; - char pl_locker[256]; - char pl_lockee[256]; - char pl_lock[256]; - - priv = this->private; + posix_locks_private_t *priv = this->private; + char pl_locker[256]; + char pl_lockee[256]; + char pl_lock[256]; - if (!priv->trace) - return; + if (!priv->trace) + return; - pl_print_locker (pl_locker, 256, this, frame); - pl_print_lockee (pl_lockee, 256, fd, loc); - if (domain) - pl_print_inodelk (pl_lock, 256, cmd, flock, domain); - else - pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner); + pl_print_locker(pl_locker, 256, this, frame); + pl_print_lockee(pl_lockee, 256, fd, loc); + if (domain) + pl_print_inodelk(pl_lock, 256, cmd, flock, domain); + else + pl_print_lock(pl_lock, 256, cmd, flock, &frame->root->lk_owner); - gf_log (this->name, GF_LOG_INFO, - "[REQUEST] Locker = {%s} Lockee = {%s} Lock = {%s}", - pl_locker, pl_lockee, pl_lock); + gf_log(this->name, GF_LOG_INFO, + "[REQUEST] Locker = {%s} Lockee = {%s} Lock = {%s}", pl_locker, + pl_lockee, pl_lock); } - void -pl_print_verdict (char *str, int size, int op_ret, int op_errno) +pl_print_verdict(char *str, int size, int op_ret, int op_errno) { - char *verdict = NULL; - - if (op_ret == 0) { - verdict = "GRANTED"; - } else { - switch (op_errno) { - case EAGAIN: - verdict = "TRYAGAIN"; - break; - default: - verdict = strerror (op_errno); - } + char *verdict = NULL; + + if (op_ret == 0) { + verdict = "GRANTED"; + } else { + switch (op_errno) { + case EAGAIN: + verdict = "TRYAGAIN"; + break; + default: + verdict = strerror(op_errno); } + } - snprintf (str, size, "%s", verdict); + snprintf(str, size, "%s", verdict); } - void -pl_trace_out (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, - int cmd, struct gf_flock *flock, int op_ret, int op_errno, const char *domain) +pl_trace_out(xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, int cmd, + struct gf_flock *flock, int op_ret, int op_errno, + const char *domain) { - posix_locks_private_t *priv = NULL; - char pl_locker[256]; - char pl_lockee[256]; - char pl_lock[256]; - char verdict[32]; + posix_locks_private_t *priv = NULL; + char pl_locker[256]; + char pl_lockee[256]; + char pl_lock[256]; + char verdict[32]; - priv = this->private; + priv = this->private; - if (!priv->trace) - return; + if (!priv->trace) + return; - pl_print_locker (pl_locker, 256, this, frame); - pl_print_lockee (pl_lockee, 256, fd, loc); - if (domain) - pl_print_inodelk (pl_lock, 256, cmd, flock, domain); - else - pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner); + pl_print_locker(pl_locker, 256, this, frame); + pl_print_lockee(pl_lockee, 256, fd, loc); + if (domain) + pl_print_inodelk(pl_lock, 256, cmd, flock, domain); + else + pl_print_lock(pl_lock, 256, cmd, flock, &frame->root->lk_owner); - pl_print_verdict (verdict, 32, op_ret, op_errno); + pl_print_verdict(verdict, 32, op_ret, op_errno); - gf_log (this->name, GF_LOG_INFO, - "[%s] Locker = {%s} Lockee = {%s} Lock = {%s}", - verdict, pl_locker, pl_lockee, pl_lock); + gf_log(this->name, GF_LOG_INFO, + "[%s] Locker = {%s} Lockee = {%s} Lock = {%s}", verdict, pl_locker, + pl_lockee, pl_lock); } - void -pl_trace_block (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, - int cmd, struct gf_flock *flock, const char *domain) +pl_trace_block(xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, + int cmd, struct gf_flock *flock, const char *domain) { - posix_locks_private_t *priv = NULL; - char pl_locker[256]; - char pl_lockee[256]; - char pl_lock[256]; - - priv = this->private; + posix_locks_private_t *priv = this->private; + char pl_locker[256]; + char pl_lockee[256]; + char pl_lock[256]; - if (!priv->trace) - return; + if (!priv->trace) + return; - pl_print_locker (pl_locker, 256, this, frame); - pl_print_lockee (pl_lockee, 256, fd, loc); - if (domain) - pl_print_inodelk (pl_lock, 256, cmd, flock, domain); - else - pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner); + pl_print_locker(pl_locker, 256, this, frame); + pl_print_lockee(pl_lockee, 256, fd, loc); + if (domain) + pl_print_inodelk(pl_lock, 256, cmd, flock, domain); + else + pl_print_lock(pl_lock, 256, cmd, flock, &frame->root->lk_owner); - gf_log (this->name, GF_LOG_INFO, - "[BLOCKED] Locker = {%s} Lockee = {%s} Lock = {%s}", - pl_locker, pl_lockee, pl_lock); + gf_log(this->name, GF_LOG_INFO, + "[BLOCKED] Locker = {%s} Lockee = {%s} Lock = {%s}", pl_locker, + pl_lockee, pl_lock); } - void -pl_trace_flush (xlator_t *this, call_frame_t *frame, fd_t *fd) +pl_trace_flush(xlator_t *this, call_frame_t *frame, fd_t *fd) { - posix_locks_private_t *priv = NULL; - char pl_locker[256]; - char pl_lockee[256]; - pl_inode_t *pl_inode = NULL; + posix_locks_private_t *priv = NULL; + char pl_locker[256]; + char pl_lockee[256]; + pl_inode_t *pl_inode = NULL; - priv = this->private; + priv = this->private; - if (!priv->trace) - return; + if (!priv->trace) + return; - pl_inode = pl_inode_get (this, fd->inode); + pl_inode = pl_inode_get(this, fd->inode, NULL); - if (pl_inode && __pl_inode_is_empty (pl_inode)) - return; + if (pl_inode && __pl_inode_is_empty(pl_inode)) + return; - pl_print_locker (pl_locker, 256, this, frame); - pl_print_lockee (pl_lockee, 256, fd, NULL); + pl_print_locker(pl_locker, 256, this, frame); + pl_print_lockee(pl_lockee, 256, fd, NULL); - gf_log (this->name, GF_LOG_INFO, - "[FLUSH] Locker = {%s} Lockee = {%s}", - pl_locker, pl_lockee); + gf_log(this->name, GF_LOG_INFO, "[FLUSH] Locker = {%s} Lockee = {%s}", + pl_locker, pl_lockee); } void -pl_trace_release (xlator_t *this, fd_t *fd) +pl_trace_release(xlator_t *this, fd_t *fd) { - posix_locks_private_t *priv = NULL; - char pl_lockee[256]; + posix_locks_private_t *priv = NULL; + char pl_lockee[256]; - priv = this->private; + priv = this->private; - if (!priv->trace) - return; + if (!priv->trace) + return; - pl_print_lockee (pl_lockee, 256, fd, NULL); + pl_print_lockee(pl_lockee, 256, fd, NULL); - gf_log (this->name, GF_LOG_INFO, - "[RELEASE] Lockee = {%s}", pl_lockee); + gf_log(this->name, GF_LOG_INFO, "[RELEASE] Lockee = {%s}", pl_lockee); } - void -pl_update_refkeeper (xlator_t *this, inode_t *inode) +pl_update_refkeeper(xlator_t *this, inode_t *inode) { - pl_inode_t *pl_inode = NULL; - int is_empty = 0; - int need_unref = 0; - int need_ref = 0; + pl_inode_t *pl_inode = NULL; + int is_empty = 0; + int need_unref = 0; + int need_ref = 0; - pl_inode = pl_inode_get (this, inode); + pl_inode = pl_inode_get(this, inode, NULL); + if (!pl_inode) + return; - pthread_mutex_lock (&pl_inode->mutex); - { - is_empty = __pl_inode_is_empty (pl_inode); + pthread_mutex_lock(&pl_inode->mutex); + { + is_empty = __pl_inode_is_empty(pl_inode); - if (is_empty && pl_inode->refkeeper) { - need_unref = 1; - pl_inode->refkeeper = NULL; - } + if (is_empty && pl_inode->refkeeper) { + need_unref = 1; + pl_inode->refkeeper = NULL; + } - if (!is_empty && !pl_inode->refkeeper) { - need_ref = 1; - pl_inode->refkeeper = inode; - } + if (!is_empty && !pl_inode->refkeeper) { + need_ref = 1; + pl_inode->refkeeper = inode; } - pthread_mutex_unlock (&pl_inode->mutex); + } + pthread_mutex_unlock(&pl_inode->mutex); - if (need_unref) - inode_unref (inode); + if (need_unref) + inode_unref(inode); - if (need_ref) - inode_ref (inode); + if (need_ref) + inode_ref(inode); } - -pl_inode_t * -pl_inode_get (xlator_t *this, inode_t *inode) +/* Get lock enforcement info from disk */ +int +pl_fetch_mlock_info_from_disk(xlator_t *this, pl_inode_t *pl_inode, + pl_local_t *local) { - uint64_t tmp_pl_inode = 0; - pl_inode_t *pl_inode = NULL; - int ret = 0; - - LOCK (&inode->lock); - { - ret = __inode_ctx_get (inode, this, &tmp_pl_inode); - if (ret == 0) { - pl_inode = (pl_inode_t *)(long)tmp_pl_inode; - goto unlock; - } - pl_inode = GF_CALLOC (1, sizeof (*pl_inode), - gf_locks_mt_pl_inode_t); - if (!pl_inode) { - goto unlock; - } + dict_t *xdata_rsp = NULL; + int ret = 0; + int op_ret = 0; + + if (!local) { + return -1; + } + + if (local->fd) { + op_ret = syncop_fgetxattr(this, local->fd, &xdata_rsp, + GF_ENFORCE_MANDATORY_LOCK, NULL, NULL); + } else { + op_ret = syncop_getxattr(this, &local->loc[0], &xdata_rsp, + GF_ENFORCE_MANDATORY_LOCK, NULL, NULL); + } + + pthread_mutex_lock(&pl_inode->mutex); + { + if (op_ret >= 0) { + pl_inode->mlock_enforced = _gf_true; + pl_inode->check_mlock_info = _gf_false; + } else { + gf_msg(this->name, GF_LOG_WARNING, -op_ret, 0, + "getxattr failed with %d", op_ret); + pl_inode->mlock_enforced = _gf_false; + + if (-op_ret == ENODATA) { + pl_inode->check_mlock_info = _gf_false; + } else { + pl_inode->check_mlock_info = _gf_true; + } + } + } + pthread_mutex_unlock(&pl_inode->mutex); - gf_log (this->name, GF_LOG_TRACE, - "Allocating new pl inode"); + return ret; +} - pthread_mutex_init (&pl_inode->mutex, NULL); +pl_inode_t * +pl_inode_get(xlator_t *this, inode_t *inode, pl_local_t *local) +{ + uint64_t tmp_pl_inode = 0; + pl_inode_t *pl_inode = NULL; + int ret = 0; + + LOCK(&inode->lock); + { + ret = __inode_ctx_get(inode, this, &tmp_pl_inode); + if (ret == 0) { + pl_inode = (pl_inode_t *)(long)tmp_pl_inode; + goto unlock; + } - INIT_LIST_HEAD (&pl_inode->dom_list); - INIT_LIST_HEAD (&pl_inode->ext_list); - INIT_LIST_HEAD (&pl_inode->rw_list); - INIT_LIST_HEAD (&pl_inode->reservelk_list); - INIT_LIST_HEAD (&pl_inode->blocked_reservelks); - INIT_LIST_HEAD (&pl_inode->blocked_calls); + pl_inode = GF_CALLOC(1, sizeof(*pl_inode), gf_locks_mt_pl_inode_t); + if (!pl_inode) { + goto unlock; + } - __inode_ctx_put (inode, this, (uint64_t)(long)(pl_inode)); + gf_log(this->name, GF_LOG_TRACE, "Allocating new pl inode"); + + pthread_mutex_init(&pl_inode->mutex, NULL); + pthread_cond_init(&pl_inode->check_fop_wind_count, 0); + + INIT_LIST_HEAD(&pl_inode->dom_list); + INIT_LIST_HEAD(&pl_inode->ext_list); + INIT_LIST_HEAD(&pl_inode->rw_list); + INIT_LIST_HEAD(&pl_inode->reservelk_list); + INIT_LIST_HEAD(&pl_inode->blocked_reservelks); + INIT_LIST_HEAD(&pl_inode->blocked_calls); + INIT_LIST_HEAD(&pl_inode->metalk_list); + INIT_LIST_HEAD(&pl_inode->queued_locks); + INIT_LIST_HEAD(&pl_inode->waiting); + gf_uuid_copy(pl_inode->gfid, inode->gfid); + + pl_inode->check_mlock_info = _gf_true; + pl_inode->mlock_enforced = _gf_false; + + /* -2 means never looked up. -1 means something went wrong and link + * tracking is disabled. */ + pl_inode->links = -2; + + ret = __inode_ctx_put(inode, this, (uint64_t)(long)(pl_inode)); + if (ret) { + pthread_mutex_destroy(&pl_inode->mutex); + GF_FREE(pl_inode); + pl_inode = NULL; + goto unlock; } + } unlock: - UNLOCK (&inode->lock); + UNLOCK(&inode->lock); - return pl_inode; -} + if ((pl_inode != NULL) && pl_is_mandatory_locking_enabled(pl_inode) && + pl_inode->check_mlock_info && local) { + /* Note: The lock enforcement information per file can be stored in the + attribute flag of stat(x) in posix. With that there won't be a need + for doing getxattr post a reboot + */ + pl_fetch_mlock_info_from_disk(this, pl_inode, local); + } + return pl_inode; +} /* Create a new posix_lock_t */ posix_lock_t * -new_posix_lock (struct gf_flock *flock, client_t *client, pid_t client_pid, - gf_lkowner_t *owner, fd_t *fd) +new_posix_lock(struct gf_flock *flock, client_t *client, pid_t client_pid, + gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags, int blocking, + int32_t *op_errno) { - posix_lock_t *lock = NULL; + posix_lock_t *lock = NULL; - GF_VALIDATE_OR_GOTO ("posix-locks", flock, out); - GF_VALIDATE_OR_GOTO ("posix-locks", client, out); - GF_VALIDATE_OR_GOTO ("posix-locks", fd, out); + GF_VALIDATE_OR_GOTO("posix-locks", flock, out); + GF_VALIDATE_OR_GOTO("posix-locks", client, out); + GF_VALIDATE_OR_GOTO("posix-locks", fd, out); - lock = GF_CALLOC (1, sizeof (posix_lock_t), - gf_locks_mt_posix_lock_t); - if (!lock) { - goto out; - } + if (!pl_is_lk_owner_valid(owner, client)) { + *op_errno = EINVAL; + goto out; + } + + lock = GF_CALLOC(1, sizeof(posix_lock_t), gf_locks_mt_posix_lock_t); + if (!lock) { + *op_errno = ENOMEM; + goto out; + } - lock->fl_start = flock->l_start; - lock->fl_type = flock->l_type; + lock->fl_start = flock->l_start; + lock->fl_type = flock->l_type; - if (flock->l_len == 0) - lock->fl_end = LLONG_MAX; - else - lock->fl_end = flock->l_start + flock->l_len - 1; + if (flock->l_len == 0) + lock->fl_end = LLONG_MAX; + else + lock->fl_end = flock->l_start + flock->l_len - 1; - lock->client = client; - lock->fd_num = fd_to_fdnum (fd); - lock->fd = fd; - lock->client_pid = client_pid; - lock->owner = *owner; + lock->client = client; - INIT_LIST_HEAD (&lock->list); + lock->client_uid = gf_strdup(client->client_uid); + if (lock->client_uid == NULL) { + GF_FREE(lock); + lock = NULL; + *op_errno = ENOMEM; + goto out; + } + + lock->fd_num = fd_to_fdnum(fd); + lock->fd = fd; + lock->client_pid = client_pid; + lock->owner = *owner; + lock->lk_flags = lk_flags; + + lock->blocking = blocking; + memcpy(&lock->user_flock, flock, sizeof(lock->user_flock)); + + INIT_LIST_HEAD(&lock->list); out: - return lock; + return lock; } - /* Delete a lock from the inode's lock list */ void -__delete_lock (pl_inode_t *pl_inode, posix_lock_t *lock) +__delete_lock(posix_lock_t *lock) { - list_del_init (&lock->list); + list_del_init(&lock->list); } - /* Destroy a posix_lock */ void -__destroy_lock (posix_lock_t *lock) +__destroy_lock(posix_lock_t *lock) { - GF_FREE (lock); + GF_FREE(lock->client_uid); + GF_FREE(lock); } +static posix_lock_t * +__copy_lock(posix_lock_t *src) +{ + posix_lock_t *dst; + + dst = GF_MALLOC(sizeof(posix_lock_t), gf_locks_mt_posix_lock_t); + if (dst != NULL) { + memcpy(dst, src, sizeof(posix_lock_t)); + dst->client_uid = gf_strdup(src->client_uid); + if (dst->client_uid == NULL) { + GF_FREE(dst); + dst = NULL; + } + + if (dst != NULL) + INIT_LIST_HEAD(&dst->list); + } + + return dst; +} /* Convert a posix_lock to a struct gf_flock */ void -posix_lock_to_flock (posix_lock_t *lock, struct gf_flock *flock) +posix_lock_to_flock(posix_lock_t *lock, struct gf_flock *flock) { - flock->l_pid = lock->client_pid; - flock->l_type = lock->fl_type; - flock->l_start = lock->fl_start; - flock->l_owner = lock->owner; - - if (lock->fl_end == LLONG_MAX) - flock->l_len = 0; - else - flock->l_len = lock->fl_end - lock->fl_start + 1; + flock->l_pid = lock->user_flock.l_pid; + flock->l_type = lock->fl_type; + flock->l_start = lock->fl_start; + flock->l_owner = lock->owner; + + if (lock->fl_end == LLONG_MAX) + flock->l_len = 0; + else + flock->l_len = lock->fl_end - lock->fl_start + 1; } /* Insert the lock into the inode's lock list */ static void -__insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock) +__insert_lock(pl_inode_t *pl_inode, posix_lock_t *lock) { - if (lock->blocked) - gettimeofday (&lock->blkd_time, NULL); - else - gettimeofday (&lock->granted_time, NULL); + if (lock->blocked) + lock->blkd_time = gf_time(); + else + lock->granted_time = gf_time(); - list_add_tail (&lock->list, &pl_inode->ext_list); - - return; + list_add_tail(&lock->list, &pl_inode->ext_list); } - /* Return true if the locks overlap, false otherwise */ int -locks_overlap (posix_lock_t *l1, posix_lock_t *l2) +locks_overlap(posix_lock_t *l1, posix_lock_t *l2) { - /* - Note: - FUSE always gives us absolute offsets, so no need to worry - about SEEK_CUR or SEEK_END - */ + /* + Note: + FUSE always gives us absolute offsets, so no need to worry + about SEEK_CUR or SEEK_END + */ - return ((l1->fl_end >= l2->fl_start) && - (l2->fl_end >= l1->fl_start)); + return ((l1->fl_end >= l2->fl_start) && (l2->fl_end >= l1->fl_start)); } - /* Return true if the locks have the same owner */ int -same_owner (posix_lock_t *l1, posix_lock_t *l2) +same_owner(posix_lock_t *l1, posix_lock_t *l2) { - - return (is_same_lkowner (&l1->owner, &l2->owner) && - (l1->client == l2->client)); - + return (is_same_lkowner(&l1->owner, &l2->owner) && + (l1->client == l2->client)); } - /* Delete all F_UNLCK locks */ void -__delete_unlck_locks (pl_inode_t *pl_inode) +__delete_unlck_locks(pl_inode_t *pl_inode) { - posix_lock_t *l = NULL; - posix_lock_t *tmp = NULL; - - list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) { - if (l->fl_type == F_UNLCK) { - __delete_lock (pl_inode, l); - __destroy_lock (l); - } + posix_lock_t *l = NULL; + posix_lock_t *tmp = NULL; + + list_for_each_entry_safe(l, tmp, &pl_inode->ext_list, list) + { + if (l->fl_type == F_UNLCK) { + __delete_lock(l); + __destroy_lock(l); } + } } - /* Add two locks */ static posix_lock_t * -add_locks (posix_lock_t *l1, posix_lock_t *l2) +add_locks(posix_lock_t *l1, posix_lock_t *l2, posix_lock_t *dst) { - posix_lock_t *sum = NULL; + posix_lock_t *sum = NULL; - sum = GF_CALLOC (1, sizeof (posix_lock_t), - gf_locks_mt_posix_lock_t); - if (!sum) - return NULL; + sum = __copy_lock(dst); + if (!sum) + return NULL; - sum->fl_start = min (l1->fl_start, l2->fl_start); - sum->fl_end = max (l1->fl_end, l2->fl_end); + sum->fl_start = min(l1->fl_start, l2->fl_start); + sum->fl_end = max(l1->fl_end, l2->fl_end); - return sum; + posix_lock_to_flock(sum, &sum->user_flock); + + return sum; } /* Subtract two locks */ struct _values { - posix_lock_t *locks[3]; + posix_lock_t *locks[3]; }; /* {big} must always be contained inside {small} */ static struct _values -subtract_locks (posix_lock_t *big, posix_lock_t *small) +subtract_locks(posix_lock_t *big, posix_lock_t *small) { + struct _values v = {.locks = {0, 0, 0}}; - struct _values v = { .locks = {0, 0, 0} }; - - if ((big->fl_start == small->fl_start) && - (big->fl_end == small->fl_end)) { - /* both edges coincide with big */ - v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t), - gf_locks_mt_posix_lock_t); - if (!v.locks[0]) - goto out; - memcpy (v.locks[0], big, sizeof (posix_lock_t)); - v.locks[0]->fl_type = small->fl_type; - goto done; + if ((big->fl_start == small->fl_start) && (big->fl_end == small->fl_end)) { + /* both edges coincide with big */ + v.locks[0] = __copy_lock(big); + if (!v.locks[0]) { + goto out; } - if ((small->fl_start > big->fl_start) && - (small->fl_end < big->fl_end)) { - /* both edges lie inside big */ - v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t), - gf_locks_mt_posix_lock_t); - if (!v.locks[0]) - goto out; - - v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t), - gf_locks_mt_posix_lock_t); - if (!v.locks[1]) - goto out; - - v.locks[2] = GF_CALLOC (1, sizeof (posix_lock_t), - gf_locks_mt_posix_lock_t); - if (!v.locks[1]) - goto out; - - memcpy (v.locks[0], big, sizeof (posix_lock_t)); - v.locks[0]->fl_end = small->fl_start - 1; - - memcpy (v.locks[1], small, sizeof (posix_lock_t)); - - memcpy (v.locks[2], big, sizeof (posix_lock_t)); - v.locks[2]->fl_start = small->fl_end + 1; - goto done; - + v.locks[0]->fl_type = small->fl_type; + v.locks[0]->user_flock.l_type = small->fl_type; + goto done; + } + + if ((small->fl_start > big->fl_start) && (small->fl_end < big->fl_end)) { + /* both edges lie inside big */ + v.locks[0] = __copy_lock(big); + v.locks[1] = __copy_lock(small); + v.locks[2] = __copy_lock(big); + if ((v.locks[0] == NULL) || (v.locks[1] == NULL) || + (v.locks[2] == NULL)) { + goto out; } - /* one edge coincides with big */ - if (small->fl_start == big->fl_start) { - v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t), - gf_locks_mt_posix_lock_t); - if (!v.locks[0]) - goto out; - - v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t), - gf_locks_mt_posix_lock_t); - if (!v.locks[1]) - goto out; - - memcpy (v.locks[0], big, sizeof (posix_lock_t)); - v.locks[0]->fl_start = small->fl_end + 1; - - memcpy (v.locks[1], small, sizeof (posix_lock_t)); - goto done; + v.locks[0]->fl_end = small->fl_start - 1; + v.locks[2]->fl_start = small->fl_end + 1; + posix_lock_to_flock(v.locks[0], &v.locks[0]->user_flock); + posix_lock_to_flock(v.locks[2], &v.locks[2]->user_flock); + goto done; + } + + /* one edge coincides with big */ + if (small->fl_start == big->fl_start) { + v.locks[0] = __copy_lock(big); + v.locks[1] = __copy_lock(small); + if ((v.locks[0] == NULL) || (v.locks[1] == NULL)) { + goto out; } - if (small->fl_end == big->fl_end) { - v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t), - gf_locks_mt_posix_lock_t); - if (!v.locks[0]) - goto out; - - v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t), - gf_locks_mt_posix_lock_t); - if (!v.locks[1]) - goto out; - - memcpy (v.locks[0], big, sizeof (posix_lock_t)); - v.locks[0]->fl_end = small->fl_start - 1; + v.locks[0]->fl_start = small->fl_end + 1; + posix_lock_to_flock(v.locks[0], &v.locks[0]->user_flock); + goto done; + } - memcpy (v.locks[1], small, sizeof (posix_lock_t)); - goto done; + if (small->fl_end == big->fl_end) { + v.locks[0] = __copy_lock(big); + v.locks[1] = __copy_lock(small); + if ((v.locks[0] == NULL) || (v.locks[1] == NULL)) { + goto out; } - GF_ASSERT (0); - gf_log ("posix-locks", GF_LOG_ERROR, "Unexpected case in subtract_locks"); + v.locks[0]->fl_end = small->fl_start - 1; + posix_lock_to_flock(v.locks[0], &v.locks[0]->user_flock); + goto done; + } + + GF_ASSERT(0); + gf_log("posix-locks", GF_LOG_ERROR, "Unexpected case in subtract_locks"); out: - if (v.locks[0]) { - GF_FREE (v.locks[0]); - v.locks[0] = NULL; - } - if (v.locks[1]) { - GF_FREE (v.locks[1]); - v.locks[1] = NULL; - } - if (v.locks[2]) { - GF_FREE (v.locks[2]); - v.locks[2] = NULL; - } + if (v.locks[0]) { + __destroy_lock(v.locks[0]); + v.locks[0] = NULL; + } + if (v.locks[1]) { + __destroy_lock(v.locks[1]); + v.locks[1] = NULL; + } + if (v.locks[2]) { + __destroy_lock(v.locks[2]); + v.locks[2] = NULL; + } done: - return v; + return v; } static posix_lock_t * -first_conflicting_overlap (pl_inode_t *pl_inode, posix_lock_t *lock) +first_conflicting_overlap(pl_inode_t *pl_inode, posix_lock_t *lock) { - posix_lock_t *l = NULL; - posix_lock_t *conf = NULL; + posix_lock_t *l = NULL; + posix_lock_t *conf = NULL; - pthread_mutex_lock (&pl_inode->mutex); + pthread_mutex_lock(&pl_inode->mutex); + { + list_for_each_entry(l, &pl_inode->ext_list, list) { - list_for_each_entry (l, &pl_inode->ext_list, list) { - if (l->blocked) - continue; - - if (locks_overlap (l, lock)) { - if (same_owner (l, lock)) - continue; - - if ((l->fl_type == F_WRLCK) || - (lock->fl_type == F_WRLCK)) { - conf = l; - goto unlock; - } - } + if (l->blocked) + continue; + + if (locks_overlap(l, lock)) { + if (same_owner(l, lock)) + continue; + + if ((l->fl_type == F_WRLCK) || (lock->fl_type == F_WRLCK)) { + conf = l; + goto unlock; } + } } + } unlock: - pthread_mutex_unlock (&pl_inode->mutex); + pthread_mutex_unlock(&pl_inode->mutex); - return conf; + return conf; } /* @@ -751,351 +789,803 @@ unlock: If {begin} is NULL, then start from the beginning of the list */ static posix_lock_t * -first_overlap (pl_inode_t *pl_inode, posix_lock_t *lock) +first_overlap(pl_inode_t *pl_inode, posix_lock_t *lock) { - posix_lock_t *l = NULL; + posix_lock_t *l = NULL; - list_for_each_entry (l, &pl_inode->ext_list, list) { - if (l->blocked) - continue; + list_for_each_entry(l, &pl_inode->ext_list, list) + { + if (l->blocked) + continue; - if (locks_overlap (l, lock)) - return l; - } + if (locks_overlap(l, lock)) + return l; + } - return NULL; + return NULL; } - - /* Return true if lock is grantable */ static int -__is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock) +__is_lock_grantable(pl_inode_t *pl_inode, posix_lock_t *lock) { - posix_lock_t *l = NULL; - int ret = 1; - - list_for_each_entry (l, &pl_inode->ext_list, list) { - if (!l->blocked && locks_overlap (lock, l)) { - if (((l->fl_type == F_WRLCK) - || (lock->fl_type == F_WRLCK)) - && (lock->fl_type != F_UNLCK) - && !same_owner (l, lock)) { - ret = 0; - break; - } - } + posix_lock_t *l = NULL; + int ret = 1; + + list_for_each_entry(l, &pl_inode->ext_list, list) + { + if (!l->blocked && locks_overlap(lock, l)) { + if (((l->fl_type == F_WRLCK) || (lock->fl_type == F_WRLCK)) && + (lock->fl_type != F_UNLCK) && !same_owner(l, lock)) { + ret = 0; + break; + } } - return ret; + } + return ret; } - -extern void do_blocked_rw (pl_inode_t *); - +extern void +do_blocked_rw(pl_inode_t *); static void -__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock) +__insert_and_merge(pl_inode_t *pl_inode, posix_lock_t *lock) { - posix_lock_t *conf = NULL; - posix_lock_t *t = NULL; - posix_lock_t *sum = NULL; - int i = 0; - struct _values v = { .locks = {0, 0, 0} }; - - list_for_each_entry_safe (conf, t, &pl_inode->ext_list, list) { - if (conf->blocked) - continue; - if (!locks_overlap (conf, lock)) - continue; + posix_lock_t *conf = NULL; + posix_lock_t *t = NULL; + posix_lock_t *sum = NULL; + int i = 0; + struct _values v = {.locks = {0, 0, 0}}; + + list_for_each_entry_safe(conf, t, &pl_inode->ext_list, list) + { + if (conf->blocked) + continue; + if (!locks_overlap(conf, lock)) + continue; + + if (same_owner(conf, lock)) { + if (conf->fl_type == lock->fl_type && + conf->lk_flags == lock->lk_flags) { + sum = add_locks(lock, conf, lock); + + __delete_lock(conf); + __destroy_lock(conf); + + __destroy_lock(lock); + INIT_LIST_HEAD(&sum->list); + posix_lock_to_flock(sum, &sum->user_flock); + __insert_and_merge(pl_inode, sum); - if (same_owner (conf, lock)) { - if (conf->fl_type == lock->fl_type) { - sum = add_locks (lock, conf); + return; + } else { + sum = add_locks(lock, conf, conf); - sum->fl_type = lock->fl_type; - sum->client = lock->client; - sum->fd_num = lock->fd_num; - sum->client_pid = lock->client_pid; - sum->owner = lock->owner; + v = subtract_locks(sum, lock); - __delete_lock (pl_inode, conf); - __destroy_lock (conf); + __delete_lock(conf); + __destroy_lock(conf); - __destroy_lock (lock); - INIT_LIST_HEAD (&sum->list); - posix_lock_to_flock (sum, &sum->user_flock); - __insert_and_merge (pl_inode, sum); + __delete_lock(lock); + __destroy_lock(lock); - return; - } else { - sum = add_locks (lock, conf); + __destroy_lock(sum); - sum->fl_type = conf->fl_type; - sum->client = conf->client; - sum->fd_num = conf->fd_num; - sum->client_pid = conf->client_pid; - sum->owner = conf->owner; + for (i = 0; i < 3; i++) { + if (!v.locks[i]) + continue; - v = subtract_locks (sum, lock); + __insert_and_merge(pl_inode, v.locks[i]); + } - __delete_lock (pl_inode, conf); - __destroy_lock (conf); + __delete_unlck_locks(pl_inode); + return; + } + } - __delete_lock (pl_inode, lock); - __destroy_lock (lock); + if (lock->fl_type == F_UNLCK) { + continue; + } - __destroy_lock (sum); + if ((conf->fl_type == F_RDLCK) && (lock->fl_type == F_RDLCK)) { + __insert_lock(pl_inode, lock); + return; + } + } + + /* no conflicts, so just insert */ + if (lock->fl_type != F_UNLCK) { + __insert_lock(pl_inode, lock); + } else { + __destroy_lock(lock); + } +} - for (i = 0; i < 3; i++) { - if (!v.locks[i]) - continue; +void +__grant_blocked_locks(xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted) +{ + struct list_head tmp_list; + posix_lock_t *l = NULL; + posix_lock_t *tmp = NULL; + posix_lock_t *conf = NULL; + + INIT_LIST_HEAD(&tmp_list); + + list_for_each_entry_safe(l, tmp, &pl_inode->ext_list, list) + { + if (l->blocked) { + conf = first_overlap(pl_inode, l); + if (conf) + continue; + + l->blocked = 0; + list_move_tail(&l->list, &tmp_list); + } + } - INIT_LIST_HEAD (&v.locks[i]->list); - posix_lock_to_flock (v.locks[i], - &v.locks[i]->user_flock); - __insert_and_merge (pl_inode, - v.locks[i]); - } + list_for_each_entry_safe(l, tmp, &tmp_list, list) + { + list_del_init(&l->list); - __delete_unlck_locks (pl_inode); - return; - } - } + if (__is_lock_grantable(pl_inode, l)) { + conf = GF_CALLOC(1, sizeof(*conf), gf_locks_mt_posix_lock_t); - if (lock->fl_type == F_UNLCK) { - continue; - } + if (!conf) { + l->blocked = 1; + __insert_lock(pl_inode, l); + continue; + } - if ((conf->fl_type == F_RDLCK) && (lock->fl_type == F_RDLCK)) { - __insert_lock (pl_inode, lock); - return; - } - } + conf->frame = l->frame; + l->frame = NULL; - /* no conflicts, so just insert */ - if (lock->fl_type != F_UNLCK) { - __insert_lock (pl_inode, lock); + posix_lock_to_flock(l, &conf->user_flock); + + gf_log(this->name, GF_LOG_TRACE, + "%s (pid=%d) lk-owner:%s %" PRId64 " - %" PRId64 + " => Granted", + l->fl_type == F_UNLCK ? "Unlock" : "Lock", l->client_pid, + lkowner_utoa(&l->owner), l->user_flock.l_start, + l->user_flock.l_len); + + __insert_and_merge(pl_inode, l); + + list_add(&conf->list, granted); } else { - __destroy_lock (lock); + l->blocked = 1; + __insert_lock(pl_inode, l); } + } } - void -__grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, struct list_head *granted) +grant_blocked_locks(xlator_t *this, pl_inode_t *pl_inode) { - struct list_head tmp_list; - posix_lock_t *l = NULL; - posix_lock_t *tmp = NULL; - posix_lock_t *conf = NULL; + struct list_head granted_list; + posix_lock_t *tmp = NULL; + posix_lock_t *lock = NULL; + pl_local_t *local = NULL; + INIT_LIST_HEAD(&granted_list); + + pthread_mutex_lock(&pl_inode->mutex); + { + __grant_blocked_locks(this, pl_inode, &granted_list); + } + pthread_mutex_unlock(&pl_inode->mutex); + + list_for_each_entry_safe(lock, tmp, &granted_list, list) + { + list_del_init(&lock->list); + + pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, + 0, 0, NULL); + local = lock->frame->local; + PL_STACK_UNWIND_AND_FREE(local, lk, lock->frame, 0, 0, + &lock->user_flock, NULL); + __destroy_lock(lock); + } + + return; +} - INIT_LIST_HEAD (&tmp_list); +static int +pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode, + posix_lock_t *old_lock) +{ + struct gf_flock flock = { + 0, + }; + posix_lock_t *unlock_lock = NULL; + int32_t op_errno = 0; - list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) { - if (l->blocked) { - conf = first_overlap (pl_inode, l); - if (conf) - continue; + struct list_head granted_list; + posix_lock_t *tmp = NULL; + posix_lock_t *lock = NULL; + pl_local_t *local = NULL; - l->blocked = 0; - list_move_tail (&l->list, &tmp_list); - } - } + int ret = -1; - list_for_each_entry_safe (l, tmp, &tmp_list, list) { - list_del_init (&l->list); + INIT_LIST_HEAD(&granted_list); - if (__is_lock_grantable (pl_inode, l)) { - conf = GF_CALLOC (1, sizeof (*conf), - gf_locks_mt_posix_lock_t); + flock.l_type = F_UNLCK; + flock.l_whence = old_lock->user_flock.l_whence; + flock.l_start = old_lock->user_flock.l_start; + flock.l_len = old_lock->user_flock.l_len; + flock.l_pid = old_lock->user_flock.l_pid; - if (!conf) { - l->blocked = 1; - __insert_lock (pl_inode, l); - continue; - } + unlock_lock = new_posix_lock(&flock, old_lock->client, old_lock->client_pid, + &old_lock->owner, old_lock->fd, + old_lock->lk_flags, 0, &op_errno); + GF_VALIDATE_OR_GOTO(this->name, unlock_lock, out); + ret = 0; - conf->frame = l->frame; - l->frame = NULL; + __insert_and_merge(pl_inode, unlock_lock); - posix_lock_to_flock (l, &conf->user_flock); + __grant_blocked_locks(this, pl_inode, &granted_list); - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Granted", - l->fl_type == F_UNLCK ? "Unlock" : "Lock", - l->client_pid, lkowner_utoa (&l->owner), - l->user_flock.l_start, - l->user_flock.l_len); + list_for_each_entry_safe(lock, tmp, &granted_list, list) + { + list_del_init(&lock->list); - __insert_and_merge (pl_inode, l); + pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, + 0, 0, NULL); + local = lock->frame->local; + PL_STACK_UNWIND_AND_FREE(local, lk, lock->frame, 0, 0, + &lock->user_flock, NULL); + __destroy_lock(lock); + } - list_add (&conf->list, granted); - } else { - l->blocked = 1; - __insert_lock (pl_inode, l); - } - } +out: + return ret; } +int +pl_setlk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, + int can_block) +{ + int ret = 0; -void -grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode) + errno = 0; + + pthread_mutex_lock(&pl_inode->mutex); + { + /* Send unlock before the actual lock to + prevent lock upgrade / downgrade + problems only if: + - it is a blocking call + - it has other conflicting locks + */ + + if (can_block && !(__is_lock_grantable(pl_inode, lock))) { + ret = pl_send_prelock_unlock(this, pl_inode, lock); + if (ret) + gf_log(this->name, GF_LOG_DEBUG, + "Could not send pre-lock " + "unlock"); + } + + if (__is_lock_grantable(pl_inode, lock)) { + if (pl_metalock_is_active(pl_inode)) { + __pl_queue_lock(pl_inode, lock); + pthread_mutex_unlock(&pl_inode->mutex); + ret = -2; + goto out; + } + gf_log(this->name, GF_LOG_TRACE, + "%s (pid=%d) lk-owner:%s %" PRId64 " - %" PRId64 " => OK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, lkowner_utoa(&lock->owner), + lock->user_flock.l_start, lock->user_flock.l_len); + __insert_and_merge(pl_inode, lock); + } else if (can_block) { + if (pl_metalock_is_active(pl_inode)) { + __pl_queue_lock(pl_inode, lock); + pthread_mutex_unlock(&pl_inode->mutex); + ret = -2; + goto out; + } + gf_log(this->name, GF_LOG_TRACE, + "%s (pid=%d) lk-owner:%s %" PRId64 " - %" PRId64 + " => Blocked", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, lkowner_utoa(&lock->owner), + lock->user_flock.l_start, lock->user_flock.l_len); + + pl_trace_block(this, lock->frame, NULL, NULL, F_SETLKW, + &lock->user_flock, NULL); + + lock->blocked = 1; + __insert_lock(pl_inode, lock); + ret = -1; + } else { + gf_log(this->name, GF_LOG_TRACE, + "%s (pid=%d) lk-owner:%s %" PRId64 " - %" PRId64 " => NOK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, lkowner_utoa(&lock->owner), + lock->user_flock.l_start, lock->user_flock.l_len); + errno = EAGAIN; + ret = -1; + } + } + pthread_mutex_unlock(&pl_inode->mutex); + + grant_blocked_locks(this, pl_inode); + + do_blocked_rw(pl_inode); + +out: + return ret; +} + +posix_lock_t * +pl_getlk(pl_inode_t *pl_inode, posix_lock_t *lock) { - struct list_head granted_list; - posix_lock_t *tmp = NULL; - posix_lock_t *lock = NULL; + posix_lock_t *conf = first_conflicting_overlap(pl_inode, lock); + if (conf == NULL) { + lock->fl_type = F_UNLCK; + return lock; + } + + return conf; +} - INIT_LIST_HEAD (&granted_list); +gf_boolean_t +pl_does_monkey_want_stuck_lock() +{ + long int monkey_unlock_rand = 0; + long int monkey_unlock_rand_rem = 0; + + /* coverity[DC.WEAK_CRYPTO] */ + monkey_unlock_rand = random(); + monkey_unlock_rand_rem = monkey_unlock_rand % 100; + if (monkey_unlock_rand_rem == 0) + return _gf_true; + return _gf_false; +} - pthread_mutex_lock (&pl_inode->mutex); +int +pl_lock_preempt(pl_inode_t *pl_inode, posix_lock_t *reqlock) +{ + posix_lock_t *lock = NULL; + posix_lock_t *i = NULL; + pl_rw_req_t *rw = NULL; + pl_rw_req_t *itr = NULL; + struct list_head unwind_blist = { + 0, + }; + struct list_head unwind_rw_list = { + 0, + }; + int ret = 0; + + INIT_LIST_HEAD(&unwind_blist); + INIT_LIST_HEAD(&unwind_rw_list); + + pthread_mutex_lock(&pl_inode->mutex); + { + /* + - go through the lock list + - remove all locks from different owners + - same owner locks will be added or substracted based on + the new request + - add the new lock + */ + list_for_each_entry_safe(lock, i, &pl_inode->ext_list, list) { - __grant_blocked_locks (this, pl_inode, &granted_list); + if (lock->blocked) { + list_del_init(&lock->list); + list_add(&lock->list, &unwind_blist); + continue; + } + + if (locks_overlap(lock, reqlock)) { + if (same_owner(lock, reqlock)) + continue; + + /* remove conflicting locks */ + list_del_init(&lock->list); + __delete_lock(lock); + __destroy_lock(lock); + } } - pthread_mutex_unlock (&pl_inode->mutex); - list_for_each_entry_safe (lock, tmp, &granted_list, list) { - list_del_init (&lock->list); + __insert_and_merge(pl_inode, reqlock); - pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW, - &lock->user_flock, 0, 0, NULL); + list_for_each_entry_safe(rw, itr, &pl_inode->rw_list, list) + { + list_del_init(&rw->list); + list_add(&rw->list, &unwind_rw_list); + } + } + pthread_mutex_unlock(&pl_inode->mutex); + + /* unwind blocked locks */ + list_for_each_entry_safe(lock, i, &unwind_blist, list) + { + PL_STACK_UNWIND_AND_FREE(((pl_local_t *)lock->frame->local), lk, + lock->frame, -1, EBUSY, &lock->user_flock, + NULL); + __destroy_lock(lock); + } + + /* unwind blocked IOs */ + list_for_each_entry_safe(rw, itr, &unwind_rw_list, list) + { + pl_clean_local(rw->stub->frame->local); + call_unwind_error(rw->stub, -1, EBUSY); + } + + return ret; +} - STACK_UNWIND_STRICT (lk, lock->frame, 0, 0, - &lock->user_flock, NULL); +/* Return true in case we need to ensure mandatory-locking + * semantics under different modes. + */ +gf_boolean_t +pl_is_mandatory_locking_enabled(pl_inode_t *pl_inode) +{ + posix_locks_private_t *priv = THIS->private; - GF_FREE (lock); - } + if (priv->mandatory_mode == MLK_FILE_BASED && pl_inode->mandatory) + return _gf_true; + else if (priv->mandatory_mode == MLK_FORCED || + priv->mandatory_mode == MLK_OPTIMAL) + return _gf_true; - return; + return _gf_false; } -static int -pl_send_prelock_unlock (xlator_t *this, pl_inode_t *pl_inode, - posix_lock_t *old_lock) +void +pl_clean_local(pl_local_t *local) { - struct gf_flock flock = {0,}; - posix_lock_t *unlock_lock = NULL; + if (!local) + return; - struct list_head granted_list; - posix_lock_t *tmp = NULL; - posix_lock_t *lock = NULL; + if (local->inodelk_dom_count_req) + data_unref(local->inodelk_dom_count_req); + loc_wipe(&local->loc[0]); + loc_wipe(&local->loc[1]); + if (local->fd) + fd_unref(local->fd); + if (local->inode) + inode_unref(local->inode); + mem_put(local); +} - int ret = -1; +/* +TODO: detach local initialization from PL_LOCAL_GET_REQUESTS and add it here +*/ +int +pl_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) +{ + pl_local_t *local = NULL; + + if (!loc && !fd) { + return -1; + } + + if (!frame->local) { + local = mem_get0(this->local_pool); + if (!local) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, + "mem allocation failed"); + return -1; + } - INIT_LIST_HEAD (&granted_list); + local->inode = (loc ? inode_ref(loc->inode) : inode_ref(fd->inode)); - flock.l_type = F_UNLCK; - flock.l_whence = old_lock->user_flock.l_whence; - flock.l_start = old_lock->user_flock.l_start; - flock.l_len = old_lock->user_flock.l_len; + frame->local = local; + } + return 0; +} - unlock_lock = new_posix_lock (&flock, old_lock->client, - old_lock->client_pid, &old_lock->owner, - old_lock->fd); - GF_VALIDATE_OR_GOTO (this->name, unlock_lock, out); - ret = 0; +gf_boolean_t +pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client) +{ + if (client && (client->opversion < GD_OP_VERSION_7_0)) { + return _gf_true; + } + + if (is_lk_owner_null(owner)) { + return _gf_false; + } + return _gf_true; +} - __insert_and_merge (pl_inode, unlock_lock); +static int32_t +pl_inode_from_loc(loc_t *loc, inode_t **pinode) +{ + inode_t *inode = NULL; + int32_t error = 0; + + if (loc->inode != NULL) { + inode = inode_ref(loc->inode); + goto done; + } + + if (loc->parent == NULL) { + error = EINVAL; + goto done; + } + + if (!gf_uuid_is_null(loc->gfid)) { + inode = inode_find(loc->parent->table, loc->gfid); + if (inode != NULL) { + goto done; + } + } - __grant_blocked_locks (this, pl_inode, &granted_list); + if (loc->name == NULL) { + error = EINVAL; + goto done; + } - list_for_each_entry_safe (lock, tmp, &granted_list, list) { - list_del_init (&lock->list); + inode = inode_grep(loc->parent->table, loc->parent, loc->name); + if (inode == NULL) { + /* We haven't found any inode. This means that the file doesn't exist + * or that even if it exists, we don't have any knowledge about it, so + * we don't have locks on it either, which is fine for our purposes. */ + goto done; + } - pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW, - &lock->user_flock, 0, 0, NULL); +done: + *pinode = inode; - STACK_UNWIND_STRICT (lk, lock->frame, 0, 0, - &lock->user_flock, NULL); + return error; +} + +static gf_boolean_t +pl_inode_has_owners(xlator_t *xl, client_t *client, pl_inode_t *pl_inode, + struct timespec *now, struct list_head *contend) +{ + pl_dom_list_t *dom; + pl_inode_lock_t *lock; + gf_boolean_t has_owners = _gf_false; - GF_FREE (lock); + list_for_each_entry(dom, &pl_inode->dom_list, inode_list) + { + list_for_each_entry(lock, &dom->inodelk_list, list) + { + /* If the lock belongs to the same client, we assume it's related + * to the same operation, so we allow the removal to continue. */ + if (lock->client == client) { + continue; + } + /* If the lock belongs to an internal process, we don't block the + * removal. */ + if (lock->client_pid < 0) { + continue; + } + if (contend == NULL) { + return _gf_true; + } + has_owners = _gf_true; + inodelk_contention_notify_check(xl, lock, now, contend); } + } -out: - return ret; + return has_owners; } -int -pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, - int can_block) +int32_t +pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc, + pl_inode_t **ppl_inode, struct list_head *contend) { - int ret = 0; + struct timespec now; + inode_t *inode; + pl_inode_t *pl_inode; + int32_t error; + + pl_inode = NULL; + + error = pl_inode_from_loc(loc, &inode); + if ((error != 0) || (inode == NULL)) { + goto done; + } + + pl_inode = pl_inode_get(xl, inode, NULL); + if (pl_inode == NULL) { + inode_unref(inode); + error = ENOMEM; + goto done; + } + + /* pl_inode_from_loc() already increments ref count for inode, so + * we only assign here our reference. */ + pl_inode->inode = inode; + + timespec_now(&now); + + pthread_mutex_lock(&pl_inode->mutex); + + if (pl_inode->removed) { + error = ESTALE; + goto unlock; + } + + if (pl_inode_has_owners(xl, frame->root->client, pl_inode, &now, contend)) { + error = -1; + /* We skip the unlock here because the caller must create a stub when + * we return -1 and do a call to pl_inode_remove_complete(), which + * assumes the lock is still acquired and will release it once + * everything else is prepared. */ + goto done; + } + + pl_inode->is_locked = _gf_true; + pl_inode->remove_running++; - errno = 0; +unlock: + pthread_mutex_unlock(&pl_inode->mutex); - pthread_mutex_lock (&pl_inode->mutex); - { - /* Send unlock before the actual lock to - prevent lock upgrade / downgrade - problems only if: - - it is a blocking call - - it has other conflicting locks - */ - - if (can_block && - !(__is_lock_grantable (pl_inode, lock))) { - ret = pl_send_prelock_unlock (this, pl_inode, - lock); - if (ret) - gf_log (this->name, GF_LOG_DEBUG, - "Could not send pre-lock " - "unlock"); - } +done: + *ppl_inode = pl_inode; - if (__is_lock_grantable (pl_inode, lock)) { - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => OK", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", - lock->client_pid, - lkowner_utoa (&lock->owner), - lock->user_flock.l_start, - lock->user_flock.l_len); - __insert_and_merge (pl_inode, lock); - } else if (can_block) { - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", - lock->client_pid, - lkowner_utoa (&lock->owner), - lock->user_flock.l_start, - lock->user_flock.l_len); - lock->blocked = 1; - __insert_lock (pl_inode, lock); - ret = -1; - } else { - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => NOK", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", - lock->client_pid, - lkowner_utoa (&lock->owner), - lock->user_flock.l_start, - lock->user_flock.l_len); - errno = EAGAIN; - ret = -1; - } + return error; +} + +int32_t +pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub, + struct list_head *contend) +{ + pl_inode_lock_t *lock; + int32_t error = -1; + + if (stub != NULL) { + list_add_tail(&stub->list, &pl_inode->waiting); + pl_inode->is_locked = _gf_true; + } else { + error = ENOMEM; + + while (!list_empty(contend)) { + lock = list_first_entry(contend, pl_inode_lock_t, list); + list_del_init(&lock->list); + __pl_inodelk_unref(lock); } - pthread_mutex_unlock (&pl_inode->mutex); + } - grant_blocked_locks (this, pl_inode); + pthread_mutex_unlock(&pl_inode->mutex); - do_blocked_rw (pl_inode); + if (error < 0) { + inodelk_contention_notify(xl, contend); + } - return ret; + inode_unref(pl_inode->inode); + + return error; } +void +pl_inode_remove_wake(struct list_head *list) +{ + call_stub_t *stub; -posix_lock_t * -pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock) + while (!list_empty(list)) { + stub = list_first_entry(list, call_stub_t, list); + list_del_init(&stub->list); + + call_resume(stub); + } +} + +void +pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error) { - posix_lock_t *conf = NULL; + struct list_head contend, granted; + struct timespec now; + pl_dom_list_t *dom; + + if (pl_inode == NULL) { + return; + } - conf = first_conflicting_overlap (pl_inode, lock); + INIT_LIST_HEAD(&contend); + INIT_LIST_HEAD(&granted); + timespec_now(&now); - if (conf == NULL) { - lock->fl_type = F_UNLCK; - return lock; + pthread_mutex_lock(&pl_inode->mutex); + + if (error == 0) { + if (pl_inode->links >= 0) { + pl_inode->links--; + } + if (pl_inode->links == 0) { + pl_inode->removed = _gf_true; + } + } + + pl_inode->remove_running--; + + if ((pl_inode->remove_running == 0) && list_empty(&pl_inode->waiting)) { + pl_inode->is_locked = _gf_false; + + list_for_each_entry(dom, &pl_inode->dom_list, inode_list) + { + __grant_blocked_inode_locks(xl, pl_inode, &granted, dom, &now, + &contend); } + } + + pthread_mutex_unlock(&pl_inode->mutex); - return conf; + unwind_granted_inodes(xl, pl_inode, &granted); + + inodelk_contention_notify(xl, &contend); + + inode_unref(pl_inode->inode); +} + +void +pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode, + struct list_head *list) +{ + call_stub_t *stub, *tmp; + + if (!pl_inode->is_locked) { + return; + } + + list_for_each_entry_safe(stub, tmp, &pl_inode->waiting, list) + { + if (!pl_inode_has_owners(xl, stub->frame->root->client, pl_inode, NULL, + NULL)) { + list_move_tail(&stub->list, list); + } + } } +/* This function determines if an inodelk attempt can be done now or it needs + * to wait. + * + * Possible return values: + * < 0: An error occurred. Currently only -ESTALE can be returned if the + * inode has been deleted previously by unlink/rmdir/rename + * = 0: The lock can be attempted. + * > 0: The lock needs to wait because a conflicting remove operation is + * ongoing. + */ +int32_t +pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock) +{ + pl_dom_list_t *dom; + pl_inode_lock_t *ilock; + + /* If the inode has been deleted, we won't allow any lock. */ + if (pl_inode->removed) { + return -ESTALE; + } + + /* We only synchronize with locks made for regular operations coming from + * the user. Locks done for internal purposes are hard to control and could + * lead to long delays or deadlocks quite easily. */ + if (lock->client_pid < 0) { + return 0; + } + if (!pl_inode->is_locked) { + return 0; + } + if (pl_inode->remove_running > 0) { + return 1; + } + + list_for_each_entry(dom, &pl_inode->dom_list, inode_list) + { + list_for_each_entry(ilock, &dom->inodelk_list, list) + { + /* If a lock from the same client is already granted, we allow this + * one to continue. This is necessary to prevent deadlocks when + * multiple locks are taken for the same operation. + * + * On the other side it's unlikely that the same client sends + * completely unrelated locks for the same inode. + */ + if (ilock->client == lock->client) { + return 0; + } + } + } + + return 1; +} |
