Diffstat (limited to 'xlators/features/locks/src')
 xlators/features/locks/src/clear.c           |   58
 xlators/features/locks/src/clear.h           |    8
 xlators/features/locks/src/common.c          |  580
 xlators/features/locks/src/common.h          |   87
 xlators/features/locks/src/entrylk.c         |  106
 xlators/features/locks/src/inodelk.c         |  249
 xlators/features/locks/src/locks-mem-types.h |    3
 xlators/features/locks/src/locks.h           |  105
 xlators/features/locks/src/pl-messages.h     |    2
 xlators/features/locks/src/posix.c           | 1442
 xlators/features/locks/src/reservelk.c       |   78
 11 files changed, 1999 insertions(+), 719 deletions(-)
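
One of the larger behavioural changes in this patch is the synchronization between inodelk requests and in-flight unlink/rmdir/rename operations via the new pl_inode_remove_* helpers. The following is a condensed sketch (not verbatim from the patch) of how the reworked __lock_inodelk() in the inodelk.c hunks below is expected to consume pl_inode_remove_inodelk(); the wrapper name lock_or_block is hypothetical, while the helpers it calls (pl_inode_remove_inodelk, __inodelk_grantable, __lock_blocked_add, __pl_inodelk_ref, gf_time) are the ones introduced or used by this commit.

/* Sketch only, assuming the locks-xlator internal types and helpers shown in
 * the hunks below. pl_inode_remove_inodelk() returns -ESTALE if the inode has
 * already been removed, 0 if the lock may be attempted now, and > 0 if it must
 * wait for a conflicting remove operation to finish. */
static int
lock_or_block(xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
              pl_inode_lock_t *lock, int can_block, struct timespec *now,
              struct list_head *contend)
{
    pl_inode_lock_t *conf = NULL;
    int ret = pl_inode_remove_inodelk(pl_inode, lock);

    if (ret < 0)
        return ret; /* e.g. -ESTALE: the inode is gone, fail the lock */

    if (ret == 0)
        conf = __inodelk_grantable(this, dom, lock, now, contend);

    if (ret > 0 || conf != NULL) /* a remove is running, or a lock conflicts */
        return __lock_blocked_add(this, dom, lock, can_block);

    __pl_inodelk_ref(lock);
    lock->granted_time = gf_time(); /* timestamps are now plain time_t */
    list_add(&lock->list, &dom->inodelk_list);
    return 0;
}
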
diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c index 0966ee753d6..ab1eac68a53 100644 --- a/xlators/features/locks/src/clear.c +++ b/xlators/features/locks/src/clear.c @@ -12,17 +12,23 @@ #include <limits.h> #include <pthread.h> -#include "glusterfs.h" -#include "compat.h" -#include "xlator.h" -#include "logging.h" -#include "common-utils.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/compat.h> +#include <glusterfs/xlator.h> +#include <glusterfs/logging.h> +#include <glusterfs/common-utils.h> #include "locks.h" #include "common.h" -#include "statedump.h" +#include <glusterfs/statedump.h> #include "clear.h" +const char *clrlk_type_names[CLRLK_TYPE_MAX] = { + [CLRLK_INODE] = "inode", + [CLRLK_ENTRY] = "entry", + [CLRLK_POSIX] = "posix", +}; + int clrlk_get_kind(char *kind) { @@ -175,9 +181,9 @@ clrlk_clear_posixlk(xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args, if (plock->blocked) { bcount++; pl_trace_out(this, plock->frame, NULL, NULL, F_SETLKW, - &plock->user_flock, -1, EAGAIN, NULL); + &plock->user_flock, -1, EINTR, NULL); - STACK_UNWIND_STRICT(lk, plock->frame, -1, EAGAIN, + STACK_UNWIND_STRICT(lk, plock->frame, -1, EINTR, &plock->user_flock, NULL); } else { @@ -254,14 +260,16 @@ blkd: } pthread_mutex_unlock(&pl_inode->mutex); - list_for_each_entry_safe(ilock, tmp, &released, blocked_locks) - { - list_del_init(&ilock->blocked_locks); - pl_trace_out(this, ilock->frame, NULL, NULL, F_SETLKW, - &ilock->user_flock, -1, EAGAIN, ilock->volume); - STACK_UNWIND_STRICT(inodelk, ilock->frame, -1, EAGAIN, NULL); - // No need to take lock as the locks are only in one list - __pl_inodelk_unref(ilock); + if (!list_empty(&released)) { + list_for_each_entry_safe(ilock, tmp, &released, blocked_locks) + { + list_del_init(&ilock->blocked_locks); + pl_trace_out(this, ilock->frame, NULL, NULL, F_SETLKW, + &ilock->user_flock, -1, EAGAIN, ilock->volume); + STACK_UNWIND_STRICT(inodelk, ilock->frame, -1, EAGAIN, NULL); + // No need to take lock as the locks are only in one list + __pl_inodelk_unref(ilock); + } } if (!(args->kind & CLRLK_GRANTED)) { @@ -357,15 +365,17 @@ blkd: } pthread_mutex_unlock(&pl_inode->mutex); - list_for_each_entry_safe(elock, tmp, &released, blocked_locks) - { - list_del_init(&elock->blocked_locks); - entrylk_trace_out(this, elock->frame, elock->volume, NULL, NULL, - elock->basename, ENTRYLK_LOCK, elock->type, -1, - EAGAIN); - STACK_UNWIND_STRICT(entrylk, elock->frame, -1, EAGAIN, NULL); + if (!list_empty(&released)) { + list_for_each_entry_safe(elock, tmp, &released, blocked_locks) + { + list_del_init(&elock->blocked_locks); + entrylk_trace_out(this, elock->frame, elock->volume, NULL, NULL, + elock->basename, ENTRYLK_LOCK, elock->type, -1, + EAGAIN); + STACK_UNWIND_STRICT(entrylk, elock->frame, -1, EAGAIN, NULL); - __pl_entrylk_unref(elock); + __pl_entrylk_unref(elock); + } } if (!(args->kind & CLRLK_GRANTED)) { diff --git a/xlators/features/locks/src/clear.h b/xlators/features/locks/src/clear.h index 08662746f98..bc118cb1b81 100644 --- a/xlators/features/locks/src/clear.h +++ b/xlators/features/locks/src/clear.h @@ -10,9 +10,9 @@ #ifndef __CLEAR_H__ #define __CLEAR_H__ -#include "compat-errno.h" -#include "stack.h" -#include "call-stub.h" +#include <glusterfs/compat-errno.h> +#include <glusterfs/stack.h> +#include <glusterfs/call-stub.h> #include "locks.h" typedef enum { @@ -22,6 +22,8 @@ typedef enum { CLRLK_TYPE_MAX } clrlk_type; +extern const char *clrlk_type_names[]; + typedef enum { CLRLK_BLOCKED = 1, CLRLK_GRANTED, diff 
--git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c index a953e0d1a4a..a2c6be93e03 100644 --- a/xlators/features/locks/src/common.c +++ b/xlators/features/locks/src/common.c @@ -12,11 +12,10 @@ #include <limits.h> #include <pthread.h> -#include "glusterfs.h" -#include "compat.h" -#include "xlator.h" -#include "logging.h" -#include "common-utils.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/compat.h> +#include <glusterfs/logging.h> +#include <glusterfs/syncop.h> #include "locks.h" #include "common.h" @@ -213,13 +212,11 @@ void pl_trace_in(xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, int cmd, struct gf_flock *flock, const char *domain) { - posix_locks_private_t *priv = NULL; + posix_locks_private_t *priv = this->private; char pl_locker[256]; char pl_lockee[256]; char pl_lock[256]; - priv = this->private; - if (!priv->trace) return; @@ -291,13 +288,11 @@ pl_trace_block(xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, int cmd, struct gf_flock *flock, const char *domain) { - posix_locks_private_t *priv = NULL; + posix_locks_private_t *priv = this->private; char pl_locker[256]; char pl_lockee[256]; char pl_lock[256]; - priv = this->private; - if (!priv->trace) return; @@ -326,7 +321,7 @@ pl_trace_flush(xlator_t *this, call_frame_t *frame, fd_t *fd) if (!priv->trace) return; - pl_inode = pl_inode_get(this, fd->inode); + pl_inode = pl_inode_get(this, fd->inode, NULL); if (pl_inode && __pl_inode_is_empty(pl_inode)) return; @@ -362,7 +357,9 @@ pl_update_refkeeper(xlator_t *this, inode_t *inode) int need_unref = 0; int need_ref = 0; - pl_inode = pl_inode_get(this, inode); + pl_inode = pl_inode_get(this, inode, NULL); + if (!pl_inode) + return; pthread_mutex_lock(&pl_inode->mutex); { @@ -387,8 +384,51 @@ pl_update_refkeeper(xlator_t *this, inode_t *inode) inode_ref(inode); } +/* Get lock enforcement info from disk */ +int +pl_fetch_mlock_info_from_disk(xlator_t *this, pl_inode_t *pl_inode, + pl_local_t *local) +{ + dict_t *xdata_rsp = NULL; + int ret = 0; + int op_ret = 0; + + if (!local) { + return -1; + } + + if (local->fd) { + op_ret = syncop_fgetxattr(this, local->fd, &xdata_rsp, + GF_ENFORCE_MANDATORY_LOCK, NULL, NULL); + } else { + op_ret = syncop_getxattr(this, &local->loc[0], &xdata_rsp, + GF_ENFORCE_MANDATORY_LOCK, NULL, NULL); + } + + pthread_mutex_lock(&pl_inode->mutex); + { + if (op_ret >= 0) { + pl_inode->mlock_enforced = _gf_true; + pl_inode->check_mlock_info = _gf_false; + } else { + gf_msg(this->name, GF_LOG_WARNING, -op_ret, 0, + "getxattr failed with %d", op_ret); + pl_inode->mlock_enforced = _gf_false; + + if (-op_ret == ENODATA) { + pl_inode->check_mlock_info = _gf_false; + } else { + pl_inode->check_mlock_info = _gf_true; + } + } + } + pthread_mutex_unlock(&pl_inode->mutex); + + return ret; +} + pl_inode_t * -pl_inode_get(xlator_t *this, inode_t *inode) +pl_inode_get(xlator_t *this, inode_t *inode, pl_local_t *local) { uint64_t tmp_pl_inode = 0; pl_inode_t *pl_inode = NULL; @@ -401,6 +441,7 @@ pl_inode_get(xlator_t *this, inode_t *inode) pl_inode = (pl_inode_t *)(long)tmp_pl_inode; goto unlock; } + pl_inode = GF_CALLOC(1, sizeof(*pl_inode), gf_locks_mt_pl_inode_t); if (!pl_inode) { goto unlock; @@ -409,6 +450,7 @@ pl_inode_get(xlator_t *this, inode_t *inode) gf_log(this->name, GF_LOG_TRACE, "Allocating new pl inode"); pthread_mutex_init(&pl_inode->mutex, NULL); + pthread_cond_init(&pl_inode->check_fop_wind_count, 0); INIT_LIST_HEAD(&pl_inode->dom_list); INIT_LIST_HEAD(&pl_inode->ext_list); @@ -418,8 +460,16 @@ 
pl_inode_get(xlator_t *this, inode_t *inode) INIT_LIST_HEAD(&pl_inode->blocked_calls); INIT_LIST_HEAD(&pl_inode->metalk_list); INIT_LIST_HEAD(&pl_inode->queued_locks); + INIT_LIST_HEAD(&pl_inode->waiting); gf_uuid_copy(pl_inode->gfid, inode->gfid); + pl_inode->check_mlock_info = _gf_true; + pl_inode->mlock_enforced = _gf_false; + + /* -2 means never looked up. -1 means something went wrong and link + * tracking is disabled. */ + pl_inode->links = -2; + ret = __inode_ctx_put(inode, this, (uint64_t)(long)(pl_inode)); if (ret) { pthread_mutex_destroy(&pl_inode->mutex); @@ -431,13 +481,23 @@ pl_inode_get(xlator_t *this, inode_t *inode) unlock: UNLOCK(&inode->lock); + if ((pl_inode != NULL) && pl_is_mandatory_locking_enabled(pl_inode) && + pl_inode->check_mlock_info && local) { + /* Note: The lock enforcement information per file can be stored in the + attribute flag of stat(x) in posix. With that there won't be a need + for doing getxattr post a reboot + */ + pl_fetch_mlock_info_from_disk(this, pl_inode, local); + } + return pl_inode; } /* Create a new posix_lock_t */ posix_lock_t * new_posix_lock(struct gf_flock *flock, client_t *client, pid_t client_pid, - gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags, int blocking) + gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags, int blocking, + int32_t *op_errno) { posix_lock_t *lock = NULL; @@ -445,8 +505,14 @@ new_posix_lock(struct gf_flock *flock, client_t *client, pid_t client_pid, GF_VALIDATE_OR_GOTO("posix-locks", client, out); GF_VALIDATE_OR_GOTO("posix-locks", fd, out); + if (!pl_is_lk_owner_valid(owner, client)) { + *op_errno = EINVAL; + goto out; + } + lock = GF_CALLOC(1, sizeof(posix_lock_t), gf_locks_mt_posix_lock_t); if (!lock) { + *op_errno = ENOMEM; goto out; } @@ -464,6 +530,7 @@ new_posix_lock(struct gf_flock *flock, client_t *client, pid_t client_pid, if (lock->client_uid == NULL) { GF_FREE(lock); lock = NULL; + *op_errno = ENOMEM; goto out; } @@ -538,13 +605,11 @@ static void __insert_lock(pl_inode_t *pl_inode, posix_lock_t *lock) { if (lock->blocked) - gettimeofday(&lock->blkd_time, NULL); + lock->blkd_time = gf_time(); else - gettimeofday(&lock->granted_time, NULL); + lock->granted_time = gf_time(); list_add_tail(&lock->list, &pl_inode->ext_list); - - return; } /* Return true if the locks overlap, false otherwise */ @@ -900,7 +965,7 @@ grant_blocked_locks(xlator_t *this, pl_inode_t *pl_inode) struct list_head granted_list; posix_lock_t *tmp = NULL; posix_lock_t *lock = NULL; - + pl_local_t *local = NULL; INIT_LIST_HEAD(&granted_list); pthread_mutex_lock(&pl_inode->mutex); @@ -915,9 +980,9 @@ grant_blocked_locks(xlator_t *this, pl_inode_t *pl_inode) pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, 0, 0, NULL); - - STACK_UNWIND_STRICT(lk, lock->frame, 0, 0, &lock->user_flock, NULL); - + local = lock->frame->local; + PL_STACK_UNWIND_AND_FREE(local, lk, lock->frame, 0, 0, + &lock->user_flock, NULL); __destroy_lock(lock); } @@ -932,10 +997,12 @@ pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode, 0, }; posix_lock_t *unlock_lock = NULL; + int32_t op_errno = 0; struct list_head granted_list; posix_lock_t *tmp = NULL; posix_lock_t *lock = NULL; + pl_local_t *local = NULL; int ret = -1; @@ -949,7 +1016,7 @@ pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode, unlock_lock = new_posix_lock(&flock, old_lock->client, old_lock->client_pid, &old_lock->owner, old_lock->fd, - old_lock->lk_flags, 0); + old_lock->lk_flags, 0, &op_errno); GF_VALIDATE_OR_GOTO(this->name, unlock_lock, out); ret = 0; @@ -963,9 
+1030,9 @@ pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode, pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, 0, 0, NULL); - - STACK_UNWIND_STRICT(lk, lock->frame, 0, 0, &lock->user_flock, NULL); - + local = lock->frame->local; + PL_STACK_UNWIND_AND_FREE(local, lk, lock->frame, 0, 0, + &lock->user_flock, NULL); __destroy_lock(lock); } @@ -1000,7 +1067,7 @@ pl_setlk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, if (__is_lock_grantable(pl_inode, lock)) { if (pl_metalock_is_active(pl_inode)) { - __pl_queue_lock(pl_inode, lock, can_block); + __pl_queue_lock(pl_inode, lock); pthread_mutex_unlock(&pl_inode->mutex); ret = -2; goto out; @@ -1013,7 +1080,7 @@ pl_setlk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, __insert_and_merge(pl_inode, lock); } else if (can_block) { if (pl_metalock_is_active(pl_inode)) { - __pl_queue_lock(pl_inode, lock, can_block); + __pl_queue_lock(pl_inode, lock); pthread_mutex_unlock(&pl_inode->mutex); ret = -2; goto out; @@ -1024,6 +1091,10 @@ pl_setlk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid, lkowner_utoa(&lock->owner), lock->user_flock.l_start, lock->user_flock.l_len); + + pl_trace_block(this, lock->frame, NULL, NULL, F_SETLKW, + &lock->user_flock, NULL); + lock->blocked = 1; __insert_lock(pl_inode, lock); ret = -1; @@ -1050,10 +1121,7 @@ out: posix_lock_t * pl_getlk(pl_inode_t *pl_inode, posix_lock_t *lock) { - posix_lock_t *conf = NULL; - - conf = first_conflicting_overlap(pl_inode, lock); - + posix_lock_t *conf = first_conflicting_overlap(pl_inode, lock); if (conf == NULL) { lock->fl_type = F_UNLCK; return lock; @@ -1075,3 +1143,449 @@ pl_does_monkey_want_stuck_lock() return _gf_true; return _gf_false; } + +int +pl_lock_preempt(pl_inode_t *pl_inode, posix_lock_t *reqlock) +{ + posix_lock_t *lock = NULL; + posix_lock_t *i = NULL; + pl_rw_req_t *rw = NULL; + pl_rw_req_t *itr = NULL; + struct list_head unwind_blist = { + 0, + }; + struct list_head unwind_rw_list = { + 0, + }; + int ret = 0; + + INIT_LIST_HEAD(&unwind_blist); + INIT_LIST_HEAD(&unwind_rw_list); + + pthread_mutex_lock(&pl_inode->mutex); + { + /* + - go through the lock list + - remove all locks from different owners + - same owner locks will be added or substracted based on + the new request + - add the new lock + */ + list_for_each_entry_safe(lock, i, &pl_inode->ext_list, list) + { + if (lock->blocked) { + list_del_init(&lock->list); + list_add(&lock->list, &unwind_blist); + continue; + } + + if (locks_overlap(lock, reqlock)) { + if (same_owner(lock, reqlock)) + continue; + + /* remove conflicting locks */ + list_del_init(&lock->list); + __delete_lock(lock); + __destroy_lock(lock); + } + } + + __insert_and_merge(pl_inode, reqlock); + + list_for_each_entry_safe(rw, itr, &pl_inode->rw_list, list) + { + list_del_init(&rw->list); + list_add(&rw->list, &unwind_rw_list); + } + } + pthread_mutex_unlock(&pl_inode->mutex); + + /* unwind blocked locks */ + list_for_each_entry_safe(lock, i, &unwind_blist, list) + { + PL_STACK_UNWIND_AND_FREE(((pl_local_t *)lock->frame->local), lk, + lock->frame, -1, EBUSY, &lock->user_flock, + NULL); + __destroy_lock(lock); + } + + /* unwind blocked IOs */ + list_for_each_entry_safe(rw, itr, &unwind_rw_list, list) + { + pl_clean_local(rw->stub->frame->local); + call_unwind_error(rw->stub, -1, EBUSY); + } + + return ret; +} + +/* Return true in case we need to ensure mandatory-locking + * semantics under different modes. 
+ */ +gf_boolean_t +pl_is_mandatory_locking_enabled(pl_inode_t *pl_inode) +{ + posix_locks_private_t *priv = THIS->private; + + if (priv->mandatory_mode == MLK_FILE_BASED && pl_inode->mandatory) + return _gf_true; + else if (priv->mandatory_mode == MLK_FORCED || + priv->mandatory_mode == MLK_OPTIMAL) + return _gf_true; + + return _gf_false; +} + +void +pl_clean_local(pl_local_t *local) +{ + if (!local) + return; + + if (local->inodelk_dom_count_req) + data_unref(local->inodelk_dom_count_req); + loc_wipe(&local->loc[0]); + loc_wipe(&local->loc[1]); + if (local->fd) + fd_unref(local->fd); + if (local->inode) + inode_unref(local->inode); + mem_put(local); +} + +/* +TODO: detach local initialization from PL_LOCAL_GET_REQUESTS and add it here +*/ +int +pl_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) +{ + pl_local_t *local = NULL; + + if (!loc && !fd) { + return -1; + } + + if (!frame->local) { + local = mem_get0(this->local_pool); + if (!local) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, + "mem allocation failed"); + return -1; + } + + local->inode = (loc ? inode_ref(loc->inode) : inode_ref(fd->inode)); + + frame->local = local; + } + + return 0; +} + +gf_boolean_t +pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client) +{ + if (client && (client->opversion < GD_OP_VERSION_7_0)) { + return _gf_true; + } + + if (is_lk_owner_null(owner)) { + return _gf_false; + } + return _gf_true; +} + +static int32_t +pl_inode_from_loc(loc_t *loc, inode_t **pinode) +{ + inode_t *inode = NULL; + int32_t error = 0; + + if (loc->inode != NULL) { + inode = inode_ref(loc->inode); + goto done; + } + + if (loc->parent == NULL) { + error = EINVAL; + goto done; + } + + if (!gf_uuid_is_null(loc->gfid)) { + inode = inode_find(loc->parent->table, loc->gfid); + if (inode != NULL) { + goto done; + } + } + + if (loc->name == NULL) { + error = EINVAL; + goto done; + } + + inode = inode_grep(loc->parent->table, loc->parent, loc->name); + if (inode == NULL) { + /* We haven't found any inode. This means that the file doesn't exist + * or that even if it exists, we don't have any knowledge about it, so + * we don't have locks on it either, which is fine for our purposes. */ + goto done; + } + +done: + *pinode = inode; + + return error; +} + +static gf_boolean_t +pl_inode_has_owners(xlator_t *xl, client_t *client, pl_inode_t *pl_inode, + struct timespec *now, struct list_head *contend) +{ + pl_dom_list_t *dom; + pl_inode_lock_t *lock; + gf_boolean_t has_owners = _gf_false; + + list_for_each_entry(dom, &pl_inode->dom_list, inode_list) + { + list_for_each_entry(lock, &dom->inodelk_list, list) + { + /* If the lock belongs to the same client, we assume it's related + * to the same operation, so we allow the removal to continue. */ + if (lock->client == client) { + continue; + } + /* If the lock belongs to an internal process, we don't block the + * removal. 
*/ + if (lock->client_pid < 0) { + continue; + } + if (contend == NULL) { + return _gf_true; + } + has_owners = _gf_true; + inodelk_contention_notify_check(xl, lock, now, contend); + } + } + + return has_owners; +} + +int32_t +pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc, + pl_inode_t **ppl_inode, struct list_head *contend) +{ + struct timespec now; + inode_t *inode; + pl_inode_t *pl_inode; + int32_t error; + + pl_inode = NULL; + + error = pl_inode_from_loc(loc, &inode); + if ((error != 0) || (inode == NULL)) { + goto done; + } + + pl_inode = pl_inode_get(xl, inode, NULL); + if (pl_inode == NULL) { + inode_unref(inode); + error = ENOMEM; + goto done; + } + + /* pl_inode_from_loc() already increments ref count for inode, so + * we only assign here our reference. */ + pl_inode->inode = inode; + + timespec_now(&now); + + pthread_mutex_lock(&pl_inode->mutex); + + if (pl_inode->removed) { + error = ESTALE; + goto unlock; + } + + if (pl_inode_has_owners(xl, frame->root->client, pl_inode, &now, contend)) { + error = -1; + /* We skip the unlock here because the caller must create a stub when + * we return -1 and do a call to pl_inode_remove_complete(), which + * assumes the lock is still acquired and will release it once + * everything else is prepared. */ + goto done; + } + + pl_inode->is_locked = _gf_true; + pl_inode->remove_running++; + +unlock: + pthread_mutex_unlock(&pl_inode->mutex); + +done: + *ppl_inode = pl_inode; + + return error; +} + +int32_t +pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub, + struct list_head *contend) +{ + pl_inode_lock_t *lock; + int32_t error = -1; + + if (stub != NULL) { + list_add_tail(&stub->list, &pl_inode->waiting); + pl_inode->is_locked = _gf_true; + } else { + error = ENOMEM; + + while (!list_empty(contend)) { + lock = list_first_entry(contend, pl_inode_lock_t, list); + list_del_init(&lock->list); + __pl_inodelk_unref(lock); + } + } + + pthread_mutex_unlock(&pl_inode->mutex); + + if (error < 0) { + inodelk_contention_notify(xl, contend); + } + + inode_unref(pl_inode->inode); + + return error; +} + +void +pl_inode_remove_wake(struct list_head *list) +{ + call_stub_t *stub; + + while (!list_empty(list)) { + stub = list_first_entry(list, call_stub_t, list); + list_del_init(&stub->list); + + call_resume(stub); + } +} + +void +pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error) +{ + struct list_head contend, granted; + struct timespec now; + pl_dom_list_t *dom; + + if (pl_inode == NULL) { + return; + } + + INIT_LIST_HEAD(&contend); + INIT_LIST_HEAD(&granted); + timespec_now(&now); + + pthread_mutex_lock(&pl_inode->mutex); + + if (error == 0) { + if (pl_inode->links >= 0) { + pl_inode->links--; + } + if (pl_inode->links == 0) { + pl_inode->removed = _gf_true; + } + } + + pl_inode->remove_running--; + + if ((pl_inode->remove_running == 0) && list_empty(&pl_inode->waiting)) { + pl_inode->is_locked = _gf_false; + + list_for_each_entry(dom, &pl_inode->dom_list, inode_list) + { + __grant_blocked_inode_locks(xl, pl_inode, &granted, dom, &now, + &contend); + } + } + + pthread_mutex_unlock(&pl_inode->mutex); + + unwind_granted_inodes(xl, pl_inode, &granted); + + inodelk_contention_notify(xl, &contend); + + inode_unref(pl_inode->inode); +} + +void +pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode, + struct list_head *list) +{ + call_stub_t *stub, *tmp; + + if (!pl_inode->is_locked) { + return; + } + + list_for_each_entry_safe(stub, tmp, &pl_inode->waiting, list) + { + if 
(!pl_inode_has_owners(xl, stub->frame->root->client, pl_inode, NULL, + NULL)) { + list_move_tail(&stub->list, list); + } + } +} + +/* This function determines if an inodelk attempt can be done now or it needs + * to wait. + * + * Possible return values: + * < 0: An error occurred. Currently only -ESTALE can be returned if the + * inode has been deleted previously by unlink/rmdir/rename + * = 0: The lock can be attempted. + * > 0: The lock needs to wait because a conflicting remove operation is + * ongoing. + */ +int32_t +pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock) +{ + pl_dom_list_t *dom; + pl_inode_lock_t *ilock; + + /* If the inode has been deleted, we won't allow any lock. */ + if (pl_inode->removed) { + return -ESTALE; + } + + /* We only synchronize with locks made for regular operations coming from + * the user. Locks done for internal purposes are hard to control and could + * lead to long delays or deadlocks quite easily. */ + if (lock->client_pid < 0) { + return 0; + } + if (!pl_inode->is_locked) { + return 0; + } + if (pl_inode->remove_running > 0) { + return 1; + } + + list_for_each_entry(dom, &pl_inode->dom_list, inode_list) + { + list_for_each_entry(ilock, &dom->inodelk_list, list) + { + /* If a lock from the same client is already granted, we allow this + * one to continue. This is necessary to prevent deadlocks when + * multiple locks are taken for the same operation. + * + * On the other side it's unlikely that the same client sends + * completely unrelated locks for the same inode. + */ + if (ilock->client == lock->client) { + return 0; + } + } + } + + return 1; +} diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h index c3d0e361933..281223bf3b8 100644 --- a/xlators/features/locks/src/common.h +++ b/xlators/features/locks/src/common.h @@ -10,7 +10,6 @@ #ifndef __COMMON_H__ #define __COMMON_H__ -#include "lkowner.h" /*dump locks format strings */ #define RANGE_FMT "type=%s, whence=%hd, start=%llu, len=%llu" #define ENTRY_FMT "type=%s on basename=%s" @@ -32,12 +31,34 @@ #define SET_FLOCK_PID(flock, lock) ((flock)->l_pid = lock->client_pid) +#define PL_STACK_UNWIND_AND_FREE(__local, fop, frame, op_ret, params...) 
\ + do { \ + frame->local = NULL; \ + STACK_UNWIND_STRICT(fop, frame, op_ret, params); \ + if (__local) { \ + if (__local->inodelk_dom_count_req) \ + data_unref(__local->inodelk_dom_count_req); \ + loc_wipe(&__local->loc[0]); \ + loc_wipe(&__local->loc[1]); \ + if (__local->fd) \ + fd_unref(__local->fd); \ + if (__local->inode) \ + inode_unref(__local->inode); \ + if (__local->xdata) { \ + dict_unref(__local->xdata); \ + __local->xdata = NULL; \ + } \ + mem_put(__local); \ + } \ + } while (0) + posix_lock_t * new_posix_lock(struct gf_flock *flock, client_t *client, pid_t client_pid, - gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags, int can_block); + gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags, int blocking, + int32_t *op_errno); pl_inode_t * -pl_inode_get(xlator_t *this, inode_t *inode); +pl_inode_get(xlator_t *this, inode_t *inode, pl_local_t *local); posix_lock_t * pl_getlk(pl_inode_t *inode, posix_lock_t *lock); @@ -45,6 +66,9 @@ pl_getlk(pl_inode_t *inode, posix_lock_t *lock); int pl_setlk(xlator_t *this, pl_inode_t *inode, posix_lock_t *lock, int can_block); +int +pl_lock_preempt(pl_inode_t *pl_inode, posix_lock_t *reqlock); + void grant_blocked_locks(xlator_t *this, pl_inode_t *inode); @@ -81,6 +105,15 @@ void __pl_inodelk_unref(pl_inode_lock_t *lock); void +__grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted, pl_dom_list_t *dom, + struct timespec *now, struct list_head *contend); + +void +unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted); + +void grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom, struct timespec *now, struct list_head *contend); @@ -177,9 +210,53 @@ __pl_entrylk_unref(pl_entry_lock_t *lock); int pl_metalock_is_active(pl_inode_t *pl_inode); -int -__pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock, int can_block); +void +__pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock); + +void +inodelk_contention_notify_check(xlator_t *xl, pl_inode_lock_t *lock, + struct timespec *now, + struct list_head *contend); + +void +entrylk_contention_notify_check(xlator_t *xl, pl_entry_lock_t *lock, + struct timespec *now, + struct list_head *contend); gf_boolean_t pl_does_monkey_want_stuck_lock(); + +gf_boolean_t +pl_is_mandatory_locking_enabled(pl_inode_t *pl_inode); + +void +pl_clean_local(pl_local_t *local); + +int +pl_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd); + +gf_boolean_t +pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client); + +int32_t +pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc, + pl_inode_t **ppl_inode, struct list_head *contend); + +int32_t +pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub, + struct list_head *contend); + +void +pl_inode_remove_wake(struct list_head *list); + +void +pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error); + +void +pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode, + struct list_head *list); + +int32_t +pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock); + #endif /* __COMMON_H__ */ diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c index ea78f92d200..fd772c850dd 100644 --- a/xlators/features/locks/src/entrylk.c +++ b/xlators/features/locks/src/entrylk.c @@ -7,13 +7,13 @@ later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by the Free Software Foundation. 
*/ -#include "glusterfs.h" -#include "compat.h" -#include "xlator.h" -#include "logging.h" -#include "common-utils.h" -#include "list.h" -#include "upcall-utils.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/compat.h> +#include <glusterfs/xlator.h> +#include <glusterfs/logging.h> +#include <glusterfs/common-utils.h> +#include <glusterfs/list.h> +#include <glusterfs/upcall-utils.h> #include "locks.h" #include "clear.h" @@ -39,13 +39,20 @@ __pl_entrylk_ref(pl_entry_lock_t *lock) static pl_entry_lock_t * new_entrylk_lock(pl_inode_t *pinode, const char *basename, entrylk_type type, - const char *domain, call_frame_t *frame, char *conn_id) + const char *domain, call_frame_t *frame, char *conn_id, + int32_t *op_errno) { pl_entry_lock_t *newlock = NULL; + if (!pl_is_lk_owner_valid(&frame->root->lk_owner, frame->root->client)) { + *op_errno = EINVAL; + goto out; + } + newlock = GF_CALLOC(1, sizeof(pl_entry_lock_t), gf_locks_mt_pl_entry_lock_t); if (!newlock) { + *op_errno = ENOMEM; goto out; } @@ -114,8 +121,6 @@ __stale_entrylk(xlator_t *this, pl_entry_lock_t *candidate_lock, pl_entry_lock_t *requested_lock, time_t *lock_age_sec) { posix_locks_private_t *priv = NULL; - struct timeval curr; - gettimeofday(&curr, NULL); priv = this->private; @@ -123,7 +128,7 @@ __stale_entrylk(xlator_t *this, pl_entry_lock_t *candidate_lock, * chance? Or just the locks we are attempting to acquire? */ if (names_conflict(candidate_lock->basename, requested_lock->basename)) { - *lock_age_sec = curr.tv_sec - candidate_lock->granted_time.tv_sec; + *lock_age_sec = gf_time() - candidate_lock->granted_time; if (*lock_age_sec > priv->revocation_secs) return _gf_true; } @@ -197,9 +202,9 @@ out: return revoke_lock; } -static gf_boolean_t -__entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock, - struct timespec *now) +void +entrylk_contention_notify_check(xlator_t *this, pl_entry_lock_t *lock, + struct timespec *now, struct list_head *contend) { posix_locks_private_t *priv; int64_t elapsed; @@ -209,7 +214,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock, /* If this lock is in a list, it means that we are about to send a * notification for it, so no need to do anything else. 
*/ if (!list_empty(&lock->contend)) { - return _gf_false; + return; } elapsed = now->tv_sec; @@ -218,7 +223,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock, elapsed--; } if (elapsed < priv->notify_contention_delay) { - return _gf_false; + return; } /* All contention notifications will be sent outside of the locked @@ -231,7 +236,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock, lock->contention_time = *now; - return _gf_true; + list_add_tail(&lock->contend, contend); } void @@ -325,9 +330,7 @@ __entrylk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_entry_lock_t *lock, break; } } - if (__entrylk_needs_contention_notify(this, tmp, now)) { - list_add_tail(&tmp->contend, contend); - } + entrylk_contention_notify_check(this, tmp, now, contend); } } @@ -539,19 +542,17 @@ static int __lock_blocked_add(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom, pl_entry_lock_t *lock, int nonblock) { - struct timeval now; - - gettimeofday(&now, NULL); - if (nonblock) goto out; - lock->blkd_time = now; + lock->blkd_time = gf_time(); list_add_tail(&lock->blocked_locks, &dom->blocked_entrylks); gf_msg_trace(this->name, 0, "Blocking lock: {pinode=%p, basename=%s}", pinode, lock->basename); + entrylk_trace_block(this, lock->frame, NULL, NULL, NULL, lock->basename, + ENTRYLK_LOCK, lock->type); out: return -EAGAIN; } @@ -605,7 +606,7 @@ __lock_entrylk(xlator_t *this, pl_inode_t *pinode, pl_entry_lock_t *lock, } __pl_entrylk_ref(lock); - gettimeofday(&lock->granted_time, NULL); + lock->granted_time = gf_time(); list_add(&lock->domain_list, &dom->entrylk_list); ret = 0; @@ -644,11 +645,10 @@ int32_t check_entrylk_on_basename(xlator_t *this, inode_t *parent, char *basename) { int32_t entrylk = 0; - pl_inode_t *pinode = 0; pl_dom_list_t *dom = NULL; pl_entry_lock_t *conf = NULL; - pinode = pl_inode_get(this, parent); + pl_inode_t *pinode = pl_inode_get(this, parent, NULL); if (!pinode) goto out; pthread_mutex_lock(&pinode->mutex); @@ -689,10 +689,9 @@ __grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode, bl_ret = __lock_entrylk(bl->this, pl_inode, bl, 0, dom, now, contend); if (bl_ret == 0) { - list_add(&bl->blocked_locks, granted); + list_add_tail(&bl->blocked_locks, granted); } } - return; } /* Grants locks if possible which are blocked on a lock */ @@ -770,7 +769,7 @@ pl_common_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, if (xdata) dict_ret = dict_get_str(xdata, "connection-id", &conn_id); - pinode = pl_inode_get(this, inode); + pinode = pl_inode_get(this, inode, NULL); if (!pinode) { op_errno = ENOMEM; goto out; @@ -794,10 +793,9 @@ pl_common_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, entrylk_trace_in(this, frame, volume, fd, loc, basename, cmd, type); reqlock = new_entrylk_lock(pinode, basename, type, dom->domain, frame, - conn_id); + conn_id, &op_errno); if (!reqlock) { op_ret = -1; - op_errno = ENOMEM; goto unwind; } @@ -933,8 +931,6 @@ out: op_ret, op_errno); unwind: STACK_UNWIND_STRICT(entrylk, frame, op_ret, op_errno, NULL); - } else { - entrylk_trace_block(this, frame, volume, fd, loc, basename, cmd, type); } if (pcontend != NULL) { @@ -1072,32 +1068,36 @@ pl_entrylk_client_cleanup(xlator_t *this, pl_ctx_t *ctx) } pthread_mutex_unlock(&ctx->lock); - list_for_each_entry_safe(l, tmp, &unwind, client_list) - { - list_del_init(&l->client_list); + if (!list_empty(&unwind)) { + list_for_each_entry_safe(l, tmp, &unwind, client_list) + { + list_del_init(&l->client_list); - if (l->frame) - 
STACK_UNWIND_STRICT(entrylk, l->frame, -1, EAGAIN, NULL); - list_add_tail(&l->client_list, &released); + if (l->frame) + STACK_UNWIND_STRICT(entrylk, l->frame, -1, EAGAIN, NULL); + list_add_tail(&l->client_list, &released); + } } - list_for_each_entry_safe(l, tmp, &released, client_list) - { - list_del_init(&l->client_list); + if (!list_empty(&released)) { + list_for_each_entry_safe(l, tmp, &released, client_list) + { + list_del_init(&l->client_list); - pinode = l->pinode; + pinode = l->pinode; - dom = get_domain(pinode, l->volume); + dom = get_domain(pinode, l->volume); - grant_blocked_entry_locks(this, pinode, dom, &now, pcontend); + grant_blocked_entry_locks(this, pinode, dom, &now, pcontend); - pthread_mutex_lock(&pinode->mutex); - { - __pl_entrylk_unref(l); - } - pthread_mutex_unlock(&pinode->mutex); + pthread_mutex_lock(&pinode->mutex); + { + __pl_entrylk_unref(l); + } + pthread_mutex_unlock(&pinode->mutex); - inode_unref(pinode->inode); + inode_unref(pinode->inode); + } } if (pcontend != NULL) { diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c index eff58a79569..d4e51d6e0a1 100644 --- a/xlators/features/locks/src/inodelk.c +++ b/xlators/features/locks/src/inodelk.c @@ -7,18 +7,16 @@ later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by the Free Software Foundation. */ -#include "glusterfs.h" -#include "compat.h" -#include "xlator.h" -#include "logging.h" -#include "common-utils.h" -#include "list.h" -#include "upcall-utils.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/compat.h> +#include <glusterfs/dict.h> +#include <glusterfs/logging.h> +#include <glusterfs/list.h> +#include <glusterfs/upcall-utils.h> #include "locks.h" #include "clear.h" #include "common.h" -#include "pl-messages.h" void __delete_inode_lock(pl_inode_lock_t *lock) @@ -142,15 +140,13 @@ __stale_inodelk(xlator_t *this, pl_inode_lock_t *candidate_lock, pl_inode_lock_t *requested_lock, time_t *lock_age_sec) { posix_locks_private_t *priv = NULL; - struct timeval curr; priv = this->private; - gettimeofday(&curr, NULL); /* Question: Should we just prune them all given the * chance? Or just the locks we are attempting to acquire? */ if (inodelk_conflict(candidate_lock, requested_lock)) { - *lock_age_sec = curr.tv_sec - candidate_lock->granted_time.tv_sec; + *lock_age_sec = gf_time() - candidate_lock->granted_time; if (*lock_age_sec > priv->revocation_secs) return _gf_true; } @@ -231,9 +227,9 @@ out: return revoke_lock; } -static gf_boolean_t -__inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock, - struct timespec *now) +void +inodelk_contention_notify_check(xlator_t *this, pl_inode_lock_t *lock, + struct timespec *now, struct list_head *contend) { posix_locks_private_t *priv; int64_t elapsed; @@ -243,7 +239,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock, /* If this lock is in a list, it means that we are about to send a * notification for it, so no need to do anything else. 
*/ if (!list_empty(&lock->contend)) { - return _gf_false; + return; } elapsed = now->tv_sec; @@ -252,7 +248,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock, elapsed--; } if (elapsed < priv->notify_contention_delay) { - return _gf_false; + return; } /* All contention notifications will be sent outside of the locked @@ -265,7 +261,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock, lock->contention_time = *now; - return _gf_true; + list_add_tail(&lock->contend, contend); } void @@ -353,9 +349,7 @@ __inodelk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock, break; } } - if (__inodelk_needs_contention_notify(this, l, now)) { - list_add_tail(&l->contend, contend); - } + inodelk_contention_notify_check(this, l, now, contend); } } @@ -401,15 +395,11 @@ static int __lock_blocked_add(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock, int can_block) { - struct timeval now; - - gettimeofday(&now, NULL); - if (can_block == 0) { goto out; } - lock->blkd_time = now; + lock->blkd_time = gf_time(); list_add_tail(&lock->blocked_locks, &dom->blocked_inodelks); gf_msg_trace(this->name, 0, @@ -420,6 +410,8 @@ __lock_blocked_add(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock, lkowner_utoa(&lock->owner), lock->user_flock.l_start, lock->user_flock.l_len); + pl_trace_block(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, + lock->volume); out: return -EAGAIN; } @@ -433,12 +425,17 @@ __lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock, struct list_head *contend) { pl_inode_lock_t *conf = NULL; - int ret = -EINVAL; + int ret; - conf = __inodelk_grantable(this, dom, lock, now, contend); - if (conf) { - ret = __lock_blocked_add(this, dom, lock, can_block); - goto out; + ret = pl_inode_remove_inodelk(pl_inode, lock); + if (ret < 0) { + return ret; + } + if (ret == 0) { + conf = __inodelk_grantable(this, dom, lock, now, contend); + } + if ((ret > 0) || (conf != NULL)) { + return __lock_blocked_add(this, dom, lock, can_block); } /* To prevent blocked locks starvation, check if there are any blocked @@ -460,17 +457,13 @@ __lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock, "starvation"); } - ret = __lock_blocked_add(this, dom, lock, can_block); - goto out; + return __lock_blocked_add(this, dom, lock, can_block); } __pl_inodelk_ref(lock); - gettimeofday(&lock->granted_time, NULL); + lock->granted_time = gf_time(); list_add(&lock->list, &dom->inodelk_list); - ret = 0; - -out: - return ret; + return 0; } /* Return true if the two inodelks have exactly same lock boundaries */ @@ -502,33 +495,36 @@ static pl_inode_lock_t * __inode_unlock_lock(xlator_t *this, pl_inode_lock_t *lock, pl_dom_list_t *dom) { pl_inode_lock_t *conf = NULL; + inode_t *inode = NULL; + + inode = lock->pl_inode->inode; conf = find_matching_inodelk(lock, dom); if (!conf) { gf_log(this->name, GF_LOG_ERROR, " Matching lock not found for unlock %llu-%llu, by %s " - "on %p", + "on %p for gfid:%s", (unsigned long long)lock->fl_start, (unsigned long long)lock->fl_end, lkowner_utoa(&lock->owner), - lock->client); + lock->client, inode ? 
uuid_utoa(inode->gfid) : "UNKNOWN"); goto out; } __delete_inode_lock(conf); gf_log(this->name, GF_LOG_DEBUG, - " Matching lock found for unlock %llu-%llu, by %s on %p", + " Matching lock found for unlock %llu-%llu, by %s on %p for gfid:%s", (unsigned long long)lock->fl_start, (unsigned long long)lock->fl_end, - lkowner_utoa(&lock->owner), lock->client); + lkowner_utoa(&lock->owner), lock->client, + inode ? uuid_utoa(inode->gfid) : "UNKNOWN"); out: return conf; } -static void +void __grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, struct list_head *granted, pl_dom_list_t *dom, struct timespec *now, struct list_head *contend) { - int bl_ret = 0; pl_inode_lock_t *bl = NULL; pl_inode_lock_t *tmp = NULL; @@ -541,52 +537,48 @@ __grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, { list_del_init(&bl->blocked_locks); - bl_ret = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend); + bl->status = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend); - if (bl_ret == 0) { - list_add(&bl->blocked_locks, granted); + if (bl->status != -EAGAIN) { + list_add_tail(&bl->blocked_locks, granted); } } - return; } -/* Grant all inodelks blocked on a lock */ void -grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, - pl_dom_list_t *dom, struct timespec *now, - struct list_head *contend) +unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted) { - struct list_head granted; pl_inode_lock_t *lock; pl_inode_lock_t *tmp; + int32_t op_ret; + int32_t op_errno; - INIT_LIST_HEAD(&granted); - - pthread_mutex_lock(&pl_inode->mutex); + list_for_each_entry_safe(lock, tmp, granted, blocked_locks) { - __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now, - contend); - } - pthread_mutex_unlock(&pl_inode->mutex); - - list_for_each_entry_safe(lock, tmp, &granted, blocked_locks) - { - gf_log(this->name, GF_LOG_TRACE, - "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => Granted", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid, - lkowner_utoa(&lock->owner), lock->user_flock.l_start, - lock->user_flock.l_len); - + if (lock->status == 0) { + op_ret = 0; + op_errno = 0; + gf_log(this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 + " => Granted", + lock->fl_type == F_UNLCK ? 
"Unlock" : "Lock", + lock->client_pid, lkowner_utoa(&lock->owner), + lock->user_flock.l_start, lock->user_flock.l_len); + } else { + op_ret = -1; + op_errno = -lock->status; + } pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, - 0, 0, lock->volume); + op_ret, op_errno, lock->volume); - STACK_UNWIND_STRICT(inodelk, lock->frame, 0, 0, NULL); + STACK_UNWIND_STRICT(inodelk, lock->frame, op_ret, op_errno, NULL); lock->frame = NULL; } pthread_mutex_lock(&pl_inode->mutex); { - list_for_each_entry_safe(lock, tmp, &granted, blocked_locks) + list_for_each_entry_safe(lock, tmp, granted, blocked_locks) { list_del_init(&lock->blocked_locks); __pl_inodelk_unref(lock); @@ -595,6 +587,26 @@ grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, pthread_mutex_unlock(&pl_inode->mutex); } +/* Grant all inodelks blocked on a lock */ +void +grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, + pl_dom_list_t *dom, struct timespec *now, + struct list_head *contend) +{ + struct list_head granted; + + INIT_LIST_HEAD(&granted); + + pthread_mutex_lock(&pl_inode->mutex); + { + __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now, + contend); + } + pthread_mutex_unlock(&pl_inode->mutex); + + unwind_granted_inodes(this, pl_inode, &granted); +} + static void pl_inodelk_log_cleanup(pl_inode_lock_t *lock) { @@ -656,7 +668,7 @@ pl_inodelk_client_cleanup(xlator_t *this, pl_ctx_t *ctx) * and blocked lists, then this means that a parallel * unlock on another inodelk (L2 say) may have 'granted' * L1 and added it to 'granted' list in - * __grant_blocked_node_locks() (although using the + * __grant_blocked_inode_locks() (although using the * 'blocked_locks' member). In that case, the cleanup * codepath must try and grant other overlapping * blocked inodelks from other clients, now that L1 is @@ -691,31 +703,35 @@ pl_inodelk_client_cleanup(xlator_t *this, pl_ctx_t *ctx) } pthread_mutex_unlock(&ctx->lock); - list_for_each_entry_safe(l, tmp, &unwind, client_list) - { - list_del_init(&l->client_list); + if (!list_empty(&unwind)) { + list_for_each_entry_safe(l, tmp, &unwind, client_list) + { + list_del_init(&l->client_list); - if (l->frame) - STACK_UNWIND_STRICT(inodelk, l->frame, -1, EAGAIN, NULL); - list_add_tail(&l->client_list, &released); + if (l->frame) + STACK_UNWIND_STRICT(inodelk, l->frame, -1, EAGAIN, NULL); + list_add_tail(&l->client_list, &released); + } } - list_for_each_entry_safe(l, tmp, &released, client_list) - { - list_del_init(&l->client_list); + if (!list_empty(&released)) { + list_for_each_entry_safe(l, tmp, &released, client_list) + { + list_del_init(&l->client_list); - pl_inode = l->pl_inode; + pl_inode = l->pl_inode; - dom = get_domain(pl_inode, l->volume); + dom = get_domain(pl_inode, l->volume); - grant_blocked_inode_locks(this, pl_inode, dom, &now, pcontend); + grant_blocked_inode_locks(this, pl_inode, dom, &now, pcontend); - pthread_mutex_lock(&pl_inode->mutex); - { - __pl_inodelk_unref(l); + pthread_mutex_lock(&pl_inode->mutex); + { + __pl_inodelk_unref(l); + } + pthread_mutex_unlock(&pl_inode->mutex); + inode_unref(pl_inode->inode); } - pthread_mutex_unlock(&pl_inode->mutex); - inode_unref(pl_inode->inode); } if (pcontend != NULL) { @@ -737,6 +753,7 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, gf_boolean_t need_inode_unref = _gf_false; struct list_head *pcontend = NULL; struct list_head contend; + struct list_head wake; struct timespec now = {}; short fl_type; @@ -788,6 +805,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, 
pl_inode_t *pl_inode, timespec_now(&now); } + INIT_LIST_HEAD(&wake); + if (ctx) pthread_mutex_lock(&ctx->lock); pthread_mutex_lock(&pl_inode->mutex); @@ -810,18 +829,17 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid, lkowner_utoa(&lock->owner), lock->user_flock.l_start, lock->user_flock.l_len); - if (can_block) + if (can_block) { unref = _gf_false; - /* For all but the case where a non-blocking - * lock attempt fails, the extra ref taken at - * the start of this function must be negated. - */ - else - need_inode_unref = _gf_true; + } } - - if (ctx && (!ret || can_block)) + /* For all but the case where a non-blocking lock attempt fails + * with -EAGAIN, the extra ref taken at the start of this function + * must be negated. */ + need_inode_unref = (ret != 0) && ((ret != -EAGAIN) || !can_block); + if (ctx && !need_inode_unref) { list_add_tail(&lock->client_list, &ctx->inodelk_lockers); + } } else { /* Irrespective of whether unlock succeeds or not, * the extra inode ref that was done at the start of @@ -839,6 +857,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, list_del_init(&retlock->client_list); __pl_inodelk_unref(retlock); + pl_inode_remove_unlocked(this, pl_inode, &wake); + ret = 0; } out: @@ -849,6 +869,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, if (ctx) pthread_mutex_unlock(&ctx->lock); + pl_inode_remove_wake(&wake); + /* The following (extra) unref corresponds to the ref that * was done at the time the lock was granted. */ @@ -869,17 +891,23 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, } /* Create a new inode_lock_t */ -pl_inode_lock_t * +static pl_inode_lock_t * new_inode_lock(struct gf_flock *flock, client_t *client, pid_t client_pid, call_frame_t *frame, xlator_t *this, const char *volume, - char *conn_id) + char *conn_id, int32_t *op_errno) { pl_inode_lock_t *lock = NULL; + if (!pl_is_lk_owner_valid(&frame->root->lk_owner, frame->root->client)) { + *op_errno = EINVAL; + goto out; + } + lock = GF_CALLOC(1, sizeof(*lock), gf_locks_mt_pl_inode_lock_t); if (!lock) { - return NULL; + *op_errno = ENOMEM; + goto out; } lock->fl_start = flock->l_start; @@ -907,6 +935,7 @@ new_inode_lock(struct gf_flock *flock, client_t *client, pid_t client_pid, INIT_LIST_HEAD(&lock->contend); __pl_inodelk_ref(lock); +out: return lock; } @@ -951,6 +980,7 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, int ret = -1; GF_UNUSED int dict_ret = -1; int can_block = 0; + short lock_type = 0; pl_inode_t *pinode = NULL; pl_inode_lock_t *reqlock = NULL; pl_dom_list_t *dom = NULL; @@ -988,7 +1018,7 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, } } - pinode = pl_inode_get(this, inode); + pinode = pl_inode_get(this, inode, NULL); if (!pinode) { op_errno = ENOMEM; goto unwind; @@ -1001,11 +1031,10 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, } reqlock = new_inode_lock(flock, frame->root->client, frame->root->pid, - frame, this, dom->domain, conn_id); + frame, this, dom->domain, conn_id, &op_errno); if (!reqlock) { op_ret = -1; - op_errno = ENOMEM; goto unwind; } @@ -1016,16 +1045,20 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, /* fall through */ case F_SETLK: + lock_type = flock->l_type; memcpy(&reqlock->user_flock, flock, sizeof(struct gf_flock)); ret = pl_inode_setlk(this, ctx, pinode, reqlock, can_block, dom, inode); if (ret 
< 0) { - if ((can_block) && (F_UNLCK != flock->l_type)) { - pl_trace_block(this, frame, fd, loc, cmd, flock, volume); - goto out; + if (ret == -EAGAIN) { + if (can_block && (F_UNLCK != lock_type)) { + goto out; + } + gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN"); + } else { + gf_log(this->name, GF_LOG_TRACE, "returning %d", ret); } - gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN"); op_errno = -ret; goto unwind; } diff --git a/xlators/features/locks/src/locks-mem-types.h b/xlators/features/locks/src/locks-mem-types.h index 240c1957a42..a76605027b3 100644 --- a/xlators/features/locks/src/locks-mem-types.h +++ b/xlators/features/locks/src/locks-mem-types.h @@ -11,7 +11,7 @@ #ifndef __LOCKS_MEM_TYPES_H__ #define __LOCKS_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_locks_mem_types_ { gf_locks_mt_pl_dom_list_t = gf_common_mt_end + 1, @@ -19,7 +19,6 @@ enum gf_locks_mem_types_ { gf_locks_mt_posix_lock_t, gf_locks_mt_pl_entry_lock_t, gf_locks_mt_pl_inode_lock_t, - gf_locks_mt_truncate_ops, gf_locks_mt_pl_rw_req_t, gf_locks_mt_posix_locks_private_t, gf_locks_mt_pl_fdctx_t, diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h index cf2849fc251..c868eb494a2 100644 --- a/xlators/features/locks/src/locks.h +++ b/xlators/features/locks/src/locks.h @@ -10,13 +10,13 @@ #ifndef __POSIX_LOCKS_H__ #define __POSIX_LOCKS_H__ -#include "compat-errno.h" -#include "stack.h" -#include "call-stub.h" +#include <glusterfs/compat-errno.h> +#include <glusterfs/stack.h> +#include <glusterfs/call-stub.h> #include "locks-mem-types.h" -#include "client_t.h" +#include <glusterfs/client_t.h> -#include "lkowner.h" +#include <glusterfs/lkowner.h> typedef enum { MLK_NONE, @@ -30,11 +30,11 @@ struct __pl_fd; struct __posix_lock { struct list_head list; - short fl_type; off_t fl_start; off_t fl_end; uint32_t lk_flags; + short fl_type; short blocked; /* waiting to acquire */ struct gf_flock user_flock; /* the flock supplied by the user */ xlator_t *this; /* required for blocked locks */ @@ -43,9 +43,8 @@ struct __posix_lock { fd_t *fd; call_frame_t *frame; - struct timeval blkd_time; /*time at which lock was queued into blkd list*/ - struct timeval - granted_time; /*time at which lock was queued into active list*/ + time_t blkd_time; /* time at which lock was queued into blkd list */ + time_t granted_time; /* time at which lock was queued into active list */ /* These two together serve to uniquely identify each process across nodes */ @@ -74,7 +73,6 @@ struct __pl_inode_lock { struct list_head contend; /* list of contending locks */ int ref; - short fl_type; off_t fl_start; off_t fl_end; @@ -86,9 +84,9 @@ struct __pl_inode_lock { call_frame_t *frame; - struct timeval blkd_time; /*time at which lock was queued into blkd list*/ - struct timeval - granted_time; /*time at which lock was queued into active list*/ + time_t blkd_time; /* time at which lock was queued into blkd list */ + time_t granted_time; /* time at which lock was queued into active list */ + /*last time at which lock contention was detected and notified*/ struct timespec contention_time; @@ -102,6 +100,10 @@ struct __pl_inode_lock { char *connection_id; /* stores the client connection id */ struct list_head client_list; /* list of all locks from a client */ + short fl_type; + + int32_t status; /* Error code when we try to grant a lock in blocked + state */ }; typedef struct __pl_inode_lock pl_inode_lock_t; @@ -135,11 +137,10 @@ struct __entry_lock { const char *volume; const char *basename; 
- entrylk_type type; - struct timeval blkd_time; /*time at which lock was queued into blkd list*/ - struct timeval - granted_time; /*time at which lock was queued into active list*/ + time_t blkd_time; /* time at which lock was queued into blkd list */ + time_t granted_time; /* time at which lock was queued into active list */ + /*last time at which lock contention was detected and notified*/ struct timespec contention_time; @@ -150,6 +151,7 @@ struct __entry_lock { char *connection_id; /* stores the client connection id */ struct list_head client_list; /* list of all locks from a client */ + entrylk_type type; }; typedef struct __entry_lock pl_entry_lock_t; @@ -164,13 +166,14 @@ struct __pl_inode { struct list_head rw_list; /* list of waiting r/w requests */ struct list_head reservelk_list; /* list of reservelks */ struct list_head blocked_reservelks; /* list of blocked reservelks */ - struct list_head - blocked_calls; /* List of blocked lock calls while a reserve is held*/ - struct list_head metalk_list; /* Meta lock list */ - /* This is to store the incoming lock - requests while meta lock is enabled */ - struct list_head queued_locks; - int mandatory; /* if mandatory locking is enabled */ + struct list_head blocked_calls; /* List of blocked lock calls while a + reserve is held*/ + struct list_head metalk_list; /* Meta lock list */ + struct list_head queued_locks; /* This is to store the incoming lock + requests while meta lock is enabled */ + struct list_head waiting; /* List of pending fops waiting to unlink/rmdir + the inode. */ + int mandatory; /* if mandatory locking is enabled */ inode_t *refkeeper; /* hold refs on an inode while locks are held to prevent pruning */ @@ -179,6 +182,31 @@ struct __pl_inode { of inode_t as long as there are locks on it */ gf_boolean_t migrated; + + /* Flag to indicate whether to read mlock-enforce xattr from disk */ + gf_boolean_t check_mlock_info; + + /* Mandatory_lock enforce: IO will be allowed if and only if the lkowner has + held the lock. + + Note: An xattr is set on the file to recover this information post + reboot. If client does not want mandatory lock to be enforced, then it + should remove this xattr explicitly + */ + gf_boolean_t mlock_enforced; + /* There are scenarios where mandatory lock is granted but there are IOs + pending at posix level. To avoid this before preempting the previous lock + owner, we wait for all the fops to be unwound. + */ + int fop_wind_count; + pthread_cond_t check_fop_wind_count; + + gf_boolean_t track_fop_wind_count; + + int32_t links; /* Number of hard links the inode has. */ + uint32_t remove_running; /* Number of remove operations running. */ + gf_boolean_t is_locked; /* Regular locks will be blocked. */ + gf_boolean_t removed; /* The inode has been deleted. 
*/ }; typedef struct __pl_inode pl_inode_t; @@ -196,29 +224,33 @@ struct __pl_metalk { typedef struct __pl_metalk pl_meta_lock_t; typedef struct { + char *brickname; + uint32_t revocation_secs; + uint32_t revocation_max_blocked; + uint32_t notify_contention_delay; mlk_mode_t mandatory_mode; /* holds current mandatory locking mode */ gf_boolean_t trace; /* trace lock requests in and out */ - char *brickname; gf_boolean_t monkey_unlocking; - uint32_t revocation_secs; gf_boolean_t revocation_clear_all; - uint32_t revocation_max_blocked; gf_boolean_t notify_contention; - uint32_t notify_contention_delay; + gf_boolean_t mlock_enforced; } posix_locks_private_t; typedef struct { - gf_boolean_t entrylk_count_req; - gf_boolean_t inodelk_count_req; - gf_boolean_t posixlk_count_req; - gf_boolean_t parent_entrylk_req; data_t *inodelk_dom_count_req; dict_t *xdata; loc_t loc[2]; fd_t *fd; + inode_t *inode; off_t offset; glusterfs_fop_t op; + gf_boolean_t entrylk_count_req; + gf_boolean_t inodelk_count_req; + gf_boolean_t posixlk_count_req; + gf_boolean_t parent_entrylk_req; + gf_boolean_t multiple_dom_lk_requests; + int update_mlock_enforced_flag; } pl_local_t; typedef struct { @@ -239,6 +271,15 @@ typedef struct _locks_ctx { struct list_head metalk_list; } pl_ctx_t; +typedef struct _multi_dom_lk_data { + xlator_t *this; + inode_t *inode; + dict_t *xdata_rsp; + gf_boolean_t keep_max; +} multi_dom_lk_data; + +typedef enum { DECREMENT, INCREMENT } pl_count_op_t; + pl_ctx_t * pl_ctx_get(client_t *client, xlator_t *xlator); diff --git a/xlators/features/locks/src/pl-messages.h b/xlators/features/locks/src/pl-messages.h index a99e1bbce43..e2d3d7ca974 100644 --- a/xlators/features/locks/src/pl-messages.h +++ b/xlators/features/locks/src/pl-messages.h @@ -11,7 +11,7 @@ #ifndef _PL_MESSAGES_H_ #define _PL_MESSAGES_H_ -#include "glfs-message-id.h" +#include <glusterfs/glfs-message-id.h> /* To add new message IDs, append new identifiers at the end of the list. * diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c index 917eacee8da..cf0ae4c57dd 100644 --- a/xlators/features/locks/src/posix.c +++ b/xlators/features/locks/src/posix.c @@ -12,19 +12,15 @@ #include <limits.h> #include <pthread.h> -#include "glusterfs.h" -#include "compat.h" -#include "xlator.h" -#include "logging.h" -#include "common-utils.h" +#include <glusterfs/compat.h> +#include <glusterfs/logging.h> #include "locks.h" #include "common.h" -#include "statedump.h" +#include <glusterfs/statedump.h> #include "clear.h" -#include "defaults.h" -#include "syncop.h" -#include "pl-messages.h" +#include <glusterfs/defaults.h> +#include <glusterfs/syncop.h> #ifndef LLONG_MAX #define LLONG_MAX LONG_LONG_MAX /* compat with old gcc */ @@ -43,21 +39,6 @@ pl_lockinfo_get_brickname(xlator_t *, inode_t *, int32_t *); static int fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **); -#define PL_STACK_UNWIND_AND_FREE(__local, fop, frame, op_ret, params...) \ - do { \ - frame->local = NULL; \ - STACK_UNWIND_STRICT(fop, frame, op_ret, params); \ - if (__local) { \ - if (__local->inodelk_dom_count_req) \ - data_unref(__local->inodelk_dom_count_req); \ - loc_wipe(&__local->loc[0]); \ - loc_wipe(&__local->loc[1]); \ - if (__local->fd) \ - fd_unref(__local->fd); \ - mem_put(__local); \ - } \ - } while (0) - /* * The client is always requesting data, but older * servers were not returning it. 
Newer ones are, so @@ -115,69 +96,156 @@ fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **); #define PL_LOCAL_GET_REQUESTS(frame, this, xdata, __fd, __loc, __newloc) \ do { \ if (pl_has_xdata_requests(xdata)) { \ - frame->local = mem_get0(this->local_pool); \ + if (!frame->local) \ + frame->local = mem_get0(this->local_pool); \ pl_local_t *__local = frame->local; \ if (__local) { \ if (__fd) { \ __local->fd = fd_ref(__fd); \ + __local->inode = inode_ref(__fd->inode); \ } else { \ if (__loc) \ loc_copy(&__local->loc[0], __loc); \ if (__newloc) \ loc_copy(&__local->loc[1], __newloc); \ + __local->inode = inode_ref(__local->loc[0].inode); \ } \ pl_get_xdata_requests(__local, xdata); \ } \ } \ } while (0) +#define PL_CHECK_LOCK_ENFORCE_KEY(frame, dict, name, this, loc, fd, priv) \ + do { \ + if ((dict && (dict_get(dict, GF_ENFORCE_MANDATORY_LOCK))) || \ + (name && (strcmp(name, GF_ENFORCE_MANDATORY_LOCK) == 0))) { \ + inode_t *__inode = (loc ? loc->inode : fd->inode); \ + pl_inode_t *__pl_inode = pl_inode_get(this, __inode, NULL); \ + if (__pl_inode == NULL) { \ + op_ret = -1; \ + op_errno = ENOMEM; \ + goto unwind; \ + } \ + if (!pl_is_mandatory_locking_enabled(__pl_inode) || \ + !priv->mlock_enforced) { \ + op_ret = -1; \ + gf_msg(this->name, GF_LOG_DEBUG, EINVAL, 0, \ + "option %s would need mandatory lock to be enabled " \ + "and feature.enforce-mandatory-lock option to be set " \ + "to on", \ + GF_ENFORCE_MANDATORY_LOCK); \ + op_errno = EINVAL; \ + goto unwind; \ + } \ + \ + op_ret = pl_local_init(frame, this, loc, fd); \ + if (op_ret) { \ + op_errno = ENOMEM; \ + goto unwind; \ + } \ + \ + ((pl_local_t *)(frame->local))->update_mlock_enforced_flag = 1; \ + } \ + } while (0) + +#define PL_INODE_REMOVE(_fop, _frame, _xl, _loc1, _loc2, _cont, _cbk, \ + _args...) \ + ({ \ + struct list_head contend; \ + pl_inode_t *__pl_inode; \ + call_stub_t *__stub; \ + int32_t __error; \ + INIT_LIST_HEAD(&contend); \ + __error = pl_inode_remove_prepare(_xl, _frame, _loc2 ? 
_loc2 : _loc1, \ + &__pl_inode, &contend); \ + if (__error < 0) { \ + __stub = fop_##_fop##_stub(_frame, _cont, ##_args); \ + __error = pl_inode_remove_complete(_xl, __pl_inode, __stub, \ + &contend); \ + } else if (__error == 0) { \ + PL_LOCAL_GET_REQUESTS(_frame, _xl, xdata, ((fd_t *)NULL), _loc1, \ + _loc2); \ + STACK_WIND_COOKIE(_frame, _cbk, __pl_inode, FIRST_CHILD(_xl), \ + FIRST_CHILD(_xl)->fops->_fop, ##_args); \ + } \ + __error; \ + }) + gf_boolean_t pl_has_xdata_requests(dict_t *xdata) { - char *reqs[] = {GLUSTERFS_ENTRYLK_COUNT, GLUSTERFS_INODELK_COUNT, - GLUSTERFS_INODELK_DOM_COUNT, GLUSTERFS_POSIXLK_COUNT, - GLUSTERFS_PARENT_ENTRYLK, NULL}; + static char *reqs[] = {GLUSTERFS_ENTRYLK_COUNT, + GLUSTERFS_INODELK_COUNT, + GLUSTERFS_INODELK_DOM_COUNT, + GLUSTERFS_POSIXLK_COUNT, + GLUSTERFS_PARENT_ENTRYLK, + GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS, + NULL}; + static int reqs_size[] = {SLEN(GLUSTERFS_ENTRYLK_COUNT), + SLEN(GLUSTERFS_INODELK_COUNT), + SLEN(GLUSTERFS_INODELK_DOM_COUNT), + SLEN(GLUSTERFS_POSIXLK_COUNT), + SLEN(GLUSTERFS_PARENT_ENTRYLK), + SLEN(GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS), + 0}; int i = 0; if (!xdata) return _gf_false; for (i = 0; reqs[i]; i++) - if (dict_get(xdata, reqs[i])) + if (dict_getn(xdata, reqs[i], reqs_size[i])) return _gf_true; return _gf_false; } +static int +dict_delete_domain_key(dict_t *dict, char *key, data_t *value, void *data) +{ + dict_del(dict, key); + return 0; +} + void pl_get_xdata_requests(pl_local_t *local, dict_t *xdata) { if (!local || !xdata) return; - if (dict_get(xdata, GLUSTERFS_ENTRYLK_COUNT)) { + GF_ASSERT(local->xdata == NULL); + local->xdata = dict_copy_with_ref(xdata, NULL); + + if (dict_get_sizen(xdata, GLUSTERFS_ENTRYLK_COUNT)) { local->entrylk_count_req = 1; - dict_del(xdata, GLUSTERFS_ENTRYLK_COUNT); + dict_del_sizen(xdata, GLUSTERFS_ENTRYLK_COUNT); } - if (dict_get(xdata, GLUSTERFS_INODELK_COUNT)) { + if (dict_get_sizen(xdata, GLUSTERFS_INODELK_COUNT)) { local->inodelk_count_req = 1; - dict_del(xdata, GLUSTERFS_INODELK_COUNT); + dict_del_sizen(xdata, GLUSTERFS_INODELK_COUNT); + } + if (dict_get_sizen(xdata, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS)) { + local->multiple_dom_lk_requests = 1; + dict_del_sizen(xdata, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS); + dict_foreach_fnmatch(xdata, GLUSTERFS_INODELK_DOM_PREFIX "*", + dict_delete_domain_key, NULL); } - local->inodelk_dom_count_req = dict_get(xdata, GLUSTERFS_INODELK_DOM_COUNT); + local->inodelk_dom_count_req = dict_get_sizen(xdata, + GLUSTERFS_INODELK_DOM_COUNT); if (local->inodelk_dom_count_req) { data_ref(local->inodelk_dom_count_req); - dict_del(xdata, GLUSTERFS_INODELK_DOM_COUNT); + dict_del_sizen(xdata, GLUSTERFS_INODELK_DOM_COUNT); } - if (dict_get(xdata, GLUSTERFS_POSIXLK_COUNT)) { + if (dict_get_sizen(xdata, GLUSTERFS_POSIXLK_COUNT)) { local->posixlk_count_req = 1; - dict_del(xdata, GLUSTERFS_POSIXLK_COUNT); + dict_del_sizen(xdata, GLUSTERFS_POSIXLK_COUNT); } - if (dict_get(xdata, GLUSTERFS_PARENT_ENTRYLK)) { + if (dict_get_sizen(xdata, GLUSTERFS_PARENT_ENTRYLK)) { local->parent_entrylk_req = 1; - dict_del(xdata, GLUSTERFS_PARENT_ENTRYLK); + dict_del_sizen(xdata, GLUSTERFS_PARENT_ENTRYLK); } } @@ -187,20 +255,11 @@ pl_needs_xdata_response(pl_local_t *local) if (!local) return _gf_false; - if (local->parent_entrylk_req) - return _gf_true; - - if (local->entrylk_count_req) - return _gf_true; - - if (local->inodelk_dom_count_req) - return _gf_true; - - if (local->inodelk_count_req) + if (local->parent_entrylk_req || local->entrylk_count_req || + 
local->inodelk_dom_count_req || local->inodelk_count_req || + local->posixlk_count_req || local->multiple_dom_lk_requests) return _gf_true; - if (local->posixlk_count_req) - return _gf_true; return _gf_false; } @@ -221,8 +280,43 @@ pl_get_xdata_rsp_args(pl_local_t *local, char *fop, inode_t **parent, } } -int32_t -__get_posixlk_count(xlator_t *this, pl_inode_t *pl_inode) +static inline int +pl_track_io_fop_count(pl_local_t *local, xlator_t *this, pl_count_op_t op) +{ + pl_inode_t *pl_inode = NULL; + + if (!local) + return -1; + + pl_inode = pl_inode_get(this, local->inode, NULL); + if (!pl_inode) + return -1; + + if (pl_inode->mlock_enforced && pl_inode->track_fop_wind_count) { + pthread_mutex_lock(&pl_inode->mutex); + { + if (op == DECREMENT) { + pl_inode->fop_wind_count--; + /* fop_wind_count can go negative when lock enforcement is + * enabled on unwind path of an IO. Hence the "<" comparision. + */ + if (pl_inode->fop_wind_count <= 0) { + pthread_cond_broadcast(&pl_inode->check_fop_wind_count); + pl_inode->track_fop_wind_count = _gf_false; + pl_inode->fop_wind_count = 0; + } + } else { + pl_inode->fop_wind_count++; + } + } + pthread_mutex_unlock(&pl_inode->mutex); + } + + return 0; +} + +static int32_t +__get_posixlk_count(pl_inode_t *pl_inode) { posix_lock_t *lock = NULL; int32_t count = 0; @@ -237,10 +331,9 @@ get_posixlk_count(xlator_t *this, inode_t *inode) { pl_inode_t *pl_inode = NULL; uint64_t tmp_pl_inode = 0; - int ret = 0; int32_t count = 0; - ret = inode_ctx_get(inode, this, &tmp_pl_inode); + int ret = inode_ctx_get(inode, this, &tmp_pl_inode); if (ret != 0) { goto out; } @@ -249,7 +342,7 @@ get_posixlk_count(xlator_t *this, inode_t *inode) pthread_mutex_lock(&pl_inode->mutex); { - count = __get_posixlk_count(this, pl_inode); + count = __get_posixlk_count(pl_inode); } pthread_mutex_unlock(&pl_inode->mutex); @@ -265,10 +358,10 @@ pl_parent_entrylk_xattr_fill(xlator_t *this, inode_t *parent, char *basename, int32_t maxcount = -1; int ret = -1; - if (!parent || !basename || !strlen(basename)) + if (!parent || !basename) goto out; if (keep_max) { - ret = dict_get_int32(dict, GLUSTERFS_PARENT_ENTRYLK, &maxcount); + ret = dict_get_int32_sizen(dict, GLUSTERFS_PARENT_ENTRYLK, &maxcount); if (ret < 0) gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s", GLUSTERFS_PARENT_ENTRYLK); @@ -277,7 +370,7 @@ pl_parent_entrylk_xattr_fill(xlator_t *this, inode_t *parent, char *basename, if (maxcount >= entrylk) return; out: - ret = dict_set_int32(dict, GLUSTERFS_PARENT_ENTRYLK, entrylk); + ret = dict_set_int32_sizen(dict, GLUSTERFS_PARENT_ENTRYLK, entrylk); if (ret < 0) { gf_msg_debug(this->name, 0, " dict_set failed on key %s", GLUSTERFS_PARENT_ENTRYLK); @@ -293,7 +386,7 @@ pl_entrylk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict, int ret = -1; if (keep_max) { - ret = dict_get_int32(dict, GLUSTERFS_ENTRYLK_COUNT, &maxcount); + ret = dict_get_int32_sizen(dict, GLUSTERFS_ENTRYLK_COUNT, &maxcount); if (ret < 0) gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s", GLUSTERFS_ENTRYLK_COUNT); @@ -302,7 +395,7 @@ pl_entrylk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict, if (maxcount >= count) return; - ret = dict_set_int32(dict, GLUSTERFS_ENTRYLK_COUNT, count); + ret = dict_set_int32_sizen(dict, GLUSTERFS_ENTRYLK_COUNT, count); if (ret < 0) { gf_msg_debug(this->name, 0, " dict_set failed on key %s", GLUSTERFS_ENTRYLK_COUNT); @@ -318,7 +411,7 @@ pl_inodelk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict, int ret = -1; if (keep_max) { - ret = 
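pl_track_io_fop_count() above adjusts fop_wind_count under pl_inode->mutex only while mlock_enforced and track_fop_wind_count are set, and broadcasts check_fop_wind_count once the last wound fop has unwound, clamping the counter because tracking can be switched on while fops are already in flight. The following standalone sketch models the same counter-plus-condition-variable pattern with simplified, hypothetical names (io_tracker, track_wind, track_unwind); it is an illustration, not the xlator code.

/* Standalone sketch of the wind/unwind counter used by the patch
 * (simplified names; not the actual pl_inode_t). */
#include <pthread.h>
#include <stdbool.h>

struct io_tracker {
    pthread_mutex_t mutex;
    pthread_cond_t drained;   /* corresponds to check_fop_wind_count */
    int wind_count;           /* fops currently wound to the next xlator */
    bool tracking;            /* corresponds to track_fop_wind_count */
    bool enforced;            /* corresponds to mlock_enforced */
};

void track_wind(struct io_tracker *t)
{
    pthread_mutex_lock(&t->mutex);
    if (t->enforced && t->tracking)
        t->wind_count++;
    pthread_mutex_unlock(&t->mutex);
}

void track_unwind(struct io_tracker *t)
{
    pthread_mutex_lock(&t->mutex);
    if (t->enforced && t->tracking) {
        t->wind_count--;
        /* Tracking may be enabled after some fops were already in flight,
         * so the counter can dip below zero; clamp it and wake the waiter
         * once everything has drained. */
        if (t->wind_count <= 0) {
            t->wind_count = 0;
            t->tracking = false;
            pthread_cond_broadcast(&t->drained);
        }
    }
    pthread_mutex_unlock(&t->mutex);
}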
dict_get_int32(dict, GLUSTERFS_INODELK_COUNT, &maxcount); + ret = dict_get_int32_sizen(dict, GLUSTERFS_INODELK_COUNT, &maxcount); if (ret < 0) gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s", GLUSTERFS_INODELK_COUNT); @@ -327,7 +420,7 @@ pl_inodelk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict, if (maxcount >= count) return; - ret = dict_set_int32(dict, GLUSTERFS_INODELK_COUNT, count); + ret = dict_set_int32_sizen(dict, GLUSTERFS_INODELK_COUNT, count); if (ret < 0) { gf_msg_debug(this->name, 0, "Failed to set count for " @@ -347,7 +440,7 @@ pl_posixlk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict, int ret = -1; if (keep_max) { - ret = dict_get_int32(dict, GLUSTERFS_POSIXLK_COUNT, &maxcount); + ret = dict_get_int32_sizen(dict, GLUSTERFS_POSIXLK_COUNT, &maxcount); if (ret < 0) gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s", GLUSTERFS_POSIXLK_COUNT); @@ -356,7 +449,7 @@ pl_posixlk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict, if (maxcount >= count) return; - ret = dict_set_int32(dict, GLUSTERFS_POSIXLK_COUNT, count); + ret = dict_set_int32_sizen(dict, GLUSTERFS_POSIXLK_COUNT, count); if (ret < 0) { gf_msg_debug(this->name, 0, " dict_set failed on key %s", GLUSTERFS_POSIXLK_COUNT); @@ -364,6 +457,80 @@ pl_posixlk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict, } void +pl_inodelk_xattr_fill_each(xlator_t *this, inode_t *inode, dict_t *dict, + char *domname, gf_boolean_t keep_max, char *key) +{ + int32_t count = 0; + int32_t maxcount = -1; + int ret = -1; + + if (keep_max) { + ret = dict_get_int32(dict, key, &maxcount); + if (ret < 0) + gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s", + GLUSTERFS_INODELK_COUNT); + } + count = get_inodelk_count(this, inode, domname); + if (maxcount >= count) + return; + + ret = dict_set_int32(dict, key, count); + if (ret < 0) { + gf_msg_debug(this->name, 0, + "Failed to set count for " + "key %s", + key); + } + + return; +} + +static int +pl_inodelk_xattr_fill_multiple(dict_t *this, char *key, data_t *value, + void *data) +{ + multi_dom_lk_data *d = data; + char *tmp_key = NULL; + char *save_ptr = NULL; + + tmp_key = gf_strdup(key); + if (!tmp_key) + return -1; + + strtok_r(tmp_key, ":", &save_ptr); + if (!*save_ptr) { + if (tmp_key) + GF_FREE(tmp_key); + gf_msg(THIS->name, GF_LOG_ERROR, 0, EINVAL, + "Could not tokenize domain string from key %s", key); + return -1; + } + + pl_inodelk_xattr_fill_each(d->this, d->inode, d->xdata_rsp, save_ptr, + d->keep_max, key); + if (tmp_key) + GF_FREE(tmp_key); + + return 0; +} + +void +pl_fill_multiple_dom_lk_requests(xlator_t *this, pl_local_t *local, + inode_t *inode, dict_t *dict, + gf_boolean_t keep_max) +{ + multi_dom_lk_data data; + + data.this = this; + data.inode = inode; + data.xdata_rsp = dict; + data.keep_max = keep_max; + + dict_foreach_fnmatch(local->xdata, GLUSTERFS_INODELK_DOM_PREFIX "*", + pl_inodelk_xattr_fill_multiple, &data); +} + +void pl_set_xdata_response(xlator_t *this, pl_local_t *local, inode_t *parent, inode_t *inode, char *name, dict_t *xdata, gf_boolean_t max_lock) @@ -371,41 +538,28 @@ pl_set_xdata_response(xlator_t *this, pl_local_t *local, inode_t *parent, if (!xdata || !local) return; - if (local->parent_entrylk_req && parent && name && strlen(name)) + if (local->parent_entrylk_req && parent && name && name[0] != '\0') pl_parent_entrylk_xattr_fill(this, parent, name, xdata, max_lock); - if (local->entrylk_count_req && inode) + if (!inode) + return; + + if (local->entrylk_count_req) 
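pl_inodelk_xattr_fill_multiple() above duplicates each requested key, splits it at ':' with strtok_r(), and treats the remainder as the lock domain whose inodelk count is filled against the full key. A minimal standalone sketch of that split is shown below; the key literal and function name are hypothetical, and the real code additionally logs and skips keys that carry no domain suffix.

/* Standalone sketch: split a "<prefix>:<domain>" xattr key into its parts.
 * Names are illustrative; the real code works on a gf_strdup()'d copy of
 * the dict key. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Returns a newly allocated copy of the domain part, or NULL if the key
 * has no ":<domain>" suffix. Caller frees. */
static char *domain_from_key(const char *key)
{
    const char *sep = strchr(key, ':');

    if (sep == NULL || sep[1] == '\0')
        return NULL;            /* malformed: no domain after the prefix */

    return strdup(sep + 1);
}

int main(void)
{
    /* Hypothetical key; the real prefix is GLUSTERFS_INODELK_DOM_PREFIX. */
    char *dom = domain_from_key("glusterfs.inodelk-dom-count:my-domain");

    if (dom != NULL) {
        printf("domain: %s\n", dom);   /* -> "my-domain" */
        free(dom);
    }
    return 0;
}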
pl_entrylk_xattr_fill(this, inode, xdata, max_lock); - if (local->inodelk_dom_count_req && inode) + if (local->inodelk_dom_count_req) pl_inodelk_xattr_fill(this, inode, xdata, data_to_str(local->inodelk_dom_count_req), max_lock); - if (local->inodelk_count_req && inode) + if (local->inodelk_count_req) pl_inodelk_xattr_fill(this, inode, xdata, NULL, max_lock); - if (local->posixlk_count_req && inode) + if (local->posixlk_count_req) pl_posixlk_xattr_fill(this, inode, xdata, max_lock); -} - -/* Return true in case we need to ensure mandatory-locking - * semnatics under different modes. - */ -gf_boolean_t -pl_is_mandatory_locking_enabled(pl_inode_t *pl_inode) -{ - posix_locks_private_t *priv = NULL; - - priv = THIS->private; - if (priv->mandatory_mode == MLK_FILE_BASED && pl_inode->mandatory) - return _gf_true; - else if (priv->mandatory_mode == MLK_FORCED || - priv->mandatory_mode == MLK_OPTIMAL) - return _gf_true; - - return _gf_false; + if (local->multiple_dom_lk_requests) + pl_fill_multiple_dom_lk_requests(this, local, inode, xdata, max_lock); } /* Checks whether the region where fop is acting upon conflicts @@ -420,15 +574,19 @@ pl_is_fop_allowed(pl_inode_t *pl_inode, posix_lock_t *region, fd_t *fd, int ret = 0; if (!__rw_allowable(pl_inode, region, op)) { - if ((!fd) || (fd && (fd->flags & O_NONBLOCK))) { + if (pl_inode->mlock_enforced) { + *can_block = _gf_false; + } else if ((!fd) || (fd && (fd->flags & O_NONBLOCK))) { gf_log("locks", GF_LOG_TRACE, "returning EAGAIN" " because fd is O_NONBLOCK"); *can_block = _gf_false; - } else + } else { *can_block = _gf_true; - } else + } + } else { ret = 1; + } return ret; } @@ -436,9 +594,7 @@ pl_is_fop_allowed(pl_inode_t *pl_inode, posix_lock_t *region, fd_t *fd, static pl_fdctx_t * pl_new_fdctx() { - pl_fdctx_t *fdctx = NULL; - - fdctx = GF_CALLOC(1, sizeof(*fdctx), gf_locks_mt_pl_fdctx_t); + pl_fdctx_t *fdctx = GF_MALLOC(sizeof(*fdctx), gf_locks_mt_pl_fdctx_t); GF_VALIDATE_OR_GOTO("posix-locks", fdctx, out); INIT_LIST_HEAD(&fdctx->locks_list); @@ -471,7 +627,9 @@ pl_check_n_create_fdctx(xlator_t *this, fd_t *fd) if (ret != 0) { GF_FREE(fdctx); fdctx = NULL; + UNLOCK(&fd->lock); gf_log(this->name, GF_LOG_DEBUG, "failed to set fd ctx"); + goto out; } } unlock: @@ -486,8 +644,10 @@ pl_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { - STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, prebuf, postbuf, - xdata); + pl_track_io_fop_count(frame->local, this, DECREMENT); + + PL_STACK_UNWIND(discard, xdata, frame, op_ret, op_errno, prebuf, postbuf, + xdata); return 0; } @@ -495,6 +655,8 @@ int pl_discard_cont(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, size_t len, dict_t *xdata) { + pl_track_io_fop_count(frame->local, this, INCREMENT); + STACK_WIND(frame, pl_discard_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata); return 0; @@ -504,6 +666,7 @@ int32_t pl_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, size_t len, dict_t *xdata) { + pl_local_t *local = NULL; pl_inode_t *pl_inode = NULL; pl_rw_req_t *rw = NULL; posix_lock_t region = { @@ -520,17 +683,28 @@ pl_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, GF_VALIDATE_OR_GOTO("locks", this, unwind); - pl_inode = pl_inode_get(this, fd->inode); - if (!pl_inode) { + local = mem_get0(this->local_pool); + if (!local) { op_ret = -1; op_errno = ENOMEM; goto unwind; } - enabled = 
pl_is_mandatory_locking_enabled(pl_inode); + frame->local = local; + local->inode = inode_ref(fd->inode); + local->fd = fd_ref(fd); + + pl_inode = pl_inode_get(this, fd->inode, local); + if (!pl_inode) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } if (frame->root->pid < 0) enabled = _gf_false; + else + enabled = pl_is_mandatory_locking_enabled(pl_inode); if (enabled) { region.fl_start = offset; @@ -544,15 +718,19 @@ pl_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, { allowed = pl_is_fop_allowed(pl_inode, ®ion, fd, GF_FOP_DISCARD, &can_block); - if (allowed == 1) + if (allowed == 1) { + if (pl_inode->mlock_enforced && + pl_inode->track_fop_wind_count) { + pl_inode->fop_wind_count++; + } goto unlock; - else if (!can_block) { + } else if (!can_block) { op_errno = EAGAIN; op_ret = -1; goto unlock; } - rw = GF_CALLOC(1, sizeof(*rw), gf_locks_mt_pl_rw_req_t); + rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t); if (!rw) { op_errno = ENOMEM; op_ret = -1; @@ -581,7 +759,8 @@ pl_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata); unwind: if (op_ret == -1) - STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, NULL, NULL, NULL); + PL_STACK_UNWIND(discard, xdata, frame, op_ret, op_errno, NULL, NULL, + NULL); return 0; } @@ -591,8 +770,10 @@ pl_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { - STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, prebuf, postbuf, - xdata); + pl_track_io_fop_count(frame->local, this, DECREMENT); + + PL_STACK_UNWIND(zerofill, xdata, frame, op_ret, op_errno, prebuf, postbuf, + xdata); return 0; } @@ -600,6 +781,8 @@ int pl_zerofill_cont(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, off_t len, dict_t *xdata) { + pl_track_io_fop_count(frame->local, this, INCREMENT); + STACK_WIND(frame, pl_zerofill_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata); return 0; @@ -609,6 +792,7 @@ int32_t pl_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, off_t len, dict_t *xdata) { + pl_local_t *local = NULL; pl_inode_t *pl_inode = NULL; pl_rw_req_t *rw = NULL; posix_lock_t region = { @@ -625,17 +809,28 @@ pl_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, GF_VALIDATE_OR_GOTO("locks", this, unwind); - pl_inode = pl_inode_get(this, fd->inode); - if (!pl_inode) { + local = mem_get0(this->local_pool); + if (!local) { op_ret = -1; op_errno = ENOMEM; goto unwind; } - enabled = pl_is_mandatory_locking_enabled(pl_inode); + frame->local = local; + local->inode = inode_ref(fd->inode); + local->fd = fd_ref(fd); + + pl_inode = pl_inode_get(this, fd->inode, local); + if (!pl_inode) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } if (frame->root->pid < 0) enabled = _gf_false; + else + enabled = pl_is_mandatory_locking_enabled(pl_inode); if (enabled) { region.fl_start = offset; @@ -649,15 +844,19 @@ pl_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, { allowed = pl_is_fop_allowed(pl_inode, ®ion, fd, GF_FOP_ZEROFILL, &can_block); - if (allowed == 1) + if (allowed == 1) { + if (pl_inode->mlock_enforced && + pl_inode->track_fop_wind_count) { + pl_inode->fop_wind_count++; + } goto unlock; - else if (!can_block) { + } else if (!can_block) { op_errno = EAGAIN; op_ret = -1; goto unlock; } - rw = GF_CALLOC(1, sizeof(*rw), gf_locks_mt_pl_rw_req_t); + rw = 
GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t); if (!rw) { op_errno = ENOMEM; op_ret = -1; @@ -686,8 +885,8 @@ pl_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata); unwind: if (op_ret == -1) - STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, NULL, NULL, - NULL); + PL_STACK_UNWIND(zerofill, xdata, frame, op_ret, op_errno, NULL, NULL, + NULL); return 0; } @@ -697,24 +896,16 @@ pl_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { - pl_local_t *local = NULL; - - local = frame->local; - - if (local->op == GF_FOP_TRUNCATE) - loc_wipe(&local->loc[0]); + pl_local_t *local = frame->local; - if (local->xdata) - dict_unref(local->xdata); - if (local->fd) - fd_unref(local->fd); + pl_track_io_fop_count(local, this, DECREMENT); if (local->op == GF_FOP_TRUNCATE) - STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf, - xdata); + PL_STACK_UNWIND(truncate, xdata, frame, op_ret, op_errno, prebuf, + postbuf, xdata); else - STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf, - xdata); + PL_STACK_UNWIND(ftruncate, xdata, frame, op_ret, op_errno, prebuf, + postbuf, xdata); return 0; } @@ -722,6 +913,8 @@ int pl_ftruncate_cont(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata) { + pl_track_io_fop_count(frame->local, this, INCREMENT); + STACK_WIND(frame, pl_truncate_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); return 0; @@ -731,6 +924,8 @@ int pl_truncate_cont(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata) { + pl_track_io_fop_count(frame->local, this, INCREMENT); + STACK_WIND(frame, pl_truncate_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); return 0; @@ -741,7 +936,7 @@ truncate_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata) { - pl_local_t *local = NULL; + pl_local_t *local = frame->local; inode_t *inode = NULL; pl_inode_t *pl_inode = NULL; pl_rw_req_t *rw = NULL; @@ -756,7 +951,6 @@ truncate_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int allowed = 1; GF_VALIDATE_OR_GOTO("locks", this, unwind); - local = frame->local; if (op_ret != 0) { gf_log(this->name, GF_LOG_ERROR, @@ -770,17 +964,19 @@ truncate_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, else inode = local->fd->inode; - pl_inode = pl_inode_get(this, inode); + local->inode = inode_ref(inode); + + pl_inode = pl_inode_get(this, inode, local); if (!pl_inode) { op_ret = -1; op_errno = ENOMEM; goto unwind; } - enabled = pl_is_mandatory_locking_enabled(pl_inode); - if (frame->root->pid < 0) enabled = _gf_false; + else + enabled = pl_is_mandatory_locking_enabled(pl_inode); if (enabled) { region.fl_start = local->offset; @@ -794,15 +990,19 @@ truncate_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, allowed = pl_is_fop_allowed(pl_inode, ®ion, local->fd, local->op, &can_block); - if (allowed == 1) + if (allowed == 1) { + if (pl_inode->mlock_enforced && + pl_inode->track_fop_wind_count) { + pl_inode->fop_wind_count++; + } goto unlock; - else if (!can_block) { + } else if (!can_block) { op_errno = EAGAIN; op_ret = -1; goto unlock; } - rw = GF_CALLOC(1, sizeof(*rw), gf_locks_mt_pl_rw_req_t); + rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t); if (!rw) { op_errno = ENOMEM; op_ret = -1; @@ -850,25 
+1050,19 @@ truncate_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, } unwind: if (op_ret == -1) { - gf_log(this->name, GF_LOG_ERROR, + gf_log(this ? this->name : "locks", GF_LOG_ERROR, "truncate failed with " "ret: %d, error: %s", op_ret, strerror(op_errno)); - if (local->op == GF_FOP_TRUNCATE) - loc_wipe(&local->loc[0]); - if (local->xdata) - dict_unref(local->xdata); - if (local->fd) - fd_unref(local->fd); switch (local->op) { case GF_FOP_TRUNCATE: - STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, buf, - NULL, xdata); + PL_STACK_UNWIND(truncate, xdata, frame, op_ret, op_errno, buf, + NULL, xdata); break; case GF_FOP_FTRUNCATE: - STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, buf, - NULL, xdata); + PL_STACK_UNWIND(ftruncate, xdata, frame, op_ret, op_errno, buf, + NULL, xdata); break; default: break; @@ -900,9 +1094,10 @@ pl_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, STACK_WIND(frame, truncate_stat_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->stat, loc, NULL); ret = 0; + unwind: if (ret == -1) { - gf_log(this->name, GF_LOG_ERROR, + gf_log(this ? this->name : "locks", GF_LOG_ERROR, "truncate on %s failed with" " ret: %d, error: %s", loc->path, -1, strerror(ENOMEM)); @@ -935,7 +1130,7 @@ pl_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, ret = 0; unwind: if (ret == -1) { - gf_log(this->name, GF_LOG_ERROR, + gf_log(this ? this->name : "locks", GF_LOG_ERROR, "ftruncate failed with" " ret: %d, error: %s", -1, strerror(ENOMEM)); @@ -1040,68 +1235,68 @@ pl_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, return 0; } -int32_t -pl_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, - dict_t *xdata) +static int32_t +pl_getxattr_clrlk(xlator_t *this, const char *name, inode_t *inode, + dict_t **dict, int32_t *op_errno) { - int32_t op_errno = EINVAL; - int op_ret = -1; int32_t bcount = 0; int32_t gcount = 0; - char key[PATH_MAX] = { - 0, - }; + char *key = NULL; char *lk_summary = NULL; pl_inode_t *pl_inode = NULL; - dict_t *dict = NULL; clrlk_args args = { 0, }; char *brickname = NULL; + int32_t op_ret = -1; - if (!name) - goto usual; - - if (strncmp(name, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD))) - goto usual; + *op_errno = EINVAL; if (clrlk_parse_args(name, &args)) { - op_errno = EINVAL; + *op_errno = EINVAL; goto out; } - dict = dict_new(); - if (!dict) { - op_errno = ENOMEM; + *dict = dict_new(); + if (!*dict) { + *op_errno = ENOMEM; goto out; } - pl_inode = pl_inode_get(this, loc->inode); + pl_inode = pl_inode_get(this, inode, NULL); if (!pl_inode) { - op_errno = ENOMEM; + *op_errno = ENOMEM; goto out; } switch (args.type) { case CLRLK_INODE: case CLRLK_ENTRY: - op_ret = clrlk_clear_lks_in_all_domains( - this, pl_inode, &args, &bcount, &gcount, &op_errno); - if (op_ret) - goto out; + op_ret = clrlk_clear_lks_in_all_domains(this, pl_inode, &args, + &bcount, &gcount, op_errno); break; case CLRLK_POSIX: op_ret = clrlk_clear_posixlk(this, pl_inode, &args, &bcount, - &gcount, &op_errno); - if (op_ret) - goto out; + &gcount, op_errno); break; - case CLRLK_TYPE_MAX: - op_errno = EINVAL; - goto out; + default: + op_ret = -1; + *op_errno = EINVAL; + } + if (op_ret) { + if (args.type >= CLRLK_TYPE_MAX) { + gf_log(this->name, GF_LOG_ERROR, + "clear locks: invalid lock type %d", args.type); + } else { + gf_log(this->name, GF_LOG_ERROR, + "clear locks of type %s failed: %s", + clrlk_type_names[args.type], strerror(*op_errno)); + } + + goto out; } - op_ret = fetch_pathinfo(this, 
loc->inode, &op_errno, &brickname); + op_ret = fetch_pathinfo(this, inode, op_errno, &brickname); if (op_ret) { gf_log(this->name, GF_LOG_WARNING, "Couldn't get brickname"); } else { @@ -1116,43 +1311,62 @@ pl_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, if (!gcount && !bcount) { if (gf_asprintf(&lk_summary, "No locks cleared.") == -1) { op_ret = -1; - op_errno = ENOMEM; + *op_errno = ENOMEM; goto out; } - } else if (gf_asprintf( - &lk_summary, - "%s: %s blocked locks=%d " - "granted locks=%d", - (brickname == NULL) ? this->name : brickname, - (args.type == CLRLK_INODE) - ? "inode" - : (args.type == CLRLK_ENTRY) - ? "entry" - : (args.type == CLRLK_POSIX) ? "posix" : " ", - bcount, gcount) == -1) { + } else if (gf_asprintf(&lk_summary, + "%s: %s blocked locks=%d " + "granted locks=%d", + (brickname == NULL) ? this->name : brickname, + clrlk_type_names[args.type], bcount, gcount) == -1) { op_ret = -1; - op_errno = ENOMEM; + *op_errno = ENOMEM; goto out; } + gf_log(this->name, GF_LOG_DEBUG, "%s", lk_summary); - if (snprintf(key, sizeof(key), "%s", name) >= sizeof(key)) { + key = gf_strdup(name); + if (!key) { op_ret = -1; goto out; } - if (dict_set_dynstr(dict, key, lk_summary)) { + if (dict_set_dynstr(*dict, key, lk_summary)) { op_ret = -1; - op_errno = ENOMEM; + *op_errno = ENOMEM; goto out; } op_ret = 0; + out: GF_FREE(brickname); - STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xdata); - GF_FREE(args.opts); - if (op_ret && lk_summary) + GF_FREE(key); + if (op_ret) { GF_FREE(lk_summary); + } + + return op_ret; +} + +int32_t +pl_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, + dict_t *xdata) +{ + int32_t op_errno = EINVAL; + int32_t op_ret = -1; + dict_t *dict = NULL; + + if (!name) + goto usual; + + if (strncmp(name, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD))) + goto usual; + + op_ret = pl_getxattr_clrlk(this, name, loc->inode, &dict, &op_errno); + + STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xdata); + if (dict) dict_unref(dict); return 0; @@ -1218,7 +1432,7 @@ fetch_pathinfo(xlator_t *this, inode_t *inode, int32_t *op_errno, goto out; } - ret = dict_get_str(dict, GF_XATTR_PATHINFO_KEY, brickname); + ret = dict_get_str_sizen(dict, GF_XATTR_PATHINFO_KEY, brickname); if (ret) goto out; @@ -1241,15 +1455,12 @@ out: int pl_lockinfo_get_brickname(xlator_t *this, inode_t *inode, int32_t *op_errno) { - int ret = -1; - posix_locks_private_t *priv = NULL; + posix_locks_private_t *priv = this->private; char *brickname = NULL; char *end = NULL; char *tmp = NULL; - priv = this->private; - - ret = fetch_pathinfo(this, inode, op_errno, &brickname); + int ret = fetch_pathinfo(this, inode, op_errno, &brickname); if (ret) goto out; @@ -1277,12 +1488,10 @@ out: char * pl_lockinfo_key(xlator_t *this, inode_t *inode, int32_t *op_errno) { - posix_locks_private_t *priv = NULL; + posix_locks_private_t *priv = this->private; char *key = NULL; int ret = 0; - priv = this->private; - if (priv->brickname == NULL) { ret = pl_lockinfo_get_brickname(this, inode, op_errno); if (ret < 0) { @@ -1300,14 +1509,13 @@ int32_t pl_fgetxattr_handle_lockinfo(xlator_t *this, fd_t *fd, dict_t *dict, int32_t *op_errno) { - pl_inode_t *pl_inode = NULL; char *key = NULL, *buf = NULL; int32_t op_ret = 0; unsigned long fdnum = 0; int32_t len = 0; dict_t *tmp = NULL; - pl_inode = pl_inode_get(this, fd->inode); + pl_inode_t *pl_inode = pl_inode_get(this, fd->inode, NULL); if (!pl_inode) { gf_log(this->name, GF_LOG_DEBUG, "Could not get inode."); 
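The rewritten clear-locks summary above replaces the nested ternary with a lookup into the new clrlk_type_names table and reports out-of-range types explicitly. The snippet below shows the same enum-to-name table idiom in isolation, with simplified names (lk_type_t, lk_type_names) rather than the actual clrlk_* symbols.

/* Standalone sketch of the enum-to-name table idiom used for the
 * clear-locks summary (simplified; not the actual clrlk_* symbols). */
#include <stdio.h>

typedef enum { LK_INODE, LK_ENTRY, LK_POSIX, LK_TYPE_MAX } lk_type_t;

static const char *lk_type_names[LK_TYPE_MAX] = {
    [LK_INODE] = "inode",
    [LK_ENTRY] = "entry",
    [LK_POSIX] = "posix",
};

static const char *lk_type_name(int type)
{
    /* Guard against values outside the table, much as the patch checks
     * the type before printing the failure message. */
    if (type < 0 || type >= LK_TYPE_MAX || lk_type_names[type] == NULL)
        return "invalid";
    return lk_type_names[type];
}

int main(void)
{
    printf("%s blocked locks=%d granted locks=%d\n",
           lk_type_name(LK_POSIX), 1, 2);
    return 0;
}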
@@ -1347,8 +1555,9 @@ pl_fgetxattr_handle_lockinfo(xlator_t *this, fd_t *fd, dict_t *dict, goto out; } - len = dict_serialized_length(tmp); - if (len < 0) { + op_ret = dict_allocate_and_serialize(tmp, (char **)&buf, + (unsigned int *)&len); + if (op_ret != 0) { *op_errno = -op_ret; op_ret = -1; gf_log(this->name, GF_LOG_WARNING, @@ -1358,24 +1567,6 @@ pl_fgetxattr_handle_lockinfo(xlator_t *this, fd_t *fd, dict_t *dict, goto out; } - buf = GF_CALLOC(1, len, gf_common_mt_char); - if (buf == NULL) { - op_ret = -1; - *op_errno = ENOMEM; - goto out; - } - - op_ret = dict_serialize(tmp, buf); - if (op_ret < 0) { - *op_errno = -op_ret; - op_ret = -1; - gf_log(this->name, GF_LOG_WARNING, - "dict_serialize failed (%s) while handling lockinfo " - "for fd (ptr: %p inode-gfid:%s)", - strerror(*op_errno), fd, uuid_utoa(fd->inode->gfid)); - goto out; - } - op_ret = dict_set_dynptr(dict, GF_XATTR_LOCKINFO_KEY, buf, len); if (op_ret < 0) { *op_errno = -op_ret; @@ -1428,6 +1619,11 @@ pl_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, } goto unwind; + } else if (strncmp(name, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD)) == + 0) { + op_ret = pl_getxattr_clrlk(this, name, fd->inode, &dict, &op_errno); + + goto unwind; } else { goto usual; } @@ -1450,14 +1646,11 @@ int32_t pl_migrate_locks(call_frame_t *frame, fd_t *newfd, uint64_t oldfd_num, int32_t *op_errno) { - pl_inode_t *pl_inode = NULL; - uint64_t newfd_num = 0; posix_lock_t *l = NULL; int32_t op_ret = 0; + uint64_t newfd_num = fd_to_fdnum(newfd); - newfd_num = fd_to_fdnum(newfd); - - pl_inode = pl_inode_get(frame->this, newfd->inode); + pl_inode_t *pl_inode = pl_inode_get(frame->this, newfd->inode, NULL); if (pl_inode == NULL) { op_ret = -1; *op_errno = EBADFD; @@ -1486,11 +1679,10 @@ pl_fsetxattr_handle_lockinfo(call_frame_t *frame, fd_t *fd, char *lockinfo_buf, int len, int32_t *op_errno) { int32_t op_ret = -1; - dict_t *lockinfo = NULL; uint64_t oldfd_num = 0; char *key = NULL; - lockinfo = dict_new(); + dict_t *lockinfo = dict_new(); if (lockinfo == NULL) { op_ret = -1; *op_errno = ENOMEM; @@ -1522,7 +1714,7 @@ pl_fsetxattr_handle_lockinfo(call_frame_t *frame, fd_t *fd, char *lockinfo_buf, gf_log(frame->this->name, GF_LOG_WARNING, "migration of locks from oldfd (ptr:%p) to newfd " "(ptr:%p) (inode-gfid:%s)", - (void *)oldfd_num, fd, uuid_utoa(fd->inode->gfid)); + (void *)(uintptr_t)oldfd_num, fd, uuid_utoa(fd->inode->gfid)); goto out; } @@ -1536,6 +1728,27 @@ int32_t pl_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) { + pl_local_t *local = NULL; + pl_inode_t *pl_inode = NULL; + + local = frame->local; + if (local && local->update_mlock_enforced_flag && op_ret != -1) { + pl_inode = pl_inode_get(this, local->inode, NULL); + if (!pl_inode) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + pthread_mutex_lock(&pl_inode->mutex); + { + pl_inode->mlock_enforced = _gf_true; + pl_inode->check_mlock_info = _gf_false; + } + pthread_mutex_unlock(&pl_inode->mutex); + } + +unwind: PL_STACK_UNWIND_FOR_CLIENT(fsetxattr, xdata, frame, op_ret, op_errno, xdata); return 0; @@ -1545,12 +1758,14 @@ int32_t pl_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata) { - int32_t op_ret = 0, op_errno = 0; + int32_t op_errno = 0; void *lockinfo_buf = NULL; int len = 0; + char *name = NULL; + posix_locks_private_t *priv = this->private; - op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY, &lockinfo_buf, - &len); + 
int32_t op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY, + &lockinfo_buf, &len); if (lockinfo_buf == NULL) { goto usual; } @@ -1563,12 +1778,17 @@ pl_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, usual: PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL); + + PL_CHECK_LOCK_ENFORCE_KEY(frame, dict, name, this, ((loc_t *)NULL), fd, + priv); + STACK_WIND(frame, pl_fsetxattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); return 0; unwind: - STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, NULL); + PL_STACK_UNWIND_FOR_CLIENT(fsetxattr, xdata, frame, op_ret, op_errno, NULL); + return 0; } @@ -1616,10 +1836,7 @@ pl_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int pl_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - pl_inode_t *pl_inode = NULL; - - pl_inode = pl_inode_get(this, fd->inode); - + pl_inode_t *pl_inode = pl_inode_get(this, fd->inode, NULL); if (!pl_inode) { gf_log(this->name, GF_LOG_DEBUG, "Could not get inode."); STACK_UNWIND_STRICT(flush, frame, -1, EBADFD, NULL); @@ -1695,14 +1912,18 @@ pl_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, int op_errno = EINVAL; pl_inode_t *pl_inode = NULL; posix_lock_t *l = NULL; - posix_locks_private_t *priv = NULL; - - priv = this->private; + posix_locks_private_t *priv = this->private; GF_VALIDATE_OR_GOTO("locks", this, unwind); op_ret = 0, op_errno = 0; - pl_inode = pl_inode_get(this, fd->inode); + pl_inode = pl_inode_get(this, fd->inode, NULL); + if (!pl_inode) { + gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM, "Could not get inode"); + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } /* As per design, under forced and file-based mandatory locking modes * it doesn't matter whether inodes's lock list contain advisory or @@ -1777,7 +1998,8 @@ int pl_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL); + PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL); + STACK_WIND(frame, pl_create_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, xdata); @@ -1789,6 +2011,8 @@ pl_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iovec *vector, int32_t count, struct iatt *stbuf, struct iobref *iobref, dict_t *xdata) { + pl_track_io_fop_count(frame->local, this, DECREMENT); + PL_STACK_UNWIND(readv, xdata, frame, op_ret, op_errno, vector, count, stbuf, iobref, xdata); @@ -1800,6 +2024,8 @@ pl_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { + pl_track_io_fop_count(frame->local, this, DECREMENT); + PL_STACK_UNWIND(writev, xdata, frame, op_ret, op_errno, prebuf, postbuf, xdata); @@ -1822,6 +2048,10 @@ do_blocked_rw(pl_inode_t *pl_inode) if (__rw_allowable(pl_inode, &rw->region, rw->stub->fop)) { list_del_init(&rw->list); list_add_tail(&rw->list, &wind_list); + if (pl_inode->mlock_enforced && + pl_inode->track_fop_wind_count) { + pl_inode->fop_wind_count++; + } } } } @@ -1837,14 +2067,68 @@ do_blocked_rw(pl_inode_t *pl_inode) return; } +/* when mandatory lock is enforced: + If an IO request comes on a region which is out of the boundary of the + granted mandatory lock, it will be rejected. 
+ + Note: There is no IO blocking with mandatory lock enforced as it may be + a stale data from an old client. + */ +gf_boolean_t static within_range(posix_lock_t *existing, posix_lock_t *new) +{ + if (existing->fl_start <= new->fl_start && existing->fl_end >= new->fl_end) + return _gf_true; + + return _gf_false; +} + static int __rw_allowable(pl_inode_t *pl_inode, posix_lock_t *region, glusterfs_fop_t op) { posix_lock_t *l = NULL; - posix_locks_private_t *priv = NULL; + posix_locks_private_t *priv = THIS->private; int ret = 1; - priv = THIS->private; + if (pl_inode->mlock_enforced) { + list_for_each_entry(l, &pl_inode->ext_list, list) + { + /* + with lock enforced (fencing) there should not be any blocking + lock coexisting. + */ + if (same_owner(l, region)) { + /* Should range check be strict for same owner with fencing? */ + if (locks_overlap(l, region)) { + if (within_range(l, region)) { + return 1; + } else { + /* + Should we allow read fop if it does not fit it in the + range? + if (op == GF_FOP_READ && l->fl_type != F_WRLCK) { + return 1; + } + */ + return 0; + } + } + } else { + if (locks_overlap(l, region)) { + /* + with fencing should a read from a different owner be + allowed if the mandatory lock taken is F_RDLCK? + if (op == GF_FOP_READ && l->fl_type != F_WRLCK) { + return 1; + } + */ + return 0; + } + } + } + + /* No lock has been taken by this owner */ + return 0; + } list_for_each_entry(l, &pl_inode->ext_list, list) { @@ -1868,6 +2152,8 @@ int pl_readv_cont(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata) { + pl_track_io_fop_count(frame->local, this, INCREMENT); + STACK_WIND(frame, pl_readv_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata); @@ -1878,6 +2164,7 @@ int pl_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata) { + pl_local_t *local = NULL; pl_inode_t *pl_inode = NULL; pl_rw_req_t *rw = NULL; posix_lock_t region = { @@ -1894,18 +2181,26 @@ pl_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, GF_VALIDATE_OR_GOTO("locks", this, unwind); - pl_inode = pl_inode_get(this, fd->inode); + PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL); + + if (!frame->local) { + frame->local = mem_get0(this->local_pool); + local = frame->local; + local->inode = inode_ref(fd->inode); + local->fd = fd_ref(fd); + } + + pl_inode = pl_inode_get(this, fd->inode, local); if (!pl_inode) { op_ret = -1; op_errno = ENOMEM; goto unwind; } - PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL); - enabled = pl_is_mandatory_locking_enabled(pl_inode); - if (frame->root->pid < 0) enabled = _gf_false; + else + enabled = pl_is_mandatory_locking_enabled(pl_inode); if (enabled) { region.fl_start = offset; @@ -1919,15 +2214,19 @@ pl_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, { allowed = pl_is_fop_allowed(pl_inode, ®ion, fd, GF_FOP_READ, &can_block); - if (allowed == 1) + if (allowed == 1) { + if (pl_inode->mlock_enforced && + pl_inode->track_fop_wind_count) { + pl_inode->fop_wind_count++; + } goto unlock; - else if (!can_block) { + } else if (!can_block) { op_errno = EAGAIN; op_ret = -1; goto unlock; } - rw = GF_CALLOC(1, sizeof(*rw), gf_locks_mt_pl_rw_req_t); + rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t); if (!rw) { op_errno = ENOMEM; op_ret = -1; @@ -1958,8 +2257,8 @@ pl_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, } unwind: if (op_ret == -1) - STACK_UNWIND_STRICT(readv, 
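Under lock enforcement, __rw_allowable() above only admits an IO when the requesting owner already holds a granted lock that fully contains the IO region; an overlap with another owner's lock, or an owner's lock that only partially covers the region, rejects the fop instead of blocking it. The standalone sketch below reduces that containment test to a single granted lock with an integer owner id; the names and types are illustrative only.

/* Standalone sketch of the region-containment check behind within_range()
 * (simplified lock type; owner comparison reduced to an integer id). */
#include <stdbool.h>
#include <stdio.h>
#include <sys/types.h>

struct simple_lock {
    off_t start;
    off_t end;      /* inclusive, like fl_end in the patch */
    int owner;
};

/* True when `held` fully covers `req`, mirroring
 * existing->fl_start <= new->fl_start && existing->fl_end >= new->fl_end. */
static bool within_range(const struct simple_lock *held,
                         const struct simple_lock *req)
{
    return held->start <= req->start && held->end >= req->end;
}

/* 1 = allow the IO, 0 = reject (no blocking under enforcement). */
static int io_allowed(const struct simple_lock *held,
                      const struct simple_lock *req)
{
    if (held->owner != req->owner)
        return 0;               /* region belongs to someone else's lock */
    return within_range(held, req) ? 1 : 0;
}

int main(void)
{
    struct simple_lock held = {0, 4095, 1};
    struct simple_lock io_ok = {100, 200, 1};
    struct simple_lock io_bad = {4000, 8191, 1};

    /* Prints "1 0": the first IO fits inside the granted range, the
     * second one spills past it and is rejected. */
    printf("%d %d\n", io_allowed(&held, &io_ok), io_allowed(&held, &io_bad));
    return 0;
}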
frame, op_ret, op_errno, NULL, 0, NULL, NULL, - NULL); + PL_STACK_UNWIND(readv, xdata, frame, op_ret, op_errno, NULL, 0, NULL, + NULL, NULL); return 0; } @@ -1969,6 +2268,8 @@ pl_writev_cont(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, int count, off_t offset, uint32_t flags, struct iobref *iobref, dict_t *xdata) { + pl_track_io_fop_count(frame->local, this, INCREMENT); + STACK_WIND(frame, pl_writev_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, fd, vector, count, offset, flags, iobref, xdata); @@ -1981,6 +2282,7 @@ pl_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, int32_t count, off_t offset, uint32_t flags, struct iobref *iobref, dict_t *xdata) { + pl_local_t *local = NULL; pl_inode_t *pl_inode = NULL; pl_rw_req_t *rw = NULL; posix_lock_t region = { @@ -1997,18 +2299,26 @@ pl_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, GF_VALIDATE_OR_GOTO("locks", this, unwind); - pl_inode = pl_inode_get(this, fd->inode); + PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL); + + if (!frame->local) { + frame->local = mem_get0(this->local_pool); + local = frame->local; + local->inode = inode_ref(fd->inode); + local->fd = fd_ref(fd); + } + + pl_inode = pl_inode_get(this, fd->inode, local); if (!pl_inode) { op_ret = -1; op_errno = ENOMEM; goto unwind; } - PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL); - enabled = pl_is_mandatory_locking_enabled(pl_inode); - if (frame->root->pid < 0) enabled = _gf_false; + else + enabled = pl_is_mandatory_locking_enabled(pl_inode); if (enabled) { region.fl_start = offset; @@ -2022,15 +2332,24 @@ pl_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, { allowed = pl_is_fop_allowed(pl_inode, ®ion, fd, GF_FOP_WRITE, &can_block); - if (allowed == 1) + if (allowed == 1) { + if (pl_inode->mlock_enforced && + pl_inode->track_fop_wind_count) { + pl_inode->fop_wind_count++; + } goto unlock; - else if (!can_block) { - op_errno = EAGAIN; + } else if (!can_block) { + if (pl_inode->mlock_enforced) { + op_errno = EBUSY; + } else { + op_errno = EAGAIN; + } + op_ret = -1; goto unlock; } - rw = GF_CALLOC(1, sizeof(*rw), gf_locks_mt_pl_rw_req_t); + rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t); if (!rw) { op_errno = ENOMEM; op_ret = -1; @@ -2061,7 +2380,8 @@ pl_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, } unwind: if (op_ret == -1) - STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, NULL, NULL, NULL); + PL_STACK_UNWIND(writev, xdata, frame, op_ret, op_errno, NULL, NULL, + NULL); return 0; } @@ -2069,29 +2389,25 @@ unwind: static int __fd_has_locks(pl_inode_t *pl_inode, fd_t *fd) { - int found = 0; posix_lock_t *l = NULL; list_for_each_entry(l, &pl_inode->ext_list, list) { if (l->fd_num == fd_to_fdnum(fd)) { - found = 1; - break; + return 1; } } - return found; + return 0; } static posix_lock_t * lock_dup(posix_lock_t *lock) { - posix_lock_t *new_lock = NULL; - - new_lock = new_posix_lock(&lock->user_flock, lock->client, lock->client_pid, - &lock->owner, (fd_t *)lock->fd_num, - lock->lk_flags, lock->blocking); - return new_lock; + int32_t op_errno = 0; + return new_posix_lock(&lock->user_flock, lock->client, lock->client_pid, + &lock->owner, (fd_t *)lock->fd_num, lock->lk_flags, + lock->blocking, &op_errno); } static int @@ -2120,14 +2436,7 @@ __dup_locks_to_fdctx(pl_inode_t *pl_inode, fd_t *fd, pl_fdctx_t *fdctx) static int __copy_locks_to_fdctx(pl_inode_t *pl_inode, fd_t *fd, pl_fdctx_t *fdctx) { - int ret = 0; - - 
ret = __dup_locks_to_fdctx(pl_inode, fd, fdctx); - if (ret) - goto out; - -out: - return ret; + return __dup_locks_to_fdctx(pl_inode, fd, fdctx); } static void @@ -2198,9 +2507,10 @@ pl_getlk_fd(xlator_t *this, pl_inode_t *pl_inode, fd_t *fd, pthread_mutex_lock(&pl_inode->mutex); { if (!__fd_has_locks(pl_inode, fd)) { + pthread_mutex_unlock(&pl_inode->mutex); gf_log(this->name, GF_LOG_DEBUG, "fd=%p has no active locks", fd); ret = 0; - goto unlock; + goto out; } gf_log(this->name, GF_LOG_DEBUG, "There are active locks on fd"); @@ -2224,15 +2534,17 @@ pl_getlk_fd(xlator_t *this, pl_inode_t *pl_inode, fd_t *fd, "fdctx present -> returning the next lock"); ret = __set_next_lock_fd(fdctx, reqlock); if (ret) { + pthread_mutex_unlock(&pl_inode->mutex); gf_log(this->name, GF_LOG_DEBUG, "could not get next lock of fd"); - goto unlock; + goto out; } } } unlock: pthread_mutex_unlock(&pl_inode->mutex); +out: return ret; } @@ -2245,12 +2557,10 @@ pl_metalock_is_active(pl_inode_t *pl_inode) return 1; } -int -__pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock, int can_block) +void +__pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock) { list_add_tail(&reqlock->list, &pl_inode->queued_locks); - - return 0; } int @@ -2263,13 +2573,12 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, int can_block = 0; posix_lock_t *reqlock = NULL; posix_lock_t *conf = NULL; - int ret = 0; uint32_t lk_flags = 0; - posix_locks_private_t *priv = NULL; - - priv = this->private; + posix_locks_private_t *priv = this->private; + pl_local_t *local = NULL; + short lock_type = 0; - ret = dict_get_uint32(xdata, GF_LOCK_MODE, &lk_flags); + int ret = dict_get_uint32(xdata, GF_LOCK_MODE, &lk_flags); if (ret == 0) { if (priv->mandatory_mode == MLK_NONE) gf_log(this->name, GF_LOG_DEBUG, @@ -2298,7 +2607,17 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, flock->l_len = labs(flock->l_len); } - pl_inode = pl_inode_get(this, fd->inode); + local = mem_get0(this->local_pool); + if (!local) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } else { + frame->local = local; + local->fd = fd_ref(fd); + } + + pl_inode = pl_inode_get(this, fd->inode, local); if (!pl_inode) { op_ret = -1; op_errno = ENOMEM; @@ -2306,11 +2625,11 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, } reqlock = new_posix_lock(flock, frame->root->client, frame->root->pid, - &frame->root->lk_owner, fd, lk_flags, can_block); + &frame->root->lk_owner, fd, lk_flags, can_block, + &op_errno); if (!reqlock) { op_ret = -1; - op_errno = ENOMEM; goto unwind; } @@ -2402,6 +2721,7 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, case F_SETLK: reqlock->frame = frame; reqlock->this = this; + lock_type = flock->l_type; pthread_mutex_lock(&pl_inode->mutex); { @@ -2423,10 +2743,23 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, goto out; } + if (reqlock->fl_type != F_UNLCK && pl_inode->mlock_enforced) { + ret = pl_lock_preempt(pl_inode, reqlock); + if (ret == -1) { + gf_log(this->name, GF_LOG_ERROR, "lock preempt failed"); + op_ret = -1; + op_errno = EAGAIN; + __destroy_lock(reqlock); + goto out; + } + + pl_trace_block(this, frame, fd, NULL, cmd, flock, NULL); + goto unwind; + } + ret = pl_setlk(this, pl_inode, reqlock, can_block); if (ret == -1) { - if ((can_block) && (F_UNLCK != flock->l_type)) { - pl_trace_block(this, frame, fd, NULL, cmd, flock, NULL); + if ((can_block) && (F_UNLCK != lock_type)) { goto out; } gf_log(this->name, GF_LOG_DEBUG, "returning EAGAIN"); @@ 
-2448,7 +2781,7 @@ unwind: pl_trace_out(this, frame, fd, NULL, cmd, flock, op_ret, op_errno, NULL); pl_update_refkeeper(this, fd->inode); - STACK_UNWIND_STRICT(lk, frame, op_ret, op_errno, flock, xdata); + PL_STACK_UNWIND(lk, xdata, frame, op_ret, op_errno, flock, xdata); out: return 0; } @@ -2481,7 +2814,9 @@ pl_forget(xlator_t *this, inode_t *inode) INIT_LIST_HEAD(&inodelks_released); INIT_LIST_HEAD(&entrylks_released); - pl_inode = pl_inode_get(this, inode); + pl_inode = pl_inode_get(this, inode, NULL); + if (!pl_inode) + return 0; pthread_mutex_lock(&pl_inode->mutex); { @@ -2553,25 +2888,33 @@ pl_forget(xlator_t *this, inode_t *inode) } pthread_mutex_unlock(&pl_inode->mutex); - list_for_each_entry_safe(ext_l, ext_tmp, &posixlks_released, list) - { - STACK_UNWIND_STRICT(lk, ext_l->frame, -1, 0, &ext_l->user_flock, NULL); - __destroy_lock(ext_l); + if (!list_empty(&posixlks_released)) { + list_for_each_entry_safe(ext_l, ext_tmp, &posixlks_released, list) + { + STACK_UNWIND_STRICT(lk, ext_l->frame, -1, 0, &ext_l->user_flock, + NULL); + __destroy_lock(ext_l); + } } - list_for_each_entry_safe(ino_l, ino_tmp, &inodelks_released, blocked_locks) - { - STACK_UNWIND_STRICT(inodelk, ino_l->frame, -1, 0, NULL); - __pl_inodelk_unref(ino_l); + if (!list_empty(&inodelks_released)) { + list_for_each_entry_safe(ino_l, ino_tmp, &inodelks_released, + blocked_locks) + { + STACK_UNWIND_STRICT(inodelk, ino_l->frame, -1, 0, NULL); + __pl_inodelk_unref(ino_l); + } } - list_for_each_entry_safe(entry_l, entry_tmp, &entrylks_released, - blocked_locks) - { - STACK_UNWIND_STRICT(entrylk, entry_l->frame, -1, 0, NULL); - GF_FREE((char *)entry_l->basename); - GF_FREE(entry_l->connection_id); - GF_FREE(entry_l); + if (!list_empty(&entrylks_released)) { + list_for_each_entry_safe(entry_l, entry_tmp, &entrylks_released, + blocked_locks) + { + STACK_UNWIND_STRICT(entrylk, entry_l->frame, -1, 0, NULL); + GF_FREE((char *)entry_l->basename); + GF_FREE(entry_l->connection_id); + GF_FREE(entry_l); + } } pthread_mutex_destroy(&pl_inode->mutex); @@ -2645,11 +2988,85 @@ out: return ret; } +static int32_t +pl_request_link_count(dict_t **pxdata) +{ + dict_t *xdata; + + xdata = *pxdata; + if (xdata == NULL) { + xdata = dict_new(); + if (xdata == NULL) { + return ENOMEM; + } + } else { + dict_ref(xdata); + } + + if (dict_set_uint32(xdata, GET_LINK_COUNT, 0) != 0) { + dict_unref(xdata); + return ENOMEM; + } + + *pxdata = xdata; + + return 0; +} + +static int32_t +pl_check_link_count(dict_t *xdata) +{ + int32_t count; + + /* In case we are unable to read the link count from xdata, we take a + * conservative approach and return -2, which will prevent the inode from + * being considered deleted. In fact it will cause link tracking for this + * inode to be disabled completely to avoid races. */ + + if (xdata == NULL) { + return -2; + } + + if (dict_get_int32(xdata, GET_LINK_COUNT, &count) != 0) { + return -2; + } + + return count; +} + int32_t pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata, struct iatt *postparent) { + pl_inode_t *pl_inode; + + if (op_ret >= 0) { + pl_inode = pl_inode_get(this, inode, NULL); + if (pl_inode == NULL) { + PL_STACK_UNWIND(lookup, xdata, frame, -1, ENOMEM, NULL, NULL, NULL, + NULL); + return 0; + } + + pthread_mutex_lock(&pl_inode->mutex); + + /* We only update the link count if we previously didn't know it. + * Doing it always can lead to races since lookup is not executed + * atomically most of the times. 
*/ + if (pl_inode->links == -2) { + pl_inode->links = pl_check_link_count(xdata); + if (buf->ia_type == IA_IFDIR) { + /* Directories have at least 2 links. To avoid special handling + * for directories, we simply decrement the value here to make + * them equivalent to regular files. */ + pl_inode->links--; + } + } + + pthread_mutex_unlock(&pl_inode->mutex); + } + PL_STACK_UNWIND(lookup, xdata, frame, op_ret, op_errno, inode, buf, xdata, postparent); return 0; @@ -2658,9 +3075,17 @@ pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t pl_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL); - STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, xdata); + int32_t error; + + error = pl_request_link_count(&xdata); + if (error == 0) { + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); + STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xdata); + dict_unref(xdata); + } else { + STACK_UNWIND_STRICT(lookup, frame, -1, error, NULL, NULL, NULL, NULL); + } return 0; } @@ -2721,9 +3146,8 @@ pl_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, lock_migration_info_t * gf_mig_info_for_lock(posix_lock_t *lock) { - lock_migration_info_t *new = NULL; - - new = GF_CALLOC(1, sizeof(lock_migration_info_t), gf_common_mt_lock_mig); + lock_migration_info_t *new = GF_MALLOC(sizeof(lock_migration_info_t), + gf_common_mt_lock_mig); if (new == NULL) { goto out; } @@ -2751,7 +3175,7 @@ pl_fill_active_locks(pl_inode_t *pl_inode, lock_migration_info_t *lmi) { if (list_empty(&pl_inode->ext_list)) { count = 0; - goto out; + goto unlock; } list_for_each_entry(temp, &pl_inode->ext_list, list) @@ -2761,6 +3185,7 @@ pl_fill_active_locks(pl_inode_t *pl_inode, lock_migration_info_t *lmi) newlock = gf_mig_info_for_lock(temp); if (!newlock) { + pthread_mutex_unlock(&pl_inode->mutex); gf_msg(THIS->name, GF_LOG_ERROR, 0, 0, "lock_dup failed"); count = -1; goto out; @@ -2771,8 +3196,9 @@ pl_fill_active_locks(pl_inode_t *pl_inode, lock_migration_info_t *lmi) } } -out: +unlock: pthread_mutex_unlock(&pl_inode->mutex); +out: return count; } @@ -2788,7 +3214,7 @@ pl_getactivelk(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) INIT_LIST_HEAD(&locks.list); - pl_inode = pl_inode_get(this, loc->inode); + pl_inode = pl_inode_get(this, loc->inode, NULL); if (!pl_inode) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "pl_inode_get failed"); @@ -2828,9 +3254,8 @@ __pl_metalk_ref(pl_meta_lock_t *lock) pl_meta_lock_t * new_meta_lock(call_frame_t *frame, xlator_t *this) { - pl_meta_lock_t *lock = NULL; - - lock = GF_CALLOC(1, sizeof(*lock), gf_locks_mt_pl_meta_lock_t); + pl_meta_lock_t *lock = GF_CALLOC(1, sizeof(*lock), + gf_locks_mt_pl_meta_lock_t); if (!lock) { gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM, @@ -2904,7 +3329,7 @@ pl_metalk(call_frame_t *frame, xlator_t *this, inode_t *inode) pl_meta_lock_t *reqlk = NULL; pl_ctx_t *ctx = NULL; - pl_inode = pl_inode_get(this, inode); + pl_inode = pl_inode_get(this, inode, NULL); if (!pl_inode) { gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM, "pl_inode mem allocation failedd"); @@ -2934,15 +3359,15 @@ pl_metalk(call_frame_t *frame, xlator_t *this, inode_t *inode) pthread_mutex_lock(&pl_inode->mutex); { if (pl_metalock_is_active(pl_inode)) { - gf_msg(this->name, GF_LOG_WARNING, EINVAL, 0, - "More than one meta-lock can not be granted on" - "the inode"); ret = -1; } 
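pl_lookup() now asks for GET_LINK_COUNT in the request xdata, and pl_lookup_cbk() records the answer only while pl_inode->links still holds the -2 sentinel, decrementing it once for directories so they can be handled like regular files. The snippet below is a standalone model of that bookkeeping with hypothetical names (tracked_inode, update_links); the sentinel value and the directory adjustment mirror the patch.

/* Standalone sketch of the link-count bookkeeping done in pl_lookup_cbk
 * (hypothetical types; -2 means "unknown, keep delete tracking disabled"). */
#include <stdbool.h>
#include <stdio.h>

#define LINKS_UNKNOWN (-2)

struct tracked_inode {
    int links;        /* initialised to LINKS_UNKNOWN when created */
    bool is_dir;
};

/* `have_count` models whether GET_LINK_COUNT was present in the reply
 * xdata; `count` is the value read from it. */
static void update_links(struct tracked_inode *ino, bool have_count, int count)
{
    /* Only fill the value in once; later lookups racing with links/unlinks
     * could otherwise store a stale count. */
    if (ino->links != LINKS_UNKNOWN)
        return;

    ino->links = have_count ? count : LINKS_UNKNOWN;

    /* Directories always have at least 2 links ("." plus the parent's
     * entry); normalising here lets the rest of the code treat them like
     * regular files. */
    if (have_count && ino->is_dir)
        ino->links--;
}

int main(void)
{
    struct tracked_inode dir = {LINKS_UNKNOWN, true};

    update_links(&dir, true, 2);
    printf("dir links after normalisation: %d\n", dir.links);  /* 1 */
    return 0;
}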
} pthread_mutex_unlock(&pl_inode->mutex); if (ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, EINVAL, 0, + "More than one meta-lock cannot be granted on" + " the inode"); goto out; } @@ -2978,9 +3403,8 @@ out: return ret; } -void -__unwind_queued_locks(xlator_t *this, pl_inode_t *pl_inode, - struct list_head *tmp_list) +static void +__unwind_queued_locks(pl_inode_t *pl_inode, struct list_head *tmp_list) { if (list_empty(&pl_inode->queued_locks)) return; @@ -2988,9 +3412,8 @@ __unwind_queued_locks(xlator_t *this, pl_inode_t *pl_inode, list_splice_init(&pl_inode->queued_locks, tmp_list); } -void -__unwind_blocked_locks(xlator_t *this, pl_inode_t *pl_inode, - struct list_head *tmp_list) +static void +__unwind_blocked_locks(pl_inode_t *pl_inode, struct list_head *tmp_list) { posix_lock_t *lock = NULL; posix_lock_t *tmp = NULL; @@ -3038,7 +3461,7 @@ pl_metaunlock(call_frame_t *frame, xlator_t *this, inode_t *inode, dict_t *dict) goto out; } - pl_inode = pl_inode_get(this, inode); + pl_inode = pl_inode_get(this, inode, NULL); if (!pl_inode) { ret = -1; goto out; @@ -3049,12 +3472,12 @@ pl_metaunlock(call_frame_t *frame, xlator_t *this, inode_t *inode, dict_t *dict) pthread_mutex_lock(&pl_inode->mutex); { /* Unwind queued locks regardless of migration status */ - __unwind_queued_locks(this, pl_inode, &tmp_posixlk_list); + __unwind_queued_locks(pl_inode, &tmp_posixlk_list); /* Unwind blocked locks only for successful migration */ - if (dict_get(dict, "status")) { + if (dict_get_sizen(dict, "status")) { /* unwind all blocked locks */ - __unwind_blocked_locks(this, pl_inode, &tmp_posixlk_list); + __unwind_blocked_locks(pl_inode, &tmp_posixlk_list); } /* unlock metalk */ @@ -3081,7 +3504,7 @@ pl_metaunlock(call_frame_t *frame, xlator_t *this, inode_t *inode, dict_t *dict) inode_unref(pl_inode->inode); } - if (dict_get(dict, "status")) + if (dict_get_sizen(dict, "status")) pl_inode->migrated = _gf_true; else pl_inode->migrated = _gf_false; @@ -3110,6 +3533,34 @@ int32_t pl_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) { + pl_local_t *local = NULL; + pl_inode_t *pl_inode = NULL; + local = frame->local; + if (local && local->update_mlock_enforced_flag && op_ret != -1) { + pl_inode = pl_inode_get(this, local->inode, NULL); + if (!pl_inode) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + pthread_mutex_lock(&pl_inode->mutex); + { + while (pl_inode->fop_wind_count > 0) { + gf_msg(this->name, GF_LOG_INFO, 0, 0, + "waiting for existing fops (count %d) to drain for " + "gfid %s", + pl_inode->fop_wind_count, uuid_utoa(pl_inode->gfid)); + pthread_cond_wait(&pl_inode->check_fop_wind_count, + &pl_inode->mutex); + } + pl_inode->mlock_enforced = _gf_true; + pl_inode->check_mlock_info = _gf_false; + } + pthread_mutex_unlock(&pl_inode->mutex); + } + +unwind: PL_STACK_UNWIND_FOR_CLIENT(setxattr, xdata, frame, op_ret, op_errno, xdata); return 0; } @@ -3121,15 +3572,16 @@ pl_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, int op_ret = 0; int op_errno = EINVAL; dict_t *xdata_rsp = NULL; + char *name = NULL; + posix_locks_private_t *priv = this->private; - PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL); + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); - if (dict_get(dict, GF_META_LOCK_KEY)) { + if (dict_get_sizen(dict, GF_META_LOCK_KEY)) { op_ret = pl_metalk(frame, this, loc->inode); - } else if (dict_get(dict, GF_META_UNLOCK_KEY)) { + } else if (dict_get_sizen(dict, 
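Before pl_setxattr_cbk() above flips mlock_enforced on, it waits on check_fop_wind_count until every already-wound fop has unwound, so the new lock owner does not preempt IO that is still in flight. The sketch below shows that consumer side of the counter from the earlier pl_track_io_fop_count illustration, again with simplified hypothetical names; the io_tracker type is repeated so the snippet stands alone.

/* Standalone sketch of the drain-then-enable step in pl_setxattr_cbk
 * (same simplified io_tracker as the earlier sketch, repeated here). */
#include <pthread.h>
#include <stdbool.h>

struct io_tracker {
    pthread_mutex_t mutex;
    pthread_cond_t drained;
    int wind_count;
    bool tracking;
    bool enforced;
};

/* Called once the enforce-mandatory-lock xattr has been stored. */
void enable_enforcement(struct io_tracker *t)
{
    pthread_mutex_lock(&t->mutex);
    while (t->wind_count > 0) {
        /* Woken by the broadcast issued when the last in-flight fop
         * unwinds (track_unwind in the earlier sketch). */
        pthread_cond_wait(&t->drained, &t->mutex);
    }
    t->enforced = true;
    pthread_mutex_unlock(&t->mutex);
}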
GF_META_UNLOCK_KEY)) { op_ret = pl_metaunlock(frame, this, loc->inode, dict); - } else { goto usual; } @@ -3139,9 +3591,17 @@ pl_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, return 0; usual: + PL_CHECK_LOCK_ENFORCE_KEY(frame, dict, name, this, loc, ((fd_t *)NULL), + priv); + STACK_WIND(frame, pl_setxattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata); return 0; + +unwind: + PL_STACK_UNWIND_FOR_CLIENT(setxattr, xdata, frame, op_ret, op_errno, xdata); + + return 0; } void @@ -3150,10 +3610,10 @@ pl_dump_lock(char *str, int size, struct gf_flock *flock, gf_lkowner_t *owner, time_t *blkd_time, gf_boolean_t active) { char *type_str = NULL; - char granted[256] = { + char granted[GF_TIMESTR_SIZE] = { 0, }; - char blocked[256] = { + char blocked[GF_TIMESTR_SIZE] = { 0, }; @@ -3204,10 +3664,10 @@ __dump_entrylks(pl_inode_t *pl_inode) { pl_dom_list_t *dom = NULL; pl_entry_lock_t *lock = NULL; - char blocked[256] = { + char blocked[GF_TIMESTR_SIZE] = { 0, }; - char granted[256] = { + char granted[GF_TIMESTR_SIZE] = { 0, }; int count = 0; @@ -3227,10 +3687,10 @@ __dump_entrylks(pl_inode_t *pl_inode) list_for_each_entry(lock, &dom->entrylk_list, domain_list) { - gf_time_fmt(granted, sizeof(granted), lock->granted_time.tv_sec, + gf_time_fmt(granted, sizeof(granted), lock->granted_time, gf_timefmt_FT); gf_proc_dump_build_key(key, k, "entrylk[%d](ACTIVE)", count); - if (lock->blkd_time.tv_sec == 0) { + if (lock->blkd_time == 0) { snprintf(tmp, sizeof(tmp), ENTRY_GRNTD_FMT, lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" : "ENTRYLK_WRLCK", @@ -3238,7 +3698,7 @@ __dump_entrylks(pl_inode_t *pl_inode) lkowner_utoa(&lock->owner), lock->client, lock->connection_id, granted); } else { - gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time.tv_sec, + gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time, gf_timefmt_FT); snprintf(tmp, sizeof(tmp), ENTRY_BLKD_GRNTD_FMT, lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" @@ -3255,7 +3715,7 @@ __dump_entrylks(pl_inode_t *pl_inode) list_for_each_entry(lock, &dom->blocked_entrylks, blocked_locks) { - gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time.tv_sec, + gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time, gf_timefmt_FT); gf_proc_dump_build_key(key, k, "entrylk[%d](BLOCKED)", count); @@ -3307,9 +3767,8 @@ __dump_inodelks(pl_inode_t *pl_inode) SET_FLOCK_PID(&lock->user_flock, lock); pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner, - lock->client, lock->connection_id, - &lock->granted_time.tv_sec, &lock->blkd_time.tv_sec, - _gf_true); + lock->client, lock->connection_id, &lock->granted_time, + &lock->blkd_time, _gf_true); gf_proc_dump_write(key, "%s", tmp); count++; @@ -3321,8 +3780,8 @@ __dump_inodelks(pl_inode_t *pl_inode) count); SET_FLOCK_PID(&lock->user_flock, lock); pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner, - lock->client, lock->connection_id, 0, - &lock->blkd_time.tv_sec, _gf_false); + lock->client, lock->connection_id, 0, &lock->blkd_time, + _gf_false); gf_proc_dump_write(key, "%s", tmp); count++; @@ -3355,9 +3814,8 @@ __dump_posixlks(pl_inode_t *pl_inode) gf_proc_dump_build_key(key, "posixlk", "posixlk[%d](%s)", count, lock->blocked ? "BLOCKED" : "ACTIVE"); pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner, - lock->client, NULL, &lock->granted_time.tv_sec, - &lock->blkd_time.tv_sec, - (lock->blocked) ? _gf_false : _gf_true); + lock->client, lock->client_uid, &lock->granted_time, + &lock->blkd_time, (lock->blocked) ? 
_gf_false : _gf_true); gf_proc_dump_write(key, "%s", tmp); count++; @@ -3436,11 +3894,15 @@ unlock: __dump_inodelks(pl_inode); } - count = __get_posixlk_count(this, pl_inode); + count = __get_posixlk_count(pl_inode); if (count) { gf_proc_dump_write("posixlk-count", "%d", count); __dump_posixlks(pl_inode); } + + gf_proc_dump_write("links", "%d", pl_inode->links); + gf_proc_dump_write("removes_pending", "%u", pl_inode->remove_running); + gf_proc_dump_write("removed", "%u", pl_inode->removed); } pthread_mutex_unlock(&pl_inode->mutex); @@ -3549,9 +4011,9 @@ pl_metalk_client_cleanup(xlator_t *this, pl_ctx_t *ctx) * unwind all queued and blocked locks to check * migration status and find the correct * destination */ - __unwind_queued_locks(this, pl_inode, &tmp_posixlk_list); + __unwind_queued_locks(pl_inode, &tmp_posixlk_list); - __unwind_blocked_locks(this, pl_inode, &tmp_posixlk_list); + __unwind_blocked_locks(pl_inode, &tmp_posixlk_list); list_del_init(&meta_lock->list); @@ -3583,10 +4045,7 @@ unlock: static int pl_client_disconnect_cbk(xlator_t *this, client_t *client) { - pl_ctx_t *pl_ctx = NULL; - - pl_ctx = pl_ctx_get(client, this); - + pl_ctx_t *pl_ctx = pl_ctx_get(client, this); if (pl_ctx) { pl_inodelk_client_cleanup(this, pl_ctx); pl_entrylk_client_cleanup(this, pl_ctx); @@ -3623,10 +4082,9 @@ pl_client_destroy_cbk(xlator_t *this, client_t *client) int reconfigure(xlator_t *this, dict_t *options) { - posix_locks_private_t *priv = NULL; + posix_locks_private_t *priv = this->private; int ret = -1; - - priv = this->private; + char *tmp_str = NULL; GF_OPTION_RECONF("trace", priv->trace, options, bool, out); @@ -3648,6 +4106,20 @@ reconfigure(xlator_t *this, dict_t *options) GF_OPTION_RECONF("notify-contention-delay", priv->notify_contention_delay, options, uint32, out); + GF_OPTION_RECONF("mandatory-locking", tmp_str, options, str, out); + + GF_OPTION_RECONF("enforce-mandatory-lock", priv->mlock_enforced, options, + bool, out); + + if (!strcmp(tmp_str, "forced")) + priv->mandatory_mode = MLK_FORCED; + else if (!strcmp(tmp_str, "file")) + priv->mandatory_mode = MLK_FILE_BASED; + else if (!strcmp(tmp_str, "optimal")) + priv->mandatory_mode = MLK_OPTIMAL; + else + priv->mandatory_mode = MLK_NONE; + ret = 0; out: @@ -3695,6 +4167,7 @@ init(xlator_t *this) priv->mandatory_mode = MLK_OPTIMAL; else priv->mandatory_mode = MLK_NONE; + tmp_str = NULL; GF_OPTION_INIT("trace", priv->trace, bool, out); @@ -3714,6 +4187,8 @@ init(xlator_t *this) GF_OPTION_INIT("notify-contention-delay", priv->notify_contention_delay, uint32, out); + GF_OPTION_INIT("enforce-mandatory-lock", priv->mlock_enforced, bool, out); + this->local_pool = mem_pool_new(pl_local_t, 32); if (!this->local_pool) { ret = -1; @@ -3732,19 +4207,21 @@ out: return ret; } -int +void fini(xlator_t *this) { - posix_locks_private_t *priv = NULL; - - priv = this->private; + posix_locks_private_t *priv = this->private; if (!priv) - return 0; + return; this->private = NULL; + if (this->local_pool) { + mem_pool_destroy(this->local_pool); + this->local_pool = NULL; + } GF_FREE(priv->brickname); GF_FREE(priv); - return 0; + return; } int @@ -3771,8 +4248,11 @@ pl_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, struct iatt *postoldparent, struct iatt *prenewparent, struct iatt *postnewparent, dict_t *xdata) { + pl_inode_remove_cbk(this, cookie, op_ret < 0 ? 
op_errno : 0); + PL_STACK_UNWIND(rename, xdata, frame, op_ret, op_errno, buf, preoldparent, postoldparent, prenewparent, postnewparent, xdata); + return 0; } @@ -3780,19 +4260,23 @@ int32_t pl_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, oldloc, newloc); + int32_t error; + + error = PL_INODE_REMOVE(rename, frame, this, oldloc, newloc, pl_rename, + pl_rename_cbk, oldloc, newloc, xdata); + if (error > 0) { + STACK_UNWIND_STRICT(rename, frame, -1, error, NULL, NULL, NULL, NULL, + NULL, NULL); + } - STACK_WIND(frame, pl_rename_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); return 0; } posix_lock_t * gf_lkmig_info_to_posix_lock(call_frame_t *frame, lock_migration_info_t *lmi) { - posix_lock_t *lock = NULL; - - lock = GF_CALLOC(1, sizeof(posix_lock_t), gf_locks_mt_posix_lock_t); + posix_lock_t *lock = GF_CALLOC(1, sizeof(posix_lock_t), + gf_locks_mt_posix_lock_t); if (!lock) goto out; @@ -3840,6 +4324,7 @@ pl_write_active_locks(call_frame_t *frame, pl_inode_t *pl_inode, /* Just making sure the activelk list is empty. Should not * happen though*/ if (!list_empty(&pl_inode->ext_list)) { + pthread_mutex_unlock(&pl_inode->mutex); gf_msg(THIS->name, GF_LOG_ERROR, 0, 0, "invalid locks found"); ret = -1; @@ -3848,6 +4333,7 @@ pl_write_active_locks(call_frame_t *frame, pl_inode_t *pl_inode, /* This list also should not be empty */ if (list_empty(&locklist->list)) { + pthread_mutex_unlock(&pl_inode->mutex); gf_msg(THIS->name, GF_LOG_ERROR, 0, 0, "empty lock list"); ret = -1; @@ -3858,6 +4344,7 @@ pl_write_active_locks(call_frame_t *frame, pl_inode_t *pl_inode, { newlock = gf_lkmig_info_to_posix_lock(frame, temp); if (!newlock) { + pthread_mutex_unlock(&pl_inode->mutex); gf_msg(THIS->name, GF_LOG_ERROR, 0, 0, "mem allocation failed for newlock"); @@ -3867,12 +4354,10 @@ pl_write_active_locks(call_frame_t *frame, pl_inode_t *pl_inode, list_add_tail(&newlock->list, &pl_inode->ext_list); } } - -out: /*TODO: What if few lock add failed with ENOMEM. Should the already * added locks be clearted */ pthread_mutex_unlock(&pl_inode->mutex); - +out: return ret; } @@ -3880,12 +4365,11 @@ static int pl_setactivelk(call_frame_t *frame, xlator_t *this, loc_t *loc, lock_migration_info_t *locklist, dict_t *xdata) { - pl_inode_t *pl_inode = NULL; int op_ret = 0; int op_errno = 0; int ret = 0; - pl_inode = pl_inode_get(this, loc->inode); + pl_inode_t *pl_inode = pl_inode_get(this, loc->inode, NULL); if (!pl_inode) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "pl_inode_get failed"); @@ -3908,8 +4392,11 @@ pl_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { + pl_inode_remove_cbk(this, cookie, op_ret < 0 ? 
op_errno : 0); + PL_STACK_UNWIND(unlink, xdata, frame, op_ret, op_errno, preparent, postparent, xdata); + return 0; } @@ -3917,9 +4404,14 @@ int32_t pl_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL); - STACK_WIND(frame, pl_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); + int32_t error; + + error = PL_INODE_REMOVE(unlink, frame, this, loc, NULL, pl_unlink, + pl_unlink_cbk, loc, xflag, xdata); + if (error > 0) { + STACK_UNWIND_STRICT(unlink, frame, -1, error, NULL, NULL, NULL); + } + return 0; } @@ -3937,7 +4429,7 @@ int pl_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL); + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); STACK_WIND(frame, pl_mkdir_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata); return 0; @@ -3955,7 +4447,7 @@ pl_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int pl_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL); + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); STACK_WIND(frame, pl_stat_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->stat, loc, xdata); return 0; @@ -3975,7 +4467,7 @@ int pl_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL); + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); STACK_WIND(frame, pl_mknod_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); return 0; @@ -3986,8 +4478,11 @@ pl_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { + pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0); + PL_STACK_UNWIND_FOR_CLIENT(rmdir, xdata, frame, op_ret, op_errno, preparent, postparent, xdata); + return 0; } @@ -3995,9 +4490,14 @@ int pl_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL); - STACK_WIND(frame, pl_rmdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rmdir, loc, xflags, xdata); + int32_t error; + + error = PL_INODE_REMOVE(rmdir, frame, this, loc, NULL, pl_rmdir, + pl_rmdir_cbk, loc, xflags, xdata); + if (error > 0) { + STACK_UNWIND_STRICT(rmdir, frame, -1, error, NULL, NULL, NULL); + } + return 0; } @@ -4016,7 +4516,7 @@ int pl_symlink(call_frame_t *frame, xlator_t *this, const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL); + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); STACK_WIND(frame, pl_symlink_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink, linkname, loc, umask, xdata); return 0; @@ -4027,6 +4527,19 @@ pl_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { + pl_inode_t *pl_inode = (pl_inode_t *)cookie; + + if (op_ret >= 0) { + pthread_mutex_lock(&pl_inode->mutex); + + /* TODO: can happen pl_inode->links == 0 ? 
*/ + if (pl_inode->links >= 0) { + pl_inode->links++; + } + + pthread_mutex_unlock(&pl_inode->mutex); + } + PL_STACK_UNWIND_FOR_CLIENT(link, xdata, frame, op_ret, op_errno, inode, buf, preparent, postparent, xdata); return 0; @@ -4036,9 +4549,18 @@ int pl_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, oldloc, newloc); - STACK_WIND(frame, pl_link_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); + pl_inode_t *pl_inode; + + pl_inode = pl_inode_get(this, oldloc->inode, NULL); + if (pl_inode == NULL) { + STACK_UNWIND_STRICT(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, + NULL); + return 0; + } + + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), oldloc, newloc); + STACK_WIND_COOKIE(frame, pl_link_cbk, pl_inode, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); return 0; } @@ -4112,7 +4634,7 @@ pl_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int pl_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL); + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); STACK_WIND(frame, pl_statfs_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->statfs, loc, xdata); return 0; @@ -4122,6 +4644,28 @@ int32_t pl_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) { + pl_local_t *local = NULL; + pl_inode_t *pl_inode = NULL; + + local = frame->local; + if (local && local->update_mlock_enforced_flag && op_ret != -1) { + pl_inode = pl_inode_get(this, local->inode, NULL); + if (!pl_inode) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + pthread_mutex_lock(&pl_inode->mutex); + { + pl_inode->mlock_enforced = _gf_false; + pl_inode->check_mlock_info = _gf_false; + pl_inode->track_fop_wind_count = _gf_true; + } + pthread_mutex_unlock(&pl_inode->mutex); + } + +unwind: PL_STACK_UNWIND_FOR_CLIENT(removexattr, xdata, frame, op_ret, op_errno, xdata); return 0; @@ -4131,16 +4675,51 @@ int pl_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL); + int op_ret = 0; + int op_errno = EINVAL; + posix_locks_private_t *priv = this->private; + + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); + + PL_CHECK_LOCK_ENFORCE_KEY(frame, ((dict_t *)NULL), name, this, loc, + ((fd_t *)NULL), priv); + STACK_WIND(frame, pl_removexattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); return 0; + +unwind: + PL_STACK_UNWIND_FOR_CLIENT(removexattr, xdata, frame, op_ret, op_errno, + NULL); + + return 0; } int32_t pl_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) { + pl_local_t *local = NULL; + pl_inode_t *pl_inode = NULL; + + local = frame->local; + if (local && local->update_mlock_enforced_flag && op_ret != -1) { + pl_inode = pl_inode_get(this, local->inode, NULL); + if (!pl_inode) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + pthread_mutex_lock(&pl_inode->mutex); + { + pl_inode->mlock_enforced = _gf_false; + pl_inode->check_mlock_info = _gf_false; + } + pthread_mutex_unlock(&pl_inode->mutex); + } + +unwind: PL_STACK_UNWIND_FOR_CLIENT(fremovexattr, xdata, frame, op_ret, op_errno, xdata); return 0; @@ -4150,10 +4729,23 @@ int pl_fremovexattr(call_frame_t 
*frame, xlator_t *this, fd_t *fd, const char *name, dict_t *xdata) { + int op_ret = -1; + int op_errno = EINVAL; + posix_locks_private_t *priv = this->private; + PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL); + + PL_CHECK_LOCK_ENFORCE_KEY(frame, ((dict_t *)NULL), name, this, + ((loc_t *)NULL), fd, priv); + STACK_WIND(frame, pl_fremovexattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); return 0; + +unwind: + PL_STACK_UNWIND_FOR_CLIENT(fremovexattr, xdata, frame, op_ret, op_errno, + NULL); + return 0; } int32_t @@ -4189,7 +4781,7 @@ int pl_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL); + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); STACK_WIND(frame, pl_xattrop_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->xattrop, loc, optype, xattr, xdata); return 0; @@ -4228,7 +4820,7 @@ int pl_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL); + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); STACK_WIND(frame, pl_setattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); return 0; @@ -4289,7 +4881,7 @@ int pl_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL); + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); STACK_WIND(frame, pl_readlink_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readlink, loc, size, xdata); return 0; @@ -4307,7 +4899,7 @@ int pl_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL); + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); STACK_WIND(frame, pl_access_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->access, loc, mask, xdata); return 0; @@ -4456,7 +5048,7 @@ struct volume_options options[] = { "be used in conjunction w/ revocation-clear-all."}, {.key = {"notify-contention"}, .type = GF_OPTION_TYPE_BOOL, - .default_value = "no", + .default_value = "yes", .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, .op_version = {GD_OP_VERSION_4_0_0}, .tags = {"locks", "contention"}, @@ -4479,5 +5071,25 @@ struct volume_options options[] = { "on the same inode. If multiple lock requests are " "received during this period, only one upcall will " "be sent."}, + {.key = {"enforce-mandatory-lock"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .flags = OPT_FLAG_SETTABLE, + .op_version = {GD_OP_VERSION_6_0}, + .description = "option to enable lock enforcement"}, {.key = {NULL}}, }; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .dumpops = &dumpops, + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "locks", + .category = GF_MAINTAINED, +}; diff --git a/xlators/features/locks/src/reservelk.c b/xlators/features/locks/src/reservelk.c index 8b080dba030..604691fd887 100644 --- a/xlators/features/locks/src/reservelk.c +++ b/xlators/features/locks/src/reservelk.c @@ -7,12 +7,12 @@ later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by the Free Software Foundation. 
*/ -#include "glusterfs.h" -#include "compat.h" -#include "xlator.h" -#include "logging.h" -#include "common-utils.h" -#include "list.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/compat.h> +#include <glusterfs/xlator.h> +#include <glusterfs/logging.h> +#include <glusterfs/common-utils.h> +#include <glusterfs/list.h> #include "locks.h" #include "common.h" @@ -31,12 +31,10 @@ reservelks_equal(posix_lock_t *l1, posix_lock_t *l2) static posix_lock_t * __reservelk_grantable(pl_inode_t *pl_inode, posix_lock_t *lock) { - xlator_t *this = NULL; + xlator_t *this = THIS; posix_lock_t *l = NULL; posix_lock_t *ret_lock = NULL; - this = THIS; - if (list_empty(&pl_inode->reservelk_list)) { gf_log(this->name, GF_LOG_TRACE, "No reservelks in list"); goto out; @@ -82,10 +80,9 @@ __matching_reservelk(pl_inode_t *pl_inode, posix_lock_t *lock) static int __reservelk_conflict(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock) { - posix_lock_t *conf = NULL; int ret = 0; - conf = __matching_reservelk(pl_inode, lock); + posix_lock_t *conf = __matching_reservelk(pl_inode, lock); if (conf) { gf_log(this->name, GF_LOG_TRACE, "Matching reservelk found"); if (__same_owner_reservelk(lock, conf)) { @@ -104,29 +101,28 @@ __reservelk_conflict(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock) int pl_verify_reservelk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, - int can_block) + const int can_block) { int ret = 0; pthread_mutex_lock(&pl_inode->mutex); { if (__reservelk_conflict(this, pl_inode, lock)) { + lock->blocked = can_block; + list_add_tail(&lock->list, &pl_inode->blocked_calls); + pthread_mutex_unlock(&pl_inode->mutex); gf_log(this->name, GF_LOG_TRACE, "Found conflicting reservelk. Blocking until reservelk is " "unlocked."); - lock->blocked = can_block; - list_add_tail(&lock->list, &pl_inode->blocked_calls); ret = -1; - goto unlock; + goto out; } - - gf_log(this->name, GF_LOG_TRACE, - "no conflicting reservelk found. Call continuing"); - ret = 0; } -unlock: pthread_mutex_unlock(&pl_inode->mutex); - + gf_log(this->name, GF_LOG_TRACE, + "no conflicting reservelk found. 
Call continuing"); + ret = 0; +out: return ret; } @@ -135,12 +131,11 @@ unlock: */ static int __lock_reservelk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, - int can_block) + const int can_block) { - posix_lock_t *conf = NULL; int ret = -EINVAL; - conf = __reservelk_grantable(pl_inode, lock); + posix_lock_t *conf = __reservelk_grantable(pl_inode, lock); if (conf) { ret = -EAGAIN; if (can_block == 0) @@ -183,9 +178,7 @@ find_matching_reservelk(posix_lock_t *lock, pl_inode_t *pl_inode) static posix_lock_t * __reserve_unlock_lock(xlator_t *this, posix_lock_t *lock, pl_inode_t *pl_inode) { - posix_lock_t *conf = NULL; - - conf = find_matching_reservelk(lock, pl_inode); + posix_lock_t *conf = find_matching_reservelk(lock, pl_inode); if (!conf) { gf_log(this->name, GF_LOG_DEBUG, " Matching lock not found for unlock"); goto out; @@ -319,8 +312,6 @@ grant_blocked_lock_calls(xlator_t *this, pl_inode_t *pl_inode) ret = pl_setlk(this, pl_inode, lock, can_block); if (ret == -1) { if (can_block) { - pl_trace_block(this, lock->frame, fd, NULL, cmd, - &lock->user_flock, NULL); continue; } else { gf_log(this->name, GF_LOG_DEBUG, "returning EAGAIN"); @@ -345,6 +336,7 @@ pl_reserve_unlock(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock) { retlock = __reserve_unlock_lock(this, lock, pl_inode); if (!retlock) { + pthread_mutex_unlock(&pl_inode->mutex); gf_log(this->name, GF_LOG_DEBUG, "Bad Unlock issued on Inode lock"); ret = -EINVAL; goto out; @@ -354,9 +346,8 @@ pl_reserve_unlock(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock) __destroy_lock(retlock); ret = 0; } -out: pthread_mutex_unlock(&pl_inode->mutex); - +out: grant_blocked_reserve_locks(this, pl_inode); grant_blocked_lock_calls(this, pl_inode); @@ -372,19 +363,20 @@ pl_reserve_setlk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, pthread_mutex_lock(&pl_inode->mutex); { ret = __lock_reservelk(this, pl_inode, lock, can_block); - if (ret < 0) - gf_log(this->name, GF_LOG_TRACE, - "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => NOK", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", - lock->client_pid, lkowner_utoa(&lock->owner), - lock->user_flock.l_start, lock->user_flock.l_len); - else - gf_log(this->name, GF_LOG_TRACE, - "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => OK", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", - lock->client_pid, lkowner_utoa(&lock->owner), lock->fl_start, - lock->fl_end); } pthread_mutex_unlock(&pl_inode->mutex); + + if (ret < 0) + gf_log(this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => NOK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid, + lkowner_utoa(&lock->owner), lock->user_flock.l_start, + lock->user_flock.l_len); + else + gf_log(this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => OK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid, + lkowner_utoa(&lock->owner), lock->fl_start, lock->fl_end); + return ret; } |
