Diffstat (limited to 'xlators/features/locks/src')
-rw-r--r--  xlators/features/locks/src/clear.c            |   58
-rw-r--r--  xlators/features/locks/src/clear.h            |    8
-rw-r--r--  xlators/features/locks/src/common.c           |  580
-rw-r--r--  xlators/features/locks/src/common.h           |   87
-rw-r--r--  xlators/features/locks/src/entrylk.c          |  106
-rw-r--r--  xlators/features/locks/src/inodelk.c          |  249
-rw-r--r--  xlators/features/locks/src/locks-mem-types.h  |    3
-rw-r--r--  xlators/features/locks/src/locks.h            |  105
-rw-r--r--  xlators/features/locks/src/pl-messages.h      |    2
-rw-r--r--  xlators/features/locks/src/posix.c            | 1442
-rw-r--r--  xlators/features/locks/src/reservelk.c        |   78

11 files changed, 1999 insertions(+), 719 deletions(-)
diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c
index 0966ee753d6..ab1eac68a53 100644
--- a/xlators/features/locks/src/clear.c
+++ b/xlators/features/locks/src/clear.c
@@ -12,17 +12,23 @@
#include <limits.h>
#include <pthread.h>
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "logging.h"
-#include "common-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/common-utils.h>
#include "locks.h"
#include "common.h"
-#include "statedump.h"
+#include <glusterfs/statedump.h>
#include "clear.h"
+const char *clrlk_type_names[CLRLK_TYPE_MAX] = {
+ [CLRLK_INODE] = "inode",
+ [CLRLK_ENTRY] = "entry",
+ [CLRLK_POSIX] = "posix",
+};
+
int
clrlk_get_kind(char *kind)
{
@@ -175,9 +181,9 @@ clrlk_clear_posixlk(xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args,
if (plock->blocked) {
bcount++;
pl_trace_out(this, plock->frame, NULL, NULL, F_SETLKW,
- &plock->user_flock, -1, EAGAIN, NULL);
+ &plock->user_flock, -1, EINTR, NULL);
- STACK_UNWIND_STRICT(lk, plock->frame, -1, EAGAIN,
+ STACK_UNWIND_STRICT(lk, plock->frame, -1, EINTR,
&plock->user_flock, NULL);
} else {
@@ -254,14 +260,16 @@ blkd:
}
pthread_mutex_unlock(&pl_inode->mutex);
- list_for_each_entry_safe(ilock, tmp, &released, blocked_locks)
- {
- list_del_init(&ilock->blocked_locks);
- pl_trace_out(this, ilock->frame, NULL, NULL, F_SETLKW,
- &ilock->user_flock, -1, EAGAIN, ilock->volume);
- STACK_UNWIND_STRICT(inodelk, ilock->frame, -1, EAGAIN, NULL);
- // No need to take lock as the locks are only in one list
- __pl_inodelk_unref(ilock);
+ if (!list_empty(&released)) {
+ list_for_each_entry_safe(ilock, tmp, &released, blocked_locks)
+ {
+ list_del_init(&ilock->blocked_locks);
+ pl_trace_out(this, ilock->frame, NULL, NULL, F_SETLKW,
+ &ilock->user_flock, -1, EAGAIN, ilock->volume);
+ STACK_UNWIND_STRICT(inodelk, ilock->frame, -1, EAGAIN, NULL);
+ // No need to take lock as the locks are only in one list
+ __pl_inodelk_unref(ilock);
+ }
}
if (!(args->kind & CLRLK_GRANTED)) {
@@ -357,15 +365,17 @@ blkd:
}
pthread_mutex_unlock(&pl_inode->mutex);
- list_for_each_entry_safe(elock, tmp, &released, blocked_locks)
- {
- list_del_init(&elock->blocked_locks);
- entrylk_trace_out(this, elock->frame, elock->volume, NULL, NULL,
- elock->basename, ENTRYLK_LOCK, elock->type, -1,
- EAGAIN);
- STACK_UNWIND_STRICT(entrylk, elock->frame, -1, EAGAIN, NULL);
+ if (!list_empty(&released)) {
+ list_for_each_entry_safe(elock, tmp, &released, blocked_locks)
+ {
+ list_del_init(&elock->blocked_locks);
+ entrylk_trace_out(this, elock->frame, elock->volume, NULL, NULL,
+ elock->basename, ENTRYLK_LOCK, elock->type, -1,
+ EAGAIN);
+ STACK_UNWIND_STRICT(entrylk, elock->frame, -1, EAGAIN, NULL);
- __pl_entrylk_unref(elock);
+ __pl_entrylk_unref(elock);
+ }
}
if (!(args->kind & CLRLK_GRANTED)) {
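Note: clear.c now defines a small name table for the lock kinds it can clear, and clear.h exports it. As a hedged illustration (not part of the patch), a caller could map a clrlk_type value to its printable name as below; clrlk_type_to_str() is an illustrative helper, not a function from this change.

#include "clear.h"

static const char *
clrlk_type_to_str(clrlk_type type)
{
    /* Designated initializers in clear.c leave any unnamed slot NULL, so
     * guard both the range and the table entry. */
    if (type < 0 || type >= CLRLK_TYPE_MAX || !clrlk_type_names[type])
        return "unknown";

    return clrlk_type_names[type];
}

/* e.g. clrlk_type_to_str(CLRLK_POSIX) yields "posix". */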
diff --git a/xlators/features/locks/src/clear.h b/xlators/features/locks/src/clear.h
index 08662746f98..bc118cb1b81 100644
--- a/xlators/features/locks/src/clear.h
+++ b/xlators/features/locks/src/clear.h
@@ -10,9 +10,9 @@
#ifndef __CLEAR_H__
#define __CLEAR_H__
-#include "compat-errno.h"
-#include "stack.h"
-#include "call-stub.h"
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/stack.h>
+#include <glusterfs/call-stub.h>
#include "locks.h"
typedef enum {
@@ -22,6 +22,8 @@ typedef enum {
CLRLK_TYPE_MAX
} clrlk_type;
+extern const char *clrlk_type_names[];
+
typedef enum {
CLRLK_BLOCKED = 1,
CLRLK_GRANTED,
diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
index a953e0d1a4a..a2c6be93e03 100644
--- a/xlators/features/locks/src/common.c
+++ b/xlators/features/locks/src/common.c
@@ -12,11 +12,10 @@
#include <limits.h>
#include <pthread.h>
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "logging.h"
-#include "common-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/syncop.h>
#include "locks.h"
#include "common.h"
@@ -213,13 +212,11 @@ void
pl_trace_in(xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, int cmd,
struct gf_flock *flock, const char *domain)
{
- posix_locks_private_t *priv = NULL;
+ posix_locks_private_t *priv = this->private;
char pl_locker[256];
char pl_lockee[256];
char pl_lock[256];
- priv = this->private;
-
if (!priv->trace)
return;
@@ -291,13 +288,11 @@ pl_trace_block(xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
int cmd, struct gf_flock *flock, const char *domain)
{
- posix_locks_private_t *priv = NULL;
+ posix_locks_private_t *priv = this->private;
char pl_locker[256];
char pl_lockee[256];
char pl_lock[256];
- priv = this->private;
-
if (!priv->trace)
return;
@@ -326,7 +321,7 @@ pl_trace_flush(xlator_t *this, call_frame_t *frame, fd_t *fd)
if (!priv->trace)
return;
- pl_inode = pl_inode_get(this, fd->inode);
+ pl_inode = pl_inode_get(this, fd->inode, NULL);
if (pl_inode && __pl_inode_is_empty(pl_inode))
return;
@@ -362,7 +357,9 @@ pl_update_refkeeper(xlator_t *this, inode_t *inode)
int need_unref = 0;
int need_ref = 0;
- pl_inode = pl_inode_get(this, inode);
+ pl_inode = pl_inode_get(this, inode, NULL);
+ if (!pl_inode)
+ return;
pthread_mutex_lock(&pl_inode->mutex);
{
@@ -387,8 +384,51 @@ pl_update_refkeeper(xlator_t *this, inode_t *inode)
inode_ref(inode);
}
+/* Get lock enforcement info from disk */
+int
+pl_fetch_mlock_info_from_disk(xlator_t *this, pl_inode_t *pl_inode,
+ pl_local_t *local)
+{
+ dict_t *xdata_rsp = NULL;
+ int ret = 0;
+ int op_ret = 0;
+
+ if (!local) {
+ return -1;
+ }
+
+ if (local->fd) {
+ op_ret = syncop_fgetxattr(this, local->fd, &xdata_rsp,
+ GF_ENFORCE_MANDATORY_LOCK, NULL, NULL);
+ } else {
+ op_ret = syncop_getxattr(this, &local->loc[0], &xdata_rsp,
+ GF_ENFORCE_MANDATORY_LOCK, NULL, NULL);
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ if (op_ret >= 0) {
+ pl_inode->mlock_enforced = _gf_true;
+ pl_inode->check_mlock_info = _gf_false;
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, -op_ret, 0,
+ "getxattr failed with %d", op_ret);
+ pl_inode->mlock_enforced = _gf_false;
+
+ if (-op_ret == ENODATA) {
+ pl_inode->check_mlock_info = _gf_false;
+ } else {
+ pl_inode->check_mlock_info = _gf_true;
+ }
+ }
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ return ret;
+}
+
pl_inode_t *
-pl_inode_get(xlator_t *this, inode_t *inode)
+pl_inode_get(xlator_t *this, inode_t *inode, pl_local_t *local)
{
uint64_t tmp_pl_inode = 0;
pl_inode_t *pl_inode = NULL;
@@ -401,6 +441,7 @@ pl_inode_get(xlator_t *this, inode_t *inode)
pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
goto unlock;
}
+
pl_inode = GF_CALLOC(1, sizeof(*pl_inode), gf_locks_mt_pl_inode_t);
if (!pl_inode) {
goto unlock;
@@ -409,6 +450,7 @@ pl_inode_get(xlator_t *this, inode_t *inode)
gf_log(this->name, GF_LOG_TRACE, "Allocating new pl inode");
pthread_mutex_init(&pl_inode->mutex, NULL);
+ pthread_cond_init(&pl_inode->check_fop_wind_count, 0);
INIT_LIST_HEAD(&pl_inode->dom_list);
INIT_LIST_HEAD(&pl_inode->ext_list);
@@ -418,8 +460,16 @@ pl_inode_get(xlator_t *this, inode_t *inode)
INIT_LIST_HEAD(&pl_inode->blocked_calls);
INIT_LIST_HEAD(&pl_inode->metalk_list);
INIT_LIST_HEAD(&pl_inode->queued_locks);
+ INIT_LIST_HEAD(&pl_inode->waiting);
gf_uuid_copy(pl_inode->gfid, inode->gfid);
+ pl_inode->check_mlock_info = _gf_true;
+ pl_inode->mlock_enforced = _gf_false;
+
+ /* -2 means never looked up. -1 means something went wrong and link
+ * tracking is disabled. */
+ pl_inode->links = -2;
+
ret = __inode_ctx_put(inode, this, (uint64_t)(long)(pl_inode));
if (ret) {
pthread_mutex_destroy(&pl_inode->mutex);
@@ -431,13 +481,23 @@ pl_inode_get(xlator_t *this, inode_t *inode)
unlock:
UNLOCK(&inode->lock);
+ if ((pl_inode != NULL) && pl_is_mandatory_locking_enabled(pl_inode) &&
+ pl_inode->check_mlock_info && local) {
+ /* Note: The lock enforcement information per file can be stored in the
+ attribute flag of stat(x) in posix. With that there won't be a need
+ for doing getxattr post a reboot
+ */
+ pl_fetch_mlock_info_from_disk(this, pl_inode, local);
+ }
+
return pl_inode;
}
/* Create a new posix_lock_t */
posix_lock_t *
new_posix_lock(struct gf_flock *flock, client_t *client, pid_t client_pid,
- gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags, int blocking)
+ gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags, int blocking,
+ int32_t *op_errno)
{
posix_lock_t *lock = NULL;
@@ -445,8 +505,14 @@ new_posix_lock(struct gf_flock *flock, client_t *client, pid_t client_pid,
GF_VALIDATE_OR_GOTO("posix-locks", client, out);
GF_VALIDATE_OR_GOTO("posix-locks", fd, out);
+ if (!pl_is_lk_owner_valid(owner, client)) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
lock = GF_CALLOC(1, sizeof(posix_lock_t), gf_locks_mt_posix_lock_t);
if (!lock) {
+ *op_errno = ENOMEM;
goto out;
}
@@ -464,6 +530,7 @@ new_posix_lock(struct gf_flock *flock, client_t *client, pid_t client_pid,
if (lock->client_uid == NULL) {
GF_FREE(lock);
lock = NULL;
+ *op_errno = ENOMEM;
goto out;
}
@@ -538,13 +605,11 @@ static void
__insert_lock(pl_inode_t *pl_inode, posix_lock_t *lock)
{
if (lock->blocked)
- gettimeofday(&lock->blkd_time, NULL);
+ lock->blkd_time = gf_time();
else
- gettimeofday(&lock->granted_time, NULL);
+ lock->granted_time = gf_time();
list_add_tail(&lock->list, &pl_inode->ext_list);
-
- return;
}
/* Return true if the locks overlap, false otherwise */
@@ -900,7 +965,7 @@ grant_blocked_locks(xlator_t *this, pl_inode_t *pl_inode)
struct list_head granted_list;
posix_lock_t *tmp = NULL;
posix_lock_t *lock = NULL;
-
+ pl_local_t *local = NULL;
INIT_LIST_HEAD(&granted_list);
pthread_mutex_lock(&pl_inode->mutex);
@@ -915,9 +980,9 @@ grant_blocked_locks(xlator_t *this, pl_inode_t *pl_inode)
pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock,
0, 0, NULL);
-
- STACK_UNWIND_STRICT(lk, lock->frame, 0, 0, &lock->user_flock, NULL);
-
+ local = lock->frame->local;
+ PL_STACK_UNWIND_AND_FREE(local, lk, lock->frame, 0, 0,
+ &lock->user_flock, NULL);
__destroy_lock(lock);
}
@@ -932,10 +997,12 @@ pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode,
0,
};
posix_lock_t *unlock_lock = NULL;
+ int32_t op_errno = 0;
struct list_head granted_list;
posix_lock_t *tmp = NULL;
posix_lock_t *lock = NULL;
+ pl_local_t *local = NULL;
int ret = -1;
@@ -949,7 +1016,7 @@ pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode,
unlock_lock = new_posix_lock(&flock, old_lock->client, old_lock->client_pid,
&old_lock->owner, old_lock->fd,
- old_lock->lk_flags, 0);
+ old_lock->lk_flags, 0, &op_errno);
GF_VALIDATE_OR_GOTO(this->name, unlock_lock, out);
ret = 0;
@@ -963,9 +1030,9 @@ pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode,
pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock,
0, 0, NULL);
-
- STACK_UNWIND_STRICT(lk, lock->frame, 0, 0, &lock->user_flock, NULL);
-
+ local = lock->frame->local;
+ PL_STACK_UNWIND_AND_FREE(local, lk, lock->frame, 0, 0,
+ &lock->user_flock, NULL);
__destroy_lock(lock);
}
@@ -1000,7 +1067,7 @@ pl_setlk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
if (__is_lock_grantable(pl_inode, lock)) {
if (pl_metalock_is_active(pl_inode)) {
- __pl_queue_lock(pl_inode, lock, can_block);
+ __pl_queue_lock(pl_inode, lock);
pthread_mutex_unlock(&pl_inode->mutex);
ret = -2;
goto out;
@@ -1013,7 +1080,7 @@ pl_setlk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
__insert_and_merge(pl_inode, lock);
} else if (can_block) {
if (pl_metalock_is_active(pl_inode)) {
- __pl_queue_lock(pl_inode, lock, can_block);
+ __pl_queue_lock(pl_inode, lock);
pthread_mutex_unlock(&pl_inode->mutex);
ret = -2;
goto out;
@@ -1024,6 +1091,10 @@ pl_setlk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid, lkowner_utoa(&lock->owner),
lock->user_flock.l_start, lock->user_flock.l_len);
+
+ pl_trace_block(this, lock->frame, NULL, NULL, F_SETLKW,
+ &lock->user_flock, NULL);
+
lock->blocked = 1;
__insert_lock(pl_inode, lock);
ret = -1;
@@ -1050,10 +1121,7 @@ out:
posix_lock_t *
pl_getlk(pl_inode_t *pl_inode, posix_lock_t *lock)
{
- posix_lock_t *conf = NULL;
-
- conf = first_conflicting_overlap(pl_inode, lock);
-
+ posix_lock_t *conf = first_conflicting_overlap(pl_inode, lock);
if (conf == NULL) {
lock->fl_type = F_UNLCK;
return lock;
@@ -1075,3 +1143,449 @@ pl_does_monkey_want_stuck_lock()
return _gf_true;
return _gf_false;
}
+
+int
+pl_lock_preempt(pl_inode_t *pl_inode, posix_lock_t *reqlock)
+{
+ posix_lock_t *lock = NULL;
+ posix_lock_t *i = NULL;
+ pl_rw_req_t *rw = NULL;
+ pl_rw_req_t *itr = NULL;
+ struct list_head unwind_blist = {
+ 0,
+ };
+ struct list_head unwind_rw_list = {
+ 0,
+ };
+ int ret = 0;
+
+ INIT_LIST_HEAD(&unwind_blist);
+ INIT_LIST_HEAD(&unwind_rw_list);
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ /*
+ - go through the lock list
+ - remove all locks from different owners
+ - same owner locks will be added or subtracted based on
+ the new request
+ - add the new lock
+ */
+ list_for_each_entry_safe(lock, i, &pl_inode->ext_list, list)
+ {
+ if (lock->blocked) {
+ list_del_init(&lock->list);
+ list_add(&lock->list, &unwind_blist);
+ continue;
+ }
+
+ if (locks_overlap(lock, reqlock)) {
+ if (same_owner(lock, reqlock))
+ continue;
+
+ /* remove conflicting locks */
+ list_del_init(&lock->list);
+ __delete_lock(lock);
+ __destroy_lock(lock);
+ }
+ }
+
+ __insert_and_merge(pl_inode, reqlock);
+
+ list_for_each_entry_safe(rw, itr, &pl_inode->rw_list, list)
+ {
+ list_del_init(&rw->list);
+ list_add(&rw->list, &unwind_rw_list);
+ }
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ /* unwind blocked locks */
+ list_for_each_entry_safe(lock, i, &unwind_blist, list)
+ {
+ PL_STACK_UNWIND_AND_FREE(((pl_local_t *)lock->frame->local), lk,
+ lock->frame, -1, EBUSY, &lock->user_flock,
+ NULL);
+ __destroy_lock(lock);
+ }
+
+ /* unwind blocked IOs */
+ list_for_each_entry_safe(rw, itr, &unwind_rw_list, list)
+ {
+ pl_clean_local(rw->stub->frame->local);
+ call_unwind_error(rw->stub, -1, EBUSY);
+ }
+
+ return ret;
+}
+
+/* Return true in case we need to ensure mandatory-locking
+ * semantics under different modes.
+ */
+gf_boolean_t
+pl_is_mandatory_locking_enabled(pl_inode_t *pl_inode)
+{
+ posix_locks_private_t *priv = THIS->private;
+
+ if (priv->mandatory_mode == MLK_FILE_BASED && pl_inode->mandatory)
+ return _gf_true;
+ else if (priv->mandatory_mode == MLK_FORCED ||
+ priv->mandatory_mode == MLK_OPTIMAL)
+ return _gf_true;
+
+ return _gf_false;
+}
+
+void
+pl_clean_local(pl_local_t *local)
+{
+ if (!local)
+ return;
+
+ if (local->inodelk_dom_count_req)
+ data_unref(local->inodelk_dom_count_req);
+ loc_wipe(&local->loc[0]);
+ loc_wipe(&local->loc[1]);
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->inode)
+ inode_unref(local->inode);
+ mem_put(local);
+}
+
+/*
+TODO: detach local initialization from PL_LOCAL_GET_REQUESTS and add it here
+*/
+int
+pl_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+{
+ pl_local_t *local = NULL;
+
+ if (!loc && !fd) {
+ return -1;
+ }
+
+ if (!frame->local) {
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0,
+ "mem allocation failed");
+ return -1;
+ }
+
+ local->inode = (loc ? inode_ref(loc->inode) : inode_ref(fd->inode));
+
+ frame->local = local;
+ }
+
+ return 0;
+}
+
+gf_boolean_t
+pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client)
+{
+ if (client && (client->opversion < GD_OP_VERSION_7_0)) {
+ return _gf_true;
+ }
+
+ if (is_lk_owner_null(owner)) {
+ return _gf_false;
+ }
+ return _gf_true;
+}
+
+static int32_t
+pl_inode_from_loc(loc_t *loc, inode_t **pinode)
+{
+ inode_t *inode = NULL;
+ int32_t error = 0;
+
+ if (loc->inode != NULL) {
+ inode = inode_ref(loc->inode);
+ goto done;
+ }
+
+ if (loc->parent == NULL) {
+ error = EINVAL;
+ goto done;
+ }
+
+ if (!gf_uuid_is_null(loc->gfid)) {
+ inode = inode_find(loc->parent->table, loc->gfid);
+ if (inode != NULL) {
+ goto done;
+ }
+ }
+
+ if (loc->name == NULL) {
+ error = EINVAL;
+ goto done;
+ }
+
+ inode = inode_grep(loc->parent->table, loc->parent, loc->name);
+ if (inode == NULL) {
+ /* We haven't found any inode. This means that the file doesn't exist
+ * or that even if it exists, we don't have any knowledge about it, so
+ * we don't have locks on it either, which is fine for our purposes. */
+ goto done;
+ }
+
+done:
+ *pinode = inode;
+
+ return error;
+}
+
+static gf_boolean_t
+pl_inode_has_owners(xlator_t *xl, client_t *client, pl_inode_t *pl_inode,
+ struct timespec *now, struct list_head *contend)
+{
+ pl_dom_list_t *dom;
+ pl_inode_lock_t *lock;
+ gf_boolean_t has_owners = _gf_false;
+
+ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
+ {
+ list_for_each_entry(lock, &dom->inodelk_list, list)
+ {
+ /* If the lock belongs to the same client, we assume it's related
+ * to the same operation, so we allow the removal to continue. */
+ if (lock->client == client) {
+ continue;
+ }
+ /* If the lock belongs to an internal process, we don't block the
+ * removal. */
+ if (lock->client_pid < 0) {
+ continue;
+ }
+ if (contend == NULL) {
+ return _gf_true;
+ }
+ has_owners = _gf_true;
+ inodelk_contention_notify_check(xl, lock, now, contend);
+ }
+ }
+
+ return has_owners;
+}
+
+int32_t
+pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc,
+ pl_inode_t **ppl_inode, struct list_head *contend)
+{
+ struct timespec now;
+ inode_t *inode;
+ pl_inode_t *pl_inode;
+ int32_t error;
+
+ pl_inode = NULL;
+
+ error = pl_inode_from_loc(loc, &inode);
+ if ((error != 0) || (inode == NULL)) {
+ goto done;
+ }
+
+ pl_inode = pl_inode_get(xl, inode, NULL);
+ if (pl_inode == NULL) {
+ inode_unref(inode);
+ error = ENOMEM;
+ goto done;
+ }
+
+ /* pl_inode_from_loc() already increments ref count for inode, so
+ * we only assign here our reference. */
+ pl_inode->inode = inode;
+
+ timespec_now(&now);
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ if (pl_inode->removed) {
+ error = ESTALE;
+ goto unlock;
+ }
+
+ if (pl_inode_has_owners(xl, frame->root->client, pl_inode, &now, contend)) {
+ error = -1;
+ /* We skip the unlock here because the caller must create a stub when
+ * we return -1 and do a call to pl_inode_remove_complete(), which
+ * assumes the lock is still acquired and will release it once
+ * everything else is prepared. */
+ goto done;
+ }
+
+ pl_inode->is_locked = _gf_true;
+ pl_inode->remove_running++;
+
+unlock:
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+done:
+ *ppl_inode = pl_inode;
+
+ return error;
+}
+
+int32_t
+pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub,
+ struct list_head *contend)
+{
+ pl_inode_lock_t *lock;
+ int32_t error = -1;
+
+ if (stub != NULL) {
+ list_add_tail(&stub->list, &pl_inode->waiting);
+ pl_inode->is_locked = _gf_true;
+ } else {
+ error = ENOMEM;
+
+ while (!list_empty(contend)) {
+ lock = list_first_entry(contend, pl_inode_lock_t, list);
+ list_del_init(&lock->list);
+ __pl_inodelk_unref(lock);
+ }
+ }
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ if (error < 0) {
+ inodelk_contention_notify(xl, contend);
+ }
+
+ inode_unref(pl_inode->inode);
+
+ return error;
+}
+
+void
+pl_inode_remove_wake(struct list_head *list)
+{
+ call_stub_t *stub;
+
+ while (!list_empty(list)) {
+ stub = list_first_entry(list, call_stub_t, list);
+ list_del_init(&stub->list);
+
+ call_resume(stub);
+ }
+}
+
+void
+pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error)
+{
+ struct list_head contend, granted;
+ struct timespec now;
+ pl_dom_list_t *dom;
+
+ if (pl_inode == NULL) {
+ return;
+ }
+
+ INIT_LIST_HEAD(&contend);
+ INIT_LIST_HEAD(&granted);
+ timespec_now(&now);
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ if (error == 0) {
+ if (pl_inode->links >= 0) {
+ pl_inode->links--;
+ }
+ if (pl_inode->links == 0) {
+ pl_inode->removed = _gf_true;
+ }
+ }
+
+ pl_inode->remove_running--;
+
+ if ((pl_inode->remove_running == 0) && list_empty(&pl_inode->waiting)) {
+ pl_inode->is_locked = _gf_false;
+
+ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
+ {
+ __grant_blocked_inode_locks(xl, pl_inode, &granted, dom, &now,
+ &contend);
+ }
+ }
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ unwind_granted_inodes(xl, pl_inode, &granted);
+
+ inodelk_contention_notify(xl, &contend);
+
+ inode_unref(pl_inode->inode);
+}
+
+void
+pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode,
+ struct list_head *list)
+{
+ call_stub_t *stub, *tmp;
+
+ if (!pl_inode->is_locked) {
+ return;
+ }
+
+ list_for_each_entry_safe(stub, tmp, &pl_inode->waiting, list)
+ {
+ if (!pl_inode_has_owners(xl, stub->frame->root->client, pl_inode, NULL,
+ NULL)) {
+ list_move_tail(&stub->list, list);
+ }
+ }
+}
+
+/* This function determines if an inodelk attempt can be done now or it needs
+ * to wait.
+ *
+ * Possible return values:
+ * < 0: An error occurred. Currently only -ESTALE can be returned if the
+ * inode has been deleted previously by unlink/rmdir/rename
+ * = 0: The lock can be attempted.
+ * > 0: The lock needs to wait because a conflicting remove operation is
+ * ongoing.
+ */
+int32_t
+pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock)
+{
+ pl_dom_list_t *dom;
+ pl_inode_lock_t *ilock;
+
+ /* If the inode has been deleted, we won't allow any lock. */
+ if (pl_inode->removed) {
+ return -ESTALE;
+ }
+
+ /* We only synchronize with locks made for regular operations coming from
+ * the user. Locks done for internal purposes are hard to control and could
+ * lead to long delays or deadlocks quite easily. */
+ if (lock->client_pid < 0) {
+ return 0;
+ }
+ if (!pl_inode->is_locked) {
+ return 0;
+ }
+ if (pl_inode->remove_running > 0) {
+ return 1;
+ }
+
+ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
+ {
+ list_for_each_entry(ilock, &dom->inodelk_list, list)
+ {
+ /* If a lock from the same client is already granted, we allow this
+ * one to continue. This is necessary to prevent deadlocks when
+ * multiple locks are taken for the same operation.
+ *
+ * On the other side it's unlikely that the same client sends
+ * completely unrelated locks for the same inode.
+ */
+ if (ilock->client == lock->client) {
+ return 0;
+ }
+ }
+ }
+
+ return 1;
+}
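Note: common.c gains the pl_inode_remove_prepare()/pl_inode_remove_complete()/pl_inode_remove_cbk() helpers that let unlink/rmdir/rename wait for conflicting inodelks. Below is a hedged sketch of the expected caller pattern, modeled on the PL_INODE_REMOVE macro shown in posix.c further down; pl_sketch_unlink() and its callback are illustrative names only, and the handling of positive return values is an assumption based on the error paths of pl_inode_remove_prepare().

#include <glusterfs/call-stub.h>
#include "locks.h"
#include "common.h"

static int32_t
pl_sketch_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
                     int32_t op_ret, int32_t op_errno, struct iatt *preparent,
                     struct iatt *postparent, dict_t *xdata)
{
    /* Update link tracking, possibly mark the inode as removed, and wake any
     * inodelks/stubs blocked behind this removal. */
    pl_inode_remove_cbk(this, (pl_inode_t *)cookie,
                        (op_ret < 0) ? op_errno : 0);

    STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent,
                        postparent, xdata);
    return 0;
}

static int32_t
pl_sketch_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
                 dict_t *xdata)
{
    struct list_head contend;
    pl_inode_t *pl_inode = NULL;
    call_stub_t *stub = NULL;
    int32_t error;

    INIT_LIST_HEAD(&contend);

    error = pl_inode_remove_prepare(this, frame, loc, &pl_inode, &contend);
    if (error < 0) {
        /* Conflicting inodelks exist: park the fop in a stub so it resumes
         * once the locks are released. pl_inode_remove_complete() drops
         * pl_inode->mutex and returns a negative value when the stub has
         * been queued successfully. */
        stub = fop_unlink_stub(frame, pl_sketch_unlink, loc, xflag, xdata);
        error = pl_inode_remove_complete(this, pl_inode, stub, &contend);
        if (error < 0)
            return 0;
    } else if (error == 0) {
        /* No conflicting owners: wind the fop and finish the bookkeeping in
         * the callback via pl_inode_remove_cbk(). */
        STACK_WIND_COOKIE(frame, pl_sketch_unlink_cbk, pl_inode,
                          FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
                          loc, xflag, xdata);
        return 0;
    }

    /* error > 0 is an errno (ENOMEM, EINVAL, or ESTALE once the inode has
     * already been removed). */
    STACK_UNWIND_STRICT(unlink, frame, -1, error, NULL, NULL, NULL);
    return 0;
}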
diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h
index c3d0e361933..281223bf3b8 100644
--- a/xlators/features/locks/src/common.h
+++ b/xlators/features/locks/src/common.h
@@ -10,7 +10,6 @@
#ifndef __COMMON_H__
#define __COMMON_H__
-#include "lkowner.h"
/*dump locks format strings */
#define RANGE_FMT "type=%s, whence=%hd, start=%llu, len=%llu"
#define ENTRY_FMT "type=%s on basename=%s"
@@ -32,12 +31,34 @@
#define SET_FLOCK_PID(flock, lock) ((flock)->l_pid = lock->client_pid)
+#define PL_STACK_UNWIND_AND_FREE(__local, fop, frame, op_ret, params...) \
+ do { \
+ frame->local = NULL; \
+ STACK_UNWIND_STRICT(fop, frame, op_ret, params); \
+ if (__local) { \
+ if (__local->inodelk_dom_count_req) \
+ data_unref(__local->inodelk_dom_count_req); \
+ loc_wipe(&__local->loc[0]); \
+ loc_wipe(&__local->loc[1]); \
+ if (__local->fd) \
+ fd_unref(__local->fd); \
+ if (__local->inode) \
+ inode_unref(__local->inode); \
+ if (__local->xdata) { \
+ dict_unref(__local->xdata); \
+ __local->xdata = NULL; \
+ } \
+ mem_put(__local); \
+ } \
+ } while (0)
+
posix_lock_t *
new_posix_lock(struct gf_flock *flock, client_t *client, pid_t client_pid,
- gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags, int can_block);
+ gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags, int blocking,
+ int32_t *op_errno);
pl_inode_t *
-pl_inode_get(xlator_t *this, inode_t *inode);
+pl_inode_get(xlator_t *this, inode_t *inode, pl_local_t *local);
posix_lock_t *
pl_getlk(pl_inode_t *inode, posix_lock_t *lock);
@@ -45,6 +66,9 @@ pl_getlk(pl_inode_t *inode, posix_lock_t *lock);
int
pl_setlk(xlator_t *this, pl_inode_t *inode, posix_lock_t *lock, int can_block);
+int
+pl_lock_preempt(pl_inode_t *pl_inode, posix_lock_t *reqlock);
+
void
grant_blocked_locks(xlator_t *this, pl_inode_t *inode);
@@ -81,6 +105,15 @@ void
__pl_inodelk_unref(pl_inode_lock_t *lock);
void
+__grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted, pl_dom_list_t *dom,
+ struct timespec *now, struct list_head *contend);
+
+void
+unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted);
+
+void
grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode,
pl_dom_list_t *dom, struct timespec *now,
struct list_head *contend);
@@ -177,9 +210,53 @@ __pl_entrylk_unref(pl_entry_lock_t *lock);
int
pl_metalock_is_active(pl_inode_t *pl_inode);
-int
-__pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock, int can_block);
+void
+__pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock);
+
+void
+inodelk_contention_notify_check(xlator_t *xl, pl_inode_lock_t *lock,
+ struct timespec *now,
+ struct list_head *contend);
+
+void
+entrylk_contention_notify_check(xlator_t *xl, pl_entry_lock_t *lock,
+ struct timespec *now,
+ struct list_head *contend);
gf_boolean_t
pl_does_monkey_want_stuck_lock();
+
+gf_boolean_t
+pl_is_mandatory_locking_enabled(pl_inode_t *pl_inode);
+
+void
+pl_clean_local(pl_local_t *local);
+
+int
+pl_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd);
+
+gf_boolean_t
+pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client);
+
+int32_t
+pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc,
+ pl_inode_t **ppl_inode, struct list_head *contend);
+
+int32_t
+pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub,
+ struct list_head *contend);
+
+void
+pl_inode_remove_wake(struct list_head *list);
+
+void
+pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error);
+
+void
+pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode,
+ struct list_head *list);
+
+int32_t
+pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock);
+
#endif /* __COMMON_H__ */
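Note: PL_STACK_UNWIND_AND_FREE() moves from posix.c into common.h (and now also drops the inode ref and xdata the local may hold), so every file in the xlator can unwind and clean up a pl_local_t in one step. A minimal hedged sketch of its use in an lk callback follows; pl_sketch_lk_cbk() is an illustrative name, not part of the patch.

static int32_t
pl_sketch_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
                 int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
                 dict_t *xdata)
{
    pl_local_t *local = frame->local;

    /* The macro detaches frame->local before unwinding, so the unwind path
     * cannot touch a half-freed local, then releases every reference the
     * local may hold (loc, fd, inode, xdata) and returns it to the pool. */
    PL_STACK_UNWIND_AND_FREE(local, lk, frame, op_ret, op_errno, lock, xdata);

    return 0;
}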
diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c
index ea78f92d200..fd772c850dd 100644
--- a/xlators/features/locks/src/entrylk.c
+++ b/xlators/features/locks/src/entrylk.c
@@ -7,13 +7,13 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "list.h"
-#include "upcall-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/list.h>
+#include <glusterfs/upcall-utils.h>
#include "locks.h"
#include "clear.h"
@@ -39,13 +39,20 @@ __pl_entrylk_ref(pl_entry_lock_t *lock)
static pl_entry_lock_t *
new_entrylk_lock(pl_inode_t *pinode, const char *basename, entrylk_type type,
- const char *domain, call_frame_t *frame, char *conn_id)
+ const char *domain, call_frame_t *frame, char *conn_id,
+ int32_t *op_errno)
{
pl_entry_lock_t *newlock = NULL;
+ if (!pl_is_lk_owner_valid(&frame->root->lk_owner, frame->root->client)) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
newlock = GF_CALLOC(1, sizeof(pl_entry_lock_t),
gf_locks_mt_pl_entry_lock_t);
if (!newlock) {
+ *op_errno = ENOMEM;
goto out;
}
@@ -114,8 +121,6 @@ __stale_entrylk(xlator_t *this, pl_entry_lock_t *candidate_lock,
pl_entry_lock_t *requested_lock, time_t *lock_age_sec)
{
posix_locks_private_t *priv = NULL;
- struct timeval curr;
- gettimeofday(&curr, NULL);
priv = this->private;
@@ -123,7 +128,7 @@ __stale_entrylk(xlator_t *this, pl_entry_lock_t *candidate_lock,
* chance? Or just the locks we are attempting to acquire?
*/
if (names_conflict(candidate_lock->basename, requested_lock->basename)) {
- *lock_age_sec = curr.tv_sec - candidate_lock->granted_time.tv_sec;
+ *lock_age_sec = gf_time() - candidate_lock->granted_time;
if (*lock_age_sec > priv->revocation_secs)
return _gf_true;
}
@@ -197,9 +202,9 @@ out:
return revoke_lock;
}
-static gf_boolean_t
-__entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
- struct timespec *now)
+void
+entrylk_contention_notify_check(xlator_t *this, pl_entry_lock_t *lock,
+ struct timespec *now, struct list_head *contend)
{
posix_locks_private_t *priv;
int64_t elapsed;
@@ -209,7 +214,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
/* If this lock is in a list, it means that we are about to send a
* notification for it, so no need to do anything else. */
if (!list_empty(&lock->contend)) {
- return _gf_false;
+ return;
}
elapsed = now->tv_sec;
@@ -218,7 +223,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
elapsed--;
}
if (elapsed < priv->notify_contention_delay) {
- return _gf_false;
+ return;
}
/* All contention notifications will be sent outside of the locked
@@ -231,7 +236,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
lock->contention_time = *now;
- return _gf_true;
+ list_add_tail(&lock->contend, contend);
}
void
@@ -325,9 +330,7 @@ __entrylk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_entry_lock_t *lock,
break;
}
}
- if (__entrylk_needs_contention_notify(this, tmp, now)) {
- list_add_tail(&tmp->contend, contend);
- }
+ entrylk_contention_notify_check(this, tmp, now, contend);
}
}
@@ -539,19 +542,17 @@ static int
__lock_blocked_add(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom,
pl_entry_lock_t *lock, int nonblock)
{
- struct timeval now;
-
- gettimeofday(&now, NULL);
-
if (nonblock)
goto out;
- lock->blkd_time = now;
+ lock->blkd_time = gf_time();
list_add_tail(&lock->blocked_locks, &dom->blocked_entrylks);
gf_msg_trace(this->name, 0, "Blocking lock: {pinode=%p, basename=%s}",
pinode, lock->basename);
+ entrylk_trace_block(this, lock->frame, NULL, NULL, NULL, lock->basename,
+ ENTRYLK_LOCK, lock->type);
out:
return -EAGAIN;
}
@@ -605,7 +606,7 @@ __lock_entrylk(xlator_t *this, pl_inode_t *pinode, pl_entry_lock_t *lock,
}
__pl_entrylk_ref(lock);
- gettimeofday(&lock->granted_time, NULL);
+ lock->granted_time = gf_time();
list_add(&lock->domain_list, &dom->entrylk_list);
ret = 0;
@@ -644,11 +645,10 @@ int32_t
check_entrylk_on_basename(xlator_t *this, inode_t *parent, char *basename)
{
int32_t entrylk = 0;
- pl_inode_t *pinode = 0;
pl_dom_list_t *dom = NULL;
pl_entry_lock_t *conf = NULL;
- pinode = pl_inode_get(this, parent);
+ pl_inode_t *pinode = pl_inode_get(this, parent, NULL);
if (!pinode)
goto out;
pthread_mutex_lock(&pinode->mutex);
@@ -689,10 +689,9 @@ __grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode,
bl_ret = __lock_entrylk(bl->this, pl_inode, bl, 0, dom, now, contend);
if (bl_ret == 0) {
- list_add(&bl->blocked_locks, granted);
+ list_add_tail(&bl->blocked_locks, granted);
}
}
- return;
}
/* Grants locks if possible which are blocked on a lock */
@@ -770,7 +769,7 @@ pl_common_entrylk(call_frame_t *frame, xlator_t *this, const char *volume,
if (xdata)
dict_ret = dict_get_str(xdata, "connection-id", &conn_id);
- pinode = pl_inode_get(this, inode);
+ pinode = pl_inode_get(this, inode, NULL);
if (!pinode) {
op_errno = ENOMEM;
goto out;
@@ -794,10 +793,9 @@ pl_common_entrylk(call_frame_t *frame, xlator_t *this, const char *volume,
entrylk_trace_in(this, frame, volume, fd, loc, basename, cmd, type);
reqlock = new_entrylk_lock(pinode, basename, type, dom->domain, frame,
- conn_id);
+ conn_id, &op_errno);
if (!reqlock) {
op_ret = -1;
- op_errno = ENOMEM;
goto unwind;
}
@@ -933,8 +931,6 @@ out:
op_ret, op_errno);
unwind:
STACK_UNWIND_STRICT(entrylk, frame, op_ret, op_errno, NULL);
- } else {
- entrylk_trace_block(this, frame, volume, fd, loc, basename, cmd, type);
}
if (pcontend != NULL) {
@@ -1072,32 +1068,36 @@ pl_entrylk_client_cleanup(xlator_t *this, pl_ctx_t *ctx)
}
pthread_mutex_unlock(&ctx->lock);
- list_for_each_entry_safe(l, tmp, &unwind, client_list)
- {
- list_del_init(&l->client_list);
+ if (!list_empty(&unwind)) {
+ list_for_each_entry_safe(l, tmp, &unwind, client_list)
+ {
+ list_del_init(&l->client_list);
- if (l->frame)
- STACK_UNWIND_STRICT(entrylk, l->frame, -1, EAGAIN, NULL);
- list_add_tail(&l->client_list, &released);
+ if (l->frame)
+ STACK_UNWIND_STRICT(entrylk, l->frame, -1, EAGAIN, NULL);
+ list_add_tail(&l->client_list, &released);
+ }
}
- list_for_each_entry_safe(l, tmp, &released, client_list)
- {
- list_del_init(&l->client_list);
+ if (!list_empty(&released)) {
+ list_for_each_entry_safe(l, tmp, &released, client_list)
+ {
+ list_del_init(&l->client_list);
- pinode = l->pinode;
+ pinode = l->pinode;
- dom = get_domain(pinode, l->volume);
+ dom = get_domain(pinode, l->volume);
- grant_blocked_entry_locks(this, pinode, dom, &now, pcontend);
+ grant_blocked_entry_locks(this, pinode, dom, &now, pcontend);
- pthread_mutex_lock(&pinode->mutex);
- {
- __pl_entrylk_unref(l);
- }
- pthread_mutex_unlock(&pinode->mutex);
+ pthread_mutex_lock(&pinode->mutex);
+ {
+ __pl_entrylk_unref(l);
+ }
+ pthread_mutex_unlock(&pinode->mutex);
- inode_unref(pinode->inode);
+ inode_unref(pinode->inode);
+ }
}
if (pcontend != NULL) {
diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c
index eff58a79569..d4e51d6e0a1 100644
--- a/xlators/features/locks/src/inodelk.c
+++ b/xlators/features/locks/src/inodelk.c
@@ -7,18 +7,16 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "list.h"
-#include "upcall-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/list.h>
+#include <glusterfs/upcall-utils.h>
#include "locks.h"
#include "clear.h"
#include "common.h"
-#include "pl-messages.h"
void
__delete_inode_lock(pl_inode_lock_t *lock)
@@ -142,15 +140,13 @@ __stale_inodelk(xlator_t *this, pl_inode_lock_t *candidate_lock,
pl_inode_lock_t *requested_lock, time_t *lock_age_sec)
{
posix_locks_private_t *priv = NULL;
- struct timeval curr;
priv = this->private;
- gettimeofday(&curr, NULL);
/* Question: Should we just prune them all given the
* chance? Or just the locks we are attempting to acquire?
*/
if (inodelk_conflict(candidate_lock, requested_lock)) {
- *lock_age_sec = curr.tv_sec - candidate_lock->granted_time.tv_sec;
+ *lock_age_sec = gf_time() - candidate_lock->granted_time;
if (*lock_age_sec > priv->revocation_secs)
return _gf_true;
}
@@ -231,9 +227,9 @@ out:
return revoke_lock;
}
-static gf_boolean_t
-__inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock,
- struct timespec *now)
+void
+inodelk_contention_notify_check(xlator_t *this, pl_inode_lock_t *lock,
+ struct timespec *now, struct list_head *contend)
{
posix_locks_private_t *priv;
int64_t elapsed;
@@ -243,7 +239,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock,
/* If this lock is in a list, it means that we are about to send a
* notification for it, so no need to do anything else. */
if (!list_empty(&lock->contend)) {
- return _gf_false;
+ return;
}
elapsed = now->tv_sec;
@@ -252,7 +248,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock,
elapsed--;
}
if (elapsed < priv->notify_contention_delay) {
- return _gf_false;
+ return;
}
/* All contention notifications will be sent outside of the locked
@@ -265,7 +261,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock,
lock->contention_time = *now;
- return _gf_true;
+ list_add_tail(&lock->contend, contend);
}
void
@@ -353,9 +349,7 @@ __inodelk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock,
break;
}
}
- if (__inodelk_needs_contention_notify(this, l, now)) {
- list_add_tail(&l->contend, contend);
- }
+ inodelk_contention_notify_check(this, l, now, contend);
}
}
@@ -401,15 +395,11 @@ static int
__lock_blocked_add(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock,
int can_block)
{
- struct timeval now;
-
- gettimeofday(&now, NULL);
-
if (can_block == 0) {
goto out;
}
- lock->blkd_time = now;
+ lock->blkd_time = gf_time();
list_add_tail(&lock->blocked_locks, &dom->blocked_inodelks);
gf_msg_trace(this->name, 0,
@@ -420,6 +410,8 @@ __lock_blocked_add(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock,
lkowner_utoa(&lock->owner), lock->user_flock.l_start,
lock->user_flock.l_len);
+ pl_trace_block(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock,
+ lock->volume);
out:
return -EAGAIN;
}
@@ -433,12 +425,17 @@ __lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
struct list_head *contend)
{
pl_inode_lock_t *conf = NULL;
- int ret = -EINVAL;
+ int ret;
- conf = __inodelk_grantable(this, dom, lock, now, contend);
- if (conf) {
- ret = __lock_blocked_add(this, dom, lock, can_block);
- goto out;
+ ret = pl_inode_remove_inodelk(pl_inode, lock);
+ if (ret < 0) {
+ return ret;
+ }
+ if (ret == 0) {
+ conf = __inodelk_grantable(this, dom, lock, now, contend);
+ }
+ if ((ret > 0) || (conf != NULL)) {
+ return __lock_blocked_add(this, dom, lock, can_block);
}
/* To prevent blocked locks starvation, check if there are any blocked
@@ -460,17 +457,13 @@ __lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
"starvation");
}
- ret = __lock_blocked_add(this, dom, lock, can_block);
- goto out;
+ return __lock_blocked_add(this, dom, lock, can_block);
}
__pl_inodelk_ref(lock);
- gettimeofday(&lock->granted_time, NULL);
+ lock->granted_time = gf_time();
list_add(&lock->list, &dom->inodelk_list);
- ret = 0;
-
-out:
- return ret;
+ return 0;
}
/* Return true if the two inodelks have exactly same lock boundaries */
@@ -502,33 +495,36 @@ static pl_inode_lock_t *
__inode_unlock_lock(xlator_t *this, pl_inode_lock_t *lock, pl_dom_list_t *dom)
{
pl_inode_lock_t *conf = NULL;
+ inode_t *inode = NULL;
+
+ inode = lock->pl_inode->inode;
conf = find_matching_inodelk(lock, dom);
if (!conf) {
gf_log(this->name, GF_LOG_ERROR,
" Matching lock not found for unlock %llu-%llu, by %s "
- "on %p",
+ "on %p for gfid:%s",
(unsigned long long)lock->fl_start,
(unsigned long long)lock->fl_end, lkowner_utoa(&lock->owner),
- lock->client);
+ lock->client, inode ? uuid_utoa(inode->gfid) : "UNKNOWN");
goto out;
}
__delete_inode_lock(conf);
gf_log(this->name, GF_LOG_DEBUG,
- " Matching lock found for unlock %llu-%llu, by %s on %p",
+ " Matching lock found for unlock %llu-%llu, by %s on %p for gfid:%s",
(unsigned long long)lock->fl_start, (unsigned long long)lock->fl_end,
- lkowner_utoa(&lock->owner), lock->client);
+ lkowner_utoa(&lock->owner), lock->client,
+ inode ? uuid_utoa(inode->gfid) : "UNKNOWN");
out:
return conf;
}
-static void
+void
__grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
struct list_head *granted, pl_dom_list_t *dom,
struct timespec *now, struct list_head *contend)
{
- int bl_ret = 0;
pl_inode_lock_t *bl = NULL;
pl_inode_lock_t *tmp = NULL;
@@ -541,52 +537,48 @@ __grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
{
list_del_init(&bl->blocked_locks);
- bl_ret = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend);
+ bl->status = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend);
- if (bl_ret == 0) {
- list_add(&bl->blocked_locks, granted);
+ if (bl->status != -EAGAIN) {
+ list_add_tail(&bl->blocked_locks, granted);
}
}
- return;
}
-/* Grant all inodelks blocked on a lock */
void
-grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
- pl_dom_list_t *dom, struct timespec *now,
- struct list_head *contend)
+unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted)
{
- struct list_head granted;
pl_inode_lock_t *lock;
pl_inode_lock_t *tmp;
+ int32_t op_ret;
+ int32_t op_errno;
- INIT_LIST_HEAD(&granted);
-
- pthread_mutex_lock(&pl_inode->mutex);
+ list_for_each_entry_safe(lock, tmp, granted, blocked_locks)
{
- __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now,
- contend);
- }
- pthread_mutex_unlock(&pl_inode->mutex);
-
- list_for_each_entry_safe(lock, tmp, &granted, blocked_locks)
- {
- gf_log(this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => Granted",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid,
- lkowner_utoa(&lock->owner), lock->user_flock.l_start,
- lock->user_flock.l_len);
-
+ if (lock->status == 0) {
+ op_ret = 0;
+ op_errno = 0;
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64
+ " => Granted",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid, lkowner_utoa(&lock->owner),
+ lock->user_flock.l_start, lock->user_flock.l_len);
+ } else {
+ op_ret = -1;
+ op_errno = -lock->status;
+ }
pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock,
- 0, 0, lock->volume);
+ op_ret, op_errno, lock->volume);
- STACK_UNWIND_STRICT(inodelk, lock->frame, 0, 0, NULL);
+ STACK_UNWIND_STRICT(inodelk, lock->frame, op_ret, op_errno, NULL);
lock->frame = NULL;
}
pthread_mutex_lock(&pl_inode->mutex);
{
- list_for_each_entry_safe(lock, tmp, &granted, blocked_locks)
+ list_for_each_entry_safe(lock, tmp, granted, blocked_locks)
{
list_del_init(&lock->blocked_locks);
__pl_inodelk_unref(lock);
@@ -595,6 +587,26 @@ grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
pthread_mutex_unlock(&pl_inode->mutex);
}
+/* Grant all inodelks blocked on a lock */
+void
+grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
+ pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend)
+{
+ struct list_head granted;
+
+ INIT_LIST_HEAD(&granted);
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now,
+ contend);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ unwind_granted_inodes(this, pl_inode, &granted);
+}
+
static void
pl_inodelk_log_cleanup(pl_inode_lock_t *lock)
{
@@ -656,7 +668,7 @@ pl_inodelk_client_cleanup(xlator_t *this, pl_ctx_t *ctx)
* and blocked lists, then this means that a parallel
* unlock on another inodelk (L2 say) may have 'granted'
* L1 and added it to 'granted' list in
- * __grant_blocked_node_locks() (although using the
+ * __grant_blocked_inode_locks() (although using the
* 'blocked_locks' member). In that case, the cleanup
* codepath must try and grant other overlapping
* blocked inodelks from other clients, now that L1 is
@@ -691,31 +703,35 @@ pl_inodelk_client_cleanup(xlator_t *this, pl_ctx_t *ctx)
}
pthread_mutex_unlock(&ctx->lock);
- list_for_each_entry_safe(l, tmp, &unwind, client_list)
- {
- list_del_init(&l->client_list);
+ if (!list_empty(&unwind)) {
+ list_for_each_entry_safe(l, tmp, &unwind, client_list)
+ {
+ list_del_init(&l->client_list);
- if (l->frame)
- STACK_UNWIND_STRICT(inodelk, l->frame, -1, EAGAIN, NULL);
- list_add_tail(&l->client_list, &released);
+ if (l->frame)
+ STACK_UNWIND_STRICT(inodelk, l->frame, -1, EAGAIN, NULL);
+ list_add_tail(&l->client_list, &released);
+ }
}
- list_for_each_entry_safe(l, tmp, &released, client_list)
- {
- list_del_init(&l->client_list);
+ if (!list_empty(&released)) {
+ list_for_each_entry_safe(l, tmp, &released, client_list)
+ {
+ list_del_init(&l->client_list);
- pl_inode = l->pl_inode;
+ pl_inode = l->pl_inode;
- dom = get_domain(pl_inode, l->volume);
+ dom = get_domain(pl_inode, l->volume);
- grant_blocked_inode_locks(this, pl_inode, dom, &now, pcontend);
+ grant_blocked_inode_locks(this, pl_inode, dom, &now, pcontend);
- pthread_mutex_lock(&pl_inode->mutex);
- {
- __pl_inodelk_unref(l);
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ __pl_inodelk_unref(l);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ inode_unref(pl_inode->inode);
}
- pthread_mutex_unlock(&pl_inode->mutex);
- inode_unref(pl_inode->inode);
}
if (pcontend != NULL) {
@@ -737,6 +753,7 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
gf_boolean_t need_inode_unref = _gf_false;
struct list_head *pcontend = NULL;
struct list_head contend;
+ struct list_head wake;
struct timespec now = {};
short fl_type;
@@ -788,6 +805,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
timespec_now(&now);
}
+ INIT_LIST_HEAD(&wake);
+
if (ctx)
pthread_mutex_lock(&ctx->lock);
pthread_mutex_lock(&pl_inode->mutex);
@@ -810,18 +829,17 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid, lkowner_utoa(&lock->owner),
lock->user_flock.l_start, lock->user_flock.l_len);
- if (can_block)
+ if (can_block) {
unref = _gf_false;
- /* For all but the case where a non-blocking
- * lock attempt fails, the extra ref taken at
- * the start of this function must be negated.
- */
- else
- need_inode_unref = _gf_true;
+ }
}
-
- if (ctx && (!ret || can_block))
+ /* For all but the case where a non-blocking lock attempt fails
+ * with -EAGAIN, the extra ref taken at the start of this function
+ * must be negated. */
+ need_inode_unref = (ret != 0) && ((ret != -EAGAIN) || !can_block);
+ if (ctx && !need_inode_unref) {
list_add_tail(&lock->client_list, &ctx->inodelk_lockers);
+ }
} else {
/* Irrespective of whether unlock succeeds or not,
* the extra inode ref that was done at the start of
@@ -839,6 +857,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
list_del_init(&retlock->client_list);
__pl_inodelk_unref(retlock);
+ pl_inode_remove_unlocked(this, pl_inode, &wake);
+
ret = 0;
}
out:
@@ -849,6 +869,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
if (ctx)
pthread_mutex_unlock(&ctx->lock);
+ pl_inode_remove_wake(&wake);
+
/* The following (extra) unref corresponds to the ref that
* was done at the time the lock was granted.
*/
@@ -869,17 +891,23 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
}
/* Create a new inode_lock_t */
-pl_inode_lock_t *
+static pl_inode_lock_t *
new_inode_lock(struct gf_flock *flock, client_t *client, pid_t client_pid,
call_frame_t *frame, xlator_t *this, const char *volume,
- char *conn_id)
+ char *conn_id, int32_t *op_errno)
{
pl_inode_lock_t *lock = NULL;
+ if (!pl_is_lk_owner_valid(&frame->root->lk_owner, frame->root->client)) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
lock = GF_CALLOC(1, sizeof(*lock), gf_locks_mt_pl_inode_lock_t);
if (!lock) {
- return NULL;
+ *op_errno = ENOMEM;
+ goto out;
}
lock->fl_start = flock->l_start;
@@ -907,6 +935,7 @@ new_inode_lock(struct gf_flock *flock, client_t *client, pid_t client_pid,
INIT_LIST_HEAD(&lock->contend);
__pl_inodelk_ref(lock);
+out:
return lock;
}
@@ -951,6 +980,7 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
int ret = -1;
GF_UNUSED int dict_ret = -1;
int can_block = 0;
+ short lock_type = 0;
pl_inode_t *pinode = NULL;
pl_inode_lock_t *reqlock = NULL;
pl_dom_list_t *dom = NULL;
@@ -988,7 +1018,7 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
}
}
- pinode = pl_inode_get(this, inode);
+ pinode = pl_inode_get(this, inode, NULL);
if (!pinode) {
op_errno = ENOMEM;
goto unwind;
@@ -1001,11 +1031,10 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
}
reqlock = new_inode_lock(flock, frame->root->client, frame->root->pid,
- frame, this, dom->domain, conn_id);
+ frame, this, dom->domain, conn_id, &op_errno);
if (!reqlock) {
op_ret = -1;
- op_errno = ENOMEM;
goto unwind;
}
@@ -1016,16 +1045,20 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
/* fall through */
case F_SETLK:
+ lock_type = flock->l_type;
memcpy(&reqlock->user_flock, flock, sizeof(struct gf_flock));
ret = pl_inode_setlk(this, ctx, pinode, reqlock, can_block, dom,
inode);
if (ret < 0) {
- if ((can_block) && (F_UNLCK != flock->l_type)) {
- pl_trace_block(this, frame, fd, loc, cmd, flock, volume);
- goto out;
+ if (ret == -EAGAIN) {
+ if (can_block && (F_UNLCK != lock_type)) {
+ goto out;
+ }
+ gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN");
+ } else {
+ gf_log(this->name, GF_LOG_TRACE, "returning %d", ret);
}
- gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN");
op_errno = -ret;
goto unwind;
}
diff --git a/xlators/features/locks/src/locks-mem-types.h b/xlators/features/locks/src/locks-mem-types.h
index 240c1957a42..a76605027b3 100644
--- a/xlators/features/locks/src/locks-mem-types.h
+++ b/xlators/features/locks/src/locks-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __LOCKS_MEM_TYPES_H__
#define __LOCKS_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_locks_mem_types_ {
gf_locks_mt_pl_dom_list_t = gf_common_mt_end + 1,
@@ -19,7 +19,6 @@ enum gf_locks_mem_types_ {
gf_locks_mt_posix_lock_t,
gf_locks_mt_pl_entry_lock_t,
gf_locks_mt_pl_inode_lock_t,
- gf_locks_mt_truncate_ops,
gf_locks_mt_pl_rw_req_t,
gf_locks_mt_posix_locks_private_t,
gf_locks_mt_pl_fdctx_t,
diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
index cf2849fc251..c868eb494a2 100644
--- a/xlators/features/locks/src/locks.h
+++ b/xlators/features/locks/src/locks.h
@@ -10,13 +10,13 @@
#ifndef __POSIX_LOCKS_H__
#define __POSIX_LOCKS_H__
-#include "compat-errno.h"
-#include "stack.h"
-#include "call-stub.h"
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/stack.h>
+#include <glusterfs/call-stub.h>
#include "locks-mem-types.h"
-#include "client_t.h"
+#include <glusterfs/client_t.h>
-#include "lkowner.h"
+#include <glusterfs/lkowner.h>
typedef enum {
MLK_NONE,
@@ -30,11 +30,11 @@ struct __pl_fd;
struct __posix_lock {
struct list_head list;
- short fl_type;
off_t fl_start;
off_t fl_end;
uint32_t lk_flags;
+ short fl_type;
short blocked; /* waiting to acquire */
struct gf_flock user_flock; /* the flock supplied by the user */
xlator_t *this; /* required for blocked locks */
@@ -43,9 +43,8 @@ struct __posix_lock {
fd_t *fd;
call_frame_t *frame;
- struct timeval blkd_time; /*time at which lock was queued into blkd list*/
- struct timeval
- granted_time; /*time at which lock was queued into active list*/
+ time_t blkd_time; /* time at which lock was queued into blkd list */
+ time_t granted_time; /* time at which lock was queued into active list */
/* These two together serve to uniquely identify each process
across nodes */
@@ -74,7 +73,6 @@ struct __pl_inode_lock {
struct list_head contend; /* list of contending locks */
int ref;
- short fl_type;
off_t fl_start;
off_t fl_end;
@@ -86,9 +84,9 @@ struct __pl_inode_lock {
call_frame_t *frame;
- struct timeval blkd_time; /*time at which lock was queued into blkd list*/
- struct timeval
- granted_time; /*time at which lock was queued into active list*/
+ time_t blkd_time; /* time at which lock was queued into blkd list */
+ time_t granted_time; /* time at which lock was queued into active list */
+
/*last time at which lock contention was detected and notified*/
struct timespec contention_time;
@@ -102,6 +100,10 @@ struct __pl_inode_lock {
char *connection_id; /* stores the client connection id */
struct list_head client_list; /* list of all locks from a client */
+ short fl_type;
+
+ int32_t status; /* Error code when we try to grant a lock in blocked
+ state */
};
typedef struct __pl_inode_lock pl_inode_lock_t;
@@ -135,11 +137,10 @@ struct __entry_lock {
const char *volume;
const char *basename;
- entrylk_type type;
- struct timeval blkd_time; /*time at which lock was queued into blkd list*/
- struct timeval
- granted_time; /*time at which lock was queued into active list*/
+ time_t blkd_time; /* time at which lock was queued into blkd list */
+ time_t granted_time; /* time at which lock was queued into active list */
+
/*last time at which lock contention was detected and notified*/
struct timespec contention_time;
@@ -150,6 +151,7 @@ struct __entry_lock {
char *connection_id; /* stores the client connection id */
struct list_head client_list; /* list of all locks from a client */
+ entrylk_type type;
};
typedef struct __entry_lock pl_entry_lock_t;
@@ -164,13 +166,14 @@ struct __pl_inode {
struct list_head rw_list; /* list of waiting r/w requests */
struct list_head reservelk_list; /* list of reservelks */
struct list_head blocked_reservelks; /* list of blocked reservelks */
- struct list_head
- blocked_calls; /* List of blocked lock calls while a reserve is held*/
- struct list_head metalk_list; /* Meta lock list */
- /* This is to store the incoming lock
- requests while meta lock is enabled */
- struct list_head queued_locks;
- int mandatory; /* if mandatory locking is enabled */
+ struct list_head blocked_calls; /* List of blocked lock calls while a
+ reserve is held*/
+ struct list_head metalk_list; /* Meta lock list */
+ struct list_head queued_locks; /* This is to store the incoming lock
+ requests while meta lock is enabled */
+ struct list_head waiting; /* List of pending fops waiting to unlink/rmdir
+ the inode. */
+ int mandatory; /* if mandatory locking is enabled */
inode_t *refkeeper; /* hold refs on an inode while locks are
held to prevent pruning */
@@ -179,6 +182,31 @@ struct __pl_inode {
of inode_t as long as there are
locks on it */
gf_boolean_t migrated;
+
+ /* Flag to indicate whether to read mlock-enforce xattr from disk */
+ gf_boolean_t check_mlock_info;
+
+ /* Mandatory_lock enforce: IO will be allowed if and only if the lkowner has
+ held the lock.
+
+ Note: An xattr is set on the file to recover this information post
+ reboot. If client does not want mandatory lock to be enforced, then it
+ should remove this xattr explicitly
+ */
+ gf_boolean_t mlock_enforced;
+ /* There are scenarios where mandatory lock is granted but there are IOs
+ pending at posix level. To avoid this before preempting the previous lock
+ owner, we wait for all the fops to be unwound.
+ */
+ int fop_wind_count;
+ pthread_cond_t check_fop_wind_count;
+
+ gf_boolean_t track_fop_wind_count;
+
+ int32_t links; /* Number of hard links the inode has. */
+ uint32_t remove_running; /* Number of remove operations running. */
+ gf_boolean_t is_locked; /* Regular locks will be blocked. */
+ gf_boolean_t removed; /* The inode has been deleted. */
};
typedef struct __pl_inode pl_inode_t;
@@ -196,29 +224,33 @@ struct __pl_metalk {
typedef struct __pl_metalk pl_meta_lock_t;
typedef struct {
+ char *brickname;
+ uint32_t revocation_secs;
+ uint32_t revocation_max_blocked;
+ uint32_t notify_contention_delay;
mlk_mode_t mandatory_mode; /* holds current mandatory locking mode */
gf_boolean_t trace; /* trace lock requests in and out */
- char *brickname;
gf_boolean_t monkey_unlocking;
- uint32_t revocation_secs;
gf_boolean_t revocation_clear_all;
- uint32_t revocation_max_blocked;
gf_boolean_t notify_contention;
- uint32_t notify_contention_delay;
+ gf_boolean_t mlock_enforced;
} posix_locks_private_t;
typedef struct {
- gf_boolean_t entrylk_count_req;
- gf_boolean_t inodelk_count_req;
- gf_boolean_t posixlk_count_req;
- gf_boolean_t parent_entrylk_req;
data_t *inodelk_dom_count_req;
dict_t *xdata;
loc_t loc[2];
fd_t *fd;
+ inode_t *inode;
off_t offset;
glusterfs_fop_t op;
+ gf_boolean_t entrylk_count_req;
+ gf_boolean_t inodelk_count_req;
+ gf_boolean_t posixlk_count_req;
+ gf_boolean_t parent_entrylk_req;
+ gf_boolean_t multiple_dom_lk_requests;
+ int update_mlock_enforced_flag;
} pl_local_t;
typedef struct {
@@ -239,6 +271,15 @@ typedef struct _locks_ctx {
struct list_head metalk_list;
} pl_ctx_t;
+typedef struct _multi_dom_lk_data {
+ xlator_t *this;
+ inode_t *inode;
+ dict_t *xdata_rsp;
+ gf_boolean_t keep_max;
+} multi_dom_lk_data;
+
+typedef enum { DECREMENT, INCREMENT } pl_count_op_t;
+
pl_ctx_t *
pl_ctx_get(client_t *client, xlator_t *xlator);
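Note: locks.h switches blkd_time/granted_time from struct timeval to plain time_t, matching the gf_time() calls introduced in common.c, entrylk.c and inodelk.c. A hedged one-liner showing how a staleness check now reads; pl_sketch_lock_is_stale() is illustrative, mirroring __stale_inodelk().

static gf_boolean_t
pl_sketch_lock_is_stale(posix_locks_private_t *priv, pl_inode_lock_t *lock)
{
    /* gf_time() returns wall-clock seconds, so lock age is a plain
     * subtraction instead of gettimeofday()/timeval arithmetic. */
    return ((gf_time() - lock->granted_time) > priv->revocation_secs)
               ? _gf_true
               : _gf_false;
}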
diff --git a/xlators/features/locks/src/pl-messages.h b/xlators/features/locks/src/pl-messages.h
index a99e1bbce43..e2d3d7ca974 100644
--- a/xlators/features/locks/src/pl-messages.h
+++ b/xlators/features/locks/src/pl-messages.h
@@ -11,7 +11,7 @@
#ifndef _PL_MESSAGES_H_
#define _PL_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index 917eacee8da..cf0ae4c57dd 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -12,19 +12,15 @@
#include <limits.h>
#include <pthread.h>
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "logging.h"
-#include "common-utils.h"
+#include <glusterfs/compat.h>
+#include <glusterfs/logging.h>
#include "locks.h"
#include "common.h"
-#include "statedump.h"
+#include <glusterfs/statedump.h>
#include "clear.h"
-#include "defaults.h"
-#include "syncop.h"
-#include "pl-messages.h"
+#include <glusterfs/defaults.h>
+#include <glusterfs/syncop.h>
#ifndef LLONG_MAX
#define LLONG_MAX LONG_LONG_MAX /* compat with old gcc */
@@ -43,21 +39,6 @@ pl_lockinfo_get_brickname(xlator_t *, inode_t *, int32_t *);
static int
fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **);
-#define PL_STACK_UNWIND_AND_FREE(__local, fop, frame, op_ret, params...) \
- do { \
- frame->local = NULL; \
- STACK_UNWIND_STRICT(fop, frame, op_ret, params); \
- if (__local) { \
- if (__local->inodelk_dom_count_req) \
- data_unref(__local->inodelk_dom_count_req); \
- loc_wipe(&__local->loc[0]); \
- loc_wipe(&__local->loc[1]); \
- if (__local->fd) \
- fd_unref(__local->fd); \
- mem_put(__local); \
- } \
- } while (0)
-
/*
* The client is always requesting data, but older
* servers were not returning it. Newer ones are, so
@@ -115,69 +96,156 @@ fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **);
#define PL_LOCAL_GET_REQUESTS(frame, this, xdata, __fd, __loc, __newloc) \
do { \
if (pl_has_xdata_requests(xdata)) { \
- frame->local = mem_get0(this->local_pool); \
+ if (!frame->local) \
+ frame->local = mem_get0(this->local_pool); \
pl_local_t *__local = frame->local; \
if (__local) { \
if (__fd) { \
__local->fd = fd_ref(__fd); \
+ __local->inode = inode_ref(__fd->inode); \
} else { \
if (__loc) \
loc_copy(&__local->loc[0], __loc); \
if (__newloc) \
loc_copy(&__local->loc[1], __newloc); \
+ __local->inode = inode_ref(__local->loc[0].inode); \
} \
pl_get_xdata_requests(__local, xdata); \
} \
} \
} while (0)
+#define PL_CHECK_LOCK_ENFORCE_KEY(frame, dict, name, this, loc, fd, priv) \
+ do { \
+ if ((dict && (dict_get(dict, GF_ENFORCE_MANDATORY_LOCK))) || \
+ (name && (strcmp(name, GF_ENFORCE_MANDATORY_LOCK) == 0))) { \
+ inode_t *__inode = (loc ? loc->inode : fd->inode); \
+ pl_inode_t *__pl_inode = pl_inode_get(this, __inode, NULL); \
+ if (__pl_inode == NULL) { \
+ op_ret = -1; \
+ op_errno = ENOMEM; \
+ goto unwind; \
+ } \
+ if (!pl_is_mandatory_locking_enabled(__pl_inode) || \
+ !priv->mlock_enforced) { \
+ op_ret = -1; \
+ gf_msg(this->name, GF_LOG_DEBUG, EINVAL, 0, \
+ "option %s would need mandatory lock to be enabled " \
+ "and feature.enforce-mandatory-lock option to be set " \
+ "to on", \
+ GF_ENFORCE_MANDATORY_LOCK); \
+ op_errno = EINVAL; \
+ goto unwind; \
+ } \
+ \
+ op_ret = pl_local_init(frame, this, loc, fd); \
+ if (op_ret) { \
+ op_errno = ENOMEM; \
+ goto unwind; \
+ } \
+ \
+ ((pl_local_t *)(frame->local))->update_mlock_enforced_flag = 1; \
+ } \
+ } while (0)
+
+#define PL_INODE_REMOVE(_fop, _frame, _xl, _loc1, _loc2, _cont, _cbk, \
+ _args...) \
+ ({ \
+ struct list_head contend; \
+ pl_inode_t *__pl_inode; \
+ call_stub_t *__stub; \
+ int32_t __error; \
+ INIT_LIST_HEAD(&contend); \
+ __error = pl_inode_remove_prepare(_xl, _frame, _loc2 ? _loc2 : _loc1, \
+ &__pl_inode, &contend); \
+ if (__error < 0) { \
+ __stub = fop_##_fop##_stub(_frame, _cont, ##_args); \
+ __error = pl_inode_remove_complete(_xl, __pl_inode, __stub, \
+ &contend); \
+ } else if (__error == 0) { \
+ PL_LOCAL_GET_REQUESTS(_frame, _xl, xdata, ((fd_t *)NULL), _loc1, \
+ _loc2); \
+ STACK_WIND_COOKIE(_frame, _cbk, __pl_inode, FIRST_CHILD(_xl), \
+ FIRST_CHILD(_xl)->fops->_fop, ##_args); \
+ } \
+ __error; \
+ })
+
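The ({ ... }) construct used by PL_INODE_REMOVE above is a GNU C statement expression: the value of the whole block is its last expression, which is how the macro hands __error back to its call site. A minimal standalone sketch of the idiom, with hypothetical helper names (not part of this patch):

#include <stdio.h>

/* Hypothetical helpers standing in for pl_inode_remove_prepare() and the
 * wind path; they only return an error code. */
static int prepare_op(int x) { return (x < 0) ? -1 : 0; }
static int submit_op(int x) { return x % 2; }

/* The whole macro invocation evaluates to __err, just as PL_INODE_REMOVE
 * evaluates to __error (GNU C statement expression). */
#define RUN_OP(x)                                                              \
    ({                                                                         \
        int __err = prepare_op(x);                                             \
        if (__err == 0)                                                        \
            __err = submit_op(x);                                              \
        __err;                                                                 \
    })

int main(void)
{
    printf("RUN_OP(4) = %d\n", RUN_OP(4));   /* 0: prepared and submitted */
    printf("RUN_OP(-1) = %d\n", RUN_OP(-1)); /* -1: prepare failed */
    return 0;
}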
gf_boolean_t
pl_has_xdata_requests(dict_t *xdata)
{
- char *reqs[] = {GLUSTERFS_ENTRYLK_COUNT, GLUSTERFS_INODELK_COUNT,
- GLUSTERFS_INODELK_DOM_COUNT, GLUSTERFS_POSIXLK_COUNT,
- GLUSTERFS_PARENT_ENTRYLK, NULL};
+ static char *reqs[] = {GLUSTERFS_ENTRYLK_COUNT,
+ GLUSTERFS_INODELK_COUNT,
+ GLUSTERFS_INODELK_DOM_COUNT,
+ GLUSTERFS_POSIXLK_COUNT,
+ GLUSTERFS_PARENT_ENTRYLK,
+ GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS,
+ NULL};
+ static int reqs_size[] = {SLEN(GLUSTERFS_ENTRYLK_COUNT),
+ SLEN(GLUSTERFS_INODELK_COUNT),
+ SLEN(GLUSTERFS_INODELK_DOM_COUNT),
+ SLEN(GLUSTERFS_POSIXLK_COUNT),
+ SLEN(GLUSTERFS_PARENT_ENTRYLK),
+ SLEN(GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS),
+ 0};
int i = 0;
if (!xdata)
return _gf_false;
for (i = 0; reqs[i]; i++)
- if (dict_get(xdata, reqs[i]))
+ if (dict_getn(xdata, reqs[i], reqs_size[i]))
return _gf_true;
return _gf_false;
}
+static int
+dict_delete_domain_key(dict_t *dict, char *key, data_t *value, void *data)
+{
+ dict_del(dict, key);
+ return 0;
+}
+
void
pl_get_xdata_requests(pl_local_t *local, dict_t *xdata)
{
if (!local || !xdata)
return;
- if (dict_get(xdata, GLUSTERFS_ENTRYLK_COUNT)) {
+ GF_ASSERT(local->xdata == NULL);
+ local->xdata = dict_copy_with_ref(xdata, NULL);
+
+ if (dict_get_sizen(xdata, GLUSTERFS_ENTRYLK_COUNT)) {
local->entrylk_count_req = 1;
- dict_del(xdata, GLUSTERFS_ENTRYLK_COUNT);
+ dict_del_sizen(xdata, GLUSTERFS_ENTRYLK_COUNT);
}
- if (dict_get(xdata, GLUSTERFS_INODELK_COUNT)) {
+ if (dict_get_sizen(xdata, GLUSTERFS_INODELK_COUNT)) {
local->inodelk_count_req = 1;
- dict_del(xdata, GLUSTERFS_INODELK_COUNT);
+ dict_del_sizen(xdata, GLUSTERFS_INODELK_COUNT);
+ }
+ if (dict_get_sizen(xdata, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS)) {
+ local->multiple_dom_lk_requests = 1;
+ dict_del_sizen(xdata, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS);
+ dict_foreach_fnmatch(xdata, GLUSTERFS_INODELK_DOM_PREFIX "*",
+ dict_delete_domain_key, NULL);
}
- local->inodelk_dom_count_req = dict_get(xdata, GLUSTERFS_INODELK_DOM_COUNT);
+ local->inodelk_dom_count_req = dict_get_sizen(xdata,
+ GLUSTERFS_INODELK_DOM_COUNT);
if (local->inodelk_dom_count_req) {
data_ref(local->inodelk_dom_count_req);
- dict_del(xdata, GLUSTERFS_INODELK_DOM_COUNT);
+ dict_del_sizen(xdata, GLUSTERFS_INODELK_DOM_COUNT);
}
- if (dict_get(xdata, GLUSTERFS_POSIXLK_COUNT)) {
+ if (dict_get_sizen(xdata, GLUSTERFS_POSIXLK_COUNT)) {
local->posixlk_count_req = 1;
- dict_del(xdata, GLUSTERFS_POSIXLK_COUNT);
+ dict_del_sizen(xdata, GLUSTERFS_POSIXLK_COUNT);
}
- if (dict_get(xdata, GLUSTERFS_PARENT_ENTRYLK)) {
+ if (dict_get_sizen(xdata, GLUSTERFS_PARENT_ENTRYLK)) {
local->parent_entrylk_req = 1;
- dict_del(xdata, GLUSTERFS_PARENT_ENTRYLK);
+ dict_del_sizen(xdata, GLUSTERFS_PARENT_ENTRYLK);
}
}
@@ -187,20 +255,11 @@ pl_needs_xdata_response(pl_local_t *local)
if (!local)
return _gf_false;
- if (local->parent_entrylk_req)
- return _gf_true;
-
- if (local->entrylk_count_req)
- return _gf_true;
-
- if (local->inodelk_dom_count_req)
- return _gf_true;
-
- if (local->inodelk_count_req)
+ if (local->parent_entrylk_req || local->entrylk_count_req ||
+ local->inodelk_dom_count_req || local->inodelk_count_req ||
+ local->posixlk_count_req || local->multiple_dom_lk_requests)
return _gf_true;
- if (local->posixlk_count_req)
- return _gf_true;
return _gf_false;
}
@@ -221,8 +280,43 @@ pl_get_xdata_rsp_args(pl_local_t *local, char *fop, inode_t **parent,
}
}
-int32_t
-__get_posixlk_count(xlator_t *this, pl_inode_t *pl_inode)
+static inline int
+pl_track_io_fop_count(pl_local_t *local, xlator_t *this, pl_count_op_t op)
+{
+ pl_inode_t *pl_inode = NULL;
+
+ if (!local)
+ return -1;
+
+ pl_inode = pl_inode_get(this, local->inode, NULL);
+ if (!pl_inode)
+ return -1;
+
+ if (pl_inode->mlock_enforced && pl_inode->track_fop_wind_count) {
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ if (op == DECREMENT) {
+ pl_inode->fop_wind_count--;
+                /* fop_wind_count can go negative when lock enforcement is
+                 * enabled on the unwind path of an IO. Hence the "<="
+                 * comparison below.
+                 */
+ if (pl_inode->fop_wind_count <= 0) {
+ pthread_cond_broadcast(&pl_inode->check_fop_wind_count);
+ pl_inode->track_fop_wind_count = _gf_false;
+ pl_inode->fop_wind_count = 0;
+ }
+ } else {
+ pl_inode->fop_wind_count++;
+ }
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
+ return 0;
+}
+
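pl_track_io_fop_count above implements a drain barrier: IOs bump fop_wind_count on the wind path, drop it on unwind, and a waiter (see pl_setxattr_cbk later in this patch) sleeps on check_fop_wind_count until the count reaches zero. A standalone pthreads sketch of that pattern, assuming nothing GlusterFS-specific (all names hypothetical; link with -lpthread):

#include <pthread.h>
#include <stdio.h>

typedef struct {
    pthread_mutex_t mutex;
    pthread_cond_t drained;
    int in_flight; /* analogous to fop_wind_count */
} drain_t;

static drain_t drain = {PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0};

static void op_wind(drain_t *d)
{
    pthread_mutex_lock(&d->mutex);
    d->in_flight++;
    pthread_mutex_unlock(&d->mutex);
}

static void op_unwind(drain_t *d)
{
    pthread_mutex_lock(&d->mutex);
    if (--d->in_flight <= 0) {
        d->in_flight = 0;
        pthread_cond_broadcast(&d->drained); /* wake any waiter */
    }
    pthread_mutex_unlock(&d->mutex);
}

/* Block until every wound op has unwound, like the wait loop added to
 * pl_setxattr_cbk before it flips mlock_enforced. */
static void wait_for_drain(drain_t *d)
{
    pthread_mutex_lock(&d->mutex);
    while (d->in_flight > 0)
        pthread_cond_wait(&d->drained, &d->mutex);
    pthread_mutex_unlock(&d->mutex);
}

int main(void)
{
    op_wind(&drain);
    op_unwind(&drain);
    wait_for_drain(&drain); /* returns immediately: nothing in flight */
    printf("drained\n");
    return 0;
}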
+static int32_t
+__get_posixlk_count(pl_inode_t *pl_inode)
{
posix_lock_t *lock = NULL;
int32_t count = 0;
@@ -237,10 +331,9 @@ get_posixlk_count(xlator_t *this, inode_t *inode)
{
pl_inode_t *pl_inode = NULL;
uint64_t tmp_pl_inode = 0;
- int ret = 0;
int32_t count = 0;
- ret = inode_ctx_get(inode, this, &tmp_pl_inode);
+ int ret = inode_ctx_get(inode, this, &tmp_pl_inode);
if (ret != 0) {
goto out;
}
@@ -249,7 +342,7 @@ get_posixlk_count(xlator_t *this, inode_t *inode)
pthread_mutex_lock(&pl_inode->mutex);
{
- count = __get_posixlk_count(this, pl_inode);
+ count = __get_posixlk_count(pl_inode);
}
pthread_mutex_unlock(&pl_inode->mutex);
@@ -265,10 +358,10 @@ pl_parent_entrylk_xattr_fill(xlator_t *this, inode_t *parent, char *basename,
int32_t maxcount = -1;
int ret = -1;
- if (!parent || !basename || !strlen(basename))
+ if (!parent || !basename)
goto out;
if (keep_max) {
- ret = dict_get_int32(dict, GLUSTERFS_PARENT_ENTRYLK, &maxcount);
+ ret = dict_get_int32_sizen(dict, GLUSTERFS_PARENT_ENTRYLK, &maxcount);
if (ret < 0)
gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s",
GLUSTERFS_PARENT_ENTRYLK);
@@ -277,7 +370,7 @@ pl_parent_entrylk_xattr_fill(xlator_t *this, inode_t *parent, char *basename,
if (maxcount >= entrylk)
return;
out:
- ret = dict_set_int32(dict, GLUSTERFS_PARENT_ENTRYLK, entrylk);
+ ret = dict_set_int32_sizen(dict, GLUSTERFS_PARENT_ENTRYLK, entrylk);
if (ret < 0) {
gf_msg_debug(this->name, 0, " dict_set failed on key %s",
GLUSTERFS_PARENT_ENTRYLK);
@@ -293,7 +386,7 @@ pl_entrylk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict,
int ret = -1;
if (keep_max) {
- ret = dict_get_int32(dict, GLUSTERFS_ENTRYLK_COUNT, &maxcount);
+ ret = dict_get_int32_sizen(dict, GLUSTERFS_ENTRYLK_COUNT, &maxcount);
if (ret < 0)
gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s",
GLUSTERFS_ENTRYLK_COUNT);
@@ -302,7 +395,7 @@ pl_entrylk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict,
if (maxcount >= count)
return;
- ret = dict_set_int32(dict, GLUSTERFS_ENTRYLK_COUNT, count);
+ ret = dict_set_int32_sizen(dict, GLUSTERFS_ENTRYLK_COUNT, count);
if (ret < 0) {
gf_msg_debug(this->name, 0, " dict_set failed on key %s",
GLUSTERFS_ENTRYLK_COUNT);
@@ -318,7 +411,7 @@ pl_inodelk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict,
int ret = -1;
if (keep_max) {
- ret = dict_get_int32(dict, GLUSTERFS_INODELK_COUNT, &maxcount);
+ ret = dict_get_int32_sizen(dict, GLUSTERFS_INODELK_COUNT, &maxcount);
if (ret < 0)
gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s",
GLUSTERFS_INODELK_COUNT);
@@ -327,7 +420,7 @@ pl_inodelk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict,
if (maxcount >= count)
return;
- ret = dict_set_int32(dict, GLUSTERFS_INODELK_COUNT, count);
+ ret = dict_set_int32_sizen(dict, GLUSTERFS_INODELK_COUNT, count);
if (ret < 0) {
gf_msg_debug(this->name, 0,
"Failed to set count for "
@@ -347,7 +440,7 @@ pl_posixlk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict,
int ret = -1;
if (keep_max) {
- ret = dict_get_int32(dict, GLUSTERFS_POSIXLK_COUNT, &maxcount);
+ ret = dict_get_int32_sizen(dict, GLUSTERFS_POSIXLK_COUNT, &maxcount);
if (ret < 0)
gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s",
GLUSTERFS_POSIXLK_COUNT);
@@ -356,7 +449,7 @@ pl_posixlk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict,
if (maxcount >= count)
return;
- ret = dict_set_int32(dict, GLUSTERFS_POSIXLK_COUNT, count);
+ ret = dict_set_int32_sizen(dict, GLUSTERFS_POSIXLK_COUNT, count);
if (ret < 0) {
gf_msg_debug(this->name, 0, " dict_set failed on key %s",
GLUSTERFS_POSIXLK_COUNT);
@@ -364,6 +457,80 @@ pl_posixlk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict,
}
void
+pl_inodelk_xattr_fill_each(xlator_t *this, inode_t *inode, dict_t *dict,
+ char *domname, gf_boolean_t keep_max, char *key)
+{
+ int32_t count = 0;
+ int32_t maxcount = -1;
+ int ret = -1;
+
+ if (keep_max) {
+ ret = dict_get_int32(dict, key, &maxcount);
+ if (ret < 0)
+ gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s",
+                         key);
+ }
+ count = get_inodelk_count(this, inode, domname);
+ if (maxcount >= count)
+ return;
+
+ ret = dict_set_int32(dict, key, count);
+ if (ret < 0) {
+ gf_msg_debug(this->name, 0,
+ "Failed to set count for "
+ "key %s",
+ key);
+ }
+
+ return;
+}
+
+static int
+pl_inodelk_xattr_fill_multiple(dict_t *this, char *key, data_t *value,
+ void *data)
+{
+ multi_dom_lk_data *d = data;
+ char *tmp_key = NULL;
+ char *save_ptr = NULL;
+
+ tmp_key = gf_strdup(key);
+ if (!tmp_key)
+ return -1;
+
+ strtok_r(tmp_key, ":", &save_ptr);
+ if (!*save_ptr) {
+ if (tmp_key)
+ GF_FREE(tmp_key);
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, EINVAL,
+ "Could not tokenize domain string from key %s", key);
+ return -1;
+ }
+
+ pl_inodelk_xattr_fill_each(d->this, d->inode, d->xdata_rsp, save_ptr,
+ d->keep_max, key);
+ if (tmp_key)
+ GF_FREE(tmp_key);
+
+ return 0;
+}
+
+void
+pl_fill_multiple_dom_lk_requests(xlator_t *this, pl_local_t *local,
+ inode_t *inode, dict_t *dict,
+ gf_boolean_t keep_max)
+{
+ multi_dom_lk_data data;
+
+ data.this = this;
+ data.inode = inode;
+ data.xdata_rsp = dict;
+ data.keep_max = keep_max;
+
+ dict_foreach_fnmatch(local->xdata, GLUSTERFS_INODELK_DOM_PREFIX "*",
+ pl_inodelk_xattr_fill_multiple, &data);
+}
+
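pl_inodelk_xattr_fill_multiple above recovers the lock domain by tokenizing keys of the form "<GLUSTERFS_INODELK_DOM_PREFIX>:<domain>" on a gf_strdup'd copy. A standalone sketch of that parsing step, using strchr so the key itself stays untouched; the example key below is purely illustrative:

#include <stdio.h>
#include <string.h>

/* Return a pointer to the domain part of a "<prefix>:<domain>" key, or NULL
 * when the key has no ':' separator (the error path that logs "Could not
 * tokenize domain string" above). */
static const char *domain_of(const char *key)
{
    const char *sep = strchr(key, ':');
    if (!sep || sep[1] == '\0')
        return NULL;
    return sep + 1;
}

int main(void)
{
    /* Example key only; the real prefix is GLUSTERFS_INODELK_DOM_PREFIX. */
    const char *key = "some-inodelk-dom-prefix:my-replication-domain";
    const char *dom = domain_of(key);

    printf("domain = %s\n", dom ? dom : "(none)");
    return 0;
}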
+void
pl_set_xdata_response(xlator_t *this, pl_local_t *local, inode_t *parent,
inode_t *inode, char *name, dict_t *xdata,
gf_boolean_t max_lock)
@@ -371,41 +538,28 @@ pl_set_xdata_response(xlator_t *this, pl_local_t *local, inode_t *parent,
if (!xdata || !local)
return;
- if (local->parent_entrylk_req && parent && name && strlen(name))
+ if (local->parent_entrylk_req && parent && name && name[0] != '\0')
pl_parent_entrylk_xattr_fill(this, parent, name, xdata, max_lock);
- if (local->entrylk_count_req && inode)
+ if (!inode)
+ return;
+
+ if (local->entrylk_count_req)
pl_entrylk_xattr_fill(this, inode, xdata, max_lock);
- if (local->inodelk_dom_count_req && inode)
+ if (local->inodelk_dom_count_req)
pl_inodelk_xattr_fill(this, inode, xdata,
data_to_str(local->inodelk_dom_count_req),
max_lock);
- if (local->inodelk_count_req && inode)
+ if (local->inodelk_count_req)
pl_inodelk_xattr_fill(this, inode, xdata, NULL, max_lock);
- if (local->posixlk_count_req && inode)
+ if (local->posixlk_count_req)
pl_posixlk_xattr_fill(this, inode, xdata, max_lock);
-}
-
-/* Return true in case we need to ensure mandatory-locking
- * semnatics under different modes.
- */
-gf_boolean_t
-pl_is_mandatory_locking_enabled(pl_inode_t *pl_inode)
-{
- posix_locks_private_t *priv = NULL;
-
- priv = THIS->private;
- if (priv->mandatory_mode == MLK_FILE_BASED && pl_inode->mandatory)
- return _gf_true;
- else if (priv->mandatory_mode == MLK_FORCED ||
- priv->mandatory_mode == MLK_OPTIMAL)
- return _gf_true;
-
- return _gf_false;
+ if (local->multiple_dom_lk_requests)
+ pl_fill_multiple_dom_lk_requests(this, local, inode, xdata, max_lock);
}
/* Checks whether the region where fop is acting upon conflicts
@@ -420,15 +574,19 @@ pl_is_fop_allowed(pl_inode_t *pl_inode, posix_lock_t *region, fd_t *fd,
int ret = 0;
if (!__rw_allowable(pl_inode, region, op)) {
- if ((!fd) || (fd && (fd->flags & O_NONBLOCK))) {
+ if (pl_inode->mlock_enforced) {
+ *can_block = _gf_false;
+ } else if ((!fd) || (fd && (fd->flags & O_NONBLOCK))) {
gf_log("locks", GF_LOG_TRACE,
"returning EAGAIN"
" because fd is O_NONBLOCK");
*can_block = _gf_false;
- } else
+ } else {
*can_block = _gf_true;
- } else
+ }
+ } else {
ret = 1;
+ }
return ret;
}
@@ -436,9 +594,7 @@ pl_is_fop_allowed(pl_inode_t *pl_inode, posix_lock_t *region, fd_t *fd,
static pl_fdctx_t *
pl_new_fdctx()
{
- pl_fdctx_t *fdctx = NULL;
-
- fdctx = GF_CALLOC(1, sizeof(*fdctx), gf_locks_mt_pl_fdctx_t);
+ pl_fdctx_t *fdctx = GF_MALLOC(sizeof(*fdctx), gf_locks_mt_pl_fdctx_t);
GF_VALIDATE_OR_GOTO("posix-locks", fdctx, out);
INIT_LIST_HEAD(&fdctx->locks_list);
@@ -471,7 +627,9 @@ pl_check_n_create_fdctx(xlator_t *this, fd_t *fd)
if (ret != 0) {
GF_FREE(fdctx);
fdctx = NULL;
+ UNLOCK(&fd->lock);
gf_log(this->name, GF_LOG_DEBUG, "failed to set fd ctx");
+ goto out;
}
}
unlock:
@@ -486,8 +644,10 @@ pl_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
+ pl_track_io_fop_count(frame->local, this, DECREMENT);
+
+ PL_STACK_UNWIND(discard, xdata, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
@@ -495,6 +655,8 @@ int
pl_discard_cont(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
size_t len, dict_t *xdata)
{
+ pl_track_io_fop_count(frame->local, this, INCREMENT);
+
STACK_WIND(frame, pl_discard_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
return 0;
@@ -504,6 +666,7 @@ int32_t
pl_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
size_t len, dict_t *xdata)
{
+ pl_local_t *local = NULL;
pl_inode_t *pl_inode = NULL;
pl_rw_req_t *rw = NULL;
posix_lock_t region = {
@@ -520,17 +683,28 @@ pl_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
GF_VALIDATE_OR_GOTO("locks", this, unwind);
- pl_inode = pl_inode_get(this, fd->inode);
- if (!pl_inode) {
+ local = mem_get0(this->local_pool);
+ if (!local) {
op_ret = -1;
op_errno = ENOMEM;
goto unwind;
}
- enabled = pl_is_mandatory_locking_enabled(pl_inode);
+ frame->local = local;
+ local->inode = inode_ref(fd->inode);
+ local->fd = fd_ref(fd);
+
+ pl_inode = pl_inode_get(this, fd->inode, local);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
if (frame->root->pid < 0)
enabled = _gf_false;
+ else
+ enabled = pl_is_mandatory_locking_enabled(pl_inode);
if (enabled) {
region.fl_start = offset;
@@ -544,15 +718,19 @@ pl_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
{
allowed = pl_is_fop_allowed(pl_inode, &region, fd, GF_FOP_DISCARD,
&can_block);
- if (allowed == 1)
+ if (allowed == 1) {
+ if (pl_inode->mlock_enforced &&
+ pl_inode->track_fop_wind_count) {
+ pl_inode->fop_wind_count++;
+ }
goto unlock;
- else if (!can_block) {
+ } else if (!can_block) {
op_errno = EAGAIN;
op_ret = -1;
goto unlock;
}
- rw = GF_CALLOC(1, sizeof(*rw), gf_locks_mt_pl_rw_req_t);
+ rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t);
if (!rw) {
op_errno = ENOMEM;
op_ret = -1;
@@ -581,7 +759,8 @@ pl_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
unwind:
if (op_ret == -1)
- STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, NULL, NULL, NULL);
+ PL_STACK_UNWIND(discard, xdata, frame, op_ret, op_errno, NULL, NULL,
+ NULL);
return 0;
}
@@ -591,8 +770,10 @@ pl_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
+ pl_track_io_fop_count(frame->local, this, DECREMENT);
+
+ PL_STACK_UNWIND(zerofill, xdata, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
@@ -600,6 +781,8 @@ int
pl_zerofill_cont(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
off_t len, dict_t *xdata)
{
+ pl_track_io_fop_count(frame->local, this, INCREMENT);
+
STACK_WIND(frame, pl_zerofill_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
return 0;
@@ -609,6 +792,7 @@ int32_t
pl_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
off_t len, dict_t *xdata)
{
+ pl_local_t *local = NULL;
pl_inode_t *pl_inode = NULL;
pl_rw_req_t *rw = NULL;
posix_lock_t region = {
@@ -625,17 +809,28 @@ pl_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
GF_VALIDATE_OR_GOTO("locks", this, unwind);
- pl_inode = pl_inode_get(this, fd->inode);
- if (!pl_inode) {
+ local = mem_get0(this->local_pool);
+ if (!local) {
op_ret = -1;
op_errno = ENOMEM;
goto unwind;
}
- enabled = pl_is_mandatory_locking_enabled(pl_inode);
+ frame->local = local;
+ local->inode = inode_ref(fd->inode);
+ local->fd = fd_ref(fd);
+
+ pl_inode = pl_inode_get(this, fd->inode, local);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
if (frame->root->pid < 0)
enabled = _gf_false;
+ else
+ enabled = pl_is_mandatory_locking_enabled(pl_inode);
if (enabled) {
region.fl_start = offset;
@@ -649,15 +844,19 @@ pl_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
{
allowed = pl_is_fop_allowed(pl_inode, &region, fd, GF_FOP_ZEROFILL,
&can_block);
- if (allowed == 1)
+ if (allowed == 1) {
+ if (pl_inode->mlock_enforced &&
+ pl_inode->track_fop_wind_count) {
+ pl_inode->fop_wind_count++;
+ }
goto unlock;
- else if (!can_block) {
+ } else if (!can_block) {
op_errno = EAGAIN;
op_ret = -1;
goto unlock;
}
- rw = GF_CALLOC(1, sizeof(*rw), gf_locks_mt_pl_rw_req_t);
+ rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t);
if (!rw) {
op_errno = ENOMEM;
op_ret = -1;
@@ -686,8 +885,8 @@ pl_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
unwind:
if (op_ret == -1)
- STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, NULL, NULL,
- NULL);
+ PL_STACK_UNWIND(zerofill, xdata, frame, op_ret, op_errno, NULL, NULL,
+ NULL);
return 0;
}
@@ -697,24 +896,16 @@ pl_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
- pl_local_t *local = NULL;
-
- local = frame->local;
-
- if (local->op == GF_FOP_TRUNCATE)
- loc_wipe(&local->loc[0]);
+ pl_local_t *local = frame->local;
- if (local->xdata)
- dict_unref(local->xdata);
- if (local->fd)
- fd_unref(local->fd);
+ pl_track_io_fop_count(local, this, DECREMENT);
if (local->op == GF_FOP_TRUNCATE)
- STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
+ PL_STACK_UNWIND(truncate, xdata, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
else
- STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
+ PL_STACK_UNWIND(ftruncate, xdata, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
return 0;
}
@@ -722,6 +913,8 @@ int
pl_ftruncate_cont(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
dict_t *xdata)
{
+ pl_track_io_fop_count(frame->local, this, INCREMENT);
+
STACK_WIND(frame, pl_truncate_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
return 0;
@@ -731,6 +924,8 @@ int
pl_truncate_cont(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
dict_t *xdata)
{
+ pl_track_io_fop_count(frame->local, this, INCREMENT);
+
STACK_WIND(frame, pl_truncate_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
return 0;
@@ -741,7 +936,7 @@ truncate_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
dict_t *xdata)
{
- pl_local_t *local = NULL;
+ pl_local_t *local = frame->local;
inode_t *inode = NULL;
pl_inode_t *pl_inode = NULL;
pl_rw_req_t *rw = NULL;
@@ -756,7 +951,6 @@ truncate_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int allowed = 1;
GF_VALIDATE_OR_GOTO("locks", this, unwind);
- local = frame->local;
if (op_ret != 0) {
gf_log(this->name, GF_LOG_ERROR,
@@ -770,17 +964,19 @@ truncate_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
else
inode = local->fd->inode;
- pl_inode = pl_inode_get(this, inode);
+ local->inode = inode_ref(inode);
+
+ pl_inode = pl_inode_get(this, inode, local);
if (!pl_inode) {
op_ret = -1;
op_errno = ENOMEM;
goto unwind;
}
- enabled = pl_is_mandatory_locking_enabled(pl_inode);
-
if (frame->root->pid < 0)
enabled = _gf_false;
+ else
+ enabled = pl_is_mandatory_locking_enabled(pl_inode);
if (enabled) {
region.fl_start = local->offset;
@@ -794,15 +990,19 @@ truncate_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
allowed = pl_is_fop_allowed(pl_inode, &region, local->fd, local->op,
&can_block);
- if (allowed == 1)
+ if (allowed == 1) {
+ if (pl_inode->mlock_enforced &&
+ pl_inode->track_fop_wind_count) {
+ pl_inode->fop_wind_count++;
+ }
goto unlock;
- else if (!can_block) {
+ } else if (!can_block) {
op_errno = EAGAIN;
op_ret = -1;
goto unlock;
}
- rw = GF_CALLOC(1, sizeof(*rw), gf_locks_mt_pl_rw_req_t);
+ rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t);
if (!rw) {
op_errno = ENOMEM;
op_ret = -1;
@@ -850,25 +1050,19 @@ truncate_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
unwind:
if (op_ret == -1) {
- gf_log(this->name, GF_LOG_ERROR,
+ gf_log(this ? this->name : "locks", GF_LOG_ERROR,
"truncate failed with "
"ret: %d, error: %s",
op_ret, strerror(op_errno));
- if (local->op == GF_FOP_TRUNCATE)
- loc_wipe(&local->loc[0]);
- if (local->xdata)
- dict_unref(local->xdata);
- if (local->fd)
- fd_unref(local->fd);
switch (local->op) {
case GF_FOP_TRUNCATE:
- STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, buf,
- NULL, xdata);
+ PL_STACK_UNWIND(truncate, xdata, frame, op_ret, op_errno, buf,
+ NULL, xdata);
break;
case GF_FOP_FTRUNCATE:
- STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, buf,
- NULL, xdata);
+ PL_STACK_UNWIND(ftruncate, xdata, frame, op_ret, op_errno, buf,
+ NULL, xdata);
break;
default:
break;
@@ -900,9 +1094,10 @@ pl_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
STACK_WIND(frame, truncate_stat_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->stat, loc, NULL);
ret = 0;
+
unwind:
if (ret == -1) {
- gf_log(this->name, GF_LOG_ERROR,
+ gf_log(this ? this->name : "locks", GF_LOG_ERROR,
"truncate on %s failed with"
" ret: %d, error: %s",
loc->path, -1, strerror(ENOMEM));
@@ -935,7 +1130,7 @@ pl_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
ret = 0;
unwind:
if (ret == -1) {
- gf_log(this->name, GF_LOG_ERROR,
+ gf_log(this ? this->name : "locks", GF_LOG_ERROR,
"ftruncate failed with"
" ret: %d, error: %s",
-1, strerror(ENOMEM));
@@ -1040,68 +1235,68 @@ pl_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int32_t
-pl_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
- dict_t *xdata)
+static int32_t
+pl_getxattr_clrlk(xlator_t *this, const char *name, inode_t *inode,
+ dict_t **dict, int32_t *op_errno)
{
- int32_t op_errno = EINVAL;
- int op_ret = -1;
int32_t bcount = 0;
int32_t gcount = 0;
- char key[PATH_MAX] = {
- 0,
- };
+ char *key = NULL;
char *lk_summary = NULL;
pl_inode_t *pl_inode = NULL;
- dict_t *dict = NULL;
clrlk_args args = {
0,
};
char *brickname = NULL;
+ int32_t op_ret = -1;
- if (!name)
- goto usual;
-
- if (strncmp(name, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD)))
- goto usual;
+ *op_errno = EINVAL;
if (clrlk_parse_args(name, &args)) {
- op_errno = EINVAL;
+ *op_errno = EINVAL;
goto out;
}
- dict = dict_new();
- if (!dict) {
- op_errno = ENOMEM;
+ *dict = dict_new();
+ if (!*dict) {
+ *op_errno = ENOMEM;
goto out;
}
- pl_inode = pl_inode_get(this, loc->inode);
+ pl_inode = pl_inode_get(this, inode, NULL);
if (!pl_inode) {
- op_errno = ENOMEM;
+ *op_errno = ENOMEM;
goto out;
}
switch (args.type) {
case CLRLK_INODE:
case CLRLK_ENTRY:
- op_ret = clrlk_clear_lks_in_all_domains(
- this, pl_inode, &args, &bcount, &gcount, &op_errno);
- if (op_ret)
- goto out;
+ op_ret = clrlk_clear_lks_in_all_domains(this, pl_inode, &args,
+ &bcount, &gcount, op_errno);
break;
case CLRLK_POSIX:
op_ret = clrlk_clear_posixlk(this, pl_inode, &args, &bcount,
- &gcount, &op_errno);
- if (op_ret)
- goto out;
+ &gcount, op_errno);
break;
- case CLRLK_TYPE_MAX:
- op_errno = EINVAL;
- goto out;
+ default:
+ op_ret = -1;
+ *op_errno = EINVAL;
+ }
+ if (op_ret) {
+ if (args.type >= CLRLK_TYPE_MAX) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "clear locks: invalid lock type %d", args.type);
+ } else {
+ gf_log(this->name, GF_LOG_ERROR,
+ "clear locks of type %s failed: %s",
+ clrlk_type_names[args.type], strerror(*op_errno));
+ }
+
+ goto out;
}
- op_ret = fetch_pathinfo(this, loc->inode, &op_errno, &brickname);
+ op_ret = fetch_pathinfo(this, inode, op_errno, &brickname);
if (op_ret) {
gf_log(this->name, GF_LOG_WARNING, "Couldn't get brickname");
} else {
@@ -1116,43 +1311,62 @@ pl_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
if (!gcount && !bcount) {
if (gf_asprintf(&lk_summary, "No locks cleared.") == -1) {
op_ret = -1;
- op_errno = ENOMEM;
+ *op_errno = ENOMEM;
goto out;
}
- } else if (gf_asprintf(
- &lk_summary,
- "%s: %s blocked locks=%d "
- "granted locks=%d",
- (brickname == NULL) ? this->name : brickname,
- (args.type == CLRLK_INODE)
- ? "inode"
- : (args.type == CLRLK_ENTRY)
- ? "entry"
- : (args.type == CLRLK_POSIX) ? "posix" : " ",
- bcount, gcount) == -1) {
+ } else if (gf_asprintf(&lk_summary,
+ "%s: %s blocked locks=%d "
+ "granted locks=%d",
+ (brickname == NULL) ? this->name : brickname,
+ clrlk_type_names[args.type], bcount, gcount) == -1) {
op_ret = -1;
- op_errno = ENOMEM;
+ *op_errno = ENOMEM;
goto out;
}
+ gf_log(this->name, GF_LOG_DEBUG, "%s", lk_summary);
- if (snprintf(key, sizeof(key), "%s", name) >= sizeof(key)) {
+ key = gf_strdup(name);
+ if (!key) {
op_ret = -1;
goto out;
}
- if (dict_set_dynstr(dict, key, lk_summary)) {
+ if (dict_set_dynstr(*dict, key, lk_summary)) {
op_ret = -1;
- op_errno = ENOMEM;
+ *op_errno = ENOMEM;
goto out;
}
op_ret = 0;
+
out:
GF_FREE(brickname);
- STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xdata);
-
GF_FREE(args.opts);
- if (op_ret && lk_summary)
+ GF_FREE(key);
+ if (op_ret) {
GF_FREE(lk_summary);
+ }
+
+ return op_ret;
+}
+
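pl_getxattr_clrlk above reports failures through two channels, its return value and the *op_errno out-parameter, so that both pl_getxattr and the pl_fgetxattr path added later in this patch can unwind with the right errno. A generic sketch of that calling convention (names hypothetical):

#define _GNU_SOURCE /* for asprintf() */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Returns 0 on success, -1 on failure with *op_errno set, mirroring the
 * pl_getxattr_clrlk() calling convention. */
static int build_summary(const char *brick, char **summary, int *op_errno)
{
    if (asprintf(summary, "%s: blocked locks=0 granted locks=0", brick) < 0) {
        *op_errno = ENOMEM;
        return -1;
    }
    return 0;
}

int main(void)
{
    char *summary = NULL;
    int op_errno = 0;

    if (build_summary("brick-0", &summary, &op_errno) != 0) {
        fprintf(stderr, "failed: %s\n", strerror(op_errno));
        return 1;
    }
    printf("%s\n", summary);
    free(summary);
    return 0;
}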
+int32_t
+pl_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
+ dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+ int32_t op_ret = -1;
+ dict_t *dict = NULL;
+
+ if (!name)
+ goto usual;
+
+ if (strncmp(name, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD)))
+ goto usual;
+
+ op_ret = pl_getxattr_clrlk(this, name, loc->inode, &dict, &op_errno);
+
+ STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xdata);
+
if (dict)
dict_unref(dict);
return 0;
@@ -1218,7 +1432,7 @@ fetch_pathinfo(xlator_t *this, inode_t *inode, int32_t *op_errno,
goto out;
}
- ret = dict_get_str(dict, GF_XATTR_PATHINFO_KEY, brickname);
+ ret = dict_get_str_sizen(dict, GF_XATTR_PATHINFO_KEY, brickname);
if (ret)
goto out;
@@ -1241,15 +1455,12 @@ out:
int
pl_lockinfo_get_brickname(xlator_t *this, inode_t *inode, int32_t *op_errno)
{
- int ret = -1;
- posix_locks_private_t *priv = NULL;
+ posix_locks_private_t *priv = this->private;
char *brickname = NULL;
char *end = NULL;
char *tmp = NULL;
- priv = this->private;
-
- ret = fetch_pathinfo(this, inode, op_errno, &brickname);
+ int ret = fetch_pathinfo(this, inode, op_errno, &brickname);
if (ret)
goto out;
@@ -1277,12 +1488,10 @@ out:
char *
pl_lockinfo_key(xlator_t *this, inode_t *inode, int32_t *op_errno)
{
- posix_locks_private_t *priv = NULL;
+ posix_locks_private_t *priv = this->private;
char *key = NULL;
int ret = 0;
- priv = this->private;
-
if (priv->brickname == NULL) {
ret = pl_lockinfo_get_brickname(this, inode, op_errno);
if (ret < 0) {
@@ -1300,14 +1509,13 @@ int32_t
pl_fgetxattr_handle_lockinfo(xlator_t *this, fd_t *fd, dict_t *dict,
int32_t *op_errno)
{
- pl_inode_t *pl_inode = NULL;
char *key = NULL, *buf = NULL;
int32_t op_ret = 0;
unsigned long fdnum = 0;
int32_t len = 0;
dict_t *tmp = NULL;
- pl_inode = pl_inode_get(this, fd->inode);
+ pl_inode_t *pl_inode = pl_inode_get(this, fd->inode, NULL);
if (!pl_inode) {
gf_log(this->name, GF_LOG_DEBUG, "Could not get inode.");
@@ -1347,8 +1555,9 @@ pl_fgetxattr_handle_lockinfo(xlator_t *this, fd_t *fd, dict_t *dict,
goto out;
}
- len = dict_serialized_length(tmp);
- if (len < 0) {
+ op_ret = dict_allocate_and_serialize(tmp, (char **)&buf,
+ (unsigned int *)&len);
+ if (op_ret != 0) {
*op_errno = -op_ret;
op_ret = -1;
gf_log(this->name, GF_LOG_WARNING,
@@ -1358,24 +1567,6 @@ pl_fgetxattr_handle_lockinfo(xlator_t *this, fd_t *fd, dict_t *dict,
goto out;
}
- buf = GF_CALLOC(1, len, gf_common_mt_char);
- if (buf == NULL) {
- op_ret = -1;
- *op_errno = ENOMEM;
- goto out;
- }
-
- op_ret = dict_serialize(tmp, buf);
- if (op_ret < 0) {
- *op_errno = -op_ret;
- op_ret = -1;
- gf_log(this->name, GF_LOG_WARNING,
- "dict_serialize failed (%s) while handling lockinfo "
- "for fd (ptr: %p inode-gfid:%s)",
- strerror(*op_errno), fd, uuid_utoa(fd->inode->gfid));
- goto out;
- }
-
op_ret = dict_set_dynptr(dict, GF_XATTR_LOCKINFO_KEY, buf, len);
if (op_ret < 0) {
*op_errno = -op_ret;
@@ -1428,6 +1619,11 @@ pl_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
}
goto unwind;
+ } else if (strncmp(name, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD)) ==
+ 0) {
+ op_ret = pl_getxattr_clrlk(this, name, fd->inode, &dict, &op_errno);
+
+ goto unwind;
} else {
goto usual;
}
@@ -1450,14 +1646,11 @@ int32_t
pl_migrate_locks(call_frame_t *frame, fd_t *newfd, uint64_t oldfd_num,
int32_t *op_errno)
{
- pl_inode_t *pl_inode = NULL;
- uint64_t newfd_num = 0;
posix_lock_t *l = NULL;
int32_t op_ret = 0;
+ uint64_t newfd_num = fd_to_fdnum(newfd);
- newfd_num = fd_to_fdnum(newfd);
-
- pl_inode = pl_inode_get(frame->this, newfd->inode);
+ pl_inode_t *pl_inode = pl_inode_get(frame->this, newfd->inode, NULL);
if (pl_inode == NULL) {
op_ret = -1;
*op_errno = EBADFD;
@@ -1486,11 +1679,10 @@ pl_fsetxattr_handle_lockinfo(call_frame_t *frame, fd_t *fd, char *lockinfo_buf,
int len, int32_t *op_errno)
{
int32_t op_ret = -1;
- dict_t *lockinfo = NULL;
uint64_t oldfd_num = 0;
char *key = NULL;
- lockinfo = dict_new();
+ dict_t *lockinfo = dict_new();
if (lockinfo == NULL) {
op_ret = -1;
*op_errno = ENOMEM;
@@ -1522,7 +1714,7 @@ pl_fsetxattr_handle_lockinfo(call_frame_t *frame, fd_t *fd, char *lockinfo_buf,
gf_log(frame->this->name, GF_LOG_WARNING,
"migration of locks from oldfd (ptr:%p) to newfd "
"(ptr:%p) (inode-gfid:%s)",
- (void *)oldfd_num, fd, uuid_utoa(fd->inode->gfid));
+ (void *)(uintptr_t)oldfd_num, fd, uuid_utoa(fd->inode->gfid));
goto out;
}
@@ -1536,6 +1728,27 @@ int32_t
pl_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ pl_local_t *local = NULL;
+ pl_inode_t *pl_inode = NULL;
+
+ local = frame->local;
+ if (local && local->update_mlock_enforced_flag && op_ret != -1) {
+ pl_inode = pl_inode_get(this, local->inode, NULL);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ pl_inode->mlock_enforced = _gf_true;
+ pl_inode->check_mlock_info = _gf_false;
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
+unwind:
PL_STACK_UNWIND_FOR_CLIENT(fsetxattr, xdata, frame, op_ret, op_errno,
xdata);
return 0;
@@ -1545,12 +1758,14 @@ int32_t
pl_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
int32_t flags, dict_t *xdata)
{
- int32_t op_ret = 0, op_errno = 0;
+ int32_t op_errno = 0;
void *lockinfo_buf = NULL;
int len = 0;
+ char *name = NULL;
+ posix_locks_private_t *priv = this->private;
- op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY, &lockinfo_buf,
- &len);
+ int32_t op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY,
+ &lockinfo_buf, &len);
if (lockinfo_buf == NULL) {
goto usual;
}
@@ -1563,12 +1778,17 @@ pl_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
usual:
PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+
+ PL_CHECK_LOCK_ENFORCE_KEY(frame, dict, name, this, ((loc_t *)NULL), fd,
+ priv);
+
STACK_WIND(frame, pl_fsetxattr_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
return 0;
unwind:
- STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, NULL);
+ PL_STACK_UNWIND_FOR_CLIENT(fsetxattr, xdata, frame, op_ret, op_errno, NULL);
+
return 0;
}
@@ -1616,10 +1836,7 @@ pl_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int
pl_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- pl_inode_t *pl_inode = NULL;
-
- pl_inode = pl_inode_get(this, fd->inode);
-
+ pl_inode_t *pl_inode = pl_inode_get(this, fd->inode, NULL);
if (!pl_inode) {
gf_log(this->name, GF_LOG_DEBUG, "Could not get inode.");
STACK_UNWIND_STRICT(flush, frame, -1, EBADFD, NULL);
@@ -1695,14 +1912,18 @@ pl_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
int op_errno = EINVAL;
pl_inode_t *pl_inode = NULL;
posix_lock_t *l = NULL;
- posix_locks_private_t *priv = NULL;
-
- priv = this->private;
+ posix_locks_private_t *priv = this->private;
GF_VALIDATE_OR_GOTO("locks", this, unwind);
op_ret = 0, op_errno = 0;
- pl_inode = pl_inode_get(this, fd->inode);
+ pl_inode = pl_inode_get(this, fd->inode, NULL);
+ if (!pl_inode) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM, "Could not get inode");
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
/* As per design, under forced and file-based mandatory locking modes
* it doesn't matter whether inodes's lock list contain advisory or
@@ -1777,7 +1998,8 @@ int
pl_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+
STACK_WIND(frame, pl_create_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
xdata);
@@ -1789,6 +2011,8 @@ pl_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iovec *vector, int32_t count,
struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
{
+ pl_track_io_fop_count(frame->local, this, DECREMENT);
+
PL_STACK_UNWIND(readv, xdata, frame, op_ret, op_errno, vector, count, stbuf,
iobref, xdata);
@@ -1800,6 +2024,8 @@ pl_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
dict_t *xdata)
{
+ pl_track_io_fop_count(frame->local, this, DECREMENT);
+
PL_STACK_UNWIND(writev, xdata, frame, op_ret, op_errno, prebuf, postbuf,
xdata);
@@ -1822,6 +2048,10 @@ do_blocked_rw(pl_inode_t *pl_inode)
if (__rw_allowable(pl_inode, &rw->region, rw->stub->fop)) {
list_del_init(&rw->list);
list_add_tail(&rw->list, &wind_list);
+ if (pl_inode->mlock_enforced &&
+ pl_inode->track_fop_wind_count) {
+ pl_inode->fop_wind_count++;
+ }
}
}
}
@@ -1837,14 +2067,68 @@ do_blocked_rw(pl_inode_t *pl_inode)
return;
}
+/* when mandatory lock is enforced:
+ If an IO request comes on a region which is out of the boundary of the
+ granted mandatory lock, it will be rejected.
+
+   Note: IO is never blocked while a mandatory lock is enforced, because a
+   blocked request could carry stale data from an old client.
+ */
+static gf_boolean_t
+within_range(posix_lock_t *existing, posix_lock_t *new)
+{
+ if (existing->fl_start <= new->fl_start && existing->fl_end >= new->fl_end)
+ return _gf_true;
+
+ return _gf_false;
+}
+
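within_range above, together with the mlock_enforced branch added to __rw_allowable below, encodes the enforcement rule: IO from the lock owner is allowed only when its byte range is fully contained in the granted lock, and overlapping IO from any other owner is rejected rather than blocked. A standalone sketch of the containment and overlap checks on inclusive [start, end] ranges (struct and names hypothetical):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct range {
    uint64_t start;
    uint64_t end; /* inclusive, like fl_start/fl_end */
};

static bool ranges_overlap(const struct range *a, const struct range *b)
{
    return a->start <= b->end && b->start <= a->end;
}

/* True when 'io' lies completely inside 'lock', i.e. the within_range()
 * condition above. */
static bool range_contains(const struct range *lock, const struct range *io)
{
    return lock->start <= io->start && lock->end >= io->end;
}

int main(void)
{
    struct range lock = {100, 199};
    struct range inside = {120, 150};
    struct range spill = {150, 250};

    printf("inside: overlap=%d contained=%d\n",
           ranges_overlap(&lock, &inside), range_contains(&lock, &inside));
    printf("spill:  overlap=%d contained=%d\n",
           ranges_overlap(&lock, &spill), range_contains(&lock, &spill));
    return 0;
}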
static int
__rw_allowable(pl_inode_t *pl_inode, posix_lock_t *region, glusterfs_fop_t op)
{
posix_lock_t *l = NULL;
- posix_locks_private_t *priv = NULL;
+ posix_locks_private_t *priv = THIS->private;
int ret = 1;
- priv = THIS->private;
+ if (pl_inode->mlock_enforced) {
+ list_for_each_entry(l, &pl_inode->ext_list, list)
+ {
+ /*
+              With lock enforcement (fencing) there should not be any blocking
+              locks coexisting.
+ */
+ if (same_owner(l, region)) {
+ /* Should range check be strict for same owner with fencing? */
+ if (locks_overlap(l, region)) {
+ if (within_range(l, region)) {
+ return 1;
+ } else {
+ /*
+                          Should we allow a read fop if it does not fit in the
+                          range?
+ if (op == GF_FOP_READ && l->fl_type != F_WRLCK) {
+ return 1;
+ }
+ */
+ return 0;
+ }
+ }
+ } else {
+ if (locks_overlap(l, region)) {
+ /*
+ with fencing should a read from a different owner be
+ allowed if the mandatory lock taken is F_RDLCK?
+ if (op == GF_FOP_READ && l->fl_type != F_WRLCK) {
+ return 1;
+ }
+ */
+ return 0;
+ }
+ }
+ }
+
+ /* No lock has been taken by this owner */
+ return 0;
+ }
list_for_each_entry(l, &pl_inode->ext_list, list)
{
@@ -1868,6 +2152,8 @@ int
pl_readv_cont(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
off_t offset, uint32_t flags, dict_t *xdata)
{
+ pl_track_io_fop_count(frame->local, this, INCREMENT);
+
STACK_WIND(frame, pl_readv_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
@@ -1878,6 +2164,7 @@ int
pl_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
off_t offset, uint32_t flags, dict_t *xdata)
{
+ pl_local_t *local = NULL;
pl_inode_t *pl_inode = NULL;
pl_rw_req_t *rw = NULL;
posix_lock_t region = {
@@ -1894,18 +2181,26 @@ pl_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
GF_VALIDATE_OR_GOTO("locks", this, unwind);
- pl_inode = pl_inode_get(this, fd->inode);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+
+ if (!frame->local) {
+ frame->local = mem_get0(this->local_pool);
+ local = frame->local;
+ local->inode = inode_ref(fd->inode);
+ local->fd = fd_ref(fd);
+ }
+
+ pl_inode = pl_inode_get(this, fd->inode, local);
if (!pl_inode) {
op_ret = -1;
op_errno = ENOMEM;
goto unwind;
}
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
- enabled = pl_is_mandatory_locking_enabled(pl_inode);
-
if (frame->root->pid < 0)
enabled = _gf_false;
+ else
+ enabled = pl_is_mandatory_locking_enabled(pl_inode);
if (enabled) {
region.fl_start = offset;
@@ -1919,15 +2214,19 @@ pl_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
{
allowed = pl_is_fop_allowed(pl_inode, &region, fd, GF_FOP_READ,
&can_block);
- if (allowed == 1)
+ if (allowed == 1) {
+ if (pl_inode->mlock_enforced &&
+ pl_inode->track_fop_wind_count) {
+ pl_inode->fop_wind_count++;
+ }
goto unlock;
- else if (!can_block) {
+ } else if (!can_block) {
op_errno = EAGAIN;
op_ret = -1;
goto unlock;
}
- rw = GF_CALLOC(1, sizeof(*rw), gf_locks_mt_pl_rw_req_t);
+ rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t);
if (!rw) {
op_errno = ENOMEM;
op_ret = -1;
@@ -1958,8 +2257,8 @@ pl_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
}
unwind:
if (op_ret == -1)
- STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, NULL, 0, NULL, NULL,
- NULL);
+ PL_STACK_UNWIND(readv, xdata, frame, op_ret, op_errno, NULL, 0, NULL,
+ NULL, NULL);
return 0;
}
@@ -1969,6 +2268,8 @@ pl_writev_cont(call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int count, off_t offset, uint32_t flags,
struct iobref *iobref, dict_t *xdata)
{
+ pl_track_io_fop_count(frame->local, this, INCREMENT);
+
STACK_WIND(frame, pl_writev_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
flags, iobref, xdata);
@@ -1981,6 +2282,7 @@ pl_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
dict_t *xdata)
{
+ pl_local_t *local = NULL;
pl_inode_t *pl_inode = NULL;
pl_rw_req_t *rw = NULL;
posix_lock_t region = {
@@ -1997,18 +2299,26 @@ pl_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
GF_VALIDATE_OR_GOTO("locks", this, unwind);
- pl_inode = pl_inode_get(this, fd->inode);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+
+ if (!frame->local) {
+ frame->local = mem_get0(this->local_pool);
+ local = frame->local;
+ local->inode = inode_ref(fd->inode);
+ local->fd = fd_ref(fd);
+ }
+
+ pl_inode = pl_inode_get(this, fd->inode, local);
if (!pl_inode) {
op_ret = -1;
op_errno = ENOMEM;
goto unwind;
}
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
- enabled = pl_is_mandatory_locking_enabled(pl_inode);
-
if (frame->root->pid < 0)
enabled = _gf_false;
+ else
+ enabled = pl_is_mandatory_locking_enabled(pl_inode);
if (enabled) {
region.fl_start = offset;
@@ -2022,15 +2332,24 @@ pl_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
{
allowed = pl_is_fop_allowed(pl_inode, &region, fd, GF_FOP_WRITE,
&can_block);
- if (allowed == 1)
+ if (allowed == 1) {
+ if (pl_inode->mlock_enforced &&
+ pl_inode->track_fop_wind_count) {
+ pl_inode->fop_wind_count++;
+ }
goto unlock;
- else if (!can_block) {
- op_errno = EAGAIN;
+ } else if (!can_block) {
+ if (pl_inode->mlock_enforced) {
+ op_errno = EBUSY;
+ } else {
+ op_errno = EAGAIN;
+ }
+
op_ret = -1;
goto unlock;
}
- rw = GF_CALLOC(1, sizeof(*rw), gf_locks_mt_pl_rw_req_t);
+ rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t);
if (!rw) {
op_errno = ENOMEM;
op_ret = -1;
@@ -2061,7 +2380,8 @@ pl_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
}
unwind:
if (op_ret == -1)
- STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, NULL, NULL, NULL);
+ PL_STACK_UNWIND(writev, xdata, frame, op_ret, op_errno, NULL, NULL,
+ NULL);
return 0;
}
@@ -2069,29 +2389,25 @@ unwind:
static int
__fd_has_locks(pl_inode_t *pl_inode, fd_t *fd)
{
- int found = 0;
posix_lock_t *l = NULL;
list_for_each_entry(l, &pl_inode->ext_list, list)
{
if (l->fd_num == fd_to_fdnum(fd)) {
- found = 1;
- break;
+ return 1;
}
}
- return found;
+ return 0;
}
static posix_lock_t *
lock_dup(posix_lock_t *lock)
{
- posix_lock_t *new_lock = NULL;
-
- new_lock = new_posix_lock(&lock->user_flock, lock->client, lock->client_pid,
- &lock->owner, (fd_t *)lock->fd_num,
- lock->lk_flags, lock->blocking);
- return new_lock;
+ int32_t op_errno = 0;
+ return new_posix_lock(&lock->user_flock, lock->client, lock->client_pid,
+ &lock->owner, (fd_t *)lock->fd_num, lock->lk_flags,
+ lock->blocking, &op_errno);
}
static int
@@ -2120,14 +2436,7 @@ __dup_locks_to_fdctx(pl_inode_t *pl_inode, fd_t *fd, pl_fdctx_t *fdctx)
static int
__copy_locks_to_fdctx(pl_inode_t *pl_inode, fd_t *fd, pl_fdctx_t *fdctx)
{
- int ret = 0;
-
- ret = __dup_locks_to_fdctx(pl_inode, fd, fdctx);
- if (ret)
- goto out;
-
-out:
- return ret;
+ return __dup_locks_to_fdctx(pl_inode, fd, fdctx);
}
static void
@@ -2198,9 +2507,10 @@ pl_getlk_fd(xlator_t *this, pl_inode_t *pl_inode, fd_t *fd,
pthread_mutex_lock(&pl_inode->mutex);
{
if (!__fd_has_locks(pl_inode, fd)) {
+ pthread_mutex_unlock(&pl_inode->mutex);
gf_log(this->name, GF_LOG_DEBUG, "fd=%p has no active locks", fd);
ret = 0;
- goto unlock;
+ goto out;
}
gf_log(this->name, GF_LOG_DEBUG, "There are active locks on fd");
@@ -2224,15 +2534,17 @@ pl_getlk_fd(xlator_t *this, pl_inode_t *pl_inode, fd_t *fd,
"fdctx present -> returning the next lock");
ret = __set_next_lock_fd(fdctx, reqlock);
if (ret) {
+ pthread_mutex_unlock(&pl_inode->mutex);
gf_log(this->name, GF_LOG_DEBUG,
"could not get next lock of fd");
- goto unlock;
+ goto out;
}
}
}
unlock:
pthread_mutex_unlock(&pl_inode->mutex);
+out:
return ret;
}
@@ -2245,12 +2557,10 @@ pl_metalock_is_active(pl_inode_t *pl_inode)
return 1;
}
-int
-__pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock, int can_block)
+void
+__pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock)
{
list_add_tail(&reqlock->list, &pl_inode->queued_locks);
-
- return 0;
}
int
@@ -2263,13 +2573,12 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
int can_block = 0;
posix_lock_t *reqlock = NULL;
posix_lock_t *conf = NULL;
- int ret = 0;
uint32_t lk_flags = 0;
- posix_locks_private_t *priv = NULL;
-
- priv = this->private;
+ posix_locks_private_t *priv = this->private;
+ pl_local_t *local = NULL;
+ short lock_type = 0;
- ret = dict_get_uint32(xdata, GF_LOCK_MODE, &lk_flags);
+ int ret = dict_get_uint32(xdata, GF_LOCK_MODE, &lk_flags);
if (ret == 0) {
if (priv->mandatory_mode == MLK_NONE)
gf_log(this->name, GF_LOG_DEBUG,
@@ -2298,7 +2607,17 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
flock->l_len = labs(flock->l_len);
}
- pl_inode = pl_inode_get(this, fd->inode);
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ } else {
+ frame->local = local;
+ local->fd = fd_ref(fd);
+ }
+
+ pl_inode = pl_inode_get(this, fd->inode, local);
if (!pl_inode) {
op_ret = -1;
op_errno = ENOMEM;
@@ -2306,11 +2625,11 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
}
reqlock = new_posix_lock(flock, frame->root->client, frame->root->pid,
- &frame->root->lk_owner, fd, lk_flags, can_block);
+ &frame->root->lk_owner, fd, lk_flags, can_block,
+ &op_errno);
if (!reqlock) {
op_ret = -1;
- op_errno = ENOMEM;
goto unwind;
}
@@ -2402,6 +2721,7 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
case F_SETLK:
reqlock->frame = frame;
reqlock->this = this;
+ lock_type = flock->l_type;
pthread_mutex_lock(&pl_inode->mutex);
{
@@ -2423,10 +2743,23 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
goto out;
}
+ if (reqlock->fl_type != F_UNLCK && pl_inode->mlock_enforced) {
+ ret = pl_lock_preempt(pl_inode, reqlock);
+ if (ret == -1) {
+ gf_log(this->name, GF_LOG_ERROR, "lock preempt failed");
+ op_ret = -1;
+ op_errno = EAGAIN;
+ __destroy_lock(reqlock);
+ goto out;
+ }
+
+ pl_trace_block(this, frame, fd, NULL, cmd, flock, NULL);
+ goto unwind;
+ }
+
ret = pl_setlk(this, pl_inode, reqlock, can_block);
if (ret == -1) {
- if ((can_block) && (F_UNLCK != flock->l_type)) {
- pl_trace_block(this, frame, fd, NULL, cmd, flock, NULL);
+ if ((can_block) && (F_UNLCK != lock_type)) {
goto out;
}
gf_log(this->name, GF_LOG_DEBUG, "returning EAGAIN");
@@ -2448,7 +2781,7 @@ unwind:
pl_trace_out(this, frame, fd, NULL, cmd, flock, op_ret, op_errno, NULL);
pl_update_refkeeper(this, fd->inode);
- STACK_UNWIND_STRICT(lk, frame, op_ret, op_errno, flock, xdata);
+ PL_STACK_UNWIND(lk, xdata, frame, op_ret, op_errno, flock, xdata);
out:
return 0;
}
@@ -2481,7 +2814,9 @@ pl_forget(xlator_t *this, inode_t *inode)
INIT_LIST_HEAD(&inodelks_released);
INIT_LIST_HEAD(&entrylks_released);
- pl_inode = pl_inode_get(this, inode);
+ pl_inode = pl_inode_get(this, inode, NULL);
+ if (!pl_inode)
+ return 0;
pthread_mutex_lock(&pl_inode->mutex);
{
@@ -2553,25 +2888,33 @@ pl_forget(xlator_t *this, inode_t *inode)
}
pthread_mutex_unlock(&pl_inode->mutex);
- list_for_each_entry_safe(ext_l, ext_tmp, &posixlks_released, list)
- {
- STACK_UNWIND_STRICT(lk, ext_l->frame, -1, 0, &ext_l->user_flock, NULL);
- __destroy_lock(ext_l);
+ if (!list_empty(&posixlks_released)) {
+ list_for_each_entry_safe(ext_l, ext_tmp, &posixlks_released, list)
+ {
+ STACK_UNWIND_STRICT(lk, ext_l->frame, -1, 0, &ext_l->user_flock,
+ NULL);
+ __destroy_lock(ext_l);
+ }
}
- list_for_each_entry_safe(ino_l, ino_tmp, &inodelks_released, blocked_locks)
- {
- STACK_UNWIND_STRICT(inodelk, ino_l->frame, -1, 0, NULL);
- __pl_inodelk_unref(ino_l);
+ if (!list_empty(&inodelks_released)) {
+ list_for_each_entry_safe(ino_l, ino_tmp, &inodelks_released,
+ blocked_locks)
+ {
+ STACK_UNWIND_STRICT(inodelk, ino_l->frame, -1, 0, NULL);
+ __pl_inodelk_unref(ino_l);
+ }
}
- list_for_each_entry_safe(entry_l, entry_tmp, &entrylks_released,
- blocked_locks)
- {
- STACK_UNWIND_STRICT(entrylk, entry_l->frame, -1, 0, NULL);
- GF_FREE((char *)entry_l->basename);
- GF_FREE(entry_l->connection_id);
- GF_FREE(entry_l);
+ if (!list_empty(&entrylks_released)) {
+ list_for_each_entry_safe(entry_l, entry_tmp, &entrylks_released,
+ blocked_locks)
+ {
+ STACK_UNWIND_STRICT(entrylk, entry_l->frame, -1, 0, NULL);
+ GF_FREE((char *)entry_l->basename);
+ GF_FREE(entry_l->connection_id);
+ GF_FREE(entry_l);
+ }
}
pthread_mutex_destroy(&pl_inode->mutex);
@@ -2645,11 +2988,85 @@ out:
return ret;
}
+static int32_t
+pl_request_link_count(dict_t **pxdata)
+{
+ dict_t *xdata;
+
+ xdata = *pxdata;
+ if (xdata == NULL) {
+ xdata = dict_new();
+ if (xdata == NULL) {
+ return ENOMEM;
+ }
+ } else {
+ dict_ref(xdata);
+ }
+
+ if (dict_set_uint32(xdata, GET_LINK_COUNT, 0) != 0) {
+ dict_unref(xdata);
+ return ENOMEM;
+ }
+
+ *pxdata = xdata;
+
+ return 0;
+}
+
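pl_request_link_count above follows a borrow-or-create pattern: if the caller supplied an xdata dict it takes an extra reference, otherwise it creates one, and either way the caller ends up owning exactly one reference, which is why pl_lookup unrefs xdata after winding. A generic refcounting sketch of the same convention (types and names hypothetical, not the GlusterFS dict API):

#include <stdio.h>
#include <stdlib.h>

typedef struct {
    int refs;
    unsigned flags;
} ctx_t;

static ctx_t *ctx_new(void)
{
    ctx_t *c = calloc(1, sizeof(*c));
    if (c)
        c->refs = 1;
    return c;
}

static ctx_t *ctx_ref(ctx_t *c) { c->refs++; return c; }

static void ctx_unref(ctx_t *c)
{
    if (--c->refs == 0)
        free(c);
}

/* Borrow-or-create, as in pl_request_link_count(): on success the caller owns
 * exactly one reference and must drop it when done, which is why pl_lookup()
 * unrefs xdata after STACK_WIND. */
static int request_flag(ctx_t **pctx, unsigned flag)
{
    ctx_t *c = *pctx ? ctx_ref(*pctx) : ctx_new();
    if (!c)
        return -1;
    c->flags |= flag;
    *pctx = c;
    return 0;
}

int main(void)
{
    ctx_t *ctx = NULL; /* caller did not pass a context */
    if (request_flag(&ctx, 0x1) == 0) {
        printf("flags=%u refs=%d\n", ctx->flags, ctx->refs);
        ctx_unref(ctx); /* drop the reference we were given */
    }
    return 0;
}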
+static int32_t
+pl_check_link_count(dict_t *xdata)
+{
+ int32_t count;
+
+ /* In case we are unable to read the link count from xdata, we take a
+ * conservative approach and return -2, which will prevent the inode from
+ * being considered deleted. In fact it will cause link tracking for this
+ * inode to be disabled completely to avoid races. */
+
+ if (xdata == NULL) {
+ return -2;
+ }
+
+ if (dict_get_int32(xdata, GET_LINK_COUNT, &count) != 0) {
+ return -2;
+ }
+
+ return count;
+}
+
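pl_check_link_count above returns -2 as a sentinel meaning "link count unknown", and pl_lookup_cbk below records a count only while the cached value is still that sentinel, decrementing it once for directories so they can be treated like regular files. A small standalone sketch of that caching rule (names and the metadata struct are hypothetical):

#include <stdbool.h>
#include <stdio.h>

#define LINKS_UNKNOWN (-2)

struct inode_meta {
    int links;   /* LINKS_UNKNOWN until a lookup reports it */
    bool is_dir;
};

/* Mirror of the pl_lookup_cbk rule: cache the count only the first time, and
 * decrement directories once (they always have at least 2 links) so they can
 * be handled like regular files. */
static void cache_link_count(struct inode_meta *m, int reported)
{
    if (m->links != LINKS_UNKNOWN)
        return;
    m->links = reported;
    if (m->is_dir)
        m->links--;
}

int main(void)
{
    struct inode_meta file = {LINKS_UNKNOWN, false};
    struct inode_meta dir = {LINKS_UNKNOWN, true};

    cache_link_count(&file, 1);
    cache_link_count(&dir, 2);
    cache_link_count(&file, 5); /* ignored: count already known */

    printf("file links=%d dir links=%d\n", file.links, dir.links);
    return 0;
}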
int32_t
pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata,
struct iatt *postparent)
{
+ pl_inode_t *pl_inode;
+
+ if (op_ret >= 0) {
+ pl_inode = pl_inode_get(this, inode, NULL);
+ if (pl_inode == NULL) {
+ PL_STACK_UNWIND(lookup, xdata, frame, -1, ENOMEM, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ /* We only update the link count if we previously didn't know it.
+ * Doing it always can lead to races since lookup is not executed
+         * atomically most of the time. */
+ if (pl_inode->links == -2) {
+ pl_inode->links = pl_check_link_count(xdata);
+ if (buf->ia_type == IA_IFDIR) {
+ /* Directories have at least 2 links. To avoid special handling
+ * for directories, we simply decrement the value here to make
+ * them equivalent to regular files. */
+ pl_inode->links--;
+ }
+ }
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
PL_STACK_UNWIND(lookup, xdata, frame, op_ret, op_errno, inode, buf, xdata,
postparent);
return 0;
@@ -2658,9 +3075,17 @@ pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t
pl_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
- STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ int32_t error;
+
+ error = pl_request_link_count(&xdata);
+ if (error == 0) {
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
+ STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ dict_unref(xdata);
+ } else {
+ STACK_UNWIND_STRICT(lookup, frame, -1, error, NULL, NULL, NULL, NULL);
+ }
return 0;
}
@@ -2721,9 +3146,8 @@ pl_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
lock_migration_info_t *
gf_mig_info_for_lock(posix_lock_t *lock)
{
- lock_migration_info_t *new = NULL;
-
- new = GF_CALLOC(1, sizeof(lock_migration_info_t), gf_common_mt_lock_mig);
+ lock_migration_info_t *new = GF_MALLOC(sizeof(lock_migration_info_t),
+ gf_common_mt_lock_mig);
if (new == NULL) {
goto out;
}
@@ -2751,7 +3175,7 @@ pl_fill_active_locks(pl_inode_t *pl_inode, lock_migration_info_t *lmi)
{
if (list_empty(&pl_inode->ext_list)) {
count = 0;
- goto out;
+ goto unlock;
}
list_for_each_entry(temp, &pl_inode->ext_list, list)
@@ -2761,6 +3185,7 @@ pl_fill_active_locks(pl_inode_t *pl_inode, lock_migration_info_t *lmi)
newlock = gf_mig_info_for_lock(temp);
if (!newlock) {
+ pthread_mutex_unlock(&pl_inode->mutex);
gf_msg(THIS->name, GF_LOG_ERROR, 0, 0, "lock_dup failed");
count = -1;
goto out;
@@ -2771,8 +3196,9 @@ pl_fill_active_locks(pl_inode_t *pl_inode, lock_migration_info_t *lmi)
}
}
-out:
+unlock:
pthread_mutex_unlock(&pl_inode->mutex);
+out:
return count;
}
@@ -2788,7 +3214,7 @@ pl_getactivelk(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
INIT_LIST_HEAD(&locks.list);
- pl_inode = pl_inode_get(this, loc->inode);
+ pl_inode = pl_inode_get(this, loc->inode, NULL);
if (!pl_inode) {
gf_msg(this->name, GF_LOG_ERROR, 0, 0, "pl_inode_get failed");
@@ -2828,9 +3254,8 @@ __pl_metalk_ref(pl_meta_lock_t *lock)
pl_meta_lock_t *
new_meta_lock(call_frame_t *frame, xlator_t *this)
{
- pl_meta_lock_t *lock = NULL;
-
- lock = GF_CALLOC(1, sizeof(*lock), gf_locks_mt_pl_meta_lock_t);
+ pl_meta_lock_t *lock = GF_CALLOC(1, sizeof(*lock),
+ gf_locks_mt_pl_meta_lock_t);
if (!lock) {
gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM,
@@ -2904,7 +3329,7 @@ pl_metalk(call_frame_t *frame, xlator_t *this, inode_t *inode)
pl_meta_lock_t *reqlk = NULL;
pl_ctx_t *ctx = NULL;
- pl_inode = pl_inode_get(this, inode);
+ pl_inode = pl_inode_get(this, inode, NULL);
if (!pl_inode) {
gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM,
"pl_inode mem allocation failedd");
@@ -2934,15 +3359,15 @@ pl_metalk(call_frame_t *frame, xlator_t *this, inode_t *inode)
pthread_mutex_lock(&pl_inode->mutex);
{
if (pl_metalock_is_active(pl_inode)) {
- gf_msg(this->name, GF_LOG_WARNING, EINVAL, 0,
- "More than one meta-lock can not be granted on"
- "the inode");
ret = -1;
}
}
pthread_mutex_unlock(&pl_inode->mutex);
if (ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, 0,
+ "More than one meta-lock cannot be granted on"
+ " the inode");
goto out;
}
@@ -2978,9 +3403,8 @@ out:
return ret;
}
-void
-__unwind_queued_locks(xlator_t *this, pl_inode_t *pl_inode,
- struct list_head *tmp_list)
+static void
+__unwind_queued_locks(pl_inode_t *pl_inode, struct list_head *tmp_list)
{
if (list_empty(&pl_inode->queued_locks))
return;
@@ -2988,9 +3412,8 @@ __unwind_queued_locks(xlator_t *this, pl_inode_t *pl_inode,
list_splice_init(&pl_inode->queued_locks, tmp_list);
}
-void
-__unwind_blocked_locks(xlator_t *this, pl_inode_t *pl_inode,
- struct list_head *tmp_list)
+static void
+__unwind_blocked_locks(pl_inode_t *pl_inode, struct list_head *tmp_list)
{
posix_lock_t *lock = NULL;
posix_lock_t *tmp = NULL;
@@ -3038,7 +3461,7 @@ pl_metaunlock(call_frame_t *frame, xlator_t *this, inode_t *inode, dict_t *dict)
goto out;
}
- pl_inode = pl_inode_get(this, inode);
+ pl_inode = pl_inode_get(this, inode, NULL);
if (!pl_inode) {
ret = -1;
goto out;
@@ -3049,12 +3472,12 @@ pl_metaunlock(call_frame_t *frame, xlator_t *this, inode_t *inode, dict_t *dict)
pthread_mutex_lock(&pl_inode->mutex);
{
/* Unwind queued locks regardless of migration status */
- __unwind_queued_locks(this, pl_inode, &tmp_posixlk_list);
+ __unwind_queued_locks(pl_inode, &tmp_posixlk_list);
/* Unwind blocked locks only for successful migration */
- if (dict_get(dict, "status")) {
+ if (dict_get_sizen(dict, "status")) {
/* unwind all blocked locks */
- __unwind_blocked_locks(this, pl_inode, &tmp_posixlk_list);
+ __unwind_blocked_locks(pl_inode, &tmp_posixlk_list);
}
/* unlock metalk */
@@ -3081,7 +3504,7 @@ pl_metaunlock(call_frame_t *frame, xlator_t *this, inode_t *inode, dict_t *dict)
inode_unref(pl_inode->inode);
}
- if (dict_get(dict, "status"))
+ if (dict_get_sizen(dict, "status"))
pl_inode->migrated = _gf_true;
else
pl_inode->migrated = _gf_false;
@@ -3110,6 +3533,34 @@ int32_t
pl_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ pl_local_t *local = NULL;
+ pl_inode_t *pl_inode = NULL;
+ local = frame->local;
+ if (local && local->update_mlock_enforced_flag && op_ret != -1) {
+ pl_inode = pl_inode_get(this, local->inode, NULL);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ while (pl_inode->fop_wind_count > 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "waiting for existing fops (count %d) to drain for "
+ "gfid %s",
+ pl_inode->fop_wind_count, uuid_utoa(pl_inode->gfid));
+ pthread_cond_wait(&pl_inode->check_fop_wind_count,
+ &pl_inode->mutex);
+ }
+ pl_inode->mlock_enforced = _gf_true;
+ pl_inode->check_mlock_info = _gf_false;
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
+unwind:
PL_STACK_UNWIND_FOR_CLIENT(setxattr, xdata, frame, op_ret, op_errno, xdata);
return 0;
}
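
Editor's note: the new wait loop in pl_setxattr_cbk() blocks until in-flight fops drain before turning enforcement on. A self-contained sketch of that condition-variable pattern (hypothetical names; the signalling side is assumed to live in the fop completion path):

#include <pthread.h>
#include <stdbool.h>

typedef struct {
    pthread_mutex_t mutex;
    pthread_cond_t drained;
    int fop_wind_count;  /* fops currently wound to the brick */
    bool mlock_enforced;
} inode_ctx_t;

/* Completion side: decrement the counter and wake the waiter at zero. */
void
fop_done(inode_ctx_t *ctx)
{
    pthread_mutex_lock(&ctx->mutex);
    if (--ctx->fop_wind_count == 0)
        pthread_cond_signal(&ctx->drained);
    pthread_mutex_unlock(&ctx->mutex);
}

/* setxattr-callback side: wait for the counter to drain, then flip the flag. */
void
enable_enforcement(inode_ctx_t *ctx)
{
    pthread_mutex_lock(&ctx->mutex);
    while (ctx->fop_wind_count > 0)
        pthread_cond_wait(&ctx->drained, &ctx->mutex); /* re-check on wakeup */
    ctx->mlock_enforced = true;
    pthread_mutex_unlock(&ctx->mutex);
}
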
@@ -3121,15 +3572,16 @@ pl_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
int op_ret = 0;
int op_errno = EINVAL;
dict_t *xdata_rsp = NULL;
+ char *name = NULL;
+ posix_locks_private_t *priv = this->private;
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
- if (dict_get(dict, GF_META_LOCK_KEY)) {
+ if (dict_get_sizen(dict, GF_META_LOCK_KEY)) {
op_ret = pl_metalk(frame, this, loc->inode);
- } else if (dict_get(dict, GF_META_UNLOCK_KEY)) {
+ } else if (dict_get_sizen(dict, GF_META_UNLOCK_KEY)) {
op_ret = pl_metaunlock(frame, this, loc->inode, dict);
-
} else {
goto usual;
}
@@ -3139,9 +3591,17 @@ pl_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
return 0;
usual:
+ PL_CHECK_LOCK_ENFORCE_KEY(frame, dict, name, this, loc, ((fd_t *)NULL),
+ priv);
+
STACK_WIND(frame, pl_setxattr_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
return 0;
+
+unwind:
+ PL_STACK_UNWIND_FOR_CLIENT(setxattr, xdata, frame, op_ret, op_errno, xdata);
+
+ return 0;
}
void
@@ -3150,10 +3610,10 @@ pl_dump_lock(char *str, int size, struct gf_flock *flock, gf_lkowner_t *owner,
time_t *blkd_time, gf_boolean_t active)
{
char *type_str = NULL;
- char granted[256] = {
+ char granted[GF_TIMESTR_SIZE] = {
0,
};
- char blocked[256] = {
+ char blocked[GF_TIMESTR_SIZE] = {
0,
};
@@ -3204,10 +3664,10 @@ __dump_entrylks(pl_inode_t *pl_inode)
{
pl_dom_list_t *dom = NULL;
pl_entry_lock_t *lock = NULL;
- char blocked[256] = {
+ char blocked[GF_TIMESTR_SIZE] = {
0,
};
- char granted[256] = {
+ char granted[GF_TIMESTR_SIZE] = {
0,
};
int count = 0;
@@ -3227,10 +3687,10 @@ __dump_entrylks(pl_inode_t *pl_inode)
list_for_each_entry(lock, &dom->entrylk_list, domain_list)
{
- gf_time_fmt(granted, sizeof(granted), lock->granted_time.tv_sec,
+ gf_time_fmt(granted, sizeof(granted), lock->granted_time,
gf_timefmt_FT);
gf_proc_dump_build_key(key, k, "entrylk[%d](ACTIVE)", count);
- if (lock->blkd_time.tv_sec == 0) {
+ if (lock->blkd_time == 0) {
snprintf(tmp, sizeof(tmp), ENTRY_GRNTD_FMT,
lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK"
: "ENTRYLK_WRLCK",
@@ -3238,7 +3698,7 @@ __dump_entrylks(pl_inode_t *pl_inode)
lkowner_utoa(&lock->owner), lock->client,
lock->connection_id, granted);
} else {
- gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time.tv_sec,
+ gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time,
gf_timefmt_FT);
snprintf(tmp, sizeof(tmp), ENTRY_BLKD_GRNTD_FMT,
lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK"
@@ -3255,7 +3715,7 @@ __dump_entrylks(pl_inode_t *pl_inode)
list_for_each_entry(lock, &dom->blocked_entrylks, blocked_locks)
{
- gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time.tv_sec,
+ gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time,
gf_timefmt_FT);
gf_proc_dump_build_key(key, k, "entrylk[%d](BLOCKED)", count);
@@ -3307,9 +3767,8 @@ __dump_inodelks(pl_inode_t *pl_inode)
SET_FLOCK_PID(&lock->user_flock, lock);
pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner,
- lock->client, lock->connection_id,
- &lock->granted_time.tv_sec, &lock->blkd_time.tv_sec,
- _gf_true);
+ lock->client, lock->connection_id, &lock->granted_time,
+ &lock->blkd_time, _gf_true);
gf_proc_dump_write(key, "%s", tmp);
count++;
@@ -3321,8 +3780,8 @@ __dump_inodelks(pl_inode_t *pl_inode)
count);
SET_FLOCK_PID(&lock->user_flock, lock);
pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner,
- lock->client, lock->connection_id, 0,
- &lock->blkd_time.tv_sec, _gf_false);
+ lock->client, lock->connection_id, 0, &lock->blkd_time,
+ _gf_false);
gf_proc_dump_write(key, "%s", tmp);
count++;
@@ -3355,9 +3814,8 @@ __dump_posixlks(pl_inode_t *pl_inode)
gf_proc_dump_build_key(key, "posixlk", "posixlk[%d](%s)", count,
lock->blocked ? "BLOCKED" : "ACTIVE");
pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner,
- lock->client, NULL, &lock->granted_time.tv_sec,
- &lock->blkd_time.tv_sec,
- (lock->blocked) ? _gf_false : _gf_true);
+ lock->client, lock->client_uid, &lock->granted_time,
+ &lock->blkd_time, (lock->blocked) ? _gf_false : _gf_true);
gf_proc_dump_write(key, "%s", tmp);
count++;
@@ -3436,11 +3894,15 @@ unlock:
__dump_inodelks(pl_inode);
}
- count = __get_posixlk_count(this, pl_inode);
+ count = __get_posixlk_count(pl_inode);
if (count) {
gf_proc_dump_write("posixlk-count", "%d", count);
__dump_posixlks(pl_inode);
}
+
+ gf_proc_dump_write("links", "%d", pl_inode->links);
+ gf_proc_dump_write("removes_pending", "%u", pl_inode->remove_running);
+ gf_proc_dump_write("removed", "%u", pl_inode->removed);
}
pthread_mutex_unlock(&pl_inode->mutex);
@@ -3549,9 +4011,9 @@ pl_metalk_client_cleanup(xlator_t *this, pl_ctx_t *ctx)
* unwind all queued and blocked locks to check
* migration status and find the correct
* destination */
- __unwind_queued_locks(this, pl_inode, &tmp_posixlk_list);
+ __unwind_queued_locks(pl_inode, &tmp_posixlk_list);
- __unwind_blocked_locks(this, pl_inode, &tmp_posixlk_list);
+ __unwind_blocked_locks(pl_inode, &tmp_posixlk_list);
list_del_init(&meta_lock->list);
@@ -3583,10 +4045,7 @@ unlock:
static int
pl_client_disconnect_cbk(xlator_t *this, client_t *client)
{
- pl_ctx_t *pl_ctx = NULL;
-
- pl_ctx = pl_ctx_get(client, this);
-
+ pl_ctx_t *pl_ctx = pl_ctx_get(client, this);
if (pl_ctx) {
pl_inodelk_client_cleanup(this, pl_ctx);
pl_entrylk_client_cleanup(this, pl_ctx);
@@ -3623,10 +4082,9 @@ pl_client_destroy_cbk(xlator_t *this, client_t *client)
int
reconfigure(xlator_t *this, dict_t *options)
{
- posix_locks_private_t *priv = NULL;
+ posix_locks_private_t *priv = this->private;
int ret = -1;
-
- priv = this->private;
+ char *tmp_str = NULL;
GF_OPTION_RECONF("trace", priv->trace, options, bool, out);
@@ -3648,6 +4106,20 @@ reconfigure(xlator_t *this, dict_t *options)
GF_OPTION_RECONF("notify-contention-delay", priv->notify_contention_delay,
options, uint32, out);
+ GF_OPTION_RECONF("mandatory-locking", tmp_str, options, str, out);
+
+ GF_OPTION_RECONF("enforce-mandatory-lock", priv->mlock_enforced, options,
+ bool, out);
+
+ if (!strcmp(tmp_str, "forced"))
+ priv->mandatory_mode = MLK_FORCED;
+ else if (!strcmp(tmp_str, "file"))
+ priv->mandatory_mode = MLK_FILE_BASED;
+ else if (!strcmp(tmp_str, "optimal"))
+ priv->mandatory_mode = MLK_OPTIMAL;
+ else
+ priv->mandatory_mode = MLK_NONE;
+
ret = 0;
out:
@@ -3695,6 +4167,7 @@ init(xlator_t *this)
priv->mandatory_mode = MLK_OPTIMAL;
else
priv->mandatory_mode = MLK_NONE;
+
tmp_str = NULL;
GF_OPTION_INIT("trace", priv->trace, bool, out);
@@ -3714,6 +4187,8 @@ init(xlator_t *this)
GF_OPTION_INIT("notify-contention-delay", priv->notify_contention_delay,
uint32, out);
+ GF_OPTION_INIT("enforce-mandatory-lock", priv->mlock_enforced, bool, out);
+
this->local_pool = mem_pool_new(pl_local_t, 32);
if (!this->local_pool) {
ret = -1;
@@ -3732,19 +4207,21 @@ out:
return ret;
}
-int
+void
fini(xlator_t *this)
{
- posix_locks_private_t *priv = NULL;
-
- priv = this->private;
+ posix_locks_private_t *priv = this->private;
if (!priv)
- return 0;
+ return;
this->private = NULL;
+ if (this->local_pool) {
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+ }
GF_FREE(priv->brickname);
GF_FREE(priv);
- return 0;
+ return;
}
int
@@ -3771,8 +4248,11 @@ pl_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
struct iatt *postoldparent, struct iatt *prenewparent,
struct iatt *postnewparent, dict_t *xdata)
{
+ pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0);
+
PL_STACK_UNWIND(rename, xdata, frame, op_ret, op_errno, buf, preoldparent,
postoldparent, prenewparent, postnewparent, xdata);
+
return 0;
}
@@ -3780,19 +4260,23 @@ int32_t
pl_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, oldloc, newloc);
+ int32_t error;
+
+ error = PL_INODE_REMOVE(rename, frame, this, oldloc, newloc, pl_rename,
+ pl_rename_cbk, oldloc, newloc, xdata);
+ if (error > 0) {
+ STACK_UNWIND_STRICT(rename, frame, -1, error, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+ }
- STACK_WIND(frame, pl_rename_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
return 0;
}
posix_lock_t *
gf_lkmig_info_to_posix_lock(call_frame_t *frame, lock_migration_info_t *lmi)
{
- posix_lock_t *lock = NULL;
-
- lock = GF_CALLOC(1, sizeof(posix_lock_t), gf_locks_mt_posix_lock_t);
+ posix_lock_t *lock = GF_CALLOC(1, sizeof(posix_lock_t),
+ gf_locks_mt_posix_lock_t);
if (!lock)
goto out;
@@ -3840,6 +4324,7 @@ pl_write_active_locks(call_frame_t *frame, pl_inode_t *pl_inode,
/* Just making sure the activelk list is empty. Should not
* happen though*/
if (!list_empty(&pl_inode->ext_list)) {
+ pthread_mutex_unlock(&pl_inode->mutex);
gf_msg(THIS->name, GF_LOG_ERROR, 0, 0, "invalid locks found");
ret = -1;
@@ -3848,6 +4333,7 @@ pl_write_active_locks(call_frame_t *frame, pl_inode_t *pl_inode,
/* This list also should not be empty */
if (list_empty(&locklist->list)) {
+ pthread_mutex_unlock(&pl_inode->mutex);
gf_msg(THIS->name, GF_LOG_ERROR, 0, 0, "empty lock list");
ret = -1;
@@ -3858,6 +4344,7 @@ pl_write_active_locks(call_frame_t *frame, pl_inode_t *pl_inode,
{
newlock = gf_lkmig_info_to_posix_lock(frame, temp);
if (!newlock) {
+ pthread_mutex_unlock(&pl_inode->mutex);
gf_msg(THIS->name, GF_LOG_ERROR, 0, 0,
"mem allocation failed for newlock");
@@ -3867,12 +4354,10 @@ pl_write_active_locks(call_frame_t *frame, pl_inode_t *pl_inode,
list_add_tail(&newlock->list, &pl_inode->ext_list);
}
}
-
-out:
    /* TODO: What if a few lock additions failed with ENOMEM? Should the
     * already added locks be cleared? */
pthread_mutex_unlock(&pl_inode->mutex);
-
+out:
return ret;
}
@@ -3880,12 +4365,11 @@ static int
pl_setactivelk(call_frame_t *frame, xlator_t *this, loc_t *loc,
lock_migration_info_t *locklist, dict_t *xdata)
{
- pl_inode_t *pl_inode = NULL;
int op_ret = 0;
int op_errno = 0;
int ret = 0;
- pl_inode = pl_inode_get(this, loc->inode);
+ pl_inode_t *pl_inode = pl_inode_get(this, loc->inode, NULL);
if (!pl_inode) {
gf_msg(this->name, GF_LOG_ERROR, 0, 0, "pl_inode_get failed");
@@ -3908,8 +4392,11 @@ pl_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
dict_t *xdata)
{
+ pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0);
+
PL_STACK_UNWIND(unlink, xdata, frame, op_ret, op_errno, preparent,
postparent, xdata);
+
return 0;
}
@@ -3917,9 +4404,14 @@ int32_t
pl_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
- STACK_WIND(frame, pl_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ int32_t error;
+
+ error = PL_INODE_REMOVE(unlink, frame, this, loc, NULL, pl_unlink,
+ pl_unlink_cbk, loc, xflag, xdata);
+ if (error > 0) {
+ STACK_UNWIND_STRICT(unlink, frame, -1, error, NULL, NULL, NULL);
+ }
+
return 0;
}
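
Editor's note: the pl_unlink(), pl_rmdir() and pl_rename() call sites follow the same convention: a positive return from PL_INODE_REMOVE() is an errno to unwind with, while 0 means the fop was wound and the callback finishes the work. That convention is inferred from the call sites only; a tiny sketch with a hypothetical helper:

#include <errno.h>
#include <stdio.h>

/* Hypothetical stand-in for PL_INODE_REMOVE(): returns 0 when the remove was
 * wound (or queued), or an errno value (EBUSY here, purely illustrative). */
static int
try_remove(int can_proceed)
{
    if (!can_proceed)
        return EBUSY;
    /* wind or queue the operation here */
    return 0;
}

int
main(void)
{
    int error = try_remove(0);

    if (error > 0)
        fprintf(stderr, "unwind(-1, %d)\n", error);
    /* error == 0: nothing more to do; the callback completes the fop */
    return 0;
}
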
@@ -3937,7 +4429,7 @@ int
pl_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
mode_t umask, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
STACK_WIND(frame, pl_mkdir_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
return 0;
@@ -3955,7 +4447,7 @@ pl_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int
pl_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
STACK_WIND(frame, pl_stat_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->stat, loc, xdata);
return 0;
@@ -3975,7 +4467,7 @@ int
pl_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
dev_t rdev, mode_t umask, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
STACK_WIND(frame, pl_mknod_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
return 0;
@@ -3986,8 +4478,11 @@ pl_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
dict_t *xdata)
{
+ pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0);
+
PL_STACK_UNWIND_FOR_CLIENT(rmdir, xdata, frame, op_ret, op_errno, preparent,
postparent, xdata);
+
return 0;
}
@@ -3995,9 +4490,14 @@ int
pl_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
- STACK_WIND(frame, pl_rmdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir, loc, xflags, xdata);
+ int32_t error;
+
+ error = PL_INODE_REMOVE(rmdir, frame, this, loc, NULL, pl_rmdir,
+ pl_rmdir_cbk, loc, xflags, xdata);
+ if (error > 0) {
+ STACK_UNWIND_STRICT(rmdir, frame, -1, error, NULL, NULL, NULL);
+ }
+
return 0;
}
@@ -4016,7 +4516,7 @@ int
pl_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
loc_t *loc, mode_t umask, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
STACK_WIND(frame, pl_symlink_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink, linkname, loc, umask, xdata);
return 0;
@@ -4027,6 +4527,19 @@ pl_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, inode_t *inode, struct iatt *buf,
struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
+ pl_inode_t *pl_inode = (pl_inode_t *)cookie;
+
+ if (op_ret >= 0) {
+ pthread_mutex_lock(&pl_inode->mutex);
+
+        /* TODO: can pl_inode->links == 0 happen here? */
+ if (pl_inode->links >= 0) {
+ pl_inode->links++;
+ }
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
PL_STACK_UNWIND_FOR_CLIENT(link, xdata, frame, op_ret, op_errno, inode, buf,
preparent, postparent, xdata);
return 0;
@@ -4036,9 +4549,18 @@ int
pl_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, oldloc, newloc);
- STACK_WIND(frame, pl_link_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+ pl_inode_t *pl_inode;
+
+ pl_inode = pl_inode_get(this, oldloc->inode, NULL);
+ if (pl_inode == NULL) {
+ STACK_UNWIND_STRICT(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
+
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), oldloc, newloc);
+ STACK_WIND_COOKIE(frame, pl_link_cbk, pl_inode, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
return 0;
}
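
Editor's note: pl_link() now resolves the pl_inode before winding and passes it down as the STACK_WIND_COOKIE cookie so the callback can bump the link count only on success. A generic sketch of that cookie/counter pattern (plain pthreads, not the GlusterFS call-stub machinery):

#include <pthread.h>

typedef struct {
    pthread_mutex_t mutex;
    int links; /* negative would mean "unknown", so only bump known counts */
} inode_ctx_t;

typedef void (*link_cbk_t)(void *cookie, int op_ret);

/* Completion callback: the cookie is the context chosen at wind time. */
void
link_done(void *cookie, int op_ret)
{
    inode_ctx_t *ctx = cookie;

    if (op_ret >= 0) {
        pthread_mutex_lock(&ctx->mutex);
        if (ctx->links >= 0)
            ctx->links++;
        pthread_mutex_unlock(&ctx->mutex);
    }
}

/* Wind side: hand the per-inode context to the callback as the cookie. */
void
do_link(inode_ctx_t *ctx, link_cbk_t cbk)
{
    /* ... perform the actual link here ... */
    cbk(ctx, 0);
}
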
@@ -4112,7 +4634,7 @@ pl_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int
pl_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
STACK_WIND(frame, pl_statfs_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->statfs, loc, xdata);
return 0;
@@ -4122,6 +4644,28 @@ int32_t
pl_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ pl_local_t *local = NULL;
+ pl_inode_t *pl_inode = NULL;
+
+ local = frame->local;
+ if (local && local->update_mlock_enforced_flag && op_ret != -1) {
+ pl_inode = pl_inode_get(this, local->inode, NULL);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ pl_inode->mlock_enforced = _gf_false;
+ pl_inode->check_mlock_info = _gf_false;
+ pl_inode->track_fop_wind_count = _gf_true;
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
+unwind:
PL_STACK_UNWIND_FOR_CLIENT(removexattr, xdata, frame, op_ret, op_errno,
xdata);
return 0;
@@ -4131,16 +4675,51 @@ int
pl_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
const char *name, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ int op_ret = 0;
+ int op_errno = EINVAL;
+ posix_locks_private_t *priv = this->private;
+
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
+
+ PL_CHECK_LOCK_ENFORCE_KEY(frame, ((dict_t *)NULL), name, this, loc,
+ ((fd_t *)NULL), priv);
+
STACK_WIND(frame, pl_removexattr_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
return 0;
+
+unwind:
+ PL_STACK_UNWIND_FOR_CLIENT(removexattr, xdata, frame, op_ret, op_errno,
+ NULL);
+
+ return 0;
}
int32_t
pl_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ pl_local_t *local = NULL;
+ pl_inode_t *pl_inode = NULL;
+
+ local = frame->local;
+ if (local && local->update_mlock_enforced_flag && op_ret != -1) {
+ pl_inode = pl_inode_get(this, local->inode, NULL);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ pl_inode->mlock_enforced = _gf_false;
+ pl_inode->check_mlock_info = _gf_false;
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
+unwind:
PL_STACK_UNWIND_FOR_CLIENT(fremovexattr, xdata, frame, op_ret, op_errno,
xdata);
return 0;
@@ -4150,10 +4729,23 @@ int
pl_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
dict_t *xdata)
{
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ posix_locks_private_t *priv = this->private;
+
PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+
+ PL_CHECK_LOCK_ENFORCE_KEY(frame, ((dict_t *)NULL), name, this,
+ ((loc_t *)NULL), fd, priv);
+
STACK_WIND(frame, pl_fremovexattr_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
return 0;
+
+unwind:
+ PL_STACK_UNWIND_FOR_CLIENT(fremovexattr, xdata, frame, op_ret, op_errno,
+ NULL);
+ return 0;
}
int32_t
@@ -4189,7 +4781,7 @@ int
pl_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
STACK_WIND(frame, pl_xattrop_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop, loc, optype, xattr, xdata);
return 0;
@@ -4228,7 +4820,7 @@ int
pl_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
int32_t valid, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
STACK_WIND(frame, pl_setattr_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
return 0;
@@ -4289,7 +4881,7 @@ int
pl_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
STACK_WIND(frame, pl_readlink_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readlink, loc, size, xdata);
return 0;
@@ -4307,7 +4899,7 @@ int
pl_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
STACK_WIND(frame, pl_access_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->access, loc, mask, xdata);
return 0;
@@ -4456,7 +5048,7 @@ struct volume_options options[] = {
"be used in conjunction w/ revocation-clear-all."},
{.key = {"notify-contention"},
.type = GF_OPTION_TYPE_BOOL,
- .default_value = "no",
+ .default_value = "yes",
.flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
.op_version = {GD_OP_VERSION_4_0_0},
.tags = {"locks", "contention"},
@@ -4479,5 +5071,25 @@ struct volume_options options[] = {
"on the same inode. If multiple lock requests are "
"received during this period, only one upcall will "
"be sent."},
+ {.key = {"enforce-mandatory-lock"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .flags = OPT_FLAG_SETTABLE,
+ .op_version = {GD_OP_VERSION_6_0},
+ .description = "option to enable lock enforcement"},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "locks",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/locks/src/reservelk.c b/xlators/features/locks/src/reservelk.c
index 8b080dba030..604691fd887 100644
--- a/xlators/features/locks/src/reservelk.c
+++ b/xlators/features/locks/src/reservelk.c
@@ -7,12 +7,12 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "list.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/list.h>
#include "locks.h"
#include "common.h"
@@ -31,12 +31,10 @@ reservelks_equal(posix_lock_t *l1, posix_lock_t *l2)
static posix_lock_t *
__reservelk_grantable(pl_inode_t *pl_inode, posix_lock_t *lock)
{
- xlator_t *this = NULL;
+ xlator_t *this = THIS;
posix_lock_t *l = NULL;
posix_lock_t *ret_lock = NULL;
- this = THIS;
-
if (list_empty(&pl_inode->reservelk_list)) {
gf_log(this->name, GF_LOG_TRACE, "No reservelks in list");
goto out;
@@ -82,10 +80,9 @@ __matching_reservelk(pl_inode_t *pl_inode, posix_lock_t *lock)
static int
__reservelk_conflict(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock)
{
- posix_lock_t *conf = NULL;
int ret = 0;
- conf = __matching_reservelk(pl_inode, lock);
+ posix_lock_t *conf = __matching_reservelk(pl_inode, lock);
if (conf) {
gf_log(this->name, GF_LOG_TRACE, "Matching reservelk found");
if (__same_owner_reservelk(lock, conf)) {
@@ -104,29 +101,28 @@ __reservelk_conflict(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock)
int
pl_verify_reservelk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
- int can_block)
+ const int can_block)
{
int ret = 0;
pthread_mutex_lock(&pl_inode->mutex);
{
if (__reservelk_conflict(this, pl_inode, lock)) {
+ lock->blocked = can_block;
+ list_add_tail(&lock->list, &pl_inode->blocked_calls);
+ pthread_mutex_unlock(&pl_inode->mutex);
gf_log(this->name, GF_LOG_TRACE,
"Found conflicting reservelk. Blocking until reservelk is "
"unlocked.");
- lock->blocked = can_block;
- list_add_tail(&lock->list, &pl_inode->blocked_calls);
ret = -1;
- goto unlock;
+ goto out;
}
-
- gf_log(this->name, GF_LOG_TRACE,
- "no conflicting reservelk found. Call continuing");
- ret = 0;
}
-unlock:
pthread_mutex_unlock(&pl_inode->mutex);
-
+ gf_log(this->name, GF_LOG_TRACE,
+ "no conflicting reservelk found. Call continuing");
+ ret = 0;
+out:
return ret;
}
@@ -135,12 +131,11 @@ unlock:
*/
static int
__lock_reservelk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
- int can_block)
+ const int can_block)
{
- posix_lock_t *conf = NULL;
int ret = -EINVAL;
- conf = __reservelk_grantable(pl_inode, lock);
+ posix_lock_t *conf = __reservelk_grantable(pl_inode, lock);
if (conf) {
ret = -EAGAIN;
if (can_block == 0)
@@ -183,9 +178,7 @@ find_matching_reservelk(posix_lock_t *lock, pl_inode_t *pl_inode)
static posix_lock_t *
__reserve_unlock_lock(xlator_t *this, posix_lock_t *lock, pl_inode_t *pl_inode)
{
- posix_lock_t *conf = NULL;
-
- conf = find_matching_reservelk(lock, pl_inode);
+ posix_lock_t *conf = find_matching_reservelk(lock, pl_inode);
if (!conf) {
gf_log(this->name, GF_LOG_DEBUG, " Matching lock not found for unlock");
goto out;
@@ -319,8 +312,6 @@ grant_blocked_lock_calls(xlator_t *this, pl_inode_t *pl_inode)
ret = pl_setlk(this, pl_inode, lock, can_block);
if (ret == -1) {
if (can_block) {
- pl_trace_block(this, lock->frame, fd, NULL, cmd,
- &lock->user_flock, NULL);
continue;
} else {
gf_log(this->name, GF_LOG_DEBUG, "returning EAGAIN");
@@ -345,6 +336,7 @@ pl_reserve_unlock(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock)
{
retlock = __reserve_unlock_lock(this, lock, pl_inode);
if (!retlock) {
+ pthread_mutex_unlock(&pl_inode->mutex);
gf_log(this->name, GF_LOG_DEBUG, "Bad Unlock issued on Inode lock");
ret = -EINVAL;
goto out;
@@ -354,9 +346,8 @@ pl_reserve_unlock(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock)
__destroy_lock(retlock);
ret = 0;
}
-out:
pthread_mutex_unlock(&pl_inode->mutex);
-
+out:
grant_blocked_reserve_locks(this, pl_inode);
grant_blocked_lock_calls(this, pl_inode);
@@ -372,19 +363,20 @@ pl_reserve_setlk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
pthread_mutex_lock(&pl_inode->mutex);
{
ret = __lock_reservelk(this, pl_inode, lock, can_block);
- if (ret < 0)
- gf_log(this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => NOK",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid, lkowner_utoa(&lock->owner),
- lock->user_flock.l_start, lock->user_flock.l_len);
- else
- gf_log(this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => OK",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid, lkowner_utoa(&lock->owner), lock->fl_start,
- lock->fl_end);
}
pthread_mutex_unlock(&pl_inode->mutex);
+
+ if (ret < 0)
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => NOK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid,
+ lkowner_utoa(&lock->owner), lock->user_flock.l_start,
+ lock->user_flock.l_len);
+ else
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => OK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid,
+ lkowner_utoa(&lock->owner), lock->fl_start, lock->fl_end);
+
return ret;
}
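
Editor's note: the pl_reserve_setlk() change moves the trace logging out of the critical section so that logging I/O never extends the time pl_inode->mutex is held. A minimal standalone sketch of that pattern:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
static int granted_locks;

int
take_lock(void)
{
    int ret;

    pthread_mutex_lock(&m);
    ret = (granted_locks == 0) ? 0 : -1; /* decide while holding the mutex */
    if (ret == 0)
        granted_locks++;
    pthread_mutex_unlock(&m);

    /* log only after the mutex has been released */
    fprintf(stderr, "lock request => %s\n", ret == 0 ? "OK" : "NOK");

    return ret;
}

int
main(void)
{
    return take_lock();
}
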