summaryrefslogtreecommitdiffstats
path: root/xlators/features/locks
diff options
context:
space:
mode:
authorAnoop C S <anoopcs@redhat.com>2015-02-04 10:34:33 +0530
committerPranith Kumar Karampuri <pkarampu@redhat.com>2016-05-02 04:18:44 -0700
commit4517bf8dd6de310950cc5a612955aa3a2fddb57e (patch)
tree2cdf4e8ec5c9362a325d5a48e07778ea44e1fdfe /xlators/features/locks
parent78c1c6002f0b11afa997a14f8378c04f257ea1c5 (diff)
features/locks: Implement mandatory locks
Initial change to fix/enable the mandatory locking support in GlusterFS as per the following design: https://review.gluster.org/#/c/12014/ Accordingly 'locks.mandatory-locking' option is available as part of this change which will accept one among the following values: * off * file * forced * optimal See design doc for more details Change-Id: I14c489b3f8af5ebcbfa155a03f0c175e9558ac46 BUG: 762184 Signed-off-by: Anoop C S <anoopcs@redhat.com> Reviewed-on: http://review.gluster.org/9768 Smoke: Gluster Build System <jenkins@build.gluster.com> CentOS-regression: Gluster Build System <jenkins@build.gluster.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> Reviewed-by: Poornima G <pgurusid@redhat.com> Reviewed-by: Raghavendra Talur <rtalur@redhat.com> Reviewed-by: Rajesh Joseph <rjoseph@redhat.com> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Diffstat (limited to 'xlators/features/locks')
-rw-r--r--xlators/features/locks/src/common.c12
-rw-r--r--xlators/features/locks/src/common.h4
-rw-r--r--xlators/features/locks/src/locks.h14
-rw-r--r--xlators/features/locks/src/posix.c688
4 files changed, 567 insertions, 151 deletions
diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
index c6db18f6ba8..facb078612f 100644
--- a/xlators/features/locks/src/common.c
+++ b/xlators/features/locks/src/common.c
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2006-2012, 2015-2016 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -446,7 +446,7 @@ unlock:
/* Create a new posix_lock_t */
posix_lock_t *
new_posix_lock (struct gf_flock *flock, client_t *client, pid_t client_pid,
- gf_lkowner_t *owner, fd_t *fd)
+ gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags)
{
posix_lock_t *lock = NULL;
@@ -480,6 +480,7 @@ new_posix_lock (struct gf_flock *flock, client_t *client, pid_t client_pid,
lock->fd = fd;
lock->client_pid = client_pid;
lock->owner = *owner;
+ lock->lk_flags = lk_flags;
INIT_LIST_HEAD (&lock->list);
@@ -799,7 +800,8 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock)
continue;
if (same_owner (conf, lock)) {
- if (conf->fl_type == lock->fl_type) {
+ if (conf->fl_type == lock->fl_type &&
+ conf->lk_flags == lock->lk_flags) {
sum = add_locks (lock, conf);
sum->fl_type = lock->fl_type;
@@ -810,6 +812,7 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock)
sum->fd_num = lock->fd_num;
sum->client_pid = lock->client_pid;
sum->owner = lock->owner;
+ sum->lk_flags = lock->lk_flags;
__delete_lock (conf);
__destroy_lock (conf);
@@ -832,6 +835,7 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock)
sum->fd_num = conf->fd_num;
sum->client_pid = conf->client_pid;
sum->owner = conf->owner;
+ sum->lk_flags = conf->lk_flags;
v = subtract_locks (sum, lock);
@@ -988,7 +992,7 @@ pl_send_prelock_unlock (xlator_t *this, pl_inode_t *pl_inode,
unlock_lock = new_posix_lock (&flock, old_lock->client,
old_lock->client_pid, &old_lock->owner,
- old_lock->fd);
+ old_lock->fd, old_lock->lk_flags);
GF_VALIDATE_OR_GOTO (this->name, unlock_lock, out);
ret = 0;
diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h
index be13d29362b..44f5a8484c5 100644
--- a/xlators/features/locks/src/common.h
+++ b/xlators/features/locks/src/common.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2006-2012, 2015-2016 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -35,7 +35,7 @@
posix_lock_t *
new_posix_lock (struct gf_flock *flock, client_t *client, pid_t client_pid,
- gf_lkowner_t *owner, fd_t *fd);
+ gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags);
pl_inode_t *
pl_inode_get (xlator_t *this, inode_t *inode);
diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
index 3480027c4c9..b8763091d00 100644
--- a/xlators/features/locks/src/locks.h
+++ b/xlators/features/locks/src/locks.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2006-2012, 2015-2016 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -18,6 +18,13 @@
#include "lkowner.h"
+typedef enum {
+ MLK_NONE,
+ MLK_FILE_BASED,
+ MLK_FORCED,
+ MLK_OPTIMAL
+} mlk_mode_t; /* defines different mandatory locking modes*/
+
struct __pl_fd;
struct __posix_lock {
@@ -26,6 +33,7 @@ struct __posix_lock {
short fl_type;
off_t fl_start;
off_t fl_end;
+ uint32_t lk_flags;
short blocked; /* waiting to acquire */
struct gf_flock user_flock; /* the flock supplied by the user */
@@ -161,7 +169,7 @@ typedef struct __pl_inode pl_inode_t;
typedef struct {
- gf_boolean_t mandatory; /* if mandatory locking is enabled */
+ mlk_mode_t mandatory_mode; /* holds current mandatory locking mode */
gf_boolean_t trace; /* trace lock requests in and out */
char *brickname;
} posix_locks_private_t;
@@ -178,7 +186,7 @@ typedef struct {
loc_t loc[2];
fd_t *fd;
off_t offset;
- enum {TRUNCATE, FTRUNCATE} op;
+ glusterfs_fop_t op;
} pl_local_t;
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index 1d40c154162..2ff7655a170 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2006-2012, 2016 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -33,7 +33,6 @@
/* Forward declarations */
-
void do_blocked_rw (pl_inode_t *);
static int __rw_allowable (pl_inode_t *, posix_lock_t *, glusterfs_fop_t);
static int format_brickname(char *);
@@ -352,6 +351,49 @@ pl_set_xdata_response (xlator_t *this, pl_local_t *local, inode_t *parent,
pl_posixlk_xattr_fill (this, inode, xdata, max_lock);
}
+/* Return true in case we need to ensure mandatory-locking
+ * semnatics under different modes.
+ */
+gf_boolean_t
+pl_is_mandatory_locking_enabled (pl_inode_t *pl_inode)
+{
+ posix_locks_private_t *priv = NULL;
+
+ priv = THIS->private;
+
+ if (priv->mandatory_mode == MLK_FILE_BASED && pl_inode->mandatory)
+ return _gf_true;
+ else if (priv->mandatory_mode == MLK_FORCED ||
+ priv->mandatory_mode == MLK_OPTIMAL)
+ return _gf_true;
+
+ return _gf_false;
+}
+
+/* Checks whether the region where fop is acting upon conflicts
+ * with existing locks. If there is no conflict function returns
+ * 1 else returns 0 with can_block boolean set accordingly to
+ * indicate block/fail the fop.
+ */
+int
+pl_is_fop_allowed (pl_inode_t *pl_inode, posix_lock_t *region, fd_t *fd,
+ glusterfs_fop_t op, gf_boolean_t *can_block)
+{
+ int ret = 0;
+
+ if (!__rw_allowable (pl_inode, region, op)) {
+ if ((!fd) || (fd && (fd->flags & O_NONBLOCK))) {
+ gf_log ("locks", GF_LOG_TRACE, "returning EAGAIN"
+ " because fd is O_NONBLOCK");
+ *can_block = _gf_false;
+ } else
+ *can_block = _gf_true;
+ } else
+ ret = 1;
+
+ return ret;
+}
+
static pl_fdctx_t *
pl_new_fdctx ()
{
@@ -402,6 +444,214 @@ out:
return fdctx;
}
+int32_t
+pl_discard_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (discard, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
+}
+
+int
+pl_discard_cont (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ STACK_WIND (frame, pl_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+ return 0;
+}
+
+int32_t
+pl_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ pl_inode_t *pl_inode = NULL;
+ pl_rw_req_t *rw = NULL;
+ posix_lock_t region = {.list = {0, }, };
+ gf_boolean_t enabled = _gf_false;
+ gf_boolean_t can_block = _gf_true;
+ int op_ret = 0;
+ int op_errno = 0;
+ int allowed = 1;
+
+ GF_VALIDATE_OR_GOTO ("locks", this, unwind);
+
+ pl_inode = pl_inode_get (this, fd->inode);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ enabled = pl_is_mandatory_locking_enabled (pl_inode);
+
+ if (frame->root->pid < 0)
+ enabled = _gf_false;
+
+ if (enabled) {
+ region.fl_start = offset;
+ region.fl_end = offset + len - 1;
+ region.client = frame->root->client;
+ region.fd_num = fd_to_fdnum(fd);
+ region.client_pid = frame->root->pid;
+ region.owner = frame->root->lk_owner;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ allowed = pl_is_fop_allowed (pl_inode, &region, fd,
+ GF_FOP_DISCARD,
+ &can_block);
+ if (allowed == 1)
+ goto unlock;
+ else if (!can_block) {
+ op_errno = EAGAIN;
+ op_ret = -1;
+ goto unlock;
+ }
+
+ rw = GF_CALLOC (1, sizeof (*rw),
+ gf_locks_mt_pl_rw_req_t);
+ if (!rw) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto unlock;
+ }
+
+ rw->stub = fop_discard_stub (frame, pl_discard_cont,
+ fd, offset, len, xdata);
+ if (!rw->stub) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ GF_FREE (rw);
+ goto unlock;
+ }
+
+ rw->region = region;
+
+ list_add_tail (&rw->list, &pl_inode->rw_list);
+ }
+ unlock:
+ pthread_mutex_unlock (&pl_inode->mutex);
+ }
+
+ if (allowed == 1)
+ STACK_WIND (frame, pl_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset,
+ len, xdata);
+unwind:
+ if (op_ret == -1)
+ STACK_UNWIND_STRICT (discard, frame, op_ret, op_errno,
+ NULL, NULL, NULL);
+
+ return 0;
+}
+
+int32_t
+pl_zerofill_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
+}
+
+int
+pl_zerofill_cont (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ STACK_WIND (frame, pl_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+ return 0;
+}
+
+int32_t
+pl_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ pl_inode_t *pl_inode = NULL;
+ pl_rw_req_t *rw = NULL;
+ posix_lock_t region = {.list = {0, }, };
+ gf_boolean_t enabled = _gf_false;
+ gf_boolean_t can_block = _gf_true;
+ int op_ret = 0;
+ int op_errno = 0;
+ int allowed = 1;
+
+ GF_VALIDATE_OR_GOTO ("locks", this, unwind);
+
+ pl_inode = pl_inode_get (this, fd->inode);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ enabled = pl_is_mandatory_locking_enabled (pl_inode);
+
+ if (frame->root->pid < 0)
+ enabled = _gf_false;
+
+ if (enabled) {
+ region.fl_start = offset;
+ region.fl_end = offset + len - 1;
+ region.client = frame->root->client;
+ region.fd_num = fd_to_fdnum(fd);
+ region.client_pid = frame->root->pid;
+ region.owner = frame->root->lk_owner;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ allowed = pl_is_fop_allowed (pl_inode, &region, fd,
+ GF_FOP_ZEROFILL,
+ &can_block);
+ if (allowed == 1)
+ goto unlock;
+ else if (!can_block) {
+ op_errno = EAGAIN;
+ op_ret = -1;
+ goto unlock;
+ }
+
+ rw = GF_CALLOC (1, sizeof (*rw),
+ gf_locks_mt_pl_rw_req_t);
+ if (!rw) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto unlock;
+ }
+
+ rw->stub = fop_zerofill_stub (frame, pl_zerofill_cont,
+ fd, offset, len, xdata);
+ if (!rw->stub) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ GF_FREE (rw);
+ goto unlock;
+ }
+
+ rw->region = region;
+
+ list_add_tail (&rw->list, &pl_inode->rw_list);
+ }
+ unlock:
+ pthread_mutex_unlock (&pl_inode->mutex);
+ }
+
+ if (allowed == 1)
+ STACK_WIND (frame, pl_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset,
+ len, xdata);
+unwind:
+ if (op_ret == -1)
+ STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno,
+ NULL, NULL, NULL);
+
+ return 0;
+}
+
int
pl_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
@@ -411,7 +661,7 @@ pl_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
- if (local->op == TRUNCATE)
+ if (local->op == GF_FOP_TRUNCATE)
loc_wipe (&local->loc[0]);
if (local->xdata)
@@ -419,58 +669,48 @@ pl_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (local->fd)
fd_unref (local->fd);
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
- prebuf, postbuf, xdata);
+ if (local->op == GF_FOP_TRUNCATE)
+ STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+ else
+ STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
return 0;
}
-
-static int
-truncate_allowed (pl_inode_t *pl_inode,
- client_t *client, pid_t client_pid,
- gf_lkowner_t *owner, off_t offset)
+int
+pl_ftruncate_cont (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, dict_t *xdata)
{
- posix_lock_t *l = NULL;
- posix_lock_t region = {.list = {0, }, };
- int ret = 1;
-
- region.fl_start = offset;
- region.fl_end = LLONG_MAX;
- region.client = client;
- region.client_pid = client_pid;
- region.owner = *owner;
-
- pthread_mutex_lock (&pl_inode->mutex);
- {
- list_for_each_entry (l, &pl_inode->ext_list, list) {
- if (!l->blocked
- && locks_overlap (&region, l)
- && !same_owner (&region, l)) {
- ret = 0;
- gf_log ("posix-locks", GF_LOG_TRACE, "Truncate "
- "allowed");
- break;
- }
- }
- }
- pthread_mutex_unlock (&pl_inode->mutex);
-
- return ret;
+ STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
}
+int
+pl_truncate_cont (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset, dict_t *xdata)
+{
+ STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
+}
static int
truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
dict_t *xdata)
{
- posix_locks_private_t *priv = NULL;
pl_local_t *local = NULL;
inode_t *inode = NULL;
pl_inode_t *pl_inode = NULL;
+ pl_rw_req_t *rw = NULL;
+ posix_lock_t region = {.list = {0, }, };
+ gf_boolean_t enabled = _gf_false;
+ gf_boolean_t can_block = _gf_true;
+ int allowed = 1;
-
- priv = this->private;
+ GF_VALIDATE_OR_GOTO ("locks", this, unwind);
local = frame->local;
if (op_ret != 0) {
@@ -480,7 +720,7 @@ truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto unwind;
}
- if (local->op == TRUNCATE)
+ if (local->op == GF_FOP_TRUNCATE)
inode = local->loc[0].inode;
else
inode = local->fd->inode;
@@ -492,56 +732,119 @@ truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto unwind;
}
- if (priv->mandatory
- && pl_inode->mandatory
- && !truncate_allowed (pl_inode, frame->root->client,
- frame->root->pid, &frame->root->lk_owner,
- local->offset)) {
- op_ret = -1;
- op_errno = EAGAIN;
- goto unwind;
- }
+ enabled = pl_is_mandatory_locking_enabled (pl_inode);
- switch (local->op) {
- case TRUNCATE:
- STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->truncate,
- &local->loc[0], local->offset, local->xdata);
- break;
- case FTRUNCATE:
- STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->ftruncate,
- local->fd, local->offset, local->xdata);
- break;
- }
+ if (frame->root->pid < 0)
+ enabled = _gf_false;
- return 0;
+ if (enabled) {
+ region.fl_start = local->offset;
+ region.fl_end = LLONG_MAX;
+ region.client = frame->root->client;
+ region.fd_num = fd_to_fdnum(local->fd);
+ region.client_pid = frame->root->pid;
+ region.owner = frame->root->lk_owner;
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ allowed = pl_is_fop_allowed (pl_inode, &region,
+ local->fd, local->op,
+ &can_block);
-unwind:
- gf_log (this->name, GF_LOG_ERROR, "truncate failed with ret: %d, "
- "error: %s", op_ret, strerror (op_errno));
- if (local->op == TRUNCATE)
- loc_wipe (&local->loc[0]);
- if (local->xdata)
- dict_unref (local->xdata);
- if (local->fd)
- fd_unref (local->fd);
+ if (allowed == 1)
+ goto unlock;
+ else if (!can_block) {
+ op_errno = EAGAIN;
+ op_ret = -1;
+ goto unlock;
+ }
+
+ rw = GF_CALLOC (1, sizeof (*rw),
+ gf_locks_mt_pl_rw_req_t);
+ if (!rw) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto unlock;
+ }
+
+ if (local->op == GF_FOP_TRUNCATE)
+ rw->stub = fop_truncate_stub (frame,
+ pl_truncate_cont, &local->loc[0],
+ local->offset, local->xdata);
+ else
+ rw->stub = fop_ftruncate_stub (frame,
+ pl_ftruncate_cont, local->fd,
+ local->offset, local->xdata);
+ if (!rw->stub) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ GF_FREE (rw);
+ goto unlock;
+ }
+
+ rw->region = region;
+
+ list_add_tail (&rw->list, &pl_inode->rw_list);
+ }
+ unlock:
+ pthread_mutex_unlock (&pl_inode->mutex);
+ }
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, buf, NULL, xdata);
+ if (allowed == 1) {
+ switch (local->op) {
+ case GF_FOP_TRUNCATE:
+ STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->truncate,
+ &local->loc[0], local->offset, local->xdata);
+ break;
+ case GF_FOP_FTRUNCATE:
+ STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->ftruncate,
+ local->fd, local->offset, local->xdata);
+ break;
+ default:
+ break;
+ }
+ }
+unwind:
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "truncate failed with "
+ "ret: %d, error: %s", op_ret, strerror (op_errno));
+ if (local->op == GF_FOP_TRUNCATE)
+ loc_wipe (&local->loc[0]);
+ if (local->xdata)
+ dict_unref (local->xdata);
+ if (local->fd)
+ fd_unref (local->fd);
+
+ switch (local->op) {
+ case GF_FOP_TRUNCATE:
+ STACK_UNWIND_STRICT (truncate, frame, op_ret,
+ op_errno, buf, NULL, xdata);
+ break;
+ case GF_FOP_FTRUNCATE:
+ STACK_UNWIND_STRICT (ftruncate, frame, op_ret,
+ op_errno, buf, NULL, xdata);
+ break;
+ default:
+ break;
+ }
+ }
return 0;
}
-
int
pl_truncate (call_frame_t *frame, xlator_t *this,
loc_t *loc, off_t offset, dict_t *xdata)
{
pl_local_t *local = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("locks", this, unwind);
local = mem_get0 (this->local_pool);
GF_VALIDATE_OR_GOTO (this->name, local, unwind);
- local->op = TRUNCATE;
+ local->op = GF_FOP_TRUNCATE;
local->offset = offset;
loc_copy (&local->loc[0], loc);
if (xdata)
@@ -551,28 +854,30 @@ pl_truncate (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD (this),
FIRST_CHILD (this)->fops->stat, loc, NULL);
-
- return 0;
-
+ ret = 0;
unwind:
- gf_log (this->name, GF_LOG_ERROR, "truncate for %s failed with ret: %d, "
- "error: %s", loc->path, -1, strerror (ENOMEM));
- STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
-
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "truncate on %s failed with"
+ " ret: %d, error: %s", loc->path, -1,
+ strerror (ENOMEM));
+ STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM,
+ NULL, NULL, NULL);
+ }
return 0;
}
-
int
pl_ftruncate (call_frame_t *frame, xlator_t *this,
fd_t *fd, off_t offset, dict_t *xdata)
{
pl_local_t *local = NULL;
+ int ret = -1;
+ GF_VALIDATE_OR_GOTO ("locks", this, unwind);
local = mem_get0 (this->local_pool);
GF_VALIDATE_OR_GOTO (this->name, local, unwind);
- local->op = FTRUNCATE;
+ local->op = GF_FOP_FTRUNCATE;
local->offset = offset;
local->fd = fd_ref (fd);
if (xdata)
@@ -582,13 +887,14 @@ pl_ftruncate (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fstat, fd, xdata);
- return 0;
-
+ ret = 0;
unwind:
- gf_log (this->name, GF_LOG_ERROR, "ftruncate failed with ret: %d, "
- "error: %s", -1, strerror (ENOMEM));
- STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
-
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "ftruncate failed with"
+ " ret: %d, error: %s", -1, strerror (ENOMEM));
+ STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM,
+ NULL, NULL, NULL);
+ }
return 0;
}
@@ -1336,10 +1642,62 @@ int
pl_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
fd_t *fd, dict_t *xdata)
{
- STACK_WIND (frame, pl_open_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->open,
- loc, flags, fd, xdata);
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ pl_inode_t *pl_inode = NULL;
+ posix_lock_t *l = NULL;
+ posix_locks_private_t *priv = NULL;
+
+ priv = this->private;
+
+ GF_VALIDATE_OR_GOTO ("locks", this, unwind);
+
+ op_ret = 0, op_errno = 0;
+ pl_inode = pl_inode_get (this, fd->inode);
+
+ /* As per design, under forced and file-based mandatory locking modes
+ * it doesn't matter whether inodes's lock list contain advisory or
+ * mandatory type locks. So we just check whether inode's lock list is
+ * empty or not to make sure that no locks are being held for the file.
+ * Whereas under optimal mandatory locking mode, we strictly fail open
+ * if and only if lock list contain mandatory locks.
+ */
+ if (((priv->mandatory_mode == MLK_FILE_BASED) && pl_inode->mandatory) ||
+ priv->mandatory_mode == MLK_FORCED) {
+ if (fd->flags & O_TRUNC) {
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ if (!list_empty (&pl_inode->ext_list)) {
+ op_ret = -1;
+ op_errno = EAGAIN;
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+ }
+ } else if (priv->mandatory_mode == MLK_OPTIMAL) {
+ if (fd->flags & O_TRUNC) {
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if ((l->lk_flags & GF_LK_MANDATORY)) {
+ op_ret = -1;
+ op_errno = EAGAIN;
+ break;
+ }
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+ }
+ }
+unwind:
+ if (op_ret == -1)
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno,
+ NULL, NULL);
+ else
+ STACK_WIND (frame, pl_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open,
+ loc, flags, fd, xdata);
return 0;
}
@@ -1455,18 +1813,26 @@ do_blocked_rw (pl_inode_t *pl_inode)
return;
}
-
static int
__rw_allowable (pl_inode_t *pl_inode, posix_lock_t *region,
glusterfs_fop_t op)
{
posix_lock_t *l = NULL;
+ posix_locks_private_t *priv = NULL;
int ret = 1;
+ priv = THIS->private;
+
list_for_each_entry (l, &pl_inode->ext_list, list) {
- if (locks_overlap (l, region) && !same_owner (l, region)) {
+ if (!l->blocked && locks_overlap (l, region)
+ && !same_owner (l, region)) {
if ((op == GF_FOP_READ) && (l->fl_type != F_WRLCK))
continue;
+ /* Check for mandatory lock under optimal
+ * mandatory-locking mode */
+ if (priv->mandatory_mode == MLK_OPTIMAL
+ && !(l->lk_flags & GF_LK_MANDATORY))
+ continue;
ret = 0;
break;
}
@@ -1475,7 +1841,6 @@ __rw_allowable (pl_inode_t *pl_inode, posix_lock_t *region,
return ret;
}
-
int
pl_readv_cont (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
off_t offset, uint32_t flags, dict_t *xdata)
@@ -1487,26 +1852,35 @@ pl_readv_cont (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
return 0;
}
-
int
pl_readv (call_frame_t *frame, xlator_t *this,
fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
- posix_locks_private_t *priv = NULL;
pl_inode_t *pl_inode = NULL;
pl_rw_req_t *rw = NULL;
posix_lock_t region = {.list = {0, }, };
+ gf_boolean_t enabled = _gf_false;
+ gf_boolean_t can_block = _gf_true;
int op_ret = 0;
int op_errno = 0;
- char wind_needed = 1;
+ int allowed = 1;
+ GF_VALIDATE_OR_GOTO ("locks", this, unwind);
- priv = this->private;
pl_inode = pl_inode_get (this, fd->inode);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL, NULL);
+ enabled = pl_is_mandatory_locking_enabled (pl_inode);
+
+ if (frame->root->pid < 0)
+ enabled = _gf_false;
- if (priv->mandatory && pl_inode->mandatory) {
+ if (enabled) {
region.fl_start = offset;
region.fl_end = offset + size - 1;
region.client = frame->root->client;
@@ -1516,15 +1890,11 @@ pl_readv (call_frame_t *frame, xlator_t *this,
pthread_mutex_lock (&pl_inode->mutex);
{
- wind_needed = __rw_allowable (pl_inode, &region,
- GF_FOP_READ);
- if (wind_needed) {
+ allowed = pl_is_fop_allowed (pl_inode, &region, fd,
+ GF_FOP_READ, &can_block);
+ if (allowed == 1)
goto unlock;
- }
-
- if (fd->flags & O_NONBLOCK) {
- gf_log (this->name, GF_LOG_TRACE,
- "returning EAGAIN as fd is O_NONBLOCK");
+ else if (!can_block) {
op_errno = EAGAIN;
op_ret = -1;
goto unlock;
@@ -1556,21 +1926,19 @@ pl_readv (call_frame_t *frame, xlator_t *this,
pthread_mutex_unlock (&pl_inode->mutex);
}
-
- if (wind_needed) {
+ if (allowed == 1) {
STACK_WIND (frame, pl_readv_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv,
fd, size, offset, flags, xdata);
}
-
+unwind:
if (op_ret == -1)
- STACK_UNWIND_STRICT (readv, frame, -1, op_errno,
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
NULL, 0, NULL, NULL, NULL);
return 0;
}
-
int
pl_writev_cont (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int count, off_t offset,
@@ -1583,26 +1951,36 @@ pl_writev_cont (call_frame_t *frame, xlator_t *this, fd_t *fd,
return 0;
}
-
int
pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset,
uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- posix_locks_private_t *priv = NULL;
pl_inode_t *pl_inode = NULL;
pl_rw_req_t *rw = NULL;
posix_lock_t region = {.list = {0, }, };
+ gf_boolean_t enabled = _gf_false;
+ gf_boolean_t can_block = _gf_true;
int op_ret = 0;
int op_errno = 0;
- char wind_needed = 1;
+ int allowed = 1;
+
+ GF_VALIDATE_OR_GOTO ("locks", this, unwind);
- priv = this->private;
pl_inode = pl_inode_get (this, fd->inode);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL, NULL);
+ enabled = pl_is_mandatory_locking_enabled (pl_inode);
+
+ if (frame->root->pid < 0)
+ enabled = _gf_false;
- if (priv->mandatory && pl_inode->mandatory) {
+ if (enabled) {
region.fl_start = offset;
region.fl_end = offset + iov_length (vector, count) - 1;
region.client = frame->root->client;
@@ -1612,15 +1990,11 @@ pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
pthread_mutex_lock (&pl_inode->mutex);
{
- wind_needed = __rw_allowable (pl_inode, &region,
- GF_FOP_WRITE);
- if (wind_needed)
+ allowed = pl_is_fop_allowed (pl_inode, &region, fd,
+ GF_FOP_WRITE, &can_block);
+ if (allowed == 1)
goto unlock;
-
- if (fd->flags & O_NONBLOCK) {
- gf_log (this->name, GF_LOG_TRACE,
- "returning EAGAIN because fd is "
- "O_NONBLOCK");
+ else if (!can_block) {
op_errno = EAGAIN;
op_ret = -1;
goto unlock;
@@ -1652,16 +2026,15 @@ pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
pthread_mutex_unlock (&pl_inode->mutex);
}
-
- if (wind_needed) {
+ if (allowed == 1) {
STACK_WIND (frame, pl_writev_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev,
fd, vector, count, offset, flags, iobref, xdata);
}
-
+unwind:
if (op_ret == -1)
- STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL,
- NULL);
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno,
+ NULL, NULL, NULL);
return 0;
}
@@ -1689,7 +2062,7 @@ lock_dup (posix_lock_t *lock)
new_lock = new_posix_lock (&lock->user_flock, lock->client,
lock->client_pid, &lock->owner,
- (fd_t *)lock->fd_num);
+ (fd_t *)lock->fd_num, lock->lk_flags);
return new_lock;
}
@@ -1854,6 +2227,21 @@ pl_lk (call_frame_t *frame, xlator_t *this,
posix_lock_t *reqlock = NULL;
posix_lock_t *conf = NULL;
int ret = 0;
+ uint32_t lk_flags = 0;
+ posix_locks_private_t *priv = NULL;
+
+ priv = this->private;
+
+ ret = dict_get_uint32 (xdata, "lkmode", &lk_flags);
+ if (ret == 0) {
+ if (priv->mandatory_mode == MLK_NONE)
+ gf_log (this->name, GF_LOG_DEBUG, "Lock flags received "
+ "in a non-mandatory locking environment, "
+ "continuing");
+ else
+ gf_log (this->name, GF_LOG_DEBUG, "Lock flags received, "
+ "continuing");
+ }
if ((flock->l_start < 0) ||
((flock->l_start + flock->l_len) < 0)) {
@@ -1880,7 +2268,7 @@ pl_lk (call_frame_t *frame, xlator_t *this,
}
reqlock = new_posix_lock (flock, frame->root->client, frame->root->pid,
- &frame->root->lk_owner, fd);
+ &frame->root->lk_owner, fd, lk_flags);
if (!reqlock) {
op_ret = -1;
@@ -2856,7 +3244,7 @@ init (xlator_t *this)
{
posix_locks_private_t *priv = NULL;
xlator_list_t *trav = NULL;
- data_t *mandatory = NULL;
+ char *tmp_str = NULL;
int ret = -1;
if (!this->children || this->children->next) {
@@ -2884,10 +3272,16 @@ init (xlator_t *this)
priv = GF_CALLOC (1, sizeof (*priv),
gf_locks_mt_posix_locks_private_t);
- mandatory = dict_get (this->options, "mandatory-locks");
- if (mandatory)
- gf_log (this->name, GF_LOG_WARNING,
- "mandatory locks not supported in this minor release.");
+ GF_OPTION_INIT ("mandatory-locking", tmp_str, str, out);
+ if (!strcmp (tmp_str, "forced"))
+ priv->mandatory_mode = MLK_FORCED;
+ else if (!strcmp (tmp_str, "file"))
+ priv->mandatory_mode = MLK_FILE_BASED;
+ else if (!strcmp (tmp_str, "optimal"))
+ priv->mandatory_mode = MLK_OPTIMAL;
+ else
+ priv->mandatory_mode = MLK_NONE;
+ tmp_str = NULL;
GF_OPTION_INIT ("trace", priv->trace, bool, out);
@@ -3095,6 +3489,8 @@ struct xlator_fops fops = {
.fstat = pl_fstat,
.truncate = pl_truncate,
.ftruncate = pl_ftruncate,
+ .discard = pl_discard,
+ .zerofill = pl_zerofill,
.open = pl_open,
.readv = pl_readv,
.writev = pl_writev,
@@ -3126,13 +3522,21 @@ struct xlator_cbks cbks = {
.client_disconnect = pl_client_disconnect_cbk,
};
-
struct volume_options options[] = {
- { .key = { "mandatory-locks", "mandatory" },
- .type = GF_OPTION_TYPE_BOOL
+ { .key = { "mandatory-locking" },
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "off",
+ .description = "Specifies the mandatory-locking mode. Valid options "
+ "are 'file' to use linux style mandatory locks, "
+ "'forced' to use volume striclty under mandatory lock "
+ "semantics only and 'optimal' to treat advisory and "
+ "mandatory locks separately on their own."
},
{ .key = { "trace" },
- .type = GF_OPTION_TYPE_BOOL
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Trace the different lock requests "
+ "to logs."
},
{ .key = {NULL} },
};