summaryrefslogtreecommitdiffstats
path: root/xlators/features/locks/src
diff options
context:
space:
mode:
authorVikas Gorur <vikas@zresearch.com>2009-02-18 17:36:07 +0530
committerVikas Gorur <vikas@zresearch.com>2009-02-18 17:36:07 +0530
commit77adf4cd648dce41f89469dd185deec6b6b53a0b (patch)
tree02e155a5753b398ee572b45793f889b538efab6b /xlators/features/locks/src
parentf3b2e6580e5663292ee113c741343c8a43ee133f (diff)
Added all files
Diffstat (limited to 'xlators/features/locks/src')
-rw-r--r--xlators/features/locks/src/Makefile.am20
-rw-r--r--xlators/features/locks/src/common.c561
-rw-r--r--xlators/features/locks/src/common.h59
-rw-r--r--xlators/features/locks/src/internal.c762
-rw-r--r--xlators/features/locks/src/locks.h111
-rw-r--r--xlators/features/locks/src/posix.c834
6 files changed, 2347 insertions, 0 deletions
diff --git a/xlators/features/locks/src/Makefile.am b/xlators/features/locks/src/Makefile.am
new file mode 100644
index 00000000000..ec4a953eb91
--- /dev/null
+++ b/xlators/features/locks/src/Makefile.am
@@ -0,0 +1,20 @@
+# Build the "locks" translator as a dlopen()-able libtool module.
+xlator_LTLIBRARIES = locks.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+# libtool spells the flag "-avoid-version"; "-avoidversion" is not a
+# libtool option and is not interpreted as a request for an unversioned module.
+locks_la_LDFLAGS = -module -avoid-version
+
+locks_la_SOURCES = common.c posix.c internal.c
+locks_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = locks.h common.h
+
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -fno-strict-aliasing -D$(GF_HOST_OS) \
+	-I$(top_srcdir)/libglusterfs/src $(GF_CFLAGS) -shared -nostartfiles
+
+CLEANFILES =
+
+# The translator is also reachable under the name "posix-locks" through a
+# symlink created at install time and removed at uninstall time.
+uninstall-local:
+	rm -f $(DESTDIR)$(xlatordir)/posix-locks.so
+
+install-data-hook:
+	ln -sf locks.so $(DESTDIR)$(xlatordir)/posix-locks.so
\ No newline at end of file
diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
new file mode 100644
index 00000000000..9ac1250cc57
--- /dev/null
+++ b/xlators/features/locks/src/common.c
@@ -0,0 +1,561 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <pthread.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "compat.h"
+#include "xlator.h"
+#include "inode.h"
+#include "logging.h"
+#include "common-utils.h"
+
+#include "locks.h"
+
+
+int
+pl_is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock,
+ gf_lk_domain_t dom);
+static void
+__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock,
+ gf_lk_domain_t dom);
+
+
+/*
+ * Return the lock context attached to {inode} for this translator,
+ * creating and attaching a fresh one if none is stored yet.  The lookup
+ * and the creation both happen under inode->lock.  Returns NULL when a
+ * new context had to be allocated and the allocation failed.
+ */
+pl_inode_t *
+pl_inode_get (xlator_t *this, inode_t *inode)
+{
+        pl_inode_t *ctx       = NULL;
+        uint64_t    ctx_value = 0;
+        mode_t      mode      = 0;
+        int         ret       = 0;
+
+        LOCK (&inode->lock);
+        {
+                ret = inode_ctx_get (inode, this, &ctx_value);
+                if (ret == 0) {
+                        /* a context is already stored: reuse it */
+                        ctx = (pl_inode_t *)(long)ctx_value;
+                } else {
+                        ctx = CALLOC (1, sizeof (*ctx));
+                        if (ctx == NULL) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "out of memory :(");
+                        } else {
+                                /* setgid bit set without group-execute
+                                   marks the inode as "mandatory" */
+                                mode = inode->st_mode;
+                                if ((mode & S_ISGID) && !(mode & S_IXGRP))
+                                        ctx->mandatory = 1;
+
+                                pthread_mutex_init (&ctx->mutex, NULL);
+
+                                INIT_LIST_HEAD (&ctx->dir_list);
+                                INIT_LIST_HEAD (&ctx->ext_list);
+                                INIT_LIST_HEAD (&ctx->int_list);
+                                INIT_LIST_HEAD (&ctx->rw_list);
+
+                                ret = inode_ctx_put (inode, this,
+                                                     (uint64_t)(long)ctx);
+                        }
+                }
+        }
+        UNLOCK (&inode->lock);
+
+        return ctx;
+}
+
+
+/*
+ * Allocate a posix_lock_t describing the region in {flock}, owned by
+ * the (transport, client_pid) pair.  A zero l_len is stored as an end
+ * offset of LLONG_MAX.  Returns NULL if the allocation fails.
+ */
+posix_lock_t *
+new_posix_lock (struct flock *flock, transport_t *transport, pid_t client_pid)
+{
+        posix_lock_t *fresh = CALLOC (1, sizeof (posix_lock_t));
+
+        if (fresh == NULL)
+                return NULL;
+
+        INIT_LIST_HEAD (&fresh->list);
+
+        fresh->transport  = transport;
+        fresh->client_pid = client_pid;
+
+        fresh->fl_type  = flock->l_type;
+        fresh->fl_start = flock->l_start;
+
+        if (flock->l_len != 0)
+                fresh->fl_end = flock->l_start + flock->l_len - 1;
+        else
+                fresh->fl_end = LLONG_MAX;
+
+        return fresh;
+}
+
+
+/*
+ * Unlink {lock} from whichever list it is on and leave its list node
+ * self-linked.  The lock itself is not freed; {pl_inode} is not used.
+ */
+void
+__delete_lock (pl_inode_t *pl_inode, posix_lock_t *lock)
+{
+        struct list_head *node = &lock->list;
+
+        list_del_init (node);
+}
+
+
+/*
+ * Release the memory of a posix_lock_t.  The lock is not unlinked
+ * here; this function only frees it.
+ */
+void
+__destroy_lock (posix_lock_t *lock)
+{
+        free (lock);
+
+        return;
+}
+
+
+/*
+ * Convert a posix_lock to a struct flock.
+ *
+ * This is the inverse of new_posix_lock(): there a zero l_len ("until
+ * end of file") is stored as fl_end == LLONG_MAX, so here an fl_end of
+ * LLONG_MAX must be reported back as l_len == 0.  (The previous code
+ * tested fl_end == 0 and returned l_len = LLONG_MAX, which matched
+ * neither direction of the conversion.)
+ *
+ * @lock:  lock to convert (read only)
+ * @flock: output; l_pid, l_type, l_start and l_len are overwritten
+ */
+void
+posix_lock_to_flock (posix_lock_t *lock, struct flock *flock)
+{
+        flock->l_pid   = lock->client_pid;
+        flock->l_type  = lock->fl_type;
+        flock->l_start = lock->fl_start;
+
+        if (lock->fl_end == LLONG_MAX)
+                flock->l_len = 0;
+        else
+                flock->l_len = lock->fl_end - lock->fl_start + 1;
+}
+
+
+/*
+ * Append {lock} to the tail of the lock list that DOMAIN_HEAD selects
+ * for domain {dom} on {pl_inode}.  No merging or conflict checking is
+ * done here.
+ */
+void
+pl_insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock, gf_lk_domain_t dom)
+{
+        struct list_head *head = DOMAIN_HEAD (pl_inode, dom);
+
+        list_add_tail (&lock->list, head);
+}
+
+
+/*
+ * Return 1 if the byte ranges [fl_start, fl_end] of the two locks
+ * share at least one offset, 0 otherwise.
+ *
+ * Offsets are compared as stored; per the original note, FUSE always
+ * supplies absolute offsets, so SEEK_CUR / SEEK_END are not considered.
+ */
+int
+locks_overlap (posix_lock_t *l1, posix_lock_t *l2)
+{
+        /* l1 lies entirely before l2 */
+        if (l1->fl_end < l2->fl_start)
+                return 0;
+
+        /* l2 lies entirely before l1 */
+        if (l2->fl_end < l1->fl_start)
+                return 0;
+
+        return 1;
+}
+
+
+/*
+ * Return 1 if both locks were taken by the same owner, i.e. they
+ * carry the same transport and the same client pid; 0 otherwise.
+ */
+int
+same_owner (posix_lock_t *l1, posix_lock_t *l2)
+{
+        if (l1->client_pid != l2->client_pid)
+                return 0;
+
+        return (l1->transport == l2->transport);
+}
+
+
+/*
+ * Walk the lock list of domain {dom} and unlink + free every entry
+ * whose type is F_UNLCK.
+ */
+void
+__delete_unlck_locks (pl_inode_t *pl_inode, gf_lk_domain_t dom)
+{
+        posix_lock_t *cur  = NULL;
+        posix_lock_t *next = NULL;
+
+        list_for_each_entry_safe (cur, next, DOMAIN_HEAD (pl_inode, dom), list) {
+                if (cur->fl_type != F_UNLCK)
+                        continue;
+
+                __delete_lock (pl_inode, cur);
+                __destroy_lock (cur);
+        }
+}
+
+
+/*
+ * Add two locks: allocate a new lock whose range is the smallest
+ * range covering both {l1} and {l2}.
+ *
+ * Only fl_start, fl_end and the list node are set; the caller fills
+ * in type and owner.  The list node is initialised (self-linked) so
+ * that the result can safely be handed to __delete_lock(), which
+ * __insert_and_merge() may do before the lock was ever inserted.
+ *
+ * Returns NULL if the allocation fails.
+ */
+static posix_lock_t *
+add_locks (posix_lock_t *l1, posix_lock_t *l2)
+{
+        posix_lock_t *sum = NULL;
+
+        sum = CALLOC (1, sizeof (posix_lock_t));
+        if (!sum)
+                return NULL;
+
+        INIT_LIST_HEAD (&sum->list);
+
+        sum->fl_start = min (l1->fl_start, l2->fl_start);
+        sum->fl_end   = max (l1->fl_end, l2->fl_end);
+
+        return sum;
+}
+
+/* Subtract two locks */
+
+/* Up to three pieces produced by subtract_locks(); unused slots are NULL */
+struct _values {
+        posix_lock_t *locks[3];
+};
+
+/*
+ * Allocate a copy of {src}.  The copy gets its own self-linked list
+ * node: a plain memcpy would leave it pointing at {src}'s neighbours
+ * (or at {src} itself), and the callers free {src} soon afterwards.
+ * Aborts via ERR_ABORT on allocation failure, like the code it replaces.
+ */
+static posix_lock_t *
+copy_lock_piece (posix_lock_t *src)
+{
+        posix_lock_t *piece = NULL;
+
+        piece = CALLOC (1, sizeof (posix_lock_t));
+        ERR_ABORT (piece);
+
+        memcpy (piece, src, sizeof (posix_lock_t));
+        INIT_LIST_HEAD (&piece->list);
+
+        return piece;
+}
+
+/*
+ * Split {big} around {small}.
+ *
+ * {small} must always be contained inside {big}.  The result holds
+ * newly allocated locks:
+ *   - the parts of {big} not covered by {small}, keeping {big}'s
+ *     type and owner, and
+ *   - a copy of {small} for the covered part (when the two ranges are
+ *     identical, a copy of {big} with {small}'s type instead).
+ *
+ * Neither input is modified or freed.  If the ranges match none of
+ * the expected layouts, a debug message is logged and all slots stay
+ * NULL.
+ */
+static struct _values
+subtract_locks (posix_lock_t *big, posix_lock_t *small)
+{
+        struct _values v = { .locks = {0, 0, 0} };
+
+        if ((big->fl_start == small->fl_start) &&
+            (big->fl_end == small->fl_end)) {
+                /* both edges coincide with big */
+                v.locks[0] = copy_lock_piece (big);
+                v.locks[0]->fl_type = small->fl_type;
+        }
+        else if ((small->fl_start > big->fl_start) &&
+                 (small->fl_end < big->fl_end)) {
+                /* both edges lie inside big */
+                v.locks[0] = copy_lock_piece (big);
+                v.locks[0]->fl_end = small->fl_start - 1;
+
+                v.locks[1] = copy_lock_piece (small);
+
+                v.locks[2] = copy_lock_piece (big);
+                v.locks[2]->fl_start = small->fl_end + 1;
+        }
+        /* one edge coincides with big */
+        else if (small->fl_start == big->fl_start) {
+                v.locks[0] = copy_lock_piece (big);
+                v.locks[0]->fl_start = small->fl_end + 1;
+
+                v.locks[1] = copy_lock_piece (small);
+        }
+        else if (small->fl_end == big->fl_end) {
+                v.locks[0] = copy_lock_piece (big);
+                v.locks[0]->fl_end = small->fl_start - 1;
+
+                v.locks[1] = copy_lock_piece (small);
+        }
+        else {
+                gf_log ("posix-locks", GF_LOG_DEBUG,
+                        "unexpected case in subtract_locks");
+        }
+
+        return v;
+}
+
+/*
+ * Scan the lock list of domain {dom} from its head and return the
+ * first lock that is not blocked and whose range overlaps {lock}.
+ * Lock type and ownership are not looked at.  Returns NULL when no
+ * such lock exists.
+ */
+static posix_lock_t *
+first_overlap (pl_inode_t *pl_inode, posix_lock_t *lock,
+               gf_lk_domain_t dom)
+{
+        posix_lock_t *held = NULL;
+
+        list_for_each_entry (held, DOMAIN_HEAD (pl_inode, dom), list) {
+                if (!held->blocked && locks_overlap (held, lock))
+                        return held;
+        }
+
+        return NULL;
+}
+
+
+
+/*
+ * Return 1 if {lock} can be granted right now in domain {dom}, 0 if
+ * it conflicts with a lock already on the list.
+ *
+ * A listed lock is a conflict only when all of these hold: it is not
+ * blocked, its range overlaps {lock}, {lock} is not an unlock request,
+ * the owners differ, and at least one of the two is a write lock.
+ */
+int
+pl_is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock,
+                      gf_lk_domain_t dom)
+{
+        posix_lock_t *held = NULL;
+
+        list_for_each_entry (held, DOMAIN_HEAD (pl_inode, dom), list) {
+                if (held->blocked)
+                        continue;
+
+                if (!locks_overlap (lock, held))
+                        continue;
+
+                if ((held->fl_type != F_WRLCK) && (lock->fl_type != F_WRLCK))
+                        continue;
+
+                if (lock->fl_type == F_UNLCK)
+                        continue;
+
+                if (same_owner (held, lock))
+                        continue;
+
+                return 0;
+        }
+
+        return 1;
+}
+
+
+extern void do_blocked_rw (pl_inode_t *);
+
+
+/*
+ * Insert {lock} into the lock list of domain {dom}, merging it with or
+ * splitting it against overlapping locks of the same owner.
+ *
+ * Ownership of {lock} passes to this function: it ends up on the list,
+ * or is freed after being folded into other locks.  An F_UNLCK
+ * request is never left on the list.
+ *
+ * For each overlapping lock {conf} of the same owner:
+ *   - same type:      both are replaced by their union, which is then
+ *                     inserted recursively;
+ *   - different type: the union (with conf's type) is split around
+ *                     {lock}, and every non-F_UNLCK piece is inserted
+ *                     recursively.
+ *
+ * The union and the split pieces are freshly allocated, and the
+ * recursion may call __delete_lock() on them before they were ever
+ * linked.  Their list nodes are therefore (re)initialised here first:
+ * the node of a union may be zeroed, and a memcpy'd piece may carry
+ * pointers into a lock that has just been freed.
+ */
+static void
+__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock,
+                    gf_lk_domain_t dom)
+{
+        posix_lock_t  *conf = NULL;
+        posix_lock_t  *t    = NULL;
+        posix_lock_t  *sum  = NULL;
+        int            i    = 0;
+        struct _values v    = { .locks = {0, 0, 0} };
+
+        list_for_each_entry_safe (conf, t, DOMAIN_HEAD (pl_inode, dom), list) {
+                if (!locks_overlap (conf, lock))
+                        continue;
+
+                if (same_owner (conf, lock)) {
+                        if (conf->fl_type == lock->fl_type) {
+                                sum = add_locks (lock, conf);
+                                /* out of memory: abort like the other
+                                   allocations in this file instead of
+                                   dereferencing NULL */
+                                ERR_ABORT (sum);
+                                INIT_LIST_HEAD (&sum->list);
+
+                                sum->fl_type    = lock->fl_type;
+                                sum->transport  = lock->transport;
+                                sum->client_pid = lock->client_pid;
+
+                                __delete_lock (pl_inode, conf);
+                                __destroy_lock (conf);
+
+                                __destroy_lock (lock);
+                                __insert_and_merge (pl_inode, sum, dom);
+
+                                return;
+                        } else {
+                                sum = add_locks (lock, conf);
+                                ERR_ABORT (sum);
+                                INIT_LIST_HEAD (&sum->list);
+
+                                sum->fl_type    = conf->fl_type;
+                                sum->transport  = conf->transport;
+                                sum->client_pid = conf->client_pid;
+
+                                v = subtract_locks (sum, lock);
+
+                                __delete_lock (pl_inode, conf);
+                                __destroy_lock (conf);
+
+                                __delete_lock (pl_inode, lock);
+                                __destroy_lock (lock);
+
+                                __destroy_lock (sum);
+
+                                for (i = 0; i < 3; i++) {
+                                        if (!v.locks[i])
+                                                continue;
+
+                                        /* drop list pointers copied
+                                           from the freed locks */
+                                        INIT_LIST_HEAD (&v.locks[i]->list);
+
+                                        if (v.locks[i]->fl_type == F_UNLCK) {
+                                                __destroy_lock (v.locks[i]);
+                                                continue;
+                                        }
+                                        __insert_and_merge (pl_inode,
+                                                            v.locks[i], dom);
+                                }
+
+                                __delete_unlck_locks (pl_inode, dom);
+                                return;
+                        }
+                }
+
+                /* an unlock request never affects other owners' locks */
+                if (lock->fl_type == F_UNLCK) {
+                        continue;
+                }
+
+                /* overlapping read locks of different owners coexist */
+                if ((conf->fl_type == F_RDLCK) && (lock->fl_type == F_RDLCK)) {
+                        pl_insert_lock (pl_inode, lock, dom);
+                        return;
+                }
+        }
+
+        /* no conflicts, so just insert */
+        if (lock->fl_type != F_UNLCK) {
+                pl_insert_lock (pl_inode, lock, dom);
+        } else {
+                __destroy_lock (lock);
+        }
+}
+
+
+/*
+ * Try to grant the blocked locks of domain {dom}.
+ *
+ * Pass 1 moves every blocked lock that overlaps no unblocked lock onto
+ * a private list (clearing its blocked flag).  Pass 2 re-checks each
+ * candidate with pl_is_lock_grantable(): a grantable lock is merged
+ * into the domain list, and a freshly allocated posix_lock_t carrying
+ * its frame and its range (as a struct flock) is added to {granted}
+ * for the caller to unwind.  Candidates that cannot be granted, or for
+ * which that allocation fails, go back on the domain list as blocked.
+ *
+ * grant_blocked_locks() calls this with pl_inode->mutex held.
+ */
+void
+__grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode,
+ gf_lk_domain_t dom, struct list_head *granted)
+{
+ struct list_head tmp_list;
+ posix_lock_t *l = NULL;
+ posix_lock_t *tmp = NULL;
+ posix_lock_t *conf = NULL;
+
+ INIT_LIST_HEAD (&tmp_list);
+
+ /* pass 1: collect blocked locks with no unblocked overlap */
+ list_for_each_entry_safe (l, tmp, DOMAIN_HEAD (pl_inode, dom), list) {
+ if (l->blocked) {
+ conf = first_overlap (pl_inode, l, dom);
+ if (conf)
+ continue;
+
+ l->blocked = 0;
+ list_move_tail (&l->list, &tmp_list);
+ }
+ }
+
+ /* pass 2: grant the candidates one at a time */
+ list_for_each_entry_safe (l, tmp, &tmp_list, list) {
+ list_del_init (&l->list);
+
+ if (pl_is_lock_grantable (pl_inode, l, dom)) {
+ /* {conf} is reused here as the record handed back to the caller */
+ conf = CALLOC (1, sizeof (*conf));
+
+ if (!conf) {
+ l->blocked = 1;
+ pl_insert_lock (pl_inode, l, dom);
+ continue;
+ }
+
+ /* the frame moves to the record; {l} keeps none */
+ conf->frame = l->frame;
+ l->frame = NULL;
+
+ posix_lock_to_flock (l, &conf->user_flock);
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s (pid=%d) %"PRId64" - %"PRId64" => Granted",
+ l->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ l->client_pid,
+ l->user_flock.l_start,
+ l->user_flock.l_len);
+
+ /* {l} is consumed by the merge; do not touch it afterwards */
+ __insert_and_merge (pl_inode, l, dom);
+
+ list_add (&conf->list, granted);
+ } else {
+ l->blocked = 1;
+ pl_insert_lock (pl_inode, l, dom);
+ }
+ }
+}
+
+
+/*
+ * Grant whatever blocked locks of domain {dom} can now be granted and
+ * answer their waiting callers.
+ *
+ * The granting itself runs under pl_inode->mutex; the replies
+ * (STACK_UNWIND with op_ret 0, op_errno 0 and the granted flock) are
+ * sent after the mutex has been released.  Each record on the granted
+ * list is freed once its frame has been unwound.
+ */
+void
+grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, gf_lk_domain_t dom)
+{
+ struct list_head granted_list;
+ posix_lock_t *tmp = NULL;
+ posix_lock_t *lock = NULL;
+
+ INIT_LIST_HEAD (&granted_list);
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __grant_blocked_locks (this, pl_inode, dom, &granted_list);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ /* reply outside the mutex */
+ list_for_each_entry_safe (lock, tmp, &granted_list, list) {
+ list_del_init (&lock->list);
+
+ STACK_UNWIND (lock->frame, 0, 0, &lock->user_flock);
+
+ FREE (lock);
+ }
+
+ return;
+}
+
+
+/*
+ * Apply the lock request {lock} to {pl_inode} in domain {dom}.
+ *
+ * Under pl_inode->mutex one of three things happens:
+ *   - grantable:          the lock is merged into the list, returns 0;
+ *   - not grantable and
+ *     {can_block} set:    the lock is queued as blocked, returns -1
+ *                         with errno left at 0;
+ *   - otherwise:          nothing is queued, returns -1 with
+ *                         errno = EAGAIN.  {lock} is not freed here.
+ *
+ * In every case grant_blocked_locks() and do_blocked_rw() are run
+ * afterwards, outside the mutex.
+ *
+ * NOTE(review): callers apparently tell "blocked" from "refused" by
+ * errno, since both return -1 -- confirm against posix.c.
+ */
+int
+pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
+ int can_block, gf_lk_domain_t dom)
+{
+ int ret = 0;
+
+ errno = 0;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ if (pl_is_lock_grantable (pl_inode, lock, dom)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s (pid=%d) %"PRId64" - %"PRId64" => OK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+ /* {lock} is consumed by the merge */
+ __insert_and_merge (pl_inode, lock, dom);
+ } else if (can_block) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s (pid=%d) %"PRId64" - %"PRId64" => Blocked",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+ lock->blocked = 1;
+ pl_insert_lock (pl_inode, lock, dom);
+ ret = -1;
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s (pid=%d) %"PRId64" - %"PRId64" => NOK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+ errno = EAGAIN;
+ ret = -1;
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ /* the list may have changed: wake up whatever can proceed now */
+ grant_blocked_locks (this, pl_inode, dom);
+
+ do_blocked_rw (pl_inode);
+
+ return ret;
+}
+
+
+/*
+ * Look for an unblocked lock in domain {dom} that overlaps {lock}.
+ * If one exists it is returned.  Otherwise {lock} itself is returned
+ * with its type rewritten to F_UNLCK.
+ */
+posix_lock_t *
+pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock, gf_lk_domain_t dom)
+{
+        posix_lock_t *holder = first_overlap (pl_inode, lock, dom);
+
+        if (holder != NULL)
+                return holder;
+
+        lock->fl_type = F_UNLCK;
+
+        return lock;
+}
diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h
new file mode 100644
index 00000000000..135f33011bf
--- /dev/null
+++ b/xlators/features/locks/src/common.h
@@ -0,0 +1,59 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __COMMON_H__
+#define __COMMON_H__
+
+posix_lock_t *
+new_posix_lock (struct flock *flock, transport_t *transport, pid_t client_pid);
+
+pl_inode_t *
+pl_inode_get (xlator_t *this, inode_t *inode);
+
+posix_lock_t *
+pl_getlk (pl_inode_t *inode, posix_lock_t *lock, gf_lk_domain_t domain);
+
+int
+pl_setlk (xlator_t *this, pl_inode_t *inode, posix_lock_t *lock,
+ int can_block, gf_lk_domain_t domain);
+
+int
+pl_is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock,
+ gf_lk_domain_t dom);
+
+void
+pl_insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock, gf_lk_domain_t dom);
+
+void
+grant_blocked_locks (xlator_t *this, pl_inode_t *inode, gf_lk_domain_t domain);
+
+void
+posix_lock_to_flock (posix_lock_t *lock, struct flock *flock);
+
+int
+locks_overlap (posix_lock_t *l1, posix_lock_t *l2);
+
+int
+same_owner (posix_lock_t *l1, posix_lock_t *l2);
+
+void __delete_lock (pl_inode_t *, posix_lock_t *);
+
+void __destroy_lock (posix_lock_t *);
+
+#endif /* __COMMON_H__ */
diff --git a/xlators/features/locks/src/internal.c b/xlators/features/locks/src/internal.c
new file mode 100644
index 00000000000..7f454a78e22
--- /dev/null
+++ b/xlators/features/locks/src/internal.c
@@ -0,0 +1,762 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "compat.h"
+#include "xlator.h"
+#include "inode.h"
+#include "logging.h"
+#include "common-utils.h"
+#include "list.h"
+
+#include "locks.h"
+#include "common.h"
+
+
+
+/*
+ * Drop every inodelk lock on {pinode} that was taken through
+ * transport {trans}.  Always returns 0.
+ *
+ * Two defects of the previous version are fixed here:
+ *   - it unlinked and freed {tmp}, the iterator's look-ahead entry,
+ *     instead of the matching entry {l}.  The wrong lock was
+ *     destroyed, and for a match at the tail {tmp} is not a lock at all;
+ *   - it walked pinode->dir_list, which holds pl_entry_lock_t objects
+ *     linked through inode_list.  inodelk locks are inserted into the
+ *     GF_LOCK_INTERNAL domain, so that is the list to scan.
+ *
+ * Frames of blocked locks that get dropped are not unwound here.
+ */
+static int
+delete_locks_of_transport (pl_inode_t *pinode, transport_t *trans)
+{
+        posix_lock_t *tmp = NULL;
+        posix_lock_t *l   = NULL;
+
+        list_for_each_entry_safe (l, tmp,
+                                  DOMAIN_HEAD (pinode, GF_LOCK_INTERNAL),
+                                  list) {
+                if (l->transport == trans) {
+                        __delete_lock (pinode, l);
+                        __destroy_lock (l);
+                }
+        }
+
+        return 0;
+}
+
+
+/*
+ * Search the GF_LOCK_INTERNAL domain of {pinode} for a lock with the
+ * same owner and exactly the same start and end offsets as {lock}.
+ * Returns the first such lock, or NULL if there is none.
+ */
+static posix_lock_t *
+__find_exact_matching_lock (pl_inode_t *pinode, posix_lock_t *lock)
+{
+        posix_lock_t *cand = NULL;
+
+        list_for_each_entry (cand, DOMAIN_HEAD (pinode, GF_LOCK_INTERNAL), list) {
+                if (!same_owner (cand, lock))
+                        continue;
+
+                if (cand->fl_start != lock->fl_start)
+                        continue;
+
+                if (cand->fl_end != lock->fl_end)
+                        continue;
+
+                return cand;
+        }
+
+        return NULL;
+}
+
+/**
+ * pl_inodelk:
+ *
+ * This fop provides fcntl-style locking on files for internal
+ * purposes. Locks held through this fop reside in a domain different
+ * from those held by applications. This fop is for the use of AFR.
+ *
+ * pl_inodelk_common is the shared implementation behind pl_inodelk
+ * and pl_finodelk.
+ *
+ * @frame: call frame; always unwound with (op_ret, op_errno)
+ * @inode: inode to lock
+ * @cmd:   F_SETLK or F_SETLKW; anything else fails with ENOTSUP
+ * @flock: region and type; only F_WRLCK and F_UNLCK are supported
+ *
+ * A request whose frame carries pid 0 means "release every lock taken
+ * through this transport".
+ *
+ * Always returns 0; the outcome is reported through STACK_UNWIND.
+ */
+
+
+static int
+pl_inodelk_common (call_frame_t *frame, xlator_t *this,
+                   inode_t *inode, int32_t cmd, struct flock *flock)
+{
+        int32_t op_ret    = -1;
+        int32_t op_errno  = 0;
+        int     can_block = 0;
+
+        posix_locks_private_t *priv       = NULL;
+        transport_t           *transport  = NULL;
+        pid_t                  client_pid = -1;
+        pl_inode_t            *pinode     = NULL;
+
+        posix_lock_t *reqlock   = NULL;
+        posix_lock_t *matchlock = NULL;
+
+        VALIDATE_OR_GOTO (frame, unwind);
+        VALIDATE_OR_GOTO (inode, unwind);
+        VALIDATE_OR_GOTO (flock, unwind);
+
+        if ((flock->l_start < 0) || (flock->l_len < 0)) {
+                op_errno = EINVAL;
+                goto unwind;
+        }
+
+        transport  = frame->root->trans;
+        client_pid = frame->root->pid;
+
+        priv = (posix_locks_private_t *) this->private;
+
+        VALIDATE_OR_GOTO (priv, unwind);
+
+        pinode = pl_inode_get (this, inode);
+        if (!pinode) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        if (client_pid == 0) {
+                /*
+                   special case: this means release all locks
+                   from this transport
+                */
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "releasing all locks from transport %p", transport);
+
+                /* the lock list is only ever modified under the inode
+                   mutex elsewhere in this file; do the same here */
+                pthread_mutex_lock (&pinode->mutex);
+                {
+                        delete_locks_of_transport (pinode, transport);
+                }
+                pthread_mutex_unlock (&pinode->mutex);
+
+                /* NOTE(review): op_ret is still -1 here, whereas the
+                   entrylk counterpart reports success -- confirm what
+                   callers expect */
+                goto unwind;
+        }
+
+        reqlock = new_posix_lock (flock, transport, client_pid);
+        if (!reqlock) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                op_ret   = -1;
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        pthread_mutex_lock (&pinode->mutex);
+        {
+                switch (cmd) {
+                case F_SETLKW:
+                        can_block = 1;
+                        reqlock->frame = frame;
+                        reqlock->this  = this;
+
+                        /* fall through */
+
+                case F_SETLK:
+                        memcpy (&reqlock->user_flock, flock, sizeof (struct flock));
+
+                        switch (flock->l_type) {
+
+                        case F_WRLCK:
+                                if (!pl_is_lock_grantable (pinode, reqlock, GF_LOCK_INTERNAL)) {
+                                        if (can_block) {
+                                                gf_log (this->name, GF_LOG_DEBUG,
+                                                        "%s (pid=%d) %"PRId64" - %"PRId64" => blocked",
+                                                        reqlock->fl_type == F_UNLCK ? "unlock" : "lock",
+                                                        reqlock->client_pid,
+                                                        reqlock->user_flock.l_start,
+                                                        reqlock->user_flock.l_len);
+                                                /* NOTE(review): the lock is queued
+                                                   without its blocked flag and the
+                                                   frame is still unwound below --
+                                                   confirm the intended F_SETLKW
+                                                   semantics */
+                                                pl_insert_lock (pinode, reqlock, GF_LOCK_INTERNAL);
+
+                                                goto unlock;
+                                        }
+
+                                        /* log first: reqlock must not be
+                                           read after it has been freed */
+                                        gf_log (this->name, GF_LOG_DEBUG,
+                                                "%s (pid=%d) %"PRId64" - %"PRId64" => NOK",
+                                                reqlock->fl_type == F_UNLCK ? "unlock" : "lock",
+                                                reqlock->client_pid, reqlock->user_flock.l_start,
+                                                reqlock->user_flock.l_len);
+
+                                        __destroy_lock (reqlock);
+                                        reqlock = NULL;
+
+                                        op_errno = EAGAIN;
+
+                                        goto unlock;
+                                }
+
+                                gf_log (this->name, GF_LOG_DEBUG,
+                                        "%s (pid=%d) %"PRId64" - %"PRId64" => OK",
+                                        reqlock->fl_type == F_UNLCK ? "unlock" : "lock",
+                                        reqlock->client_pid,
+                                        reqlock->user_flock.l_start,
+                                        reqlock->user_flock.l_len);
+                                pl_insert_lock (pinode, reqlock, GF_LOCK_INTERNAL);
+
+                                break;
+
+                        case F_UNLCK:
+                                /* only an exact (owner, start, end) match
+                                   can be released */
+                                matchlock = __find_exact_matching_lock (pinode, reqlock);
+
+                                __destroy_lock (reqlock);
+                                reqlock = NULL;
+
+                                if (!matchlock) {
+                                        op_errno = EINVAL;
+                                        goto unlock;
+                                }
+
+                                __delete_lock (pinode, matchlock);
+                                __destroy_lock (matchlock);
+
+                                break;
+
+                        default:
+                                op_errno = ENOTSUP;
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "lock type %d not supported for [F]INODELK",
+                                        flock->l_type);
+                                goto unlock;
+                        }
+
+
+                        break;
+
+                default:
+                        op_errno = ENOTSUP;
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "lock command F_GETLK not supported for [F]INODELK (cmd=%d)",
+                                cmd);
+                        goto unlock;
+                }
+
+                op_ret = 0;
+
+        unlock:
+                if (pinode)
+                        pthread_mutex_unlock (&pinode->mutex);
+        }
+
+unwind:
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+
+/*
+ * inodelk fop, path flavour: the inode is taken from {loc} and the
+ * request is handed to pl_inodelk_common().
+ */
+int
+pl_inodelk (call_frame_t *frame, xlator_t *this,
+            loc_t *loc, int32_t cmd, struct flock *flock)
+{
+        inode_t *target = loc->inode;
+
+        return pl_inodelk_common (frame, this, target, cmd, flock);
+}
+
+
+/*
+ * inodelk fop, fd flavour: the inode is taken from {fd} and the
+ * request is handed to pl_inodelk_common().
+ */
+int
+pl_finodelk (call_frame_t *frame, xlator_t *this,
+             fd_t *fd, int32_t cmd, struct flock *flock)
+{
+        inode_t *target = fd->inode;
+
+        return pl_inodelk_common (frame, this, target, cmd, flock);
+}
+
+
+/**
+ * types_conflict - do two types of lock conflict?
+ * @t1: type
+ * @t2: type
+ *
+ * Returns 0 only when both types are ENTRYLK_RDLCK; every other
+ * combination returns 1.
+ */
+
+static int
+types_conflict (entrylk_type t1, entrylk_type t2)
+{
+        if (t1 != ENTRYLK_RDLCK)
+                return 1;
+
+        return (t2 != ENTRYLK_RDLCK);
+}
+
+/**
+ * all_names - does a basename represent all names?
+ * @basename: name to check
+ *
+ * A NULL basename stands for "every name in the directory".
+ * Evaluates to 1 for NULL and 0 otherwise.  The argument is
+ * parenthesised so that any expression can be passed safely.
+ */
+
+#define all_names(basename) (((basename) == NULL) ? 1 : 0)
+
+/**
+ * names_conflict - do two names conflict?
+ * @n1: name
+ * @n2: name
+ *
+ * Two names conflict when either one stands for all names (NULL), or
+ * when both are the same string.  Returns 1 on conflict, 0 otherwise.
+ */
+
+static int
+names_conflict (const char *n1, const char *n2)
+{
+        if (all_names (n1) || all_names (n2))
+                return 1;
+
+        return (strcmp (n1, n2) == 0);
+}
+
+
+/*
+ * Return 1 if the two names are equal: both NULL, or both non-NULL
+ * with identical contents.  A NULL name never equals a non-NULL one.
+ */
+static int
+names_equal (const char *n1, const char *n2)
+{
+        if ((n1 == NULL) || (n2 == NULL))
+                return (n1 == n2);
+
+        return (strcmp (n1, n2) == 0);
+}
+
+/**
+ * lock_grantable - is this lock grantable?
+ * @pinode: inode in which to look
+ * @basename: name we're trying to lock
+ * @type: type of lock
+ *
+ * Returns the first lock on pinode->dir_list that conflicts with the
+ * request in both name and type, or NULL when there is no conflict
+ * (i.e. NULL means "grantable").
+ */
+
+static pl_entry_lock_t *
+__lock_grantable (pl_inode_t *pinode, const char *basename, entrylk_type type)
+{
+        pl_entry_lock_t *held = NULL;
+
+        /* an empty list simply yields no iterations */
+        list_for_each_entry (held, &pinode->dir_list, inode_list) {
+                if (!names_conflict (held->basename, basename))
+                        continue;
+
+                if (types_conflict (held->type, type))
+                        return held;
+        }
+
+        return NULL;
+}
+
+/**
+ * find_most_matching_lock - find the lock struct which most matches in order of:
+ *                           lock on the exact basename ||
+ *                           an all_names lock
+ *
+ * @pinode: inode in which to look
+ * @basename: name to search for
+ *
+ * When several locks qualify, the last one on pinode->dir_list in each
+ * category is remembered.  Returns NULL if neither kind is present.
+ */
+
+static pl_entry_lock_t *
+__find_most_matching_lock (pl_inode_t *pinode, const char *basename)
+{
+        pl_entry_lock_t *cur       = NULL;
+        pl_entry_lock_t *wildcard  = NULL;
+        pl_entry_lock_t *same_name = NULL;
+
+        list_for_each_entry (cur, &pinode->dir_list, inode_list) {
+                if (all_names (cur->basename)) {
+                        wildcard = cur;
+                        continue;
+                }
+
+                if (names_equal (cur->basename, basename))
+                        same_name = cur;
+        }
+
+        if (same_name != NULL)
+                return same_name;
+
+        return wildcard;
+}
+
+
+/**
+ * new_entrylk_lock - allocate a new dir lock with the given parameters
+ * @pinode: inode the lock is meant for (not used here)
+ * @basename: basename for the lock; NULL means "all names"
+ * @type: type of the lock
+ * @trans: transport that owns the lock
+ *
+ * The basename is duplicated, so the caller keeps ownership of its
+ * string.  A read lock starts with read_count = 1.  The new lock is
+ * not linked into any list.
+ *
+ * Returns NULL on allocation failure.  That includes a failed strdup:
+ * storing a NULL basename in that case would silently turn a lock on
+ * one name into a lock on the whole directory.
+ */
+
+static pl_entry_lock_t *
+new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type,
+                  transport_t *trans)
+{
+        pl_entry_lock_t *newlock = NULL;
+
+        newlock = CALLOC (1, sizeof (pl_entry_lock_t));
+        if (!newlock) {
+                goto out;
+        }
+
+        if (basename) {
+                newlock->basename = strdup (basename);
+                if (!newlock->basename) {
+                        FREE (newlock);
+                        newlock = NULL;
+                        goto out;
+                }
+        }
+
+        newlock->type  = type;
+        newlock->trans = trans;
+
+        if (type == ENTRYLK_RDLCK)
+                newlock->read_count = 1;
+
+        INIT_LIST_HEAD (&newlock->inode_list);
+        INIT_LIST_HEAD (&newlock->blocked_locks);
+
+out:
+        return newlock;
+}
+
+/**
+ * lock_name - lock a name in a directory
+ * @pinode: inode for the directory in which to lock
+ * @basename: name of the entry to lock
+ *            if null, lock the entire directory
+ * @type: ENTRYLK_RDLCK or ENTRYLK_WRLCK
+ * @frame: frame of the request; remembered if the request blocks
+ * @this: translator; remembered if the request blocks
+ * @nonblock: if non-zero, fail instead of queueing on conflict
+ *
+ * the entire directory being locked is represented as: a single
+ * pl_entry_lock_t present in the entrylk_locks list with its
+ * basename = NULL
+ *
+ * Returns 0 when the lock is held on return.  On conflict returns
+ * -EAGAIN; unless @nonblock is set the request has then been queued
+ * on the conflicting lock's blocked_locks list together with @frame.
+ * Returns -ENOMEM on allocation failure, and -EINVAL for a @type that
+ * is neither read nor write.
+ *
+ * A read lock on a name that already has a read lock only bumps that
+ * lock's read_count.  The existing lock stays on pinode->dir_list and
+ * therefore must NOT be freed here: earlier code did free it, leaving
+ * a dangling entry on the list.
+ */
+
+int
+__lock_name (pl_inode_t *pinode, const char *basename, entrylk_type type,
+             call_frame_t *frame, xlator_t *this, int nonblock)
+{
+        pl_entry_lock_t *lock = NULL;
+        pl_entry_lock_t *conf = NULL;
+
+        transport_t *trans = NULL;
+
+        int ret = -EINVAL;
+
+        trans = frame->root->trans;
+
+        conf = __lock_grantable (pinode, basename, type);
+        if (conf) {
+                ret = -EAGAIN;
+                if (nonblock)
+                        goto out;
+
+                lock = new_entrylk_lock (pinode, basename, type, trans);
+
+                if (!lock) {
+                        ret = -ENOMEM;
+                        goto out;
+                }
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "blocking lock: {pinode=%p, basename=%s}",
+                        pinode, basename);
+
+                lock->frame   = frame;
+                lock->this    = this;
+                lock->blocked = 1;
+
+                /* wait on the lock that is in the way */
+                list_add (&lock->blocked_locks, &conf->blocked_locks);
+
+
+                goto out;
+        }
+
+        switch (type) {
+        case ENTRYLK_RDLCK:
+                lock = __find_most_matching_lock (pinode, basename);
+
+                if (lock && names_equal (lock->basename, basename)) {
+                        /* share the existing read lock; it remains
+                           linked in dir_list and owned by the list */
+                        lock->read_count++;
+
+                        lock = NULL;
+                } else {
+                        lock = new_entrylk_lock (pinode, basename, type, trans);
+
+                        if (!lock) {
+                                ret = -ENOMEM;
+                                goto out;
+                        }
+
+                        list_add (&lock->inode_list, &pinode->dir_list);
+                }
+                break;
+
+        case ENTRYLK_WRLCK:
+                lock = new_entrylk_lock (pinode, basename, type, trans);
+
+                if (!lock) {
+                        ret = -ENOMEM;
+                        goto out;
+                }
+
+                list_add (&lock->inode_list, &pinode->dir_list);
+                break;
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+
+/**
+ * unlock_name - unlock a name in a directory
+ * @pinode: inode for the directory to unlock in
+ * @basename: name of the entry to unlock
+ *            if null, unlock the entire directory
+ * @type: type of the lock being released
+ *
+ * Returns the lock once it has been taken off pinode->dir_list: that
+ * happens for a write lock, or for a read lock whose read_count
+ * dropped to zero.  Returns NULL when the read lock is still shared,
+ * or when no lock with this exact name and type was found (a message
+ * is logged in that case).  The returned lock is not freed here.
+ */
+
+pl_entry_lock_t *
+__unlock_name (pl_inode_t *pinode, const char *basename, entrylk_type type)
+{
+        pl_entry_lock_t *found = NULL;
+
+        found = __find_most_matching_lock (pinode, basename);
+        if (found == NULL) {
+                gf_log ("locks", GF_LOG_DEBUG,
+                        "unlock on %s (type=%s) attempted but no matching lock found",
+                        basename, type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
+                        "ENTRYLK_WRLCK");
+                return NULL;
+        }
+
+        if (!names_equal (found->basename, basename)
+            || found->type != type) {
+                gf_log ("locks", GF_LOG_ERROR,
+                        "unlock for a non-existing lock!");
+                return NULL;
+        }
+
+        /* one reader fewer on a shared read lock */
+        if (type == ENTRYLK_RDLCK)
+                found->read_count--;
+
+        if (type != ENTRYLK_WRLCK && found->read_count != 0)
+                return NULL;
+
+        list_del (&found->inode_list);
+
+        return found;
+}
+
+
+/*
+ * Retry every request that was queued on {lock}'s blocked_locks list.
+ *
+ * Each queued request is unlinked and re-submitted through
+ * __lock_name() in blocking mode.  If that returns 0 the queued
+ * record is moved to {granted} so the caller can unwind its frame.
+ * Otherwise the queued record is freed; when __lock_name() blocked
+ * again it has already created a new record holding the same frame.
+ *
+ * Both callers in this file invoke this with pl_inode->mutex held.
+ */
+void
+__grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
+ pl_entry_lock_t *lock,
+ struct list_head *granted)
+{
+ int bl_ret = 0;
+ pl_entry_lock_t *bl = NULL;
+ pl_entry_lock_t *tmp = NULL;
+
+ list_for_each_entry_safe (bl, tmp, &lock->blocked_locks,
+ blocked_locks) {
+ list_del_init (&bl->blocked_locks);
+
+ /* TODO: error checking */
+
+ gf_log ("locks", GF_LOG_DEBUG,
+ "trying to unblock: {pinode=%p, basename=%s}",
+ pl_inode, bl->basename);
+
+ /* last argument 0: allowed to block again */
+ bl_ret = __lock_name (pl_inode, bl->basename, bl->type,
+ bl->frame, bl->this, 0);
+
+ if (bl_ret == 0) {
+ list_add (&bl->blocked_locks, granted);
+ } else {
+ if (bl->basename)
+ FREE (bl->basename);
+ FREE (bl);
+ }
+ }
+ return;
+}
+
+
+/*
+ * After {unlocked} has been released, retry the requests that were
+ * waiting on it and answer those that are now granted.
+ *
+ * The retry runs under pl_inode->mutex; the replies (STACK_UNWIND with
+ * op_ret 0, op_errno 0) are sent after the mutex has been released.
+ * Every granted record is freed after its frame is unwound, and
+ * finally {unlocked} itself (with its basename) is freed, so the
+ * caller must not use it afterwards.
+ */
+void
+grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
+ pl_entry_lock_t *unlocked)
+{
+ struct list_head granted_list;
+ pl_entry_lock_t *tmp = NULL;
+ pl_entry_lock_t *lock = NULL;
+
+ INIT_LIST_HEAD (&granted_list);
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __grant_blocked_entry_locks (this, pl_inode, unlocked,
+ &granted_list);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ /* reply outside the mutex */
+ list_for_each_entry_safe (lock, tmp, &granted_list, blocked_locks) {
+ list_del_init (&lock->blocked_locks);
+
+ STACK_UNWIND (lock->frame, 0, 0);
+
+ FREE (lock->basename);
+ FREE (lock);
+ }
+
+ /* the released lock is consumed here */
+ FREE (unlocked->basename);
+ FREE (unlocked);
+
+ return;
+}
+
+
+/**
+ * release_entry_locks_for_transport: release all entry locks from this
+ * transport for this loc_t
+ *
+ * Under pinode->mutex, every lock on pinode->dir_list whose transport
+ * is {trans} is unlinked, the requests waiting on it are retried, and
+ * the lock is freed.  Requests that got granted by the retries are
+ * unwound (op_ret 0, op_errno 0) and freed after the mutex has been
+ * released.  Always returns 0.
+ */
+
+static int
+release_entry_locks_for_transport (xlator_t *this, pl_inode_t *pinode,
+ transport_t *trans)
+{
+ pl_entry_lock_t *lock;
+ pl_entry_lock_t *tmp;
+ struct list_head granted;
+
+ INIT_LIST_HEAD (&granted);
+
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ if (list_empty (&pinode->dir_list)) {
+ goto unlock;
+ }
+
+ list_for_each_entry_safe (lock, tmp, &pinode->dir_list,
+ inode_list) {
+ if (lock->trans != trans)
+ continue;
+
+ /* unlink first so the retries below no longer see this lock */
+ list_del_init (&lock->inode_list);
+ __grant_blocked_entry_locks (this, pinode, lock,
+ &granted);
+
+ FREE (lock->basename);
+ FREE (lock);
+ }
+ }
+unlock:
+ pthread_mutex_unlock (&pinode->mutex);
+
+ /* reply to the newly granted requests outside the mutex */
+ list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) {
+ list_del_init (&lock->blocked_locks);
+
+ STACK_UNWIND (lock->frame, 0, 0);
+
+ FREE (lock->basename);
+ FREE (lock);
+ }
+
+ return 0;
+}
+
+
+/**
+ * pl_entrylk:
+ *
+ * Locking on names (directory entries)
+ *
+ * pl_entrylk_common is the shared implementation behind pl_entrylk and
+ * pl_fentrylk.
+ *
+ * @inode:    directory inode
+ * @basename: entry name to lock or unlock
+ * @cmd:      ENTRYLK_LOCK (may block), ENTRYLK_LOCK_NB or ENTRYLK_UNLOCK
+ * @type:     ENTRYLK_RDLCK or ENTRYLK_WRLCK
+ *
+ * A request whose frame carries pid 0 means "release every entry lock
+ * taken through this transport".
+ *
+ * Always returns 0.  The frame is unwound with (op_ret, op_errno)
+ * before returning, except when a blocking ENTRYLK_LOCK had to be
+ * queued: then the frame stays with the queued request and is unwound
+ * when the lock is granted later.
+ */
+
+int
+pl_entrylk_common (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, const char *basename,
+ entrylk_cmd cmd, entrylk_type type)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+
+ transport_t * transport = NULL;
+ pid_t pid = -1;
+
+ pl_inode_t * pinode = NULL;
+ int ret = -1;
+ pl_entry_lock_t *unlocked = NULL;
+ /* cleared when the frame has been handed to a queued request */
+ char unwind = 1;
+
+ pinode = pl_inode_get (this, inode);
+ if (!pinode) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "out of memory :(");
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ pid = frame->root->pid;
+ transport = frame->root->trans;
+
+ if (pid == 0) {
+ /*
+ this is a special case that means release
+ all locks from this transport
+ */
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "releasing locks for transport %p", transport);
+
+ release_entry_locks_for_transport (this, pinode, transport);
+ op_ret = 0;
+
+ goto out;
+ }
+
+ switch (cmd) {
+ case ENTRYLK_LOCK:
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ ret = __lock_name (pinode, basename, type,
+ frame, this, 0);
+ }
+ pthread_mutex_unlock (&pinode->mutex);
+
+ if (ret < 0) {
+ /* -EAGAIN here means the request was queued with this frame */
+ if (ret == -EAGAIN)
+ unwind = 0;
+ op_errno = -ret;
+ goto out;
+ }
+
+ break;
+
+ case ENTRYLK_LOCK_NB:
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ ret = __lock_name (pinode, basename, type,
+ frame, this, 1);
+ }
+ pthread_mutex_unlock (&pinode->mutex);
+
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ break;
+
+ case ENTRYLK_UNLOCK:
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ unlocked = __unlock_name (pinode, basename, type);
+ }
+ pthread_mutex_unlock (&pinode->mutex);
+
+ /* frees {unlocked} after waking up its waiters */
+ if (unlocked)
+ grant_blocked_entry_locks (this, pinode, unlocked);
+
+ break;
+
+ default:
+ gf_log (this->name, GF_LOG_ERROR,
+ "unexpected case!");
+ goto out;
+ }
+
+ op_ret = 0;
+out:
+ if (unwind) {
+ STACK_UNWIND (frame, op_ret, op_errno);
+ }
+
+ return 0;
+}
+
+
+/*
+ * entrylk fop, path flavour: the directory inode is taken from {loc}
+ * and the request is handed to pl_entrylk_common().
+ */
+int
+pl_entrylk (call_frame_t *frame, xlator_t *this,
+            loc_t *loc, const char *basename,
+            entrylk_cmd cmd, entrylk_type type)
+{
+        inode_t *dir = loc->inode;
+
+        return pl_entrylk_common (frame, this, dir, basename, cmd, type);
+}
+
+
+/*
+ * entrylk fop, fd flavour: the directory inode is taken from {fd} and
+ * the request is handed to pl_entrylk_common().
+ */
+int
+pl_fentrylk (call_frame_t *frame, xlator_t *this,
+             fd_t *fd, const char *basename,
+             entrylk_cmd cmd, entrylk_type type)
+{
+        inode_t *dir = fd->inode;
+
+        return pl_entrylk_common (frame, this, dir, basename, cmd, type);
+}
diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
new file mode 100644
index 00000000000..8ed7bb63f1c
--- /dev/null
+++ b/xlators/features/locks/src/locks.h
@@ -0,0 +1,111 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __POSIX_LOCKS_H__
+#define __POSIX_LOCKS_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "compat-errno.h"
+#include "transport.h"
+#include "stack.h"
+#include "call-stub.h"
+
+struct __pl_fd;
+
+/*
+ * A single byte-range lock record. Instances are linked into a
+ * pl_inode_t lock list (ext_list / int_list); the same structure is
+ * also used as a plain "region + owner" descriptor when checking
+ * whether a read, write or truncate conflicts with existing locks.
+ */
+struct __posix_lock {
+ struct list_head list; /* linkage into a pl_inode_t lock list */
+
+ short fl_type; /* lock type; compared against F_WRLCK by the r/w checks */
+ off_t fl_start; /* first byte of the range */
+ off_t fl_end; /* last byte of the range (inclusive; LLONG_MAX is used for "to EOF") */
+
+ short blocked; /* waiting to acquire */
+ struct flock user_flock; /* the flock supplied by the user */
+ xlator_t *this; /* required for blocked locks */
+ fd_t *fd; /* fd the request came in on (set for blocking requests) */
+
+ call_frame_t *frame; /* frame of a blocking request, kept so it can be answered later */
+
+ /* These two together serve to uniquely identify each process
+ across nodes */
+
+ transport_t *transport; /* to identify client node */
+ pid_t client_pid; /* pid of client process */
+};
+typedef struct __posix_lock posix_lock_t;
+
+/*
+ * A read or write request that is parked because it conflicts with a
+ * mandatory lock. It is queued on pl_inode_t.rw_list and resumed from
+ * its call stub once the conflict is gone.
+ */
+struct __pl_rw_req_t {
+ struct list_head list; /* linkage into pl_inode_t.rw_list */
+ call_stub_t *stub; /* stub that re-issues the fop when resumed */
+ posix_lock_t region; /* byte range and owner of the request */
+};
+typedef struct __pl_rw_req_t pl_rw_req_t;
+
+
+/*
+ * Lock record for a directory entry name (entrylk).
+ * NOTE(review): the code using these fields lives outside this header;
+ * the per-field notes below restate the existing comments and should
+ * be confirmed against the entrylk implementation.
+ */
+struct __entry_lock {
+ struct list_head inode_list; /* list_head back to pl_inode_t */
+ struct list_head blocked_locks; /* locks blocked due to this lock */
+
+ call_frame_t *frame; /* presumably the frame of a blocked request -- TODO confirm */
+ xlator_t *this;
+ int blocked; /* presumably non-zero while waiting to be granted -- TODO confirm */
+
+ const char *basename; /* entry name the lock applies to */
+ entrylk_type type; /* kind of entry lock requested */
+ unsigned int read_count; /* number of read locks */
+ transport_t *trans; /* transport (client connection) owning the lock */
+};
+typedef struct __entry_lock pl_entry_lock_t;
+
+
+/* The "simulated" inode. This contains a list of all the locks associated
+ with this file, plus the queue of read/write requests waiting on those
+ locks. All lists are protected by `mutex`. */
+
+struct __pl_inode {
+ pthread_mutex_t mutex; /* guards every list below */
+
+ struct list_head dir_list; /* list of entry locks */
+ struct list_head ext_list; /* list of fcntl locks */
+ struct list_head int_list; /* list of internal locks */
+ struct list_head rw_list; /* list of waiting r/w requests */
+ int mandatory; /* if mandatory locking is enabled */
+};
+typedef struct __pl_inode pl_inode_t;
+
+/*
+ * Select the lock list of a pl_inode_t for a given lock domain:
+ * ext_list for GF_LOCK_POSIX, int_list for every other domain.
+ *
+ * Both arguments are parenthesized in the expansion so that arbitrary
+ * expressions (e.g. `a ? b : c`, `*pp`) can be passed without
+ * operator-precedence surprises. Each argument is evaluated once.
+ */
+#define DOMAIN_HEAD(pl_inode, dom) (((dom) == GF_LOCK_POSIX)   \
+                                    ? &(pl_inode)->ext_list    \
+                                    : &(pl_inode)->int_list)
+
+
+/* Per-fd state kept by the locks translator. */
+struct __pl_fd {
+ gf_boolean_t nonblocking; /* whether O_NONBLOCK has been set */
+};
+typedef struct __pl_fd pl_fd_t;
+
+
+/* Translator-wide private data, stored in xlator_t.private by init(). */
+typedef struct {
+ gf_boolean_t mandatory; /* if mandatory locking is enabled */
+} posix_locks_private_t;
+
+
+#endif /* __POSIX_LOCKS_H__ */
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
new file mode 100644
index 00000000000..e2b336607c4
--- /dev/null
+++ b/xlators/features/locks/src/posix.c
@@ -0,0 +1,834 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <pthread.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "compat.h"
+#include "xlator.h"
+#include "inode.h"
+#include "logging.h"
+#include "common-utils.h"
+
+#include "locks.h"
+#include "common.h"
+
+#ifndef LLONG_MAX
+#define LLONG_MAX LONG_LONG_MAX /* compat with old gcc */
+#endif /* LLONG_MAX */
+
+/* Forward declarations */
+
+
+void do_blocked_rw (pl_inode_t *);
+static int __rw_allowable (pl_inode_t *, posix_lock_t *, glusterfs_fop_t);
+
+/*
+ * Frame-local state for truncate/ftruncate. It carries the original
+ * arguments across the intermediate stat/fstat call so that the real
+ * operation can be issued from truncate_stat_cbk.
+ */
+struct _truncate_ops {
+ loc_t loc; /* copy of the caller's loc (TRUNCATE only; wiped when done) */
+ fd_t *fd; /* caller's fd (FTRUNCATE only) */
+ off_t offset; /* requested new size */
+ enum {TRUNCATE, FTRUNCATE} op; /* which of the two fops is in flight */
+};
+
+
+/*
+ * Callback for the truncate/ftruncate wound from truncate_stat_cbk.
+ *
+ * Releases the loc copy held in the frame-local state (only the
+ * path-based variant stores one) and hands the child's result back
+ * to the caller untouched.
+ */
+int
+pl_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        struct _truncate_ops *ops = frame->local;
+
+        if (ops->op == TRUNCATE) {
+                loc_wipe (&ops->loc);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+
+/*
+ * Decide whether the given owner may truncate the file at `offset`.
+ *
+ * The affected region runs from `offset` to LLONG_MAX. Truncation is
+ * refused when a granted (non-blocked) fcntl lock held by a different
+ * owner overlaps that region.
+ *
+ * Takes pl_inode->mutex for the duration of the scan.
+ *
+ * Returns 1 if the truncate may proceed, 0 otherwise.
+ */
+static int
+truncate_allowed (pl_inode_t *pl_inode,
+                  transport_t *transport, pid_t client_pid,
+                  off_t offset)
+{
+        posix_lock_t  probe   = {.list = {0, }, };
+        posix_lock_t *held    = NULL;
+        int           allowed = 1;
+
+        probe.fl_start   = offset;
+        probe.fl_end     = LLONG_MAX;
+        probe.transport  = transport;
+        probe.client_pid = client_pid;
+
+        pthread_mutex_lock (&pl_inode->mutex);
+        {
+                list_for_each_entry (held, &pl_inode->ext_list, list) {
+                        if (held->blocked)
+                                continue;
+
+                        if (!locks_overlap (&probe, held))
+                                continue;
+
+                        if (same_owner (&probe, held))
+                                continue;
+
+                        /* someone else's granted lock is in the way */
+                        allowed = 0;
+                        break;
+                }
+        }
+        pthread_mutex_unlock (&pl_inode->mutex);
+
+        return allowed;
+}
+
+
+/*
+ * Callback for the stat/fstat issued by pl_truncate/pl_ftruncate.
+ *
+ * On success it looks up the pl_inode and, when mandatory locking is
+ * enabled both translator-wide and on this inode, checks that the
+ * truncate does not collide with another owner's lock. If everything
+ * is fine the real truncate/ftruncate is wound to the child.
+ *
+ * On any failure the frame is unwound with -1 and the errno that
+ * describes the failure:
+ *   - the child's op_errno if the stat itself failed,
+ *   - ENOMEM if no pl_inode could be obtained,
+ *   - EAGAIN if a conflicting mandatory lock exists.
+ * (Previously every failure was reported as ENOMEM.)
+ */
+static int
+truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        posix_locks_private_t *priv     = NULL;
+        struct _truncate_ops  *local    = NULL;
+        inode_t               *inode    = NULL;
+        pl_inode_t            *pl_inode = NULL;
+
+
+        priv  = this->private;
+        local = frame->local;
+
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "got error (errno=%d, stderror=%s) from child",
+                        op_errno, strerror (op_errno));
+                goto unwind;
+        }
+
+        if (local->op == TRUNCATE)
+                inode = local->loc.inode;
+        else
+                inode = local->fd->inode;
+
+        pl_inode = pl_inode_get (this, inode);
+        if (!pl_inode) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "unable to get pl_inode from %p", inode);
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        if (priv->mandatory
+            && pl_inode->mandatory
+            && !truncate_allowed (pl_inode, frame->root->trans,
+                                  frame->root->pid, local->offset)) {
+                op_errno = EAGAIN;
+                goto unwind;
+        }
+
+        switch (local->op) {
+        case TRUNCATE:
+                STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->truncate,
+                            &local->loc, local->offset);
+                break;
+        case FTRUNCATE:
+                STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->ftruncate,
+                            local->fd, local->offset);
+                break;
+        }
+
+        return 0;
+
+unwind:
+        /* the loc copy is only present for the path-based variant */
+        if (local->op == TRUNCATE)
+                loc_wipe (&local->loc);
+
+        /* report the real cause instead of a blanket ENOMEM */
+        STACK_UNWIND (frame, -1, op_errno, buf);
+        return 0;
+}
+
+
+/*
+ * truncate fop.
+ *
+ * Saves the request (a copy of loc plus the offset) in frame->local
+ * and first winds a stat to the child; the truncate itself is issued
+ * from truncate_stat_cbk. If the local state cannot be allocated the
+ * frame is unwound with ENOMEM.
+ */
+int
+pl_truncate (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, off_t offset)
+{
+        struct _truncate_ops *ops = NULL;
+
+        ops = CALLOC (1, sizeof (struct _truncate_ops));
+        if (ops == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+
+                STACK_UNWIND (frame, -1, ENOMEM, NULL);
+                return 0;
+        }
+
+        ops->op     = TRUNCATE;
+        ops->offset = offset;
+        loc_copy (&ops->loc, loc);
+
+        frame->local = ops;
+
+        STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->stat, loc);
+
+        return 0;
+}
+
+
+/*
+ * ftruncate fop.
+ *
+ * Saves the request (fd and offset) in frame->local and first winds
+ * an fstat to the child; the ftruncate itself is issued from
+ * truncate_stat_cbk. If the local state cannot be allocated the frame
+ * is unwound with ENOMEM.
+ */
+int
+pl_ftruncate (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, off_t offset)
+{
+        struct _truncate_ops *ops = NULL;
+
+        ops = CALLOC (1, sizeof (struct _truncate_ops));
+        if (ops == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+
+                STACK_UNWIND (frame, -1, ENOMEM, NULL);
+                return 0;
+        }
+
+        ops->op     = FTRUNCATE;
+        ops->offset = offset;
+        ops->fd     = fd;
+
+        frame->local = ops;
+
+        STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fstat, fd);
+
+        return 0;
+}
+
+
+/*
+ * Remove and destroy every lock owned by (transport, pid) from both
+ * the fcntl list (ext_list) and the internal list (int_list) of the
+ * given pl_inode, in that order.
+ *
+ * The double-underscore prefix follows the file's convention for
+ * helpers that expect pl_inode->mutex to be held by the caller.
+ */
+static void
+__delete_locks_of_owner (pl_inode_t *pl_inode,
+                         transport_t *transport, pid_t pid)
+{
+        struct list_head *domains[2] = {NULL, NULL};
+        posix_lock_t     *lock       = NULL;
+        posix_lock_t     *next       = NULL;
+        int               i          = 0;
+
+        domains[0] = &pl_inode->ext_list;
+        domains[1] = &pl_inode->int_list;
+
+        /* TODO: what if it is a blocked lock with pending l->frame */
+
+        for (i = 0; i < 2; i++) {
+                list_for_each_entry_safe (lock, next, domains[i], list) {
+                        if (lock->transport != transport)
+                                continue;
+
+                        if (lock->client_pid != pid)
+                                continue;
+
+                        __delete_lock (pl_inode, lock);
+                        __destroy_lock (lock);
+                }
+        }
+}
+
+
+/*
+ * Callback for the flush wound by pl_flush: relays the child's
+ * result to the caller without modification.
+ */
+int
+pl_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+              int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+
+/*
+ * flush fop.
+ *
+ * Drops every lock (fcntl and internal) that the calling owner --
+ * identified by the frame's transport and pid -- holds on the inode,
+ * then gives blocked lock requests and parked read/write requests a
+ * chance to proceed, and finally winds the flush to the child.
+ *
+ * If no pl_inode can be obtained the frame is unwound with EBADFD.
+ */
+int
+pl_flush (call_frame_t *frame, xlator_t *this,
+          fd_t *fd)
+{
+        pl_inode_t *pl_inode = NULL;
+
+        pl_inode = pl_inode_get (this, fd->inode);
+        if (!pl_inode) {
+                gf_log (this->name, GF_LOG_ERROR, "returning EBADFD");
+                STACK_UNWIND (frame, -1, EBADFD);
+                return 0;
+        }
+
+        pthread_mutex_lock (&pl_inode->mutex);
+        {
+                __delete_locks_of_owner (pl_inode, frame->root->trans,
+                                         frame->root->pid);
+        }
+        pthread_mutex_unlock (&pl_inode->mutex);
+
+        /* the locks just released may have been blocking others */
+        grant_blocked_locks (this, pl_inode, GF_LOCK_POSIX);
+        grant_blocked_locks (this, pl_inode, GF_LOCK_INTERNAL);
+
+        do_blocked_rw (pl_inode);
+
+        STACK_WIND (frame, pl_flush_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->flush, fd);
+        return 0;
+}
+
+
+/*
+ * Callback for the open wound by pl_open: relays the child's result
+ * and fd to the caller without modification.
+ */
+int
+pl_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+             int32_t op_ret, int32_t op_errno, fd_t *fd)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, fd);
+        return 0;
+}
+
+
+/*
+ * open fop: forwards the open to the child with O_TRUNC masked out
+ * of the flags.
+ */
+int
+pl_open (call_frame_t *frame, xlator_t *this,
+         loc_t *loc, int32_t flags, fd_t *fd)
+{
+        /* why isn't O_TRUNC being handled ? */
+        int32_t child_flags = flags & ~O_TRUNC;
+
+        STACK_WIND (frame, pl_open_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->open,
+                    loc, child_flags, fd);
+
+        return 0;
+}
+
+
+/*
+ * Callback for the create wound by pl_create: relays the child's
+ * result, fd, inode and stat buffer to the caller without
+ * modification.
+ */
+int
+pl_create_cbk (call_frame_t *frame, void *cookie,
+               xlator_t *this, int32_t op_ret, int32_t op_errno,
+               fd_t *fd, inode_t *inode, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
+        return 0;
+}
+
+
+/*
+ * create fop: passes the request straight through to the child with
+ * all arguments unchanged.
+ */
+int
+pl_create (call_frame_t *frame, xlator_t *this,
+           loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
+{
+        STACK_WIND (frame, pl_create_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->create,
+                    loc, flags, mode, fd);
+
+        return 0;
+}
+
+
+/*
+ * Callback for readv requests wound by this translator: relays the
+ * child's result, iovec array, count and stat buffer to the caller
+ * without modification.
+ */
+int
+pl_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+              int32_t op_ret, int32_t op_errno,
+              struct iovec *vector, int32_t count, struct stat *stbuf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf);
+        return 0;
+}
+
+/*
+ * Callback for writev requests wound by this translator: relays the
+ * child's result and stat buffer to the caller without modification.
+ */
+int
+pl_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, stbuf);
+        return 0;
+}
+
+
+/*
+ * Resume parked read/write requests that no longer conflict with any
+ * lock on the inode.
+ *
+ * Works in two phases so that no stub is resumed while the inode
+ * mutex is held:
+ *   1. under pl_inode->mutex, move every request that is now
+ *      allowable from rw_list onto a private list;
+ *   2. after dropping the mutex, resume each moved request's stub and
+ *      free the request record.
+ */
+void
+do_blocked_rw (pl_inode_t *pl_inode)
+{
+        struct list_head  ready;
+        pl_rw_req_t      *req  = NULL;
+        pl_rw_req_t      *next = NULL;
+
+        INIT_LIST_HEAD (&ready);
+
+        pthread_mutex_lock (&pl_inode->mutex);
+        {
+                list_for_each_entry_safe (req, next,
+                                          &pl_inode->rw_list, list) {
+                        if (!__rw_allowable (pl_inode, &req->region,
+                                             req->stub->fop))
+                                continue;
+
+                        list_del_init (&req->list);
+                        list_add_tail (&req->list, &ready);
+                }
+        }
+        pthread_mutex_unlock (&pl_inode->mutex);
+
+        list_for_each_entry_safe (req, next, &ready, list) {
+                list_del_init (&req->list);
+                call_resume (req->stub);
+                free (req);
+        }
+}
+
+
+/*
+ * Check whether a read or write over `region` may proceed given the
+ * fcntl locks currently on the inode's ext_list.
+ *
+ * A lock stands in the way when it overlaps the region and belongs to
+ * a different owner -- except that a read (GF_FOP_READ) is only
+ * stopped by write locks (F_WRLCK).
+ *
+ * Callers in this file invoke it with pl_inode->mutex held.
+ * NOTE(review): unlike truncate_allowed, entries marked `blocked` are
+ * not skipped here -- confirm whether that difference is intended.
+ *
+ * Returns 1 if the operation is allowed, 0 otherwise.
+ */
+static int
+__rw_allowable (pl_inode_t *pl_inode, posix_lock_t *region,
+                glusterfs_fop_t op)
+{
+        posix_lock_t *held = NULL;
+
+        list_for_each_entry (held, &pl_inode->ext_list, list) {
+                if (!locks_overlap (held, region))
+                        continue;
+
+                if (same_owner (held, region))
+                        continue;
+
+                /* read locks never stop a reader */
+                if ((op == GF_FOP_READ) && (held->fl_type != F_WRLCK))
+                        continue;
+
+                return 0;
+        }
+
+        return 1;
+}
+
+
+/*
+ * Continuation used by the call stub of a parked read: winds the
+ * readv to the child with the original arguments.
+ */
+int
+pl_readv_cont (call_frame_t *frame, xlator_t *this,
+               fd_t *fd, size_t size, off_t offset)
+{
+        STACK_WIND (frame, pl_readv_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv,
+                    fd, size, offset);
+
+        return 0;
+}
+
+
+/*
+ * readv fop.
+ *
+ * Without mandatory locking (translator-wide or on this inode) the
+ * read is wound straight to the child. With mandatory locking the
+ * requested byte range is checked against the inode's fcntl locks:
+ *
+ *   - no conflict:            the read is wound to the child;
+ *   - conflict + O_NONBLOCK:  unwound with EWOULDBLOCK;
+ *   - conflict, blocking fd:  the request is parked on rw_list as a
+ *                             call stub and resumed by do_blocked_rw.
+ *
+ * Failures to obtain the pl_inode or to allocate the parked request
+ * are unwound with ENOMEM.
+ *
+ * Fixes over the previous version: an allowable read under mandatory
+ * locking is now actually wound (it used to be neither wound nor
+ * unwound, leaving the caller hanging), and a NULL pl_inode is no
+ * longer dereferenced.
+ */
+int
+pl_readv (call_frame_t *frame, xlator_t *this,
+          fd_t *fd, size_t size, off_t offset)
+{
+        posix_locks_private_t *priv        = NULL;
+        pl_inode_t            *pl_inode    = NULL;
+        pl_rw_req_t           *rw          = NULL;
+        posix_lock_t           region      = {.list = {0, }, };
+        int                    op_ret      = 0;
+        int                    op_errno    = 0;
+        int                    wind_needed = 1;
+
+
+        priv = this->private;
+
+        pl_inode = pl_inode_get (this, fd->inode);
+        if (!pl_inode) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                op_errno    = ENOMEM;
+                op_ret      = -1;
+                wind_needed = 0;
+                goto out;
+        }
+
+        if (priv->mandatory && pl_inode->mandatory) {
+                region.fl_start   = offset;
+                region.fl_end     = offset + size - 1;
+                region.transport  = frame->root->trans;
+                region.client_pid = frame->root->pid;
+
+                pthread_mutex_lock (&pl_inode->mutex);
+                {
+                        wind_needed = __rw_allowable (pl_inode, &region,
+                                                      GF_FOP_READ);
+                        if (wind_needed)
+                                goto unlock;
+
+                        if (fd->flags & O_NONBLOCK) {
+                                gf_log (this->name, GF_LOG_DEBUG,
+                                        "returning EWOULDBLOCK");
+                                op_errno = EWOULDBLOCK;
+                                op_ret = -1;
+                                goto unlock;
+                        }
+
+                        rw = CALLOC (1, sizeof (*rw));
+                        if (!rw) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "out of memory :(");
+                                op_errno = ENOMEM;
+                                op_ret = -1;
+                                goto unlock;
+                        }
+
+                        rw->stub = fop_readv_stub (frame, pl_readv_cont,
+                                                   fd, size, offset);
+                        if (!rw->stub) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "out of memory :(");
+                                op_errno = ENOMEM;
+                                op_ret = -1;
+                                free (rw);
+                                goto unlock;
+                        }
+
+                        rw->region = region;
+
+                        /* parked; do_blocked_rw will resume the stub */
+                        list_add_tail (&rw->list, &pl_inode->rw_list);
+                }
+        unlock:
+                pthread_mutex_unlock (&pl_inode->mutex);
+        }
+
+out:
+        if (wind_needed) {
+                STACK_WIND (frame, pl_readv_cbk,
+                            FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->readv,
+                            fd, size, offset);
+        } else if (op_ret == -1) {
+                STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL);
+        }
+        /* else: request is queued on rw_list, nothing to do now */
+
+        return 0;
+}
+
+
+/*
+ * Continuation used by the call stub of a parked write: winds the
+ * writev to the child with the original arguments.
+ */
+int
+pl_writev_cont (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                struct iovec *vector, int count, off_t offset)
+{
+        STACK_WIND (frame, pl_writev_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev,
+                    fd, vector, count, offset);
+
+        return 0;
+}
+
+
+/*
+ * writev fop.
+ *
+ * Without mandatory locking (translator-wide or on this inode) the
+ * write is wound straight to the child. With mandatory locking the
+ * byte range covered by the iovec array is checked against the
+ * inode's fcntl locks:
+ *
+ *   - no conflict:            the write is wound to the child;
+ *   - conflict + O_NONBLOCK:  unwound with EWOULDBLOCK;
+ *   - conflict, blocking fd:  the request is parked on rw_list as a
+ *                             call stub and resumed by do_blocked_rw.
+ *
+ * Failures to obtain the pl_inode or to allocate the parked request
+ * are unwound with ENOMEM.
+ *
+ * Fixes over the previous version: an allowable write under mandatory
+ * locking is now actually wound (it used to be neither wound nor
+ * unwound, leaving the caller hanging); a NULL pl_inode is no longer
+ * dereferenced; and the error unwind passes the argument list that a
+ * writev callback expects (a single stat pointer).
+ */
+int
+pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+           struct iovec *vector, int32_t count, off_t offset)
+{
+        posix_locks_private_t *priv        = NULL;
+        pl_inode_t            *pl_inode    = NULL;
+        pl_rw_req_t           *rw          = NULL;
+        posix_lock_t           region      = {.list = {0, }, };
+        int                    op_ret      = 0;
+        int                    op_errno    = 0;
+        int                    wind_needed = 1;
+
+
+        priv = this->private;
+
+        pl_inode = pl_inode_get (this, fd->inode);
+        if (!pl_inode) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                op_errno    = ENOMEM;
+                op_ret      = -1;
+                wind_needed = 0;
+                goto out;
+        }
+
+        if (priv->mandatory && pl_inode->mandatory) {
+                region.fl_start   = offset;
+                region.fl_end     = offset + iov_length (vector, count) - 1;
+                region.transport  = frame->root->trans;
+                region.client_pid = frame->root->pid;
+
+                pthread_mutex_lock (&pl_inode->mutex);
+                {
+                        wind_needed = __rw_allowable (pl_inode, &region,
+                                                      GF_FOP_WRITE);
+                        if (wind_needed)
+                                goto unlock;
+
+                        if (fd->flags & O_NONBLOCK) {
+                                gf_log (this->name, GF_LOG_DEBUG,
+                                        "returning EWOULDBLOCK");
+                                op_errno = EWOULDBLOCK;
+                                op_ret = -1;
+                                goto unlock;
+                        }
+
+                        rw = CALLOC (1, sizeof (*rw));
+                        if (!rw) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "out of memory :(");
+                                op_errno = ENOMEM;
+                                op_ret = -1;
+                                goto unlock;
+                        }
+
+                        rw->stub = fop_writev_stub (frame, pl_writev_cont,
+                                                    fd, vector, count, offset);
+                        if (!rw->stub) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "out of memory :(");
+                                op_errno = ENOMEM;
+                                op_ret = -1;
+                                free (rw);
+                                goto unlock;
+                        }
+
+                        rw->region = region;
+
+                        /* parked; do_blocked_rw will resume the stub */
+                        list_add_tail (&rw->list, &pl_inode->rw_list);
+                }
+        unlock:
+                pthread_mutex_unlock (&pl_inode->mutex);
+        }
+
+out:
+        if (wind_needed) {
+                STACK_WIND (frame, pl_writev_cbk,
+                            FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->writev,
+                            fd, vector, count, offset);
+        } else if (op_ret == -1) {
+                /* writev callbacks take (op_ret, op_errno, stbuf) */
+                STACK_UNWIND (frame, -1, op_errno, NULL);
+        }
+        /* else: request is queued on rw_list, nothing to do now */
+
+        return 0;
+}
+
+
+/*
+ * lk fop: fcntl-style byte-range locking.
+ *
+ * Builds a lock request from the caller's flock and the frame's owner
+ * (transport + pid) and dispatches on `cmd`:
+ *
+ *   F_GETLK   - looks for a conflicting lock via pl_getlk, writes the
+ *               answer back into `flock`, and discards the request.
+ *   F_SETLK   - tries to set the lock; a failure from pl_setlk is
+ *               reported as EAGAIN and the request is discarded.
+ *   F_SETLKW  - like F_SETLK, but if pl_setlk reports failure the
+ *               frame is NOT unwound here; the request keeps a
+ *               reference to the frame so it can be answered later.
+ *   other     - rejected with EINVAL and the request is discarded
+ *               (previously an unknown command leaked the request and
+ *               reported success).
+ *
+ * The 64-bit command variants are handled too when they are distinct
+ * values on the build platform.
+ *
+ * Allocation failures are unwound with ENOMEM.
+ */
+int
+pl_lk (call_frame_t *frame, xlator_t *this,
+       fd_t *fd, int32_t cmd, struct flock *flock)
+{
+        transport_t  *transport  = NULL;
+        pid_t         client_pid = 0;
+        pl_inode_t   *pl_inode   = NULL;
+        int           op_ret     = 0;
+        int           op_errno   = 0;
+        int           can_block  = 0;
+        posix_lock_t *reqlock    = NULL;
+        posix_lock_t *conf       = NULL;
+        int           ret        = 0;
+
+        transport  = frame->root->trans;
+        client_pid = frame->root->pid;
+
+        pl_inode = pl_inode_get (this, fd->inode);
+        if (!pl_inode) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        reqlock = new_posix_lock (flock, transport, client_pid);
+        if (!reqlock) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        switch (cmd) {
+
+#if F_GETLK != F_GETLK64
+        case F_GETLK64:
+#endif
+        case F_GETLK:
+                conf = pl_getlk (pl_inode, reqlock, GF_LOCK_POSIX);
+                posix_lock_to_flock (conf, flock);
+                __destroy_lock (reqlock);
+
+                break;
+
+#if F_SETLKW != F_SETLKW64
+        case F_SETLKW64:
+#endif
+        case F_SETLKW:
+                can_block = 1;
+                reqlock->frame = frame;
+                reqlock->this = this;
+                reqlock->fd = fd;
+
+                /* fall through */
+
+#if F_SETLK != F_SETLK64
+        case F_SETLK64:
+#endif
+        case F_SETLK:
+                memcpy (&reqlock->user_flock, flock, sizeof (struct flock));
+                ret = pl_setlk (this, pl_inode, reqlock,
+                                can_block, GF_LOCK_POSIX);
+
+                if (ret == -1) {
+                        /* blocking request: the frame is answered later */
+                        if (can_block)
+                                goto out;
+
+                        gf_log (this->name, GF_LOG_DEBUG, "returning EAGAIN");
+                        op_ret = -1;
+                        op_errno = EAGAIN;
+                        __destroy_lock (reqlock);
+                }
+
+                break;
+
+        default:
+                gf_log (this->name, GF_LOG_ERROR,
+                        "unexpected lock command %d", cmd);
+                op_ret = -1;
+                op_errno = EINVAL;
+                __destroy_lock (reqlock);
+
+                break;
+        }
+
+unwind:
+        STACK_UNWIND (frame, op_ret, op_errno, flock);
+out:
+        return 0;
+}
+
+
+/*
+ * forget callback: the inode is going away, so release the pl_inode
+ * attached to it.
+ *
+ * Anything still queued on the pl_inode at this point (parked r/w
+ * requests, fcntl locks, internal locks, entry locks) is reported at
+ * CRITICAL level before the structure is freed.
+ *
+ * If no pl_inode can be obtained there is nothing to inspect or free;
+ * this is logged and the function returns (previously a NULL pl_inode
+ * was dereferenced).
+ *
+ * TODO: this function just logs, no action required??
+ */
+int
+pl_forget (xlator_t *this,
+           inode_t *inode)
+{
+        pl_inode_t *pl_inode = NULL;
+
+        pl_inode = pl_inode_get (this, inode);
+        if (!pl_inode) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "unable to get pl_inode from %p", inode);
+                return 0;
+        }
+
+        if (!list_empty (&pl_inode->rw_list)) {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "pending R/W requests found!");
+        }
+
+        if (!list_empty (&pl_inode->ext_list)) {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "Pending fcntl locks found!");
+        }
+
+        if (!list_empty (&pl_inode->int_list)) {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "Pending internal locks found!");
+        }
+
+        if (!list_empty (&pl_inode->dir_list)) {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "Pending entry locks found!");
+        }
+
+        FREE (pl_inode);
+
+        return 0;
+}
+
+
+/*
+ * Translator initialisation.
+ *
+ * Validates the volume graph (exactly one child; the bottom-most
+ * translator of the first-child chain must be a "storage/" type),
+ * allocates the private structure and reads the boolean
+ * "mandatory-locks" option into it.
+ *
+ * Returns 0 on success, -1 on any validation, allocation or option
+ * parsing failure. On failure nothing is left allocated and
+ * this->private is not modified (previously the allocation result was
+ * not checked, and the private structure leaked when the option
+ * failed to parse).
+ */
+int
+init (xlator_t *this)
+{
+        posix_locks_private_t *priv      = NULL;
+        xlator_list_t         *trav      = NULL;
+        data_t                *mandatory = NULL;
+
+        if (!this->children || this->children->next) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "FATAL: posix-locks should have exactly one child");
+                return -1;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        /* walk down the first-child chain to the bottom translator */
+        trav = this->children;
+        while (trav->xlator->children)
+                trav = trav->xlator->children;
+
+        if (strncmp ("storage/", trav->xlator->type, 8)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "'posix-locks' not loaded over storage translator");
+                return -1;
+        }
+
+        priv = CALLOC (1, sizeof (*priv));
+        if (!priv) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                return -1;
+        }
+
+        mandatory = dict_get (this->options, "mandatory-locks");
+        if (mandatory) {
+                if (gf_string2boolean (mandatory->data,
+                                       &priv->mandatory) == -1) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "'mandatory-locks' takes only boolean "
+                                "options");
+                        free (priv);
+                        return -1;
+                }
+        }
+
+        this->private = priv;
+        return 0;
+}
+
+
+/*
+ * Translator teardown: releases the private structure that init()
+ * stored in this->private. Always returns 0.
+ */
+int
+fini (xlator_t *this)
+{
+        free (this->private);
+
+        return 0;
+}
+
+
+int
+pl_inodelk (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t cmd, struct flock *flock);
+
+int
+pl_finodelk (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int32_t cmd, struct flock *flock);
+
+int
+pl_entrylk (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type);
+
+int
+pl_fentrylk (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type);
+
+/*
+ * File operations implemented by the locks translator. Operations not
+ * listed here are left unset in this table.
+ */
+struct xlator_fops fops = {
+ .create = pl_create,
+ .truncate = pl_truncate,
+ .ftruncate = pl_ftruncate,
+ .open = pl_open,
+ .readv = pl_readv,
+ .writev = pl_writev,
+ .lk = pl_lk,
+ .inodelk = pl_inodelk,
+ .finodelk = pl_finodelk,
+ .entrylk = pl_entrylk,
+ .fentrylk = pl_fentrylk,
+ .flush = pl_flush,
+};
+
+
+/* Management operations: none are implemented by this translator. */
+struct xlator_mops mops = {
+};
+
+
+/* Callbacks: only `forget` is hooked, to free the per-inode lock state. */
+struct xlator_cbks cbks = {
+ .forget = pl_forget,
+};
+
+
+/*
+ * Volume options accepted by this translator. The table is terminated
+ * by an entry whose key is NULL.
+ * NOTE(review): init() looks the option up only under the name
+ * "mandatory-locks"; confirm that the "mandatory" alias is mapped to
+ * it before init() runs.
+ */
+struct volume_options options[] = {
+ { .key = { "mandatory-locks", "mandatory" },
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {NULL} },
+};