summaryrefslogtreecommitdiffstats
path: root/xlators/features/locks/src/inodelk.c
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/features/locks/src/inodelk.c')
-rw-r--r--xlators/features/locks/src/inodelk.c1522
1 files changed, 976 insertions, 546 deletions
diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c
index 0017e3ed23b..d4e51d6e0a1 100644
--- a/xlators/features/locks/src/inodelk.c
+++ b/xlators/features/locks/src/inodelk.c
@@ -1,744 +1,1174 @@
/*
- Copyright (c) 2006, 2007, 2008 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "inode.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "list.h"
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/list.h>
+#include <glusterfs/upcall-utils.h>
#include "locks.h"
+#include "clear.h"
#include "common.h"
void
-__delete_inode_lock (pl_inode_lock_t *lock)
+__delete_inode_lock(pl_inode_lock_t *lock)
{
- list_del (&lock->list);
+ list_del_init(&lock->list);
+}
+
+static void
+__pl_inodelk_ref(pl_inode_lock_t *lock)
+{
+ lock->ref++;
}
void
-__destroy_inode_lock (pl_inode_lock_t *lock)
+__pl_inodelk_unref(pl_inode_lock_t *lock)
{
- FREE (lock);
+ lock->ref--;
+ if (!lock->ref) {
+ GF_FREE(lock->connection_id);
+ GF_FREE(lock);
+ }
}
-/* Check if 2 inodelks are conflicting on type. Only 2 shared locks don't conflict */
+/* Check if 2 inodelks are conflicting on type. Only 2 shared locks don't
+ * conflict */
static int
-inodelk_type_conflict (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
+inodelk_type_conflict(pl_inode_lock_t *l1, pl_inode_lock_t *l2)
{
- if (l2->fl_type == F_WRLCK || l1->fl_type == F_WRLCK)
- return 1;
+ if (l2->fl_type == F_WRLCK || l1->fl_type == F_WRLCK)
+ return 1;
- return 0;
+ return 0;
}
void
-pl_print_inodelk (char *str, int size, int cmd, struct flock *flock, const char *domain)
+pl_print_inodelk(char *str, int size, int cmd, struct gf_flock *flock,
+ const char *domain)
{
- char *cmd_str = NULL;
- char *type_str = NULL;
+ char *cmd_str = NULL;
+ char *type_str = NULL;
- switch (cmd) {
+ switch (cmd) {
#if F_GETLK != F_GETLK64
case F_GETLK64:
#endif
case F_GETLK:
- cmd_str = "GETLK";
- break;
+ cmd_str = "GETLK";
+ break;
#if F_SETLK != F_SETLK64
case F_SETLK64:
#endif
case F_SETLK:
- cmd_str = "SETLK";
- break;
+ cmd_str = "SETLK";
+ break;
#if F_SETLKW != F_SETLKW64
case F_SETLKW64:
#endif
case F_SETLKW:
- cmd_str = "SETLKW";
- break;
+ cmd_str = "SETLKW";
+ break;
default:
- cmd_str = "UNKNOWN";
- break;
- }
+ cmd_str = "UNKNOWN";
+ break;
+ }
- switch (flock->l_type) {
+ switch (flock->l_type) {
case F_RDLCK:
- type_str = "READ";
- break;
+ type_str = "READ";
+ break;
case F_WRLCK:
- type_str = "WRITE";
- break;
+ type_str = "WRITE";
+ break;
case F_UNLCK:
- type_str = "UNLOCK";
- break;
+ type_str = "UNLOCK";
+ break;
default:
- type_str = "UNKNOWN";
- break;
- }
-
- snprintf (str, size, "lock=INODELK, cmd=%s, type=%s, "
- "domain: %s, start=%llu, len=%llu, pid=%llu",
- cmd_str, type_str, domain,
- (unsigned long long) flock->l_start,
- (unsigned long long) flock->l_len,
- (unsigned long long) flock->l_pid);
+ type_str = "UNKNOWN";
+ break;
+ }
+
+ snprintf(str, size,
+ "lock=INODELK, cmd=%s, type=%s, "
+ "domain: %s, start=%llu, len=%llu, pid=%llu",
+ cmd_str, type_str, domain, (unsigned long long)flock->l_start,
+ (unsigned long long)flock->l_len,
+ (unsigned long long)flock->l_pid);
}
/* Determine if the two inodelks overlap reach other's lock regions */
static int
-inodelk_overlap (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
+inodelk_overlap(pl_inode_lock_t *l1, pl_inode_lock_t *l2)
{
- return ((l1->fl_end >= l2->fl_start) &&
- (l2->fl_end >= l1->fl_start));
+ return ((l1->fl_end >= l2->fl_start) && (l2->fl_end >= l1->fl_start));
}
/* Returns true if the 2 inodelks have the same owner */
-static int same_inodelk_owner (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
+static int
+same_inodelk_owner(pl_inode_lock_t *l1, pl_inode_lock_t *l2)
{
- return ((l1->owner == l2->owner) &&
- (l1->transport == l2->transport));
+ return (is_same_lkowner(&l1->owner, &l2->owner) &&
+ (l1->client == l2->client));
}
/* Returns true if the 2 inodelks conflict with each other */
static int
-inodelk_conflict (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
+inodelk_conflict(pl_inode_lock_t *l1, pl_inode_lock_t *l2)
+{
+ return (inodelk_overlap(l1, l2) && inodelk_type_conflict(l1, l2));
+}
+
+/*
+ * Check to see if the candidate lock overlaps/conflicts with the
+ * requested lock. If so, determine how old the lock is and return
+ * true if it exceeds the configured threshold, false otherwise.
+ */
+static inline gf_boolean_t
+__stale_inodelk(xlator_t *this, pl_inode_lock_t *candidate_lock,
+ pl_inode_lock_t *requested_lock, time_t *lock_age_sec)
{
- if (same_inodelk_owner (l1, l2))
- return 0;
+ posix_locks_private_t *priv = NULL;
+
+ priv = this->private;
+ /* Question: Should we just prune them all given the
+ * chance? Or just the locks we are attempting to acquire?
+ */
+ if (inodelk_conflict(candidate_lock, requested_lock)) {
+ *lock_age_sec = gf_time() - candidate_lock->granted_time;
+ if (*lock_age_sec > priv->revocation_secs)
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
+/* Examine any locks held on this inode and potentially revoke the lock
+ * if the age exceeds revocation_secs. We will clear _only_ those locks
+ * which are granted, and then grant those locks which are blocked.
+ *
+ * Depending on how this patch works in the wild, we may expand this and
+ * introduce a heuristic which clears blocked locks as well if they
+ * are beyond a threshold.
+ */
+static gf_boolean_t
+__inodelk_prune_stale(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom,
+ pl_inode_lock_t *lock)
+{
+ posix_locks_private_t *priv = NULL;
+ pl_inode_lock_t *tmp = NULL;
+ pl_inode_lock_t *lk = NULL;
+ gf_boolean_t revoke_lock = _gf_false;
+ int bcount = 0;
+ int gcount = 0;
+ int op_errno = 0;
+ clrlk_args args;
+ args.opts = NULL;
+ time_t lk_age_sec = 0;
+ uint32_t max_blocked = 0;
+ char *reason_str = NULL;
+
+ priv = this->private;
+
+ args.type = CLRLK_INODE;
+ if (priv->revocation_clear_all == _gf_true)
+ args.kind = CLRLK_ALL;
+ else
+ args.kind = CLRLK_GRANTED;
+
+ if (list_empty(&dom->inodelk_list))
+ goto out;
+
+ pthread_mutex_lock(&pinode->mutex);
+ list_for_each_entry_safe(lk, tmp, &dom->inodelk_list, list)
+ {
+ if (__stale_inodelk(this, lk, lock, &lk_age_sec) == _gf_true) {
+ revoke_lock = _gf_true;
+ reason_str = "age";
+ break;
+ }
+ }
+
+ max_blocked = priv->revocation_max_blocked;
+ if (max_blocked != 0 && revoke_lock == _gf_false) {
+ list_for_each_entry_safe(lk, tmp, &dom->blocked_inodelks, blocked_locks)
+ {
+ max_blocked--;
+ if (max_blocked == 0) {
+ revoke_lock = _gf_true;
+ reason_str = "max blocked";
+ break;
+ }
+ }
+ }
+ pthread_mutex_unlock(&pinode->mutex);
- if (!inodelk_overlap (l1, l2))
- return 0;
+out:
+ if (revoke_lock == _gf_true) {
+ clrlk_clear_inodelk(this, pinode, dom, &args, &bcount, &gcount,
+ &op_errno);
+ gf_log(this->name, GF_LOG_WARNING,
+ "Lock revocation [reason: %s; gfid: %s; domain: %s; "
+ "age: %ld sec] - Inode lock revoked: %d granted & %d "
+ "blocked locks cleared",
+ reason_str, uuid_utoa(pinode->gfid), dom->domain, lk_age_sec,
+ gcount, bcount);
+ }
+ return revoke_lock;
+}
- return (inodelk_type_conflict(l1, l2));
+void
+inodelk_contention_notify_check(xlator_t *this, pl_inode_lock_t *lock,
+ struct timespec *now, struct list_head *contend)
+{
+ posix_locks_private_t *priv;
+ int64_t elapsed;
+
+ priv = this->private;
+
+ /* If this lock is in a list, it means that we are about to send a
+ * notification for it, so no need to do anything else. */
+ if (!list_empty(&lock->contend)) {
+ return;
+ }
+
+ elapsed = now->tv_sec;
+ elapsed -= lock->contention_time.tv_sec;
+ if (now->tv_nsec < lock->contention_time.tv_nsec) {
+ elapsed--;
+ }
+ if (elapsed < priv->notify_contention_delay) {
+ return;
+ }
+
+ /* All contention notifications will be sent outside of the locked
+ * region. This means that currently granted locks might have already
+ * been unlocked by that time. To avoid the lock or the inode to be
+ * destroyed before we process them, we take an additional reference
+ * on both. */
+ inode_ref(lock->pl_inode->inode);
+ __pl_inodelk_ref(lock);
+
+ lock->contention_time = *now;
+
+ list_add_tail(&lock->contend, contend);
+}
+
+void
+inodelk_contention_notify(xlator_t *this, struct list_head *contend)
+{
+ struct gf_upcall up;
+ struct gf_upcall_inodelk_contention lc;
+ pl_inode_lock_t *lock;
+ pl_inode_t *pl_inode;
+ client_t *client;
+ gf_boolean_t notify;
+
+ while (!list_empty(contend)) {
+ lock = list_first_entry(contend, pl_inode_lock_t, contend);
+
+ pl_inode = lock->pl_inode;
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ /* If the lock has already been released, no notification is
+ * sent. We clear the notification time in this case. */
+ notify = !list_empty(&lock->list);
+ if (!notify) {
+ lock->contention_time.tv_sec = 0;
+ lock->contention_time.tv_nsec = 0;
+ } else {
+ memcpy(&lc.flock, &lock->user_flock, sizeof(lc.flock));
+ lc.pid = lock->client_pid;
+ lc.domain = lock->volume;
+ lc.xdata = NULL;
+
+ gf_uuid_copy(up.gfid, lock->pl_inode->gfid);
+ client = (client_t *)lock->client;
+ if (client == NULL) {
+ /* A NULL client can be found if the inodelk
+ * was issued by a server side xlator. */
+ up.client_uid = NULL;
+ } else {
+ up.client_uid = client->client_uid;
+ }
+ }
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ if (notify) {
+ up.event_type = GF_UPCALL_INODELK_CONTENTION;
+ up.data = &lc;
+
+ if (this->notify(this, GF_EVENT_UPCALL, &up) < 0) {
+ gf_msg_debug(this->name, 0,
+ "Inodelk contention notification "
+ "failed");
+ } else {
+ gf_msg_debug(this->name, 0,
+ "Inodelk contention notification "
+ "sent");
+ }
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ list_del_init(&lock->contend);
+ __pl_inodelk_unref(lock);
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ inode_unref(pl_inode->inode);
+ }
}
/* Determine if lock is grantable or not */
static pl_inode_lock_t *
-__inodelk_grantable (pl_dom_list_t *dom, pl_inode_lock_t *lock)
-{
- pl_inode_lock_t *l = NULL;
- pl_inode_lock_t *ret = NULL;
- if (list_empty (&dom->inodelk_list))
- goto out;
- list_for_each_entry (l, &dom->inodelk_list, list){
- if (inodelk_conflict (lock, l)) {
- ret = l;
- goto out;
- }
- }
-out:
- return ret;
+__inodelk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock,
+ struct timespec *now, struct list_head *contend)
+{
+ pl_inode_lock_t *l = NULL;
+ pl_inode_lock_t *ret = NULL;
+
+ list_for_each_entry(l, &dom->inodelk_list, list)
+ {
+ if (inodelk_conflict(lock, l) && !same_inodelk_owner(lock, l)) {
+ if (ret == NULL) {
+ ret = l;
+ if (contend == NULL) {
+ break;
+ }
+ }
+ inodelk_contention_notify_check(this, l, now, contend);
+ }
+ }
+
+ return ret;
}
static pl_inode_lock_t *
-__blocked_lock_conflict (pl_dom_list_t *dom, pl_inode_lock_t *lock)
+__blocked_lock_conflict(pl_dom_list_t *dom, pl_inode_lock_t *lock)
{
- pl_inode_lock_t *l = NULL;
- pl_inode_lock_t *ret = NULL;
-
- if (list_empty (&dom->blocked_entrylks))
- return NULL;
+ pl_inode_lock_t *l = NULL;
- list_for_each_entry (l, &dom->blocked_inodelks, blocked_locks) {
- if (inodelk_conflict (lock, l)) {
- ret = l;
- goto out;
- }
- }
+ list_for_each_entry(l, &dom->blocked_inodelks, blocked_locks)
+ {
+ if (inodelk_conflict(lock, l)) {
+ return l;
+ }
+ }
-out:
- return ret;
+ return NULL;
}
static int
-__owner_has_lock (pl_dom_list_t *dom, pl_inode_lock_t *newlock)
+__owner_has_lock(pl_dom_list_t *dom, pl_inode_lock_t *newlock)
{
- pl_inode_lock_t *lock = NULL;
+ pl_inode_lock_t *lock = NULL;
- list_for_each_entry (lock, &dom->entrylk_list, list) {
- if (same_inodelk_owner (lock, newlock))
- return 1;
- }
+ list_for_each_entry(lock, &dom->inodelk_list, list)
+ {
+ if (same_inodelk_owner(lock, newlock))
+ return 1;
+ }
- list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) {
- if (same_inodelk_owner (lock, newlock))
- return 1;
- }
+ list_for_each_entry(lock, &dom->blocked_inodelks, blocked_locks)
+ {
+ if (same_inodelk_owner(lock, newlock))
+ return 1;
+ }
- return 0;
+ return 0;
}
+static int
+__lock_blocked_add(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock,
+ int can_block)
+{
+ if (can_block == 0) {
+ goto out;
+ }
+
+ lock->blkd_time = gf_time();
+ list_add_tail(&lock->blocked_locks, &dom->blocked_inodelks);
+
+ gf_msg_trace(this->name, 0,
+ "%s (pid=%d) (lk-owner=%s) %" PRId64
+ " - "
+ "%" PRId64 " => Blocked",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid,
+ lkowner_utoa(&lock->owner), lock->user_flock.l_start,
+ lock->user_flock.l_len);
+
+ pl_trace_block(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock,
+ lock->volume);
+out:
+ return -EAGAIN;
+}
/* Determines if lock can be granted and adds the lock. If the lock
* is blocking, adds it to the blocked_inodelks list of the domain.
*/
static int
-__lock_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
- int can_block, pl_dom_list_t *dom)
+__lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
+ int can_block, pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend)
{
- pl_inode_lock_t *conf = NULL;
- int ret = -EINVAL;
-
- conf = __inodelk_grantable (dom, lock);
- if (conf){
- ret = -EAGAIN;
- if (can_block == 0)
- goto out;
-
- list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks);
-
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) lk-owner:%"PRIu64" %"PRId64" - %"PRId64" => Blocked",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lock->owner,
- lock->user_flock.l_start,
- lock->user_flock.l_len);
-
+ pl_inode_lock_t *conf = NULL;
+ int ret;
- goto out;
- }
-
- if (__blocked_lock_conflict (dom, lock) && !(__owner_has_lock (dom, lock))) {
- ret = -EAGAIN;
- if (can_block == 0)
- goto out;
-
- list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks);
-
- gf_log (this->name, GF_LOG_TRACE,
- "Lock is grantable, but blocking to prevent starvation");
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => Blocked",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lock->owner,
- lock->user_flock.l_start,
- lock->user_flock.l_len);
-
-
- goto out;
+ ret = pl_inode_remove_inodelk(pl_inode, lock);
+ if (ret < 0) {
+ return ret;
+ }
+ if (ret == 0) {
+ conf = __inodelk_grantable(this, dom, lock, now, contend);
+ }
+ if ((ret > 0) || (conf != NULL)) {
+ return __lock_blocked_add(this, dom, lock, can_block);
+ }
+
+ /* To prevent blocked locks starvation, check if there are any blocked
+ * locks thay may conflict with this lock. If there is then don't grant
+ * the lock. BUT grant the lock if the owner already has lock to allow
+ * nested locks.
+ * Example:
+ * SHD from Machine1 takes (gfid, 0-infinity) and is granted.
+ * SHD from machine2 takes (gfid, 0-infinity) and is blocked.
+ * When SHD from Machine1 takes (gfid, 0-128KB) it
+ * needs to be granted, without which the earlier lock on 0-infinity
+ * will not be unlocked by SHD from Machine1.
+ * TODO: Find why 'owner_has_lock' is checked even for blocked locks.
+ */
+ if (__blocked_lock_conflict(dom, lock) && !(__owner_has_lock(dom, lock))) {
+ if (can_block != 0) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Lock is grantable, but blocking to prevent "
+ "starvation");
}
- list_add (&lock->list, &dom->inodelk_list);
- ret = 0;
+ return __lock_blocked_add(this, dom, lock, can_block);
+ }
+ __pl_inodelk_ref(lock);
+ lock->granted_time = gf_time();
+ list_add(&lock->list, &dom->inodelk_list);
-out:
- return ret;
+ return 0;
}
/* Return true if the two inodelks have exactly same lock boundaries */
static int
-inodelks_equal (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
+inodelks_equal(pl_inode_lock_t *l1, pl_inode_lock_t *l2)
{
- if ((l1->fl_start == l2->fl_start) &&
- (l1->fl_end == l2->fl_end))
- return 1;
+ if ((l1->fl_start == l2->fl_start) && (l1->fl_end == l2->fl_end))
+ return 1;
- return 0;
+ return 0;
}
-
static pl_inode_lock_t *
-find_matching_inodelk (pl_inode_lock_t *lock, pl_dom_list_t *dom)
+find_matching_inodelk(pl_inode_lock_t *lock, pl_dom_list_t *dom)
{
- pl_inode_lock_t *l = NULL;
- list_for_each_entry (l, &dom->inodelk_list, list) {
- if (inodelks_equal (l, lock))
- return l;
- }
- return NULL;
+ pl_inode_lock_t *l = NULL;
+ list_for_each_entry(l, &dom->inodelk_list, list)
+ {
+ if (inodelks_equal(l, lock) && same_inodelk_owner(l, lock))
+ return l;
+ }
+ return NULL;
}
/* Set F_UNLCK removes a lock which has the exact same lock boundaries
* as the UNLCK lock specifies. If such a lock is not found, returns invalid
*/
static pl_inode_lock_t *
-__inode_unlock_lock (xlator_t *this, pl_inode_lock_t *lock, pl_dom_list_t *dom)
+__inode_unlock_lock(xlator_t *this, pl_inode_lock_t *lock, pl_dom_list_t *dom)
{
-
- pl_inode_lock_t *conf = NULL;
-
- conf = find_matching_inodelk (lock, dom);
- if (!conf) {
- gf_log (this->name, GF_LOG_DEBUG,
- " Matching lock not found for unlock");
- goto out;
- }
- __delete_inode_lock (conf);
- gf_log (this->name, GF_LOG_DEBUG,
- " Matching lock found for unlock");
- __destroy_inode_lock (lock);
-
+ pl_inode_lock_t *conf = NULL;
+ inode_t *inode = NULL;
+
+ inode = lock->pl_inode->inode;
+
+ conf = find_matching_inodelk(lock, dom);
+ if (!conf) {
+ gf_log(this->name, GF_LOG_ERROR,
+ " Matching lock not found for unlock %llu-%llu, by %s "
+ "on %p for gfid:%s",
+ (unsigned long long)lock->fl_start,
+ (unsigned long long)lock->fl_end, lkowner_utoa(&lock->owner),
+ lock->client, inode ? uuid_utoa(inode->gfid) : "UNKNOWN");
+ goto out;
+ }
+ __delete_inode_lock(conf);
+ gf_log(this->name, GF_LOG_DEBUG,
+ " Matching lock found for unlock %llu-%llu, by %s on %p for gfid:%s",
+ (unsigned long long)lock->fl_start, (unsigned long long)lock->fl_end,
+ lkowner_utoa(&lock->owner), lock->client,
+ inode ? uuid_utoa(inode->gfid) : "UNKNOWN");
out:
- return conf;
-
-
+ return conf;
}
-static void
-__grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
- struct list_head *granted, pl_dom_list_t *dom)
-{
- int bl_ret = 0;
- pl_inode_lock_t *bl = NULL;
- pl_inode_lock_t *tmp = NULL;
- struct list_head blocked_list;
+void
+__grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted, pl_dom_list_t *dom,
+ struct timespec *now, struct list_head *contend)
+{
+ pl_inode_lock_t *bl = NULL;
+ pl_inode_lock_t *tmp = NULL;
- INIT_LIST_HEAD (&blocked_list);
- list_splice_init (&dom->blocked_inodelks, &blocked_list);
+ struct list_head blocked_list;
- list_for_each_entry_safe (bl, tmp, &blocked_list, blocked_locks) {
+ INIT_LIST_HEAD(&blocked_list);
+ list_splice_init(&dom->blocked_inodelks, &blocked_list);
- list_del_init (&bl->blocked_locks);
+ list_for_each_entry_safe(bl, tmp, &blocked_list, blocked_locks)
+ {
+ list_del_init(&bl->blocked_locks);
- bl_ret = __lock_inodelk (this, pl_inode, bl, 1, dom);
+ bl->status = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend);
- if (bl_ret == 0) {
- list_add (&bl->blocked_locks, granted);
- }
+ if (bl->status != -EAGAIN) {
+ list_add_tail(&bl->blocked_locks, granted);
}
- return;
+ }
}
-/* Grant all inodelks blocked on a lock */
void
-grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom)
+unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted)
{
- struct list_head granted;
- pl_inode_lock_t *lock;
- pl_inode_lock_t *tmp;
+ pl_inode_lock_t *lock;
+ pl_inode_lock_t *tmp;
+ int32_t op_ret;
+ int32_t op_errno;
+
+ list_for_each_entry_safe(lock, tmp, granted, blocked_locks)
+ {
+ if (lock->status == 0) {
+ op_ret = 0;
+ op_errno = 0;
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64
+ " => Granted",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid, lkowner_utoa(&lock->owner),
+ lock->user_flock.l_start, lock->user_flock.l_len);
+ } else {
+ op_ret = -1;
+ op_errno = -lock->status;
+ }
+ pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock,
+ op_ret, op_errno, lock->volume);
- INIT_LIST_HEAD (&granted);
+ STACK_UNWIND_STRICT(inodelk, lock->frame, op_ret, op_errno, NULL);
+ lock->frame = NULL;
+ }
- if (list_empty (&dom->blocked_inodelks)) {
- gf_log (this->name, GF_LOG_TRACE,
- "No blocked locks to be granted for domain: %s", dom->domain);
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ list_for_each_entry_safe(lock, tmp, granted, blocked_locks)
+ {
+ list_del_init(&lock->blocked_locks);
+ __pl_inodelk_unref(lock);
}
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+}
- pthread_mutex_lock (&pl_inode->mutex);
- {
- __grant_blocked_inode_locks (this, pl_inode, &granted, dom);
- }
- pthread_mutex_unlock (&pl_inode->mutex);
-
- list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) {
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => Granted",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lock->owner,
- lock->user_flock.l_start,
- lock->user_flock.l_len);
+/* Grant all inodelks blocked on a lock */
+void
+grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
+ pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend)
+{
+ struct list_head granted;
- pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW,
- &lock->user_flock, 0, 0, lock->volume);
+ INIT_LIST_HEAD(&granted);
- STACK_UNWIND_STRICT (inodelk, lock->frame, 0, 0);
- }
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now,
+ contend);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ unwind_granted_inodes(this, pl_inode, &granted);
}
-/* Release all inodelks from this transport */
-static int
-release_inode_locks_of_transport (xlator_t *this, pl_dom_list_t *dom,
- inode_t *inode, transport_t *trans)
+static void
+pl_inodelk_log_cleanup(pl_inode_lock_t *lock)
{
- pl_inode_lock_t *tmp = NULL;
- pl_inode_lock_t *l = NULL;
-
- pl_inode_t * pinode = NULL;
-
- struct list_head granted;
- struct list_head released;
+ pl_inode_t *pl_inode = NULL;
- char *path = NULL;
+ pl_inode = lock->pl_inode;
- INIT_LIST_HEAD (&granted);
- INIT_LIST_HEAD (&released);
-
- pinode = pl_inode_get (this, inode);
+ gf_log(THIS->name, GF_LOG_WARNING,
+ "releasing lock on %s held by "
+ "{client=%p, pid=%" PRId64 " lk-owner=%s}",
+ uuid_utoa(pl_inode->gfid), lock->client, (uint64_t)lock->client_pid,
+ lkowner_utoa(&lock->owner));
+}
- pthread_mutex_lock (&pinode->mutex);
+/* Release all inodelks from this client */
+int
+pl_inodelk_client_cleanup(xlator_t *this, pl_ctx_t *ctx)
+{
+ posix_locks_private_t *priv;
+ pl_inode_lock_t *tmp = NULL;
+ pl_inode_lock_t *l = NULL;
+ pl_dom_list_t *dom = NULL;
+ pl_inode_t *pl_inode = NULL;
+ struct list_head *pcontend = NULL;
+ struct list_head released;
+ struct list_head unwind;
+ struct list_head contend;
+ struct timespec now = {};
+
+ priv = this->private;
+
+ INIT_LIST_HEAD(&released);
+ INIT_LIST_HEAD(&unwind);
+
+ if (priv->notify_contention) {
+ pcontend = &contend;
+ INIT_LIST_HEAD(pcontend);
+ timespec_now(&now);
+ }
+
+ pthread_mutex_lock(&ctx->lock);
+ {
+ list_for_each_entry_safe(l, tmp, &ctx->inodelk_lockers, client_list)
{
-
- list_for_each_entry_safe (l, tmp, &dom->blocked_inodelks, blocked_locks) {
- if (l->transport != trans)
- continue;
-
- list_del_init (&l->blocked_locks);
-
- if (inode_path (inode, NULL, &path) < 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "inode_path failed");
- goto unlock;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "releasing lock on %s held by "
- "{transport=%p, pid=%"PRId64" lk-owner=%"PRIu64"}",
- path, trans,
- (uint64_t) l->client_pid,
- l->owner);
-
- list_add (&l->blocked_locks, &released);
-
+ pl_inodelk_log_cleanup(l);
+
+ pl_inode = l->pl_inode;
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ /* If the inodelk object is part of granted list but not
+ * blocked list, then perform the following actions:
+ * i. delete the object from granted list;
+ * ii. grant other locks (from other clients) that may
+ * have been blocked on this inodelk; and
+ * iii. unref the object.
+ *
+ * If the inodelk object (L1) is part of both granted
+ * and blocked lists, then this means that a parallel
+ * unlock on another inodelk (L2 say) may have 'granted'
+ * L1 and added it to 'granted' list in
+ * __grant_blocked_inode_locks() (although using the
+ * 'blocked_locks' member). In that case, the cleanup
+ * codepath must try and grant other overlapping
+ * blocked inodelks from other clients, now that L1 is
+ * out of their way and then unref L1 in the end, and
+ * leave it to the other thread (the one executing
+ * unlock codepath) to unwind L1's frame, delete it from
+ * blocked_locks list, and perform the last unref on L1.
+ *
+ * If the inodelk object (L1) is part of blocked list
+ * only, the cleanup code path must:
+ * i. delete it from the blocked_locks list inside
+ * this critical section,
+ * ii. unwind its frame with EAGAIN,
+ * iii. try and grant blocked inode locks from other
+ * clients that were otherwise grantable, but just
+ * got blocked to avoid leaving L1 to starve
+ * forever.
+ * iv. unref the object.
+ */
+ list_del_init(&l->client_list);
+
+ if (!list_empty(&l->list)) {
+ __delete_inode_lock(l);
+ list_add_tail(&l->client_list, &released);
+ } else {
+ list_del_init(&l->blocked_locks);
+ list_add_tail(&l->client_list, &unwind);
}
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+ }
+ pthread_mutex_unlock(&ctx->lock);
- list_for_each_entry_safe (l, tmp, &dom->inodelk_list, list) {
- if (l->transport != trans)
- continue;
+ if (!list_empty(&unwind)) {
+ list_for_each_entry_safe(l, tmp, &unwind, client_list)
+ {
+ list_del_init(&l->client_list);
- __delete_inode_lock (l);
- __destroy_inode_lock (l);
+ if (l->frame)
+ STACK_UNWIND_STRICT(inodelk, l->frame, -1, EAGAIN, NULL);
+ list_add_tail(&l->client_list, &released);
+ }
+ }
+ if (!list_empty(&released)) {
+ list_for_each_entry_safe(l, tmp, &released, client_list)
+ {
+ list_del_init(&l->client_list);
- if (inode_path (inode, NULL, &path) < 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "inode_path failed");
- goto unlock;
- }
+ pl_inode = l->pl_inode;
- gf_log (this->name, GF_LOG_TRACE,
- "releasing lock on %s held by "
- "{transport=%p, pid=%"PRId64" lk-owner=%"PRIu64"}",
- path, trans,
- (uint64_t) l->client_pid,
- l->owner);
+ dom = get_domain(pl_inode, l->volume);
+ grant_blocked_inode_locks(this, pl_inode, dom, &now, pcontend);
- }
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ __pl_inodelk_unref(l);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ inode_unref(pl_inode->inode);
}
-unlock:
- if (path)
- FREE (path);
-
- pthread_mutex_unlock (&pinode->mutex);
+ }
- list_for_each_entry_safe (l, tmp, &released, blocked_locks) {
- list_del_init (&l->blocked_locks);
+ if (pcontend != NULL) {
+ inodelk_contention_notify(this, pcontend);
+ }
- STACK_UNWIND_STRICT (inodelk, l->frame, -1, EAGAIN);
- FREE (l);
- }
-
- grant_blocked_inode_locks (this, pinode, dom);
- return 0;
+ return 0;
}
-
static int
-pl_inode_setlk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
- int can_block, pl_dom_list_t *dom)
-{
- int ret = -EINVAL;
- pl_inode_lock_t *retlock = NULL;
-
- pthread_mutex_lock (&pl_inode->mutex);
- {
- if (lock->fl_type != F_UNLCK) {
- ret = __lock_inodelk (this, pl_inode, lock, can_block, dom);
- if (ret == 0)
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => OK",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lock->owner,
- lock->fl_start,
- lock->fl_end);
-
- if (ret == -EAGAIN)
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => NOK",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lock->owner,
- lock->user_flock.l_start,
- lock->user_flock.l_len);
-
- goto out;
- }
-
-
- retlock = __inode_unlock_lock (this, lock, dom);
- if (!retlock) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Bad Unlock issued on Inode lock");
- ret = -EINVAL;
- goto out;
+pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
+ pl_inode_lock_t *lock, int can_block, pl_dom_list_t *dom,
+ inode_t *inode)
+{
+ posix_locks_private_t *priv = NULL;
+ int ret = -EINVAL;
+ pl_inode_lock_t *retlock = NULL;
+ gf_boolean_t unref = _gf_true;
+ gf_boolean_t need_inode_unref = _gf_false;
+ struct list_head *pcontend = NULL;
+ struct list_head contend;
+ struct list_head wake;
+ struct timespec now = {};
+ short fl_type;
+
+ lock->pl_inode = pl_inode;
+ fl_type = lock->fl_type;
+
+ priv = this->private;
+
+ /* Ideally, AFTER a successful lock (both blocking and non-blocking) or
+ * an unsuccessful blocking lock operation, the inode needs to be ref'd.
+ *
+ * But doing so might give room to a race where the lock-requesting
+ * client could send a DISCONNECT just before this thread refs the inode
+ * after the locking is done, and the epoll thread could unref the inode
+ * in cleanup which means the inode's refcount would come down to 0, and
+ * the call to pl_forget() at this point destroys @pl_inode. Now when
+ * the io-thread executing this function tries to access pl_inode,
+ * it could crash on account of illegal memory access.
+ *
+ * To get around this problem, the inode is ref'd once even before
+ * adding the lock into client_list as a precautionary measure.
+ * This way even if there are DISCONNECTs, there will always be 1 extra
+ * ref on the inode, so @pl_inode is still alive until after the
+ * current stack unwinds.
+ */
+ pl_inode->inode = inode_ref(inode);
+
+ if (priv->revocation_secs != 0) {
+ if (lock->fl_type != F_UNLCK) {
+ __inodelk_prune_stale(this, pl_inode, dom, lock);
+ } else if (priv->monkey_unlocking == _gf_true) {
+ if (pl_does_monkey_want_stuck_lock()) {
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ __pl_inodelk_unref(lock);
}
- __destroy_inode_lock (retlock);
-
- ret = 0;
+ pthread_mutex_unlock(&pl_inode->mutex);
+ inode_unref(pl_inode->inode);
+ gf_log(this->name, GF_LOG_WARNING,
+ "MONKEY LOCKING (forcing stuck lock)!");
+ return 0;
+ }
+ }
+ }
+
+ if (priv->notify_contention) {
+ pcontend = &contend;
+ INIT_LIST_HEAD(pcontend);
+ timespec_now(&now);
+ }
+
+ INIT_LIST_HEAD(&wake);
+
+ if (ctx)
+ pthread_mutex_lock(&ctx->lock);
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ if (lock->fl_type != F_UNLCK) {
+ ret = __lock_inodelk(this, pl_inode, lock, can_block, dom, &now,
+ pcontend);
+ if (ret == 0) {
+ lock->frame = NULL;
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64
+ " => OK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid, lkowner_utoa(&lock->owner),
+ lock->fl_start, lock->fl_end);
+ } else if (ret == -EAGAIN) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64
+ " => NOK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid, lkowner_utoa(&lock->owner),
+ lock->user_flock.l_start, lock->user_flock.l_len);
+ if (can_block) {
+ unref = _gf_false;
+ }
+ }
+ /* For all but the case where a non-blocking lock attempt fails
+ * with -EAGAIN, the extra ref taken at the start of this function
+ * must be negated. */
+ need_inode_unref = (ret != 0) && ((ret != -EAGAIN) || !can_block);
+ if (ctx && !need_inode_unref) {
+ list_add_tail(&lock->client_list, &ctx->inodelk_lockers);
+ }
+ } else {
+ /* Irrespective of whether unlock succeeds or not,
+ * the extra inode ref that was done at the start of
+ * this function must be negated. Towards this,
+ * @need_inode_unref flag is set unconditionally here.
+ */
+ need_inode_unref = _gf_true;
+ retlock = __inode_unlock_lock(this, lock, dom);
+ if (!retlock) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Bad Unlock issued on Inode lock");
+ ret = -EINVAL;
+ goto out;
+ }
+ list_del_init(&retlock->client_list);
+ __pl_inodelk_unref(retlock);
+ pl_inode_remove_unlocked(this, pl_inode, &wake);
- }
-out:
- pthread_mutex_unlock (&pl_inode->mutex);
- grant_blocked_inode_locks (this, pl_inode, dom);
- return ret;
+ ret = 0;
+ }
+ out:
+ if (unref)
+ __pl_inodelk_unref(lock);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ if (ctx)
+ pthread_mutex_unlock(&ctx->lock);
+
+ pl_inode_remove_wake(&wake);
+
+ /* The following (extra) unref corresponds to the ref that
+ * was done at the time the lock was granted.
+ */
+ if ((fl_type == F_UNLCK) && (ret == 0)) {
+ inode_unref(pl_inode->inode);
+ grant_blocked_inode_locks(this, pl_inode, dom, &now, pcontend);
+ }
+
+ if (need_inode_unref) {
+ inode_unref(pl_inode->inode);
+ }
+
+ if (pcontend != NULL) {
+ inodelk_contention_notify(this, pcontend);
+ }
+
+ return ret;
}
/* Create a new inode_lock_t */
-pl_inode_lock_t *
-new_inode_lock (struct flock *flock, transport_t *transport, pid_t client_pid,
- uint64_t owner, const char *volume)
+static pl_inode_lock_t *
+new_inode_lock(struct gf_flock *flock, client_t *client, pid_t client_pid,
+ call_frame_t *frame, xlator_t *this, const char *volume,
+ char *conn_id, int32_t *op_errno)
{
- pl_inode_lock_t *lock = NULL;
+ pl_inode_lock_t *lock = NULL;
+
+ if (!pl_is_lk_owner_valid(&frame->root->lk_owner, frame->root->client)) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ lock = GF_CALLOC(1, sizeof(*lock), gf_locks_mt_pl_inode_lock_t);
+ if (!lock) {
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ lock->fl_start = flock->l_start;
+ lock->fl_type = flock->l_type;
+
+ if (flock->l_len == 0)
+ lock->fl_end = LLONG_MAX;
+ else
+ lock->fl_end = flock->l_start + flock->l_len - 1;
+
+ lock->client = client;
+ lock->client_pid = client_pid;
+ lock->volume = volume;
+ lock->owner = frame->root->lk_owner;
+ lock->frame = frame;
+ lock->this = this;
+
+ if (conn_id) {
+ lock->connection_id = gf_strdup(conn_id);
+ }
+
+ INIT_LIST_HEAD(&lock->list);
+ INIT_LIST_HEAD(&lock->blocked_locks);
+ INIT_LIST_HEAD(&lock->client_list);
+ INIT_LIST_HEAD(&lock->contend);
+ __pl_inodelk_ref(lock);
+
+out:
+ return lock;
+}
- lock = CALLOC (1, sizeof (*lock));
- if (!lock) {
- return NULL;
- }
+int32_t
+_pl_convert_volume(const char *volume, char **res)
+{
+ char *mdata_vol = NULL;
+ int ret = 0;
- lock->fl_start = flock->l_start;
- lock->fl_type = flock->l_type;
+ mdata_vol = strrchr(volume, ':');
+ // if the volume already ends with :metadata don't bother
+ if (mdata_vol && (strcmp(mdata_vol, ":metadata") == 0))
+ return 0;
- if (flock->l_len == 0)
- lock->fl_end = LLONG_MAX;
- else
- lock->fl_end = flock->l_start + flock->l_len - 1;
+ ret = gf_asprintf(res, "%s:metadata", volume);
+ if (ret <= 0)
+ return ENOMEM;
+ return 0;
+}
- lock->transport = transport;
- lock->client_pid = client_pid;
- lock->owner = owner;
- lock->volume = volume;
+int32_t
+_pl_convert_volume_for_special_range(struct gf_flock *flock, const char *volume,
+ char **res)
+{
+ int32_t ret = 0;
- INIT_LIST_HEAD (&lock->list);
- INIT_LIST_HEAD (&lock->blocked_locks);
+ if ((flock->l_start == LLONG_MAX - 1) && (flock->l_len == 0)) {
+ ret = _pl_convert_volume(volume, res);
+ }
- return lock;
+ return ret;
}
/* Common inodelk code called from pl_inodelk and pl_finodelk */
int
-pl_common_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, inode_t *inode, int32_t cmd,
- struct flock *flock, loc_t *loc, fd_t *fd)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int ret = -1;
- int can_block = 0;
- transport_t * transport = NULL;
- pid_t client_pid = -1;
- uint64_t owner = -1;
- pl_inode_t * pinode = NULL;
- pl_inode_lock_t * reqlock = NULL;
- pl_dom_list_t * dom = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (inode, unwind);
- VALIDATE_OR_GOTO (flock, unwind);
-
- if ((flock->l_start < 0) || (flock->l_len < 0)) {
- op_errno = EINVAL;
- goto unwind;
- }
-
- pl_trace_in (this, frame, fd, loc, cmd, flock, volume);
-
- transport = frame->root->trans;
- client_pid = frame->root->pid;
- owner = frame->root->lk_owner;
-
- pinode = pl_inode_get (this, inode);
- if (!pinode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- goto unwind;
- }
-
- dom = get_domain (pinode, volume);
-
- if (client_pid == 0) {
- /*
- special case: this means release all locks
- from this transport
- */
- gf_log (this->name, GF_LOG_TRACE,
- "Releasing all locks from transport %p", transport);
-
- release_inode_locks_of_transport (this, dom, inode, transport);
- goto unwind;
- }
-
- reqlock = new_inode_lock (flock, transport, client_pid, owner, volume);
-
- if (!reqlock) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
-
- switch (cmd) {
- case F_SETLKW:
- can_block = 1;
- reqlock->frame = frame;
- reqlock->this = this;
-
- /* fall through */
-
- case F_SETLK:
- memcpy (&reqlock->user_flock, flock, sizeof (struct flock));
- ret = pl_inode_setlk (this, pinode, reqlock,
- can_block, dom);
-
- if (ret < 0) {
- if (can_block) {
- pl_trace_block (this, frame, fd, loc,
- cmd, flock, volume);
- goto out;
- }
- gf_log (this->name, GF_LOG_TRACE, "returning EAGAIN");
- op_errno = -ret;
- __destroy_inode_lock (reqlock);
- goto unwind;
- }
- break;
-
- default:
- op_errno = ENOTSUP;
- gf_log (this->name, GF_LOG_DEBUG,
- "Lock command F_GETLK not supported for [f]inodelk "
- "(cmd=%d)",
- cmd);
- goto unwind;
+pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ inode_t *inode, int32_t cmd, struct gf_flock *flock,
+ loc_t *loc, fd_t *fd, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int ret = -1;
+ GF_UNUSED int dict_ret = -1;
+ int can_block = 0;
+ short lock_type = 0;
+ pl_inode_t *pinode = NULL;
+ pl_inode_lock_t *reqlock = NULL;
+ pl_dom_list_t *dom = NULL;
+ char *res = NULL;
+ char *res1 = NULL;
+ char *conn_id = NULL;
+ pl_ctx_t *ctx = NULL;
+
+ if (xdata)
+ dict_ret = dict_get_str(xdata, "connection-id", &conn_id);
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(inode, unwind);
+ VALIDATE_OR_GOTO(flock, unwind);
+
+ if ((flock->l_start < 0) || (flock->l_len < 0)) {
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
+ op_errno = _pl_convert_volume_for_special_range(flock, volume, &res);
+ if (op_errno)
+ goto unwind;
+ if (res)
+ volume = res;
+
+ pl_trace_in(this, frame, fd, loc, cmd, flock, volume);
+
+ if (frame->root->client) {
+ ctx = pl_ctx_get(frame->root->client, this);
+ if (!ctx) {
+ op_errno = ENOMEM;
+ gf_log(this->name, GF_LOG_INFO, "pl_ctx_get() failed");
+ goto unwind;
}
+ }
+
+ pinode = pl_inode_get(this, inode, NULL);
+ if (!pinode) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ dom = get_domain(pinode, volume);
+ if (!dom) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ reqlock = new_inode_lock(flock, frame->root->client, frame->root->pid,
+ frame, this, dom->domain, conn_id, &op_errno);
+
+ if (!reqlock) {
+ op_ret = -1;
+ goto unwind;
+ }
+
+ switch (cmd) {
+ case F_SETLKW:
+ can_block = 1;
+
+ /* fall through */
+
+ case F_SETLK:
+ lock_type = flock->l_type;
+ memcpy(&reqlock->user_flock, flock, sizeof(struct gf_flock));
+ ret = pl_inode_setlk(this, ctx, pinode, reqlock, can_block, dom,
+ inode);
+
+ if (ret < 0) {
+ if (ret == -EAGAIN) {
+ if (can_block && (F_UNLCK != lock_type)) {
+ goto out;
+ }
+ gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN");
+ } else {
+ gf_log(this->name, GF_LOG_TRACE, "returning %d", ret);
+ }
+ op_errno = -ret;
+ goto unwind;
+ }
+ break;
+
+ default:
+ op_errno = ENOTSUP;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Lock command F_GETLK not supported for [f]inodelk "
+ "(cmd=%d)",
+ cmd);
+ goto unwind;
+ }
- op_ret = 0;
+ op_ret = 0;
unwind:
- if ((inode != NULL) && (flock !=NULL)) {
- pl_update_refkeeper (this, inode);
- pl_trace_out (this, frame, fd, loc, cmd, flock, op_ret, op_errno, volume);
- }
+ if (flock != NULL)
+ pl_trace_out(this, frame, fd, loc, cmd, flock, op_ret, op_errno,
+ volume);
- STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT(inodelk, frame, op_ret, op_errno, NULL);
out:
- return 0;
+ GF_FREE(res);
+ GF_FREE(res1);
+ return 0;
}
int
-pl_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct flock *flock)
+pl_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
+ pl_common_inodelk(frame, this, volume, loc->inode, cmd, flock, loc, NULL,
+ xdata);
- pl_common_inodelk (frame, this, volume, loc->inode, cmd, flock, loc, NULL);
-
- return 0;
+ return 0;
}
int
-pl_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct flock *flock)
+pl_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
+ pl_common_inodelk(frame, this, volume, fd->inode, cmd, flock, NULL, fd,
+ xdata);
- pl_common_inodelk (frame, this, volume, fd->inode, cmd, flock, NULL, fd);
-
- return 0;
-
+ return 0;
}
-
static int32_t
-__get_inodelk_count (xlator_t *this, pl_inode_t *pl_inode)
-{
- int32_t count = 0;
- pl_inode_lock_t *lock = NULL;
- pl_dom_list_t *dom = NULL;
-
- list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
- list_for_each_entry (lock, &dom->inodelk_list, list) {
-
- gf_log (this->name, GF_LOG_DEBUG,
- " XATTR DEBUG"
- " domain: %s %s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" "
- "state = Active",
- dom->domain,
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lock->owner,
- lock->user_flock.l_start,
- lock->user_flock.l_len);
-
- count++;
- }
-
- list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) {
-
- gf_log (this->name, GF_LOG_DEBUG,
- " XATTR DEBUG"
- " domain: %s %s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" "
- "state = Blocked",
- dom->domain,
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lock->owner,
- lock->user_flock.l_start,
- lock->user_flock.l_len);
+__get_inodelk_dom_count(pl_dom_list_t *dom)
+{
+ pl_inode_lock_t *lock = NULL;
+ int32_t count = 0;
+
+ list_for_each_entry(lock, &dom->inodelk_list, list) { count++; }
+ list_for_each_entry(lock, &dom->blocked_inodelks, blocked_locks)
+ {
+ count++;
+ }
+ return count;
+}
- count++;
- }
+/* Returns the no. of locks (blocked/granted) held on a given domain name
+ * If @domname is NULL, returns the no. of locks in all the domains present.
+ * If @domname is non-NULL and non-existent, returns 0 */
+int32_t
+__get_inodelk_count(xlator_t *this, pl_inode_t *pl_inode, char *domname)
+{
+ int32_t count = 0;
+ pl_dom_list_t *dom = NULL;
+
+ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
+ {
+ if (domname) {
+ if (strcmp(domname, dom->domain) == 0) {
+ count = __get_inodelk_dom_count(dom);
+ goto out;
+ }
+ } else {
+ /* Counting locks from all domains */
+ count += __get_inodelk_dom_count(dom);
}
+ }
- return count;
+out:
+ return count;
}
int32_t
-get_inodelk_count (xlator_t *this, inode_t *inode)
+get_inodelk_count(xlator_t *this, inode_t *inode, char *domname)
{
- pl_inode_t *pl_inode = NULL;
- uint64_t tmp_pl_inode = 0;
- int ret = 0;
- int32_t count = 0;
+ pl_inode_t *pl_inode = NULL;
+ uint64_t tmp_pl_inode = 0;
+ int ret = 0;
+ int32_t count = 0;
- ret = inode_ctx_get (inode, this, &tmp_pl_inode);
- if (ret != 0) {
- goto out;
- }
+ ret = inode_ctx_get(inode, this, &tmp_pl_inode);
+ if (ret != 0) {
+ goto out;
+ }
- pl_inode = (pl_inode_t *)(long) tmp_pl_inode;
+ pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
- pthread_mutex_lock (&pl_inode->mutex);
- {
- count = __get_inodelk_count (this, pl_inode);
- }
- pthread_mutex_unlock (&pl_inode->mutex);
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ count = __get_inodelk_count(this, pl_inode, domname);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
out:
- return count;
+ return count;
}