summary refs log tree commit diff stats
path: root/xlators/features/locks
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/features/locks')
-rw-r--r-- xlators/features/locks/src/clear.c 36
-rw-r--r-- xlators/features/locks/src/common.h 12
-rw-r--r-- xlators/features/locks/src/entrylk.c 290
-rw-r--r-- xlators/features/locks/src/inodelk.c 309
-rw-r--r-- xlators/features/locks/src/locks.h 8
-rw-r--r-- xlators/features/locks/src/pl-messages.h 4
-rw-r--r-- xlators/features/locks/src/posix.c 40
7 files changed, 540 insertions, 159 deletions
diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c
index a76d6beacb1..1609fc416d2 100644
--- a/xlators/features/locks/src/clear.c
+++ b/xlators/features/locks/src/clear.c
@@ -200,6 +200,7 @@ int
clrlk_clear_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
clrlk_args *args, int *blkd, int *granted, int *op_errno)
{
+ posix_locks_private_t *priv;
pl_inode_lock_t *ilock = NULL;
pl_inode_lock_t *tmp = NULL;
struct gf_flock ulock = {0, };
@@ -207,9 +208,20 @@ clrlk_clear_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
int bcount = 0;
int gcount = 0;
gf_boolean_t chk_range = _gf_false;
+ struct list_head *pcontend = NULL;
struct list_head released;
+ struct list_head contend;
+ struct timespec now = { };
INIT_LIST_HEAD (&released);
+
+ priv = this->private;
+ if (priv->notify_contention) {
+ pcontend = &contend;
+ INIT_LIST_HEAD (pcontend);
+ timespec_now(&now);
+ }
+
if (clrlk_get_lock_range (args->opts, &ulock, &chk_range)) {
*op_errno = EINVAL;
goto out;
@@ -283,7 +295,10 @@ granted:
ret = 0;
out:
- grant_blocked_inode_locks (this, pl_inode, dom);
+ grant_blocked_inode_locks (this, pl_inode, dom, &now, pcontend);
+ if (pcontend != NULL) {
+ inodelk_contention_notify(this, pcontend);
+ }
*blkd = bcount;
*granted = gcount;
return ret;
@@ -294,15 +309,27 @@ int
clrlk_clear_entrylk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
clrlk_args *args, int *blkd, int *granted, int *op_errno)
{
+ posix_locks_private_t *priv;
pl_entry_lock_t *elock = NULL;
pl_entry_lock_t *tmp = NULL;
int bcount = 0;
int gcount = 0;
int ret = -1;
+ struct list_head *pcontend = NULL;
struct list_head removed;
struct list_head released;
+ struct list_head contend;
+ struct timespec now;
INIT_LIST_HEAD (&released);
+
+ priv = this->private;
+ if (priv->notify_contention) {
+ pcontend = &contend;
+ INIT_LIST_HEAD (pcontend);
+ timespec_now(&now);
+ }
+
if (args->kind & CLRLK_BLOCKED)
goto blkd;
@@ -361,12 +388,15 @@ granted:
list_del_init (&elock->domain_list);
list_add_tail (&elock->domain_list, &removed);
- __pl_entrylk_unref (elock);
+ __pl_entrylk_unref (elock);
}
}
pthread_mutex_unlock (&pl_inode->mutex);
- grant_blocked_entry_locks (this, pl_inode, dom);
+ grant_blocked_entry_locks (this, pl_inode, dom, &now, pcontend);
+ if (pcontend != NULL) {
+ entrylk_contention_notify(this, pcontend);
+ }
ret = 0;
out:
diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h
index 3729ca24bed..50c156feb38 100644
--- a/xlators/features/locks/src/common.h
+++ b/xlators/features/locks/src/common.h
@@ -69,7 +69,11 @@ get_domain (pl_inode_t *pl_inode, const char *volume);
void
grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
- pl_dom_list_t *dom);
+ pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend);
+
+void
+inodelk_contention_notify (xlator_t *this, struct list_head *contend);
void
__delete_inode_lock (pl_inode_lock_t *lock);
@@ -79,7 +83,11 @@ __pl_inodelk_unref (pl_inode_lock_t *lock);
void
grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
- pl_dom_list_t *dom);
+ pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend);
+
+void
+entrylk_contention_notify (xlator_t *this, struct list_head *contend);
void pl_update_refkeeper (xlator_t *this, inode_t *inode);
diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c
index 6698516fc83..008d05a34c4 100644
--- a/xlators/features/locks/src/entrylk.c
+++ b/xlators/features/locks/src/entrylk.c
@@ -13,17 +13,19 @@
#include "logging.h"
#include "common-utils.h"
#include "list.h"
+#include "upcall-utils.h"
#include "locks.h"
#include "clear.h"
#include "common.h"
+#include "pl-messages.h"
void
__pl_entrylk_unref (pl_entry_lock_t *lock)
{
lock->ref--;
if (!lock->ref) {
- GF_FREE ((char *)lock->basename);
+ GF_FREE ((char *)lock->basename);
GF_FREE (lock->connection_id);
GF_FREE (lock);
}
@@ -39,7 +41,7 @@ __pl_entrylk_ref (pl_entry_lock_t *lock)
static pl_entry_lock_t *
new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type,
- const char *domain, call_frame_t *frame, char *conn_id)
+ const char *domain, call_frame_t *frame, char *conn_id)
{
pl_entry_lock_t *newlock = NULL;
@@ -55,7 +57,7 @@ new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type,
newlock->client_pid = frame->root->pid;
newlock->volume = domain;
newlock->owner = frame->root->lk_owner;
- newlock->frame = frame;
+ newlock->frame = frame;
newlock->this = frame->this;
if (conn_id) {
@@ -64,9 +66,9 @@ new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type,
INIT_LIST_HEAD (&newlock->domain_list);
INIT_LIST_HEAD (&newlock->blocked_locks);
- INIT_LIST_HEAD (&newlock->client_list);
+ INIT_LIST_HEAD (&newlock->client_list);
- __pl_entrylk_ref (newlock);
+ __pl_entrylk_ref (newlock);
out:
return newlock;
}
@@ -201,6 +203,113 @@ out:
return revoke_lock;
}
+static gf_boolean_t
+__entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
+ struct timespec *now)
+{
+ posix_locks_private_t *priv;
+ int64_t elapsed;
+
+ priv = this->private;
+
+ /* If this lock is in a list, it means that we are about to send a
+ * notification for it, so no need to do anything else. */
+ if (!list_empty(&lock->contend)) {
+ return _gf_false;
+ }
+
+ elapsed = now->tv_sec;
+ elapsed -= lock->contention_time.tv_sec;
+ if (now->tv_nsec < lock->contention_time.tv_nsec) {
+ elapsed--;
+ }
+ if (elapsed < priv->notify_contention_delay) {
+ return _gf_false;
+ }
+
+ /* All contention notifications will be sent outside of the locked
+ * region. This means that currently granted locks might have already
+ * been unlocked by that time. To prevent the lock or the inode from
+ * being destroyed before we process them, we take an additional
+ * reference on both. */
+ inode_ref(lock->pinode->inode);
+ __pl_entrylk_ref(lock);
+
+ lock->contention_time = *now;
+
+ return _gf_true;
+}
+
+void
+entrylk_contention_notify(xlator_t *this, struct list_head *contend)
+{
+ struct gf_upcall up;
+ struct gf_upcall_entrylk_contention lc;
+ pl_entry_lock_t *lock;
+ pl_inode_t *pl_inode;
+ client_t *client;
+ gf_boolean_t notify;
+
+ while (!list_empty(contend)) {
+ lock = list_first_entry(contend, pl_entry_lock_t, contend);
+
+ pl_inode = lock->pinode;
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ /* If the lock has already been released, no notification is
+ * sent. We clear the notification time in this case. */
+ notify = !list_empty(&lock->domain_list);
+ if (!notify) {
+ lock->contention_time.tv_sec = 0;
+ lock->contention_time.tv_nsec = 0;
+ } else {
+ lc.type = lock->type;
+ lc.name = lock->basename;
+ lc.pid = lock->client_pid;
+ lc.domain = lock->volume;
+ lc.xdata = NULL;
+
+ gf_uuid_copy(up.gfid, lock->pinode->gfid);
+ client = (client_t *)lock->client;
+ if (client == NULL) {
+ /* A NULL client can be found if the entrylk
+ * was issued by a server side xlator. */
+ up.client_uid = NULL;
+ } else {
+ up.client_uid = client->client_uid;
+ }
+ }
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ if (notify) {
+ up.event_type = GF_UPCALL_ENTRYLK_CONTENTION;
+ up.data = &lc;
+
+ if (this->notify(this, GF_EVENT_UPCALL, &up) < 0) {
+ gf_msg_debug(this->name, 0,
+ "Entrylk contention notification "
+ "failed");
+ } else {
+ gf_msg_debug(this->name, 0,
+ "Entrylk contention notification "
+ "sent");
+ }
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ list_del_init(&lock->contend);
+ __pl_entrylk_unref(lock);
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ inode_unref(pl_inode->inode);
+ }
+}
+
+
/**
* entrylk_grantable - is this lock grantable?
* @inode: inode in which to look
@@ -208,19 +317,27 @@ out:
* @type: type of lock
*/
static pl_entry_lock_t *
-__entrylk_grantable (pl_dom_list_t *dom, pl_entry_lock_t *lock)
+__entrylk_grantable (xlator_t *this, pl_dom_list_t *dom, pl_entry_lock_t *lock,
+ struct timespec *now, struct list_head *contend)
{
pl_entry_lock_t *tmp = NULL;
-
- if (list_empty (&dom->entrylk_list))
- return NULL;
+ pl_entry_lock_t *ret = NULL;
list_for_each_entry (tmp, &dom->entrylk_list, domain_list) {
- if (__conflicting_entrylks (tmp, lock))
- return tmp;
+ if (__conflicting_entrylks (tmp, lock)) {
+ if (ret == NULL) {
+ ret = tmp;
+ if (contend == NULL) {
+ break;
+ }
+ }
+ if (__entrylk_needs_contention_notify(this, tmp, now)) {
+ list_add_tail(&tmp->contend, contend);
+ }
+ }
}
- return NULL;
+ return ret;
}
static pl_entry_lock_t *
@@ -228,9 +345,6 @@ __blocked_entrylk_conflict (pl_dom_list_t *dom, pl_entry_lock_t *lock)
{
pl_entry_lock_t *tmp = NULL;
- if (list_empty (&dom->blocked_entrylks))
- return NULL;
-
list_for_each_entry (tmp, &dom->blocked_entrylks, blocked_locks) {
if (names_conflict (tmp->basename, lock->basename))
return lock;
@@ -426,6 +540,27 @@ __find_matching_lock (pl_dom_list_t *dom, pl_entry_lock_t *lock)
return NULL;
}
+static int
+__lock_blocked_add(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom,
+ pl_entry_lock_t *lock, int nonblock)
+{
+ struct timeval now;
+
+ gettimeofday(&now, NULL);
+
+ if (nonblock)
+ goto out;
+
+ lock->blkd_time = now;
+ list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks);
+
+ gf_msg_trace (this->name, 0, "Blocking lock: {pinode=%p, basename=%s}",
+ pinode, lock->basename);
+
+out:
+ return -EAGAIN;
+}
+
/**
* __lock_entrylk - lock a name in a directory
* @inode: inode for the directory in which to lock
@@ -439,24 +574,15 @@ __find_matching_lock (pl_dom_list_t *dom, pl_entry_lock_t *lock)
int
__lock_entrylk (xlator_t *this, pl_inode_t *pinode, pl_entry_lock_t *lock,
- int nonblock, pl_dom_list_t *dom)
+ int nonblock, pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend)
{
pl_entry_lock_t *conf = NULL;
int ret = -EAGAIN;
- conf = __entrylk_grantable (dom, lock);
+ conf = __entrylk_grantable (this, dom, lock, now, contend);
if (conf) {
- ret = -EAGAIN;
- if (nonblock)
- goto out;
-
- gettimeofday (&lock->blkd_time, NULL);
- list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks);
-
- gf_log (this->name, GF_LOG_TRACE,
- "Blocking lock: {pinode=%p, basename=%s}",
- pinode, lock->basename);
-
+ ret = __lock_blocked_add(this, pinode, dom, lock, nonblock);
goto out;
}
@@ -471,20 +597,15 @@ __lock_entrylk (xlator_t *this, pl_inode_t *pinode, pl_entry_lock_t *lock,
* granted, without which self-heal can't progress.
* TODO: Find why 'owner_has_lock' is checked even for blocked locks.
*/
- if (__blocked_entrylk_conflict (dom, lock) && !(__owner_has_lock (dom, lock))) {
- ret = -EAGAIN;
- if (nonblock)
- goto out;
-
- gettimeofday (&lock->blkd_time, NULL);
- list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Lock is grantable, but blocking to prevent starvation");
- gf_log (this->name, GF_LOG_TRACE,
- "Blocking lock: {pinode=%p, basename=%s}",
- pinode, lock->basename);
+ if (__blocked_entrylk_conflict (dom, lock) &&
+ !(__owner_has_lock (dom, lock))) {
+ if (nonblock == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Lock is grantable, but blocking to prevent "
+ "starvation");
+ }
+ ret = __lock_blocked_add(this, pinode, dom, lock, nonblock);
goto out;
}
@@ -551,7 +672,8 @@ out:
void
__grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
- pl_dom_list_t *dom, struct list_head *granted)
+ pl_dom_list_t *dom, struct list_head *granted,
+ struct timespec *now, struct list_head *contend)
{
int bl_ret = 0;
pl_entry_lock_t *bl = NULL;
@@ -566,7 +688,8 @@ __grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
list_del_init (&bl->blocked_locks);
- bl_ret = __lock_entrylk (bl->this, pl_inode, bl, 0, dom);
+ bl_ret = __lock_entrylk (bl->this, pl_inode, bl, 0, dom, now,
+ contend);
if (bl_ret == 0) {
list_add (&bl->blocked_locks, granted);
@@ -578,7 +701,8 @@ __grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
/* Grants locks if possible which are blocked on a lock */
void
grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
- pl_dom_list_t *dom)
+ pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend)
{
struct list_head granted_list;
pl_entry_lock_t *tmp = NULL;
@@ -589,7 +713,7 @@ grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
pthread_mutex_lock (&pl_inode->mutex);
{
__grant_blocked_entry_locks (this, pl_inode, dom,
- &granted_list);
+ &granted_list, now, contend);
}
pthread_mutex_unlock (&pl_inode->mutex);
@@ -610,8 +734,6 @@ grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
}
}
pthread_mutex_unlock (&pl_inode->mutex);
-
- return;
}
@@ -637,9 +759,18 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
int nonblock = 0;
gf_boolean_t need_inode_unref = _gf_false;
posix_locks_private_t *priv = NULL;
+ struct list_head *pcontend = NULL;
+ struct list_head contend;
+ struct timespec now = { };
priv = this->private;
+ if (priv->notify_contention) {
+ pcontend = &contend;
+ INIT_LIST_HEAD(pcontend);
+ timespec_now(&now);
+ }
+
if (xdata)
dict_ret = dict_get_str (xdata, "connection-id", &conn_id);
@@ -722,7 +853,8 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
{
reqlock->pinode = pinode;
- ret = __lock_entrylk (this, pinode, reqlock, nonblock, dom);
+ ret = __lock_entrylk (this, pinode, reqlock, nonblock,
+ dom, &now, pcontend);
if (ret == 0) {
reqlock->frame = NULL;
op_ret = 0;
@@ -778,7 +910,7 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
if (ctx)
pthread_mutex_unlock (&ctx->lock);
- grant_blocked_entry_locks (this, pinode, dom);
+ grant_blocked_entry_locks (this, pinode, dom, &now, pcontend);
break;
@@ -810,6 +942,10 @@ unwind:
cmd, type);
}
+ if (pcontend != NULL) {
+ entrylk_contention_notify(this, pcontend);
+ }
+
return 0;
}
@@ -868,27 +1004,37 @@ pl_entrylk_log_cleanup (pl_entry_lock_t *lock)
int
pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
{
+ posix_locks_private_t *priv;
pl_entry_lock_t *tmp = NULL;
pl_entry_lock_t *l = NULL;
- pl_dom_list_t *dom = NULL;
+ pl_dom_list_t *dom = NULL;
pl_inode_t *pinode = NULL;
-
+ struct list_head *pcontend = NULL;
struct list_head released;
struct list_head unwind;
+ struct list_head contend;
+ struct timespec now = { };
INIT_LIST_HEAD (&released);
INIT_LIST_HEAD (&unwind);
- pthread_mutex_lock (&ctx->lock);
+ priv = this->private;
+ if (priv->notify_contention) {
+ pcontend = &contend;
+ INIT_LIST_HEAD (pcontend);
+ timespec_now(&now);
+ }
+
+ pthread_mutex_lock (&ctx->lock);
{
list_for_each_entry_safe (l, tmp, &ctx->entrylk_lockers,
- client_list) {
- pl_entrylk_log_cleanup (l);
+ client_list) {
+ pl_entrylk_log_cleanup (l);
- pinode = l->pinode;
+ pinode = l->pinode;
- pthread_mutex_lock (&pinode->mutex);
- {
+ pthread_mutex_lock (&pinode->mutex);
+ {
/* If the entrylk object is part of granted list but not
* blocked list, then perform the following actions:
* i. delete the object from granted list;
@@ -931,38 +1077,42 @@ pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
&unwind);
}
}
- pthread_mutex_unlock (&pinode->mutex);
+ pthread_mutex_unlock (&pinode->mutex);
}
- }
+ }
pthread_mutex_unlock (&ctx->lock);
list_for_each_entry_safe (l, tmp, &unwind, client_list) {
list_del_init (&l->client_list);
- if (l->frame)
- STACK_UNWIND_STRICT (entrylk, l->frame, -1, EAGAIN,
- NULL);
+ if (l->frame)
+ STACK_UNWIND_STRICT (entrylk, l->frame, -1, EAGAIN,
+ NULL);
list_add_tail (&l->client_list, &released);
}
list_for_each_entry_safe (l, tmp, &released, client_list) {
list_del_init (&l->client_list);
- pinode = l->pinode;
+ pinode = l->pinode;
- dom = get_domain (pinode, l->volume);
+ dom = get_domain (pinode, l->volume);
- grant_blocked_entry_locks (this, pinode, dom);
+ grant_blocked_entry_locks (this, pinode, dom, &now, pcontend);
- pthread_mutex_lock (&pinode->mutex);
- {
- __pl_entrylk_unref (l);
- }
- pthread_mutex_unlock (&pinode->mutex);
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ __pl_entrylk_unref (l);
+ }
+ pthread_mutex_unlock (&pinode->mutex);
inode_unref (pinode->inode);
}
+ if (pcontend != NULL) {
+ entrylk_contention_notify(this, pcontend);
+ }
+
return 0;
}
diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c
index 64ffb00c18c..890ac8b6d00 100644
--- a/xlators/features/locks/src/inodelk.c
+++ b/xlators/features/locks/src/inodelk.c
@@ -13,10 +13,12 @@
#include "logging.h"
#include "common-utils.h"
#include "list.h"
+#include "upcall-utils.h"
#include "locks.h"
#include "clear.h"
#include "common.h"
+#include "pl-messages.h"
void
__delete_inode_lock (pl_inode_lock_t *lock)
@@ -229,22 +231,134 @@ out:
return revoke_lock;
}
+static gf_boolean_t
+__inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock,
+ struct timespec *now)
+{
+ posix_locks_private_t *priv;
+ int64_t elapsed;
+
+ priv = this->private;
+
+ /* If this lock is in a list, it means that we are about to send a
+ * notification for it, so no need to do anything else. */
+ if (!list_empty(&lock->contend)) {
+ return _gf_false;
+ }
+
+ elapsed = now->tv_sec;
+ elapsed -= lock->contention_time.tv_sec;
+ if (now->tv_nsec < lock->contention_time.tv_nsec) {
+ elapsed--;
+ }
+ if (elapsed < priv->notify_contention_delay) {
+ return _gf_false;
+ }
+
+ /* All contention notifications will be sent outside of the locked
+ * region. This means that currently granted locks might have already
+ * been unlocked by that time. To prevent the lock or the inode from
+ * being destroyed before we process them, we take an additional
+ * reference on both. */
+ inode_ref(lock->pl_inode->inode);
+ __pl_inodelk_ref(lock);
+
+ lock->contention_time = *now;
+
+ return _gf_true;
+}
+
+void
+inodelk_contention_notify(xlator_t *this, struct list_head *contend)
+{
+ struct gf_upcall up;
+ struct gf_upcall_inodelk_contention lc;
+ pl_inode_lock_t *lock;
+ pl_inode_t *pl_inode;
+ client_t *client;
+ gf_boolean_t notify;
+
+ while (!list_empty(contend)) {
+ lock = list_first_entry(contend, pl_inode_lock_t, contend);
+
+ pl_inode = lock->pl_inode;
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ /* If the lock has already been released, no notification is
+ * sent. We clear the notification time in this case. */
+ notify = !list_empty(&lock->list);
+ if (!notify) {
+ lock->contention_time.tv_sec = 0;
+ lock->contention_time.tv_nsec = 0;
+ } else {
+ memcpy(&lc.flock, &lock->user_flock, sizeof(lc.flock));
+ lc.pid = lock->client_pid;
+ lc.domain = lock->volume;
+ lc.xdata = NULL;
+
+ gf_uuid_copy(up.gfid, lock->pl_inode->gfid);
+ client = (client_t *)lock->client;
+ if (client == NULL) {
+ /* A NULL client can be found if the inodelk
+ * was issued by a server side xlator. */
+ up.client_uid = NULL;
+ } else {
+ up.client_uid = client->client_uid;
+ }
+ }
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ if (notify) {
+ up.event_type = GF_UPCALL_INODELK_CONTENTION;
+ up.data = &lc;
+
+ if (this->notify(this, GF_EVENT_UPCALL, &up) < 0) {
+ gf_msg_debug(this->name, 0,
+ "Inodelk contention notification "
+ "failed");
+ } else {
+ gf_msg_debug(this->name, 0,
+ "Inodelk contention notification "
+ "sent");
+ }
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ list_del_init(&lock->contend);
+ __pl_inodelk_unref(lock);
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ inode_unref(pl_inode->inode);
+ }
+}
+
/* Determine if lock is grantable or not */
static pl_inode_lock_t *
-__inodelk_grantable (pl_dom_list_t *dom, pl_inode_lock_t *lock)
+__inodelk_grantable (xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock,
+ struct timespec *now, struct list_head *contend)
{
pl_inode_lock_t *l = NULL;
pl_inode_lock_t *ret = NULL;
- if (list_empty (&dom->inodelk_list))
- goto out;
+
list_for_each_entry (l, &dom->inodelk_list, list){
if (inodelk_conflict (lock, l) &&
!same_inodelk_owner (lock, l)) {
- ret = l;
- goto out;
+ if (ret == NULL) {
+ ret = l;
+ if (contend == NULL) {
+ break;
+ }
+ }
+ if (__inodelk_needs_contention_notify(this, l, now)) {
+ list_add_tail(&l->contend, contend);
+ }
}
}
-out:
+
return ret;
}
@@ -252,20 +366,14 @@ static pl_inode_lock_t *
__blocked_lock_conflict (pl_dom_list_t *dom, pl_inode_lock_t *lock)
{
pl_inode_lock_t *l = NULL;
- pl_inode_lock_t *ret = NULL;
-
- if (list_empty (&dom->blocked_inodelks))
- return NULL;
list_for_each_entry (l, &dom->blocked_inodelks, blocked_locks) {
if (inodelk_conflict (lock, l)) {
- ret = l;
- goto out;
+ return l;
}
}
-out:
- return ret;
+ return NULL;
}
static int
@@ -286,35 +394,45 @@ __owner_has_lock (pl_dom_list_t *dom, pl_inode_lock_t *newlock)
return 0;
}
+static int
+__lock_blocked_add(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock,
+ int can_block)
+{
+ struct timeval now;
+
+ gettimeofday(&now, NULL);
+
+ if (can_block == 0) {
+ goto out;
+ }
+
+ lock->blkd_time = now;
+ list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks);
+
+ gf_msg_trace (this->name, 0, "%s (pid=%d) (lk-owner=%s) %"PRId64" - "
+ "%"PRId64" => Blocked",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid, lkowner_utoa (&lock->owner),
+ lock->user_flock.l_start, lock->user_flock.l_len);
+
+out:
+ return -EAGAIN;
+}
/* Determines if lock can be granted and adds the lock. If the lock
* is blocking, adds it to the blocked_inodelks list of the domain.
*/
static int
__lock_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
- int can_block, pl_dom_list_t *dom)
+ int can_block, pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend)
{
pl_inode_lock_t *conf = NULL;
int ret = -EINVAL;
- conf = __inodelk_grantable (dom, lock);
+ conf = __inodelk_grantable (this, dom, lock, now, contend);
if (conf) {
- ret = -EAGAIN;
- if (can_block == 0)
- goto out;
-
- gettimeofday (&lock->blkd_time, NULL);
- list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks);
-
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lkowner_utoa (&lock->owner),
- lock->user_flock.l_start,
- lock->user_flock.l_len);
-
-
+ ret = __lock_blocked_add(this, dom, lock, can_block);
goto out;
}
@@ -330,25 +448,15 @@ __lock_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
* will not be unlocked by SHD from Machine1.
* TODO: Find why 'owner_has_lock' is checked even for blocked locks.
*/
- if (__blocked_lock_conflict (dom, lock) && !(__owner_has_lock (dom, lock))) {
- ret = -EAGAIN;
- if (can_block == 0)
- goto out;
-
- gettimeofday (&lock->blkd_time, NULL);
- list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Lock is grantable, but blocking to prevent starvation");
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Blocked",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lkowner_utoa (&lock->owner),
- lock->user_flock.l_start,
- lock->user_flock.l_len);
-
+ if (__blocked_lock_conflict (dom, lock) &&
+ !(__owner_has_lock (dom, lock))) {
+ if (can_block != 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Lock is grantable, but blocking to prevent "
+ "starvation");
+ }
+ ret = __lock_blocked_add(this, dom, lock, can_block);
goto out;
}
__pl_inodelk_ref (lock);
@@ -417,7 +525,8 @@ out:
static void
__grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
- struct list_head *granted, pl_dom_list_t *dom)
+ struct list_head *granted, pl_dom_list_t *dom,
+ struct timespec *now, struct list_head *contend)
{
int bl_ret = 0;
pl_inode_lock_t *bl = NULL;
@@ -432,7 +541,8 @@ __grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
list_del_init (&bl->blocked_locks);
- bl_ret = __lock_inodelk (this, pl_inode, bl, 1, dom);
+ bl_ret = __lock_inodelk (this, pl_inode, bl, 1, dom, now,
+ contend);
if (bl_ret == 0) {
list_add (&bl->blocked_locks, granted);
@@ -444,7 +554,8 @@ __grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
/* Grant all inodelks blocked on a lock */
void
grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
- pl_dom_list_t *dom)
+ pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend)
{
struct list_head granted;
pl_inode_lock_t *lock;
@@ -454,7 +565,8 @@ grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
pthread_mutex_lock (&pl_inode->mutex);
{
- __grant_blocked_inode_locks (this, pl_inode, &granted, dom);
+ __grant_blocked_inode_locks (this, pl_inode, &granted, dom,
+ now, contend);
}
pthread_mutex_unlock (&pl_inode->mutex);
@@ -471,7 +583,7 @@ grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
&lock->user_flock, 0, 0, lock->volume);
STACK_UNWIND_STRICT (inodelk, lock->frame, 0, 0, NULL);
- lock->frame = NULL;
+ lock->frame = NULL;
}
pthread_mutex_lock (&pl_inode->mutex);
@@ -488,9 +600,9 @@ grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
static void
pl_inodelk_log_cleanup (pl_inode_lock_t *lock)
{
- pl_inode_t *pl_inode = NULL;
+ pl_inode_t *pl_inode = NULL;
- pl_inode = lock->pl_inode;
+ pl_inode = lock->pl_inode;
gf_log (THIS->name, GF_LOG_WARNING, "releasing lock on %s held by "
"{client=%p, pid=%"PRId64" lk-owner=%s}",
@@ -503,27 +615,38 @@ pl_inodelk_log_cleanup (pl_inode_lock_t *lock)
int
pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
{
+ posix_locks_private_t *priv;
pl_inode_lock_t *tmp = NULL;
pl_inode_lock_t *l = NULL;
- pl_dom_list_t *dom = NULL;
+ pl_dom_list_t *dom = NULL;
pl_inode_t *pl_inode = NULL;
-
+ struct list_head *pcontend = NULL;
struct list_head released;
struct list_head unwind;
+ struct list_head contend;
+ struct timespec now = { };
+
+ priv = this->private;
INIT_LIST_HEAD (&released);
INIT_LIST_HEAD (&unwind);
- pthread_mutex_lock (&ctx->lock);
+ if (priv->notify_contention) {
+ pcontend = &contend;
+ INIT_LIST_HEAD (pcontend);
+ timespec_now(&now);
+ }
+
+ pthread_mutex_lock (&ctx->lock);
{
list_for_each_entry_safe (l, tmp, &ctx->inodelk_lockers,
- client_list) {
- pl_inodelk_log_cleanup (l);
+ client_list) {
+ pl_inodelk_log_cleanup (l);
- pl_inode = l->pl_inode;
+ pl_inode = l->pl_inode;
- pthread_mutex_lock (&pl_inode->mutex);
- {
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
/* If the inodelk object is part of granted list but not
* blocked list, then perform the following actions:
* i. delete the object from granted list;
@@ -567,45 +690,49 @@ pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
&unwind);
}
}
- pthread_mutex_unlock (&pl_inode->mutex);
+ pthread_mutex_unlock (&pl_inode->mutex);
}
- }
+ }
pthread_mutex_unlock (&ctx->lock);
list_for_each_entry_safe (l, tmp, &unwind, client_list) {
list_del_init (&l->client_list);
if (l->frame)
- STACK_UNWIND_STRICT (inodelk, l->frame, -1, EAGAIN,
- NULL);
+ STACK_UNWIND_STRICT (inodelk, l->frame, -1, EAGAIN,
+ NULL);
list_add_tail (&l->client_list, &released);
-
}
list_for_each_entry_safe (l, tmp, &released, client_list) {
list_del_init (&l->client_list);
- pl_inode = l->pl_inode;
+ pl_inode = l->pl_inode;
- dom = get_domain (pl_inode, l->volume);
+ dom = get_domain (pl_inode, l->volume);
- grant_blocked_inode_locks (this, pl_inode, dom);
+ grant_blocked_inode_locks (this, pl_inode, dom, &now,
+ pcontend);
- pthread_mutex_lock (&pl_inode->mutex);
- {
- __pl_inodelk_unref (l);
- }
- pthread_mutex_unlock (&pl_inode->mutex);
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __pl_inodelk_unref (l);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
inode_unref (pl_inode->inode);
}
+ if (pcontend != NULL) {
+ inodelk_contention_notify(this, pcontend);
+ }
+
return 0;
}
static int
pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
- pl_inode_lock_t *lock, int can_block, pl_dom_list_t *dom,
+ pl_inode_lock_t *lock, int can_block, pl_dom_list_t *dom,
inode_t *inode)
{
posix_locks_private_t *priv = NULL;
@@ -613,9 +740,12 @@ pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
pl_inode_lock_t *retlock = NULL;
gf_boolean_t unref = _gf_true;
gf_boolean_t need_inode_unref = _gf_false;
+ struct list_head *pcontend = NULL;
+ struct list_head contend;
+ struct timespec now = { };
short fl_type;
- lock->pl_inode = pl_inode;
+ lock->pl_inode = pl_inode;
fl_type = lock->fl_type;
priv = this->private;
@@ -657,12 +787,19 @@ pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
}
}
+ if (priv->notify_contention) {
+ pcontend = &contend;
+ INIT_LIST_HEAD(pcontend);
+ timespec_now(&now);
+ }
+
if (ctx)
pthread_mutex_lock (&ctx->lock);
pthread_mutex_lock (&pl_inode->mutex);
{
if (lock->fl_type != F_UNLCK) {
- ret = __lock_inodelk (this, pl_inode, lock, can_block, dom);
+ ret = __lock_inodelk (this, pl_inode, lock, can_block,
+ dom, &now, pcontend);
if (ret == 0) {
lock->frame = NULL;
gf_log (this->name, GF_LOG_TRACE,
@@ -725,13 +862,18 @@ out:
*/
if ((fl_type == F_UNLCK) && (ret == 0)) {
inode_unref (pl_inode->inode);
- grant_blocked_inode_locks (this, pl_inode, dom);
+ grant_blocked_inode_locks (this, pl_inode, dom, &now,
+ pcontend);
}
if (need_inode_unref) {
inode_unref (pl_inode->inode);
}
+ if (pcontend != NULL) {
+ inodelk_contention_notify(this, pcontend);
+ }
+
return ret;
}
@@ -771,7 +913,8 @@ new_inode_lock (struct gf_flock *flock, client_t *client, pid_t client_pid,
INIT_LIST_HEAD (&lock->list);
INIT_LIST_HEAD (&lock->blocked_locks);
- INIT_LIST_HEAD (&lock->client_list);
+ INIT_LIST_HEAD (&lock->client_list);
+ INIT_LIST_HEAD (&lock->contend);
__pl_inodelk_ref (lock);
return lock;
diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
index 3d3b327f56c..c2edfff8f00 100644
--- a/xlators/features/locks/src/locks.h
+++ b/xlators/features/locks/src/locks.h
@@ -70,6 +70,7 @@ typedef struct __posix_lock posix_lock_t;
struct __pl_inode_lock {
struct list_head list;
struct list_head blocked_locks; /* list_head pointing to blocked_inodelks */
+ struct list_head contend; /* list of contending locks */
int ref;
short fl_type;
@@ -86,6 +87,8 @@ struct __pl_inode_lock {
struct timeval blkd_time; /*time at which lock was queued into blkd list*/
struct timeval granted_time; /*time at which lock was queued into active list*/
+ /*last time at which lock contention was detected and notified*/
+ struct timespec contention_time;
/* These two together serve to uniquely identify each process
across nodes */
@@ -120,6 +123,7 @@ typedef struct _pl_dom_list pl_dom_list_t;
struct __entry_lock {
struct list_head domain_list; /* list_head back to pl_dom_list_t */
struct list_head blocked_locks; /* list_head back to blocked_entrylks */
+ struct list_head contend; /* list of contending locks */
int ref;
call_frame_t *frame;
@@ -133,6 +137,8 @@ struct __entry_lock {
struct timeval blkd_time; /*time at which lock was queued into blkd list*/
struct timeval granted_time; /*time at which lock was queued into active list*/
+ /*last time at which lock contention was detected and notified*/
+ struct timespec contention_time;
void *client;
gf_lkowner_t owner;
@@ -194,6 +200,8 @@ typedef struct {
uint32_t revocation_secs;
gf_boolean_t revocation_clear_all;
uint32_t revocation_max_blocked;
+ gf_boolean_t notify_contention;
+ uint32_t notify_contention_delay;
} posix_locks_private_t;
diff --git a/xlators/features/locks/src/pl-messages.h b/xlators/features/locks/src/pl-messages.h
index 7a1e3f488e7..e5a276f35b5 100644
--- a/xlators/features/locks/src/pl-messages.h
+++ b/xlators/features/locks/src/pl-messages.h
@@ -24,7 +24,9 @@
*/
GLFS_MSGID(PL,
- PL_MSG_LOCK_NUMBER
+ PL_MSG_LOCK_NUMBER,
+ PL_MSG_INODELK_CONTENTION_FAILED,
+ PL_MSG_ENTRYLK_CONTENTION_FAILED
);
#endif /* !_PL_MESSAGES_H_ */
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index 78bf160058c..82d77db1164 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -3641,6 +3641,13 @@ reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("revocation-max-blocked",
priv->revocation_max_blocked, options,
uint32, out);
+
+ GF_OPTION_RECONF ("notify-contention", priv->notify_contention,
+ options, bool, out);
+
+ GF_OPTION_RECONF ("notify-contention-delay",
+ priv->notify_contention_delay, options, uint32, out);
+
ret = 0;
out:
@@ -3705,6 +3712,12 @@ init (xlator_t *this)
GF_OPTION_INIT ("revocation-max-blocked", priv->revocation_max_blocked,
uint32, out);
+ GF_OPTION_INIT ("notify-contention", priv->notify_contention, bool,
+ out);
+
+ GF_OPTION_INIT ("notify-contention-delay",
+ priv->notify_contention_delay, uint32, out);
+
this->local_pool = mem_pool_new (pl_local_t, 32);
if (!this->local_pool) {
ret = -1;
@@ -4461,5 +4474,32 @@ struct volume_options options[] = {
"will be revoked to allow the others to proceed. Can "
"be used in conjunction w/ revocation-clear-all."
},
+ { .key = {"notify-contention"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .op_version = { GD_OP_VERSION_4_0_0 },
+ .tags = { "locks", "contention" },
+ .description = "When this option is enabled and a lock request "
+ "conflicts with a currently granted lock, an upcall "
+ "notification will be sent to the current owner of "
+ "the lock to request it to be released as soon as "
+ "possible."
+ },
+ { .key = {"notify-contention-delay"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0, /* A value of 0 means an upcall notification is sent every
+ * time a conflict is detected. */
+ .max = 60,
+ .default_value = "5",
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .op_version = { GD_OP_VERSION_4_0_0 },
+ .tags = { "locks", "contention", "timeout" },
+ .description = "This value determines the minimum amount of time "
+ "(in seconds) between upcall contention notifications "
+ "on the same inode. If multiple lock requests are "
+ "received during this period, only one upcall will "
+ "be sent."
+ },
{ .key = {NULL} },
};