summary refs log tree commit diff stats
path: root/xlators/features/locks/src/entrylk.c
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/features/locks/src/entrylk.c')
-rw-r--r-- xlators/features/locks/src/entrylk.c 290
1 files changed, 220 insertions, 70 deletions
diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c
index 6698516fc83..008d05a34c4 100644
--- a/xlators/features/locks/src/entrylk.c
+++ b/xlators/features/locks/src/entrylk.c
@@ -13,17 +13,19 @@
#include "logging.h"
#include "common-utils.h"
#include "list.h"
+#include "upcall-utils.h"
#include "locks.h"
#include "clear.h"
#include "common.h"
+#include "pl-messages.h"
void
__pl_entrylk_unref (pl_entry_lock_t *lock)
{
lock->ref--;
if (!lock->ref) {
- GF_FREE ((char *)lock->basename);
+ GF_FREE ((char *)lock->basename);
GF_FREE (lock->connection_id);
GF_FREE (lock);
}
@@ -39,7 +41,7 @@ __pl_entrylk_ref (pl_entry_lock_t *lock)
static pl_entry_lock_t *
new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type,
- const char *domain, call_frame_t *frame, char *conn_id)
+ const char *domain, call_frame_t *frame, char *conn_id)
{
pl_entry_lock_t *newlock = NULL;
@@ -55,7 +57,7 @@ new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type,
newlock->client_pid = frame->root->pid;
newlock->volume = domain;
newlock->owner = frame->root->lk_owner;
- newlock->frame = frame;
+ newlock->frame = frame;
newlock->this = frame->this;
if (conn_id) {
@@ -64,9 +66,9 @@ new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type,
INIT_LIST_HEAD (&newlock->domain_list);
INIT_LIST_HEAD (&newlock->blocked_locks);
- INIT_LIST_HEAD (&newlock->client_list);
+ INIT_LIST_HEAD (&newlock->client_list);
- __pl_entrylk_ref (newlock);
+ __pl_entrylk_ref (newlock);
out:
return newlock;
}
@@ -201,6 +203,113 @@ out:
return revoke_lock;
}
+/*
+ * Decide whether a contention notification has to be queued for 'lock'.
+ *
+ * Returns _gf_false when the lock is already linked through its 'contend'
+ * member, or when fewer than priv->notify_contention_delay whole seconds
+ * separate lock->contention_time from 'now'. Otherwise one reference is
+ * taken on the inode and one on the lock, 'now' is stored as the new
+ * contention time and _gf_true is returned.
+ *
+ * NOTE(review): presumably called with the pl_inode mutex held, as the
+ * lock is inspected and modified without further locking - confirm
+ * against the callers.
+ */
+static gf_boolean_t
+__entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
+                                  struct timespec *now)
+{
+    posix_locks_private_t *conf = this->private;
+    int64_t secs;
+
+    /* A lock that is already linked into a list has a notification
+     * pending, so nothing else needs to be done for it. */
+    if (!list_empty(&lock->contend)) {
+        return _gf_false;
+    }
+
+    /* Whole seconds since the previous notification; one second is
+     * borrowed when the nanosecond part has not caught up yet. */
+    secs = (int64_t)now->tv_sec - (int64_t)lock->contention_time.tv_sec;
+    if (now->tv_nsec < lock->contention_time.tv_nsec) {
+        secs -= 1;
+    }
+
+    if (secs < conf->notify_contention_delay) {
+        return _gf_false;
+    }
+
+    /* Notifications are sent outside of the locked region, so a lock
+     * that is granted now may already have been released by then. The
+     * extra references prevent the inode and the lock from being
+     * destroyed before they are processed. */
+    inode_ref(lock->pinode->inode);
+    __pl_entrylk_ref(lock);
+
+    lock->contention_time = *now;
+
+    return _gf_true;
+}
+
+/*
+ * Process every lock queued on 'contend' and send an entrylk contention
+ * upcall for each one that is still linked into its domain list.
+ *
+ * For each queued lock the upcall payload is filled in under the inode
+ * mutex, the notification itself is sent with the mutex released, and
+ * finally the lock is unlinked from 'contend' and the lock and inode
+ * references are dropped. The list is empty when the function returns.
+ */
+void
+entrylk_contention_notify(xlator_t *this, struct list_head *contend)
+{
+    struct gf_upcall upcall;
+    struct gf_upcall_entrylk_contention info;
+    pl_entry_lock_t *entry;
+    pl_inode_t *pinode;
+    client_t *owner;
+    gf_boolean_t still_held;
+    int rc;
+
+    while (!list_empty(contend)) {
+        entry = list_first_entry(contend, pl_entry_lock_t, contend);
+        pinode = entry->pinode;
+
+        pthread_mutex_lock(&pinode->mutex);
+        {
+            /* A lock that is no longer in its domain list has already
+             * been released: nothing is sent for it. */
+            still_held = !list_empty(&entry->domain_list);
+            if (still_held) {
+                info.type = entry->type;
+                info.name = entry->basename;
+                info.pid = entry->client_pid;
+                info.domain = entry->volume;
+                info.xdata = NULL;
+
+                gf_uuid_copy(upcall.gfid, entry->pinode->gfid);
+
+                /* The client is NULL when the entrylk was issued by
+                 * a server side xlator. */
+                owner = (client_t *)entry->client;
+                upcall.client_uid = (owner != NULL) ? owner->client_uid
+                                                    : NULL;
+            } else {
+                /* No notification is going out, so the notification
+                 * time is cleared. */
+                entry->contention_time.tv_sec = 0;
+                entry->contention_time.tv_nsec = 0;
+            }
+        }
+        pthread_mutex_unlock(&pinode->mutex);
+
+        if (still_held) {
+            upcall.event_type = GF_UPCALL_ENTRYLK_CONTENTION;
+            upcall.data = &info;
+
+            rc = this->notify(this, GF_EVENT_UPCALL, &upcall);
+            if (rc < 0) {
+                gf_msg_debug(this->name, 0,
+                             "Entrylk contention notification failed");
+            } else {
+                gf_msg_debug(this->name, 0,
+                             "Entrylk contention notification sent");
+            }
+        }
+
+        pthread_mutex_lock(&pinode->mutex);
+        {
+            list_del_init(&entry->contend);
+            __pl_entrylk_unref(entry);
+        }
+        pthread_mutex_unlock(&pinode->mutex);
+
+        /* 'pinode' was saved before the lock reference was dropped, so
+         * it is still usable here. */
+        inode_unref(pinode->inode);
+    }
+}
+
+
/**
* entrylk_grantable - is this lock grantable?
* @inode: inode in which to look
@@ -208,19 +317,27 @@ out:
* @type: type of lock
*/
+/* Returns the first lock in dom->entrylk_list that conflicts with 'lock',
+ * or NULL when there is none. When 'contend' is not NULL, conflicting
+ * locks that need a contention notification are appended to it. */
static pl_entry_lock_t *
-__entrylk_grantable (pl_dom_list_t *dom, pl_entry_lock_t *lock)
+__entrylk_grantable (xlator_t *this, pl_dom_list_t *dom, pl_entry_lock_t *lock,
+ struct timespec *now, struct list_head *contend)
{
pl_entry_lock_t *tmp = NULL;
-
- if (list_empty (&dom->entrylk_list))
- return NULL;
+ pl_entry_lock_t *ret = NULL;
list_for_each_entry (tmp, &dom->entrylk_list, domain_list) {
- if (__conflicting_entrylks (tmp, lock))
- return tmp;
+ if (__conflicting_entrylks (tmp, lock)) {
+ /* Only the first conflict is reported to the caller. Without
+ * a contention list there is nothing else to collect, so the
+ * scan stops at that point. */
+ if (ret == NULL) {
+ ret = tmp;
+ if (contend == NULL) {
+ break;
+ }
+ }
+ /* With a contention list the scan continues, queueing every
+ * conflicting lock that needs a notification. */
+ if (__entrylk_needs_contention_notify(this, tmp, now)) {
+ list_add_tail(&tmp->contend, contend);
+ }
+ }
}
- return NULL;
+ return ret;
}
static pl_entry_lock_t *
@@ -228,9 +345,6 @@ __blocked_entrylk_conflict (pl_dom_list_t *dom, pl_entry_lock_t *lock)
{
pl_entry_lock_t *tmp = NULL;
- if (list_empty (&dom->blocked_entrylks))
- return NULL;
-
list_for_each_entry (tmp, &dom->blocked_entrylks, blocked_locks) {
if (names_conflict (tmp->basename, lock->basename))
return lock;
@@ -426,6 +540,27 @@ __find_matching_lock (pl_dom_list_t *dom, pl_entry_lock_t *lock)
return NULL;
}
+/*
+ * Handle a lock request that cannot be granted right now.
+ *
+ * A blocking request (nonblock == 0) gets its blocking time recorded in
+ * lock->blkd_time and is appended to dom->blocked_entrylks. A non-blocking
+ * request is left untouched.
+ *
+ * Always returns -EAGAIN.
+ */
+static int
+__lock_blocked_add(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom,
+                   pl_entry_lock_t *lock, int nonblock)
+{
+    if (!nonblock) {
+        /* The clock is read only for requests that are really queued;
+         * a non-blocking request never uses the timestamp. */
+        gettimeofday(&lock->blkd_time, NULL);
+        list_add_tail(&lock->blocked_locks, &dom->blocked_entrylks);
+
+        gf_msg_trace(this->name, 0,
+                     "Blocking lock: {pinode=%p, basename=%s}", pinode,
+                     lock->basename);
+    }
+
+    return -EAGAIN;
+}
+
/**
* __lock_entrylk - lock a name in a directory
* @inode: inode for the directory in which to lock
@@ -439,24 +574,15 @@ __find_matching_lock (pl_dom_list_t *dom, pl_entry_lock_t *lock)
int
__lock_entrylk (xlator_t *this, pl_inode_t *pinode, pl_entry_lock_t *lock,
- int nonblock, pl_dom_list_t *dom)
+ int nonblock, pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend)
{
pl_entry_lock_t *conf = NULL;
int ret = -EAGAIN;
- conf = __entrylk_grantable (dom, lock);
+ conf = __entrylk_grantable (this, dom, lock, now, contend);
if (conf) {
- ret = -EAGAIN;
- if (nonblock)
- goto out;
-
- gettimeofday (&lock->blkd_time, NULL);
- list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks);
-
- gf_log (this->name, GF_LOG_TRACE,
- "Blocking lock: {pinode=%p, basename=%s}",
- pinode, lock->basename);
-
+ ret = __lock_blocked_add(this, pinode, dom, lock, nonblock);
goto out;
}
@@ -471,20 +597,15 @@ __lock_entrylk (xlator_t *this, pl_inode_t *pinode, pl_entry_lock_t *lock,
* granted, without which self-heal can't progress.
* TODO: Find why 'owner_has_lock' is checked even for blocked locks.
*/
- if (__blocked_entrylk_conflict (dom, lock) && !(__owner_has_lock (dom, lock))) {
- ret = -EAGAIN;
- if (nonblock)
- goto out;
-
- gettimeofday (&lock->blkd_time, NULL);
- list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Lock is grantable, but blocking to prevent starvation");
- gf_log (this->name, GF_LOG_TRACE,
- "Blocking lock: {pinode=%p, basename=%s}",
- pinode, lock->basename);
+ if (__blocked_entrylk_conflict (dom, lock) &&
+ !(__owner_has_lock (dom, lock))) {
+ if (nonblock == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Lock is grantable, but blocking to prevent "
+ "starvation");
+ }
+ ret = __lock_blocked_add(this, pinode, dom, lock, nonblock);
goto out;
}
@@ -551,7 +672,8 @@ out:
void
__grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
- pl_dom_list_t *dom, struct list_head *granted)
+ pl_dom_list_t *dom, struct list_head *granted,
+ struct timespec *now, struct list_head *contend)
{
int bl_ret = 0;
pl_entry_lock_t *bl = NULL;
@@ -566,7 +688,8 @@ __grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
list_del_init (&bl->blocked_locks);
- bl_ret = __lock_entrylk (bl->this, pl_inode, bl, 0, dom);
+ bl_ret = __lock_entrylk (bl->this, pl_inode, bl, 0, dom, now,
+ contend);
if (bl_ret == 0) {
list_add (&bl->blocked_locks, granted);
@@ -578,7 +701,8 @@ __grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
/* Grants locks if possible which are blocked on a lock */
void
grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
- pl_dom_list_t *dom)
+ pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend)
{
struct list_head granted_list;
pl_entry_lock_t *tmp = NULL;
@@ -589,7 +713,7 @@ grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
pthread_mutex_lock (&pl_inode->mutex);
{
__grant_blocked_entry_locks (this, pl_inode, dom,
- &granted_list);
+ &granted_list, now, contend);
}
pthread_mutex_unlock (&pl_inode->mutex);
@@ -610,8 +734,6 @@ grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
}
}
pthread_mutex_unlock (&pl_inode->mutex);
-
- return;
}
@@ -637,9 +759,18 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
int nonblock = 0;
gf_boolean_t need_inode_unref = _gf_false;
posix_locks_private_t *priv = NULL;
+ struct list_head *pcontend = NULL;
+ struct list_head contend;
+ struct timespec now = { };
priv = this->private;
+ if (priv->notify_contention) {
+ pcontend = &contend;
+ INIT_LIST_HEAD(pcontend);
+ timespec_now(&now);
+ }
+
if (xdata)
dict_ret = dict_get_str (xdata, "connection-id", &conn_id);
@@ -722,7 +853,8 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
{
reqlock->pinode = pinode;
- ret = __lock_entrylk (this, pinode, reqlock, nonblock, dom);
+ ret = __lock_entrylk (this, pinode, reqlock, nonblock,
+ dom, &now, pcontend);
if (ret == 0) {
reqlock->frame = NULL;
op_ret = 0;
@@ -778,7 +910,7 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
if (ctx)
pthread_mutex_unlock (&ctx->lock);
- grant_blocked_entry_locks (this, pinode, dom);
+ grant_blocked_entry_locks (this, pinode, dom, &now, pcontend);
break;
@@ -810,6 +942,10 @@ unwind:
cmd, type);
}
+ if (pcontend != NULL) {
+ entrylk_contention_notify(this, pcontend);
+ }
+
return 0;
}
@@ -868,27 +1004,37 @@ pl_entrylk_log_cleanup (pl_entry_lock_t *lock)
int
pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
{
+ posix_locks_private_t *priv;
pl_entry_lock_t *tmp = NULL;
pl_entry_lock_t *l = NULL;
- pl_dom_list_t *dom = NULL;
+ pl_dom_list_t *dom = NULL;
pl_inode_t *pinode = NULL;
-
+ struct list_head *pcontend = NULL;
struct list_head released;
struct list_head unwind;
+ struct list_head contend;
+ struct timespec now = { };
INIT_LIST_HEAD (&released);
INIT_LIST_HEAD (&unwind);
- pthread_mutex_lock (&ctx->lock);
+ priv = this->private;
+ if (priv->notify_contention) {
+ pcontend = &contend;
+ INIT_LIST_HEAD (pcontend);
+ timespec_now(&now);
+ }
+
+ pthread_mutex_lock (&ctx->lock);
{
list_for_each_entry_safe (l, tmp, &ctx->entrylk_lockers,
- client_list) {
- pl_entrylk_log_cleanup (l);
+ client_list) {
+ pl_entrylk_log_cleanup (l);
- pinode = l->pinode;
+ pinode = l->pinode;
- pthread_mutex_lock (&pinode->mutex);
- {
+ pthread_mutex_lock (&pinode->mutex);
+ {
/* If the entrylk object is part of granted list but not
* blocked list, then perform the following actions:
* i. delete the object from granted list;
@@ -931,38 +1077,42 @@ pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
&unwind);
}
}
- pthread_mutex_unlock (&pinode->mutex);
+ pthread_mutex_unlock (&pinode->mutex);
}
- }
+ }
pthread_mutex_unlock (&ctx->lock);
list_for_each_entry_safe (l, tmp, &unwind, client_list) {
list_del_init (&l->client_list);
- if (l->frame)
- STACK_UNWIND_STRICT (entrylk, l->frame, -1, EAGAIN,
- NULL);
+ if (l->frame)
+ STACK_UNWIND_STRICT (entrylk, l->frame, -1, EAGAIN,
+ NULL);
list_add_tail (&l->client_list, &released);
}
list_for_each_entry_safe (l, tmp, &released, client_list) {
list_del_init (&l->client_list);
- pinode = l->pinode;
+ pinode = l->pinode;
- dom = get_domain (pinode, l->volume);
+ dom = get_domain (pinode, l->volume);
- grant_blocked_entry_locks (this, pinode, dom);
+ grant_blocked_entry_locks (this, pinode, dom, &now, pcontend);
- pthread_mutex_lock (&pinode->mutex);
- {
- __pl_entrylk_unref (l);
- }
- pthread_mutex_unlock (&pinode->mutex);
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ __pl_entrylk_unref (l);
+ }
+ pthread_mutex_unlock (&pinode->mutex);
inode_unref (pinode->inode);
}
+ if (pcontend != NULL) {
+ entrylk_contention_notify(this, pcontend);
+ }
+
return 0;
}