summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--tests/features/lock_revocation.t52
-rw-r--r--xlators/features/locks/src/clear.c4
-rw-r--r--xlators/features/locks/src/common.c13
-rw-r--r--xlators/features/locks/src/common.h3
-rw-r--r--xlators/features/locks/src/entrylk.c124
-rw-r--r--xlators/features/locks/src/inodelk.c125
-rw-r--r--xlators/features/locks/src/locks.h4
-rw-r--r--xlators/features/locks/src/posix.c56
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c21
9 files changed, 394 insertions, 8 deletions
diff --git a/tests/features/lock_revocation.t b/tests/features/lock_revocation.t
new file mode 100644
index 00000000000..cbf21b71650
--- /dev/null
+++ b/tests/features/lock_revocation.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+logdir=$(gluster --print-logdir)
+BRICK_LOGFILES="$logdir/bricks/d-backends-brick?.log"
+rm -f $BRICK_LOGFILES &> /dev/null
+
+# Test that lock revocation works
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+cleanup;
+
+function deadlock_fop() {
+ local MNT=$1
+ for i in {1..1000}; do
+ dd if=/dev/zero of=$MNT/testfile bs=1k count=10 &> /dev/null
+ if grep "MONKEY LOCKING" $BRICK_LOGFILES &> /dev/null; then
+ break
+ fi
+ done
+}
+
+function monkey_unlock() {
+ grep "MONKEY LOCKING" $BRICK_LOGFILES &> /dev/null && echo SUCCESS
+ return 0
+}
+
+function append_to_file() {
+ local FILE_PATH=$1
+ echo "hello" >> $FILE_PATH
+ return 0
+}
+
+#Init
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 features.locks-monkey-unlocking on
+TEST $CLI volume set $V0 features.locks-revocation-secs 2
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 -s $H0 $M0;
+TEST $GFS --volfile-id=$V0 -s $H0 $M1;
+
+# Deadlock writes to a file using monkey unlocking
+deadlock_fop $M0 &
+EXPECT_WITHIN 60 "SUCCESS" monkey_unlock
+
+# Sleep > unlock timeout and attempt to write to the file
+sleep 3
+TEST append_to_file $M1/testfile
+
+cleanup
diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c
index 640c6bb5553..d7c210f24a5 100644
--- a/xlators/features/locks/src/clear.c
+++ b/xlators/features/locks/src/clear.c
@@ -234,6 +234,7 @@ blkd:
continue;
bcount++;
+ list_del_init (&ilock->client_list);
list_del_init (&ilock->blocked_locks);
list_add (&ilock->blocked_locks, &released);
}
@@ -268,6 +269,7 @@ granted:
continue;
gcount++;
+ list_del_init (&ilock->client_list);
list_del_init (&ilock->list);
list_add (&ilock->list, &released);
}
@@ -321,6 +323,7 @@ blkd:
bcount++;
+ list_del_init (&elock->client_list);
list_del_init (&elock->blocked_locks);
list_add_tail (&elock->blocked_locks, &released);
}
@@ -355,6 +358,7 @@ granted:
}
gcount++;
+ list_del_init (&elock->client_list);
list_del_init (&elock->domain_list);
list_add_tail (&elock->domain_list, &removed);
diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
index 123c1101e94..796b538f6f2 100644
--- a/xlators/features/locks/src/common.c
+++ b/xlators/features/locks/src/common.c
@@ -1121,3 +1121,16 @@ pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock)
return conf;
}
+
+gf_boolean_t
+pl_does_monkey_want_stuck_lock()
+{
+ long int monkey_unlock_rand = 0;
+ long int monkey_unlock_rand_rem = 0;
+
+ monkey_unlock_rand = random ();
+ monkey_unlock_rand_rem = monkey_unlock_rand % 100;
+ if (monkey_unlock_rand_rem == 0)
+ return _gf_true;
+ return _gf_false;
+}
diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h
index 5486f9b8314..3729ca24bed 100644
--- a/xlators/features/locks/src/common.h
+++ b/xlators/features/locks/src/common.h
@@ -161,4 +161,7 @@ pl_metalock_is_active (pl_inode_t *pl_inode);
int
__pl_queue_lock (pl_inode_t *pl_inode, posix_lock_t *reqlock, int can_block);
+
+gf_boolean_t
+pl_does_monkey_want_stuck_lock();
#endif /* __COMMON_H__ */
diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c
index 783c57e6381..4231d760cdc 100644
--- a/xlators/features/locks/src/entrylk.c
+++ b/xlators/features/locks/src/entrylk.c
@@ -16,9 +16,9 @@
#include "list.h"
#include "locks.h"
+#include "clear.h"
#include "common.h"
-
void
__pl_entrylk_unref (pl_entry_lock_t *lock)
{
@@ -111,6 +111,97 @@ __conflicting_entrylks (pl_entry_lock_t *l1, pl_entry_lock_t *l2)
return 0;
}
+/* See comments in inodelk.c for details */
+static inline gf_boolean_t
+__stale_entrylk (xlator_t *this, pl_entry_lock_t *candidate_lock,
+ pl_entry_lock_t *requested_lock, time_t *lock_age_sec)
+{
+ posix_locks_private_t *priv = NULL;
+ struct timeval curr;
+ gettimeofday (&curr, NULL);
+
+ priv = this->private;
+
+ /* Question: Should we just prune them all given the
+ * chance? Or just the locks we are attempting to acquire?
+ */
+ if (names_conflict (candidate_lock->basename,
+ requested_lock->basename)) {
+ *lock_age_sec = curr.tv_sec -
+ candidate_lock->granted_time.tv_sec;
+ if (*lock_age_sec > priv->revocation_secs)
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
+/* See comments in inodelk.c for details */
+static gf_boolean_t
+__entrylk_prune_stale (xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom,
+ pl_entry_lock_t *lock)
+{
+ posix_locks_private_t *priv = NULL;
+ pl_entry_lock_t *tmp = NULL;
+ pl_entry_lock_t *lk = NULL;
+ gf_boolean_t revoke_lock = _gf_false;
+ int bcount = 0;
+ int gcount = 0;
+ int op_errno = 0;
+ clrlk_args args;
+ args.opts = NULL;
+ time_t lk_age_sec = 0;
+ uint32_t max_blocked = 0;
+ char *reason_str = NULL;
+
+ priv = this->private;
+ args.type = CLRLK_ENTRY;
+ if (priv->revocation_clear_all == _gf_true)
+ args.kind = CLRLK_ALL;
+ else
+ args.kind = CLRLK_GRANTED;
+
+
+ if (list_empty (&dom->entrylk_list))
+ goto out;
+
+ pthread_mutex_lock (&pinode->mutex);
+ lock->pinode = pinode;
+ list_for_each_entry_safe (lk, tmp, &dom->entrylk_list, domain_list) {
+ if (__stale_entrylk (this, lk, lock, &lk_age_sec) == _gf_true) {
+ revoke_lock = _gf_true;
+ reason_str = "age";
+ break;
+ }
+ }
+ max_blocked = priv->revocation_max_blocked;
+ if (max_blocked != 0 && revoke_lock == _gf_false) {
+ list_for_each_entry_safe (lk, tmp, &dom->blocked_entrylks,
+ blocked_locks) {
+ max_blocked--;
+ if (max_blocked == 0) {
+ revoke_lock = _gf_true;
+ reason_str = "max blocked";
+ break;
+ }
+ }
+ }
+ pthread_mutex_unlock (&pinode->mutex);
+
+out:
+ if (revoke_lock == _gf_true) {
+ clrlk_clear_entrylk (this, pinode, dom, &args, &bcount, &gcount,
+ &op_errno);
+ gf_log (this->name, GF_LOG_WARNING,
+ "Lock revocation [reason: %s; gfid: %s; domain: %s; "
+ "age: %ld sec] - Entry lock revoked: %d granted & %d "
+ "blocked locks cleared", reason_str,
+ uuid_utoa (pinode->gfid), dom->domain, lk_age_sec,
+ gcount, bcount);
+ }
+
+ return revoke_lock;
+}
+
/**
* entrylk_grantable - is this lock grantable?
* @inode: inode in which to look
@@ -546,6 +637,9 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
pl_ctx_t *ctx = NULL;
int nonblock = 0;
gf_boolean_t need_inode_unref = _gf_false;
+ posix_locks_private_t *priv = NULL;
+
+ priv = this->private;
if (xdata)
dict_ret = dict_get_str (xdata, "connection-id", &conn_id);
@@ -599,6 +693,24 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
* current stack unwinds.
*/
pinode->inode = inode_ref (inode);
+ if (priv->revocation_secs != 0) {
+ if (cmd != ENTRYLK_UNLOCK) {
+ __entrylk_prune_stale (this, pinode, dom, reqlock);
+ } else if (priv->monkey_unlocking == _gf_true) {
+ if (pl_does_monkey_want_stuck_lock ()) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "MONKEY LOCKING (forcing stuck lock)!");
+ op_ret = 0;
+ need_inode_unref = _gf_true;
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ __pl_entrylk_unref (reqlock);
+ }
+ pthread_mutex_unlock (&pinode->mutex);
+ goto out;
+ }
+ }
+ }
switch (cmd) {
case ENTRYLK_LOCK_NB:
@@ -678,9 +790,6 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
"a bug report at http://bugs.gluster.com", cmd);
goto out;
}
- if (need_inode_unref)
- inode_unref (pinode->inode);
-
/* The following (extra) unref corresponds to the ref that
* was done at the time the lock was granted.
*/
@@ -689,6 +798,9 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
out:
+ if (need_inode_unref)
+ inode_unref (pinode->inode);
+
if (unwind) {
entrylk_trace_out (this, frame, volume, fd, loc, basename,
cmd, type, op_ret, op_errno);
@@ -772,8 +884,6 @@ pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
{
list_for_each_entry_safe (l, tmp, &ctx->entrylk_lockers,
client_list) {
- list_del_init (&l->client_list);
-
pl_entrylk_log_cleanup (l);
pinode = l->pinode;
@@ -810,6 +920,8 @@ pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
* blocked to avoid leaving L1 to starve forever.
* iv. unref the object.
*/
+ list_del_init (&l->client_list);
+
if (!list_empty (&l->domain_list)) {
list_del_init (&l->domain_list);
list_add_tail (&l->client_list,
diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c
index 1564f26b8fb..e1702c78ba1 100644
--- a/xlators/features/locks/src/inodelk.c
+++ b/xlators/features/locks/src/inodelk.c
@@ -16,6 +16,7 @@
#include "list.h"
#include "locks.h"
+#include "clear.h"
#include "common.h"
void
@@ -130,6 +131,105 @@ inodelk_conflict (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
inodelk_type_conflict (l1, l2));
}
+/*
+ * Check to see if the candidate lock overlaps/conflicts with the
+ * requested lock. If so, determine how old the lock is and return
+ * true if it exceeds the configured threshold, false otherwise.
+ */
+static inline gf_boolean_t
+__stale_inodelk (xlator_t *this, pl_inode_lock_t *candidate_lock,
+ pl_inode_lock_t *requested_lock, time_t *lock_age_sec)
+{
+ posix_locks_private_t *priv = NULL;
+ struct timeval curr;
+
+ priv = this->private;
+ gettimeofday (&curr, NULL);
+ /* Question: Should we just prune them all given the
+ * chance? Or just the locks we are attempting to acquire?
+ */
+ if (inodelk_conflict (candidate_lock, requested_lock)) {
+ *lock_age_sec = curr.tv_sec -
+ candidate_lock->granted_time.tv_sec;
+ if (*lock_age_sec > priv->revocation_secs)
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
+/* Examine any locks held on this inode and potentially revoke the lock
+ * if the age exceeds revocation_secs. We will clear _only_ those locks
+ * which are granted, and then grant those locks which are blocked.
+ *
+ * Depending on how this patch works in the wild, we may expand this and
+ * introduce a heuristic which clears blocked locks as well if they
+ * are beyond a threshold.
+ */
+static gf_boolean_t
+__inodelk_prune_stale (xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom,
+ pl_inode_lock_t *lock)
+{
+ posix_locks_private_t *priv = NULL;
+ pl_inode_lock_t *tmp = NULL;
+ pl_inode_lock_t *lk = NULL;
+ gf_boolean_t revoke_lock = _gf_false;
+ int bcount = 0;
+ int gcount = 0;
+ int op_errno = 0;
+ clrlk_args args;
+ args.opts = NULL;
+ time_t lk_age_sec = 0;
+ uint32_t max_blocked = 0;
+ char *reason_str = NULL;
+
+ priv = this->private;
+
+ args.type = CLRLK_INODE;
+ if (priv->revocation_clear_all == _gf_true)
+ args.kind = CLRLK_ALL;
+ else
+ args.kind = CLRLK_GRANTED;
+
+ if (list_empty (&dom->inodelk_list))
+ goto out;
+
+ pthread_mutex_lock (&pinode->mutex);
+ list_for_each_entry_safe (lk, tmp, &dom->inodelk_list, list) {
+ if (__stale_inodelk (this, lk, lock, &lk_age_sec) == _gf_true) {
+ revoke_lock = _gf_true;
+ reason_str = "age";
+ break;
+ }
+ }
+
+ max_blocked = priv->revocation_max_blocked;
+ if (max_blocked != 0 && revoke_lock == _gf_false) {
+ list_for_each_entry_safe (lk, tmp, &dom->blocked_inodelks,
+ blocked_locks) {
+ max_blocked--;
+ if (max_blocked == 0) {
+ revoke_lock = _gf_true;
+ reason_str = "max blocked";
+ break;
+ }
+ }
+ }
+ pthread_mutex_unlock (&pinode->mutex);
+
+out:
+ if (revoke_lock == _gf_true) {
+ clrlk_clear_inodelk (this, pinode, dom, &args, &bcount, &gcount,
+ &op_errno);
+ gf_log (this->name, GF_LOG_WARNING,
+ "Lock revocation [reason: %s; gfid: %s; domain: %s; "
+ "age: %ld sec] - Inode lock revoked: %d granted & %d "
+ "blocked locks cleared",
+ reason_str, uuid_utoa (pinode->gfid), dom->domain,
+ lk_age_sec, gcount, bcount);
+ }
+ return revoke_lock;
+}
+
/* Determine if lock is grantable or not */
static pl_inode_lock_t *
__inodelk_grantable (pl_dom_list_t *dom, pl_inode_lock_t *lock)
@@ -419,8 +519,6 @@ pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
{
list_for_each_entry_safe (l, tmp, &ctx->inodelk_lockers,
client_list) {
- list_del_init (&l->client_list);
-
pl_inodelk_log_cleanup (l);
pl_inode = l->pl_inode;
@@ -458,6 +556,8 @@ pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
* forever.
* iv. unref the object.
*/
+ list_del_init (&l->client_list);
+
if (!list_empty (&l->list)) {
__delete_inode_lock (l);
list_add_tail (&l->client_list,
@@ -509,6 +609,7 @@ pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
pl_inode_lock_t *lock, int can_block, pl_dom_list_t *dom,
inode_t *inode)
{
+ posix_locks_private_t *priv = NULL;
int ret = -EINVAL;
pl_inode_lock_t *retlock = NULL;
gf_boolean_t unref = _gf_true;
@@ -518,6 +619,8 @@ pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
lock->pl_inode = pl_inode;
fl_type = lock->fl_type;
+ priv = this->private;
+
/* Ideally, AFTER a successful lock (both blocking and non-blocking) or
* an unsuccessful blocking lock operation, the inode needs to be ref'd.
*
@@ -537,6 +640,24 @@ pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
*/
pl_inode->inode = inode_ref (inode);
+ if (priv->revocation_secs != 0) {
+ if (lock->fl_type != F_UNLCK) {
+ __inodelk_prune_stale (this, pl_inode, dom, lock);
+ } else if (priv->monkey_unlocking == _gf_true) {
+ if (pl_does_monkey_want_stuck_lock ()) {
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __pl_inodelk_unref (lock);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+ inode_unref (pl_inode->inode);
+ gf_log (this->name, GF_LOG_WARNING,
+ "MONKEY LOCKING (forcing stuck lock)!");
+ return 0;
+ }
+ }
+ }
+
if (ctx)
pthread_mutex_lock (&ctx->lock);
pthread_mutex_lock (&pl_inode->mutex);
diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
index e363f425b65..8eb35da44be 100644
--- a/xlators/features/locks/src/locks.h
+++ b/xlators/features/locks/src/locks.h
@@ -190,6 +190,10 @@ typedef struct {
mlk_mode_t mandatory_mode; /* holds current mandatory locking mode */
gf_boolean_t trace; /* trace lock requests in and out */
char *brickname;
+ gf_boolean_t monkey_unlocking;
+ uint32_t revocation_secs;
+ gf_boolean_t revocation_clear_all;
+ uint32_t revocation_max_blocked;
} posix_locks_private_t;
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index dff17e70aaf..8a142c9991a 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -3628,7 +3628,21 @@ reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("trace", priv->trace, options, bool, out);
+ GF_OPTION_RECONF ("monkey-unlocking", priv->monkey_unlocking, options,
+ bool, out);
+
+ GF_OPTION_RECONF ("revocation-secs",
+ priv->revocation_secs, options,
+ uint32, out);
+
+ GF_OPTION_RECONF ("revocation-clear-all", priv->revocation_clear_all,
+ options, bool, out);
+
+ GF_OPTION_RECONF ("revocation-max-blocked",
+ priv->revocation_max_blocked, options,
+ uint32, out);
ret = 0;
+
out:
return ret;
}
@@ -3679,6 +3693,18 @@ init (xlator_t *this)
GF_OPTION_INIT ("trace", priv->trace, bool, out);
+ GF_OPTION_INIT ("monkey-unlocking", priv->monkey_unlocking,
+ bool, out);
+
+ GF_OPTION_INIT ("revocation-secs", priv->revocation_secs,
+ uint32, out);
+
+ GF_OPTION_INIT ("revocation-clear-all", priv->revocation_clear_all,
+ bool, out);
+
+ GF_OPTION_INIT ("revocation-max-blocked", priv->revocation_max_blocked,
+ uint32, out);
+
this->local_pool = mem_pool_new (pl_local_t, 32);
if (!this->local_pool) {
ret = -1;
@@ -3935,5 +3961,35 @@ struct volume_options options[] = {
.description = "Trace the different lock requests "
"to logs."
},
+ { .key = { "monkey-unlocking" },
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .description = "Ignore a random number of unlock requests. Useful "
+ "for testing/creating robust lock recovery mechanisms."
+ },
+ { .key = {"revocation-secs"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = INT_MAX,
+ .default_value = "0",
+ .description = "Maximum time a lock can be taken out, before"
+ "being revoked.",
+ },
+ { .key = {"revocation-clear-all"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .description = "If set to true, will revoke BOTH granted and blocked "
+ "(pending) lock requests if a revocation threshold is "
+ "hit.",
+ },
+ { .key = {"revocation-max-blocked"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = INT_MAX,
+ .default_value = "0",
+ .description = "A number of blocked lock requests after which a lock "
+ "will be revoked to allow the others to proceed. Can "
+ "be used in conjunction w/ revocation-clear-all."
+ },
{ .key = {NULL} },
};
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index 6de83a7ef4c..2e6aa378097 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -2998,6 +2998,27 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.op_version = GD_OP_VERSION_3_8_0,
.flags = OPT_FLAG_CLIENT_OPT
},
+ { .option = "revocation-secs",
+ .key = "features.locks-revocation-secs",
+ .voltype = "features/locks",
+ .op_version = GD_OP_VERSION_3_9_0,
+ },
+ { .option = "revocation-clear-all",
+ .key = "features.locks-revocation-clear-all",
+ .voltype = "features/locks",
+ .op_version = GD_OP_VERSION_3_9_0,
+ },
+ { .option = "revocation-max-blocked",
+ .key = "features.locks-revocation-max-blocked",
+ .voltype = "features/locks",
+ .op_version = GD_OP_VERSION_3_9_0,
+ },
+ { .option = "monkey-unlocking",
+ .key = "features.locks-monkey-unlocking",
+ .voltype = "features/locks",
+ .op_version = GD_OP_VERSION_3_9_0,
+ .type = NO_DOC,
+ },
{ .key = NULL
}
};