summaryrefslogtreecommitdiffstats
path: root/xlators
diff options
context:
space:
mode:
authorRichard Wareing <rwareing@fb.com>2015-11-20 10:59:00 -0800
committerPranith Kumar Karampuri <pkarampu@redhat.com>2016-07-18 02:30:38 -0700
commit8cbee639520bf4631ce658e2da9b4bc3010d2eaa (patch)
tree77f55db668591926f95f7104bd835035badf3768 /xlators
parentf630fb742a603083a6adc610296458bcd2e57062 (diff)
features/locks: Add lock revocation functionality to posix locks translator
Summary: - Motivation: Prevents cluster instability by mis-behaving clients causing bricks to OOM due to inode/entry lock pile-ups. - Adds option to strip clients of entry/inode locks after N seconds - Adds option to clear ALL locks should the revocation threshold get hit - Adds option to clear all or granted locks should the max-blocked threshold get hit (can be used in combination w/ revocation-clear-all). - Options are: features.locks-revocation-secs <integer; 0 to disable> features.locks-revocation-clear-all [on/off] features.locks-revocation-max-blocked <integer> - Adds monkey-locking option to ignore 1% of unlock requests (dev only) features.locks-monkey-unlocking [on/off] - Adds logging to indicate revocation event & reason Test Plan: First you will need TWO fuse mounts for this repro. Call them /mnt/patchy1 & /mnt/patchy2. 1. Enable monkey unlocking on the volume: gluster vol set patchy features.locks-monkey-unlocking on 2. From the "patchy1", use DD or some other utility to begin writing to a file, eventually the dd will hang due to the dropped unlocked requests. This now simulates the broken client. Run: for i in {1..1000};do dd if=/dev/zero of=/mnt/patchy1/testfile bs=1k count=10;done' ...this will eventually hang as the unlock request has been lost. 3. Goto another window and setup the mount "patchy2" @ /mnt/patchy2, and observe that 'echo "hello" >> /mnt/patchy2/testfile" will hang due to the inability of the client to take out the required lock. 4. Next, re-start the test this time enabling lock revocation; use a timeout of 2-5 seconds for testing: 'gluster vol set patchy features.locks-revocation-secs <2-5>' 5. Wait 2-5 seconds before executing step 3 above this time. Observe that this time the access to the file will succeed, and the writes on patchy1 will unblock until they hit another failed unlock request due to "monkey-unlocking". BUG: 1350867 Change-Id: I814b9f635fec53834a26db634d1300d9a61057d8 Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> Reviewed-on: http://review.gluster.org/14816 NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> Reviewed-by: Krutika Dhananjay <kdhananj@redhat.com> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Smoke: Gluster Build System <jenkins@build.gluster.org>
Diffstat (limited to 'xlators')
-rw-r--r--xlators/features/locks/src/clear.c4
-rw-r--r--xlators/features/locks/src/common.c13
-rw-r--r--xlators/features/locks/src/common.h3
-rw-r--r--xlators/features/locks/src/entrylk.c124
-rw-r--r--xlators/features/locks/src/inodelk.c125
-rw-r--r--xlators/features/locks/src/locks.h4
-rw-r--r--xlators/features/locks/src/posix.c56
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c21
8 files changed, 342 insertions, 8 deletions
diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c
index 640c6bb5553..d7c210f24a5 100644
--- a/xlators/features/locks/src/clear.c
+++ b/xlators/features/locks/src/clear.c
@@ -234,6 +234,7 @@ blkd:
continue;
bcount++;
+ list_del_init (&ilock->client_list);
list_del_init (&ilock->blocked_locks);
list_add (&ilock->blocked_locks, &released);
}
@@ -268,6 +269,7 @@ granted:
continue;
gcount++;
+ list_del_init (&ilock->client_list);
list_del_init (&ilock->list);
list_add (&ilock->list, &released);
}
@@ -321,6 +323,7 @@ blkd:
bcount++;
+ list_del_init (&elock->client_list);
list_del_init (&elock->blocked_locks);
list_add_tail (&elock->blocked_locks, &released);
}
@@ -355,6 +358,7 @@ granted:
}
gcount++;
+ list_del_init (&elock->client_list);
list_del_init (&elock->domain_list);
list_add_tail (&elock->domain_list, &removed);
diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
index 123c1101e94..796b538f6f2 100644
--- a/xlators/features/locks/src/common.c
+++ b/xlators/features/locks/src/common.c
@@ -1121,3 +1121,16 @@ pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock)
return conf;
}
+
+gf_boolean_t
+pl_does_monkey_want_stuck_lock()
+{
+ long int monkey_unlock_rand = 0;
+ long int monkey_unlock_rand_rem = 0;
+
+ monkey_unlock_rand = random ();
+ monkey_unlock_rand_rem = monkey_unlock_rand % 100;
+ if (monkey_unlock_rand_rem == 0)
+ return _gf_true;
+ return _gf_false;
+}
diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h
index 5486f9b8314..3729ca24bed 100644
--- a/xlators/features/locks/src/common.h
+++ b/xlators/features/locks/src/common.h
@@ -161,4 +161,7 @@ pl_metalock_is_active (pl_inode_t *pl_inode);
int
__pl_queue_lock (pl_inode_t *pl_inode, posix_lock_t *reqlock, int can_block);
+
+gf_boolean_t
+pl_does_monkey_want_stuck_lock();
#endif /* __COMMON_H__ */
diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c
index 783c57e6381..4231d760cdc 100644
--- a/xlators/features/locks/src/entrylk.c
+++ b/xlators/features/locks/src/entrylk.c
@@ -16,9 +16,9 @@
#include "list.h"
#include "locks.h"
+#include "clear.h"
#include "common.h"
-
void
__pl_entrylk_unref (pl_entry_lock_t *lock)
{
@@ -111,6 +111,97 @@ __conflicting_entrylks (pl_entry_lock_t *l1, pl_entry_lock_t *l2)
return 0;
}
+/* See comments in inodelk.c for details */
+static inline gf_boolean_t
+__stale_entrylk (xlator_t *this, pl_entry_lock_t *candidate_lock,
+ pl_entry_lock_t *requested_lock, time_t *lock_age_sec)
+{
+ posix_locks_private_t *priv = NULL;
+ struct timeval curr;
+ gettimeofday (&curr, NULL);
+
+ priv = this->private;
+
+ /* Question: Should we just prune them all given the
+ * chance? Or just the locks we are attempting to acquire?
+ */
+ if (names_conflict (candidate_lock->basename,
+ requested_lock->basename)) {
+ *lock_age_sec = curr.tv_sec -
+ candidate_lock->granted_time.tv_sec;
+ if (*lock_age_sec > priv->revocation_secs)
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
+/* See comments in inodelk.c for details */
+static gf_boolean_t
+__entrylk_prune_stale (xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom,
+ pl_entry_lock_t *lock)
+{
+ posix_locks_private_t *priv = NULL;
+ pl_entry_lock_t *tmp = NULL;
+ pl_entry_lock_t *lk = NULL;
+ gf_boolean_t revoke_lock = _gf_false;
+ int bcount = 0;
+ int gcount = 0;
+ int op_errno = 0;
+ clrlk_args args;
+ args.opts = NULL;
+ time_t lk_age_sec = 0;
+ uint32_t max_blocked = 0;
+ char *reason_str = NULL;
+
+ priv = this->private;
+ args.type = CLRLK_ENTRY;
+ if (priv->revocation_clear_all == _gf_true)
+ args.kind = CLRLK_ALL;
+ else
+ args.kind = CLRLK_GRANTED;
+
+
+ if (list_empty (&dom->entrylk_list))
+ goto out;
+
+ pthread_mutex_lock (&pinode->mutex);
+ lock->pinode = pinode;
+ list_for_each_entry_safe (lk, tmp, &dom->entrylk_list, domain_list) {
+ if (__stale_entrylk (this, lk, lock, &lk_age_sec) == _gf_true) {
+ revoke_lock = _gf_true;
+ reason_str = "age";
+ break;
+ }
+ }
+ max_blocked = priv->revocation_max_blocked;
+ if (max_blocked != 0 && revoke_lock == _gf_false) {
+ list_for_each_entry_safe (lk, tmp, &dom->blocked_entrylks,
+ blocked_locks) {
+ max_blocked--;
+ if (max_blocked == 0) {
+ revoke_lock = _gf_true;
+ reason_str = "max blocked";
+ break;
+ }
+ }
+ }
+ pthread_mutex_unlock (&pinode->mutex);
+
+out:
+ if (revoke_lock == _gf_true) {
+ clrlk_clear_entrylk (this, pinode, dom, &args, &bcount, &gcount,
+ &op_errno);
+ gf_log (this->name, GF_LOG_WARNING,
+ "Lock revocation [reason: %s; gfid: %s; domain: %s; "
+ "age: %ld sec] - Entry lock revoked: %d granted & %d "
+ "blocked locks cleared", reason_str,
+ uuid_utoa (pinode->gfid), dom->domain, lk_age_sec,
+ gcount, bcount);
+ }
+
+ return revoke_lock;
+}
+
/**
* entrylk_grantable - is this lock grantable?
* @inode: inode in which to look
@@ -546,6 +637,9 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
pl_ctx_t *ctx = NULL;
int nonblock = 0;
gf_boolean_t need_inode_unref = _gf_false;
+ posix_locks_private_t *priv = NULL;
+
+ priv = this->private;
if (xdata)
dict_ret = dict_get_str (xdata, "connection-id", &conn_id);
@@ -599,6 +693,24 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
* current stack unwinds.
*/
pinode->inode = inode_ref (inode);
+ if (priv->revocation_secs != 0) {
+ if (cmd != ENTRYLK_UNLOCK) {
+ __entrylk_prune_stale (this, pinode, dom, reqlock);
+ } else if (priv->monkey_unlocking == _gf_true) {
+ if (pl_does_monkey_want_stuck_lock ()) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "MONKEY LOCKING (forcing stuck lock)!");
+ op_ret = 0;
+ need_inode_unref = _gf_true;
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ __pl_entrylk_unref (reqlock);
+ }
+ pthread_mutex_unlock (&pinode->mutex);
+ goto out;
+ }
+ }
+ }
switch (cmd) {
case ENTRYLK_LOCK_NB:
@@ -678,9 +790,6 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
"a bug report at http://bugs.gluster.com", cmd);
goto out;
}
- if (need_inode_unref)
- inode_unref (pinode->inode);
-
/* The following (extra) unref corresponds to the ref that
* was done at the time the lock was granted.
*/
@@ -689,6 +798,9 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
out:
+ if (need_inode_unref)
+ inode_unref (pinode->inode);
+
if (unwind) {
entrylk_trace_out (this, frame, volume, fd, loc, basename,
cmd, type, op_ret, op_errno);
@@ -772,8 +884,6 @@ pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
{
list_for_each_entry_safe (l, tmp, &ctx->entrylk_lockers,
client_list) {
- list_del_init (&l->client_list);
-
pl_entrylk_log_cleanup (l);
pinode = l->pinode;
@@ -810,6 +920,8 @@ pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
* blocked to avoid leaving L1 to starve forever.
* iv. unref the object.
*/
+ list_del_init (&l->client_list);
+
if (!list_empty (&l->domain_list)) {
list_del_init (&l->domain_list);
list_add_tail (&l->client_list,
diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c
index 1564f26b8fb..e1702c78ba1 100644
--- a/xlators/features/locks/src/inodelk.c
+++ b/xlators/features/locks/src/inodelk.c
@@ -16,6 +16,7 @@
#include "list.h"
#include "locks.h"
+#include "clear.h"
#include "common.h"
void
@@ -130,6 +131,105 @@ inodelk_conflict (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
inodelk_type_conflict (l1, l2));
}
+/*
+ * Check to see if the candidate lock overlaps/conflicts with the
+ * requested lock. If so, determine how old the lock is and return
+ * true if it exceeds the configured threshold, false otherwise.
+ */
+static inline gf_boolean_t
+__stale_inodelk (xlator_t *this, pl_inode_lock_t *candidate_lock,
+ pl_inode_lock_t *requested_lock, time_t *lock_age_sec)
+{
+ posix_locks_private_t *priv = NULL;
+ struct timeval curr;
+
+ priv = this->private;
+ gettimeofday (&curr, NULL);
+ /* Question: Should we just prune them all given the
+ * chance? Or just the locks we are attempting to acquire?
+ */
+ if (inodelk_conflict (candidate_lock, requested_lock)) {
+ *lock_age_sec = curr.tv_sec -
+ candidate_lock->granted_time.tv_sec;
+ if (*lock_age_sec > priv->revocation_secs)
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
+/* Examine any locks held on this inode and potentially revoke the lock
+ * if the age exceeds revocation_secs. We will clear _only_ those locks
+ * which are granted, and then grant those locks which are blocked.
+ *
+ * Depending on how this patch works in the wild, we may expand this and
+ * introduce a heuristic which clears blocked locks as well if they
+ * are beyond a threshold.
+ */
+static gf_boolean_t
+__inodelk_prune_stale (xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom,
+ pl_inode_lock_t *lock)
+{
+ posix_locks_private_t *priv = NULL;
+ pl_inode_lock_t *tmp = NULL;
+ pl_inode_lock_t *lk = NULL;
+ gf_boolean_t revoke_lock = _gf_false;
+ int bcount = 0;
+ int gcount = 0;
+ int op_errno = 0;
+ clrlk_args args;
+ args.opts = NULL;
+ time_t lk_age_sec = 0;
+ uint32_t max_blocked = 0;
+ char *reason_str = NULL;
+
+ priv = this->private;
+
+ args.type = CLRLK_INODE;
+ if (priv->revocation_clear_all == _gf_true)
+ args.kind = CLRLK_ALL;
+ else
+ args.kind = CLRLK_GRANTED;
+
+ if (list_empty (&dom->inodelk_list))
+ goto out;
+
+ pthread_mutex_lock (&pinode->mutex);
+ list_for_each_entry_safe (lk, tmp, &dom->inodelk_list, list) {
+ if (__stale_inodelk (this, lk, lock, &lk_age_sec) == _gf_true) {
+ revoke_lock = _gf_true;
+ reason_str = "age";
+ break;
+ }
+ }
+
+ max_blocked = priv->revocation_max_blocked;
+ if (max_blocked != 0 && revoke_lock == _gf_false) {
+ list_for_each_entry_safe (lk, tmp, &dom->blocked_inodelks,
+ blocked_locks) {
+ max_blocked--;
+ if (max_blocked == 0) {
+ revoke_lock = _gf_true;
+ reason_str = "max blocked";
+ break;
+ }
+ }
+ }
+ pthread_mutex_unlock (&pinode->mutex);
+
+out:
+ if (revoke_lock == _gf_true) {
+ clrlk_clear_inodelk (this, pinode, dom, &args, &bcount, &gcount,
+ &op_errno);
+ gf_log (this->name, GF_LOG_WARNING,
+ "Lock revocation [reason: %s; gfid: %s; domain: %s; "
+ "age: %ld sec] - Inode lock revoked: %d granted & %d "
+ "blocked locks cleared",
+ reason_str, uuid_utoa (pinode->gfid), dom->domain,
+ lk_age_sec, gcount, bcount);
+ }
+ return revoke_lock;
+}
+
/* Determine if lock is grantable or not */
static pl_inode_lock_t *
__inodelk_grantable (pl_dom_list_t *dom, pl_inode_lock_t *lock)
@@ -419,8 +519,6 @@ pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
{
list_for_each_entry_safe (l, tmp, &ctx->inodelk_lockers,
client_list) {
- list_del_init (&l->client_list);
-
pl_inodelk_log_cleanup (l);
pl_inode = l->pl_inode;
@@ -458,6 +556,8 @@ pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
* forever.
* iv. unref the object.
*/
+ list_del_init (&l->client_list);
+
if (!list_empty (&l->list)) {
__delete_inode_lock (l);
list_add_tail (&l->client_list,
@@ -509,6 +609,7 @@ pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
pl_inode_lock_t *lock, int can_block, pl_dom_list_t *dom,
inode_t *inode)
{
+ posix_locks_private_t *priv = NULL;
int ret = -EINVAL;
pl_inode_lock_t *retlock = NULL;
gf_boolean_t unref = _gf_true;
@@ -518,6 +619,8 @@ pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
lock->pl_inode = pl_inode;
fl_type = lock->fl_type;
+ priv = this->private;
+
/* Ideally, AFTER a successful lock (both blocking and non-blocking) or
* an unsuccessful blocking lock operation, the inode needs to be ref'd.
*
@@ -537,6 +640,24 @@ pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
*/
pl_inode->inode = inode_ref (inode);
+ if (priv->revocation_secs != 0) {
+ if (lock->fl_type != F_UNLCK) {
+ __inodelk_prune_stale (this, pl_inode, dom, lock);
+ } else if (priv->monkey_unlocking == _gf_true) {
+ if (pl_does_monkey_want_stuck_lock ()) {
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __pl_inodelk_unref (lock);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+ inode_unref (pl_inode->inode);
+ gf_log (this->name, GF_LOG_WARNING,
+ "MONKEY LOCKING (forcing stuck lock)!");
+ return 0;
+ }
+ }
+ }
+
if (ctx)
pthread_mutex_lock (&ctx->lock);
pthread_mutex_lock (&pl_inode->mutex);
diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
index e363f425b65..8eb35da44be 100644
--- a/xlators/features/locks/src/locks.h
+++ b/xlators/features/locks/src/locks.h
@@ -190,6 +190,10 @@ typedef struct {
mlk_mode_t mandatory_mode; /* holds current mandatory locking mode */
gf_boolean_t trace; /* trace lock requests in and out */
char *brickname;
+ gf_boolean_t monkey_unlocking;
+ uint32_t revocation_secs;
+ gf_boolean_t revocation_clear_all;
+ uint32_t revocation_max_blocked;
} posix_locks_private_t;
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index dff17e70aaf..8a142c9991a 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -3628,7 +3628,21 @@ reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("trace", priv->trace, options, bool, out);
+ GF_OPTION_RECONF ("monkey-unlocking", priv->monkey_unlocking, options,
+ bool, out);
+
+ GF_OPTION_RECONF ("revocation-secs",
+ priv->revocation_secs, options,
+ uint32, out);
+
+ GF_OPTION_RECONF ("revocation-clear-all", priv->revocation_clear_all,
+ options, bool, out);
+
+ GF_OPTION_RECONF ("revocation-max-blocked",
+ priv->revocation_max_blocked, options,
+ uint32, out);
ret = 0;
+
out:
return ret;
}
@@ -3679,6 +3693,18 @@ init (xlator_t *this)
GF_OPTION_INIT ("trace", priv->trace, bool, out);
+ GF_OPTION_INIT ("monkey-unlocking", priv->monkey_unlocking,
+ bool, out);
+
+ GF_OPTION_INIT ("revocation-secs", priv->revocation_secs,
+ uint32, out);
+
+ GF_OPTION_INIT ("revocation-clear-all", priv->revocation_clear_all,
+ bool, out);
+
+ GF_OPTION_INIT ("revocation-max-blocked", priv->revocation_max_blocked,
+ uint32, out);
+
this->local_pool = mem_pool_new (pl_local_t, 32);
if (!this->local_pool) {
ret = -1;
@@ -3935,5 +3961,35 @@ struct volume_options options[] = {
.description = "Trace the different lock requests "
"to logs."
},
+ { .key = { "monkey-unlocking" },
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .description = "Ignore a random number of unlock requests. Useful "
+ "for testing/creating robust lock recovery mechanisms."
+ },
+ { .key = {"revocation-secs"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = INT_MAX,
+ .default_value = "0",
+ .description = "Maximum time a lock can be taken out, before"
+ "being revoked.",
+ },
+ { .key = {"revocation-clear-all"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .description = "If set to true, will revoke BOTH granted and blocked "
+ "(pending) lock requests if a revocation threshold is "
+ "hit.",
+ },
+ { .key = {"revocation-max-blocked"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = INT_MAX,
+ .default_value = "0",
+ .description = "A number of blocked lock requests after which a lock "
+ "will be revoked to allow the others to proceed. Can "
+ "be used in conjunction w/ revocation-clear-all."
+ },
{ .key = {NULL} },
};
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index 6de83a7ef4c..2e6aa378097 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -2998,6 +2998,27 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.op_version = GD_OP_VERSION_3_8_0,
.flags = OPT_FLAG_CLIENT_OPT
},
+ { .option = "revocation-secs",
+ .key = "features.locks-revocation-secs",
+ .voltype = "features/locks",
+ .op_version = GD_OP_VERSION_3_9_0,
+ },
+ { .option = "revocation-clear-all",
+ .key = "features.locks-revocation-clear-all",
+ .voltype = "features/locks",
+ .op_version = GD_OP_VERSION_3_9_0,
+ },
+ { .option = "revocation-max-blocked",
+ .key = "features.locks-revocation-max-blocked",
+ .voltype = "features/locks",
+ .op_version = GD_OP_VERSION_3_9_0,
+ },
+ { .option = "monkey-unlocking",
+ .key = "features.locks-monkey-unlocking",
+ .voltype = "features/locks",
+ .op_version = GD_OP_VERSION_3_9_0,
+ .type = NO_DOC,
+ },
{ .key = NULL
}
};