path: root/xlators/cluster/afr/src/afr-self-heal-entry.c
diff options
authorPranith Kumar K <>2016-12-05 13:20:51 +0530
committerPranith Kumar Karampuri <>2016-12-07 00:51:56 -0800
commitc4b39198df40535f589c9304fd07b06d948df2f5 (patch)
tree354d9790aa5f9ce55e9c0ec13601c615116e91d0 /xlators/cluster/afr/src/afr-self-heal-entry.c
parenta7d7ed90c9272a42168a91f92754d3a4be605da5 (diff)
cluster/afr: Remove backward compatibility for locks with v1
When we have cascading locks with same lk-owner there is a possibility for a deadlock to happen. One example is as follows: self-heal takes a lock in data-domain for big name with 256 chars of "aaaa...a" and starts heal in a 3-way replication when brick-0 is offline and healing from brick-1 to brick-2 is in progress. So this lock is active on brick-1 and brick-2. Now brick-0 comes online and an operation wants to take full lock and the lock is granted at brick-0 and it is waiting for lock on brick-1. As part of entry healing it takes full locks on all the available bricks and then proceeds with healing the entry. Now this lock will start waiting on brick-0 because some other operation already has a granted lock on it. This leads to a deadlock. Operation is waiting for unlock on "aaaa..." by heal where as heal is waiting for the operation to unlock on brick-0. Initially I thought this is happening because healing is trying to take a lock on all the available bricks instead of just the bricks that are participating in heal. But later realized that same kind of deadlock can happen if a brick goes down after the heal starts but comes back before it completes. So the essential problem is the cascading locks with same lk-owner which were added for backward compatibility with afr-v1 which can be safely removed now that versions with afr-v1 are already EOL. This patch removes the compatibility with v1 which requires cascading locks with same lk-owner. In the next version we can make locking-scheme option a dummy and switch completely to v2. BUG: 1401404 Change-Id: Ic9afab8260f5ff4dff5329eb0429811bcb879079 Signed-off-by: Pranith Kumar K <> Reviewed-on: Smoke: Gluster Build System <> Reviewed-by: Ravishankar N <> NetBSD-regression: NetBSD Build System <> CentOS-regression: Gluster Build System <>
Diffstat (limited to 'xlators/cluster/afr/src/afr-self-heal-entry.c')
1 files changed, 1 insertions, 33 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index d8fe5422372..08a45787027 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -21,11 +21,6 @@
* this name can ever come, entry-lock with this name is going to prevent
* self-heals from older versions while the granular entry-self-heal is going
* on in newer version.*/
-#define LONG_FILENAME "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"\
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"\
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"\
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"\
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
static int
afr_selfheal_entry_delete (xlator_t *this, inode_t *dir, const char *name,
@@ -1050,21 +1045,16 @@ afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode)
afr_private_t *priv = NULL;
unsigned char *locked_on = NULL;
- unsigned char *long_name_locked = NULL;
fd_t *fd = NULL;
int ret = 0;
- gf_boolean_t granular_locks = _gf_false;
priv = this->private;
- if (strcmp ("granular", priv->locking_scheme) == 0)
- granular_locks = _gf_true;
fd = afr_selfheal_data_opendir (this, inode);
if (!fd)
return -EIO;
locked_on = alloca0 (priv->child_count);
- long_name_locked = alloca0 (priv->child_count);
ret = afr_selfheal_tie_breaker_entrylk (frame, this, inode,
priv->sh_domain, NULL,
@@ -1084,29 +1074,7 @@ afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode)
goto unlock;
- if (!granular_locks) {
- ret = afr_selfheal_tryentrylk (frame, this, inode,
- this->name, LONG_FILENAME,
- long_name_locked);
- }
- {
- if (!granular_locks && ret < 1) {
- gf_msg_debug (this->name, 0, "%s: Skipping"
- " entry self-heal as only %d "
- "sub-volumes could be "
- "locked in special-filename "
- "domain",
- uuid_utoa (fd->inode->gfid),
- ret);
- ret = -ENOTCONN;
- goto unlock;
- }
- ret = __afr_selfheal_entry (frame, this, fd, locked_on);
- }
- if (!granular_locks)
- afr_selfheal_unentrylk (frame, this, inode, this->name,
- LONG_FILENAME, long_name_locked,
- NULL);
+ ret = __afr_selfheal_entry (frame, this, fd, locked_on);
afr_selfheal_unentrylk (frame, this, inode, priv->sh_domain, NULL,