From 8deedef565df49def75083678f8d1558c7b1f7d3 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Thu, 17 Mar 2016 19:42:00 +0530 Subject: cluster/afr: Fix partial heals in 3-way replication Problem: When there are 2 sources and one sink and two self-heal daemons try to acquire locks at the same time, there is a chance that a daemon gets a lock on one source and the sink, leading to a partial heal. This will need one more heal from the remaining source to the sink for the complete self-heal. This is not optimal. Fix: Upgrade non-blocking locks to blocking locks on all the subvolumes if the number of locks acquired is a majority and there were EAGAIN errors. BUG: 1318751 Change-Id: Iae10b8d3402756c4164b98cc49876056ff7a61e5 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.org/13766 Smoke: Gluster Build System NetBSD-regression: NetBSD Build System CentOS-regression: Gluster Build System Reviewed-by: Ravishankar N --- xlators/cluster/afr/src/afr-self-heal-entry.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'xlators/cluster/afr/src/afr-self-heal-entry.c') diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index e4d616ea20a..c8e2c98db0e 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -699,7 +699,7 @@ postop_unlock: out: if (did_sh) afr_log_selfheal (fd->inode->gfid, this, ret, "entry", source, - healed_sinks); + sources, healed_sinks); else ret = 1; @@ -754,8 +754,9 @@ afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode) locked_on = alloca0 (priv->child_count); long_name_locked = alloca0 (priv->child_count); - ret = afr_selfheal_tryentrylk (frame, this, inode, priv->sh_domain, NULL, - locked_on); + ret = afr_selfheal_tie_breaker_entrylk (frame, this, inode, + priv->sh_domain, NULL, + locked_on); { if (ret < AFR_SH_MIN_PARTICIPANTS) { gf_msg_debug (this->name, 0, "%s: Skipping " -- cgit