From 559fd060c59edec69ba66be7e0a447c8e0408d51 Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Tue, 11 Feb 2020 14:34:48 +0530 Subject: afr: prevent spurious entry heals leading to gfid split-brain Problem: In a hyperconverged setup with granular-entry-heal enabled, if a file is recreated while one of the bricks is down, and an index heal is triggered (with the brick still down), entry self-heal was doing a spurious heal with just the 2 good bricks. It was doing a post-op leading to removal of the filename from .glusterfs/indices/entry-changes as well as erroneous setting of afr xattrs on the parent. When the brick came up, the xattrs were cleared, resulting in the renamed file not getting healed and leading to gfid split-brain and EIO on the mount. Fix: Proceed with entry heal only when shd can connect to all bricks of the replica, just like in data and metadata heal. fixes: bz#1804594 Change-Id: I916ae26ad1fabf259bc6362da52d433b7223b17e Signed-off-by: Ravishankar N (cherry picked from commit 06453d77d056fbaa393a137ca277a20e38d2f67e) --- xlators/cluster/afr/src/afr-self-heal-entry.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'xlators/cluster/afr/src/afr-self-heal-entry.c') diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 810eeb05f9a..5475fca6342 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -584,7 +584,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL, locked_on); { - if (ret < AFR_SH_MIN_PARTICIPANTS) { + if (ret < priv->child_count) { gf_msg_debug(this->name, 0, "%s: Skipping " "entry self-heal as only %d sub-volumes " @@ -973,7 +973,7 @@ __afr_selfheal_entry(call_frame_t *frame, xlator_t *this, fd_t *fd, ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL, data_lock); { - if (ret < 
AFR_SH_MIN_PARTICIPANTS) { + if (ret < priv->child_count) { gf_msg_debug(this->name, 0, "%s: Skipping " "entry self-heal as only %d sub-volumes could " @@ -1097,7 +1097,7 @@ afr_selfheal_entry(call_frame_t *frame, xlator_t *this, inode_t *inode) ret = afr_selfheal_tie_breaker_entrylk(frame, this, inode, priv->sh_domain, NULL, locked_on); { - if (ret < AFR_SH_MIN_PARTICIPANTS) { + if (ret < priv->child_count) { gf_msg_debug(this->name, 0, "%s: Skipping " "entry self-heal as only %d sub-volumes could " -- cgit