From 4a8384fe76aeef652554c48df737d3178eb0160c Mon Sep 17 00:00:00 2001 From: Krutika Dhananjay Date: Thu, 19 Feb 2015 19:35:17 +0530 Subject: cluster/afr: Do not increment healed_count if no healing was performed PROBLEM: When file modifications are happening while index heal is launched, index healer could pick up entries which appeared in indices/xattrop transiently during the course of the operations on the mount point, and do not really need any heal. This will cause index healer to keep doing index-heal in a loop as long as it finds this entry, by believing that it did successfully heal some gfids even when it didn't. FIX: afr_selfheal() now returns a 1 to indicate that it did not (need to) heal a given gfid. afr_shd_selfheal() will not increment healed_count whenever afr_selfheal() returns a 1. Change-Id: I0d97e11392a032a852e8c6508f691300ef0e5b98 BUG: 1194305 Signed-off-by: Krutika Dhananjay Reviewed-on: http://review.gluster.org/9713 Reviewed-by: Pranith Kumar Karampuri Tested-by: Pranith Kumar Karampuri Reviewed-by: Ravishankar N Tested-by: Gluster Build System --- xlators/cluster/afr/src/afr-common.c | 54 ++++++---------------- xlators/cluster/afr/src/afr-self-heal-common.c | 59 ++++++++++++++++-------- xlators/cluster/afr/src/afr-self-heal-data.c | 16 ++++++- xlators/cluster/afr/src/afr-self-heal-entry.c | 15 +++++- xlators/cluster/afr/src/afr-self-heal-metadata.c | 10 ++-- xlators/cluster/afr/src/afr-self-heal.h | 3 ++ xlators/cluster/afr/src/pump.c | 2 +- 7 files changed, 92 insertions(+), 67 deletions(-) (limited to 'xlators/cluster/afr/src') diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index f954b1399db..c31fcba6c3b 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -4617,14 +4617,10 @@ out: int32_t afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc) { - gf_boolean_t data_selfheal = _gf_false; - gf_boolean_t metadata_selfheal = _gf_false; - gf_boolean_t entry_selfheal = _gf_false; + int ret = 0; + int op_errno = 0; dict_t *dict = NULL; afr_local_t *local = NULL; - inode_t *inode = NULL; - int entry_ret = 0, metadata_ret = 0, data_ret = 0; - int ret = 0, op_errno = 0; local = frame->local; dict = dict_new (); @@ -4634,17 +4630,9 @@ afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc) goto out; } - ret = afr_selfheal_unlocked_inspect (frame, this, loc->gfid, &inode, - &data_selfheal, - &metadata_selfheal, - &entry_selfheal); - if (ret) { - op_errno = -ret; - ret = -1; - goto out; - } + ret = afr_selfheal_do (frame, this, loc->gfid); - if (!data_selfheal && !metadata_selfheal && !entry_selfheal) { + if (ret == 1) { ret = dict_set_str (dict, "sh-fail-msg", "File not in split-brain"); if (ret) @@ -4652,36 +4640,20 @@ afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc) "Failed to set sh-fail-msg in dict"); ret = 0; goto out; - } - - if (data_selfheal) - data_ret = afr_selfheal_data (frame, this, inode); - - if (metadata_selfheal) - metadata_ret = afr_selfheal_metadata (frame, this, inode); - - if (entry_selfheal) - entry_ret = afr_selfheal_entry (frame, this, inode); - - ret = (data_ret | metadata_ret | entry_ret); - - if (local->xdata_rsp) { - /* 'sh-fail-msg' has been set in the dict during self-heal.*/ - dict_copy (local->xdata_rsp, dict); - ret = 0; - } else if (ret) { - /*Some other error during self-heal. Just propagate it.*/ - op_errno = -ret; - ret = -1; + } else { + if (local->xdata_rsp) { + /* 'sh-fail-msg' has been set in the dict during self-heal.*/ + dict_copy (local->xdata_rsp, dict); + ret = 0; + } else if (ret < 0) { + op_errno = -ret; + ret = -1; + } } out: AFR_STACK_UNWIND (getxattr, frame, ret, op_errno, dict, NULL); if (dict) dict_unref(dict); - if (inode) { - inode_forget (inode, 1); - inode_unref (inode); - } return ret; } diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index e9d853c4ecd..74d340bc808 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -1310,23 +1310,18 @@ out: return ret; } -/* - * This is the entry point for healing a given GFID - */ - int -afr_selfheal (xlator_t *this, uuid_t gfid) +afr_selfheal_do (call_frame_t *frame, xlator_t *this, uuid_t gfid) { - inode_t *inode = NULL; - call_frame_t *frame = NULL; - int ret = -1, entry_ret = 0, metadata_ret = 0, data_ret = 0; - gf_boolean_t data_selfheal = _gf_false; - gf_boolean_t metadata_selfheal = _gf_false; - gf_boolean_t entry_selfheal = _gf_false; - - frame = afr_frame_create (this); - if (!frame) - goto out; + int ret = -1; + int entry_ret = 1; + int metadata_ret = 1; + int data_ret = 1; + int or_ret = 0; + inode_t *inode = NULL; + gf_boolean_t data_selfheal = _gf_false; + gf_boolean_t metadata_selfheal = _gf_false; + gf_boolean_t entry_selfheal = _gf_false; ret = afr_selfheal_unlocked_inspect (frame, this, gfid, &inode, &data_selfheal, @@ -1344,14 +1339,42 @@ afr_selfheal (xlator_t *this, uuid_t gfid) if (entry_selfheal) entry_ret = afr_selfheal_entry (frame, this, inode); + or_ret = (data_ret | metadata_ret | entry_ret); + if (data_ret == -EIO || metadata_ret == -EIO || entry_ret == -EIO) ret = -EIO; + else if (data_ret == 1 && metadata_ret == 1 && entry_ret == 1) + ret = 1; + else if (or_ret < 0) + ret = or_ret; else - ret = (data_ret | metadata_ret | entry_ret); + ret = 0; - inode_forget (inode, 1); - inode_unref (inode); out: + if (inode) { + inode_forget (inode, 1); + inode_unref (inode); + } + return ret; +} +/* + * This is the entry point for healing a given GFID + * The function returns 0 if self-heal was successful, appropriate errno + * in case of a failure and 1 in case self-heal was never needed on the gfid. + */ + +int +afr_selfheal (xlator_t *this, uuid_t gfid) +{ + int ret = -1; + call_frame_t *frame = NULL; + + frame = afr_frame_create (this); + if (!frame) + return ret; + + ret = afr_selfheal_do (frame, this, gfid); + if (frame) AFR_STACK_DESTROY (frame); diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 1b1d57d0048..c0f444a776e 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -645,6 +645,7 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd, struct afr_reply *locked_replies = NULL; int source = -1; gf_boolean_t compat = _gf_false; + gf_boolean_t did_sh = _gf_true; unsigned char *compat_lock = NULL; priv = this->private; @@ -676,6 +677,11 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd, if (ret < 0) goto unlock; + if (AFR_COUNT(healed_sinks, priv->child_count) == 0) { + did_sh = _gf_false; + goto unlock; + } + source = ret; ret = __afr_selfheal_truncate_sinks (frame, this, fd, healed_sinks, @@ -701,6 +707,9 @@ unlock: if (ret < 0) goto out; + if (!did_sh) + goto out; + ret = afr_selfheal_data_do (frame, this, fd, source, healed_sinks, locked_replies); if (ret) @@ -714,8 +723,11 @@ out: afr_selfheal_uninodelk (frame, this, fd->inode, this->name, LLONG_MAX - 2, 1, compat_lock); - afr_log_selfheal (fd->inode->gfid, this, ret, "data", source, - healed_sinks); + if (did_sh) + afr_log_selfheal (fd->inode->gfid, this, ret, "data", source, + healed_sinks); + else + ret = 1; if (locked_replies) afr_replies_wipe (locked_replies, priv->child_count); diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 6af9488f9a4..13825e1ebad 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -578,6 +578,7 @@ __afr_selfheal_entry (call_frame_t *frame, xlator_t *this, fd_t *fd, unsigned char *healed_sinks = NULL; struct afr_reply *locked_replies = NULL; afr_private_t *priv = NULL; + gf_boolean_t did_sh = _gf_true; priv = this->private; @@ -606,6 +607,10 @@ __afr_selfheal_entry (call_frame_t *frame, xlator_t *this, fd_t *fd, data_lock, sources, sinks, healed_sinks, locked_replies, &source); + if (AFR_COUNT(healed_sinks, priv->child_count) == 0) { + did_sh = _gf_false; + goto unlock; + } } unlock: afr_selfheal_unentrylk (frame, this, fd->inode, this->name, NULL, @@ -613,6 +618,9 @@ unlock: if (ret < 0) goto out; + if (!did_sh) + goto out; + ret = afr_selfheal_entry_do (frame, this, fd, source, sources, healed_sinks); if (ret) @@ -648,8 +656,11 @@ postop_unlock: afr_selfheal_unentrylk (frame, this, fd->inode, this->name, NULL, postop_lock); out: - afr_log_selfheal (fd->inode->gfid, this, ret, "entry", source, - healed_sinks); + if (did_sh) + afr_log_selfheal (fd->inode->gfid, this, ret, "entry", source, + healed_sinks); + else + ret = 1; if (locked_replies) afr_replies_wipe (locked_replies, priv->child_count); diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index cd8bb688a11..7333f939ae5 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -377,6 +377,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode) unsigned char *data_lock = NULL; unsigned char *healed_sinks = NULL; struct afr_reply *locked_replies = NULL; + gf_boolean_t did_sh = _gf_true; int source = -1; priv = this->private; @@ -406,7 +407,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode) source = ret; if (AFR_COUNT (healed_sinks, priv->child_count) == 0) { - ret = -ENOTCONN; + did_sh = _gf_false; goto unlock; } @@ -424,8 +425,11 @@ unlock: afr_selfheal_uninodelk (frame, this, inode, this->name, LLONG_MAX -1, 0, data_lock); - afr_log_selfheal (inode->gfid, this, ret, "metadata", source, - healed_sinks); + if (did_sh) + afr_log_selfheal (inode->gfid, this, ret, "metadata", source, + healed_sinks); + else + ret = 1; if (locked_replies) afr_replies_wipe (locked_replies, priv->child_count); diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index 74cc9608cf6..32be2480234 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -245,4 +245,7 @@ afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this, gf_boolean_t *data_selfheal, gf_boolean_t *metadata_selfheal, gf_boolean_t *entry_selfheal); + +int +afr_selfheal_do (call_frame_t *frame, xlator_t *this, uuid_t gfid); #endif /* !_AFR_SELFHEAL_H */ diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c index 747b5ba2592..70038b9406e 100644 --- a/xlators/cluster/afr/src/pump.c +++ b/xlators/cluster/afr/src/pump.c @@ -531,7 +531,7 @@ gf_pump_traverse_directory (loc_t *loc) } ret = afr_selfheal (this, iatt.ia_gfid); - if (ret) { + if (ret < 0) { gf_log (this->name, GF_LOG_ERROR, "%s: self-heal failed (%s)", entry_loc.path, uuid_utoa (iatt.ia_gfid)); -- cgit