summary | refs | log | tree | commit | diff | stats
path: root/xlators/cluster
diff options
context:
space:
mode:
author: Krutika Dhananjay <kdhananj@redhat.com> 2015-02-19 19:35:17 +0530
committer: Pranith Kumar Karampuri <pkarampu@redhat.com> 2015-03-04 16:37:56 -0800
commit: 4a8384fe76aeef652554c48df737d3178eb0160c (patch)
tree: ac00907a88517c7ad4e23ff7af516de08752422a /xlators/cluster
parent: a42db19a5dd796f5387b0765f5fc05cef462df22 (diff)
cluster/afr: Do not increment healed_count if no healing was performed
PROBLEM: When file modifications are happening while index heal is launched, the index healer can pick up entries that appeared in indices/xattrop only transiently during operations on the mount point and do not actually need any heal. This causes the index healer to keep running index-heal in a loop for as long as it finds such an entry, because it believes it successfully healed some gfids even when it did not.

FIX: afr_selfheal() now returns 1 to indicate that it did not (need to) heal a given gfid. afr_shd_selfheal() does not increment healed_count when afr_selfheal() returns 1.

Change-Id: I0d97e11392a032a852e8c6508f691300ef0e5b98
BUG: 1194305
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: http://review.gluster.org/9713
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Reviewed-by: Ravishankar N <ravishankar@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Diffstat (limited to 'xlators/cluster')
-rw-r--r-- xlators/cluster/afr/src/afr-common.c 54
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-common.c 59
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-data.c 16
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-entry.c 15
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-metadata.c 10
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal.h 3
-rw-r--r-- xlators/cluster/afr/src/pump.c 2
7 files changed, 92 insertions, 67 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index f954b1399db..c31fcba6c3b 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -4617,14 +4617,10 @@ out:
int32_t
afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc)
{
- gf_boolean_t data_selfheal = _gf_false;
- gf_boolean_t metadata_selfheal = _gf_false;
- gf_boolean_t entry_selfheal = _gf_false;
+ int ret = 0;
+ int op_errno = 0;
dict_t *dict = NULL;
afr_local_t *local = NULL;
- inode_t *inode = NULL;
- int entry_ret = 0, metadata_ret = 0, data_ret = 0;
- int ret = 0, op_errno = 0;
local = frame->local;
dict = dict_new ();
@@ -4634,17 +4630,9 @@ afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc)
goto out;
}
- ret = afr_selfheal_unlocked_inspect (frame, this, loc->gfid, &inode,
- &data_selfheal,
- &metadata_selfheal,
- &entry_selfheal);
- if (ret) {
- op_errno = -ret;
- ret = -1;
- goto out;
- }
+ ret = afr_selfheal_do (frame, this, loc->gfid);
- if (!data_selfheal && !metadata_selfheal && !entry_selfheal) {
+ if (ret == 1) {
ret = dict_set_str (dict, "sh-fail-msg",
"File not in split-brain");
if (ret)
@@ -4652,36 +4640,20 @@ afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc)
"Failed to set sh-fail-msg in dict");
ret = 0;
goto out;
- }
-
- if (data_selfheal)
- data_ret = afr_selfheal_data (frame, this, inode);
-
- if (metadata_selfheal)
- metadata_ret = afr_selfheal_metadata (frame, this, inode);
-
- if (entry_selfheal)
- entry_ret = afr_selfheal_entry (frame, this, inode);
-
- ret = (data_ret | metadata_ret | entry_ret);
-
- if (local->xdata_rsp) {
- /* 'sh-fail-msg' has been set in the dict during self-heal.*/
- dict_copy (local->xdata_rsp, dict);
- ret = 0;
- } else if (ret) {
- /*Some other error during self-heal. Just propagate it.*/
- op_errno = -ret;
- ret = -1;
+ } else {
+ if (local->xdata_rsp) {
+ /* 'sh-fail-msg' has been set in the dict during self-heal.*/
+ dict_copy (local->xdata_rsp, dict);
+ ret = 0;
+ } else if (ret < 0) {
+ op_errno = -ret;
+ ret = -1;
+ }
}
out:
AFR_STACK_UNWIND (getxattr, frame, ret, op_errno, dict, NULL);
if (dict)
dict_unref(dict);
- if (inode) {
- inode_forget (inode, 1);
- inode_unref (inode);
- }
return ret;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index e9d853c4ecd..74d340bc808 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1310,23 +1310,18 @@ out:
return ret;
}
-/*
- * This is the entry point for healing a given GFID
- */
-
int
-afr_selfheal (xlator_t *this, uuid_t gfid)
+afr_selfheal_do (call_frame_t *frame, xlator_t *this, uuid_t gfid)
{
- inode_t *inode = NULL;
- call_frame_t *frame = NULL;
- int ret = -1, entry_ret = 0, metadata_ret = 0, data_ret = 0;
- gf_boolean_t data_selfheal = _gf_false;
- gf_boolean_t metadata_selfheal = _gf_false;
- gf_boolean_t entry_selfheal = _gf_false;
-
- frame = afr_frame_create (this);
- if (!frame)
- goto out;
+ int ret = -1;
+ int entry_ret = 1;
+ int metadata_ret = 1;
+ int data_ret = 1;
+ int or_ret = 0;
+ inode_t *inode = NULL;
+ gf_boolean_t data_selfheal = _gf_false;
+ gf_boolean_t metadata_selfheal = _gf_false;
+ gf_boolean_t entry_selfheal = _gf_false;
ret = afr_selfheal_unlocked_inspect (frame, this, gfid, &inode,
&data_selfheal,
@@ -1344,14 +1339,42 @@ afr_selfheal (xlator_t *this, uuid_t gfid)
if (entry_selfheal)
entry_ret = afr_selfheal_entry (frame, this, inode);
+ or_ret = (data_ret | metadata_ret | entry_ret);
+
if (data_ret == -EIO || metadata_ret == -EIO || entry_ret == -EIO)
ret = -EIO;
+ else if (data_ret == 1 && metadata_ret == 1 && entry_ret == 1)
+ ret = 1;
+ else if (or_ret < 0)
+ ret = or_ret;
else
- ret = (data_ret | metadata_ret | entry_ret);
+ ret = 0;
- inode_forget (inode, 1);
- inode_unref (inode);
out:
+ if (inode) {
+ inode_forget (inode, 1);
+ inode_unref (inode);
+ }
+ return ret;
+}
+/*
+ * This is the entry point for healing a given GFID
+ * The function returns 0 if self-heal was successful, appropriate errno
+ * in case of a failure and 1 in case self-heal was never needed on the gfid.
+ */
+
+int
+afr_selfheal (xlator_t *this, uuid_t gfid)
+{
+ int ret = -1;
+ call_frame_t *frame = NULL;
+
+ frame = afr_frame_create (this);
+ if (!frame)
+ return ret;
+
+ ret = afr_selfheal_do (frame, this, gfid);
+
if (frame)
AFR_STACK_DESTROY (frame);
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 1b1d57d0048..c0f444a776e 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -645,6 +645,7 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct afr_reply *locked_replies = NULL;
int source = -1;
gf_boolean_t compat = _gf_false;
+ gf_boolean_t did_sh = _gf_true;
unsigned char *compat_lock = NULL;
priv = this->private;
@@ -676,6 +677,11 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (ret < 0)
goto unlock;
+ if (AFR_COUNT(healed_sinks, priv->child_count) == 0) {
+ did_sh = _gf_false;
+ goto unlock;
+ }
+
source = ret;
ret = __afr_selfheal_truncate_sinks (frame, this, fd, healed_sinks,
@@ -701,6 +707,9 @@ unlock:
if (ret < 0)
goto out;
+ if (!did_sh)
+ goto out;
+
ret = afr_selfheal_data_do (frame, this, fd, source, healed_sinks,
locked_replies);
if (ret)
@@ -714,8 +723,11 @@ out:
afr_selfheal_uninodelk (frame, this, fd->inode, this->name,
LLONG_MAX - 2, 1, compat_lock);
- afr_log_selfheal (fd->inode->gfid, this, ret, "data", source,
- healed_sinks);
+ if (did_sh)
+ afr_log_selfheal (fd->inode->gfid, this, ret, "data", source,
+ healed_sinks);
+ else
+ ret = 1;
if (locked_replies)
afr_replies_wipe (locked_replies, priv->child_count);
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 6af9488f9a4..13825e1ebad 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -578,6 +578,7 @@ __afr_selfheal_entry (call_frame_t *frame, xlator_t *this, fd_t *fd,
unsigned char *healed_sinks = NULL;
struct afr_reply *locked_replies = NULL;
afr_private_t *priv = NULL;
+ gf_boolean_t did_sh = _gf_true;
priv = this->private;
@@ -606,6 +607,10 @@ __afr_selfheal_entry (call_frame_t *frame, xlator_t *this, fd_t *fd,
data_lock, sources, sinks,
healed_sinks,
locked_replies, &source);
+ if (AFR_COUNT(healed_sinks, priv->child_count) == 0) {
+ did_sh = _gf_false;
+ goto unlock;
+ }
}
unlock:
afr_selfheal_unentrylk (frame, this, fd->inode, this->name, NULL,
@@ -613,6 +618,9 @@ unlock:
if (ret < 0)
goto out;
+ if (!did_sh)
+ goto out;
+
ret = afr_selfheal_entry_do (frame, this, fd, source, sources,
healed_sinks);
if (ret)
@@ -648,8 +656,11 @@ postop_unlock:
afr_selfheal_unentrylk (frame, this, fd->inode, this->name, NULL,
postop_lock);
out:
- afr_log_selfheal (fd->inode->gfid, this, ret, "entry", source,
- healed_sinks);
+ if (did_sh)
+ afr_log_selfheal (fd->inode->gfid, this, ret, "entry", source,
+ healed_sinks);
+ else
+ ret = 1;
if (locked_replies)
afr_replies_wipe (locked_replies, priv->child_count);
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index cd8bb688a11..7333f939ae5 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -377,6 +377,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode)
unsigned char *data_lock = NULL;
unsigned char *healed_sinks = NULL;
struct afr_reply *locked_replies = NULL;
+ gf_boolean_t did_sh = _gf_true;
int source = -1;
priv = this->private;
@@ -406,7 +407,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode)
source = ret;
if (AFR_COUNT (healed_sinks, priv->child_count) == 0) {
- ret = -ENOTCONN;
+ did_sh = _gf_false;
goto unlock;
}
@@ -424,8 +425,11 @@ unlock:
afr_selfheal_uninodelk (frame, this, inode, this->name,
LLONG_MAX -1, 0, data_lock);
- afr_log_selfheal (inode->gfid, this, ret, "metadata", source,
- healed_sinks);
+ if (did_sh)
+ afr_log_selfheal (inode->gfid, this, ret, "metadata", source,
+ healed_sinks);
+ else
+ ret = 1;
if (locked_replies)
afr_replies_wipe (locked_replies, priv->child_count);
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index 74cc9608cf6..32be2480234 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -245,4 +245,7 @@ afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this,
gf_boolean_t *data_selfheal,
gf_boolean_t *metadata_selfheal,
gf_boolean_t *entry_selfheal);
+
+int
+afr_selfheal_do (call_frame_t *frame, xlator_t *this, uuid_t gfid);
#endif /* !_AFR_SELFHEAL_H */
diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c
index 747b5ba2592..70038b9406e 100644
--- a/xlators/cluster/afr/src/pump.c
+++ b/xlators/cluster/afr/src/pump.c
@@ -531,7 +531,7 @@ gf_pump_traverse_directory (loc_t *loc)
}
ret = afr_selfheal (this, iatt.ia_gfid);
- if (ret) {
+ if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"%s: self-heal failed (%s)",
entry_loc.path, uuid_utoa (iatt.ia_gfid));