summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr
diff options
context:
space:
mode:
authorRavishankar N <ravishankar@redhat.com>2015-03-05 11:29:25 +0000
committerPranith Kumar Karampuri <pkarampu@redhat.com>2015-03-17 09:14:44 -0700
commitc8be9af5f54aa3e42dffee4024d2915743258242 (patch)
tree8b33b576cc37b318d77357c38494320648f95711 /xlators/cluster/afr
parent0d2bed70faed3c63f25ed9269dc55562973ef9b7 (diff)
afr: remove stale index entries
Problem: During the pre-op phase, the index xlator 1. Creates the entry inside .glusterfs/indices/xattrop 2. Winds the xattrop fop to posix to mark dirty/pending changelogs. If the brick crashes after step 1, the xattrop entry becomes stale and never gets removed by shd during subsequent crawls because there is nothing to heal (changelogs are zero). Though the stale entry does not get displayed in the output of the 'heal info' command, it nevertheless stays there forever unless a new write transaction is performed on the file. Fix: During index self-heal, if the afr xattrs are found to be clean (indicated by a return value of 2 from a call to afr_shd_selfheal()), send a dummy post-op with all 0s for the xattr values, which makes the index xlator unlink the stale entry. Change-Id: I02cb2bc937f2e3f3f3cb35d67b006664dc7ef919 BUG: 1190069 Signed-off-by: Ravishankar N <ravishankar@redhat.com> Reviewed-on: http://review.gluster.org/9714 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Anuradha Talur <atalur@redhat.com> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com> Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Diffstat (limited to 'xlators/cluster/afr')
-rw-r--r--xlators/cluster/afr/src/afr-common.c2
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c14
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.c54
3 files changed, 66 insertions, 4 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 92ecb7fb99b..f7cc202d4d1 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -4574,7 +4574,7 @@ afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc)
ret = afr_selfheal_do (frame, this, loc->gfid);
- if (ret == 1) {
+ if (ret == 1 || ret == 2) {
ret = dict_set_str (dict, "sh-fail-msg",
"File not in split-brain");
if (ret)
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index fd450be0890..2441f413f3e 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1330,6 +1330,11 @@ afr_selfheal_do (call_frame_t *frame, xlator_t *this, uuid_t gfid)
if (ret)
goto out;
+ if (!(data_selfheal || metadata_selfheal || entry_selfheal)) {
+ ret = 2;
+ goto out;
+ }
+
if (data_selfheal)
data_ret = afr_selfheal_data (frame, this, inode);
@@ -1358,9 +1363,12 @@ out:
return ret;
}
/*
- * This is the entry point for healing a given GFID
- * The function returns 0 if self-heal was successful, appropriate errno
- * in case of a failure and 1 in case self-heal was never needed on the gfid.
+ * This is the entry point for healing a given GFID. The return values for this
+ * function are as follows:
+ * '0' if the self-heal is successful
+ * '1' if the afr-xattrs are non-zero (due to on-going IO) and no heal is needed
+ * '2' if the afr-xattrs are all-zero and no heal is needed
+ * $errno if the heal on the gfid failed.
*/
int
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index cb5bf6ce197..c38440b4c9b 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -238,6 +238,53 @@ afr_shd_index_purge (xlator_t *subvol, inode_t *inode, char *name)
return ret;
}
+void
+afr_shd_zero_xattrop (xlator_t *this, uuid_t gfid)
+{
+/* For gfid, call afr_selfheal_post_op() on every child with an xattr dict whose AFR_DIRTY and pending-key values are all zero. Returns nothing; any failure just skips to cleanup. */
+ call_frame_t *frame = NULL;
+ inode_t *inode = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ int ret = 0;
+ int i = 0;
+ int raw[AFR_NUM_CHANGE_LOGS] = {0}; /* zero-filled value reused for every key set below */
+
+ priv = this->private;
+ frame = afr_frame_create (this);
+ if (!frame)
+ goto out;
+ inode = afr_inode_find (this, gfid);
+ if (!inode)
+ goto out;
+ xattr = dict_new();
+ if (!xattr)
+ goto out;
+ ret = dict_set_static_bin (xattr, AFR_DIRTY, raw,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS); /* NOTE(review): raw is a local array; presumably fine because xattr is unref'd before this function returns -- confirm */
+ if (ret)
+ goto out;
+ for (i = 0; i < priv->child_count; i++) { /* one pending key per child, all pointing at the same zero buffer */
+ ret = dict_set_static_bin (xattr, priv->pending_key[i], raw,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret)
+ goto out;
+ }
+
+ /* Send xattrop to all bricks. Doing a lookup to see if bricks are up or
+ * have valid replies for this gfid seems a bit of an overkill. */
+ for (i = 0; i < priv->child_count; i++)
+ afr_selfheal_post_op (frame, this, inode, i, xattr); /* result, if any, is not checked here */
+
+out: /* each pointer is still NULL if its acquisition was never reached or failed */
+ if (frame)
+ AFR_STACK_DESTROY (frame);
+ if (inode)
+ inode_unref (inode);
+ if (xattr)
+ dict_unref (xattr);
+ return;
+}
int
afr_shd_selfheal_name (struct subvol_healer *healer, int child, uuid_t parent,
@@ -369,6 +416,13 @@ afr_shd_index_heal (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
if (ret == -ENOENT || ret == -ESTALE)
afr_shd_index_purge (subvol, parent->inode, entry->d_name);
+ if (ret == 2)
+ /* If bricks crashed in pre-op after creating indices/xattrop
+ * link but before setting afr changelogs, we end up with stale
+ * xattrop links but zero changelogs. Remove such entries by
+ * sending a post-op with zero changelogs.
+ */
+ afr_shd_zero_xattrop (healer->this, gfid);
return 0;
}