summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr
diff options
context:
space:
mode:
authorkarthik-us <ksubrahm@redhat.com>2017-03-09 18:08:28 +0530
committerPranith Kumar Karampuri <pkarampu@redhat.com>2017-04-20 20:38:54 -0400
commit799a2ff8299db6d6dc75f1533f4bd5a3bb72164d (patch)
tree7a35badad1ad8cd42e6a1d8cff61d7522ff7bf03 /xlators/cluster/afr
parent98dc1f08c114adea1f4133c12dff0d4c3d75b30d (diff)
cluster/afr: GFID split brain resolution with favorite-child-policy
Problem: Currently the automatic split brain resolution with favorite child policy is not resolving the GFID split brains. Fix: When there is a GFID split brain and the favorite child policy is set to size/mtime/ctime/majority, based on the policy decide on the source and sinks. Delete the entry from the sinks and recreate it from the source. Mark the appropriate pending attributes and resolve the GFID split brain. When the heal takes place it will complete the pending heals and reset the attributes. Change-Id: Ie30e5373f94ca6f276745d9c3ad662b8acca6946 BUG: 1430719 Signed-off-by: karthik-us <ksubrahm@redhat.com> Reviewed-on: https://review.gluster.org/16878 Smoke: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com> Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> Reviewed-by: Ravishankar N <ravishankar@redhat.com> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Diffstat (limited to 'xlators/cluster/afr')
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c196
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h11
2 files changed, 162 insertions, 45 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 413425ccad9..82ae6432d7d 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -17,6 +17,105 @@
#include "syncop-utils.h"
#include "events.h"
+int
+afr_selfheal_gfid_mismatch_by_majority (struct afr_reply *replies,
+ int child_count)
+{
+ int j = 0;
+ int i = 0;
+ int src = -1;
+ int votes[child_count];
+
+ for (i = 0; i < child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+
+ votes[i] = 1;
+ for (j = i+1; j < child_count; j++) {
+ if ((!gf_uuid_compare (replies[i].poststat.ia_gfid,
+ replies[j].poststat.ia_gfid)))
+ votes[i]++;
+ if (votes[i] > child_count / 2) {
+ src = i;
+ goto out;
+ }
+ }
+ }
+
+out:
+ return src;
+}
+
+int
+afr_gfid_split_brain_source (xlator_t *this, struct afr_reply *replies,
+ inode_t *inode, uuid_t pargfid, char *bname,
+ int src_idx, int child_idx,
+ unsigned char *locked_on, int *src)
+{
+ afr_private_t *priv = NULL;
+ char g1[64] = {0,};
+ char g2[64] = {0,};
+ int up_count = 0;
+
+ priv = this->private;
+ up_count = AFR_COUNT (locked_on, priv->child_count);
+ if (up_count != priv->child_count) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_SPLIT_BRAIN,
+ "All the bricks should be up to resolve the gfid split "
+ "brain");
+ goto out;
+ }
+ switch (priv->fav_child_policy) {
+ case AFR_FAV_CHILD_BY_SIZE:
+ *src = afr_sh_fav_by_size (this, replies, inode);
+ break;
+ case AFR_FAV_CHILD_BY_MTIME:
+ *src = afr_sh_fav_by_mtime (this, replies, inode);
+ break;
+ case AFR_FAV_CHILD_BY_CTIME:
+ *src = afr_sh_fav_by_ctime(this, replies, inode);
+ break;
+ case AFR_FAV_CHILD_BY_MAJORITY:
+ if (priv->child_count != 2)
+ *src = afr_selfheal_gfid_mismatch_by_majority (replies,
+ priv->child_count);
+ else
+ *src = -1;
+
+ if (*src == -1) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_SPLIT_BRAIN, "No majority to resolve "
+ "gfid split brain");
+ }
+ break;
+ default:
+ break;
+ }
+
+out:
+ if (*src == -1) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "Gfid mismatch detected for <gfid:%s>/%s>, %s on %s and"
+ " %s on %s. Skipping conservative merge on the file.",
+ uuid_utoa (pargfid), bname,
+ uuid_utoa_r (replies[child_idx].poststat.ia_gfid, g1),
+ priv->children[child_idx]->name,
+ uuid_utoa_r (replies[src_idx].poststat.ia_gfid, g2),
+ priv->children[src_idx]->name);
+ gf_event (EVENT_AFR_SPLIT_BRAIN, "subvol=%s;type=gfid;file="
+ "<gfid:%s>/%s>;count=2;child-%d=%s;gfid-%d=%s;"
+ "child-%d=%s;gfid-%d=%s", this->name,
+ uuid_utoa (pargfid), bname, child_idx,
+ priv->children[child_idx]->name, child_idx,
+ uuid_utoa_r (replies[child_idx].poststat.ia_gfid, g1),
+ src_idx, priv->children[src_idx]->name, src_idx,
+ uuid_utoa_r (replies[src_idx].poststat.ia_gfid, g2));
+ return -1;
+ }
+ return 0;
+}
+
static int
afr_selfheal_entry_delete (xlator_t *this, inode_t *dir, const char *name,
inode_t *inode, int child, struct afr_reply *replies)
@@ -206,13 +305,15 @@ __afr_selfheal_heal_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
static int
afr_selfheal_detect_gfid_and_type_mismatch (xlator_t *this,
struct afr_reply *replies,
- uuid_t pargfid, char *bname,
- int src_idx)
+ inode_t *inode,
+ uuid_t pargfid,
+ char *bname, int src_idx,
+ unsigned char *locked_on,
+ int *src)
{
- int i = 0;
- char g1[64] = {0,};
- char g2[64] = {0,};
- afr_private_t *priv = NULL;
+ int i = 0;
+ int ret = -1;
+ afr_private_t *priv = NULL;
priv = this->private;
@@ -227,46 +328,33 @@ afr_selfheal_detect_gfid_and_type_mismatch (xlator_t *this,
continue;
if (gf_uuid_compare (replies[src_idx].poststat.ia_gfid,
- replies[i].poststat.ia_gfid)) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- AFR_MSG_SPLIT_BRAIN, "Gfid mismatch "
- "detected for <gfid:%s>/%s>, %s on %s and %s on %s. "
- "Skipping conservative merge on the file.",
- uuid_utoa (pargfid), bname,
- uuid_utoa_r (replies[i].poststat.ia_gfid, g1),
- priv->children[i]->name,
- uuid_utoa_r (replies[src_idx].poststat.ia_gfid,
- g2), priv->children[src_idx]->name);
- gf_event (EVENT_AFR_SPLIT_BRAIN,
- "subvol=%s;type=gfid;file=<gfid:%s>/%s>;count=2;"
- "child-%d=%s;gfid-%d=%s;child-%d=%s;gfid-%d=%s",
- this->name, uuid_utoa (pargfid), bname, i,
- priv->children[i]->name, i,
- uuid_utoa_r (replies[i].poststat.ia_gfid, g1),
- src_idx, priv->children[src_idx]->name, src_idx,
- uuid_utoa_r (replies[src_idx].poststat.ia_gfid, g2));
- return -1;
+ replies[i].poststat.ia_gfid)) {
+ ret = afr_gfid_split_brain_source (this, replies, inode,
+ pargfid, bname,
+ src_idx, i,
+ locked_on, src);
+ return ret;
}
if ((replies[src_idx].poststat.ia_type) !=
(replies[i].poststat.ia_type)) {
gf_msg (this->name, GF_LOG_ERROR, 0,
- AFR_MSG_SPLIT_BRAIN, "Type mismatch "
- "detected for <gfid:%s>/%s>, %s on %s and %s on %s. "
+ AFR_MSG_SPLIT_BRAIN, "Type mismatch detected "
+ "for <gfid:%s>/%s>, %s on %s and %s on %s. "
"Skipping conservative merge on the file.",
uuid_utoa (pargfid), bname,
- gf_inode_type_to_str (replies[i].poststat.ia_type),
+ gf_inode_type_to_str (replies[i].poststat.ia_type),
priv->children[i]->name,
- gf_inode_type_to_str (replies[src_idx].poststat.ia_type),
+ gf_inode_type_to_str (replies[src_idx].poststat.ia_type),
priv->children[src_idx]->name);
- gf_event (EVENT_AFR_SPLIT_BRAIN,
- "subvol=%s;type=file;file=<gfid:%s>/%s>;count=2;"
- "child-%d=%s;type-%d=%s;child-%d=%s;type-%d=%s",
- this->name, uuid_utoa (pargfid), bname, i,
- priv->children[i]->name, i,
- gf_inode_type_to_str(replies[i].poststat.ia_type),
- src_idx, priv->children[src_idx]->name, src_idx,
- gf_inode_type_to_str(replies[src_idx].poststat.ia_type));
+ gf_event (EVENT_AFR_SPLIT_BRAIN, "subvol=%s;type=file;"
+ "file=<gfid:%s>/%s>;count=2;child-%d=%s;type-"
+ "%d=%s;child-%d=%s;type-%d=%s",
+ this->name, uuid_utoa (pargfid), bname, i,
+ priv->children[i]->name, i,
+ gf_inode_type_to_str(replies[i].poststat.ia_type),
+ src_idx, priv->children[src_idx]->name, src_idx,
+ gf_inode_type_to_str(replies[src_idx].poststat.ia_type));
return -1;
}
}
@@ -283,11 +371,12 @@ __afr_selfheal_merge_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
int ret = 0;
int i = 0;
int source = -1;
+ int src = -1;
afr_private_t *priv = NULL;
priv = this->private;
- for (i = 0; i < priv->child_count; i++) {
+ for (i = 0; i < priv->child_count; i++) {
if (replies[i].valid && replies[i].op_ret == 0) {
source = i;
break;
@@ -306,24 +395,41 @@ __afr_selfheal_merge_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
}
}
- /* In case of a gfid or type mismatch on the entry, return -1.*/
- ret = afr_selfheal_detect_gfid_and_type_mismatch (this, replies,
+ /* In case of type mismatch / unable to resolve gfid mismatch on the
+ * entry, return -1.*/
+ ret = afr_selfheal_detect_gfid_and_type_mismatch (this, replies, inode,
fd->inode->gfid,
- name, source);
+ name, source,
+ locked_on, &src);
if (ret < 0)
return ret;
+ if (src != -1) {
+ source = src;
+ for (i = 0; i < priv->child_count; i++) {
+ if (i != src && replies[i].valid &&
+ gf_uuid_compare (replies[src].poststat.ia_gfid,
+ replies[i].poststat.ia_gfid)) {
+ sources[i] = 0;
+ }
+ }
+ }
for (i = 0; i < priv->child_count; i++) {
if (i == source || !healed_sinks[i])
continue;
- if (replies[i].op_errno != ENOENT)
+ if (src != -1) {
+ if (!gf_uuid_compare (replies[src].poststat.ia_gfid,
+ replies[i].poststat.ia_gfid))
+ continue;
+ } else if (replies[i].op_errno != ENOENT) {
continue;
+ }
- ret = afr_selfheal_recreate_entry (frame, i, source, sources,
- fd->inode, name, inode,
- replies);
+ ret |= afr_selfheal_recreate_entry (frame, i, source, sources,
+ fd->inode, name, inode,
+ replies);
}
return ret;
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index 735e520070e..2e22ac2d7a1 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -319,4 +319,15 @@ afr_choose_source_by_policy (afr_private_t *priv, unsigned char *sources,
int
afr_selfheal_metadata_by_stbuf (xlator_t *this, struct iatt *stbuf);
+
+int
+afr_sh_fav_by_size (xlator_t *this, struct afr_reply *replies,
+ inode_t *inode);
+int
+afr_sh_fav_by_mtime (xlator_t *this, struct afr_reply *replies,
+ inode_t *inode);
+int
+afr_sh_fav_by_ctime (xlator_t *this, struct afr_reply *replies,
+ inode_t *inode);
+
#endif /* !_AFR_SELFHEAL_H */