diff options
| author | Richard Wareing <rwareing@fb.com> | 2015-09-28 20:38:50 -0700 |
|---|---|---|
| committer | Jeff Darcy <jeff@pl.atyp.us> | 2017-07-11 00:51:25 +0000 |
| commit | 87cf185bb44eaf853de0ada71d66369685c52ae3 (patch) | |
| tree | 9a02b8c4297cfd1bfd47ec9ff066e95f5cf701f7 /xlators/cluster/afr/src | |
| parent | 24d3099681c4fbeb4c9d868ee58810548335833a (diff) | |
cluster/afr: GFID unsplit improvements
Summary:
- Few improvements: handle type mis-matches (e.g. dir/file mis-matches), added in an option to control whether gfid unsplits will happen, ensured entry healing will happen in the gfid mis-match case when the option is enabled.
- Added prove test to cover entry healing & type mis-match cases
- Enable metadata split-brain resolution by default
- Enable gfid split-brain resolution by default
- Fix gfid unsplit logging bugs where it was showing null GFIDs instead of the actual chosen GFIDs
Test Plan:
- run prove -v test/basic/gfid_unsplit*
- Ran valgrind to verify leak-free state
Reviewers: moox, sshreyas
Reviewed By: sshreyas
Change-Id: Id67ddc728745ebbbaf7bdd3f9a5549e5a4cc4a20
Signed-off-by: Jeff Darcy <jdarcy@fb.com>
Change-Id: I4181233f9ba7f61ccd2ba91f0874eb2ac7cd40b5
Manually-merged-by: Jeff Darcy <jdarcy@fb.com>
Signed-off-by: Jeff Darcy <jdarcy@fb.com>
Reviewed-on: https://review.gluster.org/17739
Tested-by: Jeff Darcy <jeff@pl.atyp.us>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Smoke: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Jeff Darcy <jeff@pl.atyp.us>
Diffstat (limited to 'xlators/cluster/afr/src')
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 10 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 17 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-name.c | 158 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.c | 13 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 1 |
5 files changed, 94 insertions, 105 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 04f16cc0122..59228b1925a 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -697,7 +697,7 @@ afr_sh_fav_by_majority (xlator_t *this, struct afr_reply *replies, priv->children[i]->name, replies[i].poststat.ia_mtime, replies[i].poststat.ia_size, - uuid_utoa (inode->gfid)); + uuid_utoa (replies[i].poststat.ia_gfid)); vote_count = 0; for (k = 1; k < priv->child_count; k++) { if (replies_are_same (replies, i, k)) { @@ -734,7 +734,7 @@ afr_sh_fav_by_mtime (xlator_t *this, struct afr_reply *replies, inode_t *inode) priv->children[i]->name, replies[i].poststat.ia_mtime, replies[i].poststat.ia_mtime_nsec, - uuid_utoa (inode->gfid)); + uuid_utoa (replies[i].poststat.ia_gfid)); if (replies[i].poststat.ia_mtime > cmp_mtime) { cmp_mtime = replies[i].poststat.ia_mtime; cmp_mtime_nsec = @@ -774,7 +774,7 @@ afr_sh_fav_by_ctime (xlator_t *this, struct afr_reply *replies, inode_t *inode) priv->children[i]->name, replies[i].poststat.ia_ctime, replies[i].poststat.ia_ctime_nsec, - uuid_utoa (inode->gfid)); + uuid_utoa (replies[i].poststat.ia_gfid)); if (replies[i].poststat.ia_ctime > cmp_ctime) { cmp_ctime = replies[i].poststat.ia_ctime; cmp_ctime_nsec = @@ -812,7 +812,7 @@ afr_sh_fav_by_size (xlator_t *this, struct afr_reply *replies, inode_t *inode) "file size = %lu for gfid %s", priv->children[i]->name, replies[i].poststat.ia_size, - uuid_utoa (inode->gfid)); + uuid_utoa (replies[i].poststat.ia_gfid)); if (replies[i].poststat.ia_size > cmp_sz) { cmp_sz = replies[i].poststat.ia_size; fav_child = i; @@ -911,7 +911,7 @@ afr_mark_split_brain_source_sinks_by_policy (call_frame_t *frame, "data in file (gfid:%s) by %s (%lu bytes @ %s mtime, " "%s ctime).", priv->children[fav_child]->name, - uuid_utoa (inode->gfid), + uuid_utoa (replies[fav_child].poststat.ia_gfid), policy_str, replies[fav_child].poststat.ia_size, mtime_str, diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 09f26690588..776cc9c5c21 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -299,13 +299,16 @@ __afr_selfheal_merge_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd, } } - /* In case of a gfid or type mismatch on the entry, return -1.*/ - ret = afr_selfheal_detect_gfid_and_type_mismatch (this, replies, - fd->inode->gfid, - name, source); - - if (ret < 0) - return ret; + /* Returning EIO here isn't needed if GFID forced heal is + * enabled. + */ + if (!priv->gfid_splitbrain_forced_heal) { + /* In case of a gfid or type mismatch on the entry, return -1.*/ + ret = afr_selfheal_detect_gfid_and_type_mismatch (this, + replies, fd->inode->gfid, name, source); + if (ret < 0) + return ret; + } for (i = 0; i < priv->child_count; i++) { if (i == source || !healed_sinks[i]) diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c index bf34215ba1f..9ca56f8bd9d 100644 --- a/xlators/cluster/afr/src/afr-self-heal-name.c +++ b/xlators/cluster/afr/src/afr-self-heal-name.c @@ -129,13 +129,10 @@ __afr_selfheal_do_gfid_unsplit (xlator_t *this, unsigned char *locked_on, afr_private_t *priv = NULL; call_frame_t *frame = NULL; loc_t *unsplit_loc; - int fav_child = -1; - unsigned char *fav_gfid; unsigned int i = 0; unsigned int split_count = 0; unsigned char *rename_list; int ret = 0; - char *policy_str; frame = afr_frame_create (this); @@ -148,29 +145,11 @@ __afr_selfheal_do_gfid_unsplit (xlator_t *this, unsigned char *locked_on, goto out; } - /* - * Ok, go find our favorite child by one of the active policies: - * majority -> ctime -> mtime -> size -> predefined - * we'll use this gfid as the "real" one. - */ - fav_child = afr_sh_get_fav_by_policy (this, replies, inode, - &policy_str); - if (fav_child == -1) { /* No policies are in place, bail */ - gf_log (this->name, GF_LOG_WARNING, "Unable to resolve GFID " - "split brain, there are no favorite child policies " - "set."); - ret = -EIO; - goto out; - } - fav_gfid = replies[fav_child].poststat.ia_gfid; - gf_log (this->name, GF_LOG_INFO, "Using child %d to resolve gfid " - "split-brain. GFID is %s.", fav_child, uuid_utoa (fav_gfid)); - /* Pre-compute the number of rename calls we will be doing */ for (i = 0; i < priv->child_count; i++) { if (local->child_up[i] && !gf_uuid_is_null (replies[i].poststat.ia_gfid) && - gf_uuid_compare (replies[i].poststat.ia_gfid, fav_gfid)) { + gf_uuid_compare (replies[i].poststat.ia_gfid, loc->gfid)) { split_count++; } } @@ -192,7 +171,7 @@ __afr_selfheal_do_gfid_unsplit (xlator_t *this, unsigned char *locked_on, if (locked_on[i] && local->child_up[i] && replies[i].op_errno != ENOENT && !gf_uuid_is_null (replies[i].poststat.ia_gfid) && - gf_uuid_compare (replies[i].poststat.ia_gfid, fav_gfid)) { + gf_uuid_compare (replies[i].poststat.ia_gfid, loc->gfid)) { ret = _afr_sh_create_unsplit_loc (replies, i, loc, unsplit_loc); gf_log (this->name, GF_LOG_INFO, "Renaming child %d to " @@ -234,6 +213,9 @@ __afr_selfheal_gfid_unsplit (xlator_t *this, inode_t *parent, uuid_t pargfid, loc_t loc = {0, }; call_frame_t *new_frame = NULL; afr_local_t *new_local = NULL; + int fav_child = -1; + unsigned char *fav_gfid; + char *policy_str; priv = this->private; @@ -247,29 +229,48 @@ __afr_selfheal_gfid_unsplit (xlator_t *this, inode_t *parent, uuid_t pargfid, gf_uuid_copy (parent->gfid, pargfid); - xdata = dict_new (); - if (!xdata) { - ret = -ENOMEM; - goto out; - } - - ret = dict_set_static_bin (xdata, "gfid-req", gfid, 16); - if (ret) { - ret = -ENOMEM; - goto out; - } - loc.parent = inode_ref (parent); loc.inode = inode_ref (inode); gf_uuid_copy (loc.pargfid, pargfid); loc.name = bname; + /* + * Ok, go find our favorite child by one of the active policies: + * majority -> ctime -> mtime -> size -> predefined + * we'll use this gfid as the "real" one. + */ + fav_child = afr_sh_get_fav_by_policy (this, replies, inode, + &policy_str); + if (fav_child == -1) { /* No policies are in place, bail */ + gf_log (this->name, GF_LOG_WARNING, "Unable to resolve GFID " + "split brain, there are no favorite child policies " + "set."); + ret = -EIO; + goto out; + } + fav_gfid = replies[fav_child].poststat.ia_gfid; + gf_log (this->name, GF_LOG_INFO, "Using child %d to resolve gfid " + "split-brain. GFID is %s.", fav_child, uuid_utoa (fav_gfid)); + + gf_uuid_copy (loc.gfid, fav_gfid); ret = __afr_selfheal_do_gfid_unsplit (this, locked_on, replies, inode, &loc); if (ret) goto out; + xdata = dict_new (); + if (!xdata) { + ret = -ENOMEM; + goto out; + } + + ret = dict_set_static_bin (xdata, "gfid-req", fav_gfid, 16); + if (ret) { + ret = -ENOMEM; + goto out; + } + /* Clear out old replies here and wind lookup on all locked * subvolumes to achieve two things: * a. gfid heal on those subvolumes that do not have gfid associated @@ -309,6 +310,7 @@ __afr_selfheal_assign_gfid (xlator_t *this, inode_t *parent, uuid_t pargfid, loc_t loc = {0, }; call_frame_t *new_frame = NULL; afr_local_t *new_local = NULL; + int i; priv = this->private; @@ -364,6 +366,25 @@ __afr_selfheal_assign_gfid (xlator_t *this, inode_t *parent, uuid_t pargfid, * __afr_selfheal_name_impunge(). */ + gf_log (this->name, GF_LOG_INFO, + "smashing gfid to %s", uuid_utoa(gfid)); + + ia_type_t ia_type = replies[0].poststat.ia_type; + for (i = 1; i < priv->child_count; ++i) { + if (replies[i].poststat.ia_type != ia_type) { + if (replies[i].poststat.ia_type == IA_INVAL) { + continue; + } + gf_log (this->name, GF_LOG_WARNING, + "type[%d] = %d (not %d)", i, + replies[i].poststat.ia_type, ia_type); + if (ia_type != IA_INVAL) { + ret = -EIO; + goto out; + } + ia_type = replies[i].poststat.ia_type; + } + } AFR_ONLIST (locked_on, new_frame, afr_selfheal_discover_cbk, lookup, &loc, xdata); @@ -547,52 +568,6 @@ afr_selfheal_name_need_heal_check (xlator_t *this, struct afr_reply *replies) return need_heal; } -static int -afr_selfheal_name_type_mismatch_check (xlator_t *this, struct afr_reply *replies, - int source, unsigned char *sources, - uuid_t pargfid, const char *bname) -{ - int i = 0; - int type_idx = -1; - ia_type_t inode_type = IA_INVAL; - afr_private_t *priv = NULL; - - priv = this->private; - - for (i = 0; i < priv->child_count; i++) { - if (!replies[i].valid) - continue; - - if (replies[i].poststat.ia_type == IA_INVAL) - continue; - - if (inode_type == IA_INVAL) { - inode_type = replies[i].poststat.ia_type; - type_idx = i; - continue; - } - - if (sources[i] || source == -1) { - if ((sources[type_idx] || source == -1) && - (inode_type != replies[i].poststat.ia_type)) { - gf_msg (this->name, GF_LOG_WARNING, 0, - AFR_MSG_SPLIT_BRAIN, - "Type mismatch for <gfid:%s>/%s: " - "%d on %s and %d on %s", - uuid_utoa(pargfid), bname, - replies[i].poststat.ia_type, - priv->children[i]->name, - replies[type_idx].poststat.ia_type, - priv->children[type_idx]->name); - - return -EIO; - } - inode_type = replies[i].poststat.ia_type; - type_idx = i; - } - } - return 0; -} static int afr_selfheal_name_gfid_mismatch_check (xlator_t *this, struct afr_reply *replies, @@ -690,7 +665,9 @@ __afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent, gf_boolean_t need_heal = _gf_false; gf_boolean_t is_gfid_absent = _gf_false; gf_boolean_t tried_gfid_unsplit = _gf_false; + afr_private_t *priv = NULL; + priv = this->private; need_heal = afr_selfheal_name_need_heal_check (this, replies); if (!need_heal) return 0; @@ -706,18 +683,14 @@ __afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent, return ret; } - ret = afr_selfheal_name_type_mismatch_check (this, replies, source, - sources, pargfid, bname); - if (ret) - return ret; - gfid_mismatch_check: ret = afr_selfheal_name_gfid_mismatch_check (this, replies, source, sources, &gfid_idx, pargfid, bname); - if (ret && tried_gfid_unsplit == _gf_true) + if (ret && tried_gfid_unsplit) { return ret; + } if (gfid_idx == -1) { if (!gfid_req || gf_uuid_is_null (gfid_req)) @@ -727,7 +700,7 @@ gfid_mismatch_check: gfid = &replies[gfid_idx].poststat.ia_gfid; } - if (ret && tried_gfid_unsplit == _gf_false) { + if (priv->gfid_splitbrain_forced_heal || ret) { ret = __afr_selfheal_gfid_unsplit (this, parent, pargfid, bname, inode, replies, gfid, locked_on); @@ -739,11 +712,12 @@ gfid_mismatch_check: } is_gfid_absent = (gfid_idx == -1) ? _gf_true : _gf_false; - ret = __afr_selfheal_assign_gfid (this, parent, pargfid, bname, inode, - replies, gfid, locked_on, - is_gfid_absent); - if (ret) + ret = __afr_selfheal_assign_gfid (this, parent, pargfid, bname, + inode, replies, gfid, + locked_on, is_gfid_absent); + if (ret) { return ret; + } if (gfid_idx == -1) { gfid_idx = afr_selfheal_gfid_idx_get (this, replies, sources); diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 6d4b3c73429..31949bdcea4 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -143,6 +143,10 @@ reconfigure (xlator_t *this, dict_t *options) priv->metadata_splitbrain_forced_heal, options, bool, out); + GF_OPTION_RECONF ("gfid-splitbrain-forced-heal", + priv->gfid_splitbrain_forced_heal, options, bool, + out); + GF_OPTION_RECONF ("background-self-heal-count", priv->background_self_heal_count, options, uint32, out); @@ -364,6 +368,9 @@ init (xlator_t *this) GF_OPTION_INIT ("metadata-splitbrain-forced-heal", priv->metadata_splitbrain_forced_heal, bool, out); + GF_OPTION_INIT ("gfid-splitbrain-forced-heal", + priv->gfid_splitbrain_forced_heal, bool, out); + GF_OPTION_INIT ("read-subvolume", read_subvol, xlator, out); if (read_subvol) { priv->read_child = xlator_subvolume_index (this, read_subvol); @@ -956,7 +963,7 @@ struct volume_options options[] = { }, { .key = {"iam-nfs-daemon"}, .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", + .default_value = "on", .description = "This option differentiates if the replicate " "translator is running as part of an NFS daemon " "or not." @@ -1023,6 +1030,10 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_BOOL, .default_value = "off", }, + { .key = {"gfid-splitbrain-forced-heal"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + }, { .key = {"heal-timeout"}, .type = GF_OPTION_TYPE_INT, .min = 5, diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 53abeaace11..aa2af38e8cf 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -126,6 +126,7 @@ typedef struct _afr_private { gf_boolean_t entry_change_log; /* on/off */ gf_boolean_t metadata_splitbrain_forced_heal; /* on/off */ + gf_boolean_t gfid_splitbrain_forced_heal; /* on/off */ int read_child; /* read-subvolume */ unsigned int hash_mode; /* for when read_child is not set */ int favorite_child; /* subvolume to be preferred in resolving |
