summaryrefslogtreecommitdiffstats
path: root/xlators
diff options
context:
space:
mode:
authorPranith Kumar K <pkarampu@redhat.com>2018-08-27 12:40:16 +0530
committerPranith Kumar Karampuri <pkarampu@redhat.com>2018-09-04 01:52:02 +0000
commitf29689783d970cba804505d7c778c905b2ba1992 (patch)
tree14be40bab39b877e69a7a2583a7c32efbc9e44d8 /xlators
parentaf0d5a9b5375a5cd87ac10b429e2b9934718ce5b (diff)
cluster/afr: Delegate name-heal when possible
Problem: When name-self-heal is triggered on the mount, it blocks lookup until name-self-heal completes. But that can lead to hangs when lot of clients are accessing a directory which needs name heal and all of them trigger heals waiting for other clients to complete heal. Fix: When a name-heal is needed but quorum number of names have the file and pending xattrs exist on the parent, then better to delegate the heal to SHD which will be completed as part of entry-heal of the parent directory. We could also do the same for quorum-number of names not present but we don't have any known use-case where this is a frequent occurrence so not changing that part at the moment. When there is a gfid mismatch or missing gfid it is important to complete the heal so that next rename doesn't assume everything is fine and perform a rename etc fixes bz#1622821 Change-Id: I8b002c85dffc6eb6f2833e742684a233daefeb2c Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Diffstat (limited to 'xlators')
-rw-r--r--xlators/cluster/afr/src/afr-common.c100
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-name.c12
2 files changed, 85 insertions, 27 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 3cc70d205c7..0b1d4628c4e 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -2415,8 +2415,6 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
*/
for (i = 0; i < priv->child_count; i++) {
if (!replies[i].valid || replies[i].op_ret == -1) {
- if (priv->child_up[i])
- can_interpret = _gf_false;
continue;
}
@@ -2863,21 +2861,52 @@ afr_lookup_entry_heal (call_frame_t *frame, xlator_t *this)
afr_private_t *priv = NULL;
call_frame_t *heal = NULL;
int i = 0, first = -1;
- gf_boolean_t need_heal = _gf_false;
+ gf_boolean_t name_state_mismatch = _gf_false;
struct afr_reply *replies = NULL;
int ret = 0;
+ unsigned char *par_readables = NULL;
+ unsigned char *success = NULL;
+ int32_t op_errno = 0;
+ uuid_t gfid = {0};
local = frame->local;
replies = local->replies;
priv = this->private;
+ par_readables = alloca0(priv->child_count);
+ success = alloca0(priv->child_count);
+
+ ret = afr_inode_read_subvol_get (local->loc.parent, this, par_readables,
+ NULL, NULL);
+ if (ret < 0 || AFR_COUNT (par_readables, priv->child_count) == 0) {
+ /* In this case set par_readables to all 1 so that name_heal
+ * need checks at the end of this function will flag missing
+ * entry when name state mismatches*/
+ memset (par_readables, 1, priv->child_count);
+ }
for (i = 0; i < priv->child_count; i++) {
if (!replies[i].valid)
continue;
+ if (replies[i].op_ret == 0) {
+ if (uuid_is_null (gfid)) {
+ gf_uuid_copy (gfid,
+ replies[i].poststat.ia_gfid);
+ }
+ success[i] = 1;
+ } else {
+ if ((replies[i].op_errno != ENOTCONN) &&
+ (replies[i].op_errno != ENOENT) &&
+ (replies[i].op_errno != ESTALE)) {
+ op_errno = replies[i].op_errno;
+ }
+ }
+
+ /*gfid is missing, needs heal*/
if ((replies[i].op_ret == -1) &&
- (replies[i].op_errno == ENODATA))
- need_heal = _gf_true;
+ (replies[i].op_errno == ENODATA)) {
+ goto name_heal;
+ }
if (first == -1) {
first = i;
@@ -2885,30 +2914,53 @@ afr_lookup_entry_heal (call_frame_t *frame, xlator_t *this)
}
if (replies[i].op_ret != replies[first].op_ret) {
- need_heal = _gf_true;
- break;
+ name_state_mismatch = _gf_true;
}
- if (gf_uuid_compare (replies[i].poststat.ia_gfid,
- replies[first].poststat.ia_gfid)) {
- need_heal = _gf_true;
- break;
- }
+ if (replies[i].op_ret == 0) {
+ /* Rename after this lookup may succeed if we don't do
+ * a name-heal and the destination may not have pending xattrs
+ * to indicate which name is good and which is bad so always do
+ * this heal*/
+ if (gf_uuid_compare (replies[i].poststat.ia_gfid,
+ gfid)) {
+ goto name_heal;
+ }
+ }
}
- if (need_heal) {
- heal = afr_frame_create (this, NULL);
- if (!heal)
- goto metadata_heal;
-
- ret = synctask_new (this->ctx->env, afr_lookup_selfheal_wrap,
- afr_refresh_selfheal_done, heal, frame);
- if (ret) {
- AFR_STACK_DESTROY (heal);
- goto metadata_heal;
+ if (name_state_mismatch) {
+ if (!priv->quorum_count)
+ goto name_heal;
+ if (!afr_has_quorum (success, this))
+ goto name_heal;
+ if (op_errno)
+ goto name_heal;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+ if (par_readables[i] && replies[i].op_ret < 0 &&
+ replies[i].op_errno != ENOTCONN) {
+ goto name_heal;
+ }
}
- return ret;
- }
+ }
+
+ goto metadata_heal;
+
+name_heal:
+ heal = afr_frame_create (this, NULL);
+ if (!heal)
+ goto metadata_heal;
+
+ ret = synctask_new (this->ctx->env, afr_lookup_selfheal_wrap,
+ afr_refresh_selfheal_done, heal, frame);
+ if (ret) {
+ AFR_STACK_DESTROY (heal);
+ goto metadata_heal;
+ }
+ return ret;
+
metadata_heal:
ret = afr_lookup_metadata_heal_check (frame, this);
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
index bcd0e60c947..0a5be29d5ee 100644
--- a/xlators/cluster/afr/src/afr-self-heal-name.c
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
@@ -634,20 +634,26 @@ afr_selfheal_name_unlocked_inspect (call_frame_t *frame, xlator_t *this,
continue;
if ((replies[i].op_ret == -1) &&
- (replies[i].op_errno == ENODATA))
+ (replies[i].op_errno == ENODATA)) {
*need_heal = _gf_true;
+ break;
+ }
if (first_idx == -1) {
first_idx = i;
continue;
}
- if (replies[i].op_ret != replies[first_idx].op_ret)
+ if (replies[i].op_ret != replies[first_idx].op_ret) {
*need_heal = _gf_true;
+ break;
+ }
if (gf_uuid_compare (replies[i].poststat.ia_gfid,
- replies[first_idx].poststat.ia_gfid))
+ replies[first_idx].poststat.ia_gfid)) {
*need_heal = _gf_true;
+ break;
+ }
}
if (inode)