From 1f7692fecd1b5eaab60a01b64e9e4a67f0630804 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Thu, 19 Jan 2012 12:40:34 +0530 Subject: cluster/afr: Handle afr data self-heal failures gracefully Change-Id: I5f91098111a8ca29982f3b4292e2109e4a36cce1 BUG: 765373 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.com/2662 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/cluster/afr/src/afr-self-heal-common.h | 1 + xlators/cluster/afr/src/afr-self-heal-data.c | 188 ++++++++++++++++++------- 2 files changed, 138 insertions(+), 51 deletions(-) diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h index 2979966c5..339eebe15 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.h +++ b/xlators/cluster/afr/src/afr-self-heal-common.h @@ -21,6 +21,7 @@ #define __AFR_SELF_HEAL_COMMON_H__ #define FILE_HAS_HOLES(buf) (((buf)->ia_size) > ((buf)->ia_blocks * 512)) +#define AFR_SH_MIN_PARTICIPANTS 2 typedef enum { AFR_SELF_HEAL_ENTRY, diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 1c687a4e4..3df6a0e87 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -49,6 +49,14 @@ #include "afr-self-heal-common.h" #include "afr-self-heal-algorithm.h" +int +afr_sh_data_fail (call_frame_t *frame, xlator_t *this); + +static inline gf_boolean_t +afr_sh_data_proceed (unsigned int success_count) +{ + return (success_count >= AFR_SH_MIN_PARTICIPANTS); +} extern int sh_loop_finish (call_frame_t *loop_frame, xlator_t *this); @@ -259,9 +267,14 @@ afr_sh_data_setattr_fstat_cbk (call_frame_t *frame, void *cookie, sh = &local->self_heal; GF_ASSERT (sh->source == child_index); - if (op_ret != -1) + if (op_ret != -1) { sh->buf[child_index] = *buf; - afr_sh_data_setattr (frame, this); + afr_sh_data_setattr (frame, this); + } else { + gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set " + "time-stamps after self-heal", local->loc.path); + afr_sh_data_fail (frame, this); + } return 0; } @@ -360,12 +373,27 @@ afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie, int call_count = 0; afr_local_t *local = NULL; afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int32_t child_index = (long) cookie; + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + if (op_ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Erasing of pending change " + "log failed on %s for subvol %s, reason: %s", + local->loc.path, priv->children[child_index]->name, + strerror (op_errno)); + sh->op_failed = 1; + } call_count = afr_frame_return (frame); if (call_count == 0) { - local = frame->local; - sh = &local->self_heal; + if (sh->op_failed) { + afr_sh_data_fail (frame, this); + goto out; + } if (!IA_ISREG (sh->type)) { afr_sh_data_finish (frame, this); goto out; @@ -550,36 +578,44 @@ afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; int call_count = 0; int child_index = 0; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; - priv = this->private; + priv = this->private; local = frame->local; + sh = &local->self_heal; child_index = (long) cookie; LOCK (&frame->lock); { - if (op_ret == -1) - gf_log (this->name, GF_LOG_INFO, + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, "ftruncate of %s on subvolume %s failed (%s)", local->loc.path, priv->children[child_index]->name, strerror (op_errno)); - else + sh->op_failed = 1; + } else { gf_log (this->name, GF_LOG_DEBUG, "ftruncate of %s on subvolume %s completed", local->loc.path, priv->children[child_index]->name); + } } UNLOCK (&frame->lock); call_count = afr_frame_return (frame); - if (call_count == 0) - afr_sh_data_sync_prepare (frame, this); + if (call_count == 0) { + if (sh->op_failed) + afr_sh_data_fail (frame, this); + else + afr_sh_data_sync_prepare (frame, this); + } return 0; } @@ -849,6 +885,12 @@ afr_sh_data_fstat_cbk (call_frame_t *frame, void *cookie, sh->buf[child_index] = *buf; sh->success_children[sh->success_count] = child_index; sh->success_count++; + } else { + gf_log (this->name, GF_LOG_ERROR, "%s: fstat failed " + "on %s, reason %s", local->loc.path, + priv->children[child_index]->name, + strerror (op_errno)); + sh->child_errno[child_index] = op_errno; } } UNLOCK (&frame->lock); @@ -859,13 +901,20 @@ afr_sh_data_fstat_cbk (call_frame_t *frame, void *cookie, /* Previous versions of glusterfs might have set * the pending data xattrs which need to be erased */ + if (!afr_sh_data_proceed (sh->success_count)) { + gf_log (this->name, GF_LOG_ERROR, "inspecting metadata " + "succeeded on < %d children, aborting " + "self-heal for %s", AFR_SH_MIN_PARTICIPANTS, + local->loc.path); + afr_sh_data_fail (frame, this); + goto out; + } if (IA_ISREG (buf->ia_type)) afr_sh_data_fix (frame, this); else afr_sh_data_special_file_fix (frame, this); - } - +out: return 0; } @@ -876,33 +925,43 @@ afr_sh_data_fstat (call_frame_t *frame, xlator_t *this) afr_self_heal_t *sh = NULL; afr_local_t *local = NULL; afr_private_t *priv = NULL; - int call_count = 0; - int i = 0; + int call_count = 0; + int i = 0; + int child = 0; + int32_t *fstat_children = NULL; priv = this->private; local = frame->local; sh = &local->self_heal; - call_count = afr_up_children_count (local->child_up, - priv->child_count); - + fstat_children = memdup (sh->success_children, + sizeof (*fstat_children) * priv->child_count); + if (!fstat_children) { + afr_sh_data_fail (frame, this); + goto out; + } + call_count = sh->success_count; local->call_count = call_count; afr_reset_children (sh->success_children, priv->child_count); + memset (sh->child_errno, 0, + sizeof (*sh->child_errno) * priv->child_count); sh->success_count = 0; for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND_COOKIE (frame, afr_sh_data_fstat_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->fstat, - sh->healing_fd, NULL); - - if (!--call_count) - break; - } + child = fstat_children[i]; + if (child == -1) + break; + STACK_WIND_COOKIE (frame, afr_sh_data_fstat_cbk, + (void *) (long) child, + priv->children[child], + priv->children[child]->fops->fstat, + sh->healing_fd, NULL); + --call_count; } - + GF_ASSERT (!call_count); +out: + if (fstat_children) + GF_FREE (fstat_children); return 0; } @@ -931,6 +990,12 @@ afr_sh_common_fxattrop_resp_handler (call_frame_t *frame, void *cookie, sh->xattr[child_index] = dict_ref (xattr); sh->success_children[sh->success_count] = child_index; sh->success_count++; + } else { + gf_log (this->name, GF_LOG_ERROR, "fxattrop of %s " + "failed on %s, reason %s", local->loc.path, + priv->children[child_index]->name, + strerror (op_errno)); + sh->child_errno[child_index] = op_errno; } } UNLOCK (&frame->lock); @@ -952,19 +1017,27 @@ afr_post_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie, local = frame->local; sh = &local->self_heal; call_count = afr_frame_return (frame); - if (call_count == 0) { - (void) afr_build_sources (this, sh->xattr, NULL, - sh->pending_matrix, - sh->sources, sh->success_children, - AFR_DATA_TRANSACTION, NULL, _gf_false); - ret = afr_sh_inode_set_read_ctx (sh, this); - if (ret) - afr_sh_data_fail (frame, this); - else - afr_sh_set_timestamps (frame, this); - } + if (call_count) + goto out; - return 0; + if (!afr_sh_data_proceed (sh->success_count)) { + gf_log (this->name, GF_LOG_ERROR, "%s, inspecting change log " + "succeeded on < %d children", local->loc.path, + AFR_SH_MIN_PARTICIPANTS); + afr_sh_data_fail (frame, this); + goto out; + } + (void) afr_build_sources (this, sh->xattr, NULL, + sh->pending_matrix, + sh->sources, sh->success_children, + AFR_DATA_TRANSACTION, NULL, _gf_false); + ret = afr_sh_inode_set_read_ctx (sh, this); + if (ret) + afr_sh_data_fail (frame, this); + else + afr_sh_set_timestamps (frame, this); +out: + return 0; } int @@ -972,16 +1045,28 @@ afr_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xattr, dict_t *xdata) { - int call_count = -1; + int call_count = -1; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + + local = frame->local; + sh = &local->self_heal; afr_sh_common_fxattrop_resp_handler (frame, cookie, this, op_ret, op_errno, xattr); call_count = afr_frame_return (frame); if (call_count == 0) { + if (!afr_sh_data_proceed (sh->success_count)) { + gf_log (this->name, GF_LOG_ERROR, "%s, inspecting " + "change log succeeded on < %d children", + local->loc.path, AFR_SH_MIN_PARTICIPANTS); + afr_sh_data_fail (frame, this); + goto out; + } afr_sh_data_fstat (frame, this); } - +out: return 0; } @@ -1035,6 +1120,8 @@ afr_sh_data_fxattrop (call_frame_t *frame, xlator_t *this, afr_reset_xattr (sh->xattr, priv->child_count); afr_reset_children (sh->success_children, priv->child_count); + memset (sh->child_errno, 0, + sizeof (*sh->child_errno) * priv->child_count); sh->success_count = 0; for (i = 0; i < priv->child_count; i++) { if (local->child_up[i]) { @@ -1057,8 +1144,7 @@ out: if (ret) { if (zero_pending) GF_FREE (zero_pending); - sh->op_failed = 1; - afr_sh_data_done (frame, this); + afr_sh_data_fail (frame, this); } return 0; @@ -1240,12 +1326,12 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, priv->children[child_index]->name, strerror (op_errno)); sh->op_failed = 1; + } else { + gf_log (this->name, GF_LOG_TRACE, + "open of %s succeeded on child %s", + local->loc.path, + priv->children[child_index]->name); } - - gf_log (this->name, GF_LOG_TRACE, - "open of %s succeeded on child %s", - local->loc.path, - priv->children[child_index]->name); } UNLOCK (&frame->lock); -- cgit