From a3e593f9f17cb1e68db97bb5a0d8074793a33964 Mon Sep 17 00:00:00 2001 From: Jeff Darcy Date: Fri, 31 May 2013 10:07:57 -0400 Subject: cluster/dht: Return success in dht_discover if layout issues We cannot heal in dht_discover, as it is a gfid-based lookup, and not path-based. So, returning an error here would cause applications to see failures. Also, update the layout in inode_ctx even if it has anomalies. Let subsequent heals fix the issue. Change-Id: I2358aadacf9a24e20a22ab0a6055c38c5eb6485c BUG: 960348 Original-author: shishir gowda Signed-off-by: shishir gowda Signed-off-by: Jeff Darcy Reviewed-on: http://review.gluster.org/4959 Tested-by: Gluster Build System Reviewed-by: Anand Avati --- xlators/cluster/dht/src/dht-common.c | 45 +++++++++++++++++++++++++----------- xlators/cluster/dht/src/dht-common.h | 32 +++++++++++++++---------- xlators/cluster/dht/src/dht-layout.c | 21 ++++++++++------- 3 files changed, 65 insertions(+), 33 deletions(-) (limited to 'xlators') diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 8cbae676ee3..fbde47df69e 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -148,9 +148,12 @@ dht_discover_complete (xlator_t *this, call_frame_t *discover_frame) int op_errno = 0; int ret = -1; dht_layout_t *layout = NULL; + dht_conf_t *conf = NULL; + uint32_t missing = 0; local = discover_frame->local; layout = local->layout; + conf = this->private; LOCK(&discover_frame->lock); { @@ -183,19 +186,27 @@ dht_discover_complete (xlator_t *this, call_frame_t *discover_frame) goto out; } } else { - ret = dht_layout_normalize (this, &local->loc, layout); - if ((ret < 0) || ((ret > 0) && (local->op_ret != 0))) { - /* either the layout is incorrect or the directory is - * not found even in one subvolume. 
- */ + ret = dht_layout_normalize (this, &local->loc, layout, + &missing); + if (ret < 0) { gf_log (this->name, GF_LOG_DEBUG, - "normalizing failed on %s " - "(overlaps/holes present: %s, " - "ENOENT errors: %d)", local->loc.path, - (ret < 0) ? "yes" : "no", (ret > 0) ? ret : 0); - op_errno = EINVAL; + "normalizing failed on %s (internal error)", + local->loc.path); + op_errno = EIO; + goto out; + } + if (missing == conf->subvolume_cnt) { + gf_log (this->name, GF_LOG_DEBUG, + "normalizing failed on %s, ENOENT errors: %u)", + local->loc.path, missing); + op_errno = ENOENT; goto out; } + if (ret != 0) { + gf_log (this->name, GF_LOG_DEBUG, + "normalizing failed on %s " + "(overlaps/holes present)", local->loc.path); + } dht_layout_set (this, local->inode, layout); } @@ -397,6 +408,7 @@ dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, dht_layout_t *layout = NULL; int ret = -1; int is_dir = 0; + uint32_t missing = 0; GF_VALIDATE_OR_GOTO ("dht", frame, out); GF_VALIDATE_OR_GOTO ("dht", this, out); @@ -477,9 +489,16 @@ unlock: } if (local->op_ret == 0) { - ret = dht_layout_normalize (this, &local->loc, layout); - - if (ret != 0) { + ret = dht_layout_normalize (this, &local->loc, layout, + &missing); + + /* + * Arguably, we shouldn't do self-heal just because + * bricks are missing as long as there are no other + * anomalies. For now, though, just preserve the + * existing behavior. 
+ */ + if ((ret != 0) || (missing != 0)) { gf_log (this->name, GF_LOG_DEBUG, "fixing assignment on %s", local->loc.path); diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index f079e688be9..9de861360c5 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -394,18 +394,26 @@ typedef enum { } while (0) #define is_greater_time(a, an, b, bn) (((a) < (b)) || (((a) == (b)) && ((an) < (bn)))) -dht_layout_t *dht_layout_new (xlator_t *this, int cnt); -dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode); -dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol); -xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout, - const char *name); -int dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout); -int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, - uint32_t *holes_p, uint32_t *overlaps_p, - uint32_t *missing_p, uint32_t *down_p, - uint32_t *misc_p, uint32_t *no_space_p); -int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, - xlator_t *subvol, loc_t *loc, dict_t *xattr); + +dht_layout_t *dht_layout_new (xlator_t *this, int cnt); +dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode); +dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol); +xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout, + const char *name); +int dht_layout_normalize (xlator_t *this, loc_t *loc, + dht_layout_t *layout, + uint32_t *missing_p); +int dht_layout_anomalies (xlator_t *this, loc_t *loc, + dht_layout_t *layout, + uint32_t *holes_p, + uint32_t *overlaps_p, + uint32_t *missing_p, + uint32_t *down_p, + uint32_t *misc_p, + uint32_t *no_space_p); +int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, + xlator_t *subvol, loc_t *loc, + dict_t *xattr); xlator_t *dht_linkfile_subvol (xlator_t *this, inode_t *inode, struct iatt *buf, dict_t *xattr); diff --git 
a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c index 0572298699c..07e8cbae495 100644 --- a/xlators/cluster/dht/src/dht-layout.c +++ b/xlators/cluster/dht/src/dht-layout.c @@ -593,7 +593,8 @@ dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, int -dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout) +dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout, + uint32_t *missing_p) { int ret = 0; int i = 0; @@ -605,6 +606,7 @@ dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout) ret = dht_layout_sort (layout); if (ret == -1) { + /* defensive coding; this can't happen currently */ gf_log (this->name, GF_LOG_WARNING, "sort failed?! how the ...."); goto out; @@ -614,23 +616,26 @@ dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout) &holes, &overlaps, &missing, &down, &misc, NULL); if (ret == -1) { + /* defensive coding; this can't happen currently */ gf_log (this->name, GF_LOG_WARNING, "error while finding anomalies in %s -- not good news", loc->path); goto out; } - if (holes || overlaps) { + ret = holes + overlaps; + if (ret) { if (missing == layout->cnt) { gf_log (this->name, GF_LOG_DEBUG, "directory %s looked up first time", loc->path); } else { gf_log (this->name, GF_LOG_INFO, - "found anomalies in %s. holes=%d overlaps=%d", - loc->path, holes, overlaps); + "found anomalies in %s. holes=%d overlaps=%d" + " missing=%d down=%d misc=%d", + loc->path, holes, overlaps, missing, down, + misc); } - ret = -1; } for (i = 0; i < layout->cnt; i++) { @@ -645,14 +650,14 @@ dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout) (layout->list[i].xlator ? layout->list[i].xlator->name : "<>")); - if ((layout->list[i].err == ENOENT) && (ret >= 0)) { - ret++; - } } } out: + if (missing_p) { + *missing_p = missing; + } return ret; } -- cgit