From 570aefeb280e53e98cb5060cf384f1d74379a521 Mon Sep 17 00:00:00 2001 From: Poornima G Date: Mon, 21 Nov 2016 11:49:35 +0530 Subject: afr: Fix the EIO that can occur in afr_inode_refresh as a result of cache invalidation(upcall). Issue: ------ When a cache invalidation is received as a result of changing pending xattr, the read_subvol is reset. Consider the below chain of execution: CHILD_DOWN ... afr_readv ... afr_inode_refresh ... afr_inode_read_subvol_reset <- as a result of pending xattr set by some other client GF_EVENT_UPCALL will be sent afr_refresh_done -> this results in an EIO, as the read subvol was reset by the end of the afr_inode_refresh Solution: --------- When GF_EVENT_UPCALL is received, instead of resetting read_subvol, set a variable need_refresh in inode_ctx, the next time someone starts a txn, along with event gen, need_refresh also needs to be checked. Change-Id: Ifda21a7a8039b8874215e1afa4bdf20f7d991b58 BUG: 1396952 Signed-off-by: Poornima G Reviewed-on: http://review.gluster.org/15892 Reviewed-by: Ravishankar N Smoke: Gluster Build System NetBSD-regression: NetBSD Build System CentOS-regression: Gluster Build System Reviewed-by: Pranith Kumar Karampuri --- xlators/cluster/afr/src/afr-common.c | 90 +++++++++++++++++++++++++------ xlators/cluster/afr/src/afr-read-txn.c | 3 +- xlators/cluster/afr/src/afr-transaction.c | 4 +- xlators/cluster/afr/src/afr.h | 5 ++ 4 files changed, 84 insertions(+), 18 deletions(-) diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 024f0f5f589..3521e63e6d0 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -616,6 +616,62 @@ out: } +/* The caller of this should perform afr_inode_refresh, if this function + * returns _gf_true + */ +gf_boolean_t +afr_is_inode_refresh_reqd (inode_t *inode, xlator_t *this, + int event_gen1, int event_gen2) +{ + gf_boolean_t need_refresh = _gf_false; + afr_inode_ctx_t *ctx = NULL; + int ret = -1; 
+ + GF_VALIDATE_OR_GOTO (this->name, inode, out); + + LOCK(&inode->lock); + { + ret = __afr_inode_ctx_get (this, inode, &ctx); + if (ret) + goto unlock; + + need_refresh = ctx->need_refresh; + /* Hoping that the caller will do inode_refresh followed by + * this, hence setting the need_refresh to false */ + ctx->need_refresh = _gf_false; + } +unlock: + UNLOCK(&inode->lock); + + if (event_gen1 != event_gen2) + need_refresh = _gf_true; +out: + return need_refresh; +} + + +static int +afr_inode_need_refresh_set (inode_t *inode, xlator_t *this) +{ + int ret = -1; + afr_inode_ctx_t *ctx = NULL; + + GF_VALIDATE_OR_GOTO (this->name, inode, out); + + LOCK(&inode->lock); + { + ret = __afr_inode_ctx_get (this, inode, &ctx); + if (ret) + goto unlock; + + ctx->need_refresh = _gf_true; + } +unlock: + UNLOCK(&inode->lock); +out: + return ret; +} + int afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this) { @@ -2786,7 +2842,8 @@ afr_discover (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req afr_read_subvol_get (loc->inode, this, NULL, NULL, &event, AFR_DATA_TRANSACTION, NULL); - if (event != local->event_generation) + if (afr_is_inode_refresh_reqd (loc->inode, this, event, + local->event_generation)) afr_inode_refresh (frame, this, loc->inode, NULL, afr_discover_do); else @@ -2937,7 +2994,8 @@ afr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) afr_read_subvol_get (loc->parent, this, NULL, NULL, &event, AFR_DATA_TRANSACTION, NULL); - if (event != local->event_generation) + if (afr_is_inode_refresh_reqd (loc->inode, this, event, + local->event_generation)) afr_inode_refresh (frame, this, loc->parent, NULL, afr_lookup_do); else @@ -4724,22 +4782,22 @@ afr_notify (xlator_t *this, int32_t event, * pronounced. 
Hence when a pending xattr is set notify * all the md-cache clients to invalidate the existing * stat cache and send the lookup next time */ - if (up_ci->dict) { - for (i = 0; i < priv->child_count; i++) { - if (dict_get (up_ci->dict, priv->pending_key[i])) { - ret = dict_set_int8 (up_ci->dict, - MDC_INVALIDATE_IATT , 0); - break; - } + if (!up_ci->dict) + break; + for (i = 0; i < priv->child_count; i++) { + if (dict_get (up_ci->dict, priv->pending_key[i])) { + ret = dict_set_int8 (up_ci->dict, + MDC_INVALIDATE_IATT, 0); + itable = ((xlator_t *)this->graph->top)->itable; + /*Internal processes may not have itable for top xlator*/ + if (itable) + inode = inode_find (itable, up_data->gfid); + if (inode) + afr_inode_need_refresh_set (inode, this); + + break; } } - itable = ((xlator_t *)this->graph->top)->itable; - /*Internal processes may not have itable for top xlator*/ - if (itable) - inode = inode_find (itable, up_data->gfid); - if (inode) - afr_inode_read_subvol_reset (inode, this); - break; default: propagate = 1; diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c index 26b0f1c2a11..ffd14a98c8e 100644 --- a/xlators/cluster/afr/src/afr-read-txn.c +++ b/xlators/cluster/afr/src/afr-read-txn.c @@ -227,7 +227,8 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode, gf_msg_debug (this->name, 0, "%s: generation now vs cached: %d, " "%d", uuid_utoa (inode->gfid), local->event_generation, event_generation); - if (local->event_generation != event_generation) + if (afr_is_inode_refresh_reqd (inode, this, local->event_generation, + event_generation)) /* servers have disconnected / reconnected, and possibly rebooted, very likely changing the state of freshness of copies */ diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index e6878eb35ff..eb7571db5f1 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -2573,7 +2573,9 
@@ afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type) ret = afr_inode_get_readable (frame, local->inode, this, local->readable, &event_generation, type); - if (ret < 0 || event_generation != priv->event_generation) { + if (ret < 0 || afr_is_inode_refresh_reqd (local->inode, this, + priv->event_generation, + event_generation)) { afr_inode_refresh (frame, this, local->inode, local->loc.gfid, afr_write_txn_refresh_done); } else { diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 9bfb5812d8a..dcc162f97c3 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -812,6 +812,7 @@ typedef struct _afr_inode_ctx { uint64_t read_subvol; int spb_choice; gf_timer_t *timer; + gf_boolean_t need_refresh; } afr_inode_ctx_t; typedef struct afr_spbc_timeout { @@ -1235,4 +1236,8 @@ __afr_fd_ctx_get (fd_t *fd, xlator_t *this); void afr_compound_cleanup (compound_args_t *args, dict_t *xdata, dict_t *newloc_xdata); + +gf_boolean_t +afr_is_inode_refresh_reqd (inode_t *inode, xlator_t *this, + int event_gen1, int event_gen2); #endif /* __AFR_H__ */ -- cgit