From a770c7bba13734602b11a750e037cb11e42fe706 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Mon, 16 May 2016 15:05:36 +0530 Subject: cluster/afr: Refresh inode for inode-write fops in need Problem: If a named fresh-lookup is done on a loc and the fop either fails on one of the bricks or is not sent to one of the bricks, but that brick is up by the time the response reaches afr, 'can_interpret' will be set to false in afr_lookup_done(). As a result, the inode-ctx for that inode is not set, which can lead to EIO in case of a transaction, as it depends on the 'readable' array being available by that point. Fix: Refresh the inode for inode-write fops so that the ctx is set if that was not already done at the time of the named fresh-lookup, or if the file is in split-brain, where we need to perform one more refresh before failing the fop to check whether the file is still in split-brain. >BUG: 1336612 >Change-Id: I5c50b62c8de06129b8516039f7c252e5008c47a5 >Signed-off-by: Pranith Kumar K >Reviewed-on: http://review.gluster.org/14368 >Smoke: Gluster Build System >NetBSD-regression: NetBSD Build System >Reviewed-by: Ravishankar N >CentOS-regression: Gluster Build System BUG: 1337822 Change-Id: I0f904ebaa78b99cbb11546e08c9fc1562e9a3eef Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.org/14449 Smoke: Gluster Build System NetBSD-regression: NetBSD Build System Reviewed-by: Krutika Dhananjay Reviewed-by: Anuradha Talur CentOS-regression: Gluster Build System Reviewed-by: Niels de Vos --- xlators/cluster/afr/src/afr-common.c | 31 ++++++---- xlators/cluster/afr/src/afr-read-txn.c | 4 +- xlators/cluster/afr/src/afr-transaction.c | 95 +++++++++++++++++++++++-------- xlators/cluster/afr/src/afr.h | 5 +- 4 files changed, 98 insertions(+), 37 deletions(-) diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 08186b0bbcb..d7bc3e29ee2 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -380,10 +380,6 
@@ afr_inode_get_readable (call_frame_t *frame, inode_t *inode, xlator_t *this, int event_generation = 0; int ret = 0; - /* We don't care about split-brains for entry transactions. */ - if (type == AFR_ENTRY_TRANSACTION || type == AFR_ENTRY_RENAME_TRANSACTION) - return 0; - ret = afr_inode_read_subvol_get (inode, this, data, metadata, &event_generation); if (ret == -1) @@ -926,7 +922,8 @@ afr_inode_refresh_subvol_with_lookup_cbk (call_frame_t *frame, void *cookie, int afr_inode_refresh_subvol_with_lookup (call_frame_t *frame, xlator_t *this, - int i, inode_t *inode, dict_t *xdata) + int i, inode_t *inode, uuid_t gfid, + dict_t *xdata) { loc_t loc = {0, }; afr_private_t *priv = NULL; @@ -934,7 +931,13 @@ afr_inode_refresh_subvol_with_lookup (call_frame_t *frame, xlator_t *this, priv = this->private; loc.inode = inode; - gf_uuid_copy (loc.gfid, inode->gfid); + if (gf_uuid_is_null (inode->gfid) && gfid) { + /* To handle setattr/setxattr on yet to be linked inode from + * dht */ + gf_uuid_copy (loc.gfid, gfid); + } else { + gf_uuid_copy (loc.gfid, inode->gfid); + } STACK_WIND_COOKIE (frame, afr_inode_refresh_subvol_with_lookup_cbk, (void *) (long) i, priv->children[i], @@ -1048,7 +1051,8 @@ afr_inode_refresh_do (call_frame_t *frame, xlator_t *this) xdata); else afr_inode_refresh_subvol_with_lookup (frame, this, i, - local->refreshinode, xdata); + local->refreshinode, + local->refreshgfid, xdata); if (!--call_count) break; @@ -1062,7 +1066,7 @@ afr_inode_refresh_do (call_frame_t *frame, xlator_t *this) int afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode, - afr_inode_refresh_cbk_t refreshfn) + uuid_t gfid, afr_inode_refresh_cbk_t refreshfn) { afr_local_t *local = NULL; @@ -1077,6 +1081,11 @@ afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode, local->refreshinode = inode_ref (inode); + if (gfid) + gf_uuid_copy (local->refreshgfid, gfid); + else + gf_uuid_clear (local->refreshgfid); + afr_inode_refresh_do (frame, this); return 0; 
@@ -2408,7 +2417,8 @@ afr_discover (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req AFR_DATA_TRANSACTION, NULL); if (event != local->event_generation) - afr_inode_refresh (frame, this, loc->inode, afr_discover_do); + afr_inode_refresh (frame, this, loc->inode, NULL, + afr_discover_do); else afr_discover_do (frame, this, 0); @@ -2558,7 +2568,8 @@ afr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) AFR_DATA_TRANSACTION, NULL); if (event != local->event_generation) - afr_inode_refresh (frame, this, loc->parent, afr_lookup_do); + afr_inode_refresh (frame, this, loc->parent, NULL, + afr_lookup_do); else afr_lookup_do (frame, this, 0); diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c index a70565c37a1..32ad6a46d17 100644 --- a/xlators/cluster/afr/src/afr-read-txn.c +++ b/xlators/cluster/afr/src/afr-read-txn.c @@ -121,7 +121,7 @@ afr_read_txn_continue (call_frame_t *frame, xlator_t *this, int subvol) if (!local->refreshed) { local->refreshed = _gf_true; - afr_inode_refresh (frame, this, local->inode, + afr_inode_refresh (frame, this, local->inode, NULL, afr_read_txn_refresh_done); } else { afr_read_txn_next_subvol (frame, this); @@ -268,7 +268,7 @@ read: return 0; refresh: - afr_inode_refresh (frame, this, inode, afr_read_txn_refresh_done); + afr_inode_refresh (frame, this, inode, NULL, afr_read_txn_refresh_done); return 0; } diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index ff2b58c032d..22b6997f2f7 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -2086,32 +2086,13 @@ unlock: UNLOCK (&local->fd->lock); } - -int -afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type) +void +afr_transaction_start (call_frame_t *frame, xlator_t *this) { - afr_local_t * local = NULL; - afr_private_t * priv = NULL; - fd_t *fd = NULL; - int ret = -1; - - local = 
frame->local; - priv = this->private; - - local->transaction.resume = afr_transaction_resume; - local->transaction.type = type; - - ret = afr_transaction_local_init (local, this); - if (ret < 0) - goto out; + afr_local_t *local = frame->local; + afr_private_t *priv = this->private; + fd_t *fd = NULL; - ret = afr_inode_get_readable (frame, local->inode, this, 0, 0, type); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, EIO, AFR_MSG_SPLIT_BRAIN, - "Failing %s on gfid %s: split-brain observed.", - gf_fop_list[local->op], uuid_utoa (local->inode->gfid)); - goto out; - } afr_transaction_eager_lock_init (local, this); if (local->fd && local->transaction.eager_lock_on) @@ -2135,6 +2116,72 @@ afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type) } else { afr_lock (frame, this); } +} + +int +afr_write_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err) +{ + afr_local_t *local = frame->local; + afr_private_t *priv = this->private; + int ret = 0; + + if (err) { + local->op_errno = -err; + local->op_ret = -1; + goto fail; + } + ret = afr_inode_get_readable (frame, local->inode, this, + local->readable, NULL, + local->transaction.type); + if (ret < 0) { + gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_SPLIT_BRAIN, + "Failing %s on gfid %s: split-brain observed.", + gf_fop_list[local->op], uuid_utoa (local->inode->gfid)); + local->op_ret = -1; + local->op_errno = -ret; + goto fail; + } + afr_transaction_start (frame, this); + return 0; +fail: + local->transaction.unwind (frame, this); + AFR_STACK_DESTROY (frame); + return 0; +} + +int +afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int ret = -1; + int event_generation = 0; + + local = frame->local; + priv = this->private; + + local->transaction.resume = afr_transaction_resume; + local->transaction.type = type; + + ret = afr_transaction_local_init (local, this); + if (ret < 0) + goto out; + + 
if (type == AFR_ENTRY_TRANSACTION || + type == AFR_ENTRY_RENAME_TRANSACTION) { + afr_transaction_start (frame, this); + ret = 0; + goto out; + } + + ret = afr_inode_get_readable (frame, local->inode, this, + local->readable, &event_generation, type); + if (ret < 0 || event_generation != priv->event_generation) { + afr_inode_refresh (frame, this, local->inode, local->loc.gfid, + afr_write_txn_refresh_done); + } else { + afr_transaction_start (frame, this); + } ret = 0; out: return ret; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index f16f9b4b4ac..7d270ea94e7 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -422,6 +422,9 @@ typedef struct _afr_local { */ inode_t *refreshinode; + /*To handle setattr/setxattr on yet to be linked inode from dht*/ + uuid_t refreshgfid; + /* @pre_op_compat: @@ -855,7 +858,7 @@ afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p, int afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode, - afr_inode_refresh_cbk_t cbk); + uuid_t gfid, afr_inode_refresh_cbk_t cbk); int32_t afr_notify (xlator_t *this, int32_t event, void *data, void *data2); -- cgit