author    Krutika Dhananjay <kdhananj@redhat.com>    2016-05-17 15:37:18 +0530
committer Niels de Vos <ndevos@redhat.com>           2017-03-10 17:47:05 -0500
commit    8462ab0fed6ff2f875909ba8fda146b72d535c2a (patch)
tree      6d0d30bf6d49969d2206695c41f10eae2e42023d /xlators
parent    4ffa06cc4ddd3e30c76ecbbb5c62e50a86dd1a85 (diff)
features/shard: Fix EIO error on add-brick
Backport of: https://review.gluster.org/14419

DHT seems to link the inode during lookup even before initializing the inode ctx with layout information, which only happens after directory healing.

Consider two parallel writes. As part of the first write, shard sends a lookup on .shard, which in its return path causes DHT to link the .shard inode. Now, when the second write is wound, inode_find() of .shard succeeds, and as a result shard goes on to create the participant shards by issuing MKNODs under .shard. Since the layout is yet to be initialized, the mknod fails in the DHT call path with EIO, leading to VM pauses.

The fix has shard maintain a flag denoting whether a fresh lookup on .shard has completed one network trip. If it hasn't, every inode_find() in the fop path is followed by a lookup before proceeding with the next stage of the fop.

Big thanks to Raghavendra G and Pranith Kumar K for the RCA and subsequent inputs and feedback on the patch.

Change-Id: I66a7adf177e338a7691f441f199dde7c2b90c292
BUG: 1387878
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: https://review.gluster.org/16750
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Niels de Vos <ndevos@redhat.com>
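In essence, the patch below treats an inode obtained via inode_find() as untrusted until one lookup on .shard has actually completed, which gives DHT the chance to heal the directory and initialize its layout first. The following is a minimal, self-contained sketch of that "check a refreshed flag, look up if unset" pattern; all identifiers in it are hypothetical, using plain pthreads instead of the real GlusterFS inode-ctx locking and STACK_WIND machinery:

/* Sketch only: illustrates the refreshed-flag idea, not the shard xlator API. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct inode_ctx {
        pthread_mutex_t lock;
        bool            refreshed;  /* set only after a lookup round-trip */
};

/* Returns true if the cached inode must not be trusted yet. */
static bool
needs_lookup (struct inode_ctx *ctx)
{
        bool stale;

        pthread_mutex_lock (&ctx->lock);
        stale = !ctx->refreshed;
        pthread_mutex_unlock (&ctx->lock);
        return stale;
}

/* Invoked from the lookup callback once the server has replied. */
static void
mark_refreshed (struct inode_ctx *ctx)
{
        pthread_mutex_lock (&ctx->lock);
        ctx->refreshed = true;
        pthread_mutex_unlock (&ctx->lock);
}

/* Stand-ins for the network round-trip and the shard-creation step. */
static void
send_lookup (struct inode_ctx *ctx)
{
        /* In the real code this is where DHT heals .shard and sets up its
         * layout; here we only flip the flag. */
        mark_refreshed (ctx);
}

static void
create_shards (void)
{
        puts ("creating participant shards");
}

static void
write_fop (struct inode_ctx *dot_shard)
{
        if (needs_lookup (dot_shard))
                send_lookup (dot_shard);  /* lookup before trusting the inode */
        create_shards ();                 /* safe: layout is now in place */
}

int
main (void)
{
        struct inode_ctx dot_shard = { PTHREAD_MUTEX_INITIALIZER, false };

        write_fop (&dot_shard);  /* first write: forces a lookup */
        write_fop (&dot_shard);  /* second write: flag set, no extra trip */
        return 0;
}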
Diffstat (limited to 'xlators')
-rw-r--r--  xlators/features/shard/src/shard.c  172
-rw-r--r--  xlators/features/shard/src/shard.h    1
2 files changed, 154 insertions, 19 deletions
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index 59a0cfa53cf..39d648bb34d 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -178,6 +178,63 @@ shard_inode_ctx_set (inode_t *inode, xlator_t *this, struct iatt *stbuf,
}
int
+__shard_inode_ctx_set_refreshed_flag (inode_t *inode, xlator_t *this)
+{
+ int ret = -1;
+ shard_inode_ctx_t *ctx = NULL;
+
+ ret = __shard_inode_ctx_get (inode, this, &ctx);
+ if (ret)
+ return ret;
+
+ ctx->refreshed = _gf_true;
+ return 0;
+}
+
+int
+shard_inode_ctx_set_refreshed_flag (inode_t *inode, xlator_t *this)
+{
+ int ret = -1;
+
+ LOCK (&inode->lock);
+ {
+ ret = __shard_inode_ctx_set_refreshed_flag (inode, this);
+ }
+ UNLOCK (&inode->lock);
+
+ return ret;
+}
+
+gf_boolean_t
+__shard_inode_ctx_needs_lookup (inode_t *inode, xlator_t *this)
+{
+ int ret = -1;
+ shard_inode_ctx_t *ctx = NULL;
+
+ ret = __shard_inode_ctx_get (inode, this, &ctx);
+ /* If inode ctx get fails, better to err on the side of caution and
+ * try again? Unless the failure is due to mem-allocation.
+ */
+ if (ret)
+ return _gf_true;
+
+ return !ctx->refreshed;
+}
+
+gf_boolean_t
+shard_inode_ctx_needs_lookup (inode_t *inode, xlator_t *this)
+{
+ gf_boolean_t flag = _gf_false;
+
+ LOCK (&inode->lock);
+ {
+ flag = __shard_inode_ctx_needs_lookup (inode, this);
+ }
+ UNLOCK (&inode->lock);
+
+ return flag;
+}
+int
__shard_inode_ctx_invalidate (inode_t *inode, xlator_t *this, struct iatt *stbuf)
{
int ret = -1;
@@ -756,7 +813,7 @@ out:
}
-static void
+static inode_t *
shard_link_dot_shard_inode (shard_local_t *local, inode_t *inode,
struct iatt *buf)
{
@@ -765,10 +822,72 @@ shard_link_dot_shard_inode (shard_local_t *local, inode_t *inode,
priv = THIS->private;
- linked_inode = inode_link (inode, local->dot_shard_loc.parent,
- local->dot_shard_loc.name, buf);
+ linked_inode = inode_link (inode, inode->table->root, ".shard", buf);
inode_lookup (linked_inode);
priv->dot_shard_inode = linked_inode;
+ return linked_inode;
+}
+
+
+int
+shard_refresh_dot_shard_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto out;
+ }
+
+ /* To-Do: Fix refcount increment per call to
+ * shard_link_dot_shard_inode().
+ */
+ shard_link_dot_shard_inode (local, inode, buf);
+ shard_inode_ctx_set_refreshed_flag (inode, this);
+out:
+ shard_common_resolve_shards (frame, this, local->post_res_handler);
+ return 0;
+}
+
+int
+shard_refresh_dot_shard (call_frame_t *frame, xlator_t *this)
+{
+ loc_t loc = {0,};
+ inode_t *inode = NULL;
+ shard_priv_t *priv = NULL;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ inode = inode_find (this->itable, priv->dot_shard_gfid);
+
+ if (!shard_inode_ctx_needs_lookup (inode, this)) {
+ local->op_ret = 0;
+ goto out;
+ }
+
+ /* Plain assignment because the ref is already taken above through
+ * call to inode_find()
+ */
+ loc.inode = inode;
+ gf_uuid_copy (loc.gfid, priv->dot_shard_gfid);
+
+ STACK_WIND (frame, shard_refresh_dot_shard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, &loc, NULL);
+ loc_wipe (&loc);
+
+ return 0;
+
+out:
+ shard_common_resolve_shards (frame, this, local->post_res_handler);
+ return 0;
}
int
@@ -777,7 +896,8 @@ shard_lookup_dot_shard_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *buf, dict_t *xdata,
struct iatt *postparent)
{
- shard_local_t *local = NULL;
+ inode_t *link_inode = NULL;
+ shard_local_t *local = NULL;
local = frame->local;
@@ -797,8 +917,14 @@ shard_lookup_dot_shard_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto unwind;
}
- shard_link_dot_shard_inode (local, inode, buf);
- shard_common_resolve_shards (frame, this, local->post_res_handler);
+ link_inode = shard_link_dot_shard_inode (local, inode, buf);
+ if (link_inode != inode) {
+ shard_refresh_dot_shard (frame, this);
+ } else {
+ shard_inode_ctx_set_refreshed_flag (link_inode, this);
+ shard_common_resolve_shards (frame, this,
+ local->post_res_handler);
+ }
return 0;
unwind:
@@ -1843,8 +1969,8 @@ shard_truncate_begin (call_frame_t *frame, xlator_t *this)
shard_lookup_dot_shard (frame, this,
shard_post_resolve_truncate_handler);
} else {
- shard_common_resolve_shards (frame, this,
- shard_post_resolve_truncate_handler);
+ local->post_res_handler = shard_post_resolve_truncate_handler;
+ shard_refresh_dot_shard (frame, this);
}
return 0;
@@ -2273,8 +2399,8 @@ shard_unlink_base_file_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
shard_lookup_dot_shard (frame, this,
shard_post_resolve_unlink_handler);
} else {
- shard_common_resolve_shards (frame, this,
- shard_post_resolve_unlink_handler);
+ local->post_res_handler = shard_post_resolve_unlink_handler;
+ shard_refresh_dot_shard (frame, this);
}
return 0;
@@ -2624,8 +2750,8 @@ shard_rename_unlink_dst_shards_do (call_frame_t *frame, xlator_t *this)
shard_lookup_dot_shard (frame, this,
shard_post_resolve_unlink_handler);
} else {
- shard_common_resolve_shards (frame, this,
- shard_post_resolve_unlink_handler);
+ local->post_res_handler = shard_post_resolve_unlink_handler;
+ shard_refresh_dot_shard (frame, this);
}
return 0;
@@ -3388,8 +3514,8 @@ shard_post_lookup_readv_handler (call_frame_t *frame, xlator_t *this)
shard_lookup_dot_shard (frame, this,
shard_post_resolve_readv_handler);
} else {
- shard_common_resolve_shards (frame, this,
- shard_post_resolve_readv_handler);
+ local->post_res_handler = shard_post_resolve_readv_handler;
+ shard_refresh_dot_shard (frame, this);
}
return 0;
@@ -3849,7 +3975,8 @@ shard_mkdir_dot_shard_cbk (call_frame_t *frame, void *cookie,
struct iatt *buf, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
{
- shard_local_t *local = NULL;
+ inode_t *link_inode = NULL;
+ shard_local_t *local = NULL;
local = frame->local;
@@ -3869,8 +3996,15 @@ shard_mkdir_dot_shard_cbk (call_frame_t *frame, void *cookie,
}
}
- shard_link_dot_shard_inode (local, inode, buf);
-
+ link_inode = shard_link_dot_shard_inode (local, inode, buf);
+ if (link_inode != inode) {
+ shard_refresh_dot_shard (frame, this);
+ } else {
+ shard_inode_ctx_set_refreshed_flag (link_inode, this);
+ shard_common_resolve_shards (frame, this,
+ local->post_res_handler);
+ }
+ return 0;
unwind:
shard_common_resolve_shards (frame, this, local->post_res_handler);
return 0;
@@ -4638,8 +4772,8 @@ shard_common_inode_write_begin (call_frame_t *frame, xlator_t *this,
shard_mkdir_dot_shard (frame, this,
shard_common_inode_write_post_resolve_handler);
} else {
- shard_common_resolve_shards (frame, this,
- shard_common_inode_write_post_resolve_handler);
+ local->post_res_handler = shard_common_inode_write_post_resolve_handler;
+ shard_refresh_dot_shard (frame, this);
}
return 0;
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
index f626fae33c1..09232a45f24 100644
--- a/xlators/features/shard/src/shard.h
+++ b/xlators/features/shard/src/shard.h
@@ -268,6 +268,7 @@ typedef struct shard_inode_ctx {
struct list_head ilist;
uuid_t base_gfid;
int block_num;
+ gf_boolean_t refreshed;
} shard_inode_ctx_t;
#endif /* __SHARD_H__ */