summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKrutika Dhananjay <kdhananj@redhat.com>2019-01-24 14:14:39 +0530
committerShyamsundar Ranganathan <srangana@redhat.com>2019-02-04 15:07:46 +0000
commit525abba58fb31df9e2d515b1ecf84f1345c78c25 (patch)
tree6ee42131071da41bd31f84a383d3d10e5331c81c
parent3f05fb00a090686bbdb0b467cec1f3647903b851 (diff)
features/shard: Ref shard inode while adding to fsync list
PROBLEM: Lot of the earlier changes in the management of shards in lru, fsync lists assumed that if a given shard exists in fsync list, it must be part of lru list as well. This was found to be not true. Consider this - a file is FALLOCATE'd to a size which would make the number of participant shards to be greater than the lru list size. In this case, some of the resolved shards that are to participate in this fop will be evicted from lru list to give way to the rest of the shards. And once FALLOCATE completes, these shards are added to fsync list but without a ref. After the fop completes, these shard inodes are unref'd and destroyed while their inode ctxs are still part of fsync list. Now when an FSYNC is called on the base file and the fsync-list traversed, the client crashes due to illegal memory access. FIX: Hold a ref on the shard inode when adding to fsync list as well. And unref under following conditions: 1. when the shard is evicted from lru list 2. when the base file is fsync'd 3. when the shards are deleted. Change-Id: Iab460667d091b8388322f59b6cb27ce69299b1b2 fixes: bz#1669382 Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> (cherry picked from commit 72922c1fd69191b220f79905a23395c3a87f86ce)
-rw-r--r--tests/bugs/shard/bug-1669077.t29
-rw-r--r--xlators/features/shard/src/shard.c30
2 files changed, 51 insertions, 8 deletions
diff --git a/tests/bugs/shard/bug-1669077.t b/tests/bugs/shard/bug-1669077.t
new file mode 100644
index 00000000000..8d3a67a36be
--- /dev/null
+++ b/tests/bugs/shard/bug-1669077.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+SHARD_COUNT_TIME=5
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 features.shard-lru-limit 25
+
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# If the bug still exists, client should crash during fallocate below
+TEST fallocate -l 200M $M0/foo
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index 5715555109d..bcb0662d474 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -273,6 +273,7 @@ shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this,
* of the to_fsync_list.
*/
inode_ref(base_inode);
+ inode_ref(shard_inode);
LOCK(&base_inode->lock);
LOCK(&shard_inode->lock);
@@ -286,8 +287,10 @@ shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this,
/* Unref the base inode corresponding to the ref above, if the shard is
* found to be already part of the fsync list.
*/
- if (ret != 0)
+ if (ret != 0) {
inode_unref(base_inode);
+ inode_unref(shard_inode);
+ }
return ret;
}
@@ -734,6 +737,10 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this,
inode_unlink(lru_inode, priv->dot_shard_inode, block_bname);
inode_forget(lru_inode, 0);
} else {
+ /* The following unref corresponds to the ref
+ * held when the shard was added to fsync list.
+ */
+ inode_unref(lru_inode);
fsync_inode = lru_inode;
if (lru_base_inode)
inode_unref(lru_base_inode);
@@ -2932,8 +2939,8 @@ shard_unlink_block_inode(shard_local_t *local, int shard_block_num)
shard_priv_t *priv = NULL;
shard_inode_ctx_t *ctx = NULL;
shard_inode_ctx_t *base_ictx = NULL;
- gf_boolean_t unlink_unref_forget = _gf_false;
int unref_base_inode = 0;
+ int unref_shard_inode = 0;
this = THIS;
priv = this->private;
@@ -2958,11 +2965,12 @@ shard_unlink_block_inode(shard_local_t *local, int shard_block_num)
list_del_init(&ctx->ilist);
priv->inode_count--;
unref_base_inode++;
+ unref_shard_inode++;
GF_ASSERT(priv->inode_count >= 0);
- unlink_unref_forget = _gf_true;
}
if (ctx->fsync_needed) {
unref_base_inode++;
+ unref_shard_inode++;
list_del_init(&ctx->to_fsync_list);
if (base_inode) {
__shard_inode_ctx_get(base_inode, this, &base_ictx);
@@ -2973,11 +2981,11 @@ shard_unlink_block_inode(shard_local_t *local, int shard_block_num)
UNLOCK(&inode->lock);
if (base_inode)
UNLOCK(&base_inode->lock);
- if (unlink_unref_forget) {
- inode_unlink(inode, priv->dot_shard_inode, block_bname);
- inode_unref(inode);
- inode_forget(inode, 0);
- }
+
+ inode_unlink(inode, priv->dot_shard_inode, block_bname);
+ inode_ref_reduce_by_n(inode, unref_shard_inode);
+ inode_forget(inode, 0);
+
if (base_inode && unref_base_inode)
inode_ref_reduce_by_n(base_inode, unref_base_inode);
UNLOCK(&priv->lock);
@@ -5785,6 +5793,7 @@ shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
shard_inode_ctx_t *ctx = NULL;
shard_inode_ctx_t *base_ictx = NULL;
inode_t *base_inode = NULL;
+ gf_boolean_t unref_shard_inode = _gf_false;
local = frame->local;
base_inode = local->fd->inode;
@@ -5818,11 +5827,16 @@ out:
if (ctx->fsync_needed != 0) {
list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list);
base_ictx->fsync_count++;
+ } else {
+ unref_shard_inode = _gf_true;
}
}
UNLOCK(&anon_fd->inode->lock);
UNLOCK(&base_inode->lock);
}
+
+ if (unref_shard_inode)
+ inode_unref(anon_fd->inode);
if (anon_fd)
fd_unref(anon_fd);