-rw-r--r--   tests/basic/inode-leak.t                        8
-rw-r--r--   tests/bugs/shard/bug-1605056-2.t               34
-rw-r--r--   tests/bugs/shard/bug-1605056.t                 63
-rw-r--r--   tests/bugs/shard/shard-inode-refcount-test.t    2
-rw-r--r--   tests/volume.rc                                 6
-rw-r--r--   xlators/features/shard/src/shard.c             48
6 files changed, 140 insertions, 21 deletions
diff --git a/tests/basic/inode-leak.t b/tests/basic/inode-leak.t
index 6bfbf572f03..e112fdddf8a 100644
--- a/tests/basic/inode-leak.t
+++ b/tests/basic/inode-leak.t
@@ -11,15 +11,15 @@ TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3}
TEST $CLI volume start $V0
TEST glusterfs -s $H0 --volfile-id $V0 $M0
-EXPECT "1" get_mount_active_size_value $V0
-EXPECT "0" get_mount_lru_size_value $V0
+EXPECT "1" get_mount_active_size_value $V0 $M0
+EXPECT "0" get_mount_lru_size_value $V0 $M0
TEST cp -rf /etc $M0
TEST find $M0
TEST rm -rf $M0/*
-EXPECT "1" get_mount_active_size_value $V0
-EXPECT "0" get_mount_lru_size_value $V0
+EXPECT "1" get_mount_active_size_value $V0 $M0
+EXPECT "0" get_mount_lru_size_value $V0 $M0
cleanup
diff --git a/tests/bugs/shard/bug-1605056-2.t b/tests/bugs/shard/bug-1605056-2.t
new file mode 100644
index 00000000000..a9c10fec3ea
--- /dev/null
+++ b/tests/bugs/shard/bug-1605056-2.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 features.shard-lru-limit 25
+TEST $CLI volume set $V0 performance.write-behind off
+
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Perform a write that would cause 25 shards to be created under .shard
+TEST dd if=/dev/zero of=$M0/foo bs=1M count=104
+
+# Write into another file bar to ensure all of foo's shards are evicted from lru list of $M0
+TEST dd if=/dev/zero of=$M0/bar bs=1M count=104
+
+# Delete foo from $M0. If there's a bug, the mount will crash.
+TEST unlink $M0/foo
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/shard/bug-1605056.t b/tests/bugs/shard/bug-1605056.t
new file mode 100644
index 00000000000..c2329ea79f8
--- /dev/null
+++ b/tests/bugs/shard/bug-1605056.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+SHARD_COUNT_TIME=5
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 features.shard-lru-limit 25
+TEST $CLI volume set $V0 performance.write-behind off
+
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M1
+
+# Perform a write that would cause 25 shards to be created under .shard
+TEST dd if=/dev/zero of=$M0/foo bs=1M count=104
+
+# Read the file from $M1, indirectly filling up the lru list.
+TEST `cat $M1/foo > /dev/null`
+statedump=$(generate_mount_statedump $V0 $M1)
+sleep 1
+EXPECT "25" echo $(grep "inode-count" $statedump | cut -f2 -d'=' | tail -1)
+rm -f $statedump
+
+# Delete foo from $M0.
+TEST unlink $M0/foo
+
+# Send stat on foo from $M1 to force $M1 to "forget" inode associated with foo.
+# Now the ghost shards associated with "foo" are still in lru list of $M1.
+TEST ! stat $M1/foo
+
+# Let's force the ghost shards of "foo" out of lru list by looking up more shards
+# through I/O on a file named "bar" from $M1. This should crash if the base inode
+# had been destroyed by now.
+
+TEST dd if=/dev/zero of=$M1/bar bs=1M count=104
+
+###############################################
+#### Now for some inode ref-leak tests ... ####
+###############################################
+
+# Expect there to be 29 active inodes - 26 belonging to "bar", 1 for .shard,
+# 1 for .shard/remove_me and 1 for '/'
+EXPECT_WITHIN $SHARD_COUNT_TIME `expr 26 + 3` get_mount_active_size_value $V0 $M1
+
+TEST rm -f $M1/bar
+EXPECT_WITHIN $SHARD_COUNT_TIME 3 get_mount_active_size_value $V0 $M1
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/shard/shard-inode-refcount-test.t b/tests/bugs/shard/shard-inode-refcount-test.t
index 087c8ba7815..3fd181be690 100644
--- a/tests/bugs/shard/shard-inode-refcount-test.t
+++ b/tests/bugs/shard/shard-inode-refcount-test.t
@@ -21,7 +21,7 @@ TEST dd if=/dev/zero conv=fsync of=$M0/one-plus-five-shards bs=1M count=23
ACTIVE_INODES_BEFORE=$(get_mount_active_size_value $V0)
TEST rm -f $M0/one-plus-five-shards
# Expect 5 inodes less. But one inode more than before because .remove_me would be created.
-EXPECT_WITHIN $SHARD_COUNT_TIME `expr $ACTIVE_INODES_BEFORE - 5 + 1` get_mount_active_size_value $V0
+EXPECT_WITHIN $SHARD_COUNT_TIME `expr $ACTIVE_INODES_BEFORE - 5 + 1` get_mount_active_size_value $V0 $M0
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
TEST $CLI volume stop $V0
diff --git a/tests/volume.rc b/tests/volume.rc
index cc0b5c670e4..261c6554d46 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -857,7 +857,8 @@ function get_active_fd_count {
function get_mount_active_size_value {
local vol=$1
- local statedump=$(generate_mount_statedump $vol)
+ local mount=$2
+ local statedump=$(generate_mount_statedump $vol $mount)
sleep 1
local val=$(grep "active_size" $statedump | cut -f2 -d'=' | tail -1)
rm -f $statedump
@@ -866,7 +867,8 @@ function get_mount_active_size_value {
function get_mount_lru_size_value {
local vol=$1
- local statedump=$(generate_mount_statedump $vol)
+ local mount=$2
+ local statedump=$(generate_mount_statedump $vol $mount)
sleep 1
local val=$(grep "lru_size" $statedump | cut -f2 -d'=' | tail -1)
rm -f $statedump
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index 182678c9a64..d5f55efa0bc 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -650,7 +650,8 @@ out:
inode_t *
__shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this,
- inode_t *base_inode, int block_num)
+ inode_t *base_inode, int block_num,
+ uuid_t gfid)
{
char block_bname[256] = {
0,
@@ -680,10 +681,13 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this,
inode_ref(linked_inode);
if (base_inode)
gf_uuid_copy(ctx->base_gfid, base_inode->gfid);
+ else
+ gf_uuid_copy(ctx->base_gfid, gfid);
ctx->block_num = block_num;
list_add_tail(&ctx->ilist, &priv->ilist_head);
priv->inode_count++;
- ctx->base_inode = base_inode;
+ if (base_inode)
+ ctx->base_inode = inode_ref(base_inode);
} else {
/*If on the other hand there is no available slot for this inode
* in the list, delete the lru inode from the head of the list,
@@ -701,6 +705,8 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this,
* deleted from fsync list and fsync'd in a new frame,
* and then unlinked in memory and forgotten.
*/
+ if (!lru_base_inode)
+ goto after_fsync_check;
LOCK(&lru_base_inode->lock);
LOCK(&lru_inode->lock);
{
@@ -716,6 +722,7 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this,
UNLOCK(&lru_inode->lock);
UNLOCK(&lru_base_inode->lock);
+ after_fsync_check:
if (!do_fsync) {
shard_make_block_bname(lru_inode_ctx->block_num,
lru_inode_ctx->base_gfid, block_bname,
@@ -728,20 +735,31 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this,
inode_forget(lru_inode, 0);
} else {
fsync_inode = lru_inode;
- inode_unref(lru_base_inode);
+ if (lru_base_inode)
+ inode_unref(lru_base_inode);
}
/* The following unref corresponds to the ref
* held by inode_find() above.
*/
inode_unref(lru_inode);
+
+ /* The following unref corresponds to the ref held on the base shard
+ * at the time of adding shard inode to lru list
+ */
+ if (lru_base_inode)
+ inode_unref(lru_base_inode);
+
/* For as long as an inode is in lru list, we try to
* keep it alive by holding a ref on it.
*/
inode_ref(linked_inode);
if (base_inode)
gf_uuid_copy(ctx->base_gfid, base_inode->gfid);
+ else
+ gf_uuid_copy(ctx->base_gfid, gfid);
ctx->block_num = block_num;
- ctx->base_inode = base_inode;
+ if (base_inode)
+ ctx->base_inode = inode_ref(base_inode);
list_add_tail(&ctx->ilist, &priv->ilist_head);
}
} else {
@@ -1025,7 +1043,7 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
LOCK(&priv->lock);
{
fsync_inode = __shard_update_shards_inode_list(
- inode, this, res_inode, shard_idx_iter);
+ inode, this, res_inode, shard_idx_iter, gfid);
}
UNLOCK(&priv->lock);
shard_idx_iter++;
@@ -2167,7 +2185,7 @@ shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode,
LOCK(&priv->lock);
{
fsync_inode = __shard_update_shards_inode_list(
- linked_inode, this, local->loc.inode, block_num);
+ linked_inode, this, local->loc.inode, block_num, gfid);
}
UNLOCK(&priv->lock);
if (fsync_inode)
@@ -2875,17 +2893,18 @@ shard_unlink_block_inode(shard_local_t *local, int shard_block_num)
shard_inode_ctx_t *ctx = NULL;
shard_inode_ctx_t *base_ictx = NULL;
gf_boolean_t unlink_unref_forget = _gf_false;
+ int unref_base_inode = 0;
this = THIS;
priv = this->private;
inode = local->inode_list[shard_block_num - local->first_block];
- base_inode = local->resolver_base_inode;
+ shard_inode_ctx_get(inode, this, &ctx);
+ base_inode = ctx->base_inode;
if (base_inode)
gf_uuid_copy(gfid, base_inode->gfid);
else
- gf_uuid_copy(gfid, local->base_gfid);
-
+ gf_uuid_copy(gfid, ctx->base_gfid);
shard_make_block_bname(shard_block_num, gfid, block_bname,
sizeof(block_bname));
@@ -2898,17 +2917,16 @@ shard_unlink_block_inode(shard_local_t *local, int shard_block_num)
if (!list_empty(&ctx->ilist)) {
list_del_init(&ctx->ilist);
priv->inode_count--;
+ unref_base_inode++;
GF_ASSERT(priv->inode_count >= 0);
unlink_unref_forget = _gf_true;
}
if (ctx->fsync_needed) {
- if (base_inode)
- inode_unref(base_inode);
+ unref_base_inode++;
list_del_init(&ctx->to_fsync_list);
- if (base_inode) {
+ if (base_inode)
__shard_inode_ctx_get(base_inode, this, &base_ictx);
- base_ictx->fsync_count--;
- }
+ base_ictx->fsync_count--;
}
}
UNLOCK(&inode->lock);
@@ -2919,6 +2937,8 @@ shard_unlink_block_inode(shard_local_t *local, int shard_block_num)
inode_unref(inode);
inode_forget(inode, 0);
}
+ if (base_inode && unref_base_inode)
+ inode_ref_reduce_by_n(base_inode, unref_base_inode);
UNLOCK(&priv->lock);
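
For illustration only, here is a minimal standalone sketch of the ref-accounting pattern the shard.c hunks above introduce: every list a shard inode sits on (the lru list, the to_fsync list) pins one reference on its base inode, and shard_unlink_block_inode() counts the pending unrefs under the shard inode's lock, then drops them in one batched call afterwards, as with inode_ref_reduce_by_n() above. The toy_inode_t type and the toy_ref()/toy_unref_by_n() helpers below are simplified stand-ins, not the libglusterfs inode API.

/* Sketch only: simplified stand-ins, not the libglusterfs inode API. */
#include <assert.h>
#include <stdio.h>

typedef struct toy_inode {
    int refcount;
} toy_inode_t;

typedef struct toy_shard_ctx {
    toy_inode_t *base_inode; /* pinned while the shard is on a list */
    int on_lru_list;
    int on_fsync_list;
} toy_shard_ctx_t;

static toy_inode_t *
toy_ref(toy_inode_t *inode)
{
    inode->refcount++;
    return inode;
}

static void
toy_unref_by_n(toy_inode_t *inode, int n)
{
    inode->refcount -= n;
    assert(inode->refcount >= 0);
}

/* Adding a shard to the lru list pins the base inode (when one exists),
 * analogous to ctx->base_inode = inode_ref(base_inode) in the patch. */
static void
toy_add_shard_to_lru(toy_shard_ctx_t *ctx, toy_inode_t *base_inode)
{
    if (base_inode)
        ctx->base_inode = toy_ref(base_inode);
    ctx->on_lru_list = 1;
}

/* Unlinking a shard counts one pending unref per list membership and
 * releases them together, as inode_ref_reduce_by_n() does in the patch. */
static void
toy_unlink_shard(toy_shard_ctx_t *ctx)
{
    int unref_base_inode = 0;

    if (ctx->on_lru_list) {
        ctx->on_lru_list = 0;
        unref_base_inode++;
    }
    if (ctx->on_fsync_list) {
        ctx->on_fsync_list = 0;
        unref_base_inode++;
    }
    if (ctx->base_inode && unref_base_inode)
        toy_unref_by_n(ctx->base_inode, unref_base_inode);
}

int
main(void)
{
    toy_inode_t base = {.refcount = 1};
    toy_shard_ctx_t shard = {0};

    toy_add_shard_to_lru(&shard, &base);
    shard.on_fsync_list = 1;
    toy_ref(&base); /* ref taken when the shard is queued for fsync */

    toy_unlink_shard(&shard);
    printf("base refcount after unlink: %d\n", base.refcount); /* prints 1 */
    return 0;
}

Counting the pending unrefs inside LOCK(&inode->lock) and deferring the actual decrement to a single call after UNLOCK(&inode->lock) is what lets the patch adjust the base inode's refcount without performing extra inode operations inside that critical section.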
}