path: root/xlators/features/shard/src
diff options
authorKrutika Dhananjay <>2019-04-05 10:30:23 +0530
committerKrutika Dhananjay <>2019-05-16 11:58:54 +0000
commit7ead11711181104b23c004e0cd5e9e3c3c981a4f (patch)
tree1044de91ae96e6434078cf60e3db565399e57cbf /xlators/features/shard/src
parent40b7121afbd3969706acb8198cf660a710583e70 (diff)
features/shard: Fix crash during background shard deletion in a specific case
Consider the following case - 1. A file gets FALLOCATE'd such that more than "shard-lru-limit" shards are created. 2. And then it is deleted after that. The unique thing about FALLOCATE is that, unlike WRITE, all of the participant shards are resolved, created and fallocated in a single batch. This means, in this case, after the first "shard-lru-limit" number of shards are resolved and added to the lru list, as part of resolution of the remaining shards, some of the existing shards in the lru list will need to be evicted. So these evicted shards will be inode_unlink()'d as part of eviction. Now once the fop gets to the actual FALLOCATE stage, the lru'd-out shards get added to the fsync list. 2 things to note at this point: i. the lru'd-out shards are only part of the fsync list, so each holds 1 ref on the base inode; ii. the more recently used shards are part of both the fsync and lru lists, so each of these shards holds 2 refs on the base inode - one for being part of the fsync list, and the other for being part of the lru list. FALLOCATE completes successfully and then this very file is deleted, and background shard deletion is launched. Here's where the ref counts get mismatched. First, as part of inode_resolve()s during the deletion, the lru'd-out inodes return NULL, because they are inode_unlink()'d by now. So these inodes need to be freshly looked up. But as part of linking them in lookup_cbk (precisely in shard_link_block_inode()), inode_link() returns the lru'd-out inode object. And its inode ctx is still valid, and ctx->base_inode is still valid from the last time it was added to the list. But shard_common_lookup_shards_cbk() passes NULL in the place of base_pointer to __shard_update_shards_inode_list(). This means, as part of adding the lru'd-out inode back to the lru list, the base inode is not ref'd since it is NULL. 
Whereas after unlinking this shard, during shard_unlink_block_inode(), ctx->base_inode is accessible and is unref'd because the shard was found to be part of the LRU list, even though the matching ref was never taken. This at some point leads to the base_inode refcount becoming 0 and the inode getting destroyed and released back while some of its associated shards are still being unlinked in parallel, and the client crashes whenever it is accessed next. The fix is to pass the base inode correctly, if available, in shard_link_block_inode(). Also, the patch fixes the ret value check in tests/bugs/shard/shard-fallocate.c. Change-Id: Ibd0bc4c6952367608e10701473cbad3947d7559f Updates: bz#1696136 Signed-off-by: Krutika Dhananjay <>
Diffstat (limited to 'xlators/features/shard/src')
1 files changed, 9 insertions, 3 deletions
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index 7a62f92f9ef..b8f5a31742e 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -2212,13 +2212,19 @@ shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode,
xlator_t *this = NULL;
inode_t *fsync_inode = NULL;
shard_priv_t *priv = NULL;
+ inode_t *base_inode = NULL;
this = THIS;
priv = this->private;
- if (local->loc.inode)
+ if (local->loc.inode) {
gf_uuid_copy(gfid, local->loc.inode->gfid);
- else
+ base_inode = local->loc.inode;
+ } else if (local->resolver_base_inode) {
+ gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
+ base_inode = local->resolver_base_inode;
+ } else {
gf_uuid_copy(gfid, local->base_gfid);
+ }
shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname));
@@ -2231,7 +2237,7 @@ shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode,
fsync_inode = __shard_update_shards_inode_list(
- linked_inode, this, local->loc.inode, block_num, gfid);
+ linked_inode, this, base_inode, block_num, gfid);
if (fsync_inode)