summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Krutika Dhananjay <kdhananj@redhat.com> 2018-12-28 18:53:15 +0530
committer: Krutika Dhananjay <kdhananj@redhat.com> 2019-01-15 04:57:11 +0000
commit: 11186f4e0b1d5952318060d6e8d3610d4d94add8 (patch)
tree: 8838b319874976c7102da0aa326bd221a383dcbd
parent: 4bbb0ecfac59f2ae8816640c516bb20b0b06aa44 (diff)
features/shard: Fix launch of multiple synctasks for background deletion
PROBLEM: When multiple sharded files are deleted in quick succession, multiple issues were observed: 1. misleading logs corresponding to a sharded file, where one log message said the shards corresponding to the file were deleted successfully, but this was followed by multiple logs suggesting the very same operation failed. This was because of multiple synctasks attempting to clean up shards of the same file and only one of them succeeding (the one that gets ENTRYLK successfully), and the rest of them logging failure. 2. multiple synctasks to do background deletion would be launched, one for each deleted file, but all of them could readdir entries from .remove_me at the same time and could potentially contend for ENTRYLK on .shard for each of the entry names. This is undesirable and wasteful. FIX: Background deletion will now follow a state machine. In the event that there are multiple attempts to launch a synctask for background deletion, one for each file deleted, only the first task is launched. And if, while this task is doing the cleanup, more attempts are made to delete other files, the state of the synctask is adjusted so that it restarts the crawl even after reaching end-of-directory to pick up any files it may have missed in the previous iteration. This patch also fixes uninitialized lk-owner during syncop_entrylk() which was leading to multiple background deletion synctasks entering the critical section at the same time and leading to illegal memory access of base inode in the second synctask after it was destroyed post shard deletion by the first synctask. Change-Id: Ib33773d27fb4be463c7a8a5a6a4b63689705324e updates: bz#1665803 Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> (cherry picked from commit c0c2022e7d7097e96270a74f37813eda0c4e6339)
-rw-r--r--xlators/features/shard/src/shard.c188
-rw-r--r--xlators/features/shard/src/shard.h11
2 files changed, 128 insertions, 71 deletions
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index 1e6a0205810..5715555109d 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -1461,16 +1461,45 @@ int
shard_start_background_deletion(xlator_t *this)
{
int ret = 0;
+ gf_boolean_t i_cleanup = _gf_true;
+ shard_priv_t *priv = NULL;
call_frame_t *cleanup_frame = NULL;
+ priv = this->private;
+
+ LOCK(&priv->lock);
+ {
+ switch (priv->bg_del_state) {
+ case SHARD_BG_DELETION_NONE:
+ i_cleanup = _gf_true;
+ priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
+ break;
+ case SHARD_BG_DELETION_LAUNCHING:
+ i_cleanup = _gf_false;
+ break;
+ case SHARD_BG_DELETION_IN_PROGRESS:
+ priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
+ i_cleanup = _gf_false;
+ break;
+ default:
+ break;
+ }
+ }
+ UNLOCK(&priv->lock);
+ if (!i_cleanup)
+ return 0;
+
cleanup_frame = create_frame(this, this->ctx->pool);
if (!cleanup_frame) {
gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
"Failed to create "
"new frame to delete shards");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto err;
}
+ set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root);
+
ret = synctask_new(this->ctx->env, shard_delete_shards,
shard_delete_shards_cbk, cleanup_frame, cleanup_frame);
if (ret < 0) {
@@ -1479,7 +1508,16 @@ shard_start_background_deletion(xlator_t *this)
"failed to create task to do background "
"cleanup of shards");
STACK_DESTROY(cleanup_frame->root);
+ goto err;
}
+ return 0;
+
+err:
+ LOCK(&priv->lock);
+ {
+ priv->bg_del_state = SHARD_BG_DELETION_NONE;
+ }
+ UNLOCK(&priv->lock);
return ret;
}
@@ -1488,7 +1526,7 @@ shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, dict_t *xdata, struct iatt *postparent)
{
- int ret = 0;
+ int ret = -1;
shard_priv_t *priv = NULL;
gf_boolean_t i_start_cleanup = _gf_false;
@@ -1521,23 +1559,25 @@ shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
LOCK(&priv->lock);
{
- if (priv->first_lookup == SHARD_FIRST_LOOKUP_PENDING) {
- priv->first_lookup = SHARD_FIRST_LOOKUP_IN_PROGRESS;
+ if (priv->first_lookup_done == _gf_false) {
+ priv->first_lookup_done = _gf_true;
i_start_cleanup = _gf_true;
}
}
UNLOCK(&priv->lock);
- if (i_start_cleanup) {
- ret = shard_start_background_deletion(this);
- if (ret) {
- LOCK(&priv->lock);
- {
- priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING;
- }
- UNLOCK(&priv->lock);
+ if (!i_start_cleanup)
+ goto unwind;
+
+ ret = shard_start_background_deletion(this);
+ if (ret < 0) {
+ LOCK(&priv->lock);
+ {
+ priv->first_lookup_done = _gf_false;
}
+ UNLOCK(&priv->lock);
}
+
unwind:
SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata,
postparent);
@@ -2924,10 +2964,10 @@ shard_unlink_block_inode(shard_local_t *local, int shard_block_num)
if (ctx->fsync_needed) {
unref_base_inode++;
list_del_init(&ctx->to_fsync_list);
- if (base_inode)
+ if (base_inode) {
__shard_inode_ctx_get(base_inode, this, &base_ictx);
- if (base_ictx)
base_ictx->fsync_count--;
+ }
}
}
UNLOCK(&inode->lock);
@@ -3333,9 +3373,13 @@ shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this,
loc.inode = inode_ref(priv->dot_shard_rm_inode);
ret = syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name,
- ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL, NULL);
- if (ret)
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL, NULL);
+ if (ret < 0) {
+ if (ret == -EAGAIN) {
+ ret = 0;
+ }
goto out;
+ }
{
ret = __shard_delete_shards_of_entry(cleanup_frame, this, entry, inode);
}
@@ -3349,20 +3393,6 @@ out:
int
shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data)
{
- xlator_t *this = NULL;
- shard_priv_t *priv = NULL;
-
- this = frame->this;
- priv = this->private;
-
- if (ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, -ret,
- SHARD_MSG_SHARDS_DELETION_FAILED,
- "Background deletion of shards failed");
- priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING;
- } else {
- priv->first_lookup = SHARD_FIRST_LOOKUP_DONE;
- }
SHARD_STACK_DESTROY(frame);
return 0;
}
@@ -3484,6 +3514,7 @@ shard_delete_shards(void *opaque)
gf_dirent_t entries;
gf_dirent_t *entry = NULL;
call_frame_t *cleanup_frame = NULL;
+ gf_boolean_t done = _gf_false;
this = THIS;
priv = this->private;
@@ -3538,51 +3569,76 @@ shard_delete_shards(void *opaque)
goto err;
}
- while ((ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset,
- &entries, local->xattr_req, NULL))) {
- if (ret > 0)
- ret = 0;
- list_for_each_entry(entry, &entries.list, list)
+ for (;;) {
+ offset = 0;
+ LOCK(&priv->lock);
{
- offset = entry->d_off;
-
- if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
- continue;
+ if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) {
+ priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS;
+ } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) {
+ priv->bg_del_state = SHARD_BG_DELETION_NONE;
+ done = _gf_true;
+ }
+ }
+ UNLOCK(&priv->lock);
+ if (done)
+ break;
+ while (
+ (ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset,
+ &entries, local->xattr_req, NULL))) {
+ if (ret > 0)
+ ret = 0;
+ list_for_each_entry(entry, &entries.list, list)
+ {
+ offset = entry->d_off;
- if (!entry->inode) {
- ret = shard_lookup_marker_entry(this, local, entry);
- if (ret < 0)
+ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
continue;
- }
- link_inode = inode_link(entry->inode, local->fd->inode,
- entry->d_name, &entry->d_stat);
- gf_msg_debug(this->name, 0,
- "Initiating deletion of "
- "shards of gfid %s",
- entry->d_name);
- ret = shard_delete_shards_of_entry(cleanup_frame, this, entry,
- link_inode);
- inode_unlink(link_inode, local->fd->inode, entry->d_name);
- inode_unref(link_inode);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret,
- SHARD_MSG_SHARDS_DELETION_FAILED,
- "Failed to clean up shards of gfid %s", entry->d_name);
- continue;
+ if (!entry->inode) {
+ ret = shard_lookup_marker_entry(this, local, entry);
+ if (ret < 0)
+ continue;
+ }
+ link_inode = inode_link(entry->inode, local->fd->inode,
+ entry->d_name, &entry->d_stat);
+
+ gf_msg_debug(this->name, 0,
+ "Initiating deletion of "
+ "shards of gfid %s",
+ entry->d_name);
+ ret = shard_delete_shards_of_entry(cleanup_frame, this, entry,
+ link_inode);
+ inode_unlink(link_inode, local->fd->inode, entry->d_name);
+ inode_unref(link_inode);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret,
+ SHARD_MSG_SHARDS_DELETION_FAILED,
+ "Failed to clean up shards of gfid %s",
+ entry->d_name);
+ continue;
+ }
+ gf_msg(this->name, GF_LOG_INFO, 0,
+ SHARD_MSG_SHARD_DELETION_COMPLETED,
+ "Deleted "
+ "shards of gfid=%s from backend",
+ entry->d_name);
}
- gf_msg(this->name, GF_LOG_INFO, 0,
- SHARD_MSG_SHARD_DELETION_COMPLETED,
- "Deleted "
- "shards of gfid=%s from backend",
- entry->d_name);
+ gf_dirent_free(&entries);
+ if (ret)
+ break;
}
- gf_dirent_free(&entries);
- if (ret)
- break;
}
ret = 0;
+ loc_wipe(&loc);
+ return ret;
+
err:
+ LOCK(&priv->lock);
+ {
+ priv->bg_del_state = SHARD_BG_DELETION_NONE;
+ }
+ UNLOCK(&priv->lock);
loc_wipe(&loc);
return ret;
}
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
index f877591faee..15c3cef0a07 100644
--- a/xlators/features/shard/src/shard.h
+++ b/xlators/features/shard/src/shard.h
@@ -200,10 +200,10 @@ shard_unlock_entrylk(call_frame_t *frame, xlator_t *this);
} while (0)
typedef enum {
- SHARD_FIRST_LOOKUP_PENDING = 0,
- SHARD_FIRST_LOOKUP_IN_PROGRESS,
- SHARD_FIRST_LOOKUP_DONE,
-} shard_first_lookup_state_t;
+ SHARD_BG_DELETION_NONE = 0,
+ SHARD_BG_DELETION_LAUNCHING,
+ SHARD_BG_DELETION_IN_PROGRESS,
+} shard_bg_deletion_state_t;
/* rm = "remove me" */
@@ -217,7 +217,8 @@ typedef struct shard_priv {
int inode_count;
struct list_head ilist_head;
uint32_t deletion_rate;
- shard_first_lookup_state_t first_lookup;
+ shard_bg_deletion_state_t bg_del_state;
+ gf_boolean_t first_lookup_done;
uint64_t lru_limit;
} shard_priv_t;