From 9b15867070b0cc241ab165886292ecffc3bc0aed Mon Sep 17 00:00:00 2001 From: N Balachandran Date: Tue, 1 Oct 2019 17:37:15 +0530 Subject: cluster/dht: Correct fd processing loop The fd processing loops in the dht_migration_complete_check_task and the dht_rebalance_inprogress_task functions were unsafe and could cause an open to be sent on an already freed fd. This has been fixed. Change-Id: I0a3c7d2fba314089e03dfd704f9dceb134749540 Fixes: bz#1757399 Signed-off-by: N Balachandran --- xlators/cluster/dht/src/dht-helper.c | 84 ++++++++++++++++++++++++++---------- 1 file changed, 62 insertions(+), 22 deletions(-) (limited to 'xlators/cluster/dht/src') diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index c7bc8c3aaf2..980a562deb2 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -1260,6 +1260,7 @@ dht_migration_complete_check_task(void *data) fd_t *tmp = NULL; uint64_t tmp_miginfo = 0; dht_migrate_info_t *miginfo = NULL; + gf_boolean_t skip_open = _gf_false; int open_failed = 0; this = THIS; @@ -1398,24 +1399,34 @@ dht_migration_complete_check_task(void *data) * the loop will cause the destruction of the fd. So we need to * iterate the list safely because iter_fd cannot be trusted. */ - list_for_each_entry_safe(iter_fd, tmp, &inode->fd_list, inode_list) - { - if (fd_is_anonymous(iter_fd)) - continue; - - if (dht_fd_open_on_dst(this, iter_fd, dst_node)) - continue; - + iter_fd = list_entry((&inode->fd_list)->next, typeof(*iter_fd), inode_list); + while (&iter_fd->inode_list != (&inode->fd_list)) { + if (fd_is_anonymous(iter_fd) || + (dht_fd_open_on_dst(this, iter_fd, dst_node))) { + if (!tmp) { + iter_fd = list_entry(iter_fd->inode_list.next, typeof(*iter_fd), + inode_list); + continue; + } + skip_open = _gf_true; + } /* We need to release the inode->lock before calling * syncop_open() to avoid possible deadlocks. However this * can cause the iter_fd to be released by other threads. * To avoid this, we take a reference before releasing the * lock. */ - __fd_ref(iter_fd); + fd_ref(iter_fd); UNLOCK(&inode->lock); + if (tmp) { + fd_unref(tmp); + tmp = NULL; + } + if (skip_open) + goto next; + /* flags for open are stripped down to allow following the * new location of the file, otherwise we can get EEXIST or * truncate the file again as rebalance is moving the data */ @@ -1437,9 +1448,11 @@ dht_migration_complete_check_task(void *data) dht_fd_ctx_set(this, iter_fd, dst_node); } - fd_unref(iter_fd); - + next: LOCK(&inode->lock); + skip_open = _gf_false; + tmp = iter_fd; + iter_fd = list_entry(tmp->inode_list.next, typeof(*tmp), inode_list); } SYNCTASK_SETID(frame->root->uid, frame->root->gid); @@ -1452,6 +1465,10 @@ dht_migration_complete_check_task(void *data) unlock: UNLOCK(&inode->lock); + if (tmp) { + fd_unref(tmp); + tmp = NULL; + } out: if (dict) { @@ -1533,6 +1550,7 @@ dht_rebalance_inprogress_task(void *data) int open_failed = 0; uint64_t tmp_miginfo = 0; dht_migrate_info_t *miginfo = NULL; + gf_boolean_t skip_open = _gf_false; this = THIS; frame = data; @@ -1653,24 +1671,40 @@ dht_rebalance_inprogress_task(void *data) * the loop will cause the destruction of the fd. So we need to * iterate the list safely because iter_fd cannot be trusted. */ - list_for_each_entry_safe(iter_fd, tmp, &inode->fd_list, inode_list) - { - if (fd_is_anonymous(iter_fd)) - continue; - - if (dht_fd_open_on_dst(this, iter_fd, dst_node)) - continue; - + iter_fd = list_entry((&inode->fd_list)->next, typeof(*iter_fd), inode_list); + while (&iter_fd->inode_list != (&inode->fd_list)) { /* We need to release the inode->lock before calling * syncop_open() to avoid possible deadlocks. However this * can cause the iter_fd to be released by other threads. * To avoid this, we take a reference before releasing the * lock. */ - __fd_ref(iter_fd); + if (fd_is_anonymous(iter_fd) || + (dht_fd_open_on_dst(this, iter_fd, dst_node))) { + if (!tmp) { + iter_fd = list_entry(iter_fd->inode_list.next, typeof(*iter_fd), + inode_list); + continue; + } + skip_open = _gf_true; + } + + /* Yes, this is ugly but there isn't a cleaner way to do this + * the fd_ref is an atomic increment so not too bad. We want to + * reduce the number of inode locks and unlocks. + */ + + fd_ref(iter_fd); UNLOCK(&inode->lock); + if (tmp) { + fd_unref(tmp); + tmp = NULL; + } + if (skip_open) + goto next; + /* flags for open are stripped down to allow following the * new location of the file, otherwise we can get EEXIST or * truncate the file again as rebalance is moving the data */ @@ -1691,9 +1725,11 @@ dht_rebalance_inprogress_task(void *data) dht_fd_ctx_set(this, iter_fd, dst_node); } - fd_unref(iter_fd); - + next: LOCK(&inode->lock); + skip_open = _gf_false; + tmp = iter_fd; + iter_fd = list_entry(tmp->inode_list.next, typeof(*tmp), inode_list); } SYNCTASK_SETID(frame->root->uid, frame->root->gid); @@ -1701,6 +1737,10 @@ dht_rebalance_inprogress_task(void *data) unlock: UNLOCK(&inode->lock); + if (tmp) { + fd_unref(tmp); + tmp = NULL; + } if (open_failed) { ret = -1; goto out; -- cgit