summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorXavier Hernandez <xhernandez@datalab.es>2016-06-09 16:53:19 +0200
committerKaushal M <kaushal@redhat.com>2016-06-24 03:20:05 -0700
commited16cfb0455e41ee39addf6b3cdacdbe0d98308a (patch)
tree4f83e39f18fb229f8449f5fd4e568ed755ea9ddd
parent8dddda654a60ed66792212b413fde113f4cf951f (diff)
cluster/dht: Fix unsafe iteration on inode->fd_list
When DHT traverses the inode->fd_list, it does that in an unsafe way that can generate races with fd_unref() called from other threads. This patch fixes this problem taking the inode->lock and adding a reference to the fd while it's being used outside of the mutex protected region. A minor change in storage/posix has been done to also access the inode->fd_list in a safe way. Backport of: > Change-Id: I10d469ca6a8f76e950a8c9779ae9c8b70f88ef93 > BUG: 1344340 > Signed-off-by: Xavier Hernandez <xhernandez@datalab.es> > Reviewed-on: http://review.gluster.org/14682 > CentOS-regression: Gluster Build System <jenkins@build.gluster.org> > Smoke: Gluster Build System <jenkins@build.gluster.org> > NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> > Reviewed-by: Raghavendra G <rgowdapp@redhat.com> Change-Id: I10d469ca6a8f76e950a8c9779ae9c8b70f88ef93 BUG: 1346751 Signed-off-by: Xavier Hernandez <xhernandez@datalab.es> Reviewed-on: http://review.gluster.org/14734 Smoke: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
-rw-r--r--xlators/cluster/dht/src/dht-helper.c86
-rw-r--r--xlators/storage/posix/src/posix.c2
2 files changed, 71 insertions, 17 deletions
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index ac70a665094..472abc1cf68 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -1108,6 +1108,7 @@ dht_migration_complete_check_task (void *data)
dht_conf_t *conf = NULL;
inode_t *inode = NULL;
fd_t *iter_fd = NULL;
+ fd_t *tmp = NULL;
uint64_t tmp_miginfo = 0;
dht_migrate_info_t *miginfo = NULL;
int open_failed = 0;
@@ -1230,27 +1231,46 @@ dht_migration_complete_check_task (void *data)
goto out;
}
+ /* perform 'open()' on all the fd's present on the inode */
+ if (tmp_loc.path == NULL) {
+ inode_path (inode, NULL, &path);
+ if (path)
+ tmp_loc.path = path;
+ }
+
+ LOCK(&inode->lock);
+
if (list_empty (&inode->fd_list))
- goto out;
+ goto unlock;
/* perform open as root:root. There is window between linkfile
* creation(root:root) and setattr with the correct uid/gid
*/
SYNCTASK_SETID(0, 0);
- /* perform 'open()' on all the fd's present on the inode */
- tmp_loc.inode = inode;
- inode_path (inode, NULL, &path);
- if (path)
- tmp_loc.path = path;
-
- list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ /* It's possible that we are the last user of iter_fd after each
+ * iteration. In this case the fd_unref() of iter_fd at the end of
+ * the loop will cause the destruction of the fd. So we need to
+ * iterate the list safely because iter_fd cannot be trusted.
+ */
+ list_for_each_entry_safe (iter_fd, tmp, &inode->fd_list, inode_list) {
if (fd_is_anonymous (iter_fd))
continue;
+
if (dht_fd_open_on_dst (this, iter_fd, dst_node))
continue;
+ /* We need to release the inode->lock before calling
+ * syncop_open() to avoid possible deadlocks. However this
+ * can cause the iter_fd to be released by other threads.
+ * To avoid this, we take a reference before releasing the
+ * lock.
+ */
+ __fd_ref(iter_fd);
+
+ UNLOCK(&inode->lock);
+
/* flags for open are stripped down to allow following the
* new location of the file, otherwise we can get EEXIST or
* truncate the file again as rebalance is moving the data */
@@ -1272,15 +1292,23 @@ dht_migration_complete_check_task (void *data)
} else {
dht_fd_ctx_set (this, iter_fd, dst_node);
}
+
+ fd_unref(iter_fd);
+
+ LOCK(&inode->lock);
}
SYNCTASK_SETID (frame->root->uid, frame->root->gid);
if (open_failed) {
ret = -1;
- goto out;
+ goto unlock;
}
ret = 0;
+
+unlock:
+ UNLOCK(&inode->lock);
+
out:
loc_wipe (&tmp_loc);
@@ -1353,6 +1381,7 @@ dht_rebalance_inprogress_task (void *data)
dht_conf_t *conf = NULL;
inode_t *inode = NULL;
fd_t *iter_fd = NULL;
+ fd_t *tmp = NULL;
int open_failed = 0;
uint64_t tmp_miginfo = 0;
dht_migrate_info_t *miginfo = NULL;
@@ -1463,24 +1492,44 @@ dht_rebalance_inprogress_task (void *data)
ret = 0;
+ if (tmp_loc.path == NULL) {
+ inode_path (inode, NULL, &path);
+ if (path)
+ tmp_loc.path = path;
+ }
+
+ LOCK(&inode->lock);
+
if (list_empty (&inode->fd_list))
- goto done;
+ goto unlock;
/* perform open as root:root. There is window between linkfile
* creation(root:root) and setattr with the correct uid/gid
*/
SYNCTASK_SETID (0, 0);
- inode_path (inode, NULL, &path);
- if (path)
- tmp_loc.path = path;
-
- list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ /* It's possible that we are the last user of iter_fd after each
+ * iteration. In this case the fd_unref() of iter_fd at the end of
+ * the loop will cause the destruction of the fd. So we need to
+ * iterate the list safely because iter_fd cannot be trusted.
+ */
+ list_for_each_entry_safe (iter_fd, tmp, &inode->fd_list, inode_list) {
if (fd_is_anonymous (iter_fd))
continue;
if (dht_fd_open_on_dst (this, iter_fd, dst_node))
continue;
+
+ /* We need to release the inode->lock before calling
+ * syncop_open() to avoid possible deadlocks. However this
+ * can cause the iter_fd to be released by other threads.
+ * To avoid this, we take a reference before releasing the
+ * lock.
+ */
+ __fd_ref(iter_fd);
+
+ UNLOCK(&inode->lock);
+
/* flags for open are stripped down to allow following the
* new location of the file, otherwise we can get EEXIST or
* truncate the file again as rebalance is moving the data */
@@ -1503,16 +1552,21 @@ dht_rebalance_inprogress_task (void *data)
dht_fd_ctx_set (this, iter_fd, dst_node);
}
+ fd_unref(iter_fd);
+
+ LOCK(&inode->lock);
}
SYNCTASK_SETID (frame->root->uid, frame->root->gid);
+unlock:
+ UNLOCK(&inode->lock);
+
if (open_failed) {
ret = -1;
goto out;
}
-done:
ret = dht_inode_ctx_set_mig_info (this, inode, src_node, dst_node);
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0,
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index c2644116eb1..7290905807b 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -4338,7 +4338,7 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,
}
if (loc->inode && name && !strcmp (name, GLUSTERFS_OPEN_FD_COUNT)) {
- if (!list_empty (&loc->inode->fd_list)) {
+ if (!fd_list_empty (loc->inode)) {
ret = dict_set_uint32 (dict, (char *)name, 1);
if (ret < 0)
gf_msg (this->name, GF_LOG_WARNING, 0,