summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRajesh Joseph <rjoseph@redhat.com>2016-11-22 22:25:42 +0530
committerNiels de Vos <ndevos@redhat.com>2017-01-02 05:11:40 -0800
commit1d03b1b6b48e4ee9260852b35ec827f203e2287c (patch)
treea8ee7a433c3f8f200bcec15dee79250516e6b5cf
parented3fb30254af39e560d09466c6a755d6e0e4b32d (diff)
gfapi: glfs_subvol_done should NOT wait for graph migration.
In graph_setup function glfs_subvol_done is called which is executed in an epoll thread. glfs_lock waits on other thread to finish graph migration. This can lead to dead lock if we consume all the epoll threads. In general any call-back function executed in epoll thread should not call any blocking call which waits on a network reply either directly or indirectly, e.g. syncop functions should not be called in these threads. As a fix we should not wait for migration in the call-back path. > Reviewed-on: http://review.gluster.org/15913 > NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> > Smoke: Gluster Build System <jenkins@build.gluster.org> > Reviewed-by: Niels de Vos <ndevos@redhat.com> > CentOS-regression: Gluster Build System <jenkins@build.gluster.org> (cherry picked from commit 17d10b42fc4041442e6cd0bfda45944edea498c6) Change-Id: If96d0689fe1b4d74631e383048cdc30b01690dc2 BUG: 1399915 Signed-off-by: Rajesh Joseph <rjoseph@redhat.com> Reviewed-on: http://review.gluster.org/15977 Smoke: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Shyamsundar Ranganathan <srangana@redhat.com> Reviewed-by: Niels de Vos <ndevos@redhat.com>
-rw-r--r--api/src/glfs-internal.h15
-rw-r--r--api/src/glfs-resolve.c19
-rw-r--r--api/src/glfs.c8
3 files changed, 28 insertions, 14 deletions
diff --git a/api/src/glfs-internal.h b/api/src/glfs-internal.h
index f69df15715c..6ff9aada0c5 100644
--- a/api/src/glfs-internal.h
+++ b/api/src/glfs-internal.h
@@ -327,17 +327,26 @@ do { \
we can give up the mutex during syncop calls so
that bottom up calls (particularly CHILD_UP notify)
can do a mutex_lock() on @glfs without deadlocking
- the filesystem
+ the filesystem.
+
+ All the fops should wait for graph migration to finish
+ before starting the fops. Therefore these functions should
+ call glfs_lock with wait_for_migration as true. But waiting
+ for migration to finish in call-back path can result thread
+ dead-locks. The reason for this is we only have finite
+ number of epoll threads. so if we wait on epoll threads
+ there will not be any thread left to handle outstanding
+ rpc replies.
*/
static inline int
-glfs_lock (struct glfs *fs)
+glfs_lock (struct glfs *fs, gf_boolean_t wait_for_migration)
{
pthread_mutex_lock (&fs->mutex);
while (!fs->init)
pthread_cond_wait (&fs->cond, &fs->mutex);
- while (fs->migration_in_progress)
+ while (wait_for_migration && fs->migration_in_progress)
pthread_cond_wait (&fs->cond, &fs->mutex);
return 0;
diff --git a/api/src/glfs-resolve.c b/api/src/glfs-resolve.c
index b84e5d8f58c..f8b437bab0e 100644
--- a/api/src/glfs-resolve.c
+++ b/api/src/glfs-resolve.c
@@ -784,7 +784,7 @@ glfs_resolve_fd (struct glfs *fs, xlator_t *subvol, struct glfs_fd *glfd)
{
fd_t *fd = NULL;
- glfs_lock (fs);
+ glfs_lock (fs, _gf_true);
{
fd = __glfs_resolve_fd (fs, subvol, glfd);
}
@@ -897,12 +897,17 @@ priv_glfs_subvol_done (struct glfs *fs, xlator_t *subvol)
if (!subvol)
return;
- glfs_lock (fs);
+ /* For decrementing subvol->wind ref count we need not check/wait for
+ * migration-in-progress flag.
+ * Also glfs_subvol_done is called in call-back path therefore waiting
+ * fot migration-in-progress flag can lead to dead-lock.
+ */
+ glfs_lock (fs, _gf_false);
{
ref = (--subvol->winds);
active_subvol = fs->active_subvol;
}
- glfs_unlock (fs);
+ glfs_unlock (fs);
if (ref == 0) {
assert (subvol != active_subvol);
@@ -919,7 +924,7 @@ priv_glfs_active_subvol (struct glfs *fs)
xlator_t *subvol = NULL;
xlator_t *old_subvol = NULL;
- glfs_lock (fs);
+ glfs_lock (fs, _gf_true);
{
subvol = __glfs_active_subvol (fs);
@@ -968,7 +973,7 @@ glfs_cwd_set (struct glfs *fs, inode_t *inode)
{
int ret = 0;
- glfs_lock (fs);
+ glfs_lock (fs, _gf_true);
{
ret = __glfs_cwd_set (fs, inode);
}
@@ -1001,7 +1006,7 @@ glfs_cwd_get (struct glfs *fs)
{
inode_t *cwd = NULL;
- glfs_lock (fs);
+ glfs_lock (fs, _gf_true);
{
cwd = __glfs_cwd_get (fs);
}
@@ -1041,7 +1046,7 @@ glfs_resolve_inode (struct glfs *fs, xlator_t *subvol,
{
inode_t *inode = NULL;
- glfs_lock (fs);
+ glfs_lock (fs, _gf_true);
{
inode = __glfs_resolve_inode(fs, subvol, object);
}
diff --git a/api/src/glfs.c b/api/src/glfs.c
index 1c6ac75b694..2ec319fe3e6 100644
--- a/api/src/glfs.c
+++ b/api/src/glfs.c
@@ -549,7 +549,7 @@ glfs_fd_destroy (void *data)
glfd = (struct glfs_fd *)data;
- glfs_lock (glfd->fs);
+ glfs_lock (glfd->fs, _gf_true);
{
list_del_init (&glfd->openfds);
}
@@ -592,7 +592,7 @@ glfs_fd_bind (struct glfs_fd *glfd)
fs = glfd->fs;
- glfs_lock (fs);
+ glfs_lock (fs, _gf_true);
{
list_add_tail (&glfd->openfds, &fs->openfds);
}
@@ -882,7 +882,7 @@ glfs_init_wait (struct glfs *fs)
int ret = -1;
/* Always a top-down call, use glfs_lock() */
- glfs_lock (fs);
+ glfs_lock (fs, _gf_true);
{
while (!fs->init)
pthread_cond_wait (&fs->cond,
@@ -1256,7 +1256,7 @@ pub_glfs_get_volfile (struct glfs *fs, void *buf, size_t len)
DECLARE_OLD_THIS;
__GLFS_ENTRY_VALIDATE_FS (fs, invalid_fs);
- glfs_lock(fs);
+ glfs_lock(fs, _gf_true);
if (len >= fs->oldvollen) {
gf_msg_trace ("glfs", 0, "copying %zu to %p", len, buf);
memcpy(buf,fs->oldvolfile,len);