From b2c5d8ee2fe0f8baa7641d564ad86bb815f9509c Mon Sep 17 00:00:00 2001
From: Rajesh Joseph
Date: Tue, 22 Nov 2016 22:25:42 +0530
Subject: gfapi: glfs_subvol_done should NOT wait for graph migration.

In the graph_setup function, glfs_subvol_done is called, and it executes
in an epoll thread. glfs_lock waits on another thread to finish graph
migration. This can lead to a deadlock if all the epoll threads are
consumed.

In general, any call-back function executed in an epoll thread should not
make a blocking call that waits on a network reply, either directly or
indirectly; e.g. syncop functions should not be called in these threads.

As a fix, we should not wait for migration in the call-back path.

> Reviewed-on: http://review.gluster.org/15913
> NetBSD-regression: NetBSD Build System
> Smoke: Gluster Build System
> Reviewed-by: Niels de Vos
> CentOS-regression: Gluster Build System

(cherry picked from commit 17d10b42fc4041442e6cd0bfda45944edea498c6)

Change-Id: If96d0689fe1b4d74631e383048cdc30b01690dc2
BUG: 1399914
Signed-off-by: Rajesh Joseph
Reviewed-on: http://review.gluster.org/15976
Smoke: Gluster Build System
NetBSD-regression: NetBSD Build System
CentOS-regression: Gluster Build System
Reviewed-by: Kaushal M
---
 api/src/glfs-internal.h | 15 ++++++++++++---
 api/src/glfs-resolve.c  | 19 ++++++++++++-------
 api/src/glfs.c          |  8 ++++----
 3 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/api/src/glfs-internal.h b/api/src/glfs-internal.h
index 0d1f9e9b887..b308a4f9fd8 100644
--- a/api/src/glfs-internal.h
+++ b/api/src/glfs-internal.h
@@ -327,17 +327,26 @@ do { \
   we can give up the mutex during syncop calls so
   that bottom up calls (particularly CHILD_UP notify)
   can do a mutex_lock() on @glfs without deadlocking
-  the filesystem
+  the filesystem.
+
+  All fops should wait for graph migration to finish
+  before starting the fop. Therefore these functions should
+  call glfs_lock with wait_for_migration as true. But waiting
+  for migration to finish in the call-back path can result in
+  thread deadlocks. The reason for this is that we only have a
+  finite number of epoll threads, so if we wait on epoll threads
+  there will not be any thread left to handle outstanding
+  rpc replies.
 */
 static inline int
-glfs_lock (struct glfs *fs)
+glfs_lock (struct glfs *fs, gf_boolean_t wait_for_migration)
 {
         pthread_mutex_lock (&fs->mutex);

         while (!fs->init)
                 pthread_cond_wait (&fs->cond, &fs->mutex);

-        while (fs->migration_in_progress)
+        while (wait_for_migration && fs->migration_in_progress)
                 pthread_cond_wait (&fs->cond, &fs->mutex);

         return 0;
diff --git a/api/src/glfs-resolve.c b/api/src/glfs-resolve.c
index b31507a949b..fea6d22de91 100644
--- a/api/src/glfs-resolve.c
+++ b/api/src/glfs-resolve.c
@@ -789,7 +789,7 @@ glfs_resolve_fd (struct glfs *fs, xlator_t *subvol, struct glfs_fd *glfd)
 {
         fd_t *fd = NULL;

-        glfs_lock (fs);
+        glfs_lock (fs, _gf_true);
         {
                 fd = __glfs_resolve_fd (fs, subvol, glfd);
         }
@@ -902,12 +902,17 @@ priv_glfs_subvol_done (struct glfs *fs, xlator_t *subvol)
         if (!subvol)
                 return;

-        glfs_lock (fs);
+        /* For decrementing the subvol->winds ref count we need not check/wait
+         * for the migration-in-progress flag.
+         * Also, glfs_subvol_done is called in the call-back path, therefore
+         * waiting for the migration-in-progress flag can lead to a deadlock.
+         */
+        glfs_lock (fs, _gf_false);
         {
                 ref = (--subvol->winds);
                 active_subvol = fs->active_subvol;
         }
-        glfs_unlock (fs);
+        glfs_unlock (fs);

         if (ref == 0) {
                 assert (subvol != active_subvol);
@@ -924,7 +929,7 @@ priv_glfs_active_subvol (struct glfs *fs)
         xlator_t *subvol = NULL;
         xlator_t *old_subvol = NULL;

-        glfs_lock (fs);
+        glfs_lock (fs, _gf_true);
         {
                 subvol = __glfs_active_subvol (fs);

@@ -973,7 +978,7 @@ glfs_cwd_set (struct glfs *fs, inode_t *inode)
 {
         int ret = 0;

-        glfs_lock (fs);
+        glfs_lock (fs, _gf_true);
         {
                 ret = __glfs_cwd_set (fs, inode);
         }
@@ -1006,7 +1011,7 @@ glfs_cwd_get (struct glfs *fs)
 {
         inode_t *cwd = NULL;

-        glfs_lock (fs);
+        glfs_lock (fs, _gf_true);
         {
                 cwd = __glfs_cwd_get (fs);
         }
@@ -1046,7 +1051,7 @@ glfs_resolve_inode (struct glfs *fs, xlator_t *subvol,
 {
         inode_t *inode = NULL;

-        glfs_lock (fs);
+        glfs_lock (fs, _gf_true);
         {
                 inode = __glfs_resolve_inode(fs, subvol, object);
         }
diff --git a/api/src/glfs.c b/api/src/glfs.c
index 4f742b2847c..effce14bf4a 100644
--- a/api/src/glfs.c
+++ b/api/src/glfs.c
@@ -524,7 +524,7 @@ glfs_fd_destroy (void *data)

         glfd = (struct glfs_fd *)data;

-        glfs_lock (glfd->fs);
+        glfs_lock (glfd->fs, _gf_true);
         {
                 list_del_init (&glfd->openfds);
         }
@@ -567,7 +567,7 @@ glfs_fd_bind (struct glfs_fd *glfd)

         fs = glfd->fs;

-        glfs_lock (fs);
+        glfs_lock (fs, _gf_true);
         {
                 list_add_tail (&glfd->openfds, &fs->openfds);
         }
@@ -857,7 +857,7 @@ glfs_init_wait (struct glfs *fs)
         int ret = -1;

         /* Always a top-down call, use glfs_lock() */
-        glfs_lock (fs);
+        glfs_lock (fs, _gf_true);
         {
                 while (!fs->init)
                         pthread_cond_wait (&fs->cond,
@@ -1231,7 +1231,7 @@ pub_glfs_get_volfile (struct glfs *fs, void *buf, size_t len)
         DECLARE_OLD_THIS;
         __GLFS_ENTRY_VALIDATE_FS (fs, invalid_fs);

-        glfs_lock(fs);
+        glfs_lock(fs, _gf_true);
         if (len >= fs->oldvollen) {
                 gf_msg_trace ("glfs", 0, "copying %zu to %p", len, buf);
                 memcpy(buf,fs->oldvolfile,len);
--
cgit
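
The locking rule introduced by this patch can be summed up as: top-down fop
paths (glfs_resolve_fd, glfs_cwd_set, glfs_fd_bind, ...) pass _gf_true and
block until any graph migration completes, while call-back paths such as
glfs_subvol_done pass _gf_false and only wait for initialization. Below is a
minimal standalone sketch of that behaviour; the names demo_fs, demo_lock and
demo_unlock are invented for illustration, and plain bool stands in for
gf_boolean_t, so this is not the actual gfapi code.

#include <pthread.h>
#include <stdbool.h>

/* Simplified stand-in for the fields of struct glfs used by glfs_lock(). */
struct demo_fs {
        pthread_mutex_t mutex;
        pthread_cond_t  cond;
        bool            init;                   /* volume graph initialized */
        bool            migration_in_progress;  /* graph switch under way   */
};

/* Mirrors the patched glfs_lock(): every caller waits for init, but only
 * callers passing wait_for_migration == true also wait for an in-progress
 * graph migration to finish. Call-back (epoll) paths pass false, so they
 * are never parked waiting for a migration that itself needs a free epoll
 * thread to make progress. */
static int
demo_lock (struct demo_fs *fs, bool wait_for_migration)
{
        pthread_mutex_lock (&fs->mutex);

        while (!fs->init)
                pthread_cond_wait (&fs->cond, &fs->mutex);

        while (wait_for_migration && fs->migration_in_progress)
                pthread_cond_wait (&fs->cond, &fs->mutex);

        return 0;
}

static void
demo_unlock (struct demo_fs *fs)
{
        pthread_mutex_unlock (&fs->mutex);
}

Under this scheme a fop path would call demo_lock (fs, true) and a call-back
path demo_lock (fs, false), matching the _gf_true/_gf_false arguments added
throughout the diff above.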