From fafd5c17c0d126e10b401199cd4f01f7786deef8 Mon Sep 17 00:00:00 2001
From: Csaba Henk <csaba@redhat.com>
Date: Fri, 9 Mar 2012 09:11:23 +0100
Subject: geo-rep / gsyncd: fix cleanup of temporary mounts

The "finally" clause that was meant to clean up after the
temp mount did not cover the case of the worker getting signalled
(e.g. by the monitor, upon the worker timing out).

So here we "outsource" the cleanup to an ephemeral child process.
Child calls setsid(2) so it won't be bothered by internal process
management. We use a pipe in between worker and the cleanup child;
when child sees the worker end getting closed, it performs the cleanup.
Worker end can get closed either because worker closes it (normal case),
or because worker has terminated (faulty case) -- thus as bonus, we get
a nice uniform handling with no need to differentiate between normal and
faulty cases.

The faulty case that was seen IRL -- i.e., users of maintenance mounts
hang in chdir(2) to mount point -- can be simulated for testing purposes
by applying the following patch:

diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index acd3c68..1ce5dc1 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -2918,7 +2918,7 @@ fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg)
         if (fini->minor < 9)
                 *priv->msg0_len_p = sizeof(*finh) + FUSE_COMPAT_WRITE_IN_SIZE;
 #endif
-        ret = send_fuse_obj (this, finh, &fino);
+        ret = priv->client_pid_set ? 0 : send_fuse_obj (this, finh, &fino);
         if (ret == 0)
                 gf_log ("glusterfs-fuse", GF_LOG_INFO,
                         "FUSE inited with protocol versions:"

Change-Id: I1172bf16ac1006bad48958655572155820e5138d
BUG: 786291
Signed-off-by: Csaba Henk <csaba@redhat.com>
Reviewed-on: http://review.gluster.com/2908
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Venky Shankar <vshankar@redhat.com>
Reviewed-by: Vijay Bellur <vijay@gluster.com>
---
 .../features/marker/utils/syncdaemon/resource.py   | 38 ++++++++++++++--------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/xlators/features/marker/utils/syncdaemon/resource.py b/xlators/features/marker/utils/syncdaemon/resource.py
index 5f46edd44cf..3a0ae6e0a4c 100644
--- a/xlators/features/marker/utils/syncdaemon/resource.py
+++ b/xlators/features/marker/utils/syncdaemon/resource.py
@@ -4,6 +4,7 @@ import sys
 import pwd
 import stat
 import time
+import fcntl
 import errno
 import struct
 import socket
@@ -364,32 +365,43 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote):
         return True
 
     def connect(self):
-        def umount_l(d):
-            time.sleep(0.2) # XXX temporary workaround
-            argv = ['umount', '-l', d]
-            return os.spawnvp(os.P_WAIT, argv[0], argv)
         d = tempfile.mkdtemp(prefix='gsyncd-aux-mount-')
-        mounted = False
-        try:
+        mpi, mpo = os.pipe()
+        mh = os.fork()
+        if mh:
+            os.close(mpi)
+            fcntl.fcntl(mpo, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
             argv = gconf.gluster_command.split() + \
                     (gconf.gluster_log_level and ['-L', gconf.gluster_log_level] or []) + \
                     ['-l', gconf.gluster_log_file, '-s', self.host,
                      '--volfile-id', self.volume, '--client-pid=-1', d]
             if os.spawnvp(os.P_WAIT, argv[0], argv):
                 raise RuntimeError("command failed: " + " ".join(argv))
-            mounted = True
             logging.debug('auxiliary glusterfs mount in place')
+            os.write(mpo, 'M')
             os.chdir(d)
-            if umount_l(d) != 0:
-                raise RuntimeError("umounting %s failed" % d)
-            mounted = False
-        finally:
+            os.close(mpo)
+            _, rv = os.waitpid(mh, 0)
+            if rv:
+                logging.warn('stale mount possibly left behind on ' + d)
+                raise RuntimeError("cleaning up temp mountpoint %s failed with status %d" % \
+                                   (d, rv))
+        else:
+            rv = 0
             try:
+                os.setsid()
+                os.close(mpo)
+                mounted = False
+                while os.read(mpi, 1):
+                    mounted = True
                 if mounted:
-                    umount_l(d)
+                    time.sleep(0.2) # XXX temporary workaround
+                    argv = ['umount', '-l', d]
+                    rv = os.spawnvp(os.P_WAIT, argv[0], argv)
                 os.rmdir(d)
             except:
-                logging.warn('stale mount possibly left behind on ' + d)
+                rv = 200
+            os._exit(rv)
         logging.debug('auxiliary glusterfs mount prepared')
 
     def connect_remote(self, *a, **kw):
-- 
cgit