fuse, glusterfsd: mount logic fixes

Commit 7d0397c2 introduced two issues: i) broke the libfuse derived mount logic (details below) ii) in case of a daemonized glusterfs client is ran as daemon, parent process can return earlier than the mount is in place, which breaks agents that programmatically do a gluster mount via a direct call to glusterfs (ie. not via mount(8)). This patch fixes these issues by a refactor that merges the approaches sported by commits 7d0397c2 fuse: allow requests during mount (needed for SELinux labels) c5d781e0 upon daemonizing, wait on mtab update to terminate in parent Original daemonized libfuse event flow is as follows: try: fd = open("/dev/fuse") mount("-oopts,fd=%s" % fd ...) mount(8) -f # manipulate mtab except: sp = socketpair() env _FUSE_COMMFD=sp fusermount -oopts fd = receive_fd(sp) where fusermount(1) does: fd = open("/dev/fuse") mount("-oopts,fd=%d" % fd ...) sp = atoi(getenv("_FUSE_COMMFD")) send_fd(sp, fd) daemonize( # in child fuse_loop(fd) ) # in parent exit() As of 013850c9 (instead of adopting FUSE's 47e61004¹), we went for async mtab manipulation, and as of c5d781e0, still wanted keep that in sync with termination of daemon parent, so we changed it to: try: fd = open("/dev/fuse") mount("-oopts,fd=%s" % fd ...) pid = fork( # in child mount(8) -f ) except: sp = socketpair() env _FUSE_COMMFD=sp fusermount -oopts fd = receive_fd(sp) daemonize( fuse_loop(fd) ) waitpid(pid) exit() (Note the new approch came only to direct [privileged] mount, so fusermount based mounting was already partially broken.) As of 7d0397c2, with the purpose of facilitating async mount, the event flow was practically reduced to: fd = open("/dev/fuse") fork( mount("-oopts,fd=%s" % fd ...) fork( mount(8) -n ) ) daemonize( fuse_loop(fd) ) exit() Thus fusermount based mounting become defunct; however, the dead code was still kept around. So, we should either drop it or fix it. Also, the mtab manipulator is forked into yet another child with no purpose, while syncing with it in daemon parent is broken. mount(2) is neither synced with parent. Now we are coming to the following scheme: fd = open("/dev/fuse") pid = fork( try: mount("-oopts,fd=%s" % fd ...) mount(8) -n except: env _FUSE_DEVFD=fd fusermount -oopts ) where fusermount(1) does: fd = getenv("_FUSE_DEVFD") mount("-oopts,fd=%s" % fd ...) daemonize( fuse_loop(fd) ) waitpid(pid) exit() Nb.: - We can't help losing compatibility with upstream fusermount, as it sends back the fd only when mount(2) is completed, thus defeating the async mount approach. The 'getenv("_FUSE_DEVFD")' mechanism is specfic to glusterfs' fusermount (at the moment -- sure we can talk about it with upstream) - fusermount opens /dev/fuse at same privilege level as of original process², so we can bravely go on with doing the open unconditionally in original process - Original mounting code actually tries to mount through fusermount _twice_: if first attempt fails, then, assuming subtype support is missing in kernel, it tries again subtype stripped. However, this is redundant, as fusermount internally also performs the subtype check³. Therefore we simplified the logic to have just a single fusermount call. - we revert the changes to mount.glusterfs as of 7d0397c2, as now there is no issue with glusterfs to work around in that scope ¹ http://fuse.git.sourceforge.net/git/gitweb.cgi?p=fuse/fuse;a=blobdiff;f=ChangeLog;h=47e61004;hb=4c3d9b19;hpb=e61b775a ² http://fuse.git.sourceforge.net/git/gitweb.cgi?p=fuse/fuse;a=blob;f=util/fusermount.c;h=b2e87d95#l1023 ³ http://fuse.git.sourceforge.net/git/gitweb.cgi?p=fuse/fuse;a=blob;f=util/fusermount.c;h=b2e87d95#l839 Change-Id: I0c4ab70e0c5ad7b27337228749b266bcd0ba941d BUG: 811217 Signed-off-by: Csaba Henk <csaba@redhat.com> Reviewed-on: http://review.gluster.com/3428 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vijay@gluster.com>
author: Csaba Henk <csaba@redhat.com> 2012-05-14 17:07:28 +0530
committer: Vijay Bellur <vijay@gluster.com> 2012-05-27 09:55:47 -0700
commit: 4409b22a7a86431631f3fdf5a07d642e32735042 (patch)
tree: 4ca50821bf70bbd2f591f3a8ffb19b00f968a920 /xlators
parent: 507a4ebb6ca3704e1a6ee3d1b6511b58ed9f6ba7 (diff)
2 files changed, 21 insertions, 62 deletions
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index f14ccf5c178..42190083a27 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -3879,29 +3879,17 @@ int
 fuse_get_mount_status (xlator_t *this)
 {
         int             kid_status = -1;
-        pid_t           kid_pid = -1;
         fuse_private_t *priv = this->private;
-        int             our_status = -1;
 
         if (read(priv->status_pipe[0],&kid_status, sizeof(kid_status)) < 0) {
                 gf_log (this->name, GF_LOG_ERROR, "could not get mount status");
-                goto out;
+                kid_status = -1;
         }
         gf_log (this->name, GF_LOG_DEBUG, "mount status is %d", kid_status);
 
-        if (read(priv->status_pipe[0],&kid_pid, sizeof(kid_pid)) < 0) {
-                gf_log (this->name, GF_LOG_ERROR, "could not get mount PID");
-                goto out;
-        }
-        gf_log (this->name, GF_LOG_DEBUG, "mount PID is %d", kid_pid);
-
-        (void)waitpid(kid_pid,NULL,0);
-        our_status = kid_status;
-
-out:
         close(priv->status_pipe[0]);
         close(priv->status_pipe[1]);
-        return our_status;
+        return kid_status;
 }
 
 static void *
@@ -4384,7 +4372,7 @@ init (xlator_t *this_xl)
         int                xl_name_allocated = 0;
         int                fsname_allocated = 0;
         glusterfs_ctx_t   *ctx = NULL;
-        gf_boolean_t       sync_mtab = _gf_false;
+        gf_boolean_t       sync_to_mount = _gf_false;
         char              *mnt_args = NULL;
 
         if (this_xl == NULL)
@@ -4532,11 +4520,11 @@ init (xlator_t *this_xl)
                 priv->fuse_dump_fd = ret;
         }
 
-        sync_mtab = _gf_false;
-        ret = dict_get_str (options, "sync-mtab", &value_string);
+        sync_to_mount = _gf_false;
+        ret = dict_get_str (options, "sync-to-mount", &value_string);
         if (ret == 0) {
                 ret = gf_string2boolean (value_string,
-                                         &sync_mtab);
+                                         &sync_to_mount);
                 GF_ASSERT (ret == 0);
         }
 
@@ -4582,7 +4570,7 @@ init (xlator_t *this_xl)
         }
 
         priv->fd = gf_fuse_mount (priv->mount_point, fsname, mnt_args,
-                                  sync_mtab ? &ctx->mtab_pid : NULL,
+                                  sync_to_mount ? &ctx->mnt_pid : NULL,
                                   priv->status_pipe[1]);
         if (priv->fd == -1)
                 goto cleanup_exit;
@@ -4694,7 +4682,7 @@ struct volume_options options[] = {
         { .key  = {"uid-map-root"},
           .type = GF_OPTION_TYPE_INT
         },
-        { .key  = {"sync-mtab"},
+        { .key  = {"sync-to-mount"},
           .type = GF_OPTION_TYPE_BOOL
         },
         { .key = {"read-only"},
diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in
index 37cc2f9f60a..3ca09b03b1d 100755
--- a/xlators/mount/fuse/utils/mount.glusterfs.in
+++ b/xlators/mount/fuse/utils/mount.glusterfs.in
@@ -34,36 +34,6 @@ _init ()
     UPDATEDBCONF=/etc/updatedb.conf
 }
 
-# Mount happens asynchronously, so the command status alone will never be
-# sufficient.  Instead, we have to wait for multiple events representing
-# different possible outcomes.
-wait_for ()
-{
-    local daemon_pid=$1
-    local mount_point=$2
-
-    waited=0
-    while true; do
-        kill -s 0 $daemon_pid
-        if [ $? != 0 ]; then
-                echo "Gluster client daemon exited unexpectedly."
-                umount $mount_point &> /dev/null
-                exit 1
-        fi
-        inode=$(stat -c %i $mount_point 2>/dev/null);
-        if [ "$inode" = "1" ]; then
-            break
-        fi
-        if [ $waited -ge 10 ]; then
-            break
-        fi
-        sleep 1
-        waited=$((waited+1))
-    done
-
-    echo "$inode"
-}
-
 start_glusterfs ()
 {
     if [ -n "$log_level_str" ]; then
@@ -151,33 +121,34 @@ start_glusterfs ()
         cmd_line=$(echo "$cmd_line --volfile=$volfile_loc");
     fi
 
-    cmd_line=$(echo "$cmd_line $mount_point")
-    err=0
-    $cmd_line
-    daemon_pid=$$
+    cmd_line=$(echo "$cmd_line $mount_point");
+    err=0;
+    $cmd_line;
+
+
+    inode=$(stat -c %i $mount_point 2>/dev/null);
 
-    # Wait for the inode to change *or* for the daemon to exit (indicating a
-    # problem with the mount).
-    inode=$(wait_for $daemon_pid $mount_point)
     # this is required if the stat returns error
     if [ -z "$inode" ]; then
-        inode="0"
+        inode="0";
     fi
 
     # retry the failover
+    # if [ $? != "0" ]; then # <--- TODO: Once glusterfs returns proper error code, change it.
     if [ $inode -ne 1 ]; then
+        err=1;
         if [ -n "$cmd_line1" ]; then
             cmd_line1=$(echo "$cmd_line1 $mount_point");
-            $cmd_line1
-            daemon_pid=$$
+            $cmd_line1;
+            err=0;
 
-            inode=$(wait_for $daemon_pid $mount_point)
+            inode=$(stat -c %i $mount_point 2>/dev/null);
             # this is required if the stat returns error
             if [ -z "$inode" ]; then
                 inode="0";
             fi
             if [ $inode -ne 1 ]; then
-                err=1
+                err=1;
             fi
         fi
     fi
author	Csaba Henk <csaba@redhat.com>	2012-05-14 17:07:28 +0530
committer	Vijay Bellur <vijay@gluster.com>	2012-05-27 09:55:47 -0700
commit	4409b22a7a86431631f3fdf5a07d642e32735042 (patch)
tree	4ca50821bf70bbd2f591f3a8ffb19b00f968a920 /xlators
parent	507a4ebb6ca3704e1a6ee3d1b6511b58ed9f6ba7 (diff)