glusterd: socketfile & pidfile related fixes for brick multiplexing feature

Problem: While brick-muliplexing is on after restarting glusterd, CLI is not showing pid of all brick processes in all volumes. Solution: While brick-mux is on all local brick process communicated through one UNIX socket but as per current code (glusterd_brick_start) it is trying to communicate with separate UNIX socket for each volume which is populated based on brick-name and vol-name.Because of multiplexing design only one UNIX socket is opened so it is throwing poller error and not able to fetch correct status of brick process through cli process. To resolve the problem write a new function glusterd_set_socket_filepath_for_mux that will call by glusterd_brick_start to validate about the existence of socketpath. To avoid the continuous EPOLLERR erros in logs update socket_connect code. Test: To reproduce the issue followed below steps 1) Create two distributed volumes(dist1 and dist2) 2) Set cluster.brick-multiplex is on 3) kill glusterd 4) run command gluster v status After apply the patch it shows correct pid for all volumes BUG: 1444596 Change-Id: I5d10af69dea0d0ca19511f43870f34295a54a4d2 Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> Reviewed-on: https://review.gluster.org/17101 Smoke: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Prashanth Pai <ppai@redhat.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
author: Mohit Agrawal <moagrawa@redhat.com> 2017-05-08 19:29:22 +0530
committer: Atin Mukherjee <amukherj@redhat.com> 2017-05-09 01:30:01 +0000
commit: 21c7f7baccfaf644805e63682e5a7d2a9864a1e6 (patch)
tree: 01bbbd50d13f609eb8f7d2cbe2ce5e3af1652e42
parent: 18e07cf01f975c80152e5469fb4e4274f08dc636 (diff)
16 files changed, 307 insertions, 63 deletions
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
index 5c4bc8113d5..bd10dff8430 100644
--- a/glusterfsd/src/glusterfsd.c
+++ b/glusterfsd/src/glusterfsd.c
@@ -2034,7 +2034,6 @@ glusterfs_pidfile_cleanup (glusterfs_ctx_t *ctx)
                       cmd_args->pid_file);
 
         if (ctx->cmd_args.pid_file) {
-                sys_unlink (ctx->cmd_args.pid_file);
                 ctx->cmd_args.pid_file = NULL;
         }
 
diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c
index 2acd83f36cf..6ea4fd14374 100644
--- a/libglusterfs/src/common-utils.c
+++ b/libglusterfs/src/common-utils.c
@@ -3633,6 +3633,24 @@ gf_skip_header_section (int fd, int header_len)
         return ret;
 }
 
+/* Below function is use to check at runtime if pid is running */
+
+gf_boolean_t
+gf_is_pid_running (int pid)
+{
+        char fname[32] = {0,};
+
+        snprintf(fname, sizeof(fname), "/proc/%d/cmdline", pid);
+
+        if (sys_access (fname , R_OK) != 0) {
+                return _gf_false;
+        }
+
+        return _gf_true;
+
+}
+
+
 gf_boolean_t
 gf_is_service_running (char *pidfile, int *pid)
 {
@@ -3661,15 +3679,7 @@ gf_is_service_running (char *pidfile, int *pid)
                 *pid = -1;
         }
 
-        if (!*pid) {
-                /*
-                 * PID 0 means we've started the process, but it hasn't gotten
-                 * far enough to put in a real PID yet.  More details are in
-                 * glusterd_brick_start.
-                 */
-                running = _gf_true;
-        }
-
+        running = gf_is_pid_running (*pid);
 out:
         if (file)
                 fclose (file);
diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/common-utils.h
index 67baa852c45..6243732b522 100644
--- a/libglusterfs/src/common-utils.h
+++ b/libglusterfs/src/common-utils.h
@@ -821,7 +821,8 @@ int gf_thread_create (pthread_t *thread, const pthread_attr_t *attr,
                       void *(*start_routine)(void *), void *arg);
 int gf_thread_create_detached (pthread_t *thread,
                       void *(*start_routine)(void *), void *arg);
-
+gf_boolean_t
+gf_is_pid_running (int pid);
 gf_boolean_t
 gf_is_service_running (char *pidfile, int *pid);
 gf_boolean_t
diff --git a/tests/bugs/glusterd/bug-1444596_brick_mux_gd_status_restart.t b/tests/bugs/glusterd/bug-1444596_brick_mux_gd_status_restart.t
new file mode 100644
index 00000000000..950cb5f8046
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1444596_brick_mux_gd_status_restart.t
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+
+function count_up_bricks {
+        $CLI --xml volume status $1 | grep '<status>1' | wc -l
+}
+
+function count_brick_processes {
+        pgrep glusterfsd | wc -l
+}
+
+cleanup
+TEST glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
+TEST $CLI volume create $V1 $H0:$B0/brick{2,3}
+
+TEST $CLI volume set all cluster.brick-multiplex on
+
+TEST $CLI volume start $V0
+TEST $CLI volume start $V1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V1
+EXPECT 1 count_brick_processes
+
+pkill glusterd
+TEST glusterd
+
+#Check brick status after restart glusterd
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V1
+
+
+TEST $CLI volume stop $V0
+TEST $CLI volume stop $V1
+
+cleanup
+
+TEST glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
+TEST $CLI volume create $V1 $H0:$B0/brick{2,3}
+
+TEST $CLI volume set all cluster.brick-multiplex on
+
+TEST $CLI volume start $V0
+TEST $CLI volume start $V1
+
+EXPECT 1 count_brick_processes
+
+TEST $CLI volume set $V0 performance.cache-size 32MB
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+
+#Check No. of brick processes after change option
+EXPECT 2 count_brick_processes
+
+pkill glusterd
+TEST glusterd
+
+#Check brick status after restart glusterd should not be NA
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V1
+EXPECT 2 count_brick_processes
+
+cleanup
diff --git a/tests/bugs/glusterd/bug-1444596_brick_mux_posix_hlth_chk_status.t b/tests/bugs/glusterd/bug-1444596_brick_mux_posix_hlth_chk_status.t
new file mode 100644
index 00000000000..39ab2dd723c
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1444596_brick_mux_posix_hlth_chk_status.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+
+function count_up_bricks {
+        $CLI --xml volume status $1 | grep '<status>1' | wc -l
+}
+
+function count_brick_processes {
+        pgrep glusterfsd | wc -l
+}
+
+cleanup
+TEST glusterd -LDEBUG
+TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
+TEST $CLI volume create $V1 $H0:$B0/brick{2,3}
+
+TEST $CLI volume set all cluster.brick-multiplex on
+
+TEST $CLI volume start $V0
+TEST $CLI volume start $V1
+
+EXPECT 1 count_brick_processes
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+TEST rm -rf $H0:$B0/brick{0,1}
+
+#Check No. of brick processes after remove brick from back-end
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V1
+
+EXPECT 1 count_brick_processes
+
+pkill glusterd
+TEST glusterd -LDEBUG
+sleep 5
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V1
+
+
+cleanup
+
diff --git a/tests/bugs/glusterd/bug-913555.t b/tests/bugs/glusterd/bug-913555.t
index 5c845dc8545..9bc875340d1 100755
--- a/tests/bugs/glusterd/bug-913555.t
+++ b/tests/bugs/glusterd/bug-913555.t
@@ -16,6 +16,10 @@ function check_peers {
 	$CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
 }
 
+function online_brick_count {
+       $CLI_1 --xml volume status | grep '<status>1' | wc -l
+}
+
 cleanup;
 
 TEST launch_cluster 3; # start 3-node virtual cluster
diff --git a/tests/cluster.rc b/tests/cluster.rc
index 6dece8182e4..48071647260 100644
--- a/tests/cluster.rc
+++ b/tests/cluster.rc
@@ -179,13 +179,3 @@ function brick_up_status_1 {
         $CLI_1 volume status $vol $host:$brick --xml | sed -ne 's/.*<status>\([01]\)<\/status>/\1/p'
 }
 
-function online_brick_count {
-	local bricks
-	local total=0
-	local i
-	for i in $(seq 1 $CLUSTER_COUNT); do
-		bricks=$(find $B0/$i/glusterd/vols -name '*.pid' | wc -l)
-		total=$((total+bricks))
-	done
-	echo $total
-}
diff --git a/tests/volume.rc b/tests/volume.rc
index 5c53bba2efd..c30c5fd2ea7 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -21,11 +21,39 @@ function brick_count()
     $CLI volume info $vol | egrep "^Brick[0-9]+: " | wc -l;
 }
 
+function check_brick_status() {
+       cmd="gluster --xml volume status"
+       local daemon=$1
+
+       if [[ -z $daemon ]]
+        then
+                echo `$cmd | grep '<status>1' | wc -l`
+       else
+                echo `$cmd | grep -A 5 ${daemon} | grep '<status>1' | wc -l`
+       fi
+}
+
 function online_brick_count ()
 {
-    find $GLUSTERD_WORKDIR/vols/ -name '*.pid' | wc -l
+       local v1=0
+       local v2=0
+       local v3=0
+       local v4=0
+       local v5=0
+       local tot=0
+
+       #First count total Number of bricks and then subtract daemon status
+       v1=`check_brick_status`
+       v2=`check_brick_status "Self-heal"`
+       v3=`check_brick_status "Quota"`
+       v4=`check_brick_status "Snapshot"`
+       v5=`check_brick_status "Tier"`
+       tot=$((v1-v2-v3-v4-v5))
+       echo $tot
+
 }
 
+
 function brick_up_status {
         local vol=$1
         local host=$2
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index 0a7e7581cd5..b1b13a49f71 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -1960,6 +1960,7 @@ glusterd_remove_brick_validate_bricks (gf1_op_commands cmd, int32_t brick_count,
         int                     ret         = -1;
         char                    pidfile[PATH_MAX+1] = {0,};
         glusterd_conf_t        *priv        = THIS->private;
+        int                     pid         = -1;
 
         /* Check whether all the nodes of the bricks to be removed are
         * up, if not fail the operation */
@@ -2048,12 +2049,14 @@ check:
                         }
                         GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo,
                                                     brickinfo, priv);
-                        if (!gf_is_service_running (pidfile, NULL)) {
+                        if (!gf_is_service_running (pidfile, &pid)) {
                                 snprintf (msg, sizeof (msg), "Found dead "
                                           "brick %s", brick);
                                 *errstr = gf_strdup (msg);
                                 ret = -1;
                                 goto out;
+                        } else {
+                                ret = 0;
                         }
                         continue;
                 }
diff --git a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
index 9f934629330..8eeec403139 100644
--- a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
+++ b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
@@ -105,7 +105,7 @@ glusterd_proc_stop (glusterd_proc_t *proc, int sig, int flags)
                 goto out;
 
         sleep (1);
-        if (gf_is_service_running (proc->pidfile, NULL)) {
+        if (gf_is_service_running (proc->pidfile, &pid)) {
                 ret = kill (pid, SIGKILL);
                 if (ret) {
                         gf_msg (this->name, GF_LOG_ERROR, errno,
@@ -131,5 +131,7 @@ glusterd_proc_get_pid (glusterd_proc_t *proc)
 int
 glusterd_proc_is_running (glusterd_proc_t *proc)
 {
-        return gf_is_service_running (proc->pidfile, NULL);
+        int pid = -1;
+
+        return gf_is_service_running (proc->pidfile, &pid);
 }
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
index 22c18b96033..fbef1df7eaa 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
@@ -84,6 +84,7 @@ __glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata,
         char                    pidfile[PATH_MAX];
         glusterd_conf_t        *priv    = NULL;
         xlator_t               *this    = NULL;
+        int                    pid      = -1;
 
         this = THIS;
         if (!this)
@@ -134,7 +135,7 @@ __glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata,
                 }
                 UNLOCK (&defrag->lock);
 
-                if (!gf_is_service_running (pidfile, NULL)) {
+                if (!gf_is_service_running (pidfile, &pid)) {
                         if (volinfo->rebal.defrag_status ==
                                                 GF_DEFRAG_STATUS_STARTED) {
                                 volinfo->rebal.defrag_status =
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
index 740354304c5..72b70f916c6 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -4584,6 +4584,10 @@ glusterd_restore ()
 
         this = THIS;
 
+	ret = glusterd_options_init (this);
+        if (ret < 0)
+                goto out;
+
         ret = glusterd_store_retrieve_volumes (this, NULL);
         if (ret)
                 goto out;
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 04dba9d0b41..0a4aea24d85 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -1614,7 +1614,7 @@ glusterd_service_stop (const char *service, char *pidfile, int sig,
                 goto out;
 
         sleep (1);
-        if (gf_is_service_running (pidfile, NULL)) {
+        if (gf_is_service_running (pidfile, &pid)) {
                 ret = kill (pid, SIGKILL);
                 if (ret) {
                         gf_msg (this->name, GF_LOG_ERROR, errno,
@@ -1715,6 +1715,8 @@ glusterd_set_brick_socket_filepath (glusterd_volinfo_t *volinfo,
         xlator_t                *this = NULL;
         glusterd_conf_t         *priv = NULL;
         int                     expected_file_len = 0;
+        char                    export_path[PATH_MAX] = {0,};
+        char                    sock_filepath[PATH_MAX] = {0,};
 
         expected_file_len = strlen (GLUSTERD_SOCK_DIR) + strlen ("/") +
                             MD5_DIGEST_LENGTH*2 + strlen (".socket") + 1;
@@ -1725,18 +1727,10 @@ glusterd_set_brick_socket_filepath (glusterd_volinfo_t *volinfo,
         priv = this->private;
 
         GLUSTERD_GET_VOLUME_DIR (volume_dir, volinfo, priv);
-        if (is_brick_mx_enabled ()) {
-                snprintf (sockpath, len, "%s/run/daemon-%s.socket",
-                          volume_dir, brickinfo->hostname);
-        } else {
-                char                    export_path[PATH_MAX] = {0,};
-                char                    sock_filepath[PATH_MAX] = {0,};
-                GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, export_path);
-                snprintf (sock_filepath, PATH_MAX, "%s/run/%s-%s",
-                          volume_dir, brickinfo->hostname, export_path);
-
-                glusterd_set_socket_filepath (sock_filepath, sockpath, len);
-        }
+        GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, export_path);
+        snprintf (sock_filepath, PATH_MAX, "%s/run/%s-%s",
+                  volume_dir, brickinfo->hostname, export_path);
+        glusterd_set_socket_filepath (sock_filepath, sockpath, len);
 }
 
 /* connection happens only if it is not aleady connected,
@@ -1830,6 +1824,7 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t  *volinfo,
         char                    rdma_brick_path[PATH_MAX] = {0,};
         struct rpc_clnt         *rpc = NULL;
         rpc_clnt_connection_t   *conn  = NULL;
+        int                     pid    = -1;
 
         GF_ASSERT (volinfo);
         GF_ASSERT (brickinfo);
@@ -1852,7 +1847,7 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t  *volinfo,
         }
 
         GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv);
-        if (gf_is_service_running (pidfile, NULL)) {
+        if (gf_is_service_running (pidfile, &pid)) {
                 goto connect;
         }
 
@@ -5044,8 +5039,6 @@ attach_brick (xlator_t *this,
 
         GLUSTERD_GET_BRICK_PIDFILE (pidfile1, other_vol, other_brick, conf);
         GLUSTERD_GET_BRICK_PIDFILE (pidfile2, volinfo, brickinfo, conf);
-        (void) sys_unlink (pidfile2);
-        (void) sys_link (pidfile1, pidfile2);
 
         if (volinfo->is_snap_volume) {
                 snprintf (full_id, sizeof(full_id), "/%s/%s/%s.%s.%s",
@@ -5065,6 +5058,10 @@ attach_brick (xlator_t *this,
                                                GLUSTERD_BRICK_ATTACH);
                         rpc_clnt_unref (rpc);
                         if (!ret) {
+                                /* PID file is copied once brick has attached
+                                  successfully
+                                */
+                                glusterd_copy_file (pidfile1, pidfile2);
                                 return 0;
                         }
                 }
@@ -5284,6 +5281,80 @@ find_compatible_brick (glusterd_conf_t *conf,
         return NULL;
 }
 
+/* Below function is use to populate sockpath based on passed pid
+   value as a argument after check the value from proc
+*/
+
+void
+glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len)
+{
+        char fname[128] = {0,};
+        char buf[1024] = {0,};
+        char cmdline[2048] = {0,};
+        xlator_t                *this = NULL;
+        int fd         = -1;
+        int i = 0, j = 0;
+        char   *ptr   = NULL;
+        char   *brptr   = NULL;
+        char tmpsockpath[PATH_MAX] = {0,};
+        size_t blen    = 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        snprintf(fname, sizeof(fname), "/proc/%d/cmdline", pid);
+
+        if (sys_access (fname , R_OK) != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                         "brick process %d is not running", pid);
+                return;
+        }
+
+        fd = open(fname, O_RDONLY);
+        if (fd != -1) {
+                blen = (int)sys_read(fd, buf, 1024);
+        } else {
+                gf_log (this->name, GF_LOG_ERROR,
+                         "open failed %s to open a file %s", strerror (errno),
+                                                              fname);
+                return;
+        }
+
+        /* convert cmdline to single string */
+        for (i = 0 , j = 0; i < blen; i++)  {
+                if (buf[i] == '\0')
+                        cmdline[j++] = ' ';
+                else if (buf[i] < 32 || buf[i] > 126) /* remove control char */
+                        continue;
+                else if (buf[i] == '"' || buf[i] == '\\') {
+                        cmdline[j++] = '\\';
+                        cmdline[j++] = buf[i];
+                } else {
+                        cmdline[j++] = buf[i];
+                }
+        }
+        cmdline[j] = '\0';
+        if (fd)
+                sys_close(fd);
+
+        ptr =   strstr(cmdline, "-S ");
+        ptr =   strchr(ptr, '/');
+        brptr = strstr(ptr, "--brick-name");
+        i = 0;
+
+        while (ptr < brptr) {
+                if (*ptr != 32)
+                        tmpsockpath[i++] = *ptr;
+                ptr++;
+        }
+
+        if (tmpsockpath[0]) {
+                strncpy (sockpath, tmpsockpath , i);
+        }
+
+}
+
+
 int
 glusterd_brick_start (glusterd_volinfo_t *volinfo,
                       glusterd_brickinfo_t *brickinfo,
@@ -5295,7 +5366,6 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
         glusterd_conf_t         *conf = NULL;
         int32_t                 pid                   = -1;
         char                    pidfile[PATH_MAX]     = {0};
-        FILE                    *fp;
         char                    socketpath[PATH_MAX]  = {0};
         glusterd_volinfo_t      *other_vol;
 
@@ -5349,8 +5419,16 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
                          * same port (on another brick) and re-use that.
                          * TBD: re-use RPC connection across bricks
                          */
-                        glusterd_set_brick_socket_filepath (volinfo, brickinfo,
-                                        socketpath, sizeof (socketpath));
+                        if (is_brick_mx_enabled ())
+                                glusterd_get_sock_from_brick_pid (pid, socketpath,
+                                                                  sizeof(socketpath));
+                        else
+                                glusterd_set_brick_socket_filepath (volinfo, brickinfo,
+                                                                    socketpath,
+                                                                    sizeof (socketpath));
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "Using %s as sockfile for brick %s of volume %s ",
+                                socketpath, brickinfo->path, volinfo->volname);
                         (void) glusterd_brick_connect (volinfo, brickinfo,
                                         socketpath);
                 }
@@ -5389,12 +5467,6 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
          *
          * TBD: pray for GlusterD 2 to be ready soon.
          */
-        (void) sys_unlink (pidfile);
-        fp = fopen (pidfile, "w+");
-        if (fp) {
-                (void) fprintf (fp, "0\n");
-                (void) fclose (fp);
-        }
 
         ret = glusterd_volume_start_glusterfs (volinfo, brickinfo, wait);
         if (ret) {
diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
index bbd7adcac1b..ef20689b614 100644
--- a/xlators/mgmt/glusterd/src/glusterd.c
+++ b/xlators/mgmt/glusterd/src/glusterd.c
@@ -1854,9 +1854,6 @@ init (xlator_t *this)
         if (glusterd_get_peers_count () < 2)
                 glusterd_launch_synctask (glusterd_spawn_daemons, NULL);
 
-        ret = glusterd_options_init (this);
-        if (ret < 0)
-                goto out;
 
         ret = glusterd_handle_upgrade_downgrade (this->options, conf, upgrade,
                                                  downgrade);
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index 5d16b81bb6c..17e29bbbad3 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -1257,4 +1257,7 @@ int glusterd_op_remove_tier_brick (dict_t *dict, char **op_errstr,
 int
 glusterd_tier_prevalidate (dict_t *dict, char **op_errstr,
                                dict_t *rsp_dict, uint32_t *op_errno);
+
+int
+glusterd_options_init (xlator_t *this);
 #endif
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index ae07b28e48a..ca2c2b923d3 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -51,6 +51,7 @@
 #include "hashfn.h"
 #include "glusterfs-acl.h"
 #include "events.h"
+#include <sys/types.h>
 
 char *marker_xattrs[] = {"trusted.glusterfs.quota.*",
                          "trusted.glusterfs.*.xtime",
@@ -1829,6 +1830,9 @@ posix_health_check_thread_proc (void *data)
         struct posix_private *priv               = NULL;
         uint32_t              interval           = 0;
         int                   ret                = -1;
+        xlator_t                *top             = NULL;
+        xlator_list_t           **trav_p         = NULL;
+        int                     count            = 0;
 
         this = data;
         priv = this->private;
@@ -1840,7 +1844,6 @@ posix_health_check_thread_proc (void *data)
 
         gf_msg_debug (this->name, 0, "health-check thread started, "
                 "interval = %d seconds", interval);
-
         while (1) {
                 /* aborting sleep() is a request to exit this thread, sleep()
                  * will normally not return when cancelled */
@@ -1877,18 +1880,33 @@ abort:
 
         xlator_notify (this->parents->xlator, GF_EVENT_CHILD_DOWN, this);
 
-        ret = sleep (30);
-        if (ret == 0) {
+        /* Below code is use to ensure if brick multiplexing is enabled if
+           count is more than 1 it means brick mux has enabled
+        */
+        if (this->ctx->active) {
+                top = this->ctx->active->first;
+                for (trav_p = &top->children; *trav_p;
+                                               trav_p = &(*trav_p)->next) {
+                        count++;
+                }
+        }
+
+        if (count == 1) {
                 gf_msg (this->name, GF_LOG_EMERG, 0, P_MSG_HEALTHCHECK_FAILED,
                         "still alive! -> SIGTERM");
-                kill (getpid(), SIGTERM);
-        }
+                ret = sleep (30);
 
-        ret = sleep (30);
-        if (ret == 0) {
+                /* Need to kill the process only while brick mux has not enabled
+                */
+                if (ret == 0)
+                        kill (getpid(), SIGTERM);
+
+                ret = sleep (30);
                 gf_msg (this->name, GF_LOG_EMERG, 0, P_MSG_HEALTHCHECK_FAILED,
-                        "still alive! -> SIGKILL");
-                kill (getpid(), SIGKILL);
+                        "still alive! -> SIGTERM");
+                if (ret == 0)
+                        kill (getpid(), SIGTERM);
+
         }
 
         return NULL;
author	Mohit Agrawal <moagrawa@redhat.com>	2017-05-08 19:29:22 +0530
committer	Atin Mukherjee <amukherj@redhat.com>	2017-05-09 01:30:01 +0000
commit	21c7f7baccfaf644805e63682e5a7d2a9864a1e6 (patch)
tree	01bbbd50d13f609eb8f7d2cbe2ce5e3af1652e42
parent	18e07cf01f975c80152e5469fb4e4274f08dc636 (diff)