diff options
| -rw-r--r-- | tests/bugs/glusterd/bug-1451248-mux-reboot-node.t | 54 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-handler.c | 6 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-pmap.c | 1 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 18 | 
4 files changed, 79 insertions, 0 deletions
diff --git a/tests/bugs/glusterd/bug-1451248-mux-reboot-node.t b/tests/bugs/glusterd/bug-1451248-mux-reboot-node.t new file mode 100644 index 00000000000..5d8ce6e75e6 --- /dev/null +++ b/tests/bugs/glusterd/bug-1451248-mux-reboot-node.t @@ -0,0 +1,54 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../traps.rc +. $(dirname $0)/../../volume.rc + +function count_up_bricks { +        $CLI --xml volume status all | grep '<status>1' | wc -l +} + +function count_brick_processes { +	pgrep glusterfsd | wc -l +} + +function count_brick_pids { +        $CLI --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \ +                                     | grep -v "N/A" | sort | uniq | wc -l +} + +cleanup; + +TEST glusterd +TEST $CLI volume set all cluster.brick-multiplex on +push_trapfunc "$CLI volume set all cluster.brick-multiplex off" +push_trapfunc "cleanup" + +TEST $CLI volume create $V0 $H0:$B0/brick{0..2} +TEST $CLI volume start $V0 + +EXPECT 1 count_brick_processes +EXPECT 1 count_brick_pids +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks + +pkill gluster +TEST glusterd + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks + +pkill glusterd +TEST glusterd + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks + +TEST $CLI volume create $V1 $H0:$B0/brick{3..5} +TEST $CLI volume start $V1 + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks + diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 2a5772b6669..f16bc20c01f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -5811,7 +5811,10 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,                  rpc_clnt_set_connected (&rpc->conn);                  gf_msg_debug (this->name, 0, "Connected to %s:%s",                          brickinfo->hostname, brickinfo->path); +                  glusterd_set_brick_status (brickinfo, GF_BRICK_STARTED); +                brickinfo->started_here = _gf_true; +                  gf_event (EVENT_BRICK_CONNECTED, "peer=%s;volume=%s;brick=%s",                            brickinfo->hostname, volinfo->volname,                            brickinfo->path); @@ -5841,6 +5844,9 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,                                  GD_MSG_BRICK_DISCONNECTED,                                  "Brick %s:%s has disconnected from glusterd.",                                  brickinfo->hostname, brickinfo->path); + +                        brickinfo->started_here = _gf_false; +                          ret = get_volinfo_from_brickid (brickid, &volinfo);                          if (ret) {                                  gf_msg (this->name, GF_LOG_ERROR, 0, diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c index c3f5dbc3cd1..2b60b17841b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-pmap.c +++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c @@ -563,6 +563,7 @@ __gluster_pmap_signout (rpcsvc_request_t *req)                          GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo,                                                      conf);                          sys_unlink (pidfile); +                        brickinfo->started_here = _gf_false;                  }          } diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 47ac842193e..ea8d60cd87b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -2143,6 +2143,8 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,          GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf);          gf_msg_debug (this->name,  0, "Unlinking pidfile %s", pidfile);          (void) sys_unlink (pidfile); + +        brickinfo->started_here = _gf_false;  out:          return ret;  } @@ -5170,6 +5172,7 @@ find_compat_brick_in_vol (glusterd_conf_t *conf,          glusterd_brickinfo_t    *other_brick;          char                    pidfile2[PATH_MAX]      = {0};          int32_t                 pid2                    = -1; +        int16_t                 retries                 = 15;          /*           * If comp_vol is provided, we have to check *volume* compatibility @@ -5212,8 +5215,22 @@ find_compat_brick_in_vol (glusterd_conf_t *conf,                  if (strcmp (brickinfo->hostname, other_brick->hostname) != 0) {                          continue;                  } +                  GLUSTERD_GET_BRICK_PIDFILE (pidfile2, srch_vol, other_brick,                                              conf); + +                /* It is possible that the pidfile hasn't yet been populated, +                 * when bricks are started in "no-wait" mode; for example +                 * when bricks are started by glusterd_restart_bricks(). So +                 * wait for the pidfile to be populated with a value before +                 * checking if the service is running */ +                while (retries > 0) { +                        if (sys_access (pidfile2, F_OK) == 0) +                                break; +                        sleep (1); +                        retries--; +                } +                  if (!gf_is_service_running (pidfile2, &pid2)) {                          gf_log (this->name, GF_LOG_INFO,                                  "cleaning up dead brick %s:%s", @@ -5457,6 +5474,7 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,                                  socketpath, brickinfo->path, volinfo->volname);                          (void) glusterd_brick_connect (volinfo, brickinfo,                                          socketpath); +                        brickinfo->started_here = _gf_true;                  }                  return 0;          }  | 
