diff options
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-pmap.c | 7 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 73 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 1 |
3 files changed, 57 insertions, 24 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c index 1fc7a250748..2a754769c95 100644 --- a/xlators/mgmt/glusterd/src/glusterd-pmap.c +++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c @@ -502,6 +502,9 @@ __gluster_pmap_signin (rpcsvc_request_t *req) GF_PMAP_PORT_BRICKSERVER, req->trans); ret = glusterd_get_brickinfo (THIS, args.brick, args.port, &brickinfo); + /* Update portmap status in brickinfo */ + if (brickinfo) + brickinfo->port_registered = _gf_true; fail: glusterd_submit_reply (req, &rsp, NULL, 0, NULL, @@ -554,6 +557,10 @@ __gluster_pmap_signout (rpcsvc_request_t *req) brick_path, GF_PMAP_PORT_BRICKSERVER, req->trans); } + /* Update portmap status on brickinfo */ + if (brickinfo) + brickinfo->port_registered = _gf_false; + /* Clean up the pidfile for this brick given glusterfsd doesn't clean it * any more. This is required to ensure we don't end up with having * stale pid files in case a brick is killed from the backend diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 9dcb4f78089..e0f166f1d4a 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -2102,6 +2102,7 @@ retry: brickinfo->port = port; brickinfo->rdma_port = rdma_port; brickinfo->status = GF_BRICK_STARTING; + brickinfo->port_registered = _gf_false; if (wait) { synclock_unlock (&priv->big_lock); @@ -5385,13 +5386,6 @@ attach_brick (xlator_t *this, GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, unslashed); - ret = pmap_registry_extend (this, other_brick->port, - brickinfo->path); - if (ret != 0) { - gf_log (this->name, GF_LOG_ERROR, - "adding brick to process failed"); - return -1; - } GLUSTERD_GET_BRICK_PIDFILE (pidfile1, other_vol, other_brick, conf); GLUSTERD_GET_BRICK_PIDFILE (pidfile2, volinfo, brickinfo, conf); @@ -5414,6 +5408,14 @@ attach_brick (xlator_t *this, GLUSTERD_BRICK_ATTACH); rpc_clnt_unref (rpc); if (!ret) { + ret = pmap_registry_extend (this, other_brick->port, + brickinfo->path); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "adding brick to process failed"); + return ret; + } + /* PID file is copied once brick has attached successfully */ @@ -5422,6 +5424,7 @@ attach_brick (xlator_t *this, brickinfo->status = GF_BRICK_STARTED; brickinfo->rpc = rpc_clnt_ref (other_brick->rpc); + brickinfo->port_registered = _gf_true; ret = glusterd_brick_process_add_brick (brickinfo, volinfo); if (ret) { @@ -5475,6 +5478,7 @@ find_compat_brick_in_vol (glusterd_conf_t *conf, int16_t retries = 15; int mux_limit = -1; int ret = -1; + gf_boolean_t brick_status = _gf_false; /* * If comp_vol is provided, we have to check *volume* compatibility @@ -5549,31 +5553,45 @@ find_compat_brick_in_vol (glusterd_conf_t *conf, "set. Continuing with no limit set for " "brick multiplexing."); } - - GLUSTERD_GET_BRICK_PIDFILE (pidfile2, srch_vol, other_brick, - conf); - - /* It is possible that the pidfile hasn't yet been populated, - * when bricks are started in "no-wait" mode; for example - * when bricks are started by glusterd_restart_bricks(). So - * wait for the pidfile to be populated with a value before - * checking if the service is running */ + /* The first brick process might take some time to finish its + * handshake with glusterd and prepare the graph. We can't + * afford to send attach_req for other bricks till that time. + * brick process sends PMAP_SIGNIN event after processing the + * volfile and hence it's safe to assume that if glusterd has + * received a pmap signin request for the same brick, we are + * good for subsequent attach requests. + */ + retries = 15; while (retries > 0) { - if (sys_access (pidfile2, F_OK) == 0 && - gf_is_service_running (pidfile2, &pid2)) { - break; + if (other_brick->port_registered) { + GLUSTERD_GET_BRICK_PIDFILE (pidfile2, srch_vol, + other_brick, conf); + if (sys_access (pidfile2, F_OK) == 0 && + gf_is_service_running (pidfile2, &pid2)) { + gf_msg_debug (this->name, 0, + "brick %s is running as a pid %d ", + other_brick->path, pid2); + brick_status = _gf_true; + break; + } } - sleep (1); + synclock_unlock (&conf->big_lock); + gf_msg_debug (this->name, 0, "brick %s is still" + " starting, waiting for 2 seconds ", + other_brick->path); + sleep(2); + synclock_lock (&conf->big_lock); retries--; } - if (retries == 0) { + if (!brick_status) { gf_log (this->name, GF_LOG_INFO, - "cleaning up dead brick %s:%s", + "brick has not come up so cleaning up dead brick %s:%s", other_brick->hostname, other_brick->path); other_brick->status = GF_BRICK_STOPPED; - sys_unlink (pidfile2); + if (pidfile2[0]) + sys_unlink (pidfile2); continue; } return other_brick; @@ -5849,6 +5867,12 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo, brickinfo->path); goto out; } + /* We need to set the status back to STARTING so that + * while the other (re)start brick requests come in for + * other bricks, this brick can be considered as + * compatible. + */ + brickinfo->status = GF_BRICK_STARTING; } return 0; } @@ -5892,7 +5916,8 @@ run: * * TBD: pray for GlusterD 2 to be ready soon. */ - + gf_log (this->name, GF_LOG_INFO, "starting a fresh brick process for " + "brick %s", brickinfo->path); ret = glusterd_volume_start_glusterfs (volinfo, brickinfo, wait); if (ret) { gf_msg (this->name, GF_LOG_ERROR, 0, diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 12a0c392d95..dd1ff7a66be 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -238,6 +238,7 @@ struct glusterd_brickinfo { the backend filesystem */ uint64_t statfs_fsid; uint32_t fs_share_count; + gf_boolean_t port_registered; }; typedef struct glusterd_brickinfo glusterd_brickinfo_t; |