diff options
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-rebalance.c')
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-rebalance.c | 83 |
1 files changed, 60 insertions, 23 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index 271de6111..b7b974c68 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -42,12 +42,27 @@ glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe); int glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr, - size_t len) + size_t len, glusterd_op_t op) { - int ret = -1; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + /* Check only if operation is not remove-brick */ + if ((GD_OP_REMOVE_BRICK != op) && + !gd_is_remove_brick_committed (volinfo)) { + gf_log (this->name, GF_LOG_DEBUG, "A remove-brick task on " + "volume %s is not yet committed", volinfo->volname); + snprintf (op_errstr, len, "A remove-brick task on volume %s is" + " not yet committed. Either commit or stop the " + "remove-brick task.", volinfo->volname); + goto out; + } if (glusterd_is_defrag_on (volinfo)) { - gf_log ("glusterd", GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "rebalance on volume %s already started", volinfo->volname); snprintf (op_errstr, len, "Rebalance on %s is already started", @@ -57,7 +72,7 @@ glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr, if (glusterd_is_rb_started (volinfo) || glusterd_is_rb_paused (volinfo)) { - gf_log ("glusterd", GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "Rebalance failed as replace brick is in progress on volume %s", volinfo->volname); snprintf (op_errstr, len, "Rebalance failed as replace brick is in progress on " @@ -66,13 +81,14 @@ glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr, } ret = 0; out: - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } + int32_t -glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, - rpc_clnt_event_t event, void *data) +__glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) { glusterd_volinfo_t *volinfo = NULL; glusterd_defrag_info_t *defrag = NULL; @@ -130,8 +146,6 @@ glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, GF_DEFRAG_STATUS_STARTED) { volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_FAILED; - } else { - volinfo->rebal.defrag_cmd = 0; } } @@ -160,6 +174,14 @@ glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, return ret; } +int32_t +glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) +{ + return glusterd_big_locked_notify (rpc, mydata, event, + data, __glusterd_defrag_notify); +} + int glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, size_t len, int cmd, defrag_cbk_fn_t cbk, @@ -174,15 +196,14 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, char pidfile[PATH_MAX] = {0,}; char logfile[PATH_MAX] = {0,}; dict_t *options = NULL; -#ifdef DEBUG char valgrind_logfile[PATH_MAX] = {0,}; -#endif + priv = THIS->private; GF_ASSERT (volinfo); GF_ASSERT (op_errstr); - ret = glusterd_defrag_start_validate (volinfo, op_errstr, len); + ret = glusterd_defrag_start_validate (volinfo, op_errstr, len, op); if (ret) goto out; if (!volinfo->rebal.defrag) @@ -196,6 +217,7 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, defrag->cmd = cmd; + volinfo->rebal.defrag_cmd = cmd; volinfo->rebal.op = op; LOCK_INIT (&defrag->lock); @@ -218,7 +240,7 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, snprintf (logfile, PATH_MAX, "%s/%s-rebalance.log", DEFAULT_LOG_FILE_DIRECTORY, volinfo->volname); runinit (&runner); -#ifdef DEBUG + if (priv->valgrind) { snprintf (valgrind_logfile, PATH_MAX, "%s/valgrind-%s-rebalance.log", @@ -226,10 +248,10 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, volinfo->volname); runner_add_args (&runner, "valgrind", "--leak-check=full", - "--trace-children=yes", NULL); + "--trace-children=yes", "--track-origins=yes", + NULL); runner_argprintf (&runner, "--log-file=%s", valgrind_logfile); } -#endif runner_add_args (&runner, SBIN_DIR"/glusterfs", "-s", "localhost", "--volfile-id", volinfo->volname, @@ -241,6 +263,7 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, "*replicate*.metadata-self-heal=off", "--xlator-option", "*replicate*.entry-self-heal=off", "--xlator-option", "*replicate*.readdir-failover=off", + "--xlator-option", "*dht.readdir-optimize=on", NULL); runner_add_arg (&runner, "--xlator-option"); runner_argprintf ( &runner, "*dht.rebalance-cmd=%d",cmd); @@ -268,14 +291,16 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, * default timeout of 30mins used for unreliable network connections is * too long for unix domain socket connections. */ - ret = rpc_clnt_transport_unix_options_build (&options, sockfile, 600); + ret = rpc_transport_unix_options_build (&options, sockfile, 600); if (ret) { gf_log (THIS->name, GF_LOG_ERROR, "Unix options build failed"); goto out; } + synclock_unlock (&priv->big_lock); ret = glusterd_rpc_create (&defrag->rpc, options, glusterd_defrag_notify, volinfo); + synclock_lock (&priv->big_lock); if (ret) { gf_log (THIS->name, GF_LOG_ERROR, "RPC create failed"); goto out; @@ -320,14 +345,16 @@ glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo, * default timeout of 30mins used for unreliable network connections is * too long for unix domain socket connections. */ - ret = rpc_clnt_transport_unix_options_build (&options, sockfile, 600); + ret = rpc_transport_unix_options_build (&options, sockfile, 600); if (ret) { gf_log (THIS->name, GF_LOG_ERROR, "Unix options build failed"); goto out; } + synclock_unlock (&priv->big_lock); ret = glusterd_rpc_create (&defrag->rpc, options, glusterd_defrag_notify, volinfo); + synclock_lock (&priv->big_lock); if (ret) { gf_log (THIS->name, GF_LOG_ERROR, "RPC create failed"); goto out; @@ -376,7 +403,7 @@ out: } int -glusterd_handle_defrag_volume (rpcsvc_request_t *req) +__glusterd_handle_defrag_volume (rpcsvc_request_t *req) { int32_t ret = -1; gf_cli_req cli_req = {{0,}}; @@ -461,6 +488,12 @@ out: return 0; } +int +glusterd_handle_defrag_volume (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_defrag_volume); +} + int glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr) @@ -499,7 +532,7 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr) case GF_DEFRAG_CMD_START: case GF_DEFRAG_CMD_START_LAYOUT_FIX: case GF_DEFRAG_CMD_START_FORCE: - if (is_origin_glusterd ()) { + if (is_origin_glusterd (dict)) { op_ctx = glusterd_op_get_ctx (); if (!op_ctx) { ret = -1; @@ -525,8 +558,9 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr) ret = 0; } } - ret = glusterd_defrag_start_validate (volinfo, - msg, sizeof (msg)); + ret = glusterd_defrag_start_validate (volinfo, msg, + sizeof (msg), + GD_OP_REBALANCE); if (ret) { gf_log (this->name, GF_LOG_DEBUG, "start validate failed"); @@ -629,15 +663,18 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) ret = 0; } else { uuid_parse (task_id_str, volinfo->rebal.rebalance_id) ; + volinfo->rebal.op = GD_OP_REBALANCE; } ret = glusterd_handle_defrag_start (volinfo, msg, sizeof (msg), cmd, NULL, GD_OP_REBALANCE); break; case GF_DEFRAG_CMD_STOP: - /* Clear task-id only on explicitly stopping the - * rebalance process. + /* Clear task-id only on explicitly stopping rebalance. + * Also clear the stored operation, so it doesn't cause trouble + * with future rebalance/remove-brick starts */ uuid_clear (volinfo->rebal.rebalance_id); + volinfo->rebal.op = GD_OP_NONE; /* Fall back to the old volume file in case of decommission*/ list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, |
