diff options
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-rebalance.c')
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-rebalance.c | 136 |
1 files changed, 93 insertions, 43 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index 4c7282e82..b7b974c68 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -42,12 +42,27 @@ glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe); int glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr, - size_t len) + size_t len, glusterd_op_t op) { - int ret = -1; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + /* Check only if operation is not remove-brick */ + if ((GD_OP_REMOVE_BRICK != op) && + !gd_is_remove_brick_committed (volinfo)) { + gf_log (this->name, GF_LOG_DEBUG, "A remove-brick task on " + "volume %s is not yet committed", volinfo->volname); + snprintf (op_errstr, len, "A remove-brick task on volume %s is" + " not yet committed. Either commit or stop the " + "remove-brick task.", volinfo->volname); + goto out; + } if (glusterd_is_defrag_on (volinfo)) { - gf_log ("glusterd", GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "rebalance on volume %s already started", volinfo->volname); snprintf (op_errstr, len, "Rebalance on %s is already started", @@ -57,7 +72,7 @@ glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr, if (glusterd_is_rb_started (volinfo) || glusterd_is_rb_paused (volinfo)) { - gf_log ("glusterd", GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "Rebalance failed as replace brick is in progress on volume %s", volinfo->volname); snprintf (op_errstr, len, "Rebalance failed as replace brick is in progress on " @@ -66,13 +81,14 @@ glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr, } ret = 0; out: - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } + int32_t -glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, - rpc_clnt_event_t event, void *data) +__glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) { glusterd_volinfo_t *volinfo = NULL; glusterd_defrag_info_t *defrag = NULL; @@ -130,8 +146,6 @@ glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, GF_DEFRAG_STATUS_STARTED) { volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_FAILED; - } else { - volinfo->rebal.defrag_cmd = 0; } } @@ -160,6 +174,14 @@ glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, return ret; } +int32_t +glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) +{ + return glusterd_big_locked_notify (rpc, mydata, event, + data, __glusterd_defrag_notify); +} + int glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, size_t len, int cmd, defrag_cbk_fn_t cbk, @@ -174,15 +196,14 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, char pidfile[PATH_MAX] = {0,}; char logfile[PATH_MAX] = {0,}; dict_t *options = NULL; -#ifdef DEBUG char valgrind_logfile[PATH_MAX] = {0,}; -#endif + priv = THIS->private; GF_ASSERT (volinfo); GF_ASSERT (op_errstr); - ret = glusterd_defrag_start_validate (volinfo, op_errstr, len); + ret = glusterd_defrag_start_validate (volinfo, op_errstr, len, op); if (ret) goto out; if (!volinfo->rebal.defrag) @@ -196,6 +217,7 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, defrag->cmd = cmd; + volinfo->rebal.defrag_cmd = cmd; volinfo->rebal.op = op; LOCK_INIT (&defrag->lock); @@ -218,7 +240,7 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, snprintf (logfile, PATH_MAX, "%s/%s-rebalance.log", DEFAULT_LOG_FILE_DIRECTORY, volinfo->volname); runinit (&runner); -#ifdef DEBUG + if (priv->valgrind) { snprintf (valgrind_logfile, PATH_MAX, "%s/valgrind-%s-rebalance.log", @@ -226,10 +248,10 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, volinfo->volname); runner_add_args (&runner, "valgrind", "--leak-check=full", - "--trace-children=yes", NULL); + "--trace-children=yes", "--track-origins=yes", + NULL); runner_argprintf (&runner, "--log-file=%s", valgrind_logfile); } -#endif runner_add_args (&runner, SBIN_DIR"/glusterfs", "-s", "localhost", "--volfile-id", volinfo->volname, @@ -240,6 +262,8 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, "--xlator-option", "*replicate*.metadata-self-heal=off", "--xlator-option", "*replicate*.entry-self-heal=off", + "--xlator-option", "*replicate*.readdir-failover=off", + "--xlator-option", "*dht.readdir-optimize=on", NULL); runner_add_arg (&runner, "--xlator-option"); runner_argprintf ( &runner, "*dht.rebalance-cmd=%d",cmd); @@ -267,14 +291,16 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, * default timeout of 30mins used for unreliable network connections is * too long for unix domain socket connections. */ - ret = rpc_clnt_transport_unix_options_build (&options, sockfile, 600); + ret = rpc_transport_unix_options_build (&options, sockfile, 600); if (ret) { gf_log (THIS->name, GF_LOG_ERROR, "Unix options build failed"); goto out; } + synclock_unlock (&priv->big_lock); ret = glusterd_rpc_create (&defrag->rpc, options, glusterd_defrag_notify, volinfo); + synclock_lock (&priv->big_lock); if (ret) { gf_log (THIS->name, GF_LOG_ERROR, "RPC create failed"); goto out; @@ -319,14 +345,16 @@ glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo, * default timeout of 30mins used for unreliable network connections is * too long for unix domain socket connections. */ - ret = rpc_clnt_transport_unix_options_build (&options, sockfile, 600); + ret = rpc_transport_unix_options_build (&options, sockfile, 600); if (ret) { gf_log (THIS->name, GF_LOG_ERROR, "Unix options build failed"); goto out; } + synclock_unlock (&priv->big_lock); ret = glusterd_rpc_create (&defrag->rpc, options, glusterd_defrag_notify, volinfo); + synclock_lock (&priv->big_lock); if (ret) { gf_log (THIS->name, GF_LOG_ERROR, "RPC create failed"); goto out; @@ -375,26 +403,32 @@ out: } int -glusterd_handle_defrag_volume (rpcsvc_request_t *req) +__glusterd_handle_defrag_volume (rpcsvc_request_t *req) { - int32_t ret = -1; - gf_cli_req cli_req = {{0,}}; - glusterd_conf_t *priv = NULL; - dict_t *dict = NULL; - char *volname = NULL; - gf_cli_defrag_type cmd = 0; + int32_t ret = -1; + gf_cli_req cli_req = {{0,}}; + glusterd_conf_t *priv = NULL; + dict_t *dict = NULL; + char *volname = NULL; + gf_cli_defrag_type cmd = 0; + char msg[2048] = {0,}; + xlator_t *this = NULL; GF_ASSERT (req); + this = THIS; + GF_ASSERT (this); - priv = THIS->private; + priv = this->private; + GF_ASSERT (priv); - if (!xdr_to_generic (req->msg[0], &cli_req, - (xdrproc_t)xdr_gf_cli_req)) { + ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { //failed to decode msg; req->rpc_err = GARBAGE_ARGS; goto out; } - if (cli_req.dict.dict_len) { + + if (cli_req.dict.dict_len) { /* Unserialize the dictionary */ dict = dict_new (); @@ -402,24 +436,25 @@ glusterd_handle_defrag_volume (rpcsvc_request_t *req) cli_req.dict.dict_len, &dict); if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, - "failed to " + gf_log (this->name, GF_LOG_ERROR, "failed to " "unserialize req-buffer to dictionary"); + snprintf (msg, sizeof (msg), "Unable to decode the " + "command"); goto out; } } ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, - "Failed to get volname"); + snprintf (msg, sizeof (msg), "Failed to get volume name"); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); goto out; } ret = dict_get_int32 (dict, "rebalance-command", (int32_t*)&cmd); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, - "Failed to get command"); + snprintf (msg, sizeof (msg), "Failed to get command"); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); goto out; } @@ -430,9 +465,10 @@ glusterd_handle_defrag_volume (rpcsvc_request_t *req) if ((cmd == GF_DEFRAG_CMD_STATUS) || (cmd == GF_DEFRAG_CMD_STOP)) { ret = glusterd_op_begin (req, GD_OP_DEFRAG_BRICK_VOLUME, - dict); + dict, msg, sizeof (msg)); } else - ret = glusterd_op_begin (req, GD_OP_REBALANCE, dict); + ret = glusterd_op_begin (req, GD_OP_REBALANCE, dict, + msg, sizeof (msg)); out: @@ -440,11 +476,10 @@ out: glusterd_op_sm (); if (ret) { + if (msg[0] == '\0') + snprintf (msg, sizeof (msg), "Operation failed"); ret = glusterd_op_send_cli_response (GD_OP_REBALANCE, ret, 0, - req, dict, - "operation failed"); - if (dict) - dict_unref (dict); + req, dict, msg); } @@ -453,6 +488,12 @@ out: return 0; } +int +glusterd_handle_defrag_volume (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_defrag_volume); +} + int glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr) @@ -491,7 +532,7 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr) case GF_DEFRAG_CMD_START: case GF_DEFRAG_CMD_START_LAYOUT_FIX: case GF_DEFRAG_CMD_START_FORCE: - if (is_origin_glusterd ()) { + if (is_origin_glusterd (dict)) { op_ctx = glusterd_op_get_ctx (); if (!op_ctx) { ret = -1; @@ -517,8 +558,9 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr) ret = 0; } } - ret = glusterd_defrag_start_validate (volinfo, - msg, sizeof (msg)); + ret = glusterd_defrag_start_validate (volinfo, msg, + sizeof (msg), + GD_OP_REBALANCE); if (ret) { gf_log (this->name, GF_LOG_DEBUG, "start validate failed"); @@ -621,11 +663,19 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) ret = 0; } else { uuid_parse (task_id_str, volinfo->rebal.rebalance_id) ; + volinfo->rebal.op = GD_OP_REBALANCE; } ret = glusterd_handle_defrag_start (volinfo, msg, sizeof (msg), cmd, NULL, GD_OP_REBALANCE); break; case GF_DEFRAG_CMD_STOP: + /* Clear task-id only on explicitly stopping rebalance. + * Also clear the stored operation, so it doesn't cause trouble + * with future rebalance/remove-brick starts + */ + uuid_clear (volinfo->rebal.rebalance_id); + volinfo->rebal.op = GD_OP_NONE; + /* Fall back to the old volume file in case of decommission*/ list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, brick_list) { |
