diff options
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-rebalance.c')
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-rebalance.c | 952 |
1 files changed, 599 insertions, 353 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index 55a2e45e2..b7b974c68 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -1,28 +1,22 @@ /* - Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #ifndef _CONFIG_H #define _CONFIG_H #include "config.h" #endif + #include <inttypes.h> +#include <sys/types.h> +#include <unistd.h> #include <sys/resource.h> +#include <sys/statvfs.h> #include "globals.h" #include "compat.h" @@ -36,468 +30,720 @@ #include "glusterd-op-sm.h" #include "glusterd-utils.h" #include "glusterd-store.h" +#include "run.h" +#include "glusterd-volgen.h" #include "syscall.h" -#include "cli1.h" +#include "cli1-xdr.h" +#include "xdr-generic.h" +int32_t +glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe); int -gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir) +glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr, + size_t len, glusterd_op_t op) { - int ret = -1; - int dst_fd = -1; - int src_fd = -1; - DIR *fd = NULL; - glusterd_defrag_info_t *defrag = NULL; - struct dirent *entry = NULL; - struct stat stbuf = {0,}; - struct stat new_stbuf = {0,}; - char full_path[1024] = {0,}; - char tmp_filename[1024] = {0,}; - char value[16] = {0,}; - - if (!volinfo->defrag) + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + /* Check only if operation is not remove-brick */ + if ((GD_OP_REMOVE_BRICK != op) && + !gd_is_remove_brick_committed (volinfo)) { + gf_log (this->name, GF_LOG_DEBUG, "A remove-brick task on " + "volume %s is not yet committed", volinfo->volname); + snprintf (op_errstr, len, "A remove-brick task on volume %s is" + " not yet committed. Either commit or stop the " + "remove-brick task.", volinfo->volname); goto out; + } - defrag = volinfo->defrag; + if (glusterd_is_defrag_on (volinfo)) { + gf_log (this->name, GF_LOG_DEBUG, + "rebalance on volume %s already started", + volinfo->volname); + snprintf (op_errstr, len, "Rebalance on %s is already started", + volinfo->volname); + goto out; + } - fd = opendir (dir); - if (!fd) + if (glusterd_is_rb_started (volinfo) || + glusterd_is_rb_paused (volinfo)) { + gf_log (this->name, GF_LOG_DEBUG, + "Rebalance failed as replace brick is in progress on volume %s", + volinfo->volname); + snprintf (op_errstr, len, "Rebalance failed as replace brick is in progress on " + "volume %s", volinfo->volname); goto out; - while ((entry = readdir (fd))) { - if (!entry) - break; + } + ret = 0; +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} - if (!strcmp (entry->d_name, ".") || !strcmp (entry->d_name, "..")) - continue; - snprintf (full_path, 1024, "%s/%s", dir, entry->d_name); +int32_t +__glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) +{ + glusterd_volinfo_t *volinfo = NULL; + glusterd_defrag_info_t *defrag = NULL; + int ret = 0; + char pidfile[PATH_MAX]; + glusterd_conf_t *priv = NULL; - ret = stat (full_path, &stbuf); - if (ret == -1) - continue; + priv = THIS->private; + if (!priv) + return 0; - if (S_ISREG (stbuf.st_mode)) - defrag->num_files_lookedup += 1; + volinfo = mydata; + if (!volinfo) + return 0; - if (!(S_ISREG (stbuf.st_mode) && - ((stbuf.st_mode & 01000) == 01000))) - continue; + defrag = volinfo->rebal.defrag; + if (!defrag) + return 0; - /* If the file is open, don't run rebalance on it */ - ret = sys_lgetxattr (full_path, GLUSTERFS_OPEN_FD_COUNT, - &value, 16); - if ((ret < 0) || !strncmp (value, "1", 1)) - continue; + if ((event == RPC_CLNT_DISCONNECT) && defrag->connected) + volinfo->rebal.defrag = NULL; - /* If its a regular file, and sticky bit is set, we need to - rebalance that */ - snprintf (tmp_filename, 1024, "%s/.%s.gfs%llu", dir, - entry->d_name, - (unsigned long long)stbuf.st_size); + GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv); - dst_fd = creat (tmp_filename, (stbuf.st_mode & ~01000)); - if (dst_fd == -1) - continue; + switch (event) { + case RPC_CLNT_CONNECT: + { + if (defrag->connected) + return 0; - src_fd = open (full_path, O_RDONLY); - if (src_fd == -1) { - close (dst_fd); - continue; + LOCK (&defrag->lock); + { + defrag->connected = 1; } + UNLOCK (&defrag->lock); - while (1) { - ret = read (src_fd, defrag->databuf, 131072); - if (!ret || (ret < 0)) { - close (dst_fd); - close (src_fd); - break; - } - ret = write (dst_fd, defrag->databuf, ret); - if (ret < 0) { - close (dst_fd); - close (src_fd); - break; - } + gf_log ("", GF_LOG_DEBUG, "%s got RPC_CLNT_CONNECT", + rpc->conn.trans->name); + break; + } + + case RPC_CLNT_DISCONNECT: + { + if (!defrag->connected) + return 0; + + LOCK (&defrag->lock); + { + defrag->connected = 0; } + UNLOCK (&defrag->lock); - ret = stat (full_path, &new_stbuf); - if (ret < 0) - continue; - /* No need to rebalance, if there is some - activity on source file */ - if (new_stbuf.st_mtime != stbuf.st_mtime) - continue; - - ret = rename (tmp_filename, full_path); - if (ret != -1) { - LOCK (&defrag->lock); - { - defrag->total_files += 1; - defrag->total_data += stbuf.st_size; + if (!glusterd_is_service_running (pidfile, NULL)) { + if (volinfo->rebal.defrag_status == + GF_DEFRAG_STATUS_STARTED) { + volinfo->rebal.defrag_status = + GF_DEFRAG_STATUS_FAILED; } - UNLOCK (&defrag->lock); - } + } - if (volinfo->defrag_status == GF_DEFRAG_STATUS_STOPED) { - closedir (fd); - ret = -1; - goto out; + glusterd_store_perform_node_state_store (volinfo); + + if (defrag->rpc) { + rpc_clnt_unref (defrag->rpc); + defrag->rpc = NULL; } + if (defrag->cbk_fn) + defrag->cbk_fn (volinfo, + volinfo->rebal.defrag_status); + + GF_FREE (defrag); + gf_log ("", GF_LOG_DEBUG, "%s got RPC_CLNT_DISCONNECT", + rpc->conn.trans->name); + break; } - closedir (fd); + default: + gf_log ("", GF_LOG_TRACE, + "got some other RPC event %d", event); + ret = 0; + break; + } + + return ret; +} - fd = opendir (dir); - if (!fd) +int32_t +glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) +{ + return glusterd_big_locked_notify (rpc, mydata, event, + data, __glusterd_defrag_notify); +} + +int +glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, + size_t len, int cmd, defrag_cbk_fn_t cbk, + glusterd_op_t op) +{ + int ret = -1; + glusterd_defrag_info_t *defrag = NULL; + runner_t runner = {0,}; + glusterd_conf_t *priv = NULL; + char defrag_path[PATH_MAX]; + char sockfile[PATH_MAX] = {0,}; + char pidfile[PATH_MAX] = {0,}; + char logfile[PATH_MAX] = {0,}; + dict_t *options = NULL; + char valgrind_logfile[PATH_MAX] = {0,}; + + priv = THIS->private; + + GF_ASSERT (volinfo); + GF_ASSERT (op_errstr); + + ret = glusterd_defrag_start_validate (volinfo, op_errstr, len, op); + if (ret) + goto out; + if (!volinfo->rebal.defrag) + volinfo->rebal.defrag = + GF_CALLOC (1, sizeof (*volinfo->rebal.defrag), + gf_gld_mt_defrag_info); + if (!volinfo->rebal.defrag) goto out; - while ((entry = readdir (fd))) { - if (!entry) - break; - if (!strcmp (entry->d_name, ".") || !strcmp (entry->d_name, "..")) - continue; + defrag = volinfo->rebal.defrag; - snprintf (full_path, 1024, "%s/%s", dir, entry->d_name); + defrag->cmd = cmd; - ret = stat (full_path, &stbuf); - if (ret == -1) - continue; + volinfo->rebal.defrag_cmd = cmd; + volinfo->rebal.op = op; - if (!S_ISDIR (stbuf.st_mode)) - continue; + LOCK_INIT (&defrag->lock); - ret = gf_glusterd_rebalance_move_data (volinfo, - full_path); - if (ret) - break; + volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_STARTED; + + glusterd_volinfo_reset_defrag_stats (volinfo); + glusterd_store_perform_node_state_store (volinfo); + + GLUSTERD_GET_DEFRAG_DIR (defrag_path, volinfo, priv); + ret = mkdir_p (defrag_path, 0777, _gf_true); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to create " + "directory %s", defrag_path); + goto out; } - closedir (fd); - if (!entry) - ret = 0; + GLUSTERD_GET_DEFRAG_SOCK_FILE (sockfile, volinfo, priv); + GLUSTERD_GET_DEFRAG_PID_FILE (pidfile, volinfo, priv); + snprintf (logfile, PATH_MAX, "%s/%s-rebalance.log", + DEFAULT_LOG_FILE_DIRECTORY, volinfo->volname); + runinit (&runner); + + if (priv->valgrind) { + snprintf (valgrind_logfile, PATH_MAX, + "%s/valgrind-%s-rebalance.log", + DEFAULT_LOG_FILE_DIRECTORY, + volinfo->volname); + + runner_add_args (&runner, "valgrind", "--leak-check=full", + "--trace-children=yes", "--track-origins=yes", + NULL); + runner_argprintf (&runner, "--log-file=%s", valgrind_logfile); + } + + runner_add_args (&runner, SBIN_DIR"/glusterfs", + "-s", "localhost", "--volfile-id", volinfo->volname, + "--xlator-option", "*dht.use-readdirp=yes", + "--xlator-option", "*dht.lookup-unhashed=yes", + "--xlator-option", "*dht.assert-no-child-down=yes", + "--xlator-option", "*replicate*.data-self-heal=off", + "--xlator-option", + "*replicate*.metadata-self-heal=off", + "--xlator-option", "*replicate*.entry-self-heal=off", + "--xlator-option", "*replicate*.readdir-failover=off", + "--xlator-option", "*dht.readdir-optimize=on", + NULL); + runner_add_arg (&runner, "--xlator-option"); + runner_argprintf ( &runner, "*dht.rebalance-cmd=%d",cmd); + runner_add_arg (&runner, "--xlator-option"); + runner_argprintf (&runner, "*dht.node-uuid=%s", uuid_utoa(MY_UUID)); + runner_add_arg (&runner, "--socket-file"); + runner_argprintf (&runner, "%s",sockfile); + runner_add_arg (&runner, "--pid-file"); + runner_argprintf (&runner, "%s",pidfile); + runner_add_arg (&runner, "-l"); + runner_argprintf (&runner, logfile); + if (volinfo->memory_accounting) + runner_add_arg (&runner, "--mem-accounting"); + + ret = runner_run_nowait (&runner); + if (ret) { + gf_log ("glusterd", GF_LOG_DEBUG, "rebalance command failed"); + goto out; + } + + sleep (5); + + /* Setting frame-timeout to 10mins (600seconds). + * Unix domain sockets ensures that the connection is reliable. The + * default timeout of 30mins used for unreliable network connections is + * too long for unix domain socket connections. + */ + ret = rpc_transport_unix_options_build (&options, sockfile, 600); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Unix options build failed"); + goto out; + } + + synclock_unlock (&priv->big_lock); + ret = glusterd_rpc_create (&defrag->rpc, options, + glusterd_defrag_notify, volinfo); + synclock_lock (&priv->big_lock); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "RPC create failed"); + goto out; + } + + if (cbk) + defrag->cbk_fn = cbk; + out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } + int -gf_glusterd_rebalance_fix_layout (glusterd_volinfo_t *volinfo, const char *dir) +glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo, + glusterd_conf_t *priv, int cmd) { - int ret = -1; - char value[128] = {0,}; - char full_path[1024] = {0,}; - struct stat stbuf = {0,}; - DIR *fd = NULL; - struct dirent *entry = NULL; - - if (!volinfo->defrag) - goto out; + dict_t *options = NULL; + char sockfile[PATH_MAX] = {0,}; + int ret = -1; + glusterd_defrag_info_t *defrag = NULL; + + if (!volinfo->rebal.defrag) + volinfo->rebal.defrag = + GF_CALLOC (1, sizeof (*volinfo->rebal.defrag), + gf_gld_mt_defrag_info); - fd = opendir (dir); - if (!fd) + if (!volinfo->rebal.defrag) goto out; - while ((entry = readdir (fd))) { - if (!entry) - break; + defrag = volinfo->rebal.defrag; - if (!strcmp (entry->d_name, ".") || !strcmp (entry->d_name, "..")) - continue; + defrag->cmd = cmd; - snprintf (full_path, 1024, "%s/%s", dir, entry->d_name); + LOCK_INIT (&defrag->lock); - ret = stat (full_path, &stbuf); - if (ret == -1) - continue; + GLUSTERD_GET_DEFRAG_SOCK_FILE (sockfile, volinfo, priv); - if (S_ISDIR (stbuf.st_mode)) { - /* Fix the layout of the directory */ - sys_lgetxattr (full_path, "trusted.distribute.fix.layout", - &value, 128); + /* Setting frame-timeout to 10mins (600seconds). + * Unix domain sockets ensures that the connection is reliable. The + * default timeout of 30mins used for unreliable network connections is + * too long for unix domain socket connections. + */ + ret = rpc_transport_unix_options_build (&options, sockfile, 600); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Unix options build failed"); + goto out; + } + + synclock_unlock (&priv->big_lock); + ret = glusterd_rpc_create (&defrag->rpc, options, + glusterd_defrag_notify, volinfo); + synclock_lock (&priv->big_lock); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "RPC create failed"); + goto out; + } + ret = 0; +out: + return ret; +} - volinfo->defrag->total_files += 1; +int +glusterd_rebalance_cmd_validate (int cmd, char *volname, + glusterd_volinfo_t **volinfo, + char *op_errstr, size_t len) +{ + int ret = -1; - /* Traverse into subdirectory */ - ret = gf_glusterd_rebalance_fix_layout (volinfo, - full_path); - if (ret) - break; - } + if (glusterd_volinfo_find(volname, volinfo)) { + gf_log ("glusterd", GF_LOG_ERROR, "Received rebalance on invalid" + " volname %s", volname); + snprintf (op_errstr, len, "Volume %s does not exist", + volname); + goto out; + } + if ((*volinfo)->brick_count <= (*volinfo)->dist_leaf_count) { + gf_log ("glusterd", GF_LOG_ERROR, "Volume %s is not a " + "distribute type or contains only 1 brick", volname); + snprintf (op_errstr, len, "Volume %s is not a distribute " + "volume or contains only 1 brick.\n" + "Not performing rebalance", volname); + goto out; + } - if (volinfo->defrag_status == GF_DEFRAG_STATUS_STOPED) { - closedir (fd); - ret = -1; - goto out; - } + if ((*volinfo)->status != GLUSTERD_STATUS_STARTED) { + gf_log ("glusterd", GF_LOG_ERROR, "Received rebalance on stopped" + " volname %s", volname); + snprintf (op_errstr, len, "Volume %s needs to " + "be started to perform rebalance", volname); + goto out; } - closedir (fd); - if (!entry) - ret = 0; + ret = 0; out: + gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); return ret; } -void * -glusterd_defrag_start (void *data) +int +__glusterd_handle_defrag_volume (rpcsvc_request_t *req) { - glusterd_volinfo_t *volinfo = data; - glusterd_defrag_info_t *defrag = NULL; - char cmd_str[1024] = {0,}; - int ret = -1; - struct stat stbuf = {0,}; - char value[128] = {0,}; - - defrag = volinfo->defrag; - if (!defrag) + int32_t ret = -1; + gf_cli_req cli_req = {{0,}}; + glusterd_conf_t *priv = NULL; + dict_t *dict = NULL; + char *volname = NULL; + gf_cli_defrag_type cmd = 0; + char msg[2048] = {0,}; + xlator_t *this = NULL; + + GF_ASSERT (req); + this = THIS; + GF_ASSERT (this); + + priv = this->private; + GF_ASSERT (priv); + + ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + //failed to decode msg; + req->rpc_err = GARBAGE_ARGS; goto out; + } - sleep (1); - ret = stat (defrag->mount, &stbuf); - if ((ret == -1) && (errno == ENOTCONN)) { - /* Wait for some more time before starting rebalance */ - sleep (2); - ret = stat (defrag->mount, &stbuf); - if (ret == -1) { - volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED; - volinfo->rebalance_files = 0; - volinfo->rebalance_data = 0; - volinfo->lookedup_files = 0; + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new (); + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "unserialize req-buffer to dictionary"); + snprintf (msg, sizeof (msg), "Unable to decode the " + "command"); goto out; } } - /* Fix the root ('/') first */ - sys_lgetxattr (defrag->mount, "trusted.distribute.fix.layout", - &value, 128); - - /* root's layout got fixed */ - defrag->total_files = 1; + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + snprintf (msg, sizeof (msg), "Failed to get volume name"); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); + goto out; + } - /* Step 1: Fix layout of all the directories */ - ret = gf_glusterd_rebalance_fix_layout (volinfo, defrag->mount); + ret = dict_get_int32 (dict, "rebalance-command", (int32_t*)&cmd); if (ret) { - volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED; + snprintf (msg, sizeof (msg), "Failed to get command"); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); goto out; } - /* Completed first step */ - volinfo->defrag_status = GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE; + ret = dict_set_static_bin (dict, "node-uuid", MY_UUID, 16); + if (ret) + goto out; - /* It was used by number of layout fixes on directories */ - defrag->total_files = 0; + if ((cmd == GF_DEFRAG_CMD_STATUS) || + (cmd == GF_DEFRAG_CMD_STOP)) { + ret = glusterd_op_begin (req, GD_OP_DEFRAG_BRICK_VOLUME, + dict, msg, sizeof (msg)); + } else + ret = glusterd_op_begin (req, GD_OP_REBALANCE, dict, + msg, sizeof (msg)); + +out: + + glusterd_friend_sm (); + glusterd_op_sm (); - /* Step 2: Iterate over directories to move data */ - ret = gf_glusterd_rebalance_move_data (volinfo, defrag->mount); if (ret) { - volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED; - } + if (msg[0] == '\0') + snprintf (msg, sizeof (msg), "Operation failed"); + ret = glusterd_op_send_cli_response (GD_OP_REBALANCE, ret, 0, + req, dict, msg); - /* Completed whole process */ - if (!ret) { - volinfo->defrag_status = GF_DEFRAG_STATUS_COMPLETE; - } - volinfo->rebalance_files = defrag->total_files; - volinfo->rebalance_data = defrag->total_data; - volinfo->lookedup_files = defrag->num_files_lookedup; -out: - volinfo->defrag = NULL; - if (defrag) { - gf_log ("rebalance", GF_LOG_NORMAL, "rebalance on %s complete", - defrag->mount); - - snprintf (cmd_str, 1024, "umount -l %s", defrag->mount); - ret = system (cmd_str); - LOCK_DESTROY (&defrag->lock); - GF_FREE (defrag); } - return NULL; + free (cli_req.dict.dict_val);//malloced by xdr + + return 0; } int -glusterd_defrag_stop (glusterd_volinfo_t *volinfo, - gf1_cli_defrag_vol_rsp *rsp) +glusterd_handle_defrag_volume (rpcsvc_request_t *req) { - /* TODO: set a variaeble 'stop_defrag' here, it should be checked - in defrag loop */ - if (!volinfo || !volinfo->defrag) - goto out; - - LOCK (&volinfo->defrag->lock); - { - volinfo->defrag_status = GF_DEFRAG_STATUS_STOPED; - rsp->files = volinfo->defrag->total_files; - rsp->size = volinfo->defrag->total_data; - } - UNLOCK (&volinfo->defrag->lock); - - rsp->op_ret = 0; -out: - return 0; + return glusterd_big_locked_handler (req, __glusterd_handle_defrag_volume); } + int -glusterd_defrag_status_get (glusterd_volinfo_t *volinfo, - gf1_cli_defrag_vol_rsp *rsp) +glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr) { - if (!volinfo) + char *volname = NULL; + int ret = 0; + int32_t cmd = 0; + char msg[2048] = {0}; + glusterd_volinfo_t *volinfo = NULL; + char *task_id_str = NULL; + dict_t *op_ctx = NULL; + xlator_t *this = 0; + + this = THIS; + GF_ASSERT (this); + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "volname not found"); goto out; + } - if (volinfo->defrag) { - LOCK (&volinfo->defrag->lock); - { - rsp->files = volinfo->defrag->total_files; - rsp->size = volinfo->defrag->total_data; - rsp->lookedup_files = volinfo->defrag->num_files_lookedup; + ret = dict_get_int32 (dict, "rebalance-command", &cmd); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "cmd not found"); + goto out; + } + + ret = glusterd_rebalance_cmd_validate (cmd, volname, &volinfo, + msg, sizeof (msg)); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "failed to validate"); + goto out; + } + switch (cmd) { + case GF_DEFRAG_CMD_START: + case GF_DEFRAG_CMD_START_LAYOUT_FIX: + case GF_DEFRAG_CMD_START_FORCE: + if (is_origin_glusterd (dict)) { + op_ctx = glusterd_op_get_ctx (); + if (!op_ctx) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Failed to get op_ctx"); + goto out; + } + + ret = glusterd_generate_and_set_task_id + (op_ctx, GF_REBALANCE_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to generate task-id"); + goto out; + } + } else { + ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, + &task_id_str); + if (ret) { + snprintf (msg, sizeof (msg), + "Missing rebalance-id"); + gf_log (this->name, GF_LOG_WARNING, "%s", msg); + ret = 0; + } } - UNLOCK (&volinfo->defrag->lock); - } else { - rsp->files = volinfo->rebalance_files; - rsp->size = volinfo->rebalance_data; - rsp->lookedup_files = volinfo->lookedup_files; + ret = glusterd_defrag_start_validate (volinfo, msg, + sizeof (msg), + GD_OP_REBALANCE); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "start validate failed"); + goto out; + } + break; + case GF_DEFRAG_CMD_STATUS: + case GF_DEFRAG_CMD_STOP: + break; + default: + break; } - rsp->op_errno = volinfo->defrag_status; - rsp->op_ret = 0; + ret = 0; out: - return 0; + if (ret && op_errstr && msg[0]) + *op_errstr = gf_strdup (msg); + + return ret; } + int -glusterd_handle_defrag_volume (rpcsvc_request_t *req) +glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) { - int32_t ret = -1; - gf1_cli_defrag_vol_req cli_req = {0,}; - glusterd_conf_t *priv = NULL; - char cmd_str[4096] = {0,}; - glusterd_volinfo_t *volinfo = NULL; - glusterd_defrag_info_t *defrag = NULL; - gf1_cli_defrag_vol_rsp rsp = {0,}; - - GF_ASSERT (req); - - priv = THIS->private; - if (!gf_xdr_to_cli_defrag_vol_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; + char *volname = NULL; + int ret = 0; + int32_t cmd = 0; + char msg[2048] = {0}; + glusterd_volinfo_t *volinfo = NULL; + glusterd_conf_t *priv = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *tmp = NULL; + gf_boolean_t volfile_update = _gf_false; + char *task_id_str = NULL; + dict_t *ctx = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "volname not given"); goto out; } - switch (cli_req.cmd) { - case GF_DEFRAG_CMD_START: - gf_cmd_log ("Volume rebalance"," on volname: %s " - "cmd: start, attempted", cli_req.volname); - break; - case GF_DEFRAG_CMD_STOP: - gf_cmd_log ("Volume rebalance"," on volname: %s " - "cmd: stop, attempted", cli_req.volname); - break; - default: - break; + ret = dict_get_int32 (dict, "rebalance-command", &cmd); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "command not given"); + goto out; } - gf_log ("glusterd", GF_LOG_NORMAL, "Received rebalance volume on %s", - cli_req.volname); - rsp.volname = cli_req.volname; - rsp.op_ret = -1; - if (glusterd_volinfo_find(cli_req.volname, &volinfo)) { - gf_log ("glusterd", GF_LOG_NORMAL, "Received rebalance on invalid" - " volname %s", cli_req.volname); + + ret = glusterd_rebalance_cmd_validate (cmd, volname, &volinfo, + msg, sizeof (msg)); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "cmd validate failed"); goto out; } - if (volinfo->status != GLUSTERD_STATUS_STARTED) { - gf_log ("glusterd", GF_LOG_NORMAL, "Received rebalance on stopped" - " volname %s", cli_req.volname); - goto out; + /* Set task-id, if available, in op_ctx dict for operations other than + * start + */ + if (cmd == GF_DEFRAG_CMD_STATUS || cmd == GF_DEFRAG_CMD_STOP) { + if (!uuid_is_null (volinfo->rebal.rebalance_id)) { + ctx = glusterd_op_get_ctx (); + if (!ctx) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get op_ctx"); + ret = -1; + goto out; + } + + if (GD_OP_REMOVE_BRICK == volinfo->rebal.op) + ret = glusterd_copy_uuid_to_dict + (volinfo->rebal.rebalance_id, ctx, + GF_REMOVE_BRICK_TID_KEY); + else + ret = glusterd_copy_uuid_to_dict + (volinfo->rebal.rebalance_id, ctx, + GF_REBALANCE_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set task-id"); + goto out; + } + } } - switch (cli_req.cmd) { + switch (cmd) { case GF_DEFRAG_CMD_START: - { - if (volinfo->defrag) { - gf_log ("glusterd", GF_LOG_DEBUG, - "rebalance on volume %s already started", - cli_req.volname); - rsp.op_errno = EEXIST; - goto out; + case GF_DEFRAG_CMD_START_LAYOUT_FIX: + case GF_DEFRAG_CMD_START_FORCE: + ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, &task_id_str); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "Missing rebalance " + "id"); + ret = 0; + } else { + uuid_parse (task_id_str, volinfo->rebal.rebalance_id) ; + volinfo->rebal.op = GD_OP_REBALANCE; } - - if (glusterd_is_rb_started (volinfo) || - glusterd_is_rb_paused (volinfo)) { - gf_log ("glusterd", GF_LOG_DEBUG, - "Replace brick is in progress on volume %s", - cli_req.volname); - goto out; + ret = glusterd_handle_defrag_start (volinfo, msg, sizeof (msg), + cmd, NULL, GD_OP_REBALANCE); + break; + case GF_DEFRAG_CMD_STOP: + /* Clear task-id only on explicitly stopping rebalance. + * Also clear the stored operation, so it doesn't cause trouble + * with future rebalance/remove-brick starts + */ + uuid_clear (volinfo->rebal.rebalance_id); + volinfo->rebal.op = GD_OP_NONE; + + /* Fall back to the old volume file in case of decommission*/ + list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, + brick_list) { + if (!brickinfo->decommissioned) + continue; + brickinfo->decommissioned = 0; + volfile_update = _gf_true; } - volinfo->defrag = GF_CALLOC (1, sizeof (glusterd_defrag_info_t), - gf_gld_mt_defrag_info); - if (!volinfo->defrag) - goto out; - defrag = volinfo->defrag; - - LOCK_INIT (&defrag->lock); - snprintf (defrag->mount, 1024, "%s/mount/%s", - priv->workdir, cli_req.volname); - /* Create a directory, mount glusterfs over it, start glusterfs-defrag */ - snprintf (cmd_str, 4096, "mkdir -p %s", defrag->mount); - ret = system (cmd_str); + if (volfile_update == _gf_false) { + ret = 0; + break; + } + ret = glusterd_create_volfiles_and_notify_services (volinfo); if (ret) { - gf_log("glusterd", GF_LOG_DEBUG, "command: %s failed", cmd_str); + gf_log (this->name, GF_LOG_WARNING, + "failed to create volfiles"); goto out; } - snprintf (cmd_str, 4096, "%s/sbin/glusterfs -s localhost " - "--volfile-id %s --volume-name %s-quick-read " - "--xlator-option *dht.unhashed-sticky-bit=yes " - "--xlator-option *dht.use-readdirp=yes " - "--xlator-option *dht.lookup-unhashed=yes %s", - GFS_PREFIX, cli_req.volname, cli_req.volname, - defrag->mount); - ret = gf_system (cmd_str); + ret = glusterd_store_volinfo (volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) { - gf_log("glusterd", GF_LOG_DEBUG, "command: %s failed", cmd_str); + gf_log (this->name, GF_LOG_WARNING, + "failed to store volinfo"); goto out; } - volinfo->defrag_status = GF_DEFRAG_STATUS_STARTED; - rsp.op_ret = 0; - - ret = pthread_create (&defrag->th, NULL, glusterd_defrag_start, - volinfo); - if (ret) { - snprintf (cmd_str, 1024, "umount -l %s", defrag->mount); - ret = system (cmd_str); - rsp.op_ret = -1; - } - break; - } - case GF_DEFRAG_CMD_STOP: - ret = glusterd_defrag_stop (volinfo, &rsp); + ret = 0; break; + case GF_DEFRAG_CMD_STATUS: - ret = glusterd_defrag_status_get (volinfo, &rsp); break; default: break; } - if (ret) - gf_log("glusterd", GF_LOG_DEBUG, "command: %s failed",cmd_str); - if (cli_req.cmd != GF_DEFRAG_CMD_STATUS) { - gf_cmd_log ("volume rebalance"," on volname: %s %d %s", - cli_req.volname, - cli_req.cmd, ((ret)?"FAILED":"SUCCESS")); +out: + if (ret && op_errstr && msg[0]) + *op_errstr = gf_strdup (msg); + + return ret; +} + +int32_t +glusterd_defrag_event_notify_handle (dict_t *dict) +{ + glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; + int32_t ret = -1; + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Failed to get volname"); + return ret; } -out: + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Failed to get volinfo for %s" + , volname); + return ret; + } - ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gf_xdr_serialize_cli_defrag_vol_rsp); - if (cli_req.volname) - free (cli_req.volname);//malloced by xdr + ret = glusterd_defrag_volume_status_update (volinfo, dict); - return 0; + if (ret) + gf_log ("", GF_LOG_ERROR, "Failed to update status"); + return ret; } |
