summaryrefslogtreecommitdiffstats
path: root/xlators/mgmt/glusterd/src
diff options
context:
space:
mode:
authorshishirng <shishirng@gluster.com>2012-01-18 15:29:15 +0530
committerVijay Bellur <vijay@gluster.com>2012-02-19 01:31:19 -0800
commit7ba1e1ed45cee56ef51b9c04df99c976546d5d04 (patch)
treed3e4121729d51852a120ba5f067aa8a64f39b624 /xlators/mgmt/glusterd/src
parent061d70e8195d082043b071118333b7e3173fa3ec (diff)
cluster/dht: Rebalance will be a new glusterfs process
rebalance will not use any maintainance clients. It is replaced by syncops, with the volfile. Brickop (communication between glusterd<->glusterfs process) is used for status and stop commands. Dept-first traversal of dir is maintained, but data is migrated as and when encounterd. fix-layout (dir) do Complete migrate-data of dir fix-layout (subdir) done Rebalance state is saved in the vol file, for restart-ability. A disconnect event and pidfile state determine the defrag-status Signed-off-by: shishirng <shishirng@gluster.com> Change-Id: Iec6c80c84bbb2142d840242c28db3d5f5be94d01 BUG: 763844 Reviewed-on: http://review.gluster.com/2540 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Amar Tumballi <amarts@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd/src')
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c21
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c6
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c80
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rebalance.c601
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rpc-ops.c9
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c12
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h1
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c54
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h29
11 files changed, 392 insertions, 425 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index c170972ce83..fc9c9cf0a93 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -31,6 +31,7 @@
#include "glusterd-utils.h"
#include "glusterd-volgen.h"
#include "run.h"
+#include <sys/signal.h>
/* misc */
@@ -1384,8 +1385,6 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr)
switch (volinfo->defrag_status) {
case GF_DEFRAG_STATUS_FAILED:
case GF_DEFRAG_STATUS_COMPLETE:
- case GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE:
- case GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE:
volinfo->defrag_status = 0;
default:
break;
@@ -1420,6 +1419,9 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
int32_t replica_count = 0;
glusterd_brickinfo_t *brickinfo = NULL;
glusterd_brickinfo_t *tmp = NULL;
+ glusterd_conf_t *priv = NULL;
+ char pidfile[PATH_MAX];
+
ret = dict_get_str (dict, "volname", &volname);
@@ -1456,7 +1458,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
if (volinfo->defrag) {
LOCK (&volinfo->defrag->lock);
- volinfo->defrag_status = GF_DEFRAG_STATUS_PAUSED;
+ //volinfo->defrag_status = GF_DEFRAG_STATUS_PAUSED;
UNLOCK (&volinfo->defrag->lock);
}
@@ -1470,13 +1472,14 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
case GF_OP_CMD_ABORT:
{
if (volinfo->decommission_in_progress) {
- if (volinfo->defrag) {
- LOCK (&volinfo->defrag->lock);
+ priv = THIS->private;
+ if (!priv)
+ return ret;
- volinfo->defrag_status = GF_DEFRAG_STATUS_STOPPED;
+ GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv);
+
+ glusterd_service_stop ("rebalance", pidfile, SIGTERM, 1);
- UNLOCK (&volinfo->defrag->lock);
- }
}
/* Fall back to the old volume file */
@@ -1577,8 +1580,6 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
switch (volinfo->defrag_status) {
case GF_DEFRAG_STATUS_FAILED:
case GF_DEFRAG_STATUS_COMPLETE:
- case GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE:
- case GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE:
volinfo->defrag_status = 0;
default:
break;
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index b80164e8d0e..b06dd28cf3f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -346,6 +346,11 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
if (ret)
goto out;
+ snprintf (key, 256, "volume%d.rebalance", count);
+ ret = dict_set_int32 (volumes, key, volinfo->defrag_cmd);
+ if (ret)
+ goto out;
+
list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
char brick[1024] = {0,};
snprintf (key, 256, "volume%d.brick%d", count, i);
@@ -2046,6 +2051,7 @@ glusterd_rpc_create (struct rpc_clnt **rpc,
GF_ASSERT (this);
GF_ASSERT (options);
+
new_rpc = rpc_clnt_new (options, this->ctx, this->name);
if (!new_rpc)
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index 4a0561d1e6e..2a4bf82ee69 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -150,6 +150,8 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin
{
int ret = -1;
gd1_mgmt_brick_op_req *brick_req = NULL;
+ char *volname = NULL;
+ char name[1024] = {0,};
GF_ASSERT (op < GD_OP_MAX);
GF_ASSERT (op > GD_OP_NONE);
@@ -204,6 +206,21 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin
brick_req->name = "";
}
break;
+ case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ brick_req = GF_CALLOC (1, sizeof (*brick_req),
+ gf_gld_mt_mop_brick_req_t);
+ if (!brick_req)
+ goto out;
+
+ brick_req->op = GLUSTERD_BRICK_XLATOR_DEFRAG;
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+ snprintf (name, 1024, "%s-dht",volname);
+ brick_req->name = gf_strdup (name);
+
+ break;
default:
goto out;
break;
@@ -1617,6 +1634,7 @@ glusterd_op_build_payload (dict_t **req)
case GD_OP_HEAL_VOLUME:
case GD_OP_STATEDUMP_VOLUME:
case GD_OP_CLEARLOCKS_VOLUME:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
{
dict_t *dict = ctx;
dict_copy (dict, req_dict);
@@ -2173,6 +2191,7 @@ glusterd_need_brick_op (glusterd_op_t op)
switch (op) {
case GD_OP_PROFILE_VOLUME:
case GD_OP_STATUS_VOLUME:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
ret = _gf_true;
break;
default:
@@ -2368,6 +2387,7 @@ glusterd_op_stage_validate (glusterd_op_t op, dict_t *dict, char **op_errstr,
break;
case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
ret = glusterd_op_stage_rebalance (dict, op_errstr);
break;
@@ -2464,6 +2484,7 @@ glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr,
break;
case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
ret = glusterd_op_rebalance (dict, op_errstr, rsp_dict);
break;
@@ -2613,6 +2634,10 @@ glusterd_handle_brick_rsp (glusterd_brickinfo_t *brickinfo,
op_ctx, op_errstr);
break;
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ dict_copy (rsp_dict, op_ctx);
+ break;
+
default:
break;
}
@@ -2754,6 +2779,7 @@ glusterd_bricks_select_profile_volume (dict_t *dict, char **op_errstr)
priv = this->private;
GF_ASSERT (priv);
+
ret = dict_get_str (dict, "volname", &volname);
if (ret) {
gf_log ("glusterd", GF_LOG_ERROR, "volume name get failed");
@@ -2963,6 +2989,56 @@ out:
}
+
+static int
+glusterd_bricks_select_rebalance_volume (dict_t *dict, char **op_errstr)
+{
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ char msg[2048] = {0,};
+ glusterd_pending_node_t *pending_node = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "volume name get failed");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Volume %s does not exist",
+ volname);
+
+ *op_errstr = gf_strdup (msg);
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+ pending_node = GF_CALLOC (1, sizeof (*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
+ } else {
+ pending_node->node = volinfo;
+ pending_node->type = GD_NODE_REBALANCE;
+ list_add_tail (&pending_node->list,
+ &opinfo.pending_bricks);
+ pending_node = NULL;
+ }
+
+out:
+ return ret;
+}
+
+
+
+
static int
glusterd_bricks_select_status_volume (dict_t *dict, char **op_errstr)
{
@@ -3196,6 +3272,9 @@ glusterd_op_bricks_select (glusterd_op_t op, dict_t *dict, char **op_errstr)
ret = glusterd_bricks_select_status_volume (dict, op_errstr);
break;
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ ret = glusterd_bricks_select_rebalance_volume (dict, op_errstr);
+ break;
default:
break;
}
@@ -3758,6 +3837,7 @@ glusterd_op_free_ctx (glusterd_op_t op, void *ctx)
case GD_OP_HEAL_VOLUME:
case GD_OP_STATEDUMP_VOLUME:
case GD_OP_CLEARLOCKS_VOLUME:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
dict_unref (ctx);
break;
default:
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
index e5a907eaa46..0fe8d3899fe 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
@@ -46,363 +46,35 @@
#include "cli1-xdr.h"
#include "xdr-generic.h"
-/* return values - 0: success, +ve: stopped, -ve: failure */
-int
-gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir)
-{
- int ret = -1;
- DIR *fd = NULL;
- glusterd_defrag_info_t *defrag = NULL;
- struct dirent *entry = NULL;
- struct stat stbuf = {0,};
- char full_path[PATH_MAX] = {0,};
- char linkinfo[PATH_MAX] = {0,};
- char force_string[64] = {0,};
-
- if (!volinfo->defrag)
- goto out;
-
- defrag = volinfo->defrag;
-
- fd = opendir (dir);
- if (!fd)
- goto out;
-
- if ((defrag->cmd == GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE) ||
- (defrag->cmd == GF_DEFRAG_CMD_START_FORCE)) {
- strcpy (force_string, "force");
- } else {
- strcpy (force_string, "not-force");
- }
-
- while ((entry = readdir (fd))) {
- if (!entry)
- break;
-
- /* We have to honor 'stop' (or 'pause'|'commit') as early
- as possible */
- if (volinfo->defrag_status !=
- GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED) {
- /* It can be one of 'stopped|paused|commit' etc */
- closedir (fd);
- ret = 1;
- goto out;
- }
-
- if (!strcmp (entry->d_name, ".") || !strcmp (entry->d_name, ".."))
- continue;
-
- snprintf (full_path, PATH_MAX, "%s/%s", dir, entry->d_name);
-
- ret = lstat (full_path, &stbuf);
- if (ret == -1)
- continue;
-
- if (S_ISDIR (stbuf.st_mode))
- continue;
-
- defrag->num_files_lookedup += 1;
-
- /* TODO: bring in feature to support hardlink rebalance */
- if (stbuf.st_nlink > 1)
- continue;
-
- /* if distribute is present, it will honor this key.
- -1 is returned if distribute is not present or file doesn't
- have a link-file. If file has link-file, the path of
- link-file will be the value, and also that guarantees
- that file has to be mostly migrated */
- ret = sys_lgetxattr (full_path, GF_XATTR_LINKINFO_KEY,
- &linkinfo, PATH_MAX);
- if (ret <= 0)
- continue;
-
- ret = sys_lsetxattr (full_path, "distribute.migrate-data",
- force_string, strlen (force_string), 0);
-
- /* if errno is not ENOSPC or ENOTCONN, we can still continue
- with rebalance process */
- if ((ret == -1) && ((errno != ENOSPC) ||
- (errno != ENOTCONN)))
- continue;
-
- if ((ret == -1) && (errno == ENOTCONN)) {
- /* Most probably mount point went missing (mostly due
- to a brick down), say rebalance failure to user,
- let him restart it if everything is fine */
- volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED;
- break;
- }
-
- if ((ret == -1) && (errno == ENOSPC)) {
- /* rebalance process itself failed, may be
- remote brick went down, or write failed due to
- disk full etc etc.. */
- volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED;
- break;
- }
-
- LOCK (&defrag->lock);
- {
- defrag->total_files += 1;
- defrag->total_data += stbuf.st_size;
- }
- UNLOCK (&defrag->lock);
- }
- closedir (fd);
-
- fd = opendir (dir);
- if (!fd)
- goto out;
- while ((entry = readdir (fd))) {
- if (!entry)
- break;
-
- /* We have to honor 'stop' (or 'pause'|'commit') as early
- as possible */
- if (volinfo->defrag_status !=
- GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED) {
- /* It can be one of 'stopped|paused|commit' etc */
- closedir (fd);
- ret = 1;
- goto out;
- }
-
- if (!strcmp (entry->d_name, ".") || !strcmp (entry->d_name, ".."))
- continue;
-
- snprintf (full_path, 1024, "%s/%s", dir, entry->d_name);
-
- ret = lstat (full_path, &stbuf);
- if (ret == -1)
- continue;
-
- if (!S_ISDIR (stbuf.st_mode))
- continue;
-
- ret = gf_glusterd_rebalance_move_data (volinfo, full_path);
- if (ret)
- break;
- }
- closedir (fd);
-
- if (!entry)
- ret = 0;
-out:
- return ret;
-}
+int32_t
+glusterd3_1_brick_op_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe);
-/* return values - 0: success, +ve: stopped, -ve: failure */
int
-gf_glusterd_rebalance_fix_layout (glusterd_volinfo_t *volinfo, const char *dir)
+glusterd_defrag_update_state (glusterd_volinfo_t *volinfo,
+ glusterd_defrag_info_t *defrag)
{
- int ret = -1;
- char full_path[1024] = {0,};
- struct stat stbuf = {0,};
- DIR *fd = NULL;
- struct dirent *entry = NULL;
+ int ret = -1;
+ int cmd = 0;
- if (!volinfo->defrag)
- goto out;
-
- fd = opendir (dir);
- if (!fd)
- goto out;
-
- while ((entry = readdir (fd))) {
- if (!entry)
- break;
-
- /* We have to honor 'stop' (or 'pause'|'commit') as early
- as possible */
- if (volinfo->defrag_status !=
- GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED) {
- /* It can be one of 'stopped|paused|commit' etc */
- closedir (fd);
- ret = 1;
- goto out;
- }
-
- if (!strcmp (entry->d_name, ".") || !strcmp (entry->d_name, ".."))
- continue;
-
- snprintf (full_path, 1024, "%s/%s", dir, entry->d_name);
-
- ret = lstat (full_path, &stbuf);
- if (ret == -1)
- continue;
-
- if (S_ISDIR (stbuf.st_mode)) {
- /* Fix the layout of the directory */
- /* TODO: isn't error code not important ? */
- sys_lsetxattr (full_path, "distribute.fix.layout",
- "yes", 3, 0);
-
- volinfo->defrag->total_files += 1;
-
- /* Traverse into subdirectory */
- ret = gf_glusterd_rebalance_fix_layout (volinfo,
- full_path);
- if (ret)
- break;
- }
- }
- closedir (fd);
-
- if (!entry)
- ret = 0;
-
-out:
- return ret;
-}
-
-void *
-glusterd_defrag_start (void *data)
-{
- glusterd_volinfo_t *volinfo = data;
- glusterd_defrag_info_t *defrag = NULL;
- int ret = -1;
- struct stat stbuf = {0,};
-
- THIS = volinfo->xl;
- defrag = volinfo->defrag;
- if (!defrag)
- goto out;
-
- sleep (1);
- ret = lstat (defrag->mount, &stbuf);
- if ((ret == -1) && (errno == ENOTCONN)) {
- /* Wait for some more time before starting rebalance */
- sleep (2);
- ret = lstat (defrag->mount, &stbuf);
- if (ret == -1) {
- volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED;
- volinfo->rebalance_files = 0;
- volinfo->rebalance_data = 0;
- volinfo->lookedup_files = 0;
- goto out;
- }
- }
-
- /* Fix the root ('/') first */
- sys_lsetxattr (defrag->mount, "distribute.fix.layout",
- "yes", 3, 0);
-
- if ((defrag->cmd == GF_DEFRAG_CMD_START) ||
- (defrag->cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX)) {
- /* root's layout got fixed */
- defrag->total_files = 1;
-
- /* Step 1: Fix layout of all the directories */
- ret = gf_glusterd_rebalance_fix_layout (volinfo, defrag->mount);
- if (ret < 0)
- volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED;
- /* in both 'stopped' or 'failure' cases goto out */
- if (ret) {
- goto out;
- }
-
- /* Completed first step */
- volinfo->defrag_status = GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE;
- }
-
- if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
- /* It was used by number of layout fixes on directories */
- defrag->total_files = 0;
-
- volinfo->defrag_status = GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED;
-
- /* Step 2: Iterate over directories to move data */
- ret = gf_glusterd_rebalance_move_data (volinfo, defrag->mount);
- if (ret < 0)
- volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED;
- /* in both 'stopped' or 'failure' cases goto out */
- if (ret) {
- goto out;
- }
-
- /* Completed second step */
- volinfo->defrag_status = GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE;
- }
-
- /* Completed whole process */
- if ((defrag->cmd == GF_DEFRAG_CMD_START) ||
- (defrag->cmd == GF_DEFRAG_CMD_START_FORCE))
- volinfo->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
-
- volinfo->rebalance_files = defrag->total_files;
- volinfo->rebalance_data = defrag->total_data;
- volinfo->lookedup_files = defrag->num_files_lookedup;
-out:
- volinfo->defrag = NULL;
- if (defrag) {
- gf_log ("rebalance", GF_LOG_INFO, "rebalance on %s complete",
- defrag->mount);
-
- ret = runcmd ("umount", "-l", defrag->mount, NULL);
- LOCK_DESTROY (&defrag->lock);
-
- if (defrag->cbk_fn) {
- defrag->cbk_fn (volinfo, volinfo->defrag_status);
- }
-
- GF_FREE (defrag);
- }
- return NULL;
-}
-
-int
-glusterd_defrag_stop_validate (glusterd_volinfo_t *volinfo,
- char *op_errstr, size_t len)
-{
- int ret = -1;
- if (glusterd_is_defrag_on (volinfo) == 0) {
- snprintf (op_errstr, len, "Rebalance on %s is either Completed "
- "or not yet started", volinfo->volname);
- goto out;
- }
- ret = 0;
-out:
- gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
-}
-
-int
-glusterd_defrag_stop (glusterd_volinfo_t *volinfo, u_quad_t *files,
- u_quad_t *size, char *op_errstr, size_t len)
-{
- /* TODO: set a variable 'stop_defrag' here, it should be checked
- in defrag loop */
- int ret = -1;
GF_ASSERT (volinfo);
- GF_ASSERT (files);
- GF_ASSERT (size);
- GF_ASSERT (op_errstr);
-
- if (!volinfo) {
- ret = -1;
- goto out;
- }
+ GF_ASSERT (defrag);
- ret = glusterd_defrag_stop_validate (volinfo, op_errstr, len);
- if (ret) {
- /* rebalance may be happening on other nodes */
- ret = 0;
- goto out;
- }
-
- ret = 0;
if (volinfo->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) {
goto out;
}
- LOCK (&volinfo->defrag->lock);
+ LOCK (&defrag->lock);
{
- volinfo->defrag_status = GF_DEFRAG_STATUS_STOPPED;
- *files = volinfo->defrag->total_files;
- *size = volinfo->defrag->total_data;
+ cmd = defrag->cmd;
+ if ((cmd == GF_DEFRAG_CMD_START) || (cmd ==
+ GF_DEFRAG_CMD_START_FORCE) || (cmd ==
+ GF_DEFRAG_CMD_START_LAYOUT_FIX))
+ volinfo->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
+ else if (cmd == GF_DEFRAG_CMD_STOP)
+ volinfo->defrag_status = GF_DEFRAG_STATUS_STOPPED;
}
- UNLOCK (&volinfo->defrag->lock);
+ UNLOCK (&defrag->lock);
ret = 0;
out:
@@ -475,10 +147,9 @@ glusterd_rebalance_cmd_attempted_log (int cmd, char *volname)
gf_log ("glusterd", GF_LOG_INFO, "Received rebalance "
"volume start layout fix on %s", volname);
break;
- case GF_DEFRAG_CMD_START_MIGRATE_DATA:
- case GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE:
+ case GF_DEFRAG_CMD_START_FORCE:
gf_cmd_log ("Volume rebalance"," on volname: %s "
- "cmd: start data migrate attempted",
+ "cmd: start data force attempted",
volname);
gf_log ("glusterd", GF_LOG_INFO, "Received rebalance "
"volume start migrate data on %s", volname);
@@ -539,6 +210,97 @@ out:
return ret;
}
+int32_t
+glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
+{
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_defrag_info_t *defrag = NULL;
+ int ret = 0;
+ char pidfile[PATH_MAX];
+ glusterd_conf_t *priv = NULL;
+
+ priv = THIS->private;
+ if (!priv)
+ return 0;
+
+ volinfo = mydata;
+ if (!volinfo)
+ return 0;
+
+ defrag = volinfo->defrag;
+ if (!defrag)
+ return 0;
+
+ if ((event == RPC_CLNT_DISCONNECT) && defrag->connected)
+ volinfo->defrag = NULL;
+
+ GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv);
+
+ switch (event) {
+ case RPC_CLNT_CONNECT:
+ {
+ if (defrag->connected)
+ return 0;
+
+ LOCK (&defrag->lock);
+ {
+ defrag->connected = 1;
+ }
+ UNLOCK (&defrag->lock);
+
+ gf_log ("", GF_LOG_DEBUG, "%s got RPC_CLNT_CONNECT",
+ rpc->conn.trans->name);
+ break;
+ }
+
+ case RPC_CLNT_DISCONNECT:
+ {
+ if (!defrag->connected)
+ return 0;
+
+ LOCK (&defrag->lock);
+ {
+ defrag->connected = 0;
+ }
+ UNLOCK (&defrag->lock);
+
+ if (!glusterd_is_service_running (pidfile, NULL)) {
+ glusterd_defrag_update_state (volinfo, defrag);
+ } else {
+ volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ }
+
+ /* Success or failure, Reset cmd in volinfo */
+
+ volinfo->defrag_cmd = 0;
+
+ glusterd_store_volinfo (volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+
+ if (defrag->rpc) {
+ rpc_clnt_unref (defrag->rpc);
+ defrag->rpc = NULL;
+ }
+ if (defrag->cbk_fn)
+ defrag->cbk_fn (volinfo, volinfo->defrag_status);
+
+ if (defrag)
+ GF_FREE (defrag);
+ gf_log ("", GF_LOG_DEBUG, "%s got RPC_CLNT_DISCONNECT",
+ rpc->conn.trans->name);
+ break;
+ }
+ default:
+ gf_log ("", GF_LOG_TRACE,
+ "got some other RPC event %d", event);
+ ret = 0;
+ break;
+ }
+
+ return ret;
+}
+
int
glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
size_t len, int cmd, defrag_cbk_fn_t cbk)
@@ -547,6 +309,11 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
glusterd_defrag_info_t *defrag = NULL;
runner_t runner = {0,};
glusterd_conf_t *priv = NULL;
+ char defrag_path[PATH_MAX];
+ struct stat buf = {0,};
+ char sockfile[PATH_MAX] = {0,};
+ char pidfile[PATH_MAX] = {0,};
+ dict_t *options = NULL;
priv = THIS->private;
@@ -567,18 +334,29 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
defrag->cmd = cmd;
LOCK_INIT (&defrag->lock);
- snprintf (defrag->mount, 1024, "%s/mount/%s",
- priv->workdir, volinfo->volname);
- /* Create a directory, mount glusterfs over it, start glusterfs-defrag */
- runinit (&runner);
- runner_add_args (&runner, "mkdir", "-p", defrag->mount, NULL);
- ret = runner_run_reuse (&runner);
- if (ret) {
- runner_log (&runner, "glusterd", GF_LOG_DEBUG, "command failed");
+
+ volinfo->defrag_status = GF_DEFRAG_STATUS_STARTED;
+
+ volinfo->defrag_cmd = cmd;
+ glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+
+ GLUSTERD_GET_DEFRAG_DIR (defrag_path, volinfo, priv);
+ ret = stat (defrag_path, &buf);
+ if (ret && (errno == ENOENT)) {
+ runinit (&runner);
+ runner_add_args (&runner, "mkdir", "-p", defrag_path, NULL);
+ ret = runner_run_reuse (&runner);
+ if (ret) {
+ runner_log (&runner, "glusterd", GF_LOG_DEBUG,
+ "command failed");
+ runner_end (&runner);
+ goto out;
+ }
runner_end (&runner);
- goto out;
}
- runner_end (&runner);
+
+ GLUSTERD_GET_DEFRAG_SOCK_FILE (sockfile, volinfo, priv);
+ GLUSTERD_GET_DEFRAG_PID_FILE (pidfile, volinfo, priv);
runinit (&runner);
runner_add_args (&runner, SBIN_DIR"/glusterfs",
@@ -586,34 +364,37 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
"--xlator-option", "*dht.use-readdirp=yes",
"--xlator-option", "*dht.lookup-unhashed=yes",
"--xlator-option", "*dht.assert-no-child-down=yes",
- defrag->mount, NULL);
+ NULL);
+ runner_add_arg (&runner, "--xlator-option");
+ runner_argprintf ( &runner, "*dht.rebalance-cmd=%d",cmd);
+ runner_add_arg (&runner, "--socket-file");
+ runner_argprintf (&runner, "%s",sockfile);
+ runner_add_arg (&runner, "--pid-file");
+ runner_argprintf (&runner, "%s",pidfile);
+
ret = runner_run_reuse (&runner);
if (ret) {
runner_log (&runner, "glusterd", GF_LOG_DEBUG, "command failed");
runner_end (&runner);
goto out;
}
- runner_end (&runner);
- volinfo->defrag_status = GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED;
- if ((cmd == GF_DEFRAG_CMD_START_MIGRATE_DATA) ||
- (cmd == GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE)) {
- volinfo->defrag_status = GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED;
+ ret = rpc_clnt_transport_unix_options_build (&options, sockfile);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Unix options build failed");
+ goto out;
+ }
+
+ ret = glusterd_rpc_create (&defrag->rpc, options,
+ glusterd_defrag_notify, volinfo);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "RPC create failed");
+ goto out;
}
if (cbk)
defrag->cbk_fn = cbk;
- ret = pthread_create (&defrag->th, NULL, glusterd_defrag_start,
- volinfo);
- if (ret) {
- runinit (&runner);
- runner_add_args (&runner, "umount", "-l", defrag->mount, NULL);
- ret = runner_run_reuse (&runner);
- if (ret)
- runner_log (&runner, "glusterd", GF_LOG_DEBUG, "command failed");
- runner_end (&runner);
- }
out:
gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
@@ -712,7 +493,13 @@ glusterd_handle_defrag_volume (rpcsvc_request_t *req)
if (ret)
goto out;
- ret = glusterd_op_begin (req, GD_OP_REBALANCE, dict);
+ if ((cmd == GF_DEFRAG_CMD_STATUS) ||
+ (cmd == GF_DEFRAG_CMD_STOP)) {
+ ret = glusterd_op_begin (req, GD_OP_DEFRAG_BRICK_VOLUME,
+ dict);
+ }
+ else
+ ret = glusterd_op_begin (req, GD_OP_REBALANCE, dict);
out:
@@ -762,8 +549,7 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr)
switch (cmd) {
case GF_DEFRAG_CMD_START:
case GF_DEFRAG_CMD_START_LAYOUT_FIX:
- case GF_DEFRAG_CMD_START_MIGRATE_DATA:
- case GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE:
+ case GF_DEFRAG_CMD_START_FORCE:
ret = glusterd_defrag_start_validate (volinfo,
msg, sizeof (msg));
if (ret) {
@@ -771,6 +557,33 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr)
"start validate failed");
goto out;
}
+ break;
+ case GF_DEFRAG_CMD_STATUS:
+ ret = glusterd_is_defrag_on (volinfo);
+ if (!ret) {
+ ret = -1;
+ if (volinfo->defrag_status ==
+ GF_DEFRAG_STATUS_COMPLETE) {
+ snprintf (msg, sizeof (msg), "Rebalance "
+ "completed!");
+ goto out;
+ }
+ snprintf (msg, sizeof(msg), "Rebalance is not running"
+ " on volume %s", volname);
+ goto out;
+ }
+ break;
+
+ case GF_DEFRAG_CMD_STOP:
+ ret = glusterd_is_defrag_on (volinfo);
+ if (!ret) {
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "rebalance is not running");
+ ret = -1;
+ snprintf (msg, sizeof(msg), "Rebalance is not running"
+ " on volume %s", volname);
+ goto out;
+ }
default:
break;
}
@@ -792,11 +605,8 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
int32_t cmd = 0;
char msg[2048] = {0};
glusterd_volinfo_t *volinfo = NULL;
- uint64_t files = 0;
- uint64_t size = 0;
void *node_uuid = NULL;
glusterd_conf_t *priv = NULL;
- dict_t *tmp_dict = NULL;
priv = THIS->private;
@@ -839,39 +649,12 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
switch (cmd) {
case GF_DEFRAG_CMD_START:
case GF_DEFRAG_CMD_START_LAYOUT_FIX:
- case GF_DEFRAG_CMD_START_MIGRATE_DATA:
- case GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE:
+ case GF_DEFRAG_CMD_START_FORCE:
ret = glusterd_handle_defrag_start (volinfo, msg, sizeof (msg),
cmd, NULL);
break;
- case GF_DEFRAG_CMD_STOP:
- ret = glusterd_defrag_stop (volinfo, &files, &size,
- msg, sizeof (msg));
- if (!ret && rsp_dict) {
- ret = dict_set_uint64 (rsp_dict, "files", files);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set file count");
-
- ret = dict_set_uint64 (rsp_dict, "size", size);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set xfer size");
-
- /* Don't want to propagate errors from dict_set() */
- ret = 0;
- }
- break;
+ case GF_DEFRAG_CMD_STOP:
case GF_DEFRAG_CMD_STATUS:
-
- if (rsp_dict)
- tmp_dict = rsp_dict;
-
- /* On source node, there will be no 'rsp_dict' */
- if (!tmp_dict)
- tmp_dict = glusterd_op_get_ctx (GD_OP_REBALANCE);
-
- ret = glusterd_defrag_status_get (volinfo, tmp_dict);
break;
default:
break;
diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
index 2cf17b3f730..537496f0835 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
@@ -82,6 +82,7 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret,
break;
}
case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
{
if (ctx) {
ret = dict_get_int32 (ctx, "status", &status);
@@ -1058,7 +1059,8 @@ glusterd_volume_rebalance_use_rsp_dict (dict_t *rsp_dict)
GF_ASSERT (rsp_dict);
op = glusterd_op_get_op ();
- GF_ASSERT (GD_OP_REBALANCE == op);
+ GF_ASSERT ((GD_OP_REBALANCE == op) ||
+ (GD_OP_DEFRAG_BRICK_VOLUME == op));
ctx_dict = glusterd_op_get_ctx (op);
@@ -1224,10 +1226,7 @@ glusterd3_1_commit_op_cbk (struct rpc_req *req, struct iovec *iov,
break;
case GD_OP_REBALANCE:
- ret = glusterd_volume_rebalance_use_rsp_dict (dict);
- if (ret)
- goto out;
-
+ case GD_OP_DEFRAG_BRICK_VOLUME:
break;
default:
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
index 4fe8f71cb5b..18d60d0a428 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -622,6 +622,15 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo)
if (ret)
goto out;
+ if (volinfo->defrag_cmd == GF_DEFRAG_CMD_STATUS)
+ goto out;
+
+ snprintf (buf, sizeof (buf), "%d", volinfo->defrag_cmd);
+ ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_DEFRAG,
+ buf);
+ if (ret)
+ goto out;
+
out:
if (ret)
gf_log ("", GF_LOG_ERROR, "Unable to write volume values"
@@ -1860,6 +1869,9 @@ glusterd_store_retrieve_volume (char *volname)
}
gf_log ("", GF_LOG_DEBUG, "Parsed as "GEOREP" "
" slave:key=%s,value:%s", key, value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DEFRAG,
+ strlen (GLUSTERD_STORE_KEY_VOL_DEFRAG))) {
+ volinfo->defrag_cmd = atoi (value);
}
else {
exists = glusterd_check_option_exists (key, NULL);
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h
index f1413955bed..f55fb8c2e48 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.h
+++ b/xlators/mgmt/glusterd/src/glusterd-store.h
@@ -59,6 +59,7 @@ typedef enum glusterd_store_ver_ac_{
#define GLUSTERD_STORE_KEY_RB_STATUS "rb_status"
#define GLUSTERD_STORE_KEY_RB_SRC_BRICK "rb_src"
#define GLUSTERD_STORE_KEY_RB_DST_BRICK "rb_dst"
+#define GLUSTERD_STORE_KEY_VOL_DEFRAG "rebalance_status"
#define GLUSTERD_STORE_KEY_BRICK_HOSTNAME "hostname"
#define GLUSTERD_STORE_KEY_BRICK_PATH "path"
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 42924a5f68a..9ec9e16f18d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -2016,6 +2016,15 @@ glusterd_import_volinfo (dict_t *vols, int count,
goto out;
}
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.rebalance", count);
+ ret = dict_get_uint32 (vols, key, &new_volinfo->defrag_cmd);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "%s missing in payload for %s",
+ key, volname);
+ goto out;
+ }
+
uuid_parse (volume_id_str, new_volinfo->volume_id);
memset (key, 0, sizeof (key));
@@ -2438,6 +2447,7 @@ glusterd_pending_node_get_rpc (glusterd_pending_node_t *pending_node)
struct rpc_clnt *rpc = NULL;
glusterd_brickinfo_t *brickinfo = NULL;
nodesrv_t *shd = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
GF_VALIDATE_OR_GOTO (THIS->name, pending_node, out);
GF_VALIDATE_OR_GOTO (THIS->name, pending_node->node, out);
@@ -2449,6 +2459,11 @@ glusterd_pending_node_get_rpc (glusterd_pending_node_t *pending_node)
shd = pending_node->node;
rpc = shd->rpc;
+ } else if (pending_node->type == GD_NODE_REBALANCE) {
+ volinfo = pending_node->node;
+ if (volinfo->defrag)
+ rpc = volinfo->defrag->rpc;
+
} else {
GF_ASSERT (0);
}
@@ -4811,3 +4826,42 @@ glusterd_get_client_filepath (char *filepath, glusterd_volinfo_t *volinfo,
snprintf (filepath, PATH_MAX, "%s/%s-fuse.vol",
path, volinfo->volname);
}
+
+int
+glusterd_volume_defrag_restart (glusterd_volinfo_t *volinfo, char *op_errstr,
+ size_t len, int cmd, defrag_cbk_fn_t cbk)
+{
+ glusterd_conf_t *priv = NULL;
+ char pidfile[PATH_MAX];
+ int ret = -1;
+ pid_t pid;
+
+ priv = THIS->private;
+ if (!priv)
+ return ret;
+
+ GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv);
+
+ if (!glusterd_is_service_running (pidfile, &pid)) {
+ glusterd_handle_defrag_start (volinfo, op_errstr, len, cmd,
+ cbk);
+ }
+
+ return ret;
+}
+
+int
+glusterd_restart_rebalance (glusterd_conf_t *conf)
+{
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = 0;
+ char op_errstr[256];
+
+ list_for_each_entry (volinfo, &conf->volumes, vol_list) {
+ if (!volinfo->defrag_cmd)
+ continue;
+ glusterd_volume_defrag_restart (volinfo, op_errstr, 256,
+ volinfo->defrag_cmd, NULL);
+ }
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index 6f9a5e14d5c..e52b25e3199 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -386,4 +386,6 @@ glusterd_chk_peers_connected_befriended (uuid_t skip_uuid);
void
glusterd_get_client_filepath (char *filepath, glusterd_volinfo_t *volinfo,
gf_transport_type type);
+int
+glusterd_restart_rebalance (glusterd_conf_t *conf);
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
index e9f3bd05577..28e80310ec2 100644
--- a/xlators/mgmt/glusterd/src/glusterd.c
+++ b/xlators/mgmt/glusterd/src/glusterd.c
@@ -999,6 +999,8 @@ init (xlator_t *this)
ret = glusterd_restart_gsyncds (conf);
if (ret)
goto out;
+
+ glusterd_restart_rebalance (conf);
ret = 0;
out:
if (ret < 0) {
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index 60dbe61e04a..e200f49d1db 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -79,6 +79,7 @@ typedef enum glusterd_op_ {
GD_OP_STATEDUMP_VOLUME,
GD_OP_LIST_VOLUME,
GD_OP_CLEARLOCKS_VOLUME,
+ GD_OP_DEFRAG_BRICK_VOLUME,
GD_OP_MAX,
} glusterd_op_t;
@@ -164,6 +165,9 @@ struct glusterd_defrag_info_ {
gf_lock_t lock;
int cmd;
pthread_t th;
+ gf_defrag_status_t defrag_status;
+ struct rpc_clnt * rpc;
+ uint32_t connected;
char mount[1024];
char databuf[131072];
struct gf_defrag_brickinfo_ *bricks; /* volinfo->brick_count */
@@ -210,6 +214,7 @@ struct glusterd_volinfo_ {
uint64_t rebalance_data;
uint64_t lookedup_files;
glusterd_defrag_info_t *defrag;
+ gf_cli_defrag_type defrag_cmd;
/* Replace brick status */
gf_rb_status_t rb_status;
@@ -235,7 +240,8 @@ struct glusterd_volinfo_ {
typedef enum gd_node_type_ {
GD_NODE_NONE,
GD_NODE_BRICK,
- GD_NODE_SHD
+ GD_NODE_SHD,
+ GD_NODE_REBALANCE,
} gd_node_type;
typedef struct glusterd_pending_node_ {
@@ -315,6 +321,27 @@ typedef ssize_t (*gd_serialize_t) (struct iovec outmsg, void *args);
STACK_DESTROY (frame->root);\
} while (0)
+#define GLUSTERD_GET_DEFRAG_DIR(path, volinfo, priv) do { \
+ char vol_path[PATH_MAX]; \
+ GLUSTERD_GET_VOLUME_DIR(vol_path, volinfo, priv); \
+ snprintf (path, PATH_MAX, "%s/rebalance",vol_path); \
+ } while (0)
+
+#define GLUSTERD_GET_DEFRAG_SOCK_FILE(path, volinfo, priv) do { \
+ char defrag_path[PATH_MAX]; \
+ GLUSTERD_GET_DEFRAG_DIR(defrag_path, volinfo, priv); \
+ snprintf (path, PATH_MAX, "%s/%s.sock", defrag_path, \
+ uuid_utoa(priv->uuid)); \
+ } while (0)
+
+#define GLUSTERD_GET_DEFRAG_PID_FILE(path, volinfo, priv) do { \
+ char defrag_path[PATH_MAX]; \
+ GLUSTERD_GET_DEFRAG_DIR(defrag_path, volinfo, priv); \
+ snprintf (path, PATH_MAX, "%s/%s.pid", defrag_path, \
+ uuid_utoa(priv->uuid)); \
+ } while (0)
+
+
int32_t
glusterd_brick_from_brickinfo (glusterd_brickinfo_t *brickinfo,
char **new_brick);