summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/dht
diff options
context:
space:
mode:
authorshishirng <shishirng@gluster.com>2012-01-18 15:29:15 +0530
committerVijay Bellur <vijay@gluster.com>2012-02-19 01:31:19 -0800
commit7ba1e1ed45cee56ef51b9c04df99c976546d5d04 (patch)
treed3e4121729d51852a120ba5f067aa8a64f39b624 /xlators/cluster/dht
parent061d70e8195d082043b071118333b7e3173fa3ec (diff)
cluster/dht: Rebalance will be a new glusterfs process
rebalance will not use any maintainance clients. It is replaced by syncops, with the volfile. Brickop (communication between glusterd<->glusterfs process) is used for status and stop commands. Dept-first traversal of dir is maintained, but data is migrated as and when encounterd. fix-layout (dir) do Complete migrate-data of dir fix-layout (subdir) done Rebalance state is saved in the vol file, for restart-ability. A disconnect event and pidfile state determine the defrag-status Signed-off-by: shishirng <shishirng@gluster.com> Change-Id: Iec6c80c84bbb2142d840242c28db3d5f5be94d01 BUG: 763844 Reviewed-on: http://review.gluster.com/2540 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Amar Tumballi <amarts@redhat.com>
Diffstat (limited to 'xlators/cluster/dht')
-rw-r--r--xlators/cluster/dht/src/dht-common.c67
-rw-r--r--xlators/cluster/dht/src/dht-common.h47
-rw-r--r--xlators/cluster/dht/src/dht-mem-types.h1
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c606
-rw-r--r--xlators/cluster/dht/src/dht.c46
5 files changed, 750 insertions, 17 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 29b3dca83e5..360a432cd5f 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -4296,16 +4296,22 @@ dht_forget (xlator_t *this, inode_t *inode)
int
dht_notify (xlator_t *this, int event, void *data, ...)
{
- xlator_t *subvol = NULL;
- int cnt = -1;
- int i = -1;
- dht_conf_t *conf = NULL;
- int ret = -1;
- int propagate = 0;
+ xlator_t *subvol = NULL;
+ int cnt = -1;
+ int i = -1;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int propagate = 0;
+
+ int had_heard_from_all = 0;
+ int have_heard_from_all = 0;
+ struct timeval time = {0,};
+ gf_defrag_info_t *defrag = NULL;
+ dict_t *dict = NULL;
+ gf_defrag_type cmd = 0;
+ dict_t *output = NULL;
+ va_list ap;
- int had_heard_from_all = 0;
- int have_heard_from_all = 0;
- struct timeval time = {0,};
conf = this->private;
if (!conf)
@@ -4418,6 +4424,36 @@ dht_notify (xlator_t *this, int event, void *data, ...)
UNLOCK (&conf->subvolume_lock);
break;
+ case GF_EVENT_VOLUME_DEFRAG:
+ {
+ if (!conf->defrag) {
+ return ret;
+ }
+ defrag = conf->defrag;
+
+ dict = data;
+ va_start (ap, data);
+ output = va_arg (ap, dict_t*);
+
+ ret = dict_get_int32 (dict, "rebalance-command",
+ (int32_t*)&cmd);
+ if (ret)
+ return ret;
+ LOCK (&defrag->lock);
+ {
+ if (defrag->is_exiting)
+ goto unlock;
+ if (cmd == GF_DEFRAG_CMD_STATUS)
+ gf_defrag_status_get (defrag, output);
+ else if (cmd == GF_DEFRAG_CMD_STOP)
+ gf_defrag_stop (defrag, output);
+ }
+unlock:
+ UNLOCK (&defrag->lock);
+ return 0;
+ break;
+ }
+
default:
propagate = 1;
break;
@@ -4433,8 +4469,19 @@ dht_notify (xlator_t *this, int event, void *data, ...)
/* if all subvols have reported status, no need to hide anything
or wait for anything else. Just propagate blindly */
- if (have_heard_from_all)
+ if (have_heard_from_all) {
propagate = 1;
+ if (conf->defrag) {
+ ret = pthread_create (&conf->defrag->th, NULL,
+ gf_defrag_start, this);
+ if (ret) {
+ conf->defrag = NULL;
+ GF_FREE (conf->defrag);
+ kill (getpid(), SIGTERM);
+ }
+ }
+ }
+
if (!had_heard_from_all && have_heard_from_all) {
/* This is the first event which completes aggregation
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 749abe5380d..d97ef9f5853 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -175,6 +175,43 @@ struct dht_du {
};
typedef struct dht_du dht_du_t;
+enum gf_defrag_type {
+ GF_DEFRAG_CMD_START = 1,
+ GF_DEFRAG_CMD_STOP = 1 + 1,
+ GF_DEFRAG_CMD_STATUS = 1 + 2,
+ GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3,
+ GF_DEFRAG_CMD_START_FORCE = 1 + 4,
+};
+typedef enum gf_defrag_type gf_defrag_type;
+
+enum gf_defrag_status_t {
+ GF_DEFRAG_STATUS_NOT_STARTED,
+ GF_DEFRAG_STATUS_STARTED,
+ GF_DEFRAG_STATUS_STOPPED,
+ GF_DEFRAG_STATUS_COMPLETE,
+ GF_DEFRAG_STATUS_FAILED,
+};
+typedef enum gf_defrag_status_t gf_defrag_status_t;
+
+
+struct gf_defrag_info_ {
+ uint64_t total_files;
+ uint64_t total_data;
+ uint64_t num_files_lookedup;
+ gf_lock_t lock;
+ int cmd;
+ pthread_t th;
+ gf_defrag_status_t defrag_status;
+ struct rpc_clnt *rpc;
+ uint32_t connected;
+ uint32_t is_exiting;
+ pid_t pid;
+ inode_t *root_inode;
+
+};
+
+typedef struct gf_defrag_info_ gf_defrag_info_t;
+
struct dht_conf {
gf_lock_t subvolume_lock;
int subvolume_cnt;
@@ -208,6 +245,9 @@ struct dht_conf {
/* to keep track of nodes which are decomissioned */
xlator_t **decommissioned_bricks;
+
+ /* defrag related */
+ gf_defrag_info_t *defrag;
};
typedef struct dht_conf dht_conf_t;
@@ -608,6 +648,13 @@ int dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
struct iatt *postparent);
+int
+gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict);
+
+int
+gf_defrag_stop (gf_defrag_info_t *defrag, dict_t *output);
+void*
+gf_defrag_start (void *this);
#endif/* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h
index 21fb5a7ca20..a12ed153499 100644
--- a/xlators/cluster/dht/src/dht-mem-types.h
+++ b/xlators/cluster/dht/src/dht-mem-types.h
@@ -37,6 +37,7 @@ enum gf_dht_mem_types_ {
gf_switch_mt_switch_struct,
gf_dht_mt_subvol_time,
gf_dht_mt_loc_t,
+ gf_defrag_info_mt,
gf_dht_mt_end
};
#endif
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index dfd6f3b6ecc..46fc8773eff 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -669,7 +669,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
loc->path, from->name, strerror (errno));
}
- if (uuid_compare (empty_iatt.ia_gfid, loc->inode->gfid) == 0) {
+ if (uuid_compare (empty_iatt.ia_gfid, loc->gfid) == 0) {
/* take out the source from namespace */
ret = syncop_unlink (from, loc);
if (ret) {
@@ -805,3 +805,607 @@ dht_start_rebalance_task (xlator_t *this, call_frame_t *frame)
frame, frame);
return ret;
}
+
+int
+gf_listener_stop (void)
+{
+ glusterfs_ctx_t *ctx = NULL;
+ cmd_args_t *cmd_args = NULL;
+ int ret = 0;
+ xlator_t *this = NULL;
+
+ ctx = glusterfs_ctx_get ();
+ GF_ASSERT (ctx);
+ cmd_args = &ctx->cmd_args;
+ if (cmd_args->sock_file) {
+ ret = unlink (cmd_args->sock_file);
+ if (ret && (ENOENT == errno)) {
+ ret = 0;
+ }
+ }
+
+ if (ret) {
+ this = THIS;
+ gf_log (this->name, GF_LOG_ERROR, "Failed to unlink listener "
+ "socket %s, error: %s", cmd_args->sock_file,
+ strerror (errno));
+ }
+ return ret;
+}
+
+void
+dht_build_root_inode (xlator_t *this, inode_t **inode)
+{
+ inode_table_t *itable = NULL;
+ uuid_t root_gfid = {0, };
+
+ itable = inode_table_new (0, this);
+ if (!itable)
+ return;
+
+ root_gfid[15] = 1;
+ *inode = inode_find (itable, root_gfid);
+}
+
+void
+dht_build_root_loc (inode_t *inode, loc_t *loc)
+{
+ loc->path = "/";
+ loc->inode = inode;
+ loc->inode->ia_type = IA_IFDIR;
+ memset (loc->gfid, 0, 16);
+ loc->gfid[15] = 1;
+}
+
+
+/* return values: 1 -> error, bug ignore and continue
+ 0 -> proceed
+ -1 -> error, handle it */
+int32_t
+gf_defrag_handle_migrate_error (int32_t op_errno, gf_defrag_info_t *defrag)
+{
+ /* if errno is not ENOSPC or ENOTCONN, we can still continue
+ with rebalance process */
+ if ((errno != ENOSPC) || (errno != ENOTCONN))
+ return 1;
+
+ if (errno == ENOTCONN) {
+ /* Most probably mount point went missing (mostly due
+ to a brick down), say rebalance failure to user,
+ let him restart it if everything is fine */
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ return -1;
+ }
+
+ if (errno == ENOSPC) {
+ /* rebalance process itself failed, may be
+ remote brick went down, or write failed due to
+ disk full etc etc.. */
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ return -1;
+ }
+
+ return 0;
+}
+
+/* We do a depth first traversal of directories. But before we move into
+ * subdirs, we complete the data migration of those directories whose layouts
+ * have been fixed
+ */
+
+int
+gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ dict_t *migrate_data)
+{
+ int ret = -1;
+ loc_t entry_loc = {0,};
+ fd_t *fd = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *tmp = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_boolean_t free_entries = _gf_false;
+ off_t offset = 0;
+ dict_t *dict = NULL;
+ struct iatt iatt = {0,};
+ int32_t op_errno = 0;
+
+ fd = fd_create (loc->inode, defrag->pid);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to create fd");
+ goto out;
+ }
+
+ ret = syncop_opendir (this, loc, fd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s",
+ loc->path);
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&entries.list);
+
+ while ((ret = syncop_readdirp (this, fd, 131072, offset, NULL,
+ &entries)) != 0)
+ {
+ if ((ret < 0) || (ret && (errno == ENOENT)))
+ break;
+
+ free_entries = _gf_true;
+
+ if (list_empty (&entries.list))
+ break;
+ list_for_each_entry_safe (entry, tmp, &entries.list, list) {
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = 1;
+ goto out;
+ }
+
+ offset = entry->d_off;
+
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name, ".."))
+ continue;
+
+ if (IA_ISDIR (entry->d_stat.ia_type))
+ continue;
+
+ defrag->num_files_lookedup++;
+ if (entry->d_stat.ia_nlink > 1)
+ continue;
+
+ loc_wipe (&entry_loc);
+ ret =dht_build_child_loc (this, &entry_loc, loc,
+ entry->d_name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Child loc"
+ " build failed");
+ goto out;
+ }
+
+ if (uuid_is_null (entry->d_stat.ia_gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ " gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+ uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid);
+
+ if (uuid_is_null (loc->gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ " gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+ uuid_copy (entry_loc.pargfid, loc->gfid);
+
+ entry_loc.inode->ia_type = entry->d_stat.ia_type;
+
+ ret = syncop_lookup (this, &entry_loc, NULL, &iatt,
+ NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s"
+ " lookup failed", entry_loc.path);
+ continue;
+ }
+
+ /* if distribute is present, it will honor this key.
+ * -1 is returned if distribute is not present or file
+ * doesn't have a link-file. If file has link-file, the
+ * path of link-file will be the value, and also that
+ * guarantees that file has to be mostly migrated */
+
+ ret = syncop_getxattr (this, &entry_loc, &dict,
+ GF_XATTR_LINKINFO_KEY);
+ if (ret < 0) {
+ continue;
+ }
+
+ ret = syncop_setxattr (this, &entry_loc, migrate_data,
+ 0);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "setxattr "
+ "failed for %s", entry_loc.path);
+
+ if (ret == -1) {
+ op_errno = errno;
+ ret = gf_defrag_handle_migrate_error (op_errno,
+ defrag);
+
+ if (!ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "setxattr on %s failed: %s",
+ entry_loc.path,
+ strerror (op_errno));
+ else if (ret == 1)
+ continue;
+ else if (ret == -1)
+ goto out;
+ }
+
+ LOCK (&defrag->lock);
+ {
+ defrag->total_files += 1;
+ defrag->total_data += iatt.ia_size;
+ }
+ UNLOCK (&defrag->lock);
+ }
+
+ gf_dirent_free (&entries);
+ free_entries = _gf_false;
+ INIT_LIST_HEAD (&entries.list);
+
+ }
+ ret = 0;
+out:
+ if (free_entries)
+ gf_dirent_free (&entries);
+
+ loc_wipe (&entry_loc);
+
+ if (dict)
+ dict_unref(dict);
+
+ if (fd)
+ fd_unref (fd);
+ return ret;
+
+}
+
+
+int
+gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ dict_t *fix_layout, dict_t *migrate_data)
+{
+ int ret = -1;
+ loc_t entry_loc = {0,};
+ fd_t *fd = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *tmp = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_boolean_t free_entries = _gf_false;
+ dict_t *dict = NULL;
+ off_t offset = 0;
+ struct iatt iatt = {0,};
+
+ ret = syncop_lookup (this, loc, NULL, &iatt, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Lookup failed on %s",
+ loc->path);
+ goto out;
+ }
+
+ if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)
+ gf_defrag_migrate_data (this, defrag, loc, migrate_data);
+
+ gf_log (this->name, GF_LOG_TRACE, "fix layout called on %s", loc->path);
+
+ fd = fd_create (loc->inode, defrag->pid);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to create fd");
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_opendir (this, loc, fd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s",
+ loc->path);
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&entries.list);
+ while ((ret = syncop_readdirp (this, fd, 131072, offset, NULL,
+ &entries)) != 0)
+ {
+ if ((ret < 0) || (ret && (errno == ENOENT)))
+ break;
+ free_entries = _gf_true;
+
+ if (list_empty (&entries.list))
+ break;
+ list_for_each_entry_safe (entry, tmp, &entries.list, list) {
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = 1;
+ goto out;
+ }
+
+ offset = entry->d_off;
+
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name, ".."))
+ continue;
+
+ if (!IA_ISDIR (entry->d_stat.ia_type))
+ continue;
+
+ loc_wipe (&entry_loc);
+ ret =dht_build_child_loc (this, &entry_loc, loc,
+ entry->d_name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Child loc"
+ " build failed");
+ goto out;
+ }
+
+ if (uuid_is_null (entry->d_stat.ia_gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ "gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+ entry_loc.inode->ia_type = entry->d_stat.ia_type;
+
+ uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid);
+ if (uuid_is_null (loc->gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ "gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+ uuid_copy (entry_loc.pargfid, loc->gfid);
+
+ ret = syncop_lookup (this, &entry_loc, NULL, &iatt,
+ NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s"
+ " lookup failed", entry_loc.path);
+ continue;
+ }
+
+ ret = syncop_setxattr (this, &entry_loc, fix_layout,
+ 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Setxattr "
+ "failed for %s", entry_loc.path);
+ defrag->defrag_status =
+ GF_DEFRAG_STATUS_FAILED;
+ goto out;
+ }
+ ret = gf_defrag_fix_layout (this, defrag, &entry_loc,
+ fix_layout, migrate_data);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Fix layout "
+ "failed for %s", entry_loc.path);
+ goto out;
+ }
+
+ }
+ gf_dirent_free (&entries);
+ free_entries = _gf_false;
+ INIT_LIST_HEAD (&entries.list);
+ }
+
+ ret = 0;
+out:
+ if (free_entries)
+ gf_dirent_free (&entries);
+
+ loc_wipe (&entry_loc);
+
+ if (dict)
+ dict_unref(dict);
+
+ if (fd)
+ fd_unref (fd);
+
+ return ret;
+
+}
+
+
+int
+gf_defrag_start_crawl (void *data)
+{
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ int ret = -1;
+ loc_t loc = {0,};
+ struct iatt iatt = {0,};
+ struct iatt parent = {0,};
+ dict_t *fix_layout = NULL;
+ dict_t *migrate_data = NULL;
+
+ this = data;
+ if (!this)
+ goto out;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ defrag = conf->defrag;
+ if (!defrag)
+ goto out;
+
+ dht_build_root_inode (this, &defrag->root_inode);
+ if (!defrag->root_inode)
+ goto out;
+
+ dht_build_root_loc (defrag->root_inode, &loc);
+
+ /* fix-layout on '/' first */
+
+ ret = syncop_lookup (this, &loc, NULL, &iatt, NULL, &parent);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "look up on / failed");
+ goto out;
+ }
+
+ fix_layout = dict_new ();
+ if (!fix_layout) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str (fix_layout, GF_XATTR_FIX_LAYOUT_KEY, "yes");
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set dict str");
+ goto out;
+ }
+
+ ret = syncop_setxattr (this, &loc, fix_layout, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "fix layout on %s failed",
+ loc.path);
+ goto out;
+ }
+
+ if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
+ migrate_data = dict_new ();
+ if (!migrate_data) {
+ ret = -1;
+ goto out;
+ }
+ if (defrag->cmd == GF_DEFRAG_CMD_START_FORCE)
+ ret = dict_set_str (migrate_data,
+ "distribute.migrate-data", "force");
+ else
+ ret = dict_set_str (migrate_data,
+ "distribute.migrate-data",
+ "non-force");
+ if (ret)
+ goto out;
+ }
+ ret = gf_defrag_fix_layout (this, defrag, &loc, fix_layout,
+ migrate_data);
+
+out:
+ LOCK (&defrag->lock);
+ {
+ gf_defrag_status_get (defrag, NULL);
+ defrag->is_exiting = 1;
+ }
+ UNLOCK (&defrag->lock);
+
+ if (defrag)
+ GF_FREE (defrag);
+
+ return ret;
+}
+
+
+static int
+gf_defrag_done (int ret, call_frame_t *sync_frame, void *data)
+{
+ gf_listener_stop();
+
+ GF_FREE (data);
+ STACK_DESTROY (sync_frame->root);
+ kill (getpid(), SIGTERM);
+ return 0;
+}
+
+void *
+gf_defrag_start (void *data)
+{
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ xlator_t *this = NULL;
+
+ this = data;
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ defrag = conf->defrag;
+ if (!defrag)
+ goto out;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ defrag->pid = frame->root->pid;
+
+ defrag->defrag_status = GF_DEFRAG_STATUS_STARTED;
+
+ ret = synctask_new (this->ctx->env, gf_defrag_start_crawl,
+ gf_defrag_done, frame, this);
+
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Could not create"
+ " task for rebalance");
+out:
+ return NULL;
+}
+
+int
+gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)
+{
+ int ret = 0;
+ uint64_t files = 0;
+ uint64_t size = 0;
+ uint64_t lookup = 0;
+
+ if (!defrag)
+ goto out;
+
+ ret = 0;
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED)
+ goto out;
+
+ files = defrag->total_files;
+ size = defrag->total_data;
+ lookup = defrag->num_files_lookedup;
+
+ if (!dict)
+ goto log;
+
+ ret = dict_set_uint64 (dict, "files", files);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set file count");
+
+ ret = dict_set_uint64 (dict, "size", size);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set size of xfer");
+
+ ret = dict_set_uint64 (dict, "lookups", lookup);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set lookedup file count");
+
+ ret = dict_set_int32 (dict, "status", defrag->defrag_status);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set status");
+log:
+ gf_log (THIS->name, GF_LOG_INFO, "Files migrated: %"PRIu64", size: %"
+ PRIu64", lookups: %"PRIu64, files, size, lookup);
+
+
+out:
+ return 0;
+}
+
+int
+gf_defrag_stop (gf_defrag_info_t *defrag, dict_t *output)
+{
+ /* TODO: set a variable 'stop_defrag' here, it should be checked
+ in defrag loop */
+ int ret = -1;
+ GF_ASSERT (defrag);
+
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) {
+ goto out;
+ }
+
+ defrag->defrag_status = GF_DEFRAG_STATUS_STOPPED;
+
+ gf_defrag_status_get (defrag, output);
+ ret = 0;
+out:
+ gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c
index 18fee7cd3c6..816bf868e88 100644
--- a/xlators/cluster/dht/src/dht.c
+++ b/xlators/cluster/dht/src/dht.c
@@ -182,11 +182,20 @@ out:
int
notify (xlator_t *this, int event, void *data, ...)
{
- int ret = -1;
+ int ret = -1;
+ va_list ap;
+ dict_t *output = NULL;
GF_VALIDATE_OR_GOTO ("dht", this, out);
- ret = dht_notify (this, event, data);
+
+ if (!data)
+ goto out;
+
+ va_start (ap, data);
+ output = va_arg (ap, dict_t*);
+
+ ret = dht_notify (this, event, data, output);
out:
return ret;
@@ -343,10 +352,13 @@ out:
int
init (xlator_t *this)
{
- dht_conf_t *conf = NULL;
- char *temp_str = NULL;
- int ret = -1;
- int i = 0;
+ dht_conf_t *conf = NULL;
+ char *temp_str = NULL;
+ int ret = -1;
+ int i = 0;
+ gf_defrag_info_t *defrag = NULL;
+ int cmd = 0;
+
GF_VALIDATE_OR_GOTO ("dht", this, err);
@@ -366,6 +378,24 @@ init (xlator_t *this)
goto err;
}
+ ret = dict_get_int32 (this->options, "rebalance-cmd", &cmd);
+
+ if (cmd) {
+ defrag = GF_CALLOC (1, sizeof (gf_defrag_info_t),
+ gf_defrag_info_mt);
+
+ GF_VALIDATE_OR_GOTO (this->name, defrag, err);
+
+ LOCK_INIT (&defrag->lock);
+
+ defrag->is_exiting = 0;
+
+ defrag->cmd = cmd;
+
+ conf->defrag = defrag;
+ }
+
+
conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
/* If option is not "auto", other options _should_ be boolean */
@@ -550,5 +580,9 @@ struct volume_options options[] = {
{ .key = {"decommissioned-bricks"},
.type = GF_OPTION_TYPE_ANY,
},
+ { .key = {"rebalance-cmd"},
+ .type = GF_OPTION_TYPE_INT,
+ },
+
{ .key = {NULL} },
};