summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/dht/src/dht-common.h
diff options
context:
space:
mode:
authorSusant Palai <spalai@redhat.com>2015-04-12 15:55:02 +0530
committerVijay Bellur <vbellur@redhat.com>2015-05-07 02:37:02 -0700
commit579186aeba940e3ec73093c48e17b5f6f94910d0 (patch)
tree5d86f55336c3d5be941718a40f91c4b3f0884f55 /xlators/cluster/dht/src/dht-common.h
parent2a4f346fe57fb21330857b7eb75153dc8abc4def (diff)
rebalance: Introducing local crawl and parallel migration
The current patch addresses two parts of the proposed design. 1. Rebalance multiple files in parallel 2. Crawl only bricks that belong to the current node Brief design explanation for the above two points. 1. Rebalance multiple files in parallel: ------------------------------------- The existing rebalance engine is single threaded. Hence, multiple threads have been introduced which run parallel to the crawler. The current rebalance migration is converted to a "Producer-Consumer" framework. Where Producer is : Crawler Consumer is : Migrating Threads Crawler: The crawler is the main thread. The job of the crawler is now limited to fix-layout of each directory and to add the files which are eligible for migration to a global queue in a round-robin manner, so that all the disk resources are used efficiently. Hence, the crawler will not be "blocked" by the migration process. Consumer: Each consumer thread monitors the global queue. If any file is added to this queue, it will dequeue that entry and migrate the file. Currently 20 migration threads are spawned at the beginning of the rebalance process. Hence, multiple file migrations happen in parallel. 2. Crawl only bricks that belong to the current node: -------------------------------------------------- As the rebalance process is spawned per node, it migrates only the files that belong to its own node for the sake of load balancing. But it also reads entries from the whole cluster, which is not necessary, as readdir hits other nodes. New Design: As part of the new design the rebalancer decides the subvols that are local to the rebalancer node by checking the node-uuid of the root directory before the crawler starts. Hence, readdir won't hit the whole cluster, as the rebalancer already has the context of the local subvols, and a node-uuid request for each file can also be avoided. This makes the rebalance process "more scalable". 
Change-Id: I6f1b44086a09df8ca23935fd213509c70cc0c050 BUG: 1217381 Signed-off-by: Susant Palai <spalai@redhat.com> Reviewed-on: http://review.gluster.org/10466 Tested-by: Gluster Build System <jenkins@build.gluster.com> Tested-by: NetBSD Build System Reviewed-by: N Balachandran <nbalacha@redhat.com>
Diffstat (limited to 'xlators/cluster/dht/src/dht-common.h')
-rw-r--r--xlators/cluster/dht/src/dht-common.h47
1 file changed, 47 insertions, 0 deletions
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 3ca626feec8..0e290465d44 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -290,6 +290,20 @@ struct gf_defrag_pattern_list {
gf_defrag_pattern_list_t *next;
};
+struct dht_container {
+ union {
+ struct list_head list;
+ struct {
+ struct _gf_dirent_t *next;
+ struct _gf_dirent_t *prev;
+ };
+ };
+ gf_dirent_t *df_entry;
+ xlator_t *this;
+ loc_t *parent_loc;
+ dict_t *migrate_data;
+};
+
struct gf_defrag_info_ {
uint64_t total_files;
uint64_t total_data;
@@ -317,6 +331,19 @@ struct gf_defrag_info_ {
uint64_t total_files_demoted;
int write_freq_threshold;
int read_freq_threshold;
+
+ pthread_cond_t parallel_migration_cond;
+ pthread_mutex_t dfq_mutex;
+ pthread_cond_t rebalance_crawler_alarm;
+ int32_t q_entry_count;
+ int32_t global_error;
+ struct dht_container *queue;
+ int32_t crawl_done;
+ int32_t abort;
+ int32_t wakeup_crawler;
+
+ /* Hard link handle requirement */
+ synclock_t link_lock;
};
typedef struct gf_defrag_info_ gf_defrag_info_t;
@@ -394,9 +421,19 @@ struct dht_conf {
dht_methods_t *methods;
struct mem_pool *lock_pool;
+
+ /*local subvol storage for rebalance*/
+ xlator_t **local_subvols;
+ int32_t local_subvols_cnt;
};
typedef struct dht_conf dht_conf_t;
+struct dht_dfoffset_ctx {
+ xlator_t *this;
+ off_t offset;
+ int32_t readdir_done;
+};
+typedef struct dht_dfoffset_ctx dht_dfoffset_ctx_t;
struct dht_disk_layout {
uint32_t cnt;
@@ -420,6 +457,14 @@ typedef enum {
GF_DHT_WEIGHTED_DISTRIBUTION
} dht_distribution_type_t;
+struct dir_dfmeta {
+ gf_dirent_t *equeue;
+ dht_dfoffset_ctx_t *offset_var;
+ struct list_head **head;
+ struct list_head **iterator;
+ int *fetch_entries;
+};
+
#define ENTRY_MISSING(op_ret, op_errno) (op_ret == -1 && op_errno == ENOENT)
#define is_revalidate(loc) (dht_inode_ctx_layout_get (loc->inode, this, NULL) == 0)
@@ -605,6 +650,8 @@ int dht_start_rebalance_task (xlator_t *this, call_frame_t *frame);
int dht_rebalance_in_progress_check (xlator_t *this, call_frame_t *frame);
int dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame);
+int
+dht_init_local_subvolumes (xlator_t *this, dht_conf_t *conf);
/* FOPS */
int32_t dht_lookup (call_frame_t *frame,