From 261c035c7d0cd1639cc8bd0ead82c30efcc0e93f Mon Sep 17 00:00:00 2001 From: Diogenes Nunez Date: Wed, 27 Jul 2016 11:09:47 -0400 Subject: cluster/tier: Adding compaction option for metadata databases Problem: As metadata in the database fills up, querying the database takes a long time. As a result, tier migration slows down. To counteract this, we added a way to enable the compaction methods of the underlying database. The goal is to reduce the size of the underlying file by eliminating database fragmentation. NOTE: There is currently a bug where sometimes a brick will attempt to activate compaction. This happens even when compaction is already turned on. The cause has been narrowed down to the compact_mode_switched flag flipping its value. Changes: libglusterfs/src/gfdb - Added a gfdb function to compact the underlying database, compact_db(). This is a no-op if the database has no such option. - Added a compaction function for SQLite3 that does the following: 1) Changes the auto_vacuum pragma of the database 2) Compacts the database according to the type of compaction requested - Compaction type can be changed by changing the macro GF_SQL_COMPACT_DEF to one of the 4 compaction types in gfdb_sqlite3.h. It is currently set to GF_SQL_COMPACT_INCR, or incremental vacuuming. xlators/cluster/dht/src - Added the following command-line option to enable SQLite3 compaction. gluster volume set <volname> tier-compact <on|off> - Added the following command-line options to change the frequency at which the hot and cold tiers are ordered to compact. gluster volume set <volname> tier-hot-compact-frequency <int> gluster volume set <volname> tier-cold-compact-frequency <int> - tier daemon periodically sends the (new) GFDB_IPC_CTR_SET_COMPACT_PRAGMA IPC to the CTR xlator. The IPC triggers compaction of the database. The inputs are both gf_boolean_t. IPC Input: compact_active: Is compaction currently on for the db. compact_mode_switched: Did we flip the compaction switch recently? IPC Output: 0 if the compaction succeeds. Non-zero otherwise.
xlators/features/changetimerecorder/src/ - When the CTR gets the compaction IPC, it launches a thread that will perform the compaction. The IPC ends after the thread is launched. To avoid extra allocations, the parameters are passed using static variables. Change-Id: I5e1433becb9eeff2afe8dcb4a5798977bf5ba0dd Signed-off-by: Diogenes Nunez Reviewed-on: http://review.gluster.org/15031 Reviewed-by: Milind Changire Reviewed-by: Dan Lambright Tested-by: Dan Lambright CentOS-regression: Gluster Build System NetBSD-regression: NetBSD Build System Smoke: Gluster Build System --- xlators/cluster/dht/src/dht-shared.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'xlators/cluster/dht/src/dht-shared.c') diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c index 873ced53eec..f410f71b5a6 100644 --- a/xlators/cluster/dht/src/dht-shared.c +++ b/xlators/cluster/dht/src/dht-shared.c @@ -148,7 +148,7 @@ dht_priv_dump (xlator_t *this) gf_proc_dump_write("search_unhashed", "%d", conf->search_unhashed); gf_proc_dump_write("gen", "%d", conf->gen); gf_proc_dump_write("min_free_disk", "%lf", conf->min_free_disk); - gf_proc_dump_write("min_free_inodes", "%lf", conf->min_free_inodes); + gf_proc_dump_write("min_free_inodes", "%lf", conf->min_free_inodes); gf_proc_dump_write("disk_unit", "%c", conf->disk_unit); gf_proc_dump_write("refresh_interval", "%d", conf->refresh_interval); gf_proc_dump_write("unhashed_sticky_bit", "%d", conf->unhashed_sticky_bit); @@ -433,14 +433,14 @@ dht_reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("lookup-optimize", conf->lookup_optimize, options, bool, out); - GF_OPTION_RECONF ("min-free-disk", conf->min_free_disk, options, + GF_OPTION_RECONF ("min-free-disk", conf->min_free_disk, options, percent_or_size, out); /* option can be any one of percent or bytes */ conf->disk_unit = 0; if (conf->min_free_disk < 100.0) conf->disk_unit = 'p'; - GF_OPTION_RECONF 
("min-free-inodes", conf->min_free_inodes, options, + GF_OPTION_RECONF ("min-free-inodes", conf->min_free_inodes, options, percent, out); GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt, @@ -711,8 +711,8 @@ dht_init (xlator_t *this) GF_OPTION_INIT ("use-readdirp", conf->use_readdirp, bool, err); - GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size, - err); + GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size, + err); GF_OPTION_INIT ("min-free-inodes", conf->min_free_inodes, percent, err); @@ -901,7 +901,7 @@ struct volume_options options[] = { "process starts balancing out the cluster, and logs will appear " "in log files", }, - { .key = {"min-free-inodes"}, + { .key = {"min-free-inodes"}, .type = GF_OPTION_TYPE_PERCENT, .default_value = "5%", .description = "after system has only N% of inodes, warnings " @@ -1038,6 +1038,20 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_STR, .default_value = "test", }, + { .key = {"tier-compact"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + }, + { .key = {"tier-hot-compact-frequency"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "604800", + .description = "Frequency to compact DBs on hot tier in system" + }, + { .key = {"tier-cold-compact-frequency"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "604800", + .description = "Frequency to compact DBs on cold tier in system" + }, { .key = {"tier-max-mb"}, .type = GF_OPTION_TYPE_INT, .default_value = "4000", -- cgit