summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDiogenes Nunez <dnunez@redhat.com>2016-07-27 11:09:47 -0400
committerDan Lambright <dlambrig@redhat.com>2016-09-04 18:37:57 -0700
commit261c035c7d0cd1639cc8bd0ead82c30efcc0e93f (patch)
treeaf3a2e498023e7ad8af417312b83ce2f969ef738
parent6459fc812219551291e4be426ed8ecf2c90813a4 (diff)
cluster/tier: Adding compaction option for metadata databases
Problem: As metadata in the database fills up, querying the database take a long time. As a result, tier migration slows down. To counteract this, we added a way to enable the compaction methods of the underlying database. The goal is to reduce the size of the underlying file by eliminating database fragmentation. NOTE: There is currently a bug where sometimes a brick will attempt to activate compaction. This happens even compaction is already turned on. The cause is narrowed down to the compact_mode_switch flipping its value. Changes: libglusterfs/src/gfdb - Added a gfdb function to compact the underlying database, compact_db() This is a no-op if the database has no such option. - Added a compaction function for SQLite3 that does the following 1) Changes the auto_vacuum pragma of the database 2) Compacts the database according to the type of compaction requested - Compaction type can be changed by changing the macro GF_SQL_COMPACT_DEF to one of the 4 compaction types in gfdb_sqlite3.h It is currently set to GF_SQL_COMPACT_INCR, or incremental vacuuming. xlators/cluster/dht/src - Added the following command-line option to enable SQLite3 compaction. gluster volume set <vol-name> tier-compact <off|on> - Added the following command-line option to change the frequency the hot and cold tier are ordered to compact. gluster volume set <vol-name> tier-hot-compact-frequency <int> gluster volume set <vol-name> tier-cold-compact-frequency <int> - tier daemon periodically sends the (new) GFDB_IPC_CTR_SET_COMPACT_PRAGMA IPC to the CTR xlator. The IPC triggers compaction of the database. The inputs are both gf_boolean_t. IPC Input: compact_active: Is compaction currently on for the db. compact_mode_switched: Did we flip the compaction switch recently? IPC Output: 0 if the compaction succeeds. Non-zero otherwise. xlators/features/changetimerecorder/src/ - When the CTR gets the compaction IPC, it launches a thread that will perform the compaction. The IPC ends after the thread is launched. To avoid extra allocations, the parameters are passed using static variables. Change-Id: I5e1433becb9eeff2afe8dcb4a5798977bf5ba0dd Signed-off-by: Diogenes Nunez <dnunez@redhat.com> Reviewed-on: http://review.gluster.org/15031 Reviewed-by: Milind Changire <mchangir@redhat.com> Reviewed-by: Dan Lambright <dlambrig@redhat.com> Tested-by: Dan Lambright <dlambrig@redhat.com> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> Smoke: Gluster Build System <jenkins@build.gluster.org>
-rw-r--r--libglusterfs/src/gfdb/gfdb_data_store.c40
-rw-r--r--libglusterfs/src/gfdb/gfdb_data_store.h19
-rw-r--r--libglusterfs/src/gfdb/gfdb_data_store_types.h50
-rw-r--r--libglusterfs/src/gfdb/gfdb_sqlite3.c187
-rw-r--r--libglusterfs/src/gfdb/gfdb_sqlite3.h21
-rw-r--r--libglusterfs/src/libglusterfs-messages.h23
-rw-r--r--xlators/cluster/dht/src/dht-common.h31
-rw-r--r--xlators/cluster/dht/src/dht-shared.c26
-rw-r--r--xlators/cluster/dht/src/tier.c554
-rw-r--r--xlators/cluster/dht/src/tier.h6
-rw-r--r--xlators/features/changetimerecorder/src/changetimerecorder.c146
-rw-r--r--xlators/features/changetimerecorder/src/ctr-helper.h4
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-messages.h1
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c59
14 files changed, 1057 insertions, 110 deletions
diff --git a/libglusterfs/src/gfdb/gfdb_data_store.c b/libglusterfs/src/gfdb/gfdb_data_store.c
index 9c042f9e82e..cb567503fa3 100644
--- a/libglusterfs/src/gfdb/gfdb_data_store.c
+++ b/libglusterfs/src/gfdb/gfdb_data_store.c
@@ -433,6 +433,43 @@ delete_record (gfdb_conn_node_t *_conn_node,
return ret;
}
+/*Libgfdb API Function: Compact the database.
+ *
+ * Arguments:
+ * _conn_node : GFDB Connection node
+ * _compact_active : Is compaction currently on?
+ * _compact_mode_switched : Was the compaction switch flipped?
+ * Returns : if successful return 0 or
+ * -ve value in case of failure*/
+int
+compact_db (gfdb_conn_node_t *_conn_node, gf_boolean_t _compact_active,
+ gf_boolean_t _compact_mode_switched)
+{
+ int ret = 0;
+ gfdb_db_operations_t *db_operations_t = NULL;
+ void *gf_db_connection = NULL;
+
+ CHECK_CONN_NODE(_conn_node);
+
+ db_operations_t = &_conn_node->gfdb_connection.gfdb_db_operations;
+ gf_db_connection = _conn_node->gfdb_connection.gf_db_connection;
+
+ if (db_operations_t->compact_db_op) {
+
+ ret = db_operations_t->compact_db_op (gf_db_connection,
+ _compact_active,
+ _compact_mode_switched);
+ if (ret) {
+ gf_msg (GFDB_DATA_STORE, GF_LOG_ERROR, 0,
+ LG_MSG_COMPACT_FAILED, "Compaction operation "
+ "failed");
+ }
+
+ }
+
+ return ret;
+}
+
@@ -835,5 +872,8 @@ void get_gfdb_methods (gfdb_methods_t *methods)
/* Link info related functions */
methods->gfdb_link_info_new = gfdb_link_info_new;
methods->gfdb_link_info_free = gfdb_link_info_free;
+
+ /* Compaction related functions */
+ methods->compact_db = compact_db;
}
diff --git a/libglusterfs/src/gfdb/gfdb_data_store.h b/libglusterfs/src/gfdb/gfdb_data_store.h
index eacb8527034..0aac4611153 100644
--- a/libglusterfs/src/gfdb/gfdb_data_store.h
+++ b/libglusterfs/src/gfdb/gfdb_data_store.h
@@ -31,7 +31,7 @@
#define GFDB_IPC_CTR_CLEAR_OPS "gfdb.ipc-ctr-clear-op"
#define GFDB_IPC_CTR_GET_DB_PARAM_OPS "gfdb.ipc-ctr-get-db-parm"
#define GFDB_IPC_CTR_GET_DB_VERSION_OPS "gfdb.ipc-ctr-get-db-version"
-
+#define GFDB_IPC_CTR_SET_COMPACT_PRAGMA "gfdb.ipc-ctr-set-compact-pragma"
/*
* CTR IPC INPUT/OUTPUT
*
@@ -348,6 +348,21 @@ typedef int (*set_db_params_t)(gfdb_conn_node_t *db_conn,
char *param_key,
char *param_value);
+/*Libgfdb API Function: Compact the database.
+ *
+ * Arguments:
+ * _conn_node : GFDB Connection node
+ * _compact_active : Is compaction currently on?
+ * _compact_mode_switched : Was the compaction switch flipped?
+ * Returns : if successful return 0 or
+ * -ve value in case of failure*/
+int
+compact_db (gfdb_conn_node_t *_conn_node, gf_boolean_t _compact_active,
+ gf_boolean_t _compact_mode_switched);
+
+typedef int (*compact_db_t)(gfdb_conn_node_t *db_conn,
+ gf_boolean_t compact_active,
+ gf_boolean_t compact_mode_switched);
typedef struct gfdb_methods_s {
@@ -377,6 +392,8 @@ typedef struct gfdb_methods_s {
gfdb_link_info_new_t gfdb_link_info_new;
gfdb_link_info_free_t gfdb_link_info_free;
+ /* Compaction related functions */
+ compact_db_t compact_db;
} gfdb_methods_t;
void get_gfdb_methods (gfdb_methods_t *methods);
diff --git a/libglusterfs/src/gfdb/gfdb_data_store_types.h b/libglusterfs/src/gfdb/gfdb_data_store_types.h
index 1acbdf2f99f..d0c96370eb8 100644
--- a/libglusterfs/src/gfdb/gfdb_data_store_types.h
+++ b/libglusterfs/src/gfdb/gfdb_data_store_types.h
@@ -40,7 +40,8 @@ typedef enum gf_db_operation {
GFDB_W_DELETE_DB_OP,
GFDB_UW_DELETE_DB_OP,
GFDB_WFC_UPDATE_DB_OP,
- GFDB_RFC_UPDATE_DB_OP
+ GFDB_RFC_UPDATE_DB_OP,
+ GFDB_DB_COMPACT_DB_OP /* Added for VACUUM/manual compaction support */
} gf_db_operation_t;
@@ -81,19 +82,12 @@ gfdb_time_2_usec(gfdb_time_t *gfdb_time)
return ((uint64_t) gfdb_time->tv_sec * GFDB_MICROSEC) + gfdb_time->tv_usec;
}
-
-
-
-
/******************************************************************************
*
* Insert/Update Record related data structures/functions
*
* ****************************************************************************/
-
-
-
/*Indicated a generic synchronous write to the db
* This may or may not be implemented*/
typedef enum gfdb_sync_type {
@@ -123,11 +117,6 @@ out:
return ret;
}
-
-
-
-
-
/*Indicated different types of db*/
typedef enum gfdb_db_type {
GFDB_INVALID_DB = -1,
@@ -165,12 +154,6 @@ out:
return ret;
}
-
-
-
-
-
-
/*Tells the path of the fop*/
typedef enum gfdb_fop_path {
GFDB_FOP_INVALID = -1,
@@ -206,12 +189,6 @@ isunwindpath(gfdb_fop_path_t gfdb_fop_path)
return (gfdb_fop_path >= GFDB_FOP_UNWIND) ? _gf_true : _gf_false;
}
-
-
-
-
-
-
/*Tell what type of fop it was
* Like whether a dentry fop or a inode fop
* Read fop or a write fop etc*/
@@ -258,12 +235,6 @@ isdentrycreatefop(gfdb_fop_type_t fop_type)
_gf_true : _gf_false;
}
-
-
-
-
-
-
/*The structure that is used to send insert/update the databases
* using insert_db api*/
typedef struct gfdb_db_record {
@@ -374,6 +345,20 @@ typedef int
+/*Used to compact the database
+ * Arguments:
+ * db_conn : GFDB Connection node
+ * compact_active : Is compaction currently on?
+ * compact_mode_switched : Was the compaction switch flipped?
+ * Returns : if successful return 0 or
+ * -ve value in case of failure*/
+typedef int
+(*gfdb_compact_db_t)(void *db_conn, gf_boolean_t compact_active,
+ gf_boolean_t compact_mode_switched);
+
+
+
+
/* Query all the records from the database
* Arguments:
* db_conn : plugin specific data base connection
@@ -502,6 +487,7 @@ typedef struct gfdb_db_operations {
gfdb_fini_db_t fini_db_op;
gfdb_insert_record_t insert_record_op;
gfdb_delete_record_t delete_record_op;
+ gfdb_compact_db_t compact_db_op;
gfdb_find_all_t find_all_op;
gfdb_find_unchanged_for_time_t find_unchanged_for_time_op;
gfdb_find_recently_changed_files_t find_recently_changed_files_op;
@@ -598,5 +584,3 @@ typedef struct gfdb_connection {
#endif
-
-
diff --git a/libglusterfs/src/gfdb/gfdb_sqlite3.c b/libglusterfs/src/gfdb/gfdb_sqlite3.c
index 04781be562a..094028361c5 100644
--- a/libglusterfs/src/gfdb/gfdb_sqlite3.c
+++ b/libglusterfs/src/gfdb/gfdb_sqlite3.c
@@ -239,6 +239,7 @@ gf_sqlite3_fill_db_operations(gfdb_db_operations_t *gfdb_db_ops)
gfdb_db_ops->insert_record_op = gf_sqlite3_insert;
gfdb_db_ops->delete_record_op = gf_sqlite3_delete;
+ gfdb_db_ops->compact_db_op = gf_sqlite3_vacuum;
gfdb_db_ops->find_all_op = gf_sqlite3_find_all;
gfdb_db_ops->find_unchanged_for_time_op =
@@ -1327,10 +1328,14 @@ gf_sqlite3_pragma (void *db_conn, char *pragma_key, char **pragma_value)
goto out;
}
- ret = gf_asprintf (pragma_value, "%s", sqlite3_column_text (pre_stmt, 0));
- if (ret <= 0) {
- gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED,
- "Failed to get %s from db", pragma_key);
+ if (pragma_value) {
+ ret = gf_asprintf (pragma_value, "%s",
+ sqlite3_column_text (pre_stmt, 0));
+ if (ret <= 0) {
+ gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0,
+ LG_MSG_QUERY_FAILED, "Failed to get %s from db",
+ pragma_key);
+ }
}
ret = 0;
@@ -1382,3 +1387,177 @@ out:
return ret;
}
+
+/* Function to vacuum of sqlite db
+ * Input:
+ * void *db_conn : Sqlite connection
+ * gf_boolean_t compact_active : Is compaction on?
+ * gf_boolean_t compact_mode_switched : Did we just flip the compaction swtich?
+ * Return:
+ * On success return 0
+ * On failure return -1
+ * */
+int
+gf_sqlite3_vacuum (void *db_conn, gf_boolean_t compact_active,
+ gf_boolean_t compact_mode_switched)
+{
+ int ret = -1;
+ gf_sql_connection_t *sql_conn = db_conn;
+ char *sqlstring = NULL;
+ char *sql_strerror = NULL;
+ gf_boolean_t changing_pragma = _gf_true;
+
+ CHECK_SQL_CONN (sql_conn, out);
+
+ if (GF_SQL_COMPACT_DEF == GF_SQL_COMPACT_NONE) {
+ gf_msg (GFDB_STR_SQLITE3, GF_LOG_INFO, 0,
+ LG_MSG_COMPACT_STATUS,
+ "VACUUM type is off: no VACUUM to do");
+ goto out;
+ }
+
+ if (compact_mode_switched) {
+ if (compact_active) { /* Then it was OFF before.
+ So turn everything on */
+ ret = 0;
+ switch (GF_SQL_COMPACT_DEF) {
+ case GF_SQL_COMPACT_FULL:
+ ret = gf_sqlite3_set_pragma (db_conn,
+ "auto_vacuum",
+ GF_SQL_AV_FULL);
+ break;
+ case GF_SQL_COMPACT_INCR:
+ ret = gf_sqlite3_set_pragma (db_conn,
+ "auto_vacuum",
+ GF_SQL_AV_INCR);
+ break;
+ case GF_SQL_COMPACT_MANUAL:
+ changing_pragma = _gf_false;
+ default:
+ ret = -1;
+ gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0,
+ LG_MSG_COMPACT_FAILED,
+ "VACUUM type undefined");
+ goto out;
+ break;
+ }
+
+ } else { /* Then it was ON before, so turn it all off */
+ if (GF_SQL_COMPACT_DEF == GF_SQL_COMPACT_FULL ||
+ GF_SQL_COMPACT_DEF == GF_SQL_COMPACT_INCR) {
+ ret = gf_sqlite3_set_pragma (db_conn,
+ "auto_vacuum",
+ GF_SQL_AV_NONE);
+ } else {
+ changing_pragma = _gf_false;
+ }
+ }
+
+ if (ret) {
+ gf_msg (GFDB_STR_SQLITE3, GF_LOG_TRACE, 0,
+ LG_MSG_PREPARE_FAILED,
+ "Failed to set the pragma");
+ goto out;
+ }
+
+ gf_msg (GFDB_STR_SQLITE3, GF_LOG_INFO, 0,
+ LG_MSG_COMPACT_STATUS, "Turning compaction %i",
+ GF_SQL_COMPACT_DEF);
+
+ /* If we move from an auto_vacuum scheme to off, */
+ /* or vice-versa, we must VACUUM to save the change. */
+ /* In the case of a manual VACUUM scheme, we might as well */
+ /* run a manual VACUUM now if we */
+ if (changing_pragma || compact_active) {
+ ret = gf_asprintf (&sqlstring, "VACUUM;");
+ if (ret <= 0) {
+ gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0,
+ LG_MSG_PREPARE_FAILED,
+ "Failed allocating memory");
+ goto out;
+ }
+ gf_msg(GFDB_STR_SQLITE3, GF_LOG_INFO, 0,
+ LG_MSG_COMPACT_STATUS, "Sealed with a VACUUM");
+ }
+ } else { /* We are active, so it's time to VACUUM */
+ if (!compact_active) { /* Did we somehow enter an inconsistent
+ state? */
+ ret = -1;
+ gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0,
+ LG_MSG_PREPARE_FAILED,
+ "Tried to VACUUM when compaction inactive");
+ goto out;
+ }
+
+ gf_msg(GFDB_STR_SQLITE3, GF_LOG_TRACE, 0,
+ LG_MSG_COMPACT_STATUS,
+ "Doing regular vacuum of type %i", GF_SQL_COMPACT_DEF);
+
+ switch (GF_SQL_COMPACT_DEF) {
+ case GF_SQL_COMPACT_INCR: /* INCR auto_vacuum */
+ ret = gf_asprintf(&sqlstring,
+ "PRAGMA incremental_vacuum;");
+ if (ret <= 0) {
+ gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0,
+ LG_MSG_PREPARE_FAILED,
+ "Failed allocating memory");
+ goto out;
+ }
+ gf_msg(GFDB_STR_SQLITE3, GF_LOG_INFO, 0,
+ LG_MSG_COMPACT_STATUS,
+ "Will commence an incremental VACUUM");
+ break;
+ /* (MANUAL) Invoke the VACUUM command */
+ case GF_SQL_COMPACT_MANUAL:
+ ret = gf_asprintf(&sqlstring, "VACUUM;");
+ if (ret <= 0) {
+ gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0,
+ LG_MSG_PREPARE_FAILED,
+ "Failed allocating memory");
+ goto out;
+ }
+ gf_msg(GFDB_STR_SQLITE3, GF_LOG_INFO, 0,
+ LG_MSG_COMPACT_STATUS,
+ "Will commence a VACUUM");
+ break;
+ /* (FULL) The database does the compaction itself. */
+ /* We cannot do anything else, so we can leave */
+ /* without sending anything to the database */
+ case GF_SQL_COMPACT_FULL:
+ ret = 0;
+ goto success;
+ /* Any other state must be an error. Note that OFF */
+ /* cannot hit this statement since we immediately leave */
+ /* in that case */
+ default:
+ ret = -1;
+ gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0,
+ LG_MSG_COMPACT_FAILED,
+ "VACUUM type undefined");
+ goto out;
+ break;
+ }
+ }
+
+ gf_msg(GFDB_STR_SQLITE3, GF_LOG_TRACE, 0, LG_MSG_COMPACT_STATUS,
+ "SQLString == %s", sqlstring);
+
+ ret = sqlite3_exec(sql_conn->sqlite3_db_conn, sqlstring, NULL, NULL,
+ &sql_strerror);
+
+ if (ret != SQLITE_OK) {
+ gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0,
+ LG_MSG_GET_RECORD_FAILED, "Failed to vacuum "
+ "the db : %s", sqlite3_errmsg (db_conn));
+ ret = -1;
+ goto out;
+ }
+success:
+ gf_msg(GFDB_STR_SQLITE3, GF_LOG_INFO, 0, LG_MSG_COMPACT_STATUS,
+ compact_mode_switched ? "Successfully changed VACUUM on/off"
+ : "DB successfully VACUUM");
+out:
+ GF_FREE(sqlstring);
+
+ return ret;
+}
diff --git a/libglusterfs/src/gfdb/gfdb_sqlite3.h b/libglusterfs/src/gfdb/gfdb_sqlite3.h
index 9d0d996a322..4d70a60e431 100644
--- a/libglusterfs/src/gfdb/gfdb_sqlite3.h
+++ b/libglusterfs/src/gfdb/gfdb_sqlite3.h
@@ -73,8 +73,7 @@ do {\
#define GF_SQL_AV_NONE "none"
#define GF_SQL_AV_FULL "full"
-#define GF_SQL_AV_INCR "incr"
-
+#define GF_SQL_AV_INCR "incremental"
#define GF_SQL_SYNC_OFF "off"
#define GF_SQL_SYNC_NORMAL "normal"
@@ -87,7 +86,12 @@ do {\
#define GF_SQL_JM_WAL "wal"
#define GF_SQL_JM_OFF "off"
+#define GF_SQL_COMPACT_NONE 0
+#define GF_SQL_COMPACT_FULL 1
+#define GF_SQL_COMPACT_INCR 2
+#define GF_SQL_COMPACT_MANUAL 3
+#define GF_SQL_COMPACT_DEF GF_SQL_COMPACT_INCR
typedef enum gf_sql_auto_vacuum {
gf_sql_av_none = 0,
gf_sql_av_full,
@@ -319,7 +323,18 @@ int gf_sqlite3_pragma (void *db_conn, char *pragma_key, char **pragma_value);
int
gf_sqlite3_set_pragma (void *db_conn, char *pragma_key, char *pragma_value);
-
+/* Function to vacuum of sqlite db
+ * Input:
+ * void *db_conn : Sqlite connection
+ * gf_boolean_t compact_active : Is compaction on?
+ * gf_boolean_t compact_mode_switched : Did we just flip the compaction swtich?
+ * Return:
+ * On success return 0
+ * On failure return -1
+ * */
+int
+gf_sqlite3_vacuum (void *db_conn, gf_boolean_t compact_active,
+ gf_boolean_t compact_mode_switched);
void gf_sqlite3_fill_db_operations (gfdb_db_operations_t *gfdb_db_ops);
diff --git a/libglusterfs/src/libglusterfs-messages.h b/libglusterfs/src/libglusterfs-messages.h
index d2ad44e470e..29196929eb3 100644
--- a/libglusterfs/src/libglusterfs-messages.h
+++ b/libglusterfs/src/libglusterfs-messages.h
@@ -36,7 +36,9 @@
*/
#define GLFS_LG_BASE GLFS_MSGID_COMP_LIBGLUSTERFS
-#define GLFS_LG_NUM_MESSAGES 207
+
+#define GLFS_LG_NUM_MESSAGES 209
+
#define GLFS_LG_MSGID_END (GLFS_LG_BASE + GLFS_LG_NUM_MESSAGES + 1)
/* Messaged with message IDs */
#define glfs_msg_start_lg GLFS_LG_BASE, "Invalid: Start of messages"
@@ -1762,6 +1764,7 @@
* @recommendedaction
*
*/
+
#define LG_MSG_INVALID_INODE_LIST (GLFS_LG_BASE + 207)
/*!
@@ -1770,6 +1773,24 @@
* @recommendedaction
*
*/
+
+#define LG_MSG_COMPACT_FAILED (GLFS_LG_BASE + 208)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define LG_MSG_COMPACT_STATUS (GLFS_LG_BASE + 209)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
/*------------*/
#define glfs_msg_end_lg GLFS_LG_MSGID_END, "Invalid: End of messages"
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 3717a68273c..da1bcb6a4a1 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -310,7 +310,7 @@ typedef struct dht_local dht_local_t;
/* du - disk-usage */
struct dht_du {
double avail_percent;
- double avail_inodes;
+ double avail_inodes;
uint64_t avail_space;
uint32_t log;
uint32_t chunks;
@@ -325,10 +325,10 @@ enum gf_defrag_type {
GF_DEFRAG_CMD_START_FORCE = 1 + 4,
GF_DEFRAG_CMD_START_TIER = 1 + 5,
GF_DEFRAG_CMD_STATUS_TIER = 1 + 6,
- GF_DEFRAG_CMD_START_DETACH_TIER = 1 + 7,
- GF_DEFRAG_CMD_STOP_DETACH_TIER = 1 + 8,
- GF_DEFRAG_CMD_PAUSE_TIER = 1 + 9,
- GF_DEFRAG_CMD_RESUME_TIER = 1 + 10,
+ GF_DEFRAG_CMD_START_DETACH_TIER = 1 + 7,
+ GF_DEFRAG_CMD_STOP_DETACH_TIER = 1 + 8,
+ GF_DEFRAG_CMD_PAUSE_TIER = 1 + 9,
+ GF_DEFRAG_CMD_RESUME_TIER = 1 + 10,
};
typedef enum gf_defrag_type gf_defrag_type;
@@ -398,9 +398,26 @@ typedef struct gf_tier_conf {
uint64_t max_migrate_bytes;
int max_migrate_files;
tier_mode_t mode;
+ /* These flags are only used for tier-compact */
+ gf_boolean_t compact_active;
+ /* These 3 flags are set to true when the client changes the */
+ /* compaction mode on the command line. */
+ /* When they are set, the daemon will trigger compaction as */
+ /* soon as possible to activate or deactivate compaction. */
+ /* If in the middle of a compaction, then the switches take */
+ /* effect on the next compaction, not the current one. */
+ /* If the user switches it off, we want to avoid needless */
+ /* compactions. */
+ /* If the user switches it on, they want to compact as soon */
+ /* as possible. */
+ gf_boolean_t compact_mode_switched;
+ gf_boolean_t compact_mode_switched_hot;
+ gf_boolean_t compact_mode_switched_cold;
int tier_max_promote_size;
int tier_promote_frequency;
int tier_demote_frequency;
+ int tier_compact_hot_frequency;
+ int tier_compact_cold_frequency;
uint64_t st_last_promoted_size;
uint64_t st_last_demoted_size;
tier_pause_state_t pause_state;
@@ -1023,9 +1040,9 @@ int32_t dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
int32_t dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iatt *stbuf, int32_t valid, dict_t *xdata);
int32_t dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t mode, off_t offset, size_t len, dict_t *xdata);
+ int32_t mode, off_t offset, size_t len, dict_t *xdata);
int32_t dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset, size_t len, dict_t *xdata);
+ off_t offset, size_t len, dict_t *xdata);
int32_t dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd,
off_t offset, off_t len, dict_t *xdata);
int32_t dht_ipc (call_frame_t *frame, xlator_t *this, int32_t op,
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
index 873ced53eec..f410f71b5a6 100644
--- a/xlators/cluster/dht/src/dht-shared.c
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -148,7 +148,7 @@ dht_priv_dump (xlator_t *this)
gf_proc_dump_write("search_unhashed", "%d", conf->search_unhashed);
gf_proc_dump_write("gen", "%d", conf->gen);
gf_proc_dump_write("min_free_disk", "%lf", conf->min_free_disk);
- gf_proc_dump_write("min_free_inodes", "%lf", conf->min_free_inodes);
+ gf_proc_dump_write("min_free_inodes", "%lf", conf->min_free_inodes);
gf_proc_dump_write("disk_unit", "%c", conf->disk_unit);
gf_proc_dump_write("refresh_interval", "%d", conf->refresh_interval);
gf_proc_dump_write("unhashed_sticky_bit", "%d", conf->unhashed_sticky_bit);
@@ -433,14 +433,14 @@ dht_reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("lookup-optimize", conf->lookup_optimize, options,
bool, out);
- GF_OPTION_RECONF ("min-free-disk", conf->min_free_disk, options,
+ GF_OPTION_RECONF ("min-free-disk", conf->min_free_disk, options,
percent_or_size, out);
/* option can be any one of percent or bytes */
conf->disk_unit = 0;
if (conf->min_free_disk < 100.0)
conf->disk_unit = 'p';
- GF_OPTION_RECONF ("min-free-inodes", conf->min_free_inodes, options,
+ GF_OPTION_RECONF ("min-free-inodes", conf->min_free_inodes, options,
percent, out);
GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt,
@@ -711,8 +711,8 @@ dht_init (xlator_t *this)
GF_OPTION_INIT ("use-readdirp", conf->use_readdirp, bool, err);
- GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size,
- err);
+ GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size,
+ err);
GF_OPTION_INIT ("min-free-inodes", conf->min_free_inodes, percent,
err);
@@ -901,7 +901,7 @@ struct volume_options options[] = {
"process starts balancing out the cluster, and logs will appear "
"in log files",
},
- { .key = {"min-free-inodes"},
+ { .key = {"min-free-inodes"},
.type = GF_OPTION_TYPE_PERCENT,
.default_value = "5%",
.description = "after system has only N% of inodes, warnings "
@@ -1038,6 +1038,20 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_STR,
.default_value = "test",
},
+ { .key = {"tier-compact"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
+ { .key = {"tier-hot-compact-frequency"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "604800",
+ .description = "Frequency to compact DBs on hot tier in system"
+ },
+ { .key = {"tier-cold-compact-frequency"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "604800",
+ .description = "Frequency to compact DBs on cold tier in system"
+ },
{ .key = {"tier-max-mb"},
.type = GF_OPTION_TYPE_INT,
.default_value = "4000",
diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c
index 0d53a62d327..7e5e1004b84 100644
--- a/xlators/cluster/dht/src/tier.c
+++ b/xlators/cluster/dht/src/tier.c
@@ -1240,15 +1240,15 @@ tier_process_ctr_query (tier_brick_list_t *local_brick, void *args)
gfdb_brick_info->time_stamp,
sizeof (gfdb_time_t));
- SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict,
- GFDB_IPC_CTR_KEY, GFDB_IPC_CTR_QUERY_OPS,
- ret, out);
+ SET_DB_PARAM_TO_DICT (this->name, ctr_ipc_in_dict,
+ GFDB_IPC_CTR_KEY, GFDB_IPC_CTR_QUERY_OPS,
+ ret, out);
- SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict,
- GFDB_IPC_CTR_GET_QFILE_PATH,
- local_brick->qfile_path,
- ret, out);
+ SET_DB_PARAM_TO_DICT (this->name, ctr_ipc_in_dict,
+ GFDB_IPC_CTR_GET_QFILE_PATH,
+ local_brick->qfile_path,
+ ret, out);
ret = dict_set_bin (ctr_ipc_in_dict, GFDB_IPC_CTR_GET_QUERY_PARAMS,
ipc_ctr_params, sizeof (*ipc_ctr_params));
@@ -1360,7 +1360,7 @@ tier_process_brick (tier_brick_list_t *local_brick, void *args) {
gf_msg ("tier", GF_LOG_ERROR, 0,
LG_MSG_SET_PARAM_FAILED, "Failed to set %s "
"to params dictionary",
- GFDB_IPC_CTR_GET_DB_KEY);\
+ GFDB_IPC_CTR_GET_DB_KEY);
goto out;
}
@@ -1442,11 +1442,12 @@ tier_build_migration_qfile (migration_args_t *args,
}
time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec;
- /* The migration daemon may run a varrying numberof usec after the sleep */
- /* call triggers. A file may be registered in CTR some number of usec X */
- /* after the daemon started and missed in the subsequent cycle if the */
- /* daemon starts Y usec after the period in seconds where Y>X. Normalize */
- /* away this problem by always setting usec to 0. */
+ /* The migration daemon may run a varying numberof usec after the */
+ /* sleep call triggers. A file may be registered in CTR some number */
+ /* of usec X after the daemon started and missed in the subsequent */
+ /* cycle if the daemon starts Y usec after the period in seconds */
+ /* where Y>X. Normalize away this problem by always setting usec */
+ /* to 0. */
time_in_past.tv_usec = 0;
gfdb_brick_info.time_stamp = &time_in_past;
@@ -1649,6 +1650,265 @@ out:
return ret;
}
+
+/*
+ * Command the CTR on a brick to compact the local database using an IPC
+ */
+static int
+tier_process_self_compact (tier_brick_list_t *local_brick, void *args)
+{
+ int ret = -1;
+ char *db_path = NULL;
+ query_cbk_args_t *query_cbk_args = NULL;
+ xlator_t *this = NULL;
+ gfdb_conn_node_t *conn_node = NULL;
+ dict_t *params_dict = NULL;
+ dict_t *ctr_ipc_dict = NULL;
+ gfdb_brick_info_t *gfdb_brick_info = args;
+ int is_changing = -1;
+
+ /*Init of all the essentials*/
+ GF_VALIDATE_OR_GOTO ("tier", gfdb_brick_info , out);
+ query_cbk_args = gfdb_brick_info->_query_cbk_args;
+
+ GF_VALIDATE_OR_GOTO ("tier", query_cbk_args->this, out);
+ this = query_cbk_args->this;
+
+ GF_VALIDATE_OR_GOTO (this->name,
+ gfdb_brick_info->_query_cbk_args, out);
+
+ GF_VALIDATE_OR_GOTO (this->name, local_brick, out);
+
+ GF_VALIDATE_OR_GOTO (this->name, local_brick->xlator, out);
+
+ GF_VALIDATE_OR_GOTO (this->name, local_brick->brick_db_path, out);
+
+ db_path = local_brick->brick_db_path;
+
+ /*Preparing DB parameters before init_db i.e getting db connection*/
+ params_dict = dict_new ();
+ if (!params_dict) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_LOG_TIER_ERROR,
+ "DB Params cannot initialized");
+ goto out;
+ }
+ SET_DB_PARAM_TO_DICT (this->name, params_dict,
+ (char *) gfdb_methods.get_db_path_key(), db_path,
+ ret, out);
+
+ /*Get the db connection*/
+ conn_node = gfdb_methods.init_db ((void *)params_dict,
+ dht_tier_db_type);
+ if (!conn_node) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_LOG_TIER_ERROR,
+ "FATAL: Failed initializing db operations");
+ goto out;
+ }
+
+ ret = 0;
+
+ /*Preparing ctr_ipc_dict*/
+ ctr_ipc_dict = dict_new ();
+ if (!ctr_ipc_dict) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_LOG_TIER_ERROR,
+ "ctr_ipc_dict cannot initialized");
+ goto out;
+ }
+
+ ret = dict_set_int32 (ctr_ipc_dict, "compact_active",
+ query_cbk_args->defrag->
+ tier_conf.compact_active);
+
+ if (ret) {
+ gf_msg ("tier", GF_LOG_ERROR, 0,
+ LG_MSG_SET_PARAM_FAILED, "Failed to set %s "
+ "to params dictionary",
+ "compact_active");
+ goto out;
+ }
+
+ ret = dict_set_int32 (ctr_ipc_dict, "compact_mode_switched",
+ query_cbk_args->defrag->
+ tier_conf.compact_mode_switched);
+
+ if (ret) {
+ gf_msg ("tier", GF_LOG_ERROR, 0,
+ LG_MSG_SET_PARAM_FAILED, "Failed to set %s "
+ "to params dictionary",
+ "compact_mode_switched");
+ goto out;
+ }
+
+ SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_dict,
+ GFDB_IPC_CTR_KEY, GFDB_IPC_CTR_SET_COMPACT_PRAGMA,
+ ret, out);
+
+ gf_msg (this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
+ "Starting Compaction IPC");
+
+ ret = syncop_ipc (local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_dict,
+ NULL);
+
+ gf_msg (this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
+ "Ending Compaction IPC");
+
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_LOG_TIER_ERROR, "Failed compaction "
+ "on db %s error %d", local_brick->brick_db_path, ret);
+ goto out;
+ }
+
+ gf_msg (this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
+ "SUCCESS: %s Compaction", local_brick->brick_name);
+
+ ret = 0;
+out:
+ if (params_dict) {
+ dict_unref (params_dict);
+ params_dict = NULL;
+ }
+
+ if (ctr_ipc_dict) {
+ dict_unref (ctr_ipc_dict);
+ ctr_ipc_dict = NULL;
+ }
+
+ gfdb_methods.fini_db (conn_node);
+
+ return ret;
+}
+
+/*
+ * This is the call back function for each brick from hot/cold bricklist.
+ * It determines the database type on each brick and calls the corresponding
+ * function to prepare the compaction IPC.
+ */
+static int
+tier_compact_db_brick (tier_brick_list_t *local_brick, void *args)
+{
+ int ret = -1;
+ char *strval = NULL;
+
+ GF_VALIDATE_OR_GOTO ("tier", local_brick, out);
+
+ GF_VALIDATE_OR_GOTO ("tier", local_brick->xlator, out);
+
+ ret = tier_process_self_compact (local_brick, args);
+ if (ret) {
+ gf_msg ("tier", GF_LOG_INFO, 0,
+ DHT_MSG_LOG_TIER_STATUS,
+ "Brick %s did not compact",
+ local_brick->brick_name);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+
+ return ret;
+}
+
+static int
+tier_send_compact (migration_args_t *args,
+ query_cbk_args_t *query_cbk_args)
+{
+ gfdb_time_t current_time;
+ gfdb_brick_info_t gfdb_brick_info;
+ gfdb_time_t time_in_past;
+ int ret = -1;
+ tier_brick_list_t *local_brick = NULL;
+
+ time_in_past.tv_sec = args->freq_time;
+ time_in_past.tv_usec = 0;
+
+ ret = gettimeofday (&current_time, NULL);
+ if (ret == -1) {
+ gf_msg (args->this->name, GF_LOG_ERROR, errno,
+ DHT_MSG_SYS_CALL_GET_TIME_FAILED,
+ "Failed to get current time");
+ goto out;
+ }
+ time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec;
+
+ /* The migration daemon may run a varying numberof usec after the sleep
+ call triggers. A file may be registered in CTR some number of usec X
+ after the daemon started and missed in the subsequent cycle if the
+ daemon starts Y usec after the period in seconds where Y>X. Normalize
+ away this problem by always setting usec to 0. */
+ time_in_past.tv_usec = 0;
+
+ gfdb_brick_info.time_stamp = &time_in_past;
+
+ /* This is meant to say we are always compacting at this point */
+ /* We simply borrow the promotion flag to do this */
+ gfdb_brick_info._gfdb_promote = 1;
+
+ gfdb_brick_info._query_cbk_args = query_cbk_args;
+
+ list_for_each_entry (local_brick, args->brick_list, list) {
+
+ gf_msg (args->this->name, GF_LOG_TRACE, 0,
+ DHT_MSG_LOG_TIER_STATUS,
+ "Start compaction for %s",
+ local_brick->brick_name);
+
+ ret = tier_compact_db_brick (local_brick,
+ &gfdb_brick_info);
+ if (ret) {
+ gf_msg (args->this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_BRICK_QUERY_FAILED,
+ "Brick %s compaction failed\n",
+ local_brick->brick_db_path);
+ }
+
+ gf_msg (args->this->name, GF_LOG_TRACE, 0,
+ DHT_MSG_LOG_TIER_STATUS,
+ "End compaction for %s",
+ local_brick->brick_name);
+
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+tier_compact (void *args)
+{
+ int ret = -1;
+ query_cbk_args_t query_cbk_args;
+ migration_args_t *compaction_args = args;
+
+ GF_VALIDATE_OR_GOTO ("tier", compaction_args->this, out);
+ GF_VALIDATE_OR_GOTO (compaction_args->this->name,
+ compaction_args->brick_list, out);
+ GF_VALIDATE_OR_GOTO (compaction_args->this->name,
+ compaction_args->defrag, out);
+
+ THIS = compaction_args->this;
+
+ query_cbk_args.this = compaction_args->this;
+ query_cbk_args.defrag = compaction_args->defrag;
+ query_cbk_args.is_compaction = 1;
+
+ /* Send the compaction pragma out to all the bricks on the bricklist. */
+ /* tier_get_bricklist ensures all bricks on the list are local to */
+ /* this node. */
+ ret = tier_send_compact (compaction_args, &query_cbk_args);
+ if (ret)
+ goto out;
+
+ ret = 0;
+out:
+ compaction_args->return_value = ret;
+ return ret;
+ }
+
static int
tier_get_bricklist (xlator_t *xl, struct list_head *local_bricklist_head)
{
@@ -1755,6 +2015,18 @@ tier_get_freq_promote (gf_tier_conf_t *tier_conf)
return tier_conf->tier_promote_frequency;
}
+int
+tier_get_freq_compact_hot (gf_tier_conf_t *tier_conf)
+{
+ return tier_conf->tier_compact_hot_frequency;
+}
+
+int
+tier_get_freq_compact_cold (gf_tier_conf_t *tier_conf)
+{
+ return tier_conf->tier_compact_cold_frequency;
+}
+
static int
tier_check_demote (gfdb_time_t current_time, int freq)
{
@@ -1776,8 +2048,21 @@ tier_check_promote (gf_tier_conf_t *tier_conf,
_gf_true : _gf_false;
}
+static gf_boolean_t
+tier_check_compact (gf_tier_conf_t *tier_conf,
+ gfdb_time_t current_time,
+ int freq_compact)
+{
+
+ if (!(tier_conf->compact_active ||
+ tier_conf->compact_mode_switched))
+ return _gf_false;
+ return ((current_time.tv_sec % freq_compact) == 0) ?
+ _gf_true : _gf_false;
+}
+
void
clear_bricklist (struct list_head *brick_list)
@@ -1824,6 +2109,72 @@ out:
return;
}
+static int
+tier_prepare_compact (migration_args_t *args, gfdb_time_t current_time)
+{
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ gf_tier_conf_t *tier_conf = NULL;
+ gf_boolean_t is_hot_tier = _gf_false;
+ int freq = 0;
+ int ret = -1;
+ const char *tier_type = is_hot_tier ? "hot" : "cold";
+
+ this = args->this;
+
+ conf = this->private;
+
+ defrag = conf->defrag;
+
+ tier_conf = &defrag->tier_conf;
+
+ is_hot_tier = args->is_hot_tier;
+
+ freq = is_hot_tier ? tier_get_freq_compact_hot (tier_conf) :
+ tier_get_freq_compact_cold (tier_conf);
+
+ defrag->tier_conf.compact_mode_switched = is_hot_tier ?
+ defrag->tier_conf.compact_mode_switched_hot :
+ defrag->tier_conf.compact_mode_switched_cold;
+
+ gf_msg(this->name, GF_LOG_TRACE, 0,
+ DHT_MSG_LOG_TIER_STATUS,
+ "Compact mode %i",
+ defrag->tier_conf.compact_mode_switched);
+
+ if (tier_check_compact (tier_conf, current_time,
+ freq)) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_LOG_TIER_STATUS,
+ "Start compaction on %s tier",
+ tier_type);
+
+ args->freq_time = freq;
+ ret = tier_compact (args);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_LOG_TIER_ERROR, "Compaction failed on "
+ "%s tier", tier_type);
+ goto out;
+ }
+
+ gf_msg (this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
+ "End compaction on %s tier", tier_type);
+
+ if (is_hot_tier) {
+ defrag->tier_conf.compact_mode_switched_hot =
+ _gf_false;
+ } else {
+ defrag->tier_conf.compact_mode_switched_cold =
+ _gf_false;
+ }
+ }
+
+out:
+ return ret;
+}
+
/*
* Main tiering loop. This is called from the promotion and the
* demotion threads spawned in tier_start().
@@ -1846,8 +2197,9 @@ static void
int check_watermark = 0;
gf_defrag_info_t *defrag = NULL;
xlator_t *this = NULL;
+ struct list_head *bricklist_temp = NULL;
migration_args_t *args = in_args;
-
+ gf_boolean_t compacted = _gf_false;
GF_VALIDATE_OR_GOTO ("tier", args, out);
GF_VALIDATE_OR_GOTO ("tier", args->brick_list, out);
@@ -1884,7 +2236,8 @@ static void
if (xlator != this) {
gf_msg (this->name, GF_LOG_INFO, 0,
DHT_MSG_LOG_TIER_STATUS,
- "Detected graph switch. Exiting migration daemon.");
+ "Detected graph switch. Exiting migration "
+ "daemon.");
goto out;
}
@@ -1912,10 +2265,10 @@ static void
goto out;
}
- if (gf_defrag_get_pause_state (&defrag->tier_conf) != TIER_RUNNING)
+ if (gf_defrag_get_pause_state (&defrag->tier_conf) !=
+ TIER_RUNNING)
continue;
-
/* To have proper synchronization amongst all
* brick holding nodes, so that promotion and demotions
* start atomicly w.r.t promotion/demotion frequency
@@ -1950,7 +2303,6 @@ static void
}
if (args->is_promotion) {
-
freq = tier_get_freq_promote (tier_conf);
if (tier_check_promote (tier_conf, current_time, freq)) {
@@ -1962,21 +2314,22 @@ static void
"Promotion failed");
}
}
-
+ } else if (args->is_compaction) {
+ tier_prepare_compact (args, current_time);
} else {
-
freq = tier_get_freq_demote (tier_conf);
if (tier_check_demote (current_time, freq)) {
args->freq_time = freq;
ret = tier_demote (args);
if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
+ gf_msg (this->name,
+ GF_LOG_ERROR,
+ 0,
DHT_MSG_LOG_TIER_ERROR,
"Demotion failed");
}
}
-
}
/* Check the statfs immediately after the processing threads
@@ -1997,11 +2350,15 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
{
pthread_t promote_thread;
pthread_t demote_thread;
+ pthread_t hot_compact_thread;
+ pthread_t cold_compact_thread;
int ret = -1;
struct list_head bricklist_hot = { 0 };
struct list_head bricklist_cold = { 0 };
migration_args_t promotion_args = { 0 };
migration_args_t demotion_args = { 0 };
+ migration_args_t hot_compaction_args = { 0 };
+ migration_args_t cold_compaction_args = { 0 };
dht_conf_t *conf = NULL;
INIT_LIST_HEAD ((&bricklist_hot));
@@ -2016,6 +2373,7 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
demotion_args.brick_list = &bricklist_hot;
demotion_args.defrag = defrag;
demotion_args.is_promotion = _gf_false;
+ demotion_args.is_compaction = _gf_false;
ret = pthread_create (&demote_thread,
NULL, &tier_run,
@@ -2047,6 +2405,47 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
goto waitforspawned;
}
+ hot_compaction_args.this = this;
+ hot_compaction_args.brick_list = &bricklist_hot;
+ hot_compaction_args.defrag = defrag;
+ hot_compaction_args.is_promotion = _gf_false;
+ hot_compaction_args.is_compaction = _gf_true;
+ hot_compaction_args.is_hot_tier = _gf_true;
+
+ ret = pthread_create (&hot_compact_thread,
+ NULL, &tier_run,
+ &hot_compaction_args);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_LOG_TIER_ERROR,
+ "Failed to start compaction thread.");
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ goto waitforspawnedpromote;
+ }
+
+ cold_compaction_args.this = this;
+ cold_compaction_args.brick_list = &bricklist_cold;
+ cold_compaction_args.defrag = defrag;
+ cold_compaction_args.is_promotion = _gf_false;
+ cold_compaction_args.is_compaction = _gf_true;
+ cold_compaction_args.is_hot_tier = _gf_false;
+
+ ret = pthread_create (&cold_compact_thread,
+ NULL, &tier_run,
+ &cold_compaction_args);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_LOG_TIER_ERROR,
+ "Failed to start compaction thread.");
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ goto waitforspawnedhotcompact;
+ }
+ pthread_join (cold_compact_thread, NULL);
+
+waitforspawnedhotcompact:
+ pthread_join (hot_compact_thread, NULL);
+
+waitforspawnedpromote:
pthread_join (promote_thread, NULL);
waitforspawned:
@@ -2055,7 +2454,6 @@ waitforspawned:
cleanup:
clear_bricklist (&bricklist_cold);
clear_bricklist (&bricklist_hot);
-
return ret;
}
@@ -2167,8 +2565,8 @@ out:
return ret;
}
-static
-int tier_validate_mode (char *mode)
+static int
+tier_validate_mode (char *mode)
{
int ret = -1;
@@ -2181,6 +2579,26 @@ int tier_validate_mode (char *mode)
return ret;
}
+static gf_boolean_t
+tier_validate_compact_mode (char *mode)
+{
+ gf_boolean_t ret = _gf_false;
+
+ gf_msg ("tier", GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
+ "tier_validate_compact_mode: mode = %s", mode);
+
+ if (!strcmp (mode, "on")) {
+ ret = _gf_true;
+ } else {
+ ret = _gf_false;
+ }
+
+ gf_msg ("tier", GF_LOG_ERROR, 0,
+ DHT_MSG_LOG_TIER_STATUS,
+ "tier_validate_compact_mode: ret = %i", ret);
+
+ return ret;
+}
int
tier_init_methods (xlator_t *this)
@@ -2205,8 +2623,6 @@ err:
return ret;
}
-
-
int
tier_init (xlator_t *this)
{
@@ -2291,6 +2707,22 @@ tier_init (xlator_t *this)
defrag->tier_conf.tier_demote_frequency = freq;
ret = dict_get_int32 (this->options,
+ "tier-hot-compact-frequency", &freq);
+ if (ret) {
+ freq = DEFAULT_HOT_COMPACT_FREQ_SEC;
+ }
+
+ defrag->tier_conf.tier_compact_hot_frequency = freq;
+
+ ret = dict_get_int32 (this->options,
+ "tier-cold-compact-frequency", &freq);
+ if (ret) {
+ freq = DEFAULT_COLD_COMPACT_FREQ_SEC;
+ }
+
+ defrag->tier_conf.tier_compact_cold_frequency = freq;
+
+ ret = dict_get_int32 (this->options,
"watermark-hi", &freq);
if (ret) {
freq = DEFAULT_WM_HI;
@@ -2339,6 +2771,29 @@ tier_init (xlator_t *this)
defrag->tier_conf.max_migrate_files = freq;
ret = dict_get_str (this->options,
+ "tier-compact", &mode);
+
+ if (ret) {
+ defrag->tier_conf.compact_active = DEFAULT_COMP_MODE;
+ } else {
+ ret = tier_validate_compact_mode (mode);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_LOG_TIER_ERROR,
+ "tier_init failed - invalid compaction mode");
+ goto out;
+ }
+
+ /* If compaction is now active, we need to inform the bricks on
+ the hot and cold tier of this. See dht-common.h for more. */
+ defrag->tier_conf.compact_active = ret;
+ if (ret) {
+ defrag->tier_conf.compact_mode_switched_hot = _gf_true;
+ defrag->tier_conf.compact_mode_switched_cold = _gf_true;
+ }
+ }
+
+ ret = dict_get_str (this->options,
"tier-mode", &mode);
if (ret) {
defrag->tier_conf.mode = DEFAULT_TIER_MODE;
@@ -2361,7 +2816,8 @@ tier_init (xlator_t *this)
"tier-pause", &paused);
if (paused && strcmp (paused, "on") == 0)
- gf_defrag_set_pause_state (&defrag->tier_conf, TIER_REQUEST_PAUSE);
+ gf_defrag_set_pause_state (&defrag->tier_conf,
+ TIER_REQUEST_PAUSE);
ret = gf_asprintf(&voldir, "%s/%s",
DEFAULT_VAR_RUN_DIRECTORY,
@@ -2411,7 +2867,6 @@ out:
return ret;
}
-
int
tier_cli_pause_done (int op_ret, call_frame_t *sync_frame, void *data)
{
@@ -2445,17 +2900,17 @@ exit:
return ret;
}
-
int
tier_reconfigure (xlator_t *this, dict_t *options)
{
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- char *mode = NULL;
- int migrate_mb = 0;
- gf_boolean_t req_pause = _gf_false;
- int ret = 0;
- call_frame_t *frame = NULL;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ char *mode = NULL;
+ int migrate_mb = 0;
+ gf_boolean_t req_pause = _gf_false;
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ gf_boolean_t last_compact_setting = _gf_false;
conf = this->private;
@@ -2489,6 +2944,28 @@ tier_reconfigure (xlator_t *this, dict_t *options)
defrag->tier_conf.watermark_low, options,
int32, out);
+ last_compact_setting = defrag->tier_conf.compact_active;
+
+ GF_OPTION_RECONF ("tier-compact",
+ defrag->tier_conf.compact_active, options,
+ bool, out);
+
+ if (last_compact_setting != defrag->tier_conf.compact_active) {
+ defrag->tier_conf.compact_mode_switched_hot = _gf_true;
+ defrag->tier_conf.compact_mode_switched_cold = _gf_true;
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_LOG_TIER_STATUS,
+ "compact mode switched");
+ }
+
+ GF_OPTION_RECONF ("tier-hot-compact-frequency",
+ defrag->tier_conf.tier_compact_hot_frequency,
+ options, int32, out);
+
+ GF_OPTION_RECONF ("tier-cold-compact-frequency",
+ defrag->tier_conf.tier_compact_cold_frequency,
+ options, int32, out);
+
GF_OPTION_RECONF ("tier-mode",
mode, options,
str, out);
@@ -2558,7 +3035,6 @@ class_methods_t class_methods = {
.notify = dht_notify
};
-
struct xlator_fops fops = {
.lookup = dht_lookup,
@@ -2611,9 +3087,7 @@ struct xlator_fops fops = {
.zerofill = dht_zerofill,
};
-
struct xlator_cbks cbks = {
.release = dht_release,
.forget = dht_forget
};
-
diff --git a/xlators/cluster/dht/src/tier.h b/xlators/cluster/dht/src/tier.h
index 0807608fda2..ffb04173bd5 100644
--- a/xlators/cluster/dht/src/tier.h
+++ b/xlators/cluster/dht/src/tier.h
@@ -54,6 +54,7 @@ typedef struct _query_cbk_args {
/* This is write */
int query_fd;
int is_promotion;
+ int is_compaction;
/* This is for read */
tier_qfile_array_t *qfile_array;
} query_cbk_args_t;
@@ -82,6 +83,8 @@ typedef struct _dm_thread_args {
int freq_time;
int return_value;
int is_promotion;
+ int is_compaction;
+ gf_boolean_t is_hot_tier;
} migration_args_t;
typedef enum tier_watermark_op_ {
@@ -93,12 +96,15 @@ typedef enum tier_watermark_op_ {
#define DEFAULT_PROMOTE_FREQ_SEC 120
#define DEFAULT_DEMOTE_FREQ_SEC 120
+#define DEFAULT_HOT_COMPACT_FREQ_SEC 604800
+#define DEFAULT_COLD_COMPACT_FREQ_SEC 604800
#define DEFAULT_DEMOTE_DEGRADED 10
#define DEFAULT_WRITE_FREQ_SEC 0
#define DEFAULT_READ_FREQ_SEC 0
#define DEFAULT_WM_LOW 75
#define DEFAULT_WM_HI 90
#define DEFAULT_TIER_MODE TIER_MODE_TEST
+#define DEFAULT_COMP_MODE _gf_true
#define DEFAULT_TIER_MAX_MIGRATE_MB 1000
#define DEFAULT_TIER_MAX_MIGRATE_FILES 5000
diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.c b/xlators/features/changetimerecorder/src/changetimerecorder.c
index 5f3a074acd5..933f496028c 100644
--- a/xlators/features/changetimerecorder/src/changetimerecorder.c
+++ b/xlators/features/changetimerecorder/src/changetimerecorder.c
@@ -15,6 +15,8 @@
#include "ctr-messages.h"
#include "syscall.h"
+#include "changetimerecorder.h"
+
/*******************************inode forget***********************************/
int
@@ -1789,6 +1791,61 @@ out:
return ret;
}
+void *
+ctr_compact_thread (void *args)
+{
+ int ret = -1;
+ void *db_conn = NULL;
+
+ xlator_t *this = NULL;
+ gf_ctr_private_t *priv = NULL;
+ gf_boolean_t compact_active = _gf_false;
+ gf_boolean_t compact_mode_switched = _gf_false;
+
+ this = (xlator_t *)args;
+
+ GF_VALIDATE_OR_GOTO("ctr", this, out);
+
+ priv = this->private;
+
+ db_conn = priv->_db_conn;
+ compact_active = priv->compact_active;
+ compact_mode_switched = priv->compact_mode_switched;
+
+ gf_msg ("ctr-compact", GF_LOG_INFO, 0, CTR_MSG_SET,
+ "Starting compaction");
+
+ ret = compact_db(db_conn, compact_active,
+ compact_mode_switched);
+
+ if (ret) {
+ gf_msg ("ctr-compact", GF_LOG_ERROR, 0, CTR_MSG_SET,
+ "Failed to perform the compaction");
+ }
+
+ ret = pthread_mutex_lock (&priv->compact_lock);
+
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+ "Failed to acquire lock");
+ goto out;
+ }
+
+ /* We are done compaction on this brick. Set all flags to false */
+ priv->compact_active = _gf_false;
+ priv->compact_mode_switched = _gf_false;
+
+ ret = pthread_mutex_unlock (&priv->compact_lock);
+
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+ "Failed to release lock");
+ goto out;
+ }
+
+out:
+ return NULL;
+}
int
ctr_ipc_helper (xlator_t *this, dict_t *in_dict,
@@ -1802,7 +1859,8 @@ ctr_ipc_helper (xlator_t *this, dict_t *in_dict,
char *db_param = NULL;
char *query_file = NULL;
gfdb_ipc_ctr_params_t *ipc_ctr_params = NULL;
-
+ int result = 0;
+ pthread_t compact_thread;
GF_VALIDATE_OR_GOTO ("ctr", this, out);
GF_VALIDATE_OR_GOTO (this->name, this->private, out);
@@ -1888,12 +1946,78 @@ ctr_ipc_helper (xlator_t *this, dict_t *in_dict,
SET_DB_PARAM_TO_DICT(this->name, out_dict,
db_param_key,
db_param, ret, error);
+ } /* if its an attempt to compact the database */
+ else if (strncmp (ctr_ipc_ops, GFDB_IPC_CTR_SET_COMPACT_PRAGMA,
+ strlen (GFDB_IPC_CTR_SET_COMPACT_PRAGMA)) == 0) {
+
+ ret = pthread_mutex_lock (&priv->compact_lock);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+ "Failed to acquire lock for compaction");
+ goto out;
+ }
+
+ if ((priv->compact_active || priv->compact_mode_switched)) {
+ /* Compaction in progress. LEAVE */
+ gf_msg (this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+ "Compaction already in progress.");
+ pthread_mutex_unlock (&priv->compact_lock);
+ goto out;
+ }
+ /* At this point, we should be the only one on the brick */
+ /* compacting */
+
+ /* Grab the arguments from the dictionary */
+ ret = dict_get_int32 (in_dict, "compact_active", &result);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+ "Failed to get compaction type");
+ goto out;
+ }
+
+ if (result) {
+ priv->compact_active = _gf_true;
+ }
+
+ ret = dict_get_int32 (in_dict, "compact_mode_switched"
+ , &result);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+ "Failed to see if compaction switched");
+ goto out;
+ }
+
+ if (result) {
+ priv->compact_mode_switched = _gf_true;
+ gf_msg ("ctr-compact", GF_LOG_TRACE, 0, CTR_MSG_SET,
+ "Pre-thread: Compact mode switch is true");
+ } else {
+ gf_msg ("ctr-compact", GF_LOG_TRACE, 0, CTR_MSG_SET,
+ "Pre-thread: Compact mode switch is false");
+ }
+
+ ret = pthread_mutex_unlock (&priv->compact_lock);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+ "Failed to release lock for compaction");
+ goto out;
+ }
+
+ ret = pthread_create (&compact_thread, NULL, ctr_compact_thread,
+ (void *)this);
+
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+ "Failed to spawn compaction thread");
+ goto out;
+ }
+
+ goto out;
} /* default case */
else {
goto out;
}
-
ret = 0;
goto out;
error:
@@ -2079,6 +2203,18 @@ init (xlator_t *this)
priv->ctr_lookupheal_inode_timeout =
CTR_DEFAULT_INODE_EXP_PERIOD;
+ /* For compaction */
+ priv->compact_active = _gf_false;
+ priv->compact_mode_switched = _gf_false;
+ ret_db = pthread_mutex_init (&priv->compact_lock, NULL);
+
+ if (ret_db) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ CTR_MSG_FATAL_ERROR,
+ "FATAL: Failed initializing compaction mutex");
+ goto error;
+ }
+
/*Extract ctr xlator options*/
ret_db = extract_ctr_options (this, priv);
if (ret_db) {
@@ -2123,6 +2259,7 @@ init (xlator_t *this)
goto error;
}
+
ret_db = 0;
goto out;
@@ -2185,6 +2322,11 @@ fini (xlator_t *this)
"db connection");
}
GF_FREE (priv->ctr_db_path);
+ if (pthread_mutex_destroy (&priv->compact_lock)) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ CTR_MSG_CLOSE_DB_CONN_FAILED, "Failed to "
+ "destroy the compaction mutex");
+ }
}
GF_FREE (priv);
mem_pool_destroy (this->local_pool);
diff --git a/xlators/features/changetimerecorder/src/ctr-helper.h b/xlators/features/changetimerecorder/src/ctr-helper.h
index d5615270184..4fd4f745f4d 100644
--- a/xlators/features/changetimerecorder/src/ctr-helper.h
+++ b/xlators/features/changetimerecorder/src/ctr-helper.h
@@ -22,6 +22,7 @@
#include "common-utils.h"
#include <time.h>
#include <sys/time.h>
+#include <pthread.h>
#include "gfdb_data_store.h"
#include "ctr-xlator-ctx.h"
@@ -52,6 +53,9 @@ typedef struct gf_ctr_private {
gfdb_conn_node_t *_db_conn;
uint64_t ctr_lookupheal_link_timeout;
uint64_t ctr_lookupheal_inode_timeout;
+ gf_boolean_t compact_active;
+ gf_boolean_t compact_mode_switched;
+ pthread_mutex_t compact_lock;
} gf_ctr_private_t;
diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h
index e520c69add2..2ba1876b6ec 100644
--- a/xlators/mgmt/glusterd/src/glusterd-messages.h
+++ b/xlators/mgmt/glusterd/src/glusterd-messages.h
@@ -4755,4 +4755,3 @@
/*------------*/
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
#endif /* !_GLUSTERD_MESSAGES_H_ */
-
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index ce34ffd2b05..d87082e9e89 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -387,6 +387,14 @@ validate_tier (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
goto out;
}
goto out;
+ } else if (strstr (key, "tier-compact")) {
+ if (strcmp (value, "on") &&
+ strcmp (value, "off")) {
+ ret = -1;
+ goto out;
+ }
+
+ goto out;
}
/*
@@ -452,7 +460,9 @@ validate_tier (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
strstr (key, "tier-max-mb") ||
strstr (key, "tier-max-promote-file-size") ||
strstr (key, "tier-max-files") ||
- strstr (key, "tier-demote-frequency")) {
+ strstr (key, "tier-demote-frequency") ||
+ strstr (key, "tier-hot-compact-frequency") ||
+ strstr (key, "tier-cold-compact-frequency")) {
if (origin_val < 1) {
snprintf (errstr, sizeof (errstr), "%s is not a "
" compatible value. %s expects a positive "
@@ -464,7 +474,6 @@ validate_tier (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
ret = -1;
goto out;
}
-
}
out:
gf_msg_debug (this->name, 0, "Returning %d", ret);
@@ -1589,17 +1598,17 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.flags = OPT_FLAG_CLIENT_OPT
},
- /* Crypt xlator options */
+ /* Crypt xlator options */
- { .key = "features.encryption",
- .voltype = "encryption/crypt",
- .option = "!feat",
- .value = "off",
- .op_version = 3,
- .description = "enable/disable client-side encryption for "
+ { .key = "features.encryption",
+ .voltype = "encryption/crypt",
+ .option = "!feat",
+ .value = "off",
+ .op_version = 3,
+ .description = "enable/disable client-side encryption for "
"the volume.",
- .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
- },
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
+ },
{ .key = "encryption.master-key",
.voltype = "encryption/crypt",
@@ -1968,7 +1977,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.flags = OPT_FLAG_CLIENT_OPT
},
- /* Feature translators */
+ /* Feature translators */
{ .key = "features.uss",
.voltype = "features/snapview-server",
.op_version = GD_OP_VERSION_3_6_0,
@@ -2730,6 +2739,32 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.description = "The maximum number of files that may be migrated"
" in any direction in a given cycle by a single node."
},
+ { .key = "cluster.tier-compact",
+ .voltype = "cluster/tier",
+ .option = "tier-compact",
+ .value = "on",
+ .op_version = GD_OP_VERSION_3_9_0,
+ .flags = OPT_FLAG_CLIENT_OPT,
+ .validate_fn = validate_tier,
+ .description = "Activate or deactivate the compaction of the DB"
+ " for the volume's metadata."
+ },
+ { .key = "cluster.tier-hot-compact-frequency",
+ .voltype = "cluster/tier",
+ .value = "604800",
+ .option = "tier-hot-compact-frequency",
+ .op_version = GD_OP_VERSION_3_9_0,
+ .flags = OPT_FLAG_CLIENT_OPT,
+ .validate_fn = validate_tier,
+ },
+ { .key = "cluster.tier-cold-compact-frequency",
+ .voltype = "cluster/tier",
+ .value = "604800",
+ .option = "tier-cold-compact-frequency",
+ .op_version = GD_OP_VERSION_3_9_0,
+ .flags = OPT_FLAG_CLIENT_OPT,
+ .validate_fn = validate_tier,
+ },
{ .key = "features.ctr-enabled",
.voltype = "features/changetimerecorder",
.value = "off",