summaryrefslogtreecommitdiffstats
path: root/libglusterfs
diff options
context:
space:
mode:
authorDiogenes Nunez <dnunez@redhat.com>2016-07-27 11:09:47 -0400
committerDan Lambright <dlambrig@redhat.com>2016-09-04 18:37:57 -0700
commit261c035c7d0cd1639cc8bd0ead82c30efcc0e93f (patch)
treeaf3a2e498023e7ad8af417312b83ce2f969ef738 /libglusterfs
parent6459fc812219551291e4be426ed8ecf2c90813a4 (diff)
cluster/tier: Adding compaction option for metadata databases
Problem: As metadata in the database fills up, querying the database takes a long time. As a result, tier migration slows down. To counteract this, we added a way to enable the compaction methods of the underlying database. The goal is to reduce the size of the underlying file by eliminating database fragmentation. NOTE: There is currently a bug where sometimes a brick will attempt to activate compaction. This happens even when compaction is already turned on. The cause is narrowed down to the compact_mode_switch flipping its value. Changes: libglusterfs/src/gfdb - Added a gfdb function to compact the underlying database, compact_db(). This is a no-op if the database has no such option. - Added a compaction function for SQLite3 that does the following: 1) Changes the auto_vacuum pragma of the database 2) Compacts the database according to the type of compaction requested - Compaction type can be changed by changing the macro GF_SQL_COMPACT_DEF to one of the 4 compaction types in gfdb_sqlite3.h. It is currently set to GF_SQL_COMPACT_INCR, or incremental vacuuming. xlators/cluster/dht/src - Added the following command-line option to enable SQLite3 compaction. gluster volume set <vol-name> tier-compact <off|on> - Added the following command-line options to change the frequency at which the hot and cold tiers are ordered to compact. gluster volume set <vol-name> tier-hot-compact-frequency <int> gluster volume set <vol-name> tier-cold-compact-frequency <int> - tier daemon periodically sends the (new) GFDB_IPC_CTR_SET_COMPACT_PRAGMA IPC to the CTR xlator. The IPC triggers compaction of the database. The inputs are both gf_boolean_t. IPC Input: compact_active: Is compaction currently on for the db. compact_mode_switched: Did we flip the compaction switch recently? IPC Output: 0 if the compaction succeeds. Non-zero otherwise. xlators/features/changetimerecorder/src/ - When the CTR gets the compaction IPC, it launches a thread that will perform the compaction. 
The IPC ends after the thread is launched. To avoid extra allocations, the parameters are passed using static variables. Change-Id: I5e1433becb9eeff2afe8dcb4a5798977bf5ba0dd Signed-off-by: Diogenes Nunez <dnunez@redhat.com> Reviewed-on: http://review.gluster.org/15031 Reviewed-by: Milind Changire <mchangir@redhat.com> Reviewed-by: Dan Lambright <dlambrig@redhat.com> Tested-by: Dan Lambright <dlambrig@redhat.com> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> Smoke: Gluster Build System <jenkins@build.gluster.org>
Diffstat (limited to 'libglusterfs')
-rw-r--r--libglusterfs/src/gfdb/gfdb_data_store.c40
-rw-r--r--libglusterfs/src/gfdb/gfdb_data_store.h19
-rw-r--r--libglusterfs/src/gfdb/gfdb_data_store_types.h50
-rw-r--r--libglusterfs/src/gfdb/gfdb_sqlite3.c187
-rw-r--r--libglusterfs/src/gfdb/gfdb_sqlite3.h21
-rw-r--r--libglusterfs/src/libglusterfs-messages.h23
6 files changed, 298 insertions, 42 deletions
diff --git a/libglusterfs/src/gfdb/gfdb_data_store.c b/libglusterfs/src/gfdb/gfdb_data_store.c
index 9c042f9e82e..cb567503fa3 100644
--- a/libglusterfs/src/gfdb/gfdb_data_store.c
+++ b/libglusterfs/src/gfdb/gfdb_data_store.c
@@ -433,6 +433,43 @@ delete_record (gfdb_conn_node_t *_conn_node,
return ret;
}
+/* Libgfdb API Function: ask the underlying data store to compact itself.
+ *
+ * The request is forwarded to the plugin's compact_db_op hook. Plugins
+ * that do not provide the hook are left alone and 0 is returned.
+ *
+ * Arguments:
+ *      _conn_node             : GFDB Connection node
+ *      _compact_active        : Is compaction currently on?
+ *      _compact_mode_switched : Was the compaction switch flipped?
+ * Returns : 0 when the plugin reports success (or has no compaction
+ *           hook), otherwise the non-zero value returned by the plugin */
+int
+compact_db (gfdb_conn_node_t *_conn_node, gf_boolean_t _compact_active,
+            gf_boolean_t _compact_mode_switched)
+{
+        int                   ret        = 0;
+        gfdb_db_operations_t *plugin_ops = NULL;
+        void                 *plugin_db  = NULL;
+
+        CHECK_CONN_NODE(_conn_node);
+
+        plugin_db  = _conn_node->gfdb_connection.gf_db_connection;
+        plugin_ops = &_conn_node->gfdb_connection.gfdb_db_operations;
+
+        /* No compaction hook registered by the plugin: nothing to do */
+        if (!plugin_ops->compact_db_op)
+                return ret;
+
+        ret = plugin_ops->compact_db_op (plugin_db, _compact_active,
+                                         _compact_mode_switched);
+        if (ret != 0)
+                gf_msg (GFDB_DATA_STORE, GF_LOG_ERROR, 0,
+                        LG_MSG_COMPACT_FAILED,
+                        "Compaction operation failed");
+
+        return ret;
+}
+
@@ -835,5 +872,8 @@ void get_gfdb_methods (gfdb_methods_t *methods)
/* Link info related functions */
methods->gfdb_link_info_new = gfdb_link_info_new;
methods->gfdb_link_info_free = gfdb_link_info_free;
+
+ /* Compaction related functions */
+ methods->compact_db = compact_db;
}
diff --git a/libglusterfs/src/gfdb/gfdb_data_store.h b/libglusterfs/src/gfdb/gfdb_data_store.h
index eacb8527034..0aac4611153 100644
--- a/libglusterfs/src/gfdb/gfdb_data_store.h
+++ b/libglusterfs/src/gfdb/gfdb_data_store.h
@@ -31,7 +31,7 @@
#define GFDB_IPC_CTR_CLEAR_OPS "gfdb.ipc-ctr-clear-op"
#define GFDB_IPC_CTR_GET_DB_PARAM_OPS "gfdb.ipc-ctr-get-db-parm"
#define GFDB_IPC_CTR_GET_DB_VERSION_OPS "gfdb.ipc-ctr-get-db-version"
-
+#define GFDB_IPC_CTR_SET_COMPACT_PRAGMA "gfdb.ipc-ctr-set-compact-pragma"
/*
* CTR IPC INPUT/OUTPUT
*
@@ -348,6 +348,21 @@ typedef int (*set_db_params_t)(gfdb_conn_node_t *db_conn,
char *param_key,
char *param_value);
+/*Libgfdb API Function: Compact the database.
+ *
+ * Arguments:
+ * _conn_node : GFDB Connection node
+ * _compact_active : Is compaction currently on?
+ * _compact_mode_switched : Was the compaction switch flipped?
+ * Returns : if successful return 0 or
+ * -ve value in case of failure
+ *
+ * The compact_db_t typedef that follows the prototype is a function-pointer
+ * type with the same parameter list and return type; it is the type of the
+ * compact_db member of gfdb_methods_t.*/
+int
+compact_db (gfdb_conn_node_t *_conn_node, gf_boolean_t _compact_active,
+ gf_boolean_t _compact_mode_switched);
+
+typedef int (*compact_db_t)(gfdb_conn_node_t *db_conn,
+ gf_boolean_t compact_active,
+ gf_boolean_t compact_mode_switched);
typedef struct gfdb_methods_s {
@@ -377,6 +392,8 @@ typedef struct gfdb_methods_s {
gfdb_link_info_new_t gfdb_link_info_new;
gfdb_link_info_free_t gfdb_link_info_free;
+ /* Compaction related functions */
+ compact_db_t compact_db;
} gfdb_methods_t;
void get_gfdb_methods (gfdb_methods_t *methods);
diff --git a/libglusterfs/src/gfdb/gfdb_data_store_types.h b/libglusterfs/src/gfdb/gfdb_data_store_types.h
index 1acbdf2f99f..d0c96370eb8 100644
--- a/libglusterfs/src/gfdb/gfdb_data_store_types.h
+++ b/libglusterfs/src/gfdb/gfdb_data_store_types.h
@@ -40,7 +40,8 @@ typedef enum gf_db_operation {
GFDB_W_DELETE_DB_OP,
GFDB_UW_DELETE_DB_OP,
GFDB_WFC_UPDATE_DB_OP,
- GFDB_RFC_UPDATE_DB_OP
+ GFDB_RFC_UPDATE_DB_OP,
+ GFDB_DB_COMPACT_DB_OP /* Added for VACUUM/manual compaction support */
} gf_db_operation_t;
@@ -81,19 +82,12 @@ gfdb_time_2_usec(gfdb_time_t *gfdb_time)
return ((uint64_t) gfdb_time->tv_sec * GFDB_MICROSEC) + gfdb_time->tv_usec;
}
-
-
-
-
/******************************************************************************
*
* Insert/Update Record related data structures/functions
*
* ****************************************************************************/
-
-
-
/*Indicated a generic synchronous write to the db
* This may or may not be implemented*/
typedef enum gfdb_sync_type {
@@ -123,11 +117,6 @@ out:
return ret;
}
-
-
-
-
-
/*Indicated different types of db*/
typedef enum gfdb_db_type {
GFDB_INVALID_DB = -1,
@@ -165,12 +154,6 @@ out:
return ret;
}
-
-
-
-
-
-
/*Tells the path of the fop*/
typedef enum gfdb_fop_path {
GFDB_FOP_INVALID = -1,
@@ -206,12 +189,6 @@ isunwindpath(gfdb_fop_path_t gfdb_fop_path)
return (gfdb_fop_path >= GFDB_FOP_UNWIND) ? _gf_true : _gf_false;
}
-
-
-
-
-
-
/*Tell what type of fop it was
* Like whether a dentry fop or a inode fop
* Read fop or a write fop etc*/
@@ -258,12 +235,6 @@ isdentrycreatefop(gfdb_fop_type_t fop_type)
_gf_true : _gf_false;
}
-
-
-
-
-
-
/*The structure that is used to send insert/update the databases
* using insert_db api*/
typedef struct gfdb_db_record {
@@ -374,6 +345,20 @@ typedef int
+/*Plugin operation used to compact the database
+ * Arguments:
+ * db_conn : plugin specific data base connection
+ * compact_active : Is compaction currently on?
+ * compact_mode_switched : Was the compaction switch flipped?
+ * Returns : if successful return 0 or
+ * -ve value in case of failure*/
+typedef int
+(*gfdb_compact_db_t)(void *db_conn, gf_boolean_t compact_active,
+ gf_boolean_t compact_mode_switched);
+
+
+
+
/* Query all the records from the database
* Arguments:
* db_conn : plugin specific data base connection
@@ -502,6 +487,7 @@ typedef struct gfdb_db_operations {
gfdb_fini_db_t fini_db_op;
gfdb_insert_record_t insert_record_op;
gfdb_delete_record_t delete_record_op;
+ gfdb_compact_db_t compact_db_op;
gfdb_find_all_t find_all_op;
gfdb_find_unchanged_for_time_t find_unchanged_for_time_op;
gfdb_find_recently_changed_files_t find_recently_changed_files_op;
@@ -598,5 +584,3 @@ typedef struct gfdb_connection {
#endif
-
-
diff --git a/libglusterfs/src/gfdb/gfdb_sqlite3.c b/libglusterfs/src/gfdb/gfdb_sqlite3.c
index 04781be562a..094028361c5 100644
--- a/libglusterfs/src/gfdb/gfdb_sqlite3.c
+++ b/libglusterfs/src/gfdb/gfdb_sqlite3.c
@@ -239,6 +239,7 @@ gf_sqlite3_fill_db_operations(gfdb_db_operations_t *gfdb_db_ops)
gfdb_db_ops->insert_record_op = gf_sqlite3_insert;
gfdb_db_ops->delete_record_op = gf_sqlite3_delete;
+ gfdb_db_ops->compact_db_op = gf_sqlite3_vacuum;
gfdb_db_ops->find_all_op = gf_sqlite3_find_all;
gfdb_db_ops->find_unchanged_for_time_op =
@@ -1327,10 +1328,14 @@ gf_sqlite3_pragma (void *db_conn, char *pragma_key, char **pragma_value)
goto out;
}
- ret = gf_asprintf (pragma_value, "%s", sqlite3_column_text (pre_stmt, 0));
- if (ret <= 0) {
- gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED,
- "Failed to get %s from db", pragma_key);
+ if (pragma_value) {
+ ret = gf_asprintf (pragma_value, "%s",
+ sqlite3_column_text (pre_stmt, 0));
+ if (ret <= 0) {
+ gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0,
+ LG_MSG_QUERY_FAILED, "Failed to get %s from db",
+ pragma_key);
+ }
}
ret = 0;
@@ -1382,3 +1387,177 @@ out:
return ret;
}
+
+/* Function to vacuum (compact) the sqlite db
+ *
+ * When compact_mode_switched is set, the auto_vacuum pragma is changed to
+ * match the new compaction state (for the FULL and INCR schemes) and a
+ * VACUUM is issued so the change takes effect. Otherwise a regular
+ * compaction pass of the type selected by GF_SQL_COMPACT_DEF is run.
+ *
+ * Input:
+ *      void *db_conn                      : Sqlite connection
+ *      gf_boolean_t compact_active        : Is compaction on?
+ *      gf_boolean_t compact_mode_switched : Did we just flip the compaction
+ *                                           switch?
+ * Return:
+ *      On success return 0
+ *      On failure return -1
+ *
+ * NOTE(review): when GF_SQL_COMPACT_DEF is GF_SQL_COMPACT_NONE the function
+ * logs at INFO level and still returns -1; confirm whether callers expect
+ * that "nothing to do" case to be reported as a failure.
+ * */
+int
+gf_sqlite3_vacuum (void *db_conn, gf_boolean_t compact_active,
+                   gf_boolean_t compact_mode_switched)
+{
+        int ret = -1;
+        gf_sql_connection_t *sql_conn = db_conn;
+        /* The statements are constant, so no heap copy is needed */
+        const char *sqlstring = NULL;
+        char *sql_strerror = NULL;
+        gf_boolean_t changing_pragma = _gf_true;
+
+        CHECK_SQL_CONN (sql_conn, out);
+
+        if (GF_SQL_COMPACT_DEF == GF_SQL_COMPACT_NONE) {
+                gf_msg (GFDB_STR_SQLITE3, GF_LOG_INFO, 0,
+                        LG_MSG_COMPACT_STATUS,
+                        "VACUUM type is off: no VACUUM to do");
+                goto out;
+        }
+
+        if (compact_mode_switched) {
+                if (compact_active) { /* Then it was OFF before.
+                                         So turn everything on */
+                        switch (GF_SQL_COMPACT_DEF) {
+                        case GF_SQL_COMPACT_FULL:
+                                ret = gf_sqlite3_set_pragma (db_conn,
+                                                             "auto_vacuum",
+                                                             GF_SQL_AV_FULL);
+                                break;
+                        case GF_SQL_COMPACT_INCR:
+                                ret = gf_sqlite3_set_pragma (db_conn,
+                                                             "auto_vacuum",
+                                                             GF_SQL_AV_INCR);
+                                break;
+                        case GF_SQL_COMPACT_MANUAL:
+                                /* Manual scheme has no pragma to set. The
+                                 * break keeps this valid mode from falling
+                                 * into the error path below. */
+                                changing_pragma = _gf_false;
+                                ret = 0;
+                                break;
+                        default:
+                                ret = -1;
+                                gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0,
+                                        LG_MSG_COMPACT_FAILED,
+                                        "VACUUM type undefined");
+                                goto out;
+                        }
+
+                } else { /* Then it was ON before, so turn it all off */
+                        if (GF_SQL_COMPACT_DEF == GF_SQL_COMPACT_FULL ||
+                            GF_SQL_COMPACT_DEF == GF_SQL_COMPACT_INCR) {
+                                ret = gf_sqlite3_set_pragma (db_conn,
+                                                             "auto_vacuum",
+                                                             GF_SQL_AV_NONE);
+                        } else {
+                                /* No pragma to change: not a failure */
+                                changing_pragma = _gf_false;
+                                ret = 0;
+                        }
+                }
+
+                if (ret) {
+                        gf_msg (GFDB_STR_SQLITE3, GF_LOG_TRACE, 0,
+                                LG_MSG_PREPARE_FAILED,
+                                "Failed to set the pragma");
+                        goto out;
+                }
+
+                gf_msg (GFDB_STR_SQLITE3, GF_LOG_INFO, 0,
+                        LG_MSG_COMPACT_STATUS, "Turning compaction %i",
+                        GF_SQL_COMPACT_DEF);
+
+                /* If we move from an auto_vacuum scheme to off, */
+                /* or vice-versa, we must VACUUM to save the change. */
+                /* In the case of a manual VACUUM scheme, we might as well */
+                /* run a manual VACUUM now if compaction was just enabled. */
+                if (changing_pragma || compact_active) {
+                        sqlstring = "VACUUM;";
+                        gf_msg(GFDB_STR_SQLITE3, GF_LOG_INFO, 0,
+                               LG_MSG_COMPACT_STATUS, "Sealed with a VACUUM");
+                }
+        } else { /* We are active, so it's time to VACUUM */
+                if (!compact_active) { /* Did we somehow enter an inconsistent
+                                          state? */
+                        ret = -1;
+                        gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0,
+                                LG_MSG_PREPARE_FAILED,
+                                "Tried to VACUUM when compaction inactive");
+                        goto out;
+                }
+
+                gf_msg(GFDB_STR_SQLITE3, GF_LOG_TRACE, 0,
+                       LG_MSG_COMPACT_STATUS,
+                       "Doing regular vacuum of type %i", GF_SQL_COMPACT_DEF);
+
+                switch (GF_SQL_COMPACT_DEF) {
+                case GF_SQL_COMPACT_INCR: /* INCR auto_vacuum */
+                        sqlstring = "PRAGMA incremental_vacuum;";
+                        gf_msg(GFDB_STR_SQLITE3, GF_LOG_INFO, 0,
+                               LG_MSG_COMPACT_STATUS,
+                               "Will commence an incremental VACUUM");
+                        break;
+                /* (MANUAL) Invoke the VACUUM command */
+                case GF_SQL_COMPACT_MANUAL:
+                        sqlstring = "VACUUM;";
+                        gf_msg(GFDB_STR_SQLITE3, GF_LOG_INFO, 0,
+                               LG_MSG_COMPACT_STATUS,
+                               "Will commence a VACUUM");
+                        break;
+                /* (FULL) The database does the compaction itself. */
+                /* We cannot do anything else, so we can leave */
+                /* without sending anything to the database */
+                case GF_SQL_COMPACT_FULL:
+                        ret = 0;
+                        goto success;
+                /* Any other state must be an error. Note that OFF */
+                /* cannot hit this statement since we immediately leave */
+                /* in that case */
+                default:
+                        ret = -1;
+                        gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0,
+                                LG_MSG_COMPACT_FAILED,
+                                "VACUUM type undefined");
+                        goto out;
+                }
+        }
+
+        /* Nothing to send to the database (manual scheme being switched */
+        /* off): do not hand a NULL string to the logger or to sqlite */
+        if (!sqlstring) {
+                ret = 0;
+                goto success;
+        }
+
+        gf_msg(GFDB_STR_SQLITE3, GF_LOG_TRACE, 0, LG_MSG_COMPACT_STATUS,
+               "SQLString == %s", sqlstring);
+
+        ret = sqlite3_exec(sql_conn->sqlite3_db_conn, sqlstring, NULL, NULL,
+                           &sql_strerror);
+
+        if (ret != SQLITE_OK) {
+                /* Prefer the message returned by sqlite3_exec; fall back */
+                /* to the handle's last error. The handle is the sqlite3 */
+                /* pointer inside sql_conn, not db_conn itself. */
+                gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0,
+                        LG_MSG_COMPACT_FAILED, "Failed to vacuum "
+                        "the db : %s",
+                        sql_strerror ? sql_strerror :
+                        sqlite3_errmsg (sql_conn->sqlite3_db_conn));
+                ret = -1;
+                goto out;
+        }
+success:
+        gf_msg(GFDB_STR_SQLITE3, GF_LOG_INFO, 0, LG_MSG_COMPACT_STATUS,
+               compact_mode_switched ? "Successfully changed VACUUM on/off"
+               : "DB successfully VACUUM");
+out:
+        /* Error strings from sqlite3_exec must be released with */
+        /* sqlite3_free; it is a no-op on NULL */
+        sqlite3_free (sql_strerror);
+
+        return ret;
+}
diff --git a/libglusterfs/src/gfdb/gfdb_sqlite3.h b/libglusterfs/src/gfdb/gfdb_sqlite3.h
index 9d0d996a322..4d70a60e431 100644
--- a/libglusterfs/src/gfdb/gfdb_sqlite3.h
+++ b/libglusterfs/src/gfdb/gfdb_sqlite3.h
@@ -73,8 +73,7 @@ do {\
#define GF_SQL_AV_NONE "none"
#define GF_SQL_AV_FULL "full"
-#define GF_SQL_AV_INCR "incr"
-
+#define GF_SQL_AV_INCR "incremental"
#define GF_SQL_SYNC_OFF "off"
#define GF_SQL_SYNC_NORMAL "normal"
@@ -87,7 +86,12 @@ do {\
#define GF_SQL_JM_WAL "wal"
#define GF_SQL_JM_OFF "off"
+#define GF_SQL_COMPACT_NONE 0 /* gf_sqlite3_vacuum() does no compaction work for this type */
+#define GF_SQL_COMPACT_FULL 1 /* auto_vacuum pragma set to "full"; the db compacts itself */
+#define GF_SQL_COMPACT_INCR 2 /* auto_vacuum pragma set to "incremental"; "PRAGMA incremental_vacuum;" is issued on each pass */
+#define GF_SQL_COMPACT_MANUAL 3 /* an explicit "VACUUM;" is issued on each pass */
+#define GF_SQL_COMPACT_DEF GF_SQL_COMPACT_INCR /* compile-time selected compaction type used by gf_sqlite3_vacuum() */
typedef enum gf_sql_auto_vacuum {
gf_sql_av_none = 0,
gf_sql_av_full,
@@ -319,7 +323,18 @@ int gf_sqlite3_pragma (void *db_conn, char *pragma_key, char **pragma_value);
int
gf_sqlite3_set_pragma (void *db_conn, char *pragma_key, char *pragma_value);
-
+/* Function to vacuum (compact) the sqlite db
+ * Input:
+ * void *db_conn : Sqlite connection
+ * gf_boolean_t compact_active : Is compaction on?
+ * gf_boolean_t compact_mode_switched : Did we just flip the compaction switch?
+ * Return:
+ * On success return 0
+ * On failure return -1
+ * */
+int
+gf_sqlite3_vacuum (void *db_conn, gf_boolean_t compact_active,
+ gf_boolean_t compact_mode_switched);
void gf_sqlite3_fill_db_operations (gfdb_db_operations_t *gfdb_db_ops);
diff --git a/libglusterfs/src/libglusterfs-messages.h b/libglusterfs/src/libglusterfs-messages.h
index d2ad44e470e..29196929eb3 100644
--- a/libglusterfs/src/libglusterfs-messages.h
+++ b/libglusterfs/src/libglusterfs-messages.h
@@ -36,7 +36,9 @@
*/
#define GLFS_LG_BASE GLFS_MSGID_COMP_LIBGLUSTERFS
-#define GLFS_LG_NUM_MESSAGES 207
+
+#define GLFS_LG_NUM_MESSAGES 209
+
#define GLFS_LG_MSGID_END (GLFS_LG_BASE + GLFS_LG_NUM_MESSAGES + 1)
/* Messaged with message IDs */
#define glfs_msg_start_lg GLFS_LG_BASE, "Invalid: Start of messages"
@@ -1762,6 +1764,7 @@
* @recommendedaction
*
*/
+
#define LG_MSG_INVALID_INODE_LIST (GLFS_LG_BASE + 207)
/*!
@@ -1770,6 +1773,24 @@
* @recommendedaction
*
*/
+
+#define LG_MSG_COMPACT_FAILED (GLFS_LG_BASE + 208)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define LG_MSG_COMPACT_STATUS (GLFS_LG_BASE + 209)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
/*------------*/
#define glfs_msg_end_lg GLFS_LG_MSGID_END, "Invalid: End of messages"