summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Milind Changire <mchangir@redhat.com> 2016-10-15 10:49:19 +0530
committer Dan Lambright <dlambrig@redhat.com> 2016-10-19 12:51:48 -0700
commit 460016428cf27484c333227f534c2e2f73a37fb1 (patch)
tree 321a1bb0517f622a134b3ab0634d6fea842fdb06
parent 20d2b36f3acc2f27527b7913d2ad939848395aeb (diff)
cluster/tier: handle fast demotions
Demote files on priority if the hi-watermark has been breached, and continue to demote until the watermark drops below the hi-watermark. Monitor the watermark more frequently. Trigger demotion as soon as the hi-watermark is breached.

Add the cluster.tier-query-limit option to limit the number of files returned from the database query for every iteration of tier_migrate_using_query_file(). If the watermark hasn't dropped below the hi-watermark during the first iteration, the next iteration will be triggered approximately 1 second after tier_demote() returns to the main tiering loop.

Update the changetimerecorder xlator to handle the query for emergency demote mode.

Add tier-ctr-interface.h: move the tier and ctr interface specific macros and struct definition from libglusterfs/src/gfdb/gfdb_data_store.h to the new header libglusterfs/src/tier-ctr-interface.h.

Change-Id: If56af78c6c81d37529b9b6e65ae606ba5c99a811
BUG: 1366648
Signed-off-by: Milind Changire <mchangir@redhat.com>
Reviewed-on: http://review.gluster.org/15158
Smoke: Gluster Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Dan Lambright <dlambrig@redhat.com>
-rw-r--r-- libglusterfs/src/Makefile.am 3
-rw-r--r-- libglusterfs/src/gfdb/gfdb_data_store.c 8
-rw-r--r-- libglusterfs/src/gfdb/gfdb_data_store.h 48
-rw-r--r-- libglusterfs/src/gfdb/gfdb_data_store_types.h 8
-rw-r--r-- libglusterfs/src/gfdb/gfdb_sqlite3.c 37
-rw-r--r-- libglusterfs/src/gfdb/gfdb_sqlite3.h 3
-rw-r--r-- libglusterfs/src/tier-ctr-interface.h 44
-rw-r--r-- xlators/cluster/dht/src/dht-common.h 1
-rw-r--r-- xlators/cluster/dht/src/dht-shared.c 4
-rw-r--r-- xlators/cluster/dht/src/tier.c 93
-rw-r--r-- xlators/cluster/dht/src/tier.h 3
-rw-r--r-- xlators/features/changetimerecorder/src/changetimerecorder.c 15
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-volume-set.c 15
13 files changed, 213 insertions, 69 deletions
diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am
index 20163da769c..e9e690ee4bd 100644
--- a/libglusterfs/src/Makefile.am
+++ b/libglusterfs/src/Makefile.am
@@ -61,7 +61,8 @@ noinst_HEADERS = unittest/unittest.h \
$(CONTRIBDIR)/rbtree/rb.h \
$(CONTRIBDIR)/mount/mntent_compat.h \
$(CONTRIBDIR)/libexecinfo/execinfo_compat.h \
- $(CONTRIBDIR)/timer-wheel/timer-wheel.h
+ $(CONTRIBDIR)/timer-wheel/timer-wheel.h \
+ tier-ctr-interface.h
if !HAVE_LIBUUID
# FIXME: unbundle libuuid, see compat-uuid.h.
diff --git a/libglusterfs/src/gfdb/gfdb_data_store.c b/libglusterfs/src/gfdb/gfdb_data_store.c
index cb567503fa3..7074c4a51c2 100644
--- a/libglusterfs/src/gfdb/gfdb_data_store.c
+++ b/libglusterfs/src/gfdb/gfdb_data_store.c
@@ -481,12 +481,14 @@ compact_db (gfdb_conn_node_t *_conn_node, gf_boolean_t _compact_active,
* for every record found
* _query_cbk_args : Custom argument passed for the call back
* function query_callback
+ * query_limit : maximum number of rows returned by the query; 0 means no limit
* Returns : if successful return 0 or
* -ve value in case of failure*/
int
find_all (gfdb_conn_node_t *_conn_node,
gf_query_callback_t query_callback,
- void *_query_cbk_args)
+ void *_query_cbk_args,
+ int query_limit)
{
int ret = 0;
gfdb_db_operations_t *db_operations_t = NULL;
@@ -500,7 +502,8 @@ find_all (gfdb_conn_node_t *_conn_node,
if (db_operations_t->find_all_op) {
ret = db_operations_t->find_all_op (gf_db_connection,
query_callback,
- _query_cbk_args);
+ _query_cbk_args,
+ query_limit);
if (ret) {
gf_msg (GFDB_DATA_STORE, GF_LOG_ERROR, 0,
LG_MSG_FIND_OP_FAILED, "Find all operation "
@@ -851,6 +854,7 @@ void get_gfdb_methods (gfdb_methods_t *methods)
{
methods->init_db = init_db;
methods->fini_db = fini_db;
+ methods->find_all = find_all;
methods->find_unchanged_for_time = find_unchanged_for_time;
methods->find_recently_changed_files = find_recently_changed_files;
methods->find_unchanged_for_time_freq = find_unchanged_for_time_freq;
diff --git a/libglusterfs/src/gfdb/gfdb_data_store.h b/libglusterfs/src/gfdb/gfdb_data_store.h
index 0aac4611153..beb954c190a 100644
--- a/libglusterfs/src/gfdb/gfdb_data_store.h
+++ b/libglusterfs/src/gfdb/gfdb_data_store.h
@@ -20,42 +20,6 @@
#include "gfdb_data_store_types.h"
-#define GFDB_IPC_CTR_KEY "gfdb.ipc-ctr-op"
-
-/*
- * CTR IPC OPERATIONS
- *
- *
- */
-#define GFDB_IPC_CTR_QUERY_OPS "gfdb.ipc-ctr-query-op"
-#define GFDB_IPC_CTR_CLEAR_OPS "gfdb.ipc-ctr-clear-op"
-#define GFDB_IPC_CTR_GET_DB_PARAM_OPS "gfdb.ipc-ctr-get-db-parm"
-#define GFDB_IPC_CTR_GET_DB_VERSION_OPS "gfdb.ipc-ctr-get-db-version"
-#define GFDB_IPC_CTR_SET_COMPACT_PRAGMA "gfdb.ipc-ctr-set-compact-pragma"
-/*
- * CTR IPC INPUT/OUTPUT
- *
- *
- */
-#define GFDB_IPC_CTR_GET_QFILE_PATH "gfdb.ipc-ctr-get-qfile-path"
-#define GFDB_IPC_CTR_GET_QUERY_PARAMS "gfdb.ipc-ctr-get-query-parms"
-#define GFDB_IPC_CTR_RET_QUERY_COUNT "gfdb.ipc-ctr-ret-rec-count"
-#define GFDB_IPC_CTR_GET_DB_KEY "gfdb.ipc-ctr-get-params-key"
-#define GFDB_IPC_CTR_RET_DB_VERSION "gfdb.ipc-ctr-ret-db-version"
-
-/*
- * gfdb ipc ctr params for query
- *
- *
- */
-typedef struct gfdb_ipc_ctr_params {
- gf_boolean_t is_promote;
- int write_freq_threshold;
- int read_freq_threshold;
- gfdb_time_t time_stamp;
-} gfdb_ipc_ctr_params_t;
-
-
/* GFDB Connection Node:
* ~~~~~~~~~~~~~~~~~~~~
* Represents the connection to the database while using libgfdb
@@ -146,11 +110,20 @@ delete_record(gfdb_conn_node_t *, gfdb_db_record_t *gfdb_db_record);
* for every record found
* _query_cbk_args : Custom argument passed for the call back
* function query_callback
+ * query_limit : 0 - unlimited,
+ * any positive value - adds the LIMIT clause
+ * to the SQL query
+ *
* Returns : if successful return 0 or
* -ve value in case of failure*/
int find_all(gfdb_conn_node_t *, gf_query_callback_t query_callback,
- void *_query_cbk_args);
+ void *_query_cbk_args,
+ int query_limit);
+typedef int (*find_all_t) (gfdb_conn_node_t *,
+ gf_query_callback_t query_callback,
+ void *_query_cbk_args,
+ int query_limit);
@@ -368,6 +341,7 @@ typedef int (*compact_db_t)(gfdb_conn_node_t *db_conn,
typedef struct gfdb_methods_s {
init_db_t init_db;
fini_db_t fini_db;
+ find_all_t find_all;
find_unchanged_for_time_t find_unchanged_for_time;
find_recently_changed_files_t find_recently_changed_files;
find_unchanged_for_time_freq_t find_unchanged_for_time_freq;
diff --git a/libglusterfs/src/gfdb/gfdb_data_store_types.h b/libglusterfs/src/gfdb/gfdb_data_store_types.h
index d0c96370eb8..02b7aa0fd33 100644
--- a/libglusterfs/src/gfdb/gfdb_data_store_types.h
+++ b/libglusterfs/src/gfdb/gfdb_data_store_types.h
@@ -366,12 +366,18 @@ typedef int
* for every record found
* _query_cbk_args : Custom argument passed for the call back
* function query_callback
+ * query_limit : 0 - list all files
+ * positive value - add the LIMIT clause to
+ * the SQL query to limit the number of records
+ * returned
+ *
* Returns : if successful return 0 or
* -ve value in case of failure*/
typedef int
(*gfdb_find_all_t)(void *db_conn,
gf_query_callback_t query_callback,
- void *_cbk_args);
+ void *_cbk_args,
+ int query_limit);
diff --git a/libglusterfs/src/gfdb/gfdb_sqlite3.c b/libglusterfs/src/gfdb/gfdb_sqlite3.c
index 094028361c5..4284ed9a69d 100644
--- a/libglusterfs/src/gfdb/gfdb_sqlite3.c
+++ b/libglusterfs/src/gfdb/gfdb_sqlite3.c
@@ -632,12 +632,15 @@ gf_get_basic_query_stmt (char **out_stmt)
* */
int
gf_sqlite3_find_all (void *db_conn, gf_query_callback_t query_callback,
- void *query_cbk_args)
+ void *query_cbk_args,
+ int query_limit)
{
int ret = -1;
char *query_str = NULL;
gf_sql_connection_t *sql_conn = db_conn;
sqlite3_stmt *prep_stmt = NULL;
+ char *limit_query = NULL;
+ char *query = NULL;
CHECK_SQL_CONN (sql_conn, out);
GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, query_callback, out);
@@ -647,12 +650,28 @@ gf_sqlite3_find_all (void *db_conn, gf_query_callback_t query_callback,
goto out;
}
- ret = sqlite3_prepare (sql_conn->sqlite3_db_conn, query_str, -1,
+ query = query_str;
+
+ if (query_limit > 0) {
+ ret = gf_asprintf (&limit_query, "%s LIMIT %d",
+ query, query_limit);
+ if (ret < 0) {
+ gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0,
+ LG_MSG_QUERY_FAILED,
+ "Failed creating limit query statement");
+ limit_query = NULL;
+ goto out;
+ }
+
+ query = limit_query;
+ }
+
+ ret = sqlite3_prepare (sql_conn->sqlite3_db_conn, query, -1,
&prep_stmt, 0);
if (ret != SQLITE_OK) {
gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0,
- LG_MSG_PREPARE_FAILED, "Failed to prepare statement %s :"
- "%s", query_str,
+ LG_MSG_PREPARE_FAILED,
+ "Failed to prepare statement %s: %s", query,
sqlite3_errmsg (sql_conn->sqlite3_db_conn));
ret = -1;
goto out;
@@ -661,7 +680,7 @@ gf_sqlite3_find_all (void *db_conn, gf_query_callback_t query_callback,
ret = gf_sql_query_function (prep_stmt, query_callback, query_cbk_args);
if (ret) {
gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED,
- "Failed Query %s", query_str);
+ "Failed Query %s", query);
goto out;
}
@@ -669,6 +688,10 @@ gf_sqlite3_find_all (void *db_conn, gf_query_callback_t query_callback,
out:
sqlite3_finalize (prep_stmt);
GF_FREE (query_str);
+
+ if (limit_query)
+ GF_FREE (limit_query);
+
return ret;
}
@@ -1070,10 +1093,10 @@ gf_sqlite3_find_unchanged_for_time_freq (void *db_conn,
GF_COL_TB_WMSEC ") >= ? ) ) )"
" AND "
/*Second condition: For Reads
- * Files that have reaASCd wind time smaller than for_time
+ * Files that have read wind time smaller than for_time
* OR
* File that have read wind time greater than for_time,
- * but write_frequency less than freq_write_cnt*/
+ * but read_frequency less than freq_read_cnt*/
"( ((" GF_COL_TB_RWSEC " * " TOSTRING(GFDB_MICROSEC) " + "
GF_COL_TB_RWMSEC ") < ? )"
" OR "
diff --git a/libglusterfs/src/gfdb/gfdb_sqlite3.h b/libglusterfs/src/gfdb/gfdb_sqlite3.h
index 52b84da251c..5b55b0ace5e 100644
--- a/libglusterfs/src/gfdb/gfdb_sqlite3.h
+++ b/libglusterfs/src/gfdb/gfdb_sqlite3.h
@@ -258,7 +258,8 @@ int gf_sqlite3_delete (void *db_conn, gfdb_db_record_t *);
/*querying modules*/
int gf_sqlite3_find_all (void *db_conn, gf_query_callback_t,
- void *_query_cbk_args);
+ void *_query_cbk_args,
+ int query_limit);
int gf_sqlite3_find_unchanged_for_time (void *db_conn,
gf_query_callback_t query_callback,
void *_query_cbk_args,
diff --git a/libglusterfs/src/tier-ctr-interface.h b/libglusterfs/src/tier-ctr-interface.h
new file mode 100644
index 00000000000..cfd3f8a5e5d
--- /dev/null
+++ b/libglusterfs/src/tier-ctr-interface.h
@@ -0,0 +1,44 @@
+#ifndef _TIER_CTR_INTERFACE_H_
+#define _TIER_CTR_INTERFACE_H_
+
+#include "common-utils.h"
+#include "gfdb_data_store_types.h"
+
+#define GFDB_IPC_CTR_KEY "gfdb.ipc-ctr-op"
+
+/*
+ * CTR IPC OPERATIONS
+ *
+ *
+ */
+#define GFDB_IPC_CTR_QUERY_OPS "gfdb.ipc-ctr-query-op"
+#define GFDB_IPC_CTR_CLEAR_OPS "gfdb.ipc-ctr-clear-op"
+#define GFDB_IPC_CTR_GET_DB_PARAM_OPS "gfdb.ipc-ctr-get-db-parm"
+#define GFDB_IPC_CTR_GET_DB_VERSION_OPS "gfdb.ipc-ctr-get-db-version"
+#define GFDB_IPC_CTR_SET_COMPACT_PRAGMA "gfdb.ipc-ctr-set-compact-pragma"
+/*
+ * CTR IPC INPUT/OUTPUT
+ *
+ *
+ */
+#define GFDB_IPC_CTR_GET_QFILE_PATH "gfdb.ipc-ctr-get-qfile-path"
+#define GFDB_IPC_CTR_GET_QUERY_PARAMS "gfdb.ipc-ctr-get-query-parms"
+#define GFDB_IPC_CTR_RET_QUERY_COUNT "gfdb.ipc-ctr-ret-rec-count"
+#define GFDB_IPC_CTR_GET_DB_KEY "gfdb.ipc-ctr-get-params-key"
+#define GFDB_IPC_CTR_RET_DB_VERSION "gfdb.ipc-ctr-ret-db-version"
+
+/*
+ * gfdb ipc ctr params for query
+ *
+ *
+ */
+typedef struct gfdb_ipc_ctr_params {
+ gf_boolean_t is_promote;
+ int write_freq_threshold;
+ int read_freq_threshold;
+ gfdb_time_t time_stamp;
+ int query_limit;
+ gf_boolean_t emergency_demote;
+} gfdb_ipc_ctr_params_t;
+
+#endif
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 7adc849fb63..719d214f92d 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -396,6 +396,7 @@ typedef struct gf_tier_conf {
int percent_full;
uint64_t max_migrate_bytes;
int max_migrate_files;
+ int query_limit;
tier_mode_t mode;
/* These flags are only used for tier-compact */
gf_boolean_t compact_active;
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
index f410f71b5a6..46bf461cf63 100644
--- a/xlators/cluster/dht/src/dht-shared.c
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -1064,6 +1064,10 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_INT,
.default_value = "10000",
},
+ { .key = {"tier-query-limit"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "100",
+ },
/* switch option */
{ .key = {"pattern.switch.case"},
.type = GF_OPTION_TYPE_ANY
diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c
index 2f8eddbc07d..60e967a88a2 100644
--- a/xlators/cluster/dht/src/tier.c
+++ b/xlators/cluster/dht/src/tier.c
@@ -15,6 +15,7 @@
#include "tier-common.h"
#include "syscall.h"
#include "events.h"
+#include "tier-ctr-interface.h"
/*Hard coded DB info*/
static gfdb_db_type_t dht_tier_db_type = GFDB_SQLITE3;
@@ -193,6 +194,7 @@ out:
/* Check and update the watermark every WM_INTERVAL seconds */
#define WM_INTERVAL 5
+#define WM_INTERVAL_EMERG 1
static int
tier_check_same_node (xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag)
@@ -523,7 +525,7 @@ tier_can_promote_file (xlator_t *this, char const *file_name,
defrag->tier_conf.blocks_used;
/* test if the estimated block usage goes above HI watermark */
- if (GF_PERCENTAGE (estimated_usage, defrag->tier_conf.blocks_total) >
+ if (GF_PERCENTAGE (estimated_usage, defrag->tier_conf.blocks_total) >=
defrag->tier_conf.watermark_hi) {
gf_msg (this->name, GF_LOG_INFO, 0,
DHT_MSG_LOG_TIER_STATUS,
@@ -575,6 +577,7 @@ tier_migrate_using_query_file (void *_args)
gfdb_time_t current_time = { 0 };
int total_time = 0;
int max_time = 0;
+ gf_boolean_t emergency_demote_mode = _gf_false;
GF_VALIDATE_OR_GOTO ("tier", query_cbk_args, out);
@@ -591,6 +594,9 @@ tier_migrate_using_query_file (void *_args)
if (!migrate_data)
goto out;
+ emergency_demote_mode = (!query_cbk_args->is_promotion &&
+ is_hot_tier_full(&defrag->tier_conf));
+
xdata_request = dict_new ();
if (!xdata_request) {
gf_msg (this->name, GF_LOG_ERROR, 0,
@@ -1013,6 +1019,18 @@ per_file_out:
gfdb_methods.gfdb_query_record_free (query_record);
query_record = NULL;
+
+ /* If we are demoting and the entry watermark was HI, then
+ * we are done with emergency demotions if the current
+ * watermark has fallen below hi-watermark level
+ */
+ if (emergency_demote_mode) {
+ if (tier_check_watermark (this) == 0) {
+ if (!is_hot_tier_full (&defrag->tier_conf)) {
+ break;
+ }
+ }
+ }
}
out:
@@ -1126,14 +1144,23 @@ tier_process_self_query (tier_brick_list_t *local_brick, void *args)
goto out;
}
if (!gfdb_brick_info->_gfdb_promote) {
- if (query_cbk_args->defrag->write_freq_threshold == 0 &&
- query_cbk_args->defrag->read_freq_threshold == 0) {
- ret = gfdb_methods.find_unchanged_for_time (
- conn_node,
- tier_gf_query_callback,
- (void *)query_cbk_args,
- gfdb_brick_info->time_stamp);
+ if (query_cbk_args->defrag->tier_conf.watermark_last ==
+ TIER_WM_HI) {
+ /* emergency demotion mode */
+ ret = gfdb_methods.find_all (conn_node,
+ tier_gf_query_callback,
+ (void *)query_cbk_args,
+ query_cbk_args->defrag->tier_conf.
+ query_limit);
} else {
+ if (query_cbk_args->defrag->write_freq_threshold == 0 &&
+ query_cbk_args->defrag->read_freq_threshold == 0) {
+ ret = gfdb_methods.find_unchanged_for_time (
+ conn_node,
+ tier_gf_query_callback,
+ (void *)query_cbk_args,
+ gfdb_brick_info->time_stamp);
+ } else {
ret = gfdb_methods.find_unchanged_for_time_freq (
conn_node,
tier_gf_query_callback,
@@ -1144,6 +1171,7 @@ tier_process_self_query (tier_brick_list_t *local_brick, void *args)
query_cbk_args->defrag->
read_freq_threshold,
_gf_false);
+ }
}
} else {
if (query_cbk_args->defrag->write_freq_threshold == 0 &&
@@ -1159,8 +1187,7 @@ tier_process_self_query (tier_brick_list_t *local_brick, void *args)
tier_gf_query_callback,
(void *)query_cbk_args,
gfdb_brick_info->time_stamp,
- query_cbk_args->defrag->
- write_freq_threshold,
+ query_cbk_args->defrag->write_freq_threshold,
query_cbk_args->defrag->read_freq_threshold,
_gf_false);
}
@@ -1267,10 +1294,21 @@ tier_process_ctr_query (tier_brick_list_t *local_brick, void *args)
/* set all the query params*/
ipc_ctr_params->is_promote = gfdb_brick_info->_gfdb_promote;
- ipc_ctr_params->write_freq_threshold = query_cbk_args->
- defrag->write_freq_threshold;
- ipc_ctr_params->read_freq_threshold = query_cbk_args->
- defrag->read_freq_threshold;
+
+ ipc_ctr_params->write_freq_threshold =
+ query_cbk_args->defrag->write_freq_threshold;
+
+ ipc_ctr_params->read_freq_threshold =
+ query_cbk_args->defrag->read_freq_threshold;
+
+ ipc_ctr_params->query_limit =
+ query_cbk_args->defrag->tier_conf.query_limit;
+
+ ipc_ctr_params->emergency_demote =
+ (!gfdb_brick_info->_gfdb_promote &&
+ query_cbk_args->defrag->tier_conf.watermark_last ==
+ TIER_WM_HI);
+
memcpy (&ipc_ctr_params->time_stamp,
gfdb_brick_info->time_stamp,
sizeof (gfdb_time_t));
@@ -2208,6 +2246,15 @@ out:
return ret;
}
+static int
+tier_get_wm_interval(tier_mode_t mode, tier_watermark_op_t wm)
+{
+ if (mode == TIER_MODE_WM && wm == TIER_WM_HI)
+ return WM_INTERVAL_EMERG;
+
+ return WM_INTERVAL;
+}
+
/*
* Main tiering loop. This is called from the promotion and the
* demotion threads spawned in tier_start().
@@ -2316,7 +2363,10 @@ static void
check_watermark++;
- if (check_watermark >= WM_INTERVAL) {
+ /* emergency demotion requires frequent watermark monitoring */
+ if (check_watermark >=
+ tier_get_wm_interval(tier_conf->mode,
+ tier_conf->watermark_last)) {
check_watermark = 0;
if (tier_conf->mode == TIER_MODE_WM) {
ret = tier_get_fs_stat (this, &root_loc);
@@ -2828,6 +2878,15 @@ tier_init (xlator_t *this)
defrag->tier_conf.max_migrate_files = freq;
+
+ ret = dict_get_int32 (this->options,
+ "tier-query-limit",
+ &(defrag->tier_conf.query_limit));
+ if (ret) {
+ defrag->tier_conf.query_limit =
+ DEFAULT_TIER_QUERY_LIMIT;
+ }
+
ret = dict_get_str (this->options,
"tier-compact", &mode);
@@ -3041,6 +3100,10 @@ tier_reconfigure (xlator_t *this, dict_t *options)
defrag->tier_conf.max_migrate_files, options,
int32, out);
+ GF_OPTION_RECONF ("tier-query-limit",
+ defrag->tier_conf.query_limit,
+ options, int32, out);
+
GF_OPTION_RECONF ("tier-pause",
req_pause, options,
bool, out);
diff --git a/xlators/cluster/dht/src/tier.h b/xlators/cluster/dht/src/tier.h
index ffb04173bd5..764860e6884 100644
--- a/xlators/cluster/dht/src/tier.h
+++ b/xlators/cluster/dht/src/tier.h
@@ -98,7 +98,7 @@ typedef enum tier_watermark_op_ {
#define DEFAULT_DEMOTE_FREQ_SEC 120
#define DEFAULT_HOT_COMPACT_FREQ_SEC 604800
#define DEFAULT_COLD_COMPACT_FREQ_SEC 604800
-#define DEFAULT_DEMOTE_DEGRADED 10
+#define DEFAULT_DEMOTE_DEGRADED 1
#define DEFAULT_WRITE_FREQ_SEC 0
#define DEFAULT_READ_FREQ_SEC 0
#define DEFAULT_WM_LOW 75
@@ -107,5 +107,6 @@ typedef enum tier_watermark_op_ {
#define DEFAULT_COMP_MODE _gf_true
#define DEFAULT_TIER_MAX_MIGRATE_MB 1000
#define DEFAULT_TIER_MAX_MIGRATE_FILES 5000
+#define DEFAULT_TIER_QUERY_LIMIT 100
#endif
diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.c b/xlators/features/changetimerecorder/src/changetimerecorder.c
index 4e4ea851a86..ffc4625626b 100644
--- a/xlators/features/changetimerecorder/src/changetimerecorder.c
+++ b/xlators/features/changetimerecorder/src/changetimerecorder.c
@@ -16,6 +16,7 @@
#include "syscall.h"
#include "changetimerecorder.h"
+#include "tier-ctr-interface.h"
/*******************************inode forget***********************************/
@@ -1726,14 +1727,21 @@ ctr_db_query (xlator_t *this,
goto out;
}
if (!ipc_ctr_params->is_promote) {
- if (ipc_ctr_params->write_freq_threshold == 0 &&
- ipc_ctr_params->read_freq_threshold == 0) {
+ if (ipc_ctr_params->emergency_demote) {
+ /* emergency demotion mode */
+ ret = find_all (conn_node,
+ ctr_db_query_callback,
+ (void *)&query_cbk_args,
+ ipc_ctr_params->query_limit);
+ } else {
+ if (ipc_ctr_params->write_freq_threshold == 0 &&
+ ipc_ctr_params->read_freq_threshold == 0) {
ret = find_unchanged_for_time (
conn_node,
ctr_db_query_callback,
(void *)&query_cbk_args,
&ipc_ctr_params->time_stamp);
- } else {
+ } else {
ret = find_unchanged_for_time_freq (
conn_node,
ctr_db_query_callback,
@@ -1742,6 +1750,7 @@ ctr_db_query (xlator_t *this,
ipc_ctr_params->write_freq_threshold,
ipc_ctr_params->read_freq_threshold,
_gf_false);
+ }
}
} else {
if (ipc_ctr_params->write_freq_threshold == 0 &&
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index 0c67135db3f..931f99ba5af 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -462,7 +462,8 @@ validate_tier (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
strstr (key, "tier-max-files") ||
strstr (key, "tier-demote-frequency") ||
strstr (key, "tier-hot-compact-frequency") ||
- strstr (key, "tier-cold-compact-frequency")) {
+ strstr (key, "tier-cold-compact-frequency") ||
+ strstr (key, "tier-query-limit")) {
if (origin_val < 1) {
snprintf (errstr, sizeof (errstr), "%s is not a "
" compatible value. %s expects a positive "
@@ -2739,6 +2740,18 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.description = "The maximum number of files that may be migrated"
" in any direction in a given cycle by a single node."
},
+ { .key = "cluster.tier-query-limit",
+ .voltype = "cluster/tier",
+ .option = "tier-query-limit",
+ .value = "100",
+ .op_version = GD_OP_VERSION_3_9_0,
+ .flags = OPT_FLAG_CLIENT_OPT,
+ .validate_fn = validate_tier,
+ .type = NO_DOC,
+ .description = "The maximum number of files that may be migrated "
+ "during an emergency demote. An emergency condition "
+ "is flagged when writes breach the hi-watermark."
+ },
{ .key = "cluster.tier-compact",
.voltype = "cluster/tier",
.option = "tier-compact",