summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/dht/src
diff options
context:
space:
mode:
authorMilind Changire <mchangir@redhat.com>2016-06-23 12:37:51 +0530
committerDan Lambright <dlambrig@redhat.com>2016-07-29 07:14:37 -0700
commit1f4e41e8c2f5f4af4564caba0a08996853f089f4 (patch)
treef9ea7a017b719e047bd1bfec516acce92dad5882 /xlators/cluster/dht/src
parent8ef81bf02c779e7d126a654bde28d319d0fa823d (diff)
cluster/tier: don't promote if estimated block consumption > hi watermark
Add a check that fails promotion if the estimated block consumption would grow beyond the hi watermark. Skip file migrations until the next cycle if tier_get_fs_stat() fails in tier_migrate_using_query_file(). Change-Id: Ice04572fa739c09109c4433e65965197482a7beb BUG: 1349284 Signed-off-by: Milind Changire <mchangir@redhat.com> Reviewed-on: http://review.gluster.org/14780 Smoke: Gluster Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> Reviewed-by: mohammed rafi kc <rkavunga@redhat.com> Reviewed-by: Dan Lambright <dlambrig@redhat.com>
Diffstat (limited to 'xlators/cluster/dht/src')
-rw-r--r--xlators/cluster/dht/src/dht-common.h1
-rw-r--r--xlators/cluster/dht/src/tier.c202
2 files changed, 153 insertions, 50 deletions
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 080e98594ec..760a86cccdb 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -391,6 +391,7 @@ typedef struct gf_tier_conf {
int watermark_hi;
int watermark_low;
int watermark_last;
+ unsigned long block_size;
fsblkcnt_t blocks_total;
fsblkcnt_t blocks_used;
int percent_full;
diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c
index 9c8f15bbce2..8134a63370b 100644
--- a/xlators/cluster/dht/src/tier.c
+++ b/xlators/cluster/dht/src/tier.c
@@ -31,6 +31,7 @@ static void *libhandle;
static gfdb_methods_t gfdb_methods;
#define DB_QUERY_RECORD_SIZE 4096
+/* Percentage of val relative to total, computed as (val * 100) / total.
+ * The multiplication happens before the division, so with integer operands
+ * the result is truncated only once, at the end. total must be non-zero:
+ * the macro divides by it unconditionally.
+ */
+#define GF_PERCENTAGE(val, total) (((val)*100)/(total))
/*
* Closes all the fds and frees the qfile_array
@@ -240,31 +241,36 @@ out:
}
int
-tier_check_watermark (xlator_t *this, loc_t *root_loc)
+tier_get_fs_stat (xlator_t *this, loc_t *root_loc)
{
- tier_watermark_op_t wm = TIER_WM_NONE;
- int ret = -1;
+ int ret = 0;
gf_defrag_info_t *defrag = NULL;
dht_conf_t *conf = NULL;
dict_t *xdata = NULL;
struct statvfs statfs = {0, };
gf_tier_conf_t *tier_conf = NULL;
+
conf = this->private;
- if (!conf)
+ if (!conf) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_LOG_TIER_STATUS,
+ "conf is NULL");
+ ret = -1;
goto exit;
+ }
defrag = conf->defrag;
- if (!defrag)
+ if (!defrag) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_LOG_TIER_STATUS,
+ "defrag is NULL");
+ ret = -1;
goto exit;
+ }
tier_conf = &defrag->tier_conf;
- if (tier_conf->mode != TIER_MODE_WM) {
- ret = 0;
- goto exit;
- }
-
xdata = dict_new ();
if (!xdata) {
gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
@@ -301,13 +307,39 @@ tier_check_watermark (xlator_t *this, loc_t *root_loc)
pthread_mutex_lock (&dm_stat_mutex);
+ tier_conf->block_size = statfs.f_bsize;
tier_conf->blocks_total = statfs.f_blocks;
tier_conf->blocks_used = statfs.f_blocks - statfs.f_bfree;
- tier_conf->percent_full = (100 * tier_conf->blocks_used) /
- statfs.f_blocks;
+ tier_conf->percent_full = GF_PERCENTAGE(tier_conf->blocks_used,
+ statfs.f_blocks);
pthread_mutex_unlock (&dm_stat_mutex);
+exit:
+ if (xdata)
+ dict_unref (xdata);
+ return ret;
+}
+
+int
+tier_check_watermark (xlator_t *this)
+{
+ int ret = -1;
+ gf_defrag_info_t *defrag = NULL;
+ dht_conf_t *conf = NULL;
+ gf_tier_conf_t *tier_conf = NULL;
+ tier_watermark_op_t wm = TIER_WM_NONE;
+
+ conf = this->private;
+ if (!conf)
+ goto exit;
+
+ defrag = conf->defrag;
+ if (!defrag)
+ goto exit;
+
+ tier_conf = &defrag->tier_conf;
+
if (tier_conf->percent_full < tier_conf->watermark_low) {
wm = TIER_WM_LOW;
@@ -326,9 +358,9 @@ tier_check_watermark (xlator_t *this, loc_t *root_loc)
"Tier watermark now %d", wm);
}
+ ret = 0;
+
exit:
- if (xdata)
- dict_unref (xdata);
return ret;
}
@@ -344,8 +376,9 @@ is_hot_tier_full (gf_tier_conf_t *tier_conf)
}
int
-tier_do_migration (xlator_t *this, int promote, loc_t *root_loc)
+tier_do_migration (xlator_t *this, int promote)
{
+ int ret = -1;
gf_defrag_info_t *defrag = NULL;
dht_conf_t *conf = NULL;
long rand = 0;
@@ -360,12 +393,7 @@ tier_do_migration (xlator_t *this, int promote, loc_t *root_loc)
if (!defrag)
goto exit;
- if (defrag->tier_conf.mode != TIER_MODE_WM) {
- migrate = 1;
- goto exit;
- }
-
- if (tier_check_watermark (this, root_loc) != 0) {
+ if (tier_check_watermark (this) != 0) {
gf_msg (this->name, GF_LOG_CRITICAL, errno,
DHT_MSG_LOG_TIER_ERROR,
"Failed to get watermark");
@@ -422,6 +450,61 @@ tier_migrate (xlator_t *this, int is_promotion, dict_t *migrate_data,
return ret;
}
+/* Decide whether a single file may be promoted to the hot tier.
+ *
+ * this:      xlator used as the logging domain
+ * file_name: name of the file, used only in log messages
+ * current:   iatt of the file; ia_size and ia_gfid are read
+ * defrag:    defrag info whose tier_conf supplies the promotion limits
+ *
+ * A file is rejected when:
+ *   - tier_max_promote_size is set and the file is larger than it, or
+ *   - (watermark mode only) the hot tier statistics are unusable, i.e. the
+ *     cached block size or total block count is zero, or
+ *   - (watermark mode only) the estimated block usage after promotion would
+ *     exceed the hi watermark.
+ *
+ * returns _gf_true: if file can be promoted
+ * returns _gf_false: if file cannot be promoted
+ */
+static gf_boolean_t
+tier_can_promote_file (xlator_t *this, char const *file_name,
+                       struct iatt *current, gf_defrag_info_t *defrag)
+{
+        gf_boolean_t    ret             = _gf_false;
+        fsblkcnt_t      estimated_usage = 0;
+        unsigned long   block_size      = 0;
+        fsblkcnt_t      blocks_total    = 0;
+
+        if (defrag->tier_conf.tier_max_promote_size &&
+            (current->ia_size > defrag->tier_conf.tier_max_promote_size)) {
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        DHT_MSG_LOG_TIER_STATUS,
+                        "File %s (gfid:%s) with size (%lu) exceeds maxsize "
+                        "(%d) for promotion. File will not be promoted.",
+                        file_name,
+                        uuid_utoa(current->ia_gfid),
+                        current->ia_size,
+                        defrag->tier_conf.tier_max_promote_size);
+                goto abort;
+        }
+
+        /* bypass further validations for TEST mode */
+        if (defrag->tier_conf.mode != TIER_MODE_WM) {
+                ret = _gf_true;
+                goto abort;
+        }
+
+        block_size   = defrag->tier_conf.block_size;
+        blocks_total = defrag->tier_conf.blocks_total;
+
+        /* Both values are used as divisors below. If either is zero the
+         * hot tier statistics are not usable, and dividing would be
+         * undefined behaviour (typically SIGFPE). Refuse the promotion
+         * instead of guessing.
+         */
+        if (block_size == 0 || blocks_total == 0) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_LOG_TIER_STATUS,
+                        "Hot tier statistics unavailable (block size %lu, "
+                        "total blocks %lu). File will not be promoted.",
+                        block_size,
+                        blocks_total);
+                goto abort;
+        }
+
+        /* convert the file size to blocks as per the block size of the
+         * destination tier
+         * NOTE: add (block_size - 1) so that a partially filled last block
+         * is counted as a whole block (round up instead of truncating)
+         *
+         * NOTE(review): block_size is filled from statvfs f_bsize elsewhere
+         * in this file, while the block counts come from f_blocks/f_bfree,
+         * which POSIX expresses in f_frsize units -- confirm the two sizes
+         * agree on the hot tier bricks.
+         */
+        estimated_usage = ((current->ia_size + block_size - 1) / block_size) +
+                          defrag->tier_conf.blocks_used;
+
+        /* test if the estimated block usage goes above HI watermark */
+        if (GF_PERCENTAGE (estimated_usage, blocks_total) >
+            defrag->tier_conf.watermark_hi) {
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        DHT_MSG_LOG_TIER_STATUS,
+                        "Estimated block count consumption on "
+                        "hot tier (%lu) exceeds hi watermark (%d%%). "
+                        "File will not be promoted.",
+                        estimated_usage,
+                        defrag->tier_conf.watermark_hi);
+                goto abort;
+        }
+        ret = _gf_true;
+abort:
+        return ret;
+}
+
static int
tier_migrate_using_query_file (void *_args)
{
@@ -553,25 +636,38 @@ tier_migrate_using_query_file (void *_args)
break;
}
- if (!tier_do_migration (this, query_cbk_args->is_promotion, &root_loc)) {
- gfdb_methods.gfdb_query_record_free (query_record);
- query_record = NULL;
+ if (defrag->tier_conf.mode == TIER_MODE_WM) {
+ ret = tier_get_fs_stat (this, &root_loc);
+ if (ret != 0) {
+ gfdb_methods.gfdb_query_record_free (query_record);
+ query_record = NULL;
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_LOG_TIER_STATUS,
+ "tier_get_fs_stat() FAILED ... "
+ "skipping file migrations until next cycle");
+ break;
+ }
+
+ if (!tier_do_migration (this, query_cbk_args->is_promotion)) {
+ gfdb_methods.gfdb_query_record_free (query_record);
+ query_record = NULL;
- /* We have crossed the high watermark. Stop processing
- * files if this is a promotion cycle so demotion gets
- * a chance to start if not already running*/
+ /* We have crossed the high watermark. Stop processing
+ * files if this is a promotion cycle so demotion gets
+ * a chance to start if not already running*/
- if (query_cbk_args->is_promotion &&
- is_hot_tier_full (&defrag->tier_conf)) {
+ if (query_cbk_args->is_promotion &&
+ is_hot_tier_full (&defrag->tier_conf)) {
- gf_msg (this->name, GF_LOG_INFO, 0,
- DHT_MSG_LOG_TIER_STATUS,
- "High watermark crossed during "
- "promotion. Exiting "
- "tier_migrate_using_query_file");
- break;
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_LOG_TIER_STATUS,
+ "High watermark crossed during "
+ "promotion. Exiting "
+ "tier_migrate_using_query_file");
+ break;
+ }
+ continue;
}
- continue;
}
if (!list_empty (&query_record->link_list)) {
@@ -725,14 +821,14 @@ tier_migrate_using_query_file (void *_args)
goto abort;
}
- if (query_cbk_args->is_promotion &&
- defrag->tier_conf.tier_max_promote_size &&
- (current.ia_size > defrag->tier_conf.tier_max_promote_size)) {
- gf_msg (this->name, GF_LOG_INFO, 0,
- DHT_MSG_LOG_TIER_STATUS,
- "File size exceeds maxsize for promotion. ");
- per_link_status = 1;
- goto abort;
+ if (query_cbk_args->is_promotion) {
+ if (!tier_can_promote_file (this,
+ link_info->file_name,
+ &current,
+ defrag)) {
+ per_link_status = 1;
+ goto abort;
+ }
}
linked_inode = inode_link (loc.inode, NULL, NULL,
@@ -1839,12 +1935,18 @@ static void
if (check_watermark >= WM_INTERVAL) {
check_watermark = 0;
- ret = tier_check_watermark (this, &root_loc);
- if (ret != 0) {
- gf_msg (this->name, GF_LOG_CRITICAL, errno,
- DHT_MSG_LOG_TIER_ERROR,
- "Failed to get watermark");
- continue;
+ if (tier_conf->mode == TIER_MODE_WM) {
+ ret = tier_get_fs_stat (this, &root_loc);
+ if (ret != 0) {
+ continue;
+ }
+ ret = tier_check_watermark (this);
+ if (ret != 0) {
+ gf_msg (this->name, GF_LOG_CRITICAL, errno,
+ DHT_MSG_LOG_TIER_ERROR,
+ "Failed to get watermark");
+ continue;
+ }
}
}