diff options
| author | Milind Changire <mchangir@redhat.com> | 2016-06-23 12:37:51 +0530 | 
|---|---|---|
| committer | Dan Lambright <dlambrig@redhat.com> | 2016-07-29 07:14:37 -0700 | 
| commit | 1f4e41e8c2f5f4af4564caba0a08996853f089f4 (patch) | |
| tree | f9ea7a017b719e047bd1bfec516acce92dad5882 /xlators | |
| parent | 8ef81bf02c779e7d126a654bde28d319d0fa823d (diff) | |
cluster/tier: don't promote if estimated block consumption > hi watermark
Add a check that fails promotion of a file if the estimated block
consumption on the hot tier would grow beyond the hi watermark.
If tier_get_fs_stat() fails in tier_migrate_using_query_file(),
skip file migrations until the next cycle.
Change-Id: Ice04572fa739c09109c4433e65965197482a7beb
BUG: 1349284
Signed-off-by: Milind Changire <mchangir@redhat.com>
Reviewed-on: http://review.gluster.org/14780
Smoke: Gluster Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: mohammed rafi  kc <rkavunga@redhat.com>
Reviewed-by: Dan Lambright <dlambrig@redhat.com>
Diffstat (limited to 'xlators')
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 1 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/tier.c | 202 | 
2 files changed, 153 insertions, 50 deletions
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 080e98594ec..760a86cccdb 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -391,6 +391,7 @@ typedef struct gf_tier_conf {          int                          watermark_hi;          int                          watermark_low;          int                          watermark_last; +        unsigned long                block_size;          fsblkcnt_t                   blocks_total;          fsblkcnt_t                   blocks_used;          int                          percent_full; diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c index 9c8f15bbce2..8134a63370b 100644 --- a/xlators/cluster/dht/src/tier.c +++ b/xlators/cluster/dht/src/tier.c @@ -31,6 +31,7 @@ static void *libhandle;  static gfdb_methods_t gfdb_methods;  #define DB_QUERY_RECORD_SIZE 4096 +#define GF_PERCENTAGE(val, total) (((val)*100)/(total))  /*   * Closes all the fds and frees the qfile_array @@ -240,31 +241,36 @@ out:  }  int -tier_check_watermark (xlator_t *this, loc_t *root_loc) +tier_get_fs_stat (xlator_t *this, loc_t *root_loc)  { -        tier_watermark_op_t     wm = TIER_WM_NONE; -        int                     ret = -1; +        int                     ret = 0;          gf_defrag_info_t       *defrag = NULL;          dht_conf_t             *conf   = NULL;          dict_t                 *xdata  = NULL;          struct statvfs          statfs = {0, };          gf_tier_conf_t         *tier_conf = NULL; +          conf = this->private; -        if (!conf) +        if (!conf) { +                gf_msg (this->name, GF_LOG_ERROR, 0, +                        DHT_MSG_LOG_TIER_STATUS, +                        "conf is NULL"); +                ret = -1;                  goto exit; +        }          defrag = conf->defrag; -        if (!defrag) +        if (!defrag) { +                gf_msg (this->name, GF_LOG_ERROR, 0, +             
           DHT_MSG_LOG_TIER_STATUS, +                        "defrag is NULL"); +                ret = -1;                  goto exit; +        }          tier_conf = &defrag->tier_conf; -        if (tier_conf->mode != TIER_MODE_WM) { -                ret = 0; -                goto exit; -        } -          xdata = dict_new ();          if (!xdata) {                  gf_msg (this->name, GF_LOG_ERROR, ENOMEM, @@ -301,13 +307,39 @@ tier_check_watermark (xlator_t *this, loc_t *root_loc)          pthread_mutex_lock (&dm_stat_mutex); +        tier_conf->block_size = statfs.f_bsize;          tier_conf->blocks_total = statfs.f_blocks;          tier_conf->blocks_used = statfs.f_blocks - statfs.f_bfree; -        tier_conf->percent_full = (100 * tier_conf->blocks_used) / -                statfs.f_blocks; +        tier_conf->percent_full = GF_PERCENTAGE(tier_conf->blocks_used, +                                                statfs.f_blocks);          pthread_mutex_unlock (&dm_stat_mutex); +exit: +        if (xdata) +                dict_unref (xdata); +        return ret; +} + +int +tier_check_watermark (xlator_t *this) +{ +        int                     ret       = -1; +        gf_defrag_info_t       *defrag    = NULL; +        dht_conf_t             *conf      = NULL; +        gf_tier_conf_t         *tier_conf = NULL; +        tier_watermark_op_t     wm        = TIER_WM_NONE; + +        conf = this->private; +        if (!conf) +                goto exit; + +        defrag = conf->defrag; +        if (!defrag) +                goto exit; + +        tier_conf = &defrag->tier_conf; +          if (tier_conf->percent_full < tier_conf->watermark_low) {                  wm = TIER_WM_LOW; @@ -326,9 +358,9 @@ tier_check_watermark (xlator_t *this, loc_t *root_loc)                          "Tier watermark now %d", wm);          } +        ret = 0; +  exit: -        if (xdata) -                dict_unref (xdata);          return ret;  } @@ -344,8 +376,9 @@ is_hot_tier_full 
(gf_tier_conf_t *tier_conf)  }  int -tier_do_migration (xlator_t *this, int promote, loc_t *root_loc) +tier_do_migration (xlator_t *this, int promote)  { +        int                     ret = -1;          gf_defrag_info_t       *defrag = NULL;          dht_conf_t             *conf   = NULL;          long                    rand = 0; @@ -360,12 +393,7 @@ tier_do_migration (xlator_t *this, int promote, loc_t *root_loc)          if (!defrag)                  goto exit; -        if (defrag->tier_conf.mode != TIER_MODE_WM) { -                migrate = 1; -                goto exit; -        } - -        if (tier_check_watermark (this, root_loc) != 0) { +        if (tier_check_watermark (this) != 0) {                  gf_msg (this->name, GF_LOG_CRITICAL, errno,                          DHT_MSG_LOG_TIER_ERROR,                          "Failed to get watermark"); @@ -422,6 +450,61 @@ tier_migrate (xlator_t *this, int is_promotion, dict_t *migrate_data,          return ret;  } +/* returns  _gf_true: if file can be promoted + * returns _gf_false: if file cannot be promoted + */ +static gf_boolean_t +tier_can_promote_file (xlator_t *this, char const *file_name, +                       struct iatt *current, gf_defrag_info_t *defrag) +{ +        gf_boolean_t ret = _gf_false; +        fsblkcnt_t estimated_usage = 0; + +        if (defrag->tier_conf.tier_max_promote_size && +            (current->ia_size > defrag->tier_conf.tier_max_promote_size)) { +                gf_msg (this->name, GF_LOG_INFO, 0, +                        DHT_MSG_LOG_TIER_STATUS, +                        "File %s (gfid:%s) with size (%lu) exceeds maxsize " +                        "(%d) for promotion. 
File will not be promoted.", +                        file_name, +                        uuid_utoa(current->ia_gfid), +                        current->ia_size, +                        defrag->tier_conf.tier_max_promote_size); +                goto abort; +        } + +        /* bypass further validations for TEST mode */ +        if (defrag->tier_conf.mode != TIER_MODE_WM) { +                ret = _gf_true; +                goto abort; +        } + +        /* convert the file size to blocks as per the block size of the +         * destination tier +         * NOTE: add (block_size - 1) to get the correct block size when +         *       there is a remainder after a modulo +         */ +        estimated_usage = ((current->ia_size + defrag->tier_conf.block_size - 1) / +                                defrag->tier_conf.block_size) + +                                defrag->tier_conf.blocks_used; + +        /* test if the estimated block usage goes above HI watermark */ +        if (GF_PERCENTAGE (estimated_usage, defrag->tier_conf.blocks_total) > +                        defrag->tier_conf.watermark_hi) { +                gf_msg (this->name, GF_LOG_INFO, 0, +                        DHT_MSG_LOG_TIER_STATUS, +                        "Estimated block count consumption on " +                        "hot tier (%lu) exceeds hi watermark (%d%%). 
" +                        "File will not be promoted.", +                        estimated_usage, +                        defrag->tier_conf.watermark_hi); +                goto abort; +        } +        ret = _gf_true; +abort: +        return ret; +} +  static int  tier_migrate_using_query_file (void *_args)  { @@ -553,25 +636,38 @@ tier_migrate_using_query_file (void *_args)                          break;                  } -                if (!tier_do_migration (this, query_cbk_args->is_promotion, &root_loc)) { -                        gfdb_methods.gfdb_query_record_free (query_record); -                        query_record = NULL; +                if (defrag->tier_conf.mode == TIER_MODE_WM) { +                        ret = tier_get_fs_stat (this, &root_loc); +                        if (ret != 0) { +                                gfdb_methods.gfdb_query_record_free (query_record); +                                query_record = NULL; +                                gf_msg (this->name, GF_LOG_ERROR, 0, +                                        DHT_MSG_LOG_TIER_STATUS, +                                        "tier_get_fs_stat() FAILED ... " +                                        "skipping file migrations until next cycle"); +                                break; +                        } + +                        if (!tier_do_migration (this, query_cbk_args->is_promotion)) { +                                gfdb_methods.gfdb_query_record_free (query_record); +                                query_record = NULL; -                        /* We have crossed the high watermark. Stop processing -                         * files if this is a promotion cycle so demotion gets -                         * a chance to start if not already running*/ +                                /* We have crossed the high watermark. 
Stop processing +                                 * files if this is a promotion cycle so demotion gets +                                 * a chance to start if not already running*/ -                        if (query_cbk_args->is_promotion && -                            is_hot_tier_full (&defrag->tier_conf)) { +                                if (query_cbk_args->is_promotion && +                                    is_hot_tier_full (&defrag->tier_conf)) { -                                gf_msg (this->name, GF_LOG_INFO, 0, -                                        DHT_MSG_LOG_TIER_STATUS, -                                        "High watermark crossed during " -                                        "promotion. Exiting " -                                        "tier_migrate_using_query_file"); -                                break; +                                        gf_msg (this->name, GF_LOG_INFO, 0, +                                                DHT_MSG_LOG_TIER_STATUS, +                                                "High watermark crossed during " +                                                "promotion. 
Exiting " +                                                "tier_migrate_using_query_file"); +                                        break; +                                } +                                continue;                          } -                        continue;                  }                  if (!list_empty (&query_record->link_list)) { @@ -725,14 +821,14 @@ tier_migrate_using_query_file (void *_args)                                  goto abort;                          } -                        if (query_cbk_args->is_promotion && -                            defrag->tier_conf.tier_max_promote_size && -                            (current.ia_size > defrag->tier_conf.tier_max_promote_size)) { -                                gf_msg (this->name, GF_LOG_INFO, 0, -                                        DHT_MSG_LOG_TIER_STATUS, -                                        "File size exceeds maxsize for promotion. "); -                                per_link_status = 1; -                                goto abort; +                        if (query_cbk_args->is_promotion) { +                                if (!tier_can_promote_file (this, +                                                            link_info->file_name, +                                                            &current, +                                                            defrag)) { +                                        per_link_status = 1; +                                        goto abort; +                                }                          }                          linked_inode = inode_link (loc.inode, NULL, NULL, @@ -1839,12 +1935,18 @@ static void                  if (check_watermark >= WM_INTERVAL) {                          check_watermark = 0; -                        ret = tier_check_watermark (this, &root_loc); -                        if (ret != 0) { -                                gf_msg (this->name, GF_LOG_CRITICAL, errno, -                          
              DHT_MSG_LOG_TIER_ERROR, -                                        "Failed to get watermark"); -                                continue; +                        if (tier_conf->mode == TIER_MODE_WM) { +                                ret = tier_get_fs_stat (this, &root_loc); +                                if (ret != 0) { +                                        continue; +                                } +                                ret = tier_check_watermark (this); +                                if (ret != 0) { +                                        gf_msg (this->name, GF_LOG_CRITICAL, errno, +                                                DHT_MSG_LOG_TIER_ERROR, +                                                "Failed to get watermark"); +                                        continue; +                                }                          }                  }  | 
