diff options
| author | N Balachandran <nbalacha@redhat.com> | 2017-06-01 22:13:41 +0530 | 
|---|---|---|
| committer | Shyamsundar Ranganathan <srangana@redhat.com> | 2017-06-13 14:18:57 +0000 | 
| commit | 1037b006ad818c862d7af9308ca02e5f83ebd02a (patch) | |
| tree | fa1af78c6b07b0ac9602b3d07082918afeb2ae57 | |
| parent | 6dd06f21770ff35828af009eb022b80f3ffaeb26 (diff) | |
cluster/dht: Include dirs in rebalance estimates
Empty directories were not being considered while
calculating rebalance estimates, leading to negative
time-left values being displayed as part of the
rebalance status.
> BUG: 1457985
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
> Reviewed-on: https://review.gluster.org/17448
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Amar Tumballi <amarts@redhat.com>
> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
Change-Id: I48d41d702e72db30af10e6b87b628baa605afa98
BUG: 1460894
Signed-off-by: N Balachandran <nbalacha@redhat.com>
Reviewed-on: https://review.gluster.org/17527
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 6 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 5 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 103 | 
3 files changed, 83 insertions, 31 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 41a71116bf8..cb63d0ad13b 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -9053,7 +9053,7 @@ dht_notify (xlator_t *this, int event, void *data, ...)                                  DHT_MSG_CHILD_DOWN,                                  "Received CHILD_DOWN. Exiting");                          if (conf->defrag) { -                                gf_defrag_stop (conf->defrag, +                                gf_defrag_stop (conf,                                                  GF_DEFRAG_STATUS_FAILED, NULL);                          } else {                                  kill (getpid(), SIGTERM); @@ -9136,7 +9136,7 @@ dht_notify (xlator_t *this, int event, void *data, ...)                          if ((cmd == GF_DEFRAG_CMD_STATUS) ||                              (cmd == GF_DEFRAG_CMD_STATUS_TIER) ||                              (cmd == GF_DEFRAG_CMD_DETACH_STATUS)) -                                gf_defrag_status_get (defrag, output); +                                gf_defrag_status_get (conf, output);                          else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER)                                  gf_defrag_start_detach_tier(defrag);                          else if (cmd == GF_DEFRAG_CMD_DETACH_START) @@ -9144,7 +9144,7 @@ dht_notify (xlator_t *this, int event, void *data, ...)                          
else if (cmd == GF_DEFRAG_CMD_STOP ||                                   cmd == GF_DEFRAG_CMD_STOP_DETACH_TIER ||                                   cmd == GF_DEFRAG_CMD_DETACH_STOP) -                                gf_defrag_stop (defrag, +                                gf_defrag_stop (conf,                                                  GF_DEFRAG_STATUS_STOPPED, output);                          else if (cmd == GF_DEFRAG_CMD_PAUSE_TIER)                                  ret = gf_defrag_pause_tier (this, defrag); diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index e5076146c22..3429851804b 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -507,6 +507,7 @@ struct gf_defrag_info_ {          uint64_t                     num_files_lookedup;          uint64_t                     total_failures;          uint64_t                     skipped; +        uint64_t                     num_dirs_processed;          gf_lock_t                    lock;          int                          cmd;          pthread_t                    th; @@ -1153,7 +1154,7 @@ int dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                       struct iatt *postparent, dict_t *xdata);  int -gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict); +gf_defrag_status_get (dht_conf_t *conf, dict_t *dict);  void  gf_defrag_set_pause_state (gf_tier_conf_t *tier_conf, tier_pause_state_t state); @@ -1174,7 +1175,7 @@ int  gf_defrag_start_detach_tier (gf_defrag_info_t *defrag);  int -gf_defrag_stop (gf_defrag_info_t *defrag, gf_defrag_status_t status, +gf_defrag_stop (dht_conf_t *conf, gf_defrag_status_t status,                  dict_t *output);  void* diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index a7c3ce68d79..ee4e7df61d0 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -3315,6 +3315,11 
@@ out:                  ret = 2;          } +        /* It does not matter if it errored out - this number is +         * used to calculate rebalance estimated time to complete. +         * No locking required as dirs are processed by a single thread. +         */ +        defrag->num_dirs_processed++;          return ret;  }  int @@ -4431,7 +4436,7 @@ out:          LOCK (&defrag->lock);          {                  status = dict_new (); -                gf_defrag_status_get (defrag, status); +                gf_defrag_status_get (conf, status);                  if (ctx && ctx->notify)                          ctx->notify (GF_EN_DEFRAG_STATUS, status);                  if (status) @@ -4513,8 +4518,66 @@ out:          return NULL;  } + +uint64_t +gf_defrag_get_estimates (dht_conf_t *conf) +{ +        gf_defrag_info_t *defrag = NULL; +        double rate_lookedup = 0; +        uint64_t dirs_processed = 0; +        uint64_t total_processed = 0; +        uint64_t tmp_count = 0; +        uint64_t time_to_complete = 0; +        struct timeval end = {0,}; +        double   elapsed = 0; + +        defrag = conf->defrag; + +        if (!g_totalfiles) +                return 0; + +        gettimeofday (&end, NULL); +        elapsed = end.tv_sec - defrag->start_time.tv_sec; + +        /* I tried locking before accessing num_files_lookedup and +         * num_dirs_processed but the status function +         * never seemed to get the lock, causing the status cli to +         * hang. +         */ + +        dirs_processed = defrag->num_dirs_processed; + +        total_processed = defrag->num_files_lookedup +                           + dirs_processed; + +        /* rate at which files looked up */ +        rate_lookedup = (total_processed)/elapsed; + + +        /* We initially sum up dirs across all local subvols. +         * The same directories will be counted for each subvol so +         * we want to ensure that they are only counted once. 
+         */ + +        tmp_count = g_totalfiles +                     - (dirs_processed * (conf->local_subvols_cnt - 1)); + +        if (total_processed > g_totalfiles) +                g_totalfiles = total_processed + 10000; + +        time_to_complete = (tmp_count)/rate_lookedup; + +        gf_log (THIS->name, GF_LOG_INFO, +                "TIME: total_processed=%"PRIu64" tmp_cnt = %"PRIu64"," +                "rate_lookedup=%f", total_processed, tmp_count, +                rate_lookedup); + +        return time_to_complete; +} + +  int -gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict) +gf_defrag_status_get (dht_conf_t *conf, dict_t *dict)  {          int      ret    = 0;          uint64_t files  = 0; @@ -4526,11 +4589,10 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)          uint64_t demoted = 0;          char    *status = "";          double   elapsed = 0; -        uint64_t time_left = 0; -        uint64_t time_to_complete = 0; -        double rate_lookedup = 0;          struct timeval end = {0,}; - +        uint64_t time_to_complete = 0; +        uint64_t time_left = 0; +        gf_defrag_info_t *defrag = conf->defrag;          if (!defrag)                  goto out; @@ -4551,34 +4613,20 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)          elapsed = end.tv_sec - defrag->start_time.tv_sec; -/*START */ - -/* rate at which files looked up */ -          if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) -                && (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) -                && g_totalfiles) { +                && (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED)) { -                rate_lookedup = (defrag->num_files_lookedup)/elapsed; -                if (defrag->num_files_lookedup > g_totalfiles) -                        g_totalfiles = defrag->num_files_lookedup + 10000; -                time_to_complete = (g_totalfiles)/rate_lookedup; +                time_to_complete = gf_defrag_get_estimates 
(conf);                  time_left = time_to_complete - elapsed;                  gf_log (THIS->name, GF_LOG_INFO, -                        "TIME: num_files_lookedup=%"PRIu64",elapsed time = %f," -                        "rate_lookedup=%f", defrag->num_files_lookedup, elapsed, -                        rate_lookedup); -                gf_log (THIS->name, GF_LOG_INFO,                          "TIME: Estimated total time to complete = %"PRIu64 -                        " seconds", time_to_complete); +                        " seconds, seconds left = %"PRIu64"", +                        time_to_complete, time_left); -                gf_log (THIS->name, GF_LOG_INFO, -                        "TIME: Seconds left = %"PRIu64" seconds", time_left);          } -/*END */          if (!dict)                  goto log; @@ -4675,6 +4723,7 @@ gf_defrag_set_pause_state (gf_tier_conf_t *tier_conf, tier_pause_state_t state)          pthread_mutex_unlock (&tier_conf->pause_mutex);  } +  tier_pause_state_t  gf_defrag_get_pause_state (gf_tier_conf_t *tier_conf)  { @@ -4828,12 +4877,14 @@ gf_defrag_start_detach_tier (gf_defrag_info_t *defrag)  }  int -gf_defrag_stop (gf_defrag_info_t *defrag, gf_defrag_status_t status, +gf_defrag_stop (dht_conf_t *conf, gf_defrag_status_t status,                  dict_t *output)  {          /* TODO: set a variable 'stop_defrag' here, it should be checked             in defrag loop */          int     ret = -1; +        gf_defrag_info_t *defrag = conf->defrag; +          GF_ASSERT (defrag);          if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) { @@ -4845,7 +4896,7 @@ gf_defrag_stop (gf_defrag_info_t *defrag, gf_defrag_status_t status,          defrag->defrag_status = status;          if (output) -                gf_defrag_status_get (defrag, output); +                gf_defrag_status_get (conf, output);          ret = 0;  out:          gf_msg_debug ("", 0, "Returning %d", ret);  | 
