summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author N Balachandran <nbalacha@redhat.com> 2017-07-24 18:27:39 +0530
committer Shyamsundar Ranganathan <srangana@redhat.com> 2017-07-31 17:20:17 +0000
commit 4da056a30214ad4563bdfd24ffce724b740c6b4f (patch)
tree 8c2ca9cbea2d66798bacd71ce2affc89e2748977
parent 8cf3fc7b27c4f3737a1f969056dd3fa2223a9892 (diff)
cluster/dht: Fix negative rebalance estimates
The calculation of the rebalance estimates will start after the rebalance operation has been running for 10 minutes. This patch also changes the cli rebalance status code to use unsigned variables for the time calculations.

> BUG: 1457985
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
> Reviewed-on: https://review.gluster.org/17863
> Reviewed-by: Amar Tumballi <amarts@redhat.com>
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
(cherry picked from commit e21c915679244ddc1fae886e52badf02b4d95efc)

Change-Id: Ic76f517c59ad938a407f1cf5e3b9add571690a6c
BUG: 1475399
Signed-off-by: N Balachandran <nbalacha@redhat.com>
Reviewed-on: https://review.gluster.org/17882
Smoke: Gluster Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Shyamsundar Ranganathan <srangana@redhat.com>
-rw-r--r-- cli/src/cli-rpc-ops.c 86
-rw-r--r-- xlators/cluster/dht/src/dht-rebalance.c 28
2 files changed, 79 insertions, 35 deletions
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
index d6abd8f253a..ff44a22be78 100644
--- a/cli/src/cli-rpc-ops.c
+++ b/cli/src/cli-rpc-ops.c
@@ -19,6 +19,10 @@
#define INDENT_MAIN_HEAD "%-25s %s "
+/* Do not show estimates if greater than this number */
+#define REBAL_ESTIMATE_SEC_UPPER_LIMIT (60*24*3600)
+#define REBAL_ESTIMATE_START_TIME 600
+
#include "cli.h"
#include "compat-errno.h"
#include "cli-cmd.h"
@@ -1589,27 +1593,28 @@ int
gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type,
gf_boolean_t is_tier)
{
- int ret = -1;
- int count = 0;
- int i = 1;
- char key[256] = {0,};
- gf_defrag_status_t status_rcd = GF_DEFRAG_STATUS_NOT_STARTED;
- uint64_t files = 0;
- uint64_t size = 0;
- uint64_t lookup = 0;
- char *node_name = NULL;
- uint64_t failures = 0;
- uint64_t skipped = 0;
- double elapsed = 0;
- char *status_str = NULL;
- char *size_str = NULL;
- int hrs = 0;
- int min = 0;
- int sec = 0;
- gf_boolean_t down = _gf_false;
- gf_boolean_t fix_layout = _gf_false;
- uint64_t max_time = 0;
- uint64_t time_left = 0;
+ int ret = -1;
+ int count = 0;
+ int i = 1;
+ char key[256] = {0,};
+ gf_defrag_status_t status_rcd = GF_DEFRAG_STATUS_NOT_STARTED;
+ uint64_t files = 0;
+ uint64_t size = 0;
+ uint64_t lookup = 0;
+ char *node_name = NULL;
+ uint64_t failures = 0;
+ uint64_t skipped = 0;
+ double elapsed = 0;
+ char *status_str = NULL;
+ char *size_str = NULL;
+ int32_t hrs = 0;
+ uint32_t min = 0;
+ uint32_t sec = 0;
+ gf_boolean_t down = _gf_false;
+ gf_boolean_t fix_layout = _gf_false;
+ uint64_t max_time = 0;
+ uint64_t time_left = 0;
+ gf_boolean_t show_estimates = _gf_false;
ret = dict_get_int32 (dict, "count", &count);
@@ -1688,6 +1693,8 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type,
if (GF_DEFRAG_STATUS_NOT_STARTED == status_rcd)
continue;
+ if (GF_DEFRAG_STATUS_STARTED == status_rcd)
+ show_estimates = _gf_true;
snprintf (key, 256, "node-name-%d", i);
ret = dict_get_str (dict, key, &node_name);
@@ -1747,6 +1754,7 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type,
if (ret)
gf_log ("cli", GF_LOG_TRACE,
"failed to get time left");
+
if (time_left > max_time)
max_time = time_left;
@@ -1757,8 +1765,8 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type,
status_str = cli_vol_task_status_str[status_rcd];
size_str = gf_uint64_2human_readable(size);
hrs = elapsed / 3600;
- min = ((int) elapsed % 3600) / 60;
- sec = ((int) elapsed % 3600) % 60;
+ min = ((uint64_t) elapsed % 3600) / 60;
+ sec = ((uint64_t) elapsed % 3600) % 60;
if (fix_layout) {
cli_out ("%35s %50s %8d:%d:%d", node_name, status_str,
@@ -1785,12 +1793,36 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type,
" Please check the nodes that are down using \'gluster"
" peer status\' and start the glusterd on those nodes,"
" else tier detach commit might fail!");
+
+ /* Max time will be non-zero if rebalance is still running */
if (max_time) {
hrs = max_time / 3600;
- min = ((int) max_time % 3600) / 60;
- sec = ((int) max_time % 3600) % 60;
- cli_out ("Estimated time left for rebalance to complete :"
- " %8d:%02d:%02d", hrs, min, sec);
+ min = (max_time % 3600) / 60;
+ sec = (max_time % 3600) % 60;
+
+ if (hrs < REBAL_ESTIMATE_SEC_UPPER_LIMIT) {
+ cli_out ("Estimated time left for rebalance to "
+ "complete : %8d:%02d:%02d", hrs, min, sec);
+ } else {
+ cli_out ("Estimated time left for rebalance to "
+ "complete : > 2 months. Please try again "
+ "later.");
+ }
+ } else {
+ /* Rebalance will return 0 if it could not calculate the
+ * estimates or if it is complete.
+ */
+ if (!show_estimates) {
+ goto out;
+ }
+ if (elapsed <= REBAL_ESTIMATE_START_TIME) {
+ cli_out ("The estimated time for rebalance to complete "
+ "will be unavailable for the first 10 "
+ "minutes.");
+ } else {
+ cli_out ("Rebalance estimated time unavailable. Please "
+ "try again later.");
+ }
}
out:
return ret;
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 266dc5a99a9..f1d32a9a00c 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -18,13 +18,14 @@
#include <signal.h>
#include "events.h"
-#define GF_DISK_SECTOR_SIZE 512
+#define GF_DISK_SECTOR_SIZE 512
#define DHT_REBALANCE_PID 4242 /* Change it if required */
#define DHT_REBALANCE_BLKSIZE (1024 * 1024) /* 1 MB */
-#define MAX_MIGRATE_QUEUE_COUNT 500
-#define MIN_MIGRATE_QUEUE_COUNT 200
-#define MAX_REBAL_TYPE_SIZE 16
-#define FILE_CNT_INTERVAL 600 /* 10 mins */
+#define MAX_MIGRATE_QUEUE_COUNT 500
+#define MIN_MIGRATE_QUEUE_COUNT 200
+#define MAX_REBAL_TYPE_SIZE 16
+#define FILE_CNT_INTERVAL 600 /* 10 mins */
+#define ESTIMATE_START_INTERVAL 600 /* 10 mins */
#ifndef MAX
#define MAX(a, b) (((a) > (b))?(a):(b))
@@ -2972,7 +2973,6 @@ gf_defrag_get_entry (xlator_t *this, int i, struct dht_container **container,
!strcmp (df_entry->d_name, ".."))
continue;
-
if (IA_ISDIR (df_entry->d_stat.ia_type)) {
defrag->size_processed += df_entry->d_stat.ia_size;
continue;
@@ -4723,6 +4723,19 @@ gf_defrag_get_estimates_based_on_size (dht_conf_t *conf)
gettimeofday (&now, NULL);
elapsed = now.tv_sec - defrag->start_time.tv_sec;
+ /* Don't calculate the estimates for the first 10 minutes.
+ * It is unlikely to be accurate and estimates are not required
+ * if the process finishes in less than 10 mins.
+ */
+
+ if (elapsed < ESTIMATE_START_INTERVAL) {
+ gf_msg (THIS->name, GF_LOG_INFO, 0, 0,
+ "Rebalance estimates will not be available for the "
+ "first %d seconds.", ESTIMATE_START_INTERVAL);
+
+ goto out;
+ }
+
total_processed = defrag->size_processed;
/* rate at which files processed */
@@ -4734,7 +4747,6 @@ gf_defrag_get_estimates_based_on_size (dht_conf_t *conf)
time_to_complete = (tmp_count)/rate_processed;
} else {
-
gf_msg (THIS->name, GF_LOG_ERROR, 0, 0,
"Unable to calculate estimated time for rebalance");
}
@@ -4880,8 +4892,8 @@ gf_defrag_status_get (dht_conf_t *conf, dict_t *dict)
"TIME: Estimated total time to complete based on"
" count = %"PRIu64 " seconds, seconds left = %"PRIu64"",
time_to_complete, time_left);
-
*/
+
time_to_complete = gf_defrag_get_estimates_based_on_size (conf);
if (time_to_complete && (time_to_complete > elapsed))