summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoseph Fernandes <josferna@redhat.com>2015-06-06 10:28:51 +0530
committerDan Lambright <dlambrig@redhat.com>2015-06-27 03:19:42 -0700
commit8456e5b8cc92b61d340aadcbbcd58af25d302cec (patch)
tree415a869f1da591d59558c07cb6977d6d42ef25f3
parent0c919396be6284f90de53cba9dede98980fa1692 (diff)
tier/dht: Fixing non-atomic promotion/demotion w.r.t. frequency period
This fixes the ping-pong issue, i.e. files getting demoted immediately after promotion, caused by out-of-sync promotion/demotion processes. The solution is to do promotion/demotion with reference to the system time. For the fix to work, all the file-serving nodes should have their system time synchronized with each other, either manually or using an NTP server. NOTE: The ping-pong issue can reappear even with this fix if the admin has configured different promotion and demotion frequency periods, but this would be under the control of the admin. Backport of http://review.gluster.org/#/c/11110/ to 3.7.x: > Change-Id: I1b33a5881d0cac143662ddb48e5b7b653aeb1271 > BUG: 1218717 > Signed-off-by: Joseph Fernandes <josferna@redhat.com> > Reviewed-on: http://review.gluster.org/11110 > Reviewed-by: Dan Lambright <dlambrig@redhat.com> > Tested-by: Dan Lambright <dlambrig@redhat.com> > Tested-by: Gluster Build System <jenkins@build.gluster.com> Signed-off-by: Joseph Fernandes <josferna@redhat.com> Change-Id: I81bd1d677487ebc0fc46df4980500102571de68e BUG: 1230857 Reviewed-on: http://review.gluster.org/11191 Reviewed-by: Niels de Vos <ndevos@redhat.com> Tested-by: NetBSD Build System <jenkins@build.gluster.org> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Dan Lambright <dlambrig@redhat.com> Tested-by: Dan Lambright <dlambrig@redhat.com>
-rw-r--r--xlators/cluster/dht/src/tier.c99
1 files changed, 59 insertions, 40 deletions
diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c
index 3e1b20eaedc..c2a9d6b4ebd 100644
--- a/xlators/cluster/dht/src/tier.c
+++ b/xlators/cluster/dht/src/tier.c
@@ -734,7 +734,7 @@ tier_get_bricklist (xlator_t *xl, dict_t *bricklist)
if (!db_path) {
gf_msg ("tier", GF_LOG_ERROR, 0,
DHT_MSG_LOG_TIER_STATUS,
- "Failed to allocate memory for bricklist");
+ "Failed. to allocate memory for bricklist");
goto out;
}
@@ -763,9 +763,7 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
dict_t *bricklist_cold = NULL;
dict_t *bricklist_hot = NULL;
dht_conf_t *conf = NULL;
- int tick = 0;
- int next_demote = 0;
- int next_promote = 0;
+ gfdb_time_t current_time;
int freq_promote = 0;
int freq_demote = 0;
promotion_args_t promotion_args = { 0 };
@@ -775,6 +773,8 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
int ret = 0;
pthread_t promote_thread;
pthread_t demote_thread;
+ gf_boolean_t is_promotion_triggered = _gf_false;
+ gf_boolean_t is_demotion_triggered = _gf_false;
conf = this->private;
@@ -789,16 +789,9 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
tier_get_bricklist (conf->subvolumes[0], bricklist_cold);
tier_get_bricklist (conf->subvolumes[1], bricklist_hot);
- freq_promote = defrag->tier_promote_frequency;
- freq_demote = defrag->tier_demote_frequency;
-
- next_promote = defrag->tier_promote_frequency % TIMER_SECS;
- next_demote = defrag->tier_demote_frequency % TIMER_SECS;
-
-
gf_msg (this->name, GF_LOG_INFO, 0,
- DHT_MSG_LOG_TIER_STATUS, "Begin run tier promote %d demote %d",
- next_promote, next_demote);
+ DHT_MSG_LOG_TIER_STATUS, "Begin run tier promote %d"
+ " demote %d", freq_promote, freq_demote);
defrag->defrag_status = GF_DEFRAG_STATUS_STARTED;
@@ -806,9 +799,6 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
sleep(1);
- ret_promotion = -1;
- ret_demotion = -1;
-
if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
ret = 1;
gf_msg (this->name, GF_LOG_ERROR, 0,
@@ -820,7 +810,8 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) {
ret = 0;
- defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
+ defrag->defrag_status =
+ GF_DEFRAG_STATUS_COMPLETE;
gf_msg (this->name, GF_LOG_DEBUG, 0,
DHT_MSG_LOG_TIER_ERROR,
"defrag->defrag_cmd == "
@@ -828,49 +819,72 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
goto out;
}
- tick = (tick + 1) % TIMER_SECS;
+ freq_promote = defrag->tier_promote_frequency;
+ freq_demote = defrag->tier_demote_frequency;
+
- if (freq_promote != defrag->tier_promote_frequency)
- next_promote = tick;
- if (freq_demote != defrag->tier_demote_frequency)
- next_demote = tick;
+ /* To have proper synchronization amongst all
+ * brick holding nodes, so that promotions and demotions
+ * start atomically w.r.t. the promotion/demotion frequency
+ * period, all nodes should have their system time
+ * in sync with each other, either manually set or
+ * using an NTP server */
+ ret = gettimeofday (&current_time, NULL);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get current time");
+ goto out;
+ }
- if ((next_demote != tick) && (next_promote != tick))
+ is_demotion_triggered = ((current_time.tv_sec %
+ freq_demote) == 0) ? _gf_true :
+ _gf_false;
+ is_promotion_triggered = ((current_time.tv_sec %
+ freq_promote) == 0) ? _gf_true :
+ _gf_false;
+
+ /* If neither promotion nor demotion is
+ * scheduled/triggered, skip an iteration */
+ if (!is_promotion_triggered && !is_demotion_triggered)
continue;
- if (next_demote >= tick) {
+
+ ret_promotion = -1;
+ ret_demotion = -1;
+
+ if (is_demotion_triggered) {
demotion_args.this = this;
demotion_args.brick_list = bricklist_hot;
demotion_args.defrag = defrag;
demotion_args.freq_time = freq_demote;
- ret_demotion = pthread_create (&demote_thread, NULL,
- &tier_demote, &demotion_args);
+ ret_demotion = pthread_create (&demote_thread,
+ NULL, &tier_demote,
+ &demotion_args);
if (ret_demotion) {
gf_msg (this->name, GF_LOG_ERROR, 0,
DHT_MSG_LOG_TIER_ERROR,
- "Failed starting Demotion thread!");
+ "Failed starting Demotion "
+ "thread!");
}
- freq_demote = defrag->tier_demote_frequency;
- next_demote = (tick + freq_demote) % TIMER_SECS;
}
- if (next_promote >= tick) {
+ if (is_promotion_triggered) {
promotion_args.this = this;
promotion_args.brick_list = bricklist_cold;
promotion_args.defrag = defrag;
promotion_args.freq_time = freq_promote;
- ret_promotion = pthread_create (&promote_thread, NULL,
- &tier_promote, &promotion_args);
+ ret_promotion = pthread_create (&promote_thread,
+ NULL, &tier_promote,
+ &promotion_args);
if (ret_promotion) {
gf_msg (this->name, GF_LOG_ERROR, 0,
DHT_MSG_LOG_TIER_ERROR,
- "Failed starting Promotion thread!");
+ "Failed starting Promotion "
+ "thread!");
}
- freq_promote = defrag->tier_promote_frequency;
- next_promote = (tick + freq_promote) % TIMER_SECS;
}
- if (ret_demotion == 0) {
+ if (is_promotion_triggered && (ret_demotion == 0)) {
pthread_join (demote_thread, NULL);
if (demotion_args.return_value) {
gf_msg (this->name, GF_LOG_ERROR, 0,
@@ -880,7 +894,7 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
ret_demotion = demotion_args.return_value;
}
- if (ret_promotion == 0) {
+ if (is_demotion_triggered && (ret_promotion == 0)) {
pthread_join (promote_thread, NULL);
if (promotion_args.return_value) {
gf_msg (this->name, GF_LOG_ERROR, 0,
@@ -890,10 +904,15 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
ret_promotion = promotion_args.return_value;
}
- /*Collect previous and current cummulative status */
- ret = ret | ret_demotion | ret_promotion;
+ /* Collect previous and current cumulative status */
+ /* If demotion was not triggered just pass 0 to ret */
+ ret = (is_demotion_triggered) ? ret_demotion : 0;
+ /* If promotion was not triggered just pass 0 to ret */
+ ret = ret | (is_promotion_triggered) ?
+ ret_promotion : 0;
- /*reseting promotion and demotion arguments for next iteration*/
+ /* resetting promotion and demotion arguments for
+ * next iteration */
memset (&demotion_args, 0, sizeof(demotion_args_t));
memset (&promotion_args, 0, sizeof(promotion_args_t));