diff options
author | Joseph Fernandes <josferna@redhat.com> | 2015-06-06 10:28:51 +0530 |
---|---|---|
committer | Dan Lambright <dlambrig@redhat.com> | 2015-06-27 03:19:42 -0700 |
commit | 8456e5b8cc92b61d340aadcbbcd58af25d302cec (patch) | |
tree | 415a869f1da591d59558c07cb6977d6d42ef25f3 | |
parent | 0c919396be6284f90de53cba9dede98980fa1692 (diff) |
tier/dht: Fixing non-atomic promotion/demotion w.r.t. frequency period
This fixes the ping-pong issue, i.e. files getting demoted immediately
after promotion, caused by out-of-sync promotion/demotion processes.
The solution is to do promotion/demotion with reference to the system time.
For the fix to work, all the file-serving nodes should have
their system time synchronized with each other, either manually or
using an NTP server.
NOTE: The ping-pong issue can reappear even with this fix if the admin
configures different promotion and demotion frequency periods, but this
would be under the control of the admin.
Backport of http://review.gluster.org/#/c/11110/ to 3.7.x:
> Change-Id: I1b33a5881d0cac143662ddb48e5b7b653aeb1271
> BUG: 1218717
> Signed-off-by: Joseph Fernandes <josferna@redhat.com>
> Reviewed-on: http://review.gluster.org/11110
> Reviewed-by: Dan Lambright <dlambrig@redhat.com>
> Tested-by: Dan Lambright <dlambrig@redhat.com>
> Tested-by: Gluster Build System <jenkins@build.gluster.com>
Signed-off-by: Joseph Fernandes <josferna@redhat.com>
Change-Id: I81bd1d677487ebc0fc46df4980500102571de68e
BUG: 1230857
Reviewed-on: http://review.gluster.org/11191
Reviewed-by: Niels de Vos <ndevos@redhat.com>
Tested-by: NetBSD Build System <jenkins@build.gluster.org>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Dan Lambright <dlambrig@redhat.com>
Tested-by: Dan Lambright <dlambrig@redhat.com>
-rw-r--r-- | xlators/cluster/dht/src/tier.c | 99 |
1 files changed, 59 insertions, 40 deletions
diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c index 3e1b20eaedc..c2a9d6b4ebd 100644 --- a/xlators/cluster/dht/src/tier.c +++ b/xlators/cluster/dht/src/tier.c @@ -734,7 +734,7 @@ tier_get_bricklist (xlator_t *xl, dict_t *bricklist) if (!db_path) { gf_msg ("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS, - "Failed to allocate memory for bricklist"); + "Failed. to allocate memory for bricklist"); goto out; } @@ -763,9 +763,7 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag) dict_t *bricklist_cold = NULL; dict_t *bricklist_hot = NULL; dht_conf_t *conf = NULL; - int tick = 0; - int next_demote = 0; - int next_promote = 0; + gfdb_time_t current_time; int freq_promote = 0; int freq_demote = 0; promotion_args_t promotion_args = { 0 }; @@ -775,6 +773,8 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag) int ret = 0; pthread_t promote_thread; pthread_t demote_thread; + gf_boolean_t is_promotion_triggered = _gf_false; + gf_boolean_t is_demotion_triggered = _gf_false; conf = this->private; @@ -789,16 +789,9 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag) tier_get_bricklist (conf->subvolumes[0], bricklist_cold); tier_get_bricklist (conf->subvolumes[1], bricklist_hot); - freq_promote = defrag->tier_promote_frequency; - freq_demote = defrag->tier_demote_frequency; - - next_promote = defrag->tier_promote_frequency % TIMER_SECS; - next_demote = defrag->tier_demote_frequency % TIMER_SECS; - - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_LOG_TIER_STATUS, "Begin run tier promote %d demote %d", - next_promote, next_demote); + DHT_MSG_LOG_TIER_STATUS, "Begin run tier promote %d" + " demote %d", freq_promote, freq_demote); defrag->defrag_status = GF_DEFRAG_STATUS_STARTED; @@ -806,9 +799,6 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag) sleep(1); - ret_promotion = -1; - ret_demotion = -1; - if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { ret = 1; gf_msg (this->name, GF_LOG_ERROR, 0, @@ -820,7 +810,8 @@ tier_start 
(xlator_t *this, gf_defrag_info_t *defrag) if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) { ret = 0; - defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE; + defrag->defrag_status = + GF_DEFRAG_STATUS_COMPLETE; gf_msg (this->name, GF_LOG_DEBUG, 0, DHT_MSG_LOG_TIER_ERROR, "defrag->defrag_cmd == " @@ -828,49 +819,72 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag) goto out; } - tick = (tick + 1) % TIMER_SECS; + freq_promote = defrag->tier_promote_frequency; + freq_demote = defrag->tier_demote_frequency; + - if (freq_promote != defrag->tier_promote_frequency) - next_promote = tick; - if (freq_demote != defrag->tier_demote_frequency) - next_demote = tick; + /* To have proper synchronization amongst all + * brick holding nodes, so that promotion and demotions + * start atomicly w.r.t promotion/demotion frequency + * period, all nodes should have thier system time + * in-sync with each other either manually set or + * using a NTP server*/ + ret = gettimeofday (&current_time, NULL); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get current time"); + goto out; + } - if ((next_demote != tick) && (next_promote != tick)) + is_demotion_triggered = ((current_time.tv_sec % + freq_demote) == 0) ? _gf_true : + _gf_false; + is_promotion_triggered = ((current_time.tv_sec % + freq_promote) == 0) ? 
_gf_true : + _gf_false; + + /* If no promotion and no demotion is + * scheduled/triggered skip a iteration */ + if (!is_promotion_triggered && !is_demotion_triggered) continue; - if (next_demote >= tick) { + + ret_promotion = -1; + ret_demotion = -1; + + if (is_demotion_triggered) { demotion_args.this = this; demotion_args.brick_list = bricklist_hot; demotion_args.defrag = defrag; demotion_args.freq_time = freq_demote; - ret_demotion = pthread_create (&demote_thread, NULL, - &tier_demote, &demotion_args); + ret_demotion = pthread_create (&demote_thread, + NULL, &tier_demote, + &demotion_args); if (ret_demotion) { gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed starting Demotion thread!"); + "Failed starting Demotion " + "thread!"); } - freq_demote = defrag->tier_demote_frequency; - next_demote = (tick + freq_demote) % TIMER_SECS; } - if (next_promote >= tick) { + if (is_promotion_triggered) { promotion_args.this = this; promotion_args.brick_list = bricklist_cold; promotion_args.defrag = defrag; promotion_args.freq_time = freq_promote; - ret_promotion = pthread_create (&promote_thread, NULL, - &tier_promote, &promotion_args); + ret_promotion = pthread_create (&promote_thread, + NULL, &tier_promote, + &promotion_args); if (ret_promotion) { gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed starting Promotion thread!"); + "Failed starting Promotion " + "thread!"); } - freq_promote = defrag->tier_promote_frequency; - next_promote = (tick + freq_promote) % TIMER_SECS; } - if (ret_demotion == 0) { + if (is_promotion_triggered && (ret_demotion == 0)) { pthread_join (demote_thread, NULL); if (demotion_args.return_value) { gf_msg (this->name, GF_LOG_ERROR, 0, @@ -880,7 +894,7 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag) ret_demotion = demotion_args.return_value; } - if (ret_promotion == 0) { + if (is_demotion_triggered && (ret_promotion == 0)) { pthread_join (promote_thread, NULL); if (promotion_args.return_value) { 
gf_msg (this->name, GF_LOG_ERROR, 0, @@ -890,10 +904,15 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag) ret_promotion = promotion_args.return_value; } - /*Collect previous and current cummulative status */ - ret = ret | ret_demotion | ret_promotion; + /* Collect previous and current cummulative status */ + /* If demotion was not triggered just pass 0 to ret */ + ret = (is_demotion_triggered) ? ret_demotion : 0; + /* If promotion was not triggered just pass 0 to ret */ + ret = ret | (is_promotion_triggered) ? + ret_promotion : 0; - /*reseting promotion and demotion arguments for next iteration*/ + /* reseting promotion and demotion arguments for + * next iteration*/ memset (&demotion_args, 0, sizeof(demotion_args_t)); memset (&promotion_args, 0, sizeof(promotion_args_t)); |