summaryrefslogtreecommitdiffstats
path: root/geo-replication
diff options
context:
space:
mode:
authorKotresh HR <khiremat@redhat.com>2015-07-03 16:32:56 +0530
committerVenky Shankar <vshankar@redhat.com>2015-07-28 22:09:46 -0700
commit62c2e7f8b9211ba149368d26f772f175fe51b43b (patch)
treee2cc105e1d9af5914c0050ee1f548a41361bf98a /geo-replication
parent8f04ec33bc86aa464a5f8b77f9d64e5608cb6f1b (diff)
geo-rep: Fix history failure
Both ACTIVE and PASSIVE workers register to changelog at almost the same time. When PASSIVE worker becomes ACTIVE, the start and end time would be current stime and register_time respectively for history API. Hence register_time would be less than stime, for which history obviously fails. But it will be successful for the next restart as new register_time > stime. Fix is to pass current time as the end time to history call instead of the register_time. Also improved the logging for ACTIVE/PASSIVE switching. Change-Id: Idc08b4b55c7a4c575ba44918a98389164ccbee8f BUG: 1239044 Signed-off-by: Kotresh HR <khiremat@redhat.com> Reviewed-on: http://review.gluster.org/11524 Tested-by: Gluster Build System <jenkins@build.gluster.com> Tested-by: NetBSD Build System <jenkins@build.gluster.org> Reviewed-by: Aravinda VK <avishwan@redhat.com> Reviewed-by: Venky Shankar <vshankar@redhat.com>
Diffstat (limited to 'geo-replication')
-rw-r--r--geo-replication/syncdaemon/gconf.py6
-rw-r--r--geo-replication/syncdaemon/master.py18
2 files changed, 19 insertions, 5 deletions
diff --git a/geo-replication/syncdaemon/gconf.py b/geo-replication/syncdaemon/gconf.py
index 1fc7c381bc4..39a70a650b3 100644
--- a/geo-replication/syncdaemon/gconf.py
+++ b/geo-replication/syncdaemon/gconf.py
@@ -21,5 +21,11 @@ class GConf(object):
log_exit = False
permanent_handles = []
log_metadata = {}
+ """One variable is sufficient to track the
+ switching of worker to ACTIVE. Two variables
+ are intentionally used to track worker going
+ to PASSIVE as well mainly for debugging"""
+ active_earlier = False
+ passive_earlier = False
gconf = GConf()
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
index 5b8abc5fd9f..1bc2450c101 100644
--- a/geo-replication/syncdaemon/master.py
+++ b/geo-replication/syncdaemon/master.py
@@ -441,6 +441,7 @@ class GMasterCommon(object):
t.start()
def mgmt_lock(self):
+
"""Take management volume lock """
fd = None
bname = str(self.uuid) + "_" + str(gconf.slave_id) + "_subvol_" \
@@ -473,10 +474,16 @@ class GMasterCommon(object):
os.close(fd)
if isinstance(ex, IOError) and ex.errno in (EACCES, EAGAIN):
# cannot grab, it's taken
- logging.debug("Lock held by someother worker process")
+ if not gconf.passive_earlier:
+ gconf.passive_earlier = True
+ logging.info("Didn't get lock : %s : Becoming PASSIVE"
+ % gconf.local_path)
return False
raise
- logging.debug("Got the lock")
+
+ if not gconf.active_earlier:
+ gconf.active_earlier = True
+ logging.info("Got lock : %s : Becoming ACTIVE" % gconf.local_path)
return True
def should_crawl(self):
@@ -1123,8 +1130,9 @@ class GMasterChangeloghistoryMixin(GMasterChangelogMixin):
self.status.set_worker_crawl_status("History Crawl")
purge_time = self.get_purge_time()
- logging.info('starting history crawl... turns: %s, stime: %s'
- % (self.history_turns, repr(purge_time)))
+ end_time = int(time.time())
+ logging.info('starting history crawl... turns: %s, stime: %s, etime: %s'
+ % (self.history_turns, repr(purge_time), repr(end_time)))
if not purge_time or purge_time == URXTIME:
logging.info("stime not available, abandoning history crawl")
@@ -1138,7 +1146,7 @@ class GMasterChangeloghistoryMixin(GMasterChangelogMixin):
ret, actual_end = self.changelog_agent.history(
changelog_path,
purge_time[0],
- self.changelog_register_time,
+ end_time,
int(gconf.sync_jobs))
# scan followed by getchanges till scan returns zero.