summaryrefslogtreecommitdiffstats
path: root/geo-replication
diff options
context:
space:
mode:
Diffstat (limited to 'geo-replication')
-rw-r--r--geo-replication/syncdaemon/monitor.py16
1 files changed, 12 insertions, 4 deletions
diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py
index f3700c1a390..0bde216d761 100644
--- a/geo-replication/syncdaemon/monitor.py
+++ b/geo-replication/syncdaemon/monitor.py
@@ -189,21 +189,22 @@ class Monitor(object):
self.lock.release()
os.close(pw)
- t0 = time.time()
- so = select((pr,), (), (), conn_timeout)[0]
- os.close(pr)
-
# close all RPC pipes in monitor
os.close(ra)
os.close(wa)
os.close(rw)
os.close(ww)
+ t0 = time.time()
+ so = select((pr,), (), (), conn_timeout)[0]
+ os.close(pr)
+
if so:
ret = nwait(cpid, os.WNOHANG)
if ret is not None:
logging.info("worker(%s) died before establishing "
"connection" % w[0])
+ nwait(apid) #wait for agent
else:
logging.debug("worker(%s) connected" % w[0])
while time.time() < t0 + conn_timeout:
@@ -211,15 +212,20 @@ class Monitor(object):
if ret is not None:
logging.info("worker(%s) died in startup "
"phase" % w[0])
+ nwait(apid) #wait for agent
break
time.sleep(1)
else:
logging.info("worker(%s) not confirmed in %d sec, "
"aborting it" % (w[0], conn_timeout))
os.kill(cpid, signal.SIGKILL)
+ nwait(apid) #wait for agent
ret = nwait(cpid)
if ret is None:
self.set_state(self.ST_STABLE, w)
+ #If worker dies, agent terminates on EOF.
+ #So lets wait for agent first.
+ nwait(apid)
ret = nwait(cpid)
if exit_signalled(ret):
ret = 0
@@ -249,6 +255,8 @@ class Monitor(object):
self.lock.acquire()
for cpid in cpids:
os.kill(cpid, signal.SIGKILL)
+ for apid in agents:
+ os.kill(apid, signal.SIGKILL)
self.lock.release()
finalize(exval=1)
t = Thread(target=wmon, args=[wx])