diff options
Diffstat (limited to 'xlators/features/marker/utils/syncdaemon/monitor.py')
-rw-r--r-- | xlators/features/marker/utils/syncdaemon/monitor.py | 129 |
1 files changed, 0 insertions, 129 deletions
diff --git a/xlators/features/marker/utils/syncdaemon/monitor.py b/xlators/features/marker/utils/syncdaemon/monitor.py deleted file mode 100644 index b8956dcc2b9..00000000000 --- a/xlators/features/marker/utils/syncdaemon/monitor.py +++ /dev/null @@ -1,129 +0,0 @@ -import os -import sys -import time -import signal -import logging -from gconf import gconf -from syncdutils import update_file, select, waitpid, set_term_handler - -class Monitor(object): - """class which spawns and manages gsyncd workers""" - - def __init__(self): - self.state = None - - def set_state(self, state): - """set the state that can be used by external agents - like glusterd for status reporting""" - if state == self.state: - return - self.state = state - logging.info('new state: %s' % state) - if getattr(gconf, 'state_file', None): - update_file(gconf.state_file, lambda f: f.write(state + '\n')) - - def monitor(self): - """the monitor loop - - Basic logic is a blantantly simple blunt heuristics: - if spawned client survives 60 secs, it's considered OK. - This servers us pretty well as it's not vulneralbe to - any kind of irregular behavior of the child... - - ... well, except for one: if children is hung up on - waiting for some event, it can survive aeons, still - will be defunct. So we tweak the above logic to - expect the worker to send us a signal within 60 secs - (in the form of closing its end of a pipe). The worker - does this when it's done with the setup stage - ready to enter the service loop (note it's the setup - stage which is vulnerable to hangs -- the full - blown worker blows up on EPIPE if the net goes down, - due to the keep-alive thread) - """ - def sigcont_handler(*a): - """ - Re-init logging and send group kill signal - """ - md = gconf.log_metadata - logging.shutdown() - lcls = logging.getLoggerClass() - lcls.setup(label=md.get('saved_label'), **md) - pid = os.getpid() - os.kill(-pid, signal.SIGUSR1) - signal.signal(signal.SIGUSR1, lambda *a: ()) - signal.signal(signal.SIGCONT, sigcont_handler) - - argv = sys.argv[:] - for o in ('-N', '--no-daemon', '--monitor'): - while o in argv: - argv.remove(o) - argv.extend(('-N', '-p', '')) - argv.insert(0, os.path.basename(sys.executable)) - - self.set_state('starting...') - ret = 0 - def nwait(p, o=0): - p2, r = waitpid(p, o) - if not p2: - return - return r - def exit_signalled(s): - """ child teminated due to receipt of SIGUSR1 """ - return (os.WIFSIGNALED(s) and (os.WTERMSIG(s) == signal.SIGUSR1)) - def exit_status(s): - if os.WIFEXITED(s): - return os.WEXITSTATUS(s) - return 1 - conn_timeout = int(gconf.connection_timeout) - while ret in (0, 1): - logging.info('-' * conn_timeout) - logging.info('starting gsyncd worker') - pr, pw = os.pipe() - cpid = os.fork() - if cpid == 0: - os.close(pr) - os.execv(sys.executable, argv + ['--feedback-fd', str(pw)]) - os.close(pw) - t0 = time.time() - so = select((pr,), (), (), conn_timeout)[0] - os.close(pr) - if so: - ret = nwait(cpid, os.WNOHANG) - if ret != None: - logging.debug("worker died before establishing connection") - else: - logging.debug("worker seems to be connected (?? racy check)") - while time.time() < t0 + conn_timeout: - ret = nwait(cpid, os.WNOHANG) - if ret != None: - logging.debug("worker died in startup phase") - break - time.sleep(1) - else: - logging.debug("worker not confirmed in %d sec, aborting it" % \ - conn_timeout) - # relax one SIGTERM by setting a handler that sets back - # standard handler - set_term_handler(lambda *a: set_term_handler()) - # give a chance to graceful exit - os.kill(-os.getpid(), signal.SIGTERM) - time.sleep(1) - os.kill(cpid, signal.SIGKILL) - ret = nwait(cpid) - if ret == None: - self.set_state('OK') - ret = nwait(cpid) - if exit_signalled(ret): - ret = 0 - else: - ret = exit_status(ret) - if ret in (0,1): - self.set_state('faulty') - time.sleep(10) - self.set_state('inconsistent') - return ret - -def monitor(): - """oh yeah, actually Monitor is used as singleton, too""" - return Monitor().monitor() |