From 1569424d1425a2f81c428d3968cd103ab2cad49e Mon Sep 17 00:00:00 2001 From: Csaba Henk Date: Tue, 15 Feb 2011 10:52:32 +0000 Subject: syncdaemon: change pidfile handling approach Signal handling of the python interpreter is a bit messy, so we cannot rely on executing a final clause upon termination. Switch over to fcntl lock based pidfile handling, which can provide reliable info about the status of the process. (Due to the aforementioned reason, the pidfile as such is not guaranteed to be cleaned up, but lock acquisition is a reliable measure.) Signed-off-by: Csaba Henk Signed-off-by: Anand V. Avati BUG: 1570 (geosync related changes) URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=1570 --- xlators/features/marker/utils/syncdaemon/gconf.py | 1 + xlators/features/marker/utils/syncdaemon/gsyncd.py | 73 ++++++++++++++-------- .../features/marker/utils/syncdaemon/resource.py | 2 +- 3 files changed, 48 insertions(+), 28 deletions(-) (limited to 'xlators') diff --git a/xlators/features/marker/utils/syncdaemon/gconf.py b/xlators/features/marker/utils/syncdaemon/gconf.py index 7bedce5148a..cec5be0789b 100644 --- a/xlators/features/marker/utils/syncdaemon/gconf.py +++ b/xlators/features/marker/utils/syncdaemon/gconf.py @@ -4,6 +4,7 @@ class GConf(object): ssh_ctl_dir = None ssh_ctl_args = None cpid = None + permanent_handles = [] @classmethod def setup_ssh_ctl(cls, ctld): diff --git a/xlators/features/marker/utils/syncdaemon/gsyncd.py b/xlators/features/marker/utils/syncdaemon/gsyncd.py index 8a91c5ef959..b8b92056b54 100644 --- a/xlators/features/marker/utils/syncdaemon/gsyncd.py +++ b/xlators/features/marker/utils/syncdaemon/gsyncd.py @@ -9,9 +9,10 @@ import signal import select import shutil import optparse +import fcntl from optparse import OptionParser, SUPPRESS_HELP from logging import Logger -from errno import EEXIST, ENOENT +from errno import EEXIST, ENOENT, EACCES, EAGAIN from gconf import gconf from configinterface import GConffile @@ -53,26 +54,42 @@ class 
GLogger(Logger): logging.basicConfig(**lprm) -def startup(**kw): - def write_pid(fn): - fd = None +def grabfile(fname, content=None): + # damn those messy open() mode codes + fd = os.open(fname, os.O_CREAT|os.O_RDWR) + f = os.fdopen(fd, 'r+b', 0) + try: + fcntl.lockf(f, fcntl.LOCK_EX|fcntl.LOCK_NB) + except: + ex = sys.exc_info()[1] + f.close() + if isinstance(ex, IOError) and ex.errno in (EACCES, EAGAIN): + # cannot grab, it's taken + return + raise + if content: try: - fd = os.open(fn, os.O_CREAT|os.O_TRUNC|os.O_WRONLY|os.O_EXCL) - os.write(fd, str(os.getpid()) + '\n') - finally: - if fd: - os.close(fd) + f.truncate() + f.write(content) + except: + f.close() + raise + gconf.permanent_handles.append(f) + return f + +def grabpidfile(fname=None, setpid=True): + if not fname: + fname = gconf.pid_file + content = None + if setpid: + content = str(os.getpid()) + '\n' + return grabfile(fname, content=content) +def startup(**kw): if getattr(gconf, 'pid_file', None) and kw.get('go_daemon') != 'postconn': - try: - write_pid(gconf.pid_file) - except OSError: - gconf.pid_file = None - ex = sys.exc_info()[1] - if ex.errno == EEXIST: - sys.stderr.write("pidfile is taken, exiting.\n") - exit(2) - raise + if not grabpidfile(): + sys.stderr.write("pidfile is taken, exiting.\n") + exit(2) if kw.get('go_daemon') == 'should': x, y = os.pipe() @@ -86,7 +103,8 @@ def startup(**kw): for f in (sys.stdin, sys.stdout, sys.stderr): os.dup2(dn, f.fileno()) if getattr(gconf, 'pid_file', None): - write_pid(gconf.pid_file + '.tmp') + if not grabpidfile(gconf.pid_file + '.tmp'): + raise RuntimeError("cannot grap temporary pidfile") os.rename(gconf.pid_file + '.tmp', gconf.pid_file) # wait for parent to terminate # so we can start up with @@ -102,20 +120,21 @@ def startup(**kw): def finalize(*a): if getattr(gconf, 'pid_file', None): + rm_pidf = True if gconf.cpid: + # exit path from parent branch of daemonization + rm_pidf = False while True: - f = open(gconf.pid_file) - pid = f.read() - 
f.close() - pid = int(pid.strip()) - if pid == gconf.cpid: + f = grabpidfile(setpid=False) + if not f: + # child has already taken over pidfile break - if pid != os.getpid(): - raise RuntimeError("corrupt pidfile") if os.waitpid(gconf.cpid, os.WNOHANG)[0] == gconf.cpid: + # child has terminated + rm_pidf = True break; time.sleep(0.1) - else: + if rm_pidf: try: os.unlink(gconf.pid_file) except: diff --git a/xlators/features/marker/utils/syncdaemon/resource.py b/xlators/features/marker/utils/syncdaemon/resource.py index 1005e408680..6697ab8c485 100644 --- a/xlators/features/marker/utils/syncdaemon/resource.py +++ b/xlators/features/marker/utils/syncdaemon/resource.py @@ -433,7 +433,7 @@ class SSH(AbstractUrl, SlaveRemote): repce.recv(inf) # hack hack hack: store a global reference to the file # to save it from getting GC'd which implies closing it - gconf._in_fd_reference = inf + gconf.permanent_handles.append(inf) self.fd_pair = (i, o) return 'should' -- cgit