summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Csaba Henk <csaba@gluster.com> 2011-04-15 09:10:03 +0000
committer: Anand Avati <avati@gluster.com> 2011-04-16 04:10:32 -0700
commit: f727e847133e479a37e86a7feb5826496a7313f7 (patch)
tree: 76a13334b590fc6c82d09877a8fb919676385fb3
parent: 43d95c00ee49cb4852d199f11e2647a5930b07c8 (diff)
syncdaemon: yet another try to exit properly
The final cleanup sequence plus the call to _exit, which used to be done only in the main thread, is now performed in each thread when that thread crashes. It seems we are no longer left hanging this way.

Signed-off-by: Csaba Henk <csaba@gluster.com>
Signed-off-by: Anand Avati <avati@gluster.com>

BUG: 2736 (gsyncd hangs if crash occurs in the non-main thread)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=2736
-rw-r--r-- xlators/features/marker/utils/syncdaemon/gconf.py 1
-rw-r--r-- xlators/features/marker/utils/syncdaemon/gsyncd.py 86
-rw-r--r-- xlators/features/marker/utils/syncdaemon/syncdutils.py 88
3 files changed, 92 insertions, 83 deletions
diff --git a/xlators/features/marker/utils/syncdaemon/gconf.py b/xlators/features/marker/utils/syncdaemon/gconf.py
index cec5be078..24165b619 100644
--- a/xlators/features/marker/utils/syncdaemon/gconf.py
+++ b/xlators/features/marker/utils/syncdaemon/gconf.py
@@ -4,6 +4,7 @@ class GConf(object):
ssh_ctl_dir = None
ssh_ctl_args = None
cpid = None
+ pid_file_owned = False
permanent_handles = []
@classmethod
diff --git a/xlators/features/marker/utils/syncdaemon/gsyncd.py b/xlators/features/marker/utils/syncdaemon/gsyncd.py
index 0ed120e03..ba4d7a6dd 100644
--- a/xlators/features/marker/utils/syncdaemon/gsyncd.py
+++ b/xlators/features/marker/utils/syncdaemon/gsyncd.py
@@ -7,15 +7,14 @@ import time
import logging
import signal
import select
-import shutil
import optparse
import fcntl
from optparse import OptionParser, SUPPRESS_HELP
from logging import Logger
-from errno import EEXIST, ENOENT, EACCES, EAGAIN
+from errno import EEXIST, ENOENT
from gconf import gconf
-from syncdutils import FreeObject, norm
+from syncdutils import FreeObject, norm, grabpidfile, finalize, log_raise_exception
from configinterface import GConffile
import resource
from monitor import monitor
@@ -53,47 +52,12 @@ class GLogger(Logger):
logging.basicConfig(**lprm)
-def grabfile(fname, content=None):
- # damn those messy open() mode codes
- fd = os.open(fname, os.O_CREAT|os.O_RDWR)
- f = os.fdopen(fd, 'r+b', 0)
- try:
- fcntl.lockf(f, fcntl.LOCK_EX|fcntl.LOCK_NB)
- except:
- ex = sys.exc_info()[1]
- f.close()
- if isinstance(ex, IOError) and ex.errno in (EACCES, EAGAIN):
- # cannot grab, it's taken
- return
- raise
- if content:
- try:
- f.truncate()
- f.write(content)
- except:
- f.close()
- raise
- gconf.permanent_handles.append(f)
- return f
-
-pid_file_owned = False
-
-def grabpidfile(fname=None, setpid=True):
- if not fname:
- fname = gconf.pid_file
- content = None
- if setpid:
- content = str(os.getpid()) + '\n'
- return grabfile(fname, content=content)
-
def startup(**kw):
- global pid_file_owned
-
if getattr(gconf, 'pid_file', None) and kw.get('go_daemon') != 'postconn':
if not grabpidfile():
sys.stderr.write("pidfile is taken, exiting.\n")
sys.exit(2)
- pid_file_owned = True
+ gconf.pid_file_owned = True
if kw.get('go_daemon') == 'should':
x, y = os.pipe()
@@ -129,57 +93,19 @@ def startup(**kw):
lkw['filename'] = kw['log_file']
GLogger.setup(label=kw.get('label'), **lkw)
-def finalize(*a):
- if getattr(gconf, 'pid_file', None):
- rm_pidf = pid_file_owned
- if gconf.cpid:
- # exit path from parent branch of daemonization
- rm_pidf = False
- while True:
- f = grabpidfile(setpid=False)
- if not f:
- # child has already taken over pidfile
- break
- if os.waitpid(gconf.cpid, os.WNOHANG)[0] == gconf.cpid:
- # child has terminated
- rm_pidf = True
- break;
- time.sleep(0.1)
- if rm_pidf:
- try:
- os.unlink(gconf.pid_file)
- except:
- ex = sys.exc_info()[1]
- if ex.errno == ENOENT:
- pass
- else:
- raise
- if gconf.ssh_ctl_dir and not gconf.cpid:
- shutil.rmtree(gconf.ssh_ctl_dir)
- sys.stdout.flush()
- sys.stderr.flush()
-
def main():
signal.signal(signal.SIGTERM, lambda *a: (finalize(*a), os._exit(1)))
GLogger.setup()
- exval = 0
+ excont = FreeObject(exval = 0)
try:
try:
main_i()
except:
- exc = sys.exc_info()[1]
- if isinstance(exc, SystemExit):
- exval = exc.code or 0
- raise
- else:
- logging.exception("FAIL: ")
- sys.stderr.write("failed with %s.\n" % type(exc).__name__)
- exval = 1
- sys.exit(exval)
+ log_raise_exception(excont)
finally:
finalize()
# force exit in non-main thread too
- os._exit(exval)
+ os._exit(excont.exval)
def main_i():
rconf = {'go_daemon': 'should'}
diff --git a/xlators/features/marker/utils/syncdaemon/syncdutils.py b/xlators/features/marker/utils/syncdaemon/syncdutils.py
index 48694d238..c8f751d33 100644
--- a/xlators/features/marker/utils/syncdaemon/syncdutils.py
+++ b/xlators/features/marker/utils/syncdaemon/syncdutils.py
@@ -1,7 +1,15 @@
import os
+import sys
+import time
import fcntl
+import shutil
+import logging
from threading import Thread as baseThread
-from signal import SIGTERM
+from errno import EACCES, EAGAIN
+from signal import SIGTERM, SIGKILL
+from time import sleep
+
+from gconf import gconf
try:
# py 3
@@ -49,6 +57,78 @@ def update_file(path, updater, merger = lambda f: True):
if fx:
fx.close()
+def grabfile(fname, content=None):
+ # damn those messy open() mode codes
+ fd = os.open(fname, os.O_CREAT|os.O_RDWR)
+ f = os.fdopen(fd, 'r+b', 0)
+ try:
+ fcntl.lockf(f, fcntl.LOCK_EX|fcntl.LOCK_NB)
+ except:
+ ex = sys.exc_info()[1]
+ f.close()
+ if isinstance(ex, IOError) and ex.errno in (EACCES, EAGAIN):
+ # cannot grab, it's taken
+ return
+ raise
+ if content:
+ try:
+ f.truncate()
+ f.write(content)
+ except:
+ f.close()
+ raise
+ gconf.permanent_handles.append(f)
+ return f
+
+def grabpidfile(fname=None, setpid=True):
+ if not fname:
+ fname = gconf.pid_file
+ content = None
+ if setpid:
+ content = str(os.getpid()) + '\n'
+ return grabfile(fname, content=content)
+
+def finalize(*a):
+ if getattr(gconf, 'pid_file', None):
+ rm_pidf = gconf.pid_file_owned
+ if gconf.cpid:
+ # exit path from parent branch of daemonization
+ rm_pidf = False
+ while True:
+ f = grabpidfile(setpid=False)
+ if not f:
+ # child has already taken over pidfile
+ break
+ if os.waitpid(gconf.cpid, os.WNOHANG)[0] == gconf.cpid:
+ # child has terminated
+ rm_pidf = True
+ break;
+ time.sleep(0.1)
+ if rm_pidf:
+ try:
+ os.unlink(gconf.pid_file)
+ except:
+ ex = sys.exc_info()[1]
+ if ex.errno == ENOENT:
+ pass
+ else:
+ raise
+ if gconf.ssh_ctl_dir and not gconf.cpid:
+ shutil.rmtree(gconf.ssh_ctl_dir)
+ sys.stdout.flush()
+ sys.stderr.flush()
+
+def log_raise_exception(excont):
+ exc = sys.exc_info()[1]
+ if isinstance(exc, SystemExit):
+ excont.exval = exc.code or 0
+ raise
+ else:
+ logging.exception("FAIL: ")
+ sys.stderr.write("failed with %s.\n" % type(exc).__name__)
+ excont.exval = 1
+ sys.exit(excont.exval)
+
class FreeObject(object):
"""wildcard class for which any attribute can be set"""
@@ -63,13 +143,15 @@ class Thread(baseThread):
tf = kw.get('target')
if tf:
def twrap(*aa):
+ excont = FreeObject(exval = 0)
try:
tf(*aa)
except:
try:
- raise
+ log_raise_exception(excont)
finally:
- os.kill(os.getpid(), SIGTERM)
+ finalize()
+ os._exit(excont.exval)
kw['target'] = twrap
baseThread.__init__(self, *a, **kw)
self.setDaemon(True)