From 8e171a03d81452a2998b01997da7647c0b3bf52b Mon Sep 17 00:00:00 2001 From: Venky Shankar Date: Mon, 24 Oct 2011 15:29:00 +0530 Subject: cli: add geo-replication log-rotate command Rotating geo-replication master/monitor log files from cli. On invocation, the log file for a given master-slave session is backed up with the current timestamp suffixed to the file name and a signal is sent to gsyncd to start logging to a new log file. Sample commands: * Rotate log file for this MASTER:SLAVE session: gluster volume geo-replication MASTER SLAVE log-rotate * Rotate log files for all sessions of master volume MASTER: gluster volume geo-replication MASTER log-rotate * Rotate log files for all sessions: gluster volume geo-replication log-rotate Change-Id: If801743e9f37bd282e68d262203141626ce77e55 BUG: 3519 Reviewed-on: http://review.gluster.com/653 Tested-by: Gluster Build System Reviewed-by: Csaba Henk --- xlators/features/marker/utils/syncdaemon/gconf.py | 1 + xlators/features/marker/utils/syncdaemon/gsyncd.py | 7 ++-- .../features/marker/utils/syncdaemon/monitor.py | 41 ++++++++++++++++------ xlators/features/marker/utils/syncdaemon/repce.py | 5 ++- .../features/marker/utils/syncdaemon/resource.py | 6 ++-- .../features/marker/utils/syncdaemon/syncdutils.py | 25 ++++++++++++- 6 files changed, 66 insertions(+), 19 deletions(-) (limited to 'xlators/features') diff --git a/xlators/features/marker/utils/syncdaemon/gconf.py b/xlators/features/marker/utils/syncdaemon/gconf.py index 24165b6191a..803ca2acdde 100644 --- a/xlators/features/marker/utils/syncdaemon/gconf.py +++ b/xlators/features/marker/utils/syncdaemon/gconf.py @@ -6,6 +6,7 @@ class GConf(object): cpid = None pid_file_owned = False permanent_handles = [] + log_metadata = {} @classmethod def setup_ssh_ctl(cls, ctld): diff --git a/xlators/features/marker/utils/syncdaemon/gsyncd.py b/xlators/features/marker/utils/syncdaemon/gsyncd.py index 193af9d5f37..99e2204b2e4 100644 --- a/xlators/features/marker/utils/syncdaemon/gsyncd.py +++ 
b/xlators/features/marker/utils/syncdaemon/gsyncd.py @@ -6,7 +6,6 @@ import sys import time import logging import signal -import select import optparse import fcntl from optparse import OptionParser, SUPPRESS_HELP @@ -15,6 +14,7 @@ from errno import EEXIST, ENOENT from gconf import gconf from syncdutils import FreeObject, norm, grabpidfile, finalize, log_raise_exception +from syncdutils import select from configinterface import GConffile import resource from monitor import monitor @@ -78,7 +78,7 @@ def startup(**kw): # so we can start up with # no messing from the dirty # ol' bustard - select.select((x,), (), ()) + select((x,), (), ()) os.close(x) lkw = {} @@ -93,6 +93,9 @@ def startup(**kw): lkw['filename'] = kw['log_file'] GLogger.setup(label=kw.get('label'), **lkw) + lkw.update({'saved_label': kw.get('label')}) + gconf.log_metadata = lkw + def main(): signal.signal(signal.SIGTERM, lambda *a: finalize(*a, **{'exval': 1})) GLogger.setup() diff --git a/xlators/features/marker/utils/syncdaemon/monitor.py b/xlators/features/marker/utils/syncdaemon/monitor.py index 365e91435fd..f58aa592c9f 100644 --- a/xlators/features/marker/utils/syncdaemon/monitor.py +++ b/xlators/features/marker/utils/syncdaemon/monitor.py @@ -1,11 +1,10 @@ import os import sys import time +import signal import logging -import select -from signal import SIGKILL from gconf import gconf -from syncdutils import update_file +from syncdutils import update_file, select, waitpid class Monitor(object): @@ -21,6 +20,19 @@ class Monitor(object): update_file(gconf.state_file, lambda f: f.write(state + '\n')) def monitor(self): + def sigcont_handler(*a): + """ + Re-init logging and send group kill signal + """ + md = gconf.log_metadata + logging.shutdown() + lcls = logging.getLoggerClass() + lcls.setup(label=md.get('saved_label'), **md) + pid = os.getpid() + os.kill(-pid, signal.SIGUSR1) + signal.signal(signal.SIGUSR1, lambda *a: ()) + signal.signal(signal.SIGCONT, sigcont_handler) + argv = sys.argv[:] for o 
in ('-N', '--no-daemon', '--monitor'): while o in argv: @@ -31,11 +43,16 @@ class Monitor(object): self.set_state('starting...') ret = 0 def nwait(p, o=0): - p2, r = os.waitpid(p, o) + p2, r = waitpid(p, o) if not p2: return - if os.WIFEXITED(r): - return os.WEXITSTATUS(r) + return r + def exit_signalled(s): + """ child teminated due to receipt of SIGUSR1 """ + return (os.WIFSIGNALED(s) and (os.WTERMSIG(s) == signal.SIGUSR1)) + def exit_status(s): + if os.WIFEXITED(s): + return os.WEXITSTATUS(s) return 1 conn_timeout = 60 while ret in (0, 1): @@ -48,7 +65,7 @@ class Monitor(object): os.execv(sys.executable, argv + ['--feedback-fd', str(pw)]) os.close(pw) t0 = time.time() - so = select.select((pr,), (), (), conn_timeout)[0] + so = select((pr,), (), (), conn_timeout)[0] os.close(pr) if so: ret = nwait(cpid, os.WNOHANG) @@ -65,13 +82,17 @@ class Monitor(object): else: logging.debug("worker not confirmed in %d sec, aborting it" % \ conn_timeout) - os.kill(cpid, SIGKILL) + os.kill(cpid, signal.SIGKILL) ret = nwait(cpid) if ret == None: self.set_state('OK') ret = nwait(cpid) - elif ret in (0, 1): - self.set_state('faulty') + if exit_signalled(ret): + ret = 0 + else: + ret = exit_status(ret) + if ret in (0,1): + self.set_state('faulty') time.sleep(10) self.set_state('inconsistent') return ret diff --git a/xlators/features/marker/utils/syncdaemon/repce.py b/xlators/features/marker/utils/syncdaemon/repce.py index 47691301e29..02833b0382b 100644 --- a/xlators/features/marker/utils/syncdaemon/repce.py +++ b/xlators/features/marker/utils/syncdaemon/repce.py @@ -1,6 +1,5 @@ import os import sys -import select import time import logging from threading import Condition @@ -20,7 +19,7 @@ except ImportError: # py 3 import pickle -from syncdutils import Thread +from syncdutils import Thread, select pickle_proto = -1 repce_version = 1.0 @@ -114,7 +113,7 @@ class RepceClient(object): def listen(self): while True: - select.select((self.inf,), (), ()) + select((self.inf,), (), ()) rid, 
exc, res = recv(self.inf) rjob = self.jtab.pop(rid) if rjob.cbk: diff --git a/xlators/features/marker/utils/syncdaemon/resource.py b/xlators/features/marker/utils/syncdaemon/resource.py index 800d297bacd..5f46edd44cf 100644 --- a/xlators/features/marker/utils/syncdaemon/resource.py +++ b/xlators/features/marker/utils/syncdaemon/resource.py @@ -6,7 +6,6 @@ import stat import time import errno import struct -import select import socket import logging import tempfile @@ -17,6 +16,7 @@ import repce from repce import RepceServer, RepceClient from master import GMaster import syncdutils +from syncdutils import select UrlRX = re.compile('\A(\w+)://(.*)') HostRX = re.compile('[a-z\d](?:[a-z\d.-]*[a-z\d])?', re.I) @@ -206,7 +206,7 @@ class SlaveLocal(object): logging.info("connection inactive for %d seconds, stopping" % int(gconf.timeout)) break else: - select.select((), (), ()) + select((), (), ()) class SlaveRemote(object): @@ -455,7 +455,7 @@ class SSH(AbstractUrl, SlaveRemote): i, o = ret inf = os.fdopen(i) repce.send(o, None, '__repce_version__') - select.select((inf,), (), ()) + select((inf,), (), ()) repce.recv(inf) # hack hack hack: store a global reference to the file # to save it from getting GC'd which implies closing it diff --git a/xlators/features/marker/utils/syncdaemon/syncdutils.py b/xlators/features/marker/utils/syncdaemon/syncdutils.py index 4bf51da746e..81c4e2e4be5 100644 --- a/xlators/features/marker/utils/syncdaemon/syncdutils.py +++ b/xlators/features/marker/utils/syncdaemon/syncdutils.py @@ -5,9 +5,11 @@ import fcntl import shutil import logging from threading import Lock, Thread as baseThread -from errno import EACCES, EAGAIN +from errno import EACCES, EAGAIN, EINTR from signal import SIGTERM, SIGKILL from time import sleep +import select as oselect +from os import waitpid as owaitpid from gconf import gconf @@ -158,3 +160,24 @@ class Thread(baseThread): kw['target'] = twrap baseThread.__init__(self, *a, **kw) self.setDaemon(True) + +class 
GsyncdError(Exception): + pass + +def eintr_wrap(func, exc, *a): + """ + wrapper around syscalls resilient to interrupt caused + by signals + """ + while True: + try: + return func(*a) + except exc, ex: + if not ex[0] == EINTR: + raise GsyncdError(ex[1]) + +def select(*a): + return eintr_wrap(oselect.select, oselect.error, *a) + +def waitpid(*a): + return eintr_wrap(owaitpid, OSError, *a) -- cgit