geo-rep: Structured log support

Changed all log messages to structured log format.

Change-Id: Idae25f8b4ad0bbae38f4362cbda7bbf51ce7607b
Updates: #240
Signed-off-by: Aravinda VK <avishwan@redhat.com>
Reviewed-on: https://review.gluster.org/17551
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Kotresh HR <khiremat@redhat.com>
author: Aravinda VK <avishwan@redhat.com> 2017-06-15 18:09:36 +0530
committer: Aravinda VK <avishwan@redhat.com> 2017-06-20 06:00:47 +0000
commit: 0a8dac38ac4415ea770fb36b34e3c494e8713e6e (patch)
tree: b66cd2f3583466bfc2eeb16bdf724e4494cdbc3e /geo-replication/syncdaemon/monitor.py
parent: 52d0886cfbcdfd69fa0cac0a6d51cd8811d8c6d7 (diff)
1 files changed, 35 insertions, 26 deletions
diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py
index c54c07d600c..b65f1948050 100644
--- a/geo-replication/syncdaemon/monitor.py
+++ b/geo-replication/syncdaemon/monitor.py
@@ -22,7 +22,7 @@ from errno import ECHILD, ESRCH
 import re
 import random
 from gconf import gconf
-from syncdutils import select, waitpid, errno_wrap
+from syncdutils import select, waitpid, errno_wrap, lf
 from syncdutils import set_term_handler, is_host_local, GsyncdError
 from syncdutils import escape, Thread, finalize, memoize
 from syncdutils import gf_event, EVENT_GEOREP_FAULTY
@@ -63,15 +63,17 @@ def get_slave_bricks_status(host, vol):
     po.wait()
     po.terminate_geterr(fail_on_err=False)
     if po.returncode != 0:
-        logging.info("Volume status command failed, unable to get "
-                     "list of up nodes of %s, returning empty list: %s" %
-                     (vol, po.returncode))
+        logging.info(lf("Volume status command failed, unable to get "
+                        "list of up nodes, returning empty list",
+                        volume=vol,
+                        error=po.returncode))
         return []
     vi = XET.fromstring(vix)
     if vi.find('opRet').text != '0':
-        logging.info("Unable to get list of up nodes of %s, "
-                     "returning empty list: %s" %
-                     (vol, vi.find('opErrstr').text))
+        logging.info(lf("Unable to get list of up nodes, "
+                        "returning empty list",
+                        volume=vol,
+                        error=vi.find('opErrstr').text))
         return []
 
     up_hosts = set()
@@ -81,8 +83,10 @@ def get_slave_bricks_status(host, vol):
             if el.find('status').text == '1':
                 up_hosts.add(el.find('hostname').text)
     except (ParseError, AttributeError, ValueError) as e:
-        logging.info("Parsing failed to get list of up nodes of %s, "
-                     "returning empty list: %s" % (vol, e))
+        logging.info(lf("Parsing failed to get list of up nodes, "
+                        "returning empty list",
+                        volume=vol,
+                        error=e))
 
     return list(up_hosts)
 
@@ -271,8 +275,9 @@ class Monitor(object):
             # Spawn the worker and agent in lock to avoid fd leak
             self.lock.acquire()
 
-            logging.info('starting gsyncd worker(%s). Slave node: %s' %
-                         (w[0]['dir'], remote_host))
+            logging.info(lf('starting gsyncd worker',
+                            brick=w[0]['dir'],
+                            slave_node=remote_host))
 
             # Couple of pipe pairs for RPC communication b/w
             # worker and changelog agent.
@@ -336,15 +341,16 @@ class Monitor(object):
 
                 if ret_agent is not None:
                     # Agent is died Kill Worker
-                    logging.info("Changelog Agent died, "
-                                 "Aborting Worker(%s)" % w[0]['dir'])
+                    logging.info(lf("Changelog Agent died, Aborting Worker",
+                                    brick=w[0]['dir']))
                     errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
                     nwait(cpid)
                     nwait(apid)
 
                 if ret is not None:
-                    logging.info("worker(%s) died before establishing "
-                                 "connection" % w[0]['dir'])
+                    logging.info(lf("worker died before establishing "
+                                    "connection",
+                                    brick=w[0]['dir']))
                     nwait(apid)  # wait for agent
                 else:
                     logging.debug("worker(%s) connected" % w[0]['dir'])
@@ -353,15 +359,16 @@ class Monitor(object):
                         ret_agent = nwait(apid, os.WNOHANG)
 
                         if ret is not None:
-                            logging.info("worker(%s) died in startup "
-                                         "phase" % w[0]['dir'])
+                            logging.info(lf("worker died in startup phase",
+                                            brick=w[0]['dir']))
                             nwait(apid)  # wait for agent
                             break
 
                         if ret_agent is not None:
                             # Agent is died Kill Worker
-                            logging.info("Changelog Agent died, Aborting "
-                                         "Worker(%s)" % w[0]['dir'])
+                            logging.info(lf("Changelog Agent died, Aborting "
+                                            "Worker",
+                                            brick=w[0]['dir']))
                             errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
                             nwait(cpid)
                             nwait(apid)
@@ -369,13 +376,15 @@ class Monitor(object):
 
                         time.sleep(1)
             else:
-                logging.info("worker(%s) not confirmed in %d sec, aborting it. "
-                             "Gsyncd invocation on remote slave via SSH or "
-                             "gluster master mount might have hung. Please "
-                             "check the above logs for exact issue and check "
-                             "master or slave volume for errors. Restarting "
-                             "master/slave volume accordingly might help."
-                             % (w[0]['dir'], conn_timeout))
+                logging.info(
+                    lf("Worker not confirmed after wait, aborting it. "
+                       "Gsyncd invocation on remote slave via SSH or "
+                       "gluster master mount might have hung. Please "
+                       "check the above logs for exact issue and check "
+                       "master or slave volume for errors. Restarting "
+                       "master/slave volume accordingly might help.",
+                       brick=w[0]['dir'],
+                       timeout=conn_timeout))
                 errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
                 nwait(apid)  # wait for agent
                 ret = nwait(cpid)
author	Aravinda VK <avishwan@redhat.com>	2017-06-15 18:09:36 +0530
committer	Aravinda VK <avishwan@redhat.com>	2017-06-20 06:00:47 +0000
commit	0a8dac38ac4415ea770fb36b34e3c494e8713e6e (patch)
tree	b66cd2f3583466bfc2eeb16bdf724e4494cdbc3e /geo-replication/syncdaemon/monitor.py
parent	52d0886cfbcdfd69fa0cac0a6d51cd8811d8c6d7 (diff)