author    Aravinda VK <avishwan@redhat.com>    2017-11-30 12:52:30 +0530
committer Kotresh HR <khiremat@redhat.com>     2018-01-23 03:03:01 +0000
commit    7c9b62cfff34d1ac4c8fa0822b18e51c15e6db81 (patch)
tree      be9bab79cdb0762e644063f24f524cd2b5487308 /geo-replication/syncdaemon/monitor.py
parent    8efa3ee675a991410c6aa27dce40d4dd441d0935 (diff)
geo-rep: Support for using Volinfo from Conf file
Once Geo-replication is started, it runs Gluster commands to get Volume
info from Master and Slave. With this patch, Georep can get Volume info
from a Conf file if the `--use-gconf-volinfo` argument is specified to
the monitor. Create a config (or add to the config if it exists) with
the following fields:

    [vars]
    master-bricks=NODEID:HOSTNAME:PATH,..
    slave-bricks=NODEID:HOSTNAME,..
    master-volume-id=
    slave-volume-id=
    master-replica-count=
    master-disperse_count=

Note: Existing Geo-replication is not affected, since this is activated
only when `--use-gconf-volinfo` is passed while spawning `gsyncd
monitor`. Tiering support is not yet added, since Tiering + Glusterd2
is still under discussion.

Fixes: #396
Change-Id: I281baccbad03686c00f6488a8511dd6db0edc57a
Signed-off-by: Aravinda VK <avishwan@redhat.com>
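For illustration, a filled-in config of this shape might look like the
following (all node IDs, hostnames, and paths here are invented
placeholders):

    [vars]
    master-bricks=11111111-aaaa-bbbb-cccc-000000000001:mnode1:/bricks/b1,11111111-aaaa-bbbb-cccc-000000000002:mnode2:/bricks/b2
    slave-bricks=22222222-aaaa-bbbb-cccc-000000000001:snode1,22222222-aaaa-bbbb-cccc-000000000002:snode2
    master-volume-id=33333333-aaaa-bbbb-cccc-000000000000
    slave-volume-id=44444444-aaaa-bbbb-cccc-000000000000
    master-replica-count=2
    master-disperse_count=1

A minimal sketch of how such a [vars] section could be parsed back into
per-brick dictionaries (read_volinfo_conf is a hypothetical helper; the
real implementation lives in syncdutils as VolinfoFromGconf and is not
part of this diff):

    try:
        from configparser import ConfigParser   # Python 3
    except ImportError:
        from ConfigParser import ConfigParser   # Python 2

    def read_volinfo_conf(path):
        """Read the master brick list and volume ids from a gconf file."""
        cp = ConfigParser()
        cp.read(path)
        bricks = []
        # master-bricks=NODEID:HOSTNAME:PATH,.. -> one dict per brick,
        # matching the brick['uuid']/brick['host']/brick['dir'] keys
        # used by monitor.py in the diff below.
        for b in cp.get("vars", "master-bricks").split(","):
            node_id, host, bdir = b.split(":", 2)
            bricks.append({"uuid": node_id, "host": host, "dir": bdir})
        return {
            "master-bricks": bricks,
            "master-volume-id": cp.get("vars", "master-volume-id"),
            "slave-volume-id": cp.get("vars", "slave-volume-id"),
        }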
Diffstat (limited to 'geo-replication/syncdaemon/monitor.py')
-rw-r--r--  geo-replication/syncdaemon/monitor.py  |  68
1 file changed, 18 insertions(+), 50 deletions(-)
diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py
index a193b57caff..257d34a743b 100644
--- a/geo-replication/syncdaemon/monitor.py
+++ b/geo-replication/syncdaemon/monitor.py
@@ -14,7 +14,6 @@ import time
import signal
import logging
import xml.etree.ElementTree as XET
-from subprocess import PIPE
from threading import Lock
from errno import ECHILD, ESRCH
import random
@@ -23,9 +22,9 @@ from resource import SSH
import gsyncdconfig as gconf
from rconf import rconf
from syncdutils import select, waitpid, errno_wrap, lf, grabpidfile
-from syncdutils import set_term_handler, is_host_local, GsyncdError
-from syncdutils import Thread, finalize, Popen, Volinfo
-from syncdutils import gf_event, EVENT_GEOREP_FAULTY
+from syncdutils import set_term_handler, GsyncdError
+from syncdutils import Thread, finalize, Volinfo, VolinfoFromGconf
+from syncdutils import gf_event, EVENT_GEOREP_FAULTY, get_up_nodes
from gsyncdstatus import GeorepStatus, set_monitor_status
@@ -54,43 +53,6 @@ def get_subvol_num(brick_idx, vol, hot):
return str(cnt)
-def get_slave_bricks_status(host, vol):
- po = Popen(['gluster', '--xml', '--remote-host=' + host,
- 'volume', 'status', vol, "detail"],
- stdout=PIPE, stderr=PIPE)
- vix = po.stdout.read()
- po.wait()
- po.terminate_geterr(fail_on_err=False)
- if po.returncode != 0:
- logging.info(lf("Volume status command failed, unable to get "
- "list of up nodes, returning empty list",
- volume=vol,
- error=po.returncode))
- return []
- vi = XET.fromstring(vix)
- if vi.find('opRet').text != '0':
- logging.info(lf("Unable to get list of up nodes, "
- "returning empty list",
- volume=vol,
- error=vi.find('opErrstr').text))
- return []
-
- up_hosts = set()
-
- try:
- for el in vi.findall('volStatus/volumes/volume/node'):
- if el.find('status').text == '1':
- up_hosts.add((el.find('hostname').text,
- el.find('peerid').text))
- except (ParseError, AttributeError, ValueError) as e:
- logging.info(lf("Parsing failed to get list of up nodes, "
- "returning empty list",
- volume=vol,
- error=e))
-
- return list(up_hosts)
-
-
class Monitor(object):
"""class which spawns and manages gsyncd workers"""
@@ -116,7 +78,7 @@ class Monitor(object):
errno_wrap(os.kill, [-os.getpid(), signal.SIGTERM], [ESRCH])
def monitor(self, w, argv, cpids, agents, slave_vol, slave_host, master,
- suuid):
+ suuid, slavenodes):
"""the monitor loop
Basic logic is a blatantly simple blunt heuristic:
@@ -180,8 +142,7 @@ class Monitor(object):
# If the connected slave node is down then try to connect to
# different up node.
current_slave_host = remote_host
- slave_up_hosts = get_slave_bricks_status(
- slave_host, slave_vol)
+ slave_up_hosts = get_up_nodes(slavenodes, gconf.get("ssh-port"))
if (current_slave_host, remote_id) not in slave_up_hosts:
if len(slave_up_hosts) > 0:
@@ -354,7 +315,7 @@ class Monitor(object):
self.status[w[0]['dir']].set_worker_status(self.ST_INCON)
return ret
- def multiplex(self, wspx, suuid, slave_vol, slave_host, master):
+ def multiplex(self, wspx, suuid, slave_vol, slave_host, master, slavenodes):
argv = [os.path.basename(sys.executable), sys.argv[0]]
cpids = set()
@@ -363,7 +324,7 @@ class Monitor(object):
for wx in wspx:
def wmon(w):
cpid, _ = self.monitor(w, argv, cpids, agents, slave_vol,
- slave_host, master, suuid)
+ slave_host, master, suuid, slavenodes)
time.sleep(1)
self.lock.acquire()
for cpid in cpids:
@@ -380,7 +341,10 @@ class Monitor(object):
def distribute(master, slave):
- mvol = Volinfo(master.volume, master.host)
+ if rconf.args.use_gconf_volinfo:
+ mvol = VolinfoFromGconf(master.volume, master=True)
+ else:
+ mvol = Volinfo(master.volume, master.host)
logging.debug('master bricks: ' + repr(mvol.bricks))
prelude = []
slave_host = None
@@ -393,7 +357,11 @@ def distribute(master, slave):
logging.debug('slave SSH gateway: ' + slave.remote_addr)
- svol = Volinfo(slave.volume, "localhost", prelude)
+ if rconf.args.use_gconf_volinfo:
+ svol = VolinfoFromGconf(slave.volume, master=False)
+ else:
+ svol = Volinfo(slave.volume, "localhost", prelude)
+
sbricks = svol.bricks
suuid = svol.uuid
slave_host = slave.remote_addr.split('@')[-1]
@@ -415,14 +383,14 @@ def distribute(master, slave):
workerspex = []
for idx, brick in enumerate(mvol.bricks):
- if is_host_local(brick['uuid']):
+ if rconf.args.local_node_id == brick['uuid']:
is_hot = mvol.is_hot(":".join([brick['host'], brick['dir']]))
workerspex.append((brick,
slaves[idx % len(slaves)],
get_subvol_num(idx, mvol, is_hot),
is_hot))
logging.debug('worker specs: ' + repr(workerspex))
- return workerspex, suuid, slave_vol, slave_host, master
+ return workerspex, suuid, slave_vol, slave_host, master, slavenodes
def monitor(local, remote):
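
The get_up_nodes helper imported above replaces the removed
get_slave_bricks_status: instead of shelling out to
`gluster volume status <vol> detail --xml` on the slave, the monitor now
probes the slave nodes directly. A minimal sketch of such a helper,
inferred only from the call site
`get_up_nodes(slavenodes, gconf.get("ssh-port"))` and the membership
test on (hostname, uuid) tuples (the real implementation lives in
syncdutils and is not part of this diff):

    import socket

    def get_up_nodes(nodes, port, timeout=5):
        """Return the (hostname, uuid) tuples whose SSH port accepts a
        TCP connection; unreachable nodes are silently skipped."""
        up_nodes = []
        for node in nodes:
            try:
                conn = socket.create_connection((node[0], port), timeout)
                conn.close()
                up_nodes.append(node)
            except socket.error:
                pass
        return up_nodes

The monitor loop then checks whether (current_slave_host, remote_id) is
still in this list and, if not, fails over to one of the remaining up
nodes.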