summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--glustolibs-gluster/glustolibs/gluster/gluster_init.py55
-rw-r--r--glustolibs-gluster/glustolibs/gluster/heal_libs.py149
-rw-r--r--tests/functional/afr/__init__.py0
-rw-r--r--tests/functional/afr/test_self_heal_daemon_process.py299
4 files changed, 503 insertions, 0 deletions
diff --git a/glustolibs-gluster/glustolibs/gluster/gluster_init.py b/glustolibs-gluster/glustolibs/gluster/gluster_init.py
index d45a186d0..5d08acd0e 100644
--- a/glustolibs-gluster/glustolibs/gluster/gluster_init.py
+++ b/glustolibs-gluster/glustolibs/gluster/gluster_init.py
@@ -167,3 +167,58 @@ def env_setup_servers(servers):
return False
return True
+
+
+def get_glusterd_pids(nodes):
+ """
+ Checks if glusterd process is running and
+ return the process id's in dictionary format
+
+ Args:
+ nodes ( str|list ) : Node/Nodes of the cluster
+
+ Returns:
+ tuple : Tuple containing two elements (ret, gluster_pids).
+ The first element 'ret' is of type 'bool', True if only if
+ glusterd is running on all the nodes in the list and each
+ node contains only one instance of glusterd running.
+ False otherwise.
+
+ The second element 'glusterd_pids' is of type dictonary and
+ it contains the process ID's for glusterd.
+
+ """
+ glusterd_pids = {}
+ _rc = True
+ if isinstance(nodes, str):
+ nodes = [nodes]
+
+ cmd = "pidof glusterd"
+ g.log.info("Executing cmd: %s on node %s" % (cmd, nodes))
+ results = g.run_parallel(nodes, cmd)
+ for node in results:
+ ret, out, err = results[node]
+ if ret == 0:
+ if len(out.strip().split("\n")) == 1:
+ if not out.strip():
+ g.log.error("NO glusterd process found "
+ "on node %s" % node)
+ _rc = False
+ glusterd_pids[node] = ['-1']
+ else:
+ g.log.info("glusterd process with "
+ "pid %s found on %s",
+ out.strip().split("\n"), node)
+ glusterd_pids[node] = (out.strip().split("\n"))
+ else:
+ g.log.error("More than One glusterd process "
+ "found on node %s" % node)
+ _rc = False
+ glusterd_pids[node] = out
+ else:
+ g.log.error("Not able to get glusterd process "
+ "from node %s" % node)
+ _rc = False
+ glusterd_pids[node] = ['-1']
+
+ return _rc, glusterd_pids
diff --git a/glustolibs-gluster/glustolibs/gluster/heal_libs.py b/glustolibs-gluster/glustolibs/gluster/heal_libs.py
index 5ccb4b2b4..3cd867c36 100644
--- a/glustolibs-gluster/glustolibs/gluster/heal_libs.py
+++ b/glustolibs-gluster/glustolibs/gluster/heal_libs.py
@@ -305,3 +305,152 @@ def wait_for_self_heal_daemons_to_be_online(mnode, volname, timeout=300):
g.log.info("All self-heal-daemons of the volume '%s' are online ",
volname)
return True
+
+
+def get_self_heal_daemon_pid(nodes):
+ """
+ Checks if self-heal daemon process is running and
+ return the process id's in dictionary format
+
+ Args:
+ nodes ( str|list ) : Node/Nodes of the cluster
+
+ Returns:
+ tuple : Tuple containing two elements (ret, glustershd_pids).
+ The first element 'ret' is of type 'bool', True if and only if
+ glustershd is running on all the nodes in the list and each
+ node contains only one instance of glustershd running.
+ False otherwise.
+
+ The second element 'glustershd_pids' is of type dictonary and it
+ contains the process ID's for glustershd
+ """
+ glustershd_pids = {}
+ _rc = True
+ if isinstance(nodes, str):
+ nodes = [nodes]
+ cmd = "pgrep -f glustershd"
+ g.log.info("Executing cmd: %s on node %s" % (cmd, nodes))
+ results = g.run_parallel(nodes, cmd)
+ for node in results:
+ ret, out, err = results[node]
+ if ret == 0:
+ if len(out.strip().split("\n")) == 1:
+ if not out.strip():
+ g.log.error("NO self heal daemon process found "
+ "on node %s" % node)
+ _rc = False
+ glustershd_pids[node] = [-1]
+ else:
+ g.log.info("Single Self Heal Daemon process with "
+ "pid %s found on %s",
+ out.strip().split("\n"), node)
+ glustershd_pids[node] = (out.strip().split("\n"))
+ else:
+ g.log.error("More than One self heal daemon process "
+ "found on node %s" % node)
+ _rc = False
+ glustershd_pids[node] = [-1]
+ else:
+ g.log.error("Not able to get self heal daemon process "
+ "from node %s" % node)
+ _rc = False
+ glustershd_pids[node] = [-1]
+
+ return _rc, glustershd_pids
+
+
+def do_bricks_exist_in_shd_volfile(mnode, volname, brick_list):
+ """
+ Checks whether the given brick list is present in glustershd
+ server volume file
+
+ Args:
+ mnode (str) : Node on which commands will be executed.
+ volname (str) : Name of the volume.
+ brick_list ( list ) : brick list of a volume which needs to
+ compare in glustershd server volume file
+
+ Returns:
+ bool : True if brick exists in glustershd server volume file.
+ False Otherwise
+ """
+ GLUSTERSHD = "/var/lib/glusterd/glustershd/glustershd-server.vol"
+ brick_list_server_vol = []
+ volume_clients = "volume " + volname + "-client-"
+ host = brick = None
+ parse = False
+
+ # Establish connection to mnode
+ conn = g.rpyc_get_connection(mnode)
+ if conn is None:
+ g.log.info("Not able to establish connection to node %s" % mnode)
+ return False
+ try:
+ fd = conn.builtins.open(GLUSTERSHD)
+ for each_line in fd:
+ each_line = each_line.strip()
+ if volume_clients in each_line:
+ parse = True
+ elif "end-volume" in each_line:
+ parse = False
+ brick_list_server_vol.append("%s:%s" % (host, brick))
+ elif parse:
+ if "option remote-subvolume" in each_line:
+ brick = each_line.split(" ")[2]
+ if "option remote-host" in each_line:
+ host = each_line.split(" ")[2]
+
+ except IOError as e:
+ g.log.info("I/O error ({0}): {1}".format(e.errno, e.strerror))
+ return False
+
+ g.log.info("Brick List from volume info : %s" % brick_list)
+ g.log.info("Brick List from volume server "
+ "file : %s" % brick_list_server_vol)
+
+ if set(brick_list) != set(brick_list_server_vol):
+ return False
+ return True
+
+
+def is_shd_daemonized(nodes, timeout=120):
+ """
+ wait for the glustershd process to release parent process.
+
+ Args:
+ nodes ( str|list ) : Node/Nodes of the cluster
+
+ Kwargs:
+ timeout (int): timeout value in seconds to wait for self-heal-daemons
+ to be online.
+
+ Returns:
+ bool : True if glustershd releases its parent.
+ False Otherwise
+
+ """
+ counter = 0
+ flag = 0
+ if isinstance(nodes, str):
+ nodes = [nodes]
+ while counter < timeout:
+ ret, pids = get_self_heal_daemon_pid(nodes)
+ if not ret:
+ g.log.info("Retry after 3 sec to get self heal "
+ "daemon process....")
+ time.sleep(3)
+ counter = counter + 3
+ else:
+ flag = 1
+ break
+
+ if not flag:
+ g.log.error("Either No self heal daemon process found or more than"
+ "One self heal daemon process found even "
+ "after %d minutes", (timeout/60.0))
+ return False
+ else:
+ g.log.info("Single self heal daemon process on all nodes %s",
+ nodes)
+ return True
diff --git a/tests/functional/afr/__init__.py b/tests/functional/afr/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tests/functional/afr/__init__.py
diff --git a/tests/functional/afr/test_self_heal_daemon_process.py b/tests/functional/afr/test_self_heal_daemon_process.py
new file mode 100644
index 000000000..536b76b72
--- /dev/null
+++ b/tests/functional/afr/test_self_heal_daemon_process.py
@@ -0,0 +1,299 @@
+# Copyright (C) 2016-2017 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+""" Description:
+ Test Cases in this module tests the self heal daemon process.
+"""
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.volume_libs import (
+ expand_volume, shrink_volume, log_volume_info_and_status,
+ wait_for_volume_process_to_be_online)
+from glustolibs.gluster.rebalance_ops import (rebalance_start,
+ wait_for_rebalance_to_complete,
+ rebalance_status)
+from glustolibs.gluster.brick_libs import get_all_bricks
+from glustolibs.gluster.heal_libs import (get_self_heal_daemon_pid,
+ do_bricks_exist_in_shd_volfile,
+ is_shd_daemonized)
+
+
+@runs_on([['replicated', 'distributed-replicated', 'dispersed',
+ 'distributed-dispersed'], ['glusterfs', 'nfs', 'cifs']])
+class SelfHealDaemonProcessTests(GlusterBaseClass):
+ """
+ SelfHealDaemonProcessTests contains tests which verifies the
+ self-heal daemon process of the nodes
+ """
+ @classmethod
+ def setUpClass(cls):
+ """
+ setup volume, mount volume and initialize necessary variables
+ which is used in tests
+ """
+
+ # calling GlusterBaseClass setUpClass
+ GlusterBaseClass.setUpClass.im_func(cls)
+
+ # Setup Volume and Mount Volume
+ g.log.info("Starting to Setup Volume and Mount Volume")
+ ret = cls.setup_volume_and_mount_volume(mounts=cls.mounts)
+ if not ret:
+ raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+ g.log.info("Successful in Setup Volume and Mount Volume")
+
+ # Verfiy glustershd process releases its parent process
+ ret = is_shd_daemonized(cls.servers)
+ if not ret:
+ raise ExecutionError("Self Heal Daemon process was still"
+ " holding parent process.")
+ g.log.info("Self Heal Daemon processes are online")
+
+ cls.GLUSTERSHD = "/var/lib/glusterd/glustershd/glustershd-server.vol"
+
+ def setUp(self):
+ """
+ setUp method for every test
+ """
+
+ # calling GlusterBaseClass setUp
+ GlusterBaseClass.setUp.im_func(self)
+
+ def tearDown(self):
+ """
+ tearDown for every test
+ """
+
+ # Calling GlusterBaseClass tearDown
+ GlusterBaseClass.tearDown.im_func(self)
+
+ @classmethod
+ def tearDownClass(cls):
+ """
+ Clean up the volume and umount volume from client
+ """
+
+ # stopping the volume
+ g.log.info("Starting to Unmount Volume and Cleanup Volume")
+ ret = cls.unmount_volume_and_cleanup_volume(mounts=cls.mounts)
+ if not ret:
+ raise ExecutionError("Failed to Unmount Volume and Cleanup Volume")
+ g.log.info("Successful in Unmount Volume and Cleanup Volume")
+
+ # calling GlusterBaseClass tearDownClass
+ GlusterBaseClass.tearDownClass.im_func(cls)
+
+ def test_glustershd_with_add_remove_brick(self):
+ """
+ Test script to verify glustershd process with adding and
+ removing bricks
+
+ * check glustershd process - only 1 glustershd process should
+ be running
+ * bricks must be present in glustershd-server.vol file for
+ the replicated involved volumes
+ * Add bricks
+ * check glustershd process - only 1 glustershd process should
+ be running and its should be different from previous one
+ * bricks which are added must present in glustershd-server.vol file
+ * remove bricks
+ * check glustershd process - only 1 glustershd process should
+ be running and its different from previous one
+ * bricks which are removed should not present
+ in glustershd-server.vol file
+
+ """
+
+ nodes = self.volume['servers']
+ bricks_list = []
+ glustershd_pids = {}
+
+ # check the self-heal daemon process
+ g.log.info("Starting to get self-heal daemon process on "
+ "nodes %s" % nodes)
+ ret, pids = get_self_heal_daemon_pid(nodes)
+ self.assertTrue(ret, ("Either No self heal daemon process found or "
+ "more than One self heal daemon process "
+ "found : %s" % pids))
+ g.log.info("Successful in getting Single self heal daemon process"
+ " on all nodes %s", nodes)
+ glustershd_pids = pids
+
+ # get the bricks for the volume
+ g.log.info("Fetching bricks for the volume : %s" % self.volname)
+ bricks_list = get_all_bricks(self.mnode, self.volname)
+ g.log.info("Brick List : %s" % bricks_list)
+
+ # validate the bricks present in volume info with
+ # glustershd server volume file
+ g.log.info("Starting parsing file %s on "
+ "node %s" % (self.GLUSTERSHD, self.mnode))
+ ret = do_bricks_exist_in_shd_volfile(self.mnode, self.volname,
+ bricks_list)
+ self.assertTrue(ret, ("Brick List from volume info is different "
+ "from glustershd server volume file. "
+ "Please check log file for details"))
+ g.log.info("Successfully parsed %s file" % self.GLUSTERSHD)
+
+ # expanding volume
+ g.log.info("Start adding bricks to volume %s" % self.volname)
+ ret = expand_volume(self.mnode, self.volname, self.servers,
+ self.all_servers_info)
+ self.assertTrue(ret, ("Failed to add bricks to "
+ "volume %s " % self.volname))
+ g.log.info("Add brick successfull")
+
+ # Log Volume Info and Status after expanding the volume
+ g.log.info("Logging volume info and Status after expanding volume")
+ ret = log_volume_info_and_status(self.mnode, self.volname)
+ self.assertTrue(ret, ("Logging volume info and status failed "
+ "on volume %s", self.volname))
+ g.log.info("Successful in logging volume info and status "
+ "of volume %s", self.volname)
+
+ # Verify volume's all process are online for 60 sec
+ g.log.info("Verifying volume's all process are online")
+ ret = wait_for_volume_process_to_be_online(self.mnode, self.volname,
+ 60)
+ self.assertTrue(ret, ("Volume %s : All process are not "
+ "online", self.volname))
+ g.log.info("Successfully Verified volume %s processes are online",
+ self.volname)
+
+ # Start Rebalance
+ g.log.info("Starting Rebalance on the volume")
+ ret, out, err = rebalance_start(self.mnode, self.volname)
+ self.assertEqual(ret, 0, ("Failed to start rebalance on "
+ "the volume %s with error %s" %
+ (self.volname, err)))
+ g.log.info("Successfully started rebalance on the "
+ "volume %s", self.volname)
+
+ # Log Rebalance status
+ g.log.info("Log Rebalance status")
+ _, _, _ = rebalance_status(self.mnode, self.volname)
+
+ # Wait for rebalance to complete
+ g.log.info("Waiting for rebalance to complete")
+ ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
+ self.assertTrue(ret, ("Rebalance is not yet complete "
+ "on the volume %s", self.volname))
+ g.log.info("Rebalance is successfully complete on "
+ "the volume %s", self.volname)
+
+ # Check Rebalance status after rebalance is complete
+ g.log.info("Checking Rebalance status")
+ ret, _, _ = rebalance_status(self.mnode, self.volname)
+ self.assertEqual(ret, 0, ("Failed to get rebalance status for "
+ "the volume %s", self.volname))
+ g.log.info("Successfully got rebalance status of the "
+ "volume %s", self.volname)
+
+ # Check the self-heal daemon process after adding bricks
+ g.log.info("Starting to get self-heal daemon process on "
+ "nodes %s" % nodes)
+ glustershd_pids_after_expanding = {}
+ ret, pids = get_self_heal_daemon_pid(nodes)
+ self.assertTrue(ret, ("Either No self heal daemon process found or "
+ "more than One self heal daemon process found"))
+ g.log.info("Successfull in getting self-heal daemon process "
+ "on nodes %s" % nodes)
+
+ glustershd_pids_after_expanding = pids
+ g.log.info("Self Heal Daemon Process ID's afetr expanding "
+ "volume: %s" % glustershd_pids_after_expanding)
+
+ self.assertNotEqual(glustershd_pids,
+ glustershd_pids_after_expanding,
+ "Self Daemon process is same before and"
+ " after adding bricks")
+ g.log.info("Self Heal Daemon Process is different before and "
+ "after adding bricks")
+
+ # get the bricks for the volume after expanding
+ bricks_list_after_expanding = get_all_bricks(self.mnode, self.volname)
+ g.log.info("Brick List after expanding "
+ "volume: %s" % bricks_list_after_expanding)
+
+ # validate the bricks present in volume info
+ # with glustershd server volume file after adding bricks
+ g.log.info("Starting parsing file %s" % self.GLUSTERSHD)
+ ret = do_bricks_exist_in_shd_volfile(self.mnode, self.volname,
+ bricks_list_after_expanding)
+
+ self.assertTrue(ret, ("Brick List from volume info is different "
+ "from glustershd server volume file after "
+ "expanding bricks. Please check log file "
+ "for details"))
+ g.log.info("Successfully parsed %s file" % self.GLUSTERSHD)
+
+ # shrink the volume
+ g.log.info("Starting volume shrink")
+ ret = shrink_volume(self.mnode, self.volname)
+ self.assertTrue(ret, ("Failed to shrink the volume on "
+ "volume %s", self.volname))
+ g.log.info("Shrinking volume is successful on "
+ "volume %s", self.volname)
+
+ # Log Volume Info and Status after shrinking the volume
+ g.log.info("Logging volume info and Status after shrinking volume")
+ ret = log_volume_info_and_status(self.mnode, self.volname)
+ self.assertTrue(ret, ("Logging volume info and status failed on "
+ "volume %s", self.volname))
+ g.log.info("Successful in logging volume info and status "
+ "of volume %s", self.volname)
+
+ # get the bricks after shrinking the volume
+ bricks_list_after_shrinking = get_all_bricks(self.mnode, self.volname)
+ g.log.info("Brick List after shrinking "
+ "volume: %s" % bricks_list_after_shrinking)
+
+ self.assertEqual(len(bricks_list_after_shrinking), len(bricks_list),
+ "Brick Count is mismatched after "
+ "shrinking the volume %s" % self.volname)
+ g.log.info("Brick Count matched before before expanding "
+ "and after shrinking volume")
+
+ # Verfiy glustershd process releases its parent process
+ ret = is_shd_daemonized(nodes)
+ self.assertTrue(ret, ("Either No self heal daemon process found or "
+ "more than One self heal daemon process found"))
+
+ # check the self-heal daemon process after removing bricks
+ g.log.info("Starting to get self-heal daemon process "
+ "on nodes %s" % nodes)
+ glustershd_pids_after_shrinking = {}
+ ret, pids = get_self_heal_daemon_pid(nodes)
+ glustershd_pids_after_shrinking = pids
+ self.assertNotEqual(glustershd_pids_after_expanding,
+ glustershd_pids_after_shrinking,
+ "Self Heal Daemon process is same "
+ "after adding bricks and shrinking volume")
+ g.log.info("Self Heal Daemon Process is different after adding bricks "
+ "and shrinking volume")
+
+ # validate bricks present in volume info
+ # with glustershd server volume file after removing bricks
+ g.log.info("Starting parsing file %s" % self.GLUSTERSHD)
+ ret = do_bricks_exist_in_shd_volfile(self.mnode, self.volname,
+ bricks_list_after_shrinking)
+ self.assertTrue(ret, ("Brick List from volume info is different "
+ "from glustershd server volume file after "
+ "removing bricks. Please check log file "
+ "for details"))
+ g.log.info("Successfully parsed %s file" % self.GLUSTERSHD)