Diffstat (limited to 'tests/functional/afr/heal/test_self_heal_daemon_process.py')
-rwxr-xr-x [-rw-r--r--]  tests/functional/afr/heal/test_self_heal_daemon_process.py | 234
1 file changed, 4 insertions(+), 230 deletions(-)
diff --git a/tests/functional/afr/heal/test_self_heal_daemon_process.py b/tests/functional/afr/heal/test_self_heal_daemon_process.py
index 15cd43951..b66ae25d2 100644..100755
--- a/tests/functional/afr/heal/test_self_heal_daemon_process.py
+++ b/tests/functional/afr/heal/test_self_heal_daemon_process.py
@@ -32,15 +32,12 @@ from glustolibs.gluster.rebalance_ops import (rebalance_start,
rebalance_status)
from glustolibs.gluster.brick_libs import (
get_all_bricks, bring_bricks_offline, bring_bricks_online,
- are_bricks_online, select_bricks_to_bring_offline, are_bricks_offline,
- select_volume_bricks_to_bring_offline, get_online_bricks_list)
+ are_bricks_online, select_bricks_to_bring_offline, are_bricks_offline)
from glustolibs.gluster.brick_ops import replace_brick
from glustolibs.gluster.heal_libs import (get_self_heal_daemon_pid,
do_bricks_exist_in_shd_volfile,
is_shd_daemonized,
- are_all_self_heal_daemons_are_online,
- monitor_heal_completion)
-from glustolibs.gluster.heal_ops import get_heal_info_summary
+ are_all_self_heal_daemons_are_online)
from glustolibs.gluster.volume_ops import (volume_stop, volume_start,
get_volume_list)
from glustolibs.gluster.gluster_init import restart_glusterd
@@ -48,6 +45,8 @@ from glustolibs.io.utils import validate_io_procs
from glustolibs.misc.misc_libs import upload_scripts
+# pylint: disable=too-many-lines
+
@runs_on([['replicated', 'distributed-replicated', 'dispersed',
'distributed-dispersed'], ['glusterfs', 'nfs', 'cifs']])
class SelfHealDaemonProcessTests(GlusterBaseClass):
@@ -815,231 +814,6 @@ class ImpactOfReplaceBrickForGlustershdTests(GlusterBaseClass):
g.log.info("Successfully parsed %s file", self.glustershd)
-@runs_on([['replicated', 'distributed-replicated'],
- ['glusterfs', 'nfs', 'cifs']])
-class SelfHealDaemonProcessTestsWithHealing(GlusterBaseClass):
- """
- SelfHealDaemonProcessTestsWithHealing contains tests which verify the
- self-heal daemon process with healing.
- """
- @classmethod
- def setUpClass(cls):
- """
- setup volume, mount volume and initialize necessary variables
- which are used in the tests
- """
-
- # calling GlusterBaseClass setUpClass
- GlusterBaseClass.setUpClass.im_func(cls)
-
- # Setup Volume and Mount Volume
- g.log.info("Starting to Setup Volume and Mount Volume")
- ret = cls.setup_volume_and_mount_volume(mounts=cls.mounts)
- if not ret:
- raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
- g.log.info("Successful in Setup Volume and Mount Volume")
-
- # Verify glustershd process releases its parent process
- g.log.info("Verifying Self Heal Daemon process is daemonized")
- ret = is_shd_daemonized(cls.servers)
- if not ret:
- raise ExecutionError("Self Heal Daemon process was still"
- " holding parent process.")
- g.log.info("Self Heal Daemon processes are online")
-
- # upload script
- script_abs_path = "/usr/share/glustolibs/io/scripts/file_dir_ops.py"
- cls.script_upload_path = "/usr/share/glustolibs/io/scripts/" \
- "file_dir_ops.py"
-
- ret = upload_scripts(cls.clients, script_abs_path)
- if not ret:
- raise ExecutionError("Failed to upload IO scripts to clients")
-
- cls.GLUSTERSHD = "/var/lib/glusterd/glustershd/glustershd-server.vol"
-
- @classmethod
- def tearDownClass(cls):
- """
- Clean up the volume and umount volume from client
- """
-
- # unmount and cleanup the volume
- g.log.info("Starting to Unmount Volume and Cleanup Volume")
- ret = cls.unmount_volume_and_cleanup_volume(mounts=cls.mounts)
- if not ret:
- raise ExecutionError("Failed to Unmount Volume and Cleanup Volume")
- g.log.info("Successful in Unmount Volume and Cleanup Volume")
-
- # calling GlusterBaseClass tearDownClass
- GlusterBaseClass.tearDownClass.im_func(cls)
-
- def test_existing_glustershd_should_take_care_of_self_healing(self):
- """
- Test script which verifies that the existing glustershd takes
- care of self-healing
-
- * Create and start the Replicate volume
- * Check the glustershd processes - note the pids
- * Bring down one brick (say brick1) without affecting
- the cluster
- * Create 5000 files on the volume
- * Bring brick1, which was killed in the previous step, back up
- * Check the heal info - proactive self-healing should start
- * Bring down brick1 again
- * Wait for 60 sec and bring brick1 back up
- * Check the glustershd processes - pids should be different
- * Monitor the heal till it is complete
-
- """
- # pylint: disable=too-many-locals,too-many-lines,too-many-statements
- nodes = self.servers
-
- # check the self-heal daemon process
- g.log.info("Starting to get self-heal daemon process on "
- "nodes %s", nodes)
- ret, pids = get_self_heal_daemon_pid(nodes)
- self.assertTrue(ret, ("Either No self heal daemon process found or "
- "more than One self heal daemon process "
- "found : %s" % pids))
- g.log.info("Successful in getting Single self heal daemon process"
- " on all nodes %s", nodes)
- glustershd_pids = pids
-
- # select the bricks to bring offline
- g.log.info("Selecting bricks to bring offline for volume %s",
- self.volname)
- bricks_to_bring_offline = \
- select_volume_bricks_to_bring_offline(self.mnode,
- self.volname)
- g.log.info("Brick List to bring offline : %s",
- bricks_to_bring_offline)
-
- # Bring down the selected bricks
- g.log.info("Going to bring down the brick process "
- "for %s", bricks_to_bring_offline)
- ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
- self.assertTrue(ret, ("Failed to bring down the bricks. Please "
- "check the log file for more details."))
- g.log.info("Brought down the brick process "
- "for %s successfully", bricks_to_bring_offline)
-
- # get the bricks which are running
- g.log.info("Getting the list of online bricks")
- online_bricks = get_online_bricks_list(self.mnode, self.volname)
- g.log.info("Online Bricks for volume %s : %s",
- self.volname, online_bricks)
-
- # write 5000 files of 1MB each to the mounts
- g.log.info("Starting IO on all mounts...")
- g.log.info("mounts: %s", self.mounts)
- all_mounts_procs = []
- for mount_obj in self.mounts:
- cmd = "for i in `seq 1 5000`;do dd if=/dev/urandom " \
- "of=%s/file_$i bs=1M count=1;done" % mount_obj.mountpoint
- g.log.info(cmd)
- proc = g.run_async(mount_obj.client_system, cmd,
- user=mount_obj.user)
- all_mounts_procs.append(proc)
-
- # Validate IO
- self.assertTrue(
- validate_io_procs(all_mounts_procs, self.mounts),
- "IO failed on some of the clients"
- )
-
- # check the heal info
- g.log.info("Get the pending heal info for the volume %s",
- self.volname)
- heal_info = get_heal_info_summary(self.mnode, self.volname)
- g.log.info("Successfully got heal info for the volume %s",
- self.volname)
- g.log.info("Heal Info for volume %s : %s", self.volname, heal_info)
-
- # Bring bricks online
- g.log.info("Bring bricks: %s online", bricks_to_bring_offline)
- ret = bring_bricks_online(self.mnode, self.volname,
- bricks_to_bring_offline, 'glusterd_restart')
- self.assertTrue(ret, ("Failed to bring bricks: %s online"
- % bricks_to_bring_offline))
- g.log.info("Successfully brought all bricks: %s online",
- bricks_to_bring_offline)
-
- # Wait for 90 sec for self healing to start
- time.sleep(90)
-
- # check the heal info
- g.log.info("Get the pending heal info for the volume %s",
- self.volname)
- heal_info_after_brick_online = get_heal_info_summary(self.mnode,
- self.volname)
- g.log.info("Successfully got heal info for the volume %s",
- self.volname)
- g.log.info("Heal Info for volume %s : %s",
- self.volname, heal_info_after_brick_online)
-
- # check heal pending is decreased
- flag = False
- for brick in online_bricks:
- if int(heal_info_after_brick_online[brick]['numberOfEntries'])\
- < int(heal_info[brick]['numberOfEntries']):
- flag = True
- break
-
- self.assertTrue(flag, ("Pro-active self heal is not started"))
- g.log.info("Pro-active self heal is started")
-
- # bring down bricks again
- g.log.info("Going to bring down the brick process "
- "for %s", bricks_to_bring_offline)
- ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
- self.assertTrue(ret, ("Failed to bring down the bricks. Please "
- "check the log file for more details."))
- g.log.info("Brought down the brick process "
- "for %s successfully", bricks_to_bring_offline)
-
- # wait for 60 sec and bring the bricks back up
- time.sleep(60)
- g.log.info("Bring bricks: %s online", bricks_to_bring_offline)
- ret = bring_bricks_online(self.mnode, self.volname,
- bricks_to_bring_offline, 'glusterd_restart')
- self.assertTrue(ret, ("Failed to bring bricks: %s online"
- % bricks_to_bring_offline))
- g.log.info("Successfully brought all bricks: %s online",
- bricks_to_bring_offline)
-
- # Verify glustershd process releases its parent process
- ret = is_shd_daemonized(nodes)
- self.assertTrue(ret, ("Either No self heal daemon process found or "
- "more than One self heal daemon process found"))
-
- # check the self-heal daemon process
- g.log.info("Starting to get self-heal daemon process on "
- "nodes %s", nodes)
- ret, pids = get_self_heal_daemon_pid(nodes)
- self.assertTrue(ret, ("Either No self heal daemon process found or "
- "more than One self heal daemon process "
- "found : %s" % pids))
- g.log.info("Successful in getting Single self heal daemon process"
- " on all nodes %s", nodes)
- glustershd_pids_after_bricks_online = pids
-
- # compare the glustershd pids
- self.assertNotEqual(glustershd_pids,
- glustershd_pids_after_bricks_online,
- ("self heal daemon processes are the same "
- "before and after bringing bricks online"))
- g.log.info("EXPECTED : self heal daemon processes are different before "
- "and after bringing bricks online")
-
- # wait for heal to complete
- g.log.info("Monitoring the heal.....")
- ret = monitor_heal_completion(self.mnode, self.volname)
- self.assertTrue(ret, ("Heal is not completed on volume %s"
- % self.volname))
- g.log.info("Heal Completed on volume %s", self.volname)
-
-
class SelfHealDaemonProcessTestsWithMultipleVolumes(GlusterBaseClass):
"""
SelfHealDaemonProcessTestsWithMultipleVolumes contains tests which