From ef3548c2a20c8f752574e230332e6613a0995bb6 Mon Sep 17 00:00:00 2001
From: Vitalii Koriakov
Date: Mon, 3 Dec 2018 14:53:41 +0200
Subject: Moved test_existing_glustershd_should_take_care_of_self_healing to
 separate folder

Change-Id: I1fb4497ac915c7a93f223ef4e6946eeb4dcd0e90
Signed-off-by: Vitalii Koriakov
---
 ...xisting_shd_should_take_care_of_self_healing.py | 252 +++++++++++++++++++++
 .../afr/heal/test_self_heal_daemon_process.py      | 234 +------------------
 2 files changed, 256 insertions(+), 230 deletions(-)
 create mode 100755 tests/functional/afr/heal/test_existing_shd_should_take_care_of_self_healing.py
 mode change 100644 => 100755 tests/functional/afr/heal/test_self_heal_daemon_process.py

diff --git a/tests/functional/afr/heal/test_existing_shd_should_take_care_of_self_healing.py b/tests/functional/afr/heal/test_existing_shd_should_take_care_of_self_healing.py
new file mode 100755
index 000000000..64f5254a5
--- /dev/null
+++ b/tests/functional/afr/heal/test_existing_shd_should_take_care_of_self_healing.py
@@ -0,0 +1,252 @@
+# Copyright (C) 2016-2018  Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+""" Description:
+    Test cases in this module test the self-heal daemon process.
+"""
+
+import time
+from glusto.core import Glusto as g
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.brick_libs import (
+    bring_bricks_offline, bring_bricks_online,
+    select_volume_bricks_to_bring_offline, get_online_bricks_list)
+from glustolibs.gluster.heal_libs import (get_self_heal_daemon_pid,
+                                          is_shd_daemonized,
+                                          monitor_heal_completion,
+                                          is_heal_complete)
+from glustolibs.gluster.heal_ops import get_heal_info_summary
+from glustolibs.io.utils import validate_io_procs
+
+
+@runs_on([['replicated', 'distributed-replicated'],
+          ['glusterfs', 'cifs', 'nfs']])
+class SelfHealDaemonProcessTestsWithHealing(GlusterBaseClass):
+    """
+    SelfHealDaemonProcessTestsWithHealing contains tests which verify the
+    self-heal daemon process with healing.
+ """ + def setUp(self): + """ + setup volume, mount volume and initialize necessary variables + which is used in tests + """ + # calling GlusterBaseClass setUpClass + GlusterBaseClass.setUp.im_func(self) + + # Setup Volume and Mount Volume + g.log.info("Starting to Setup Volume and Mount Volume") + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + # Verfiy glustershd process releases its parent process + g.log.info("Verifying Self Heal Daemon process is daemonized") + ret = is_shd_daemonized(self.servers) + if not ret: + raise ExecutionError("Self Heal Daemon process was still" + " holding parent process.") + g.log.info("Self Heal Daemon processes are online") + + def tearDown(self): + """ + Clean up the volume and umount volume from client + """ + # stopping the volume + g.log.info("Starting to Unmount Volume and Cleanup Volume") + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") + g.log.info("Successful in Unmount Volume and Cleanup Volume") + + # calling GlusterBaseClass tearDownClass + GlusterBaseClass.tearDownClass.im_func(self) + + def test_existing_glustershd_should_take_care_of_self_healing(self): + """ + Test Script which verifies that the existing glustershd should take + care of self healing + + * Create and start the Replicate volume + * Check the glustershd processes - Note the pids + * Bring down the One brick ( lets say brick1) without affecting + the cluster + * Create 1000 files on volume + * bring the brick1 up which was killed in previous steps + * check the heal info - proactive self healing should start + * Bring down brick1 again + * wait for 60 sec and brought up the brick1 + * Check the glustershd processes - pids should be different + * Monitor the heal till its complete + + """ + # pylint: disable=too-many-locals,too-many-lines,too-many-statements + nodes = self.servers + + # check the self-heal daemon process + g.log.info("Starting to get self-heal daemon process on " + "nodes %s", nodes) + ret, pids = get_self_heal_daemon_pid(nodes) + self.assertTrue(ret, ("Either No self heal daemon process found or " + "more than One self heal daemon process " + "found : %s" % pids)) + g.log.info("Successful in getting Single self heal daemon process" + " on all nodes %s", nodes) + glustershd_pids = pids + + # select the bricks to bring offline + g.log.info("Selecting bricks to brought offline for volume %s", + self.volname) + bricks_to_bring_offline = \ + select_volume_bricks_to_bring_offline(self.mnode, + self.volname) + g.log.info("Brick List to bring offline : %s", + bricks_to_bring_offline) + + # Bring down the selected bricks + g.log.info("Going to bring down the brick process " + "for %s", bricks_to_bring_offline) + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, ("Failed to bring down the bricks. 
Please " + "check the log file for more details.")) + g.log.info("Brought down the brick process " + "for %s successfully", bricks_to_bring_offline) + + # get the bricks which are running + g.log.info("getting the brick list which are online") + online_bricks = get_online_bricks_list(self.mnode, self.volname) + g.log.info("Online Bricks for volume %s : %s", + self.volname, online_bricks) + + # write 1MB files to the mounts + g.log.info("Starting IO on all mounts...") + g.log.info("mounts: %s", self.mounts) + all_mounts_procs = [] + cmd = ("for i in `seq 1 1000`; " + "do dd if=/dev/urandom of=%s/file_$i " + "bs=1M count=1; " + "done" + % self.mounts[0].mountpoint) + g.log.info(cmd) + proc = g.run_async(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + all_mounts_procs.append(proc) + + # Validate IO + self.assertTrue( + validate_io_procs(all_mounts_procs, self.mounts), + "IO failed on some of the clients" + ) + + # check the heal info + g.log.info("Get the pending heal info for the volume %s", + self.volname) + heal_info = get_heal_info_summary(self.mnode, self.volname) + g.log.info("Successfully got heal info for the volume %s", + self.volname) + g.log.info("Heal Info for volume %s : %s", self.volname, heal_info) + + # Bring bricks online + g.log.info("Bring bricks: %s online", bricks_to_bring_offline) + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline, 'glusterd_restart') + self.assertTrue(ret, ("Failed to bring bricks: %s online" + % bricks_to_bring_offline)) + g.log.info("Successfully brought all bricks: %s online", + bricks_to_bring_offline) + + # Wait for 90 sec to start self healing + g.log.info('Waiting for 90 sec to start self healing') + time.sleep(90) + + # check the heal info + g.log.info("Get the pending heal info for the volume %s", + self.volname) + heal_info_after_brick_online = get_heal_info_summary(self.mnode, + self.volname) + g.log.info("Successfully got heal info for the volume %s", + self.volname) + g.log.info("Heal Info for volume %s : %s", + self.volname, heal_info_after_brick_online) + + # check heal pending is decreased + flag = False + for brick in online_bricks: + if int(heal_info_after_brick_online[brick]['numberOfEntries'])\ + < int(heal_info[brick]['numberOfEntries']): + flag = True + break + + self.assertTrue(flag, "Pro-active self heal is not started") + g.log.info("Pro-active self heal is started") + + # bring down bricks again + g.log.info("Going to bring down the brick process " + "for %s", bricks_to_bring_offline) + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, ("Failed to bring down the bricks. 
Please " + "check the log file for more details.")) + g.log.info("Brought down the brick process " + "for %s successfully", bricks_to_bring_offline) + + # wait for 60 sec and brought up the brick again + g.log.info('waiting for 60 sec and brought up the brick again') + time.sleep(60) + g.log.info("Bring bricks: %s online", bricks_to_bring_offline) + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline, 'glusterd_restart') + self.assertTrue(ret, ("Failed to bring bricks: %s online" + % bricks_to_bring_offline)) + g.log.info("Successfully brought all bricks: %s online", + bricks_to_bring_offline) + + # Verfiy glustershd process releases its parent process + ret = is_shd_daemonized(nodes) + self.assertTrue(ret, ("Either No self heal daemon process found or " + "more than One self heal daemon process found")) + + # check the self-heal daemon process + g.log.info("Starting to get self-heal daemon process on " + "nodes %s", nodes) + ret, pids = get_self_heal_daemon_pid(nodes) + self.assertTrue(ret, ("Either No self heal daemon process found or " + "more than One self heal daemon process " + "found : %s" % pids)) + g.log.info("Successful in getting Single self heal daemon process" + " on all nodes %s", nodes) + shd_pids_after_bricks_online = pids + + # compare the glustershd pids + self.assertNotEqual(glustershd_pids, + shd_pids_after_bricks_online, + ("self heal daemon process are same before and " + "after bringing up bricks online")) + g.log.info("EXPECTED : self heal daemon process are different before " + "and after bringing up bricks online") + + # wait for heal to complete + g.log.info("Monitoring the heal.....") + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, ("Heal is not completed on volume %s" + % self.volname)) + g.log.info("Heal Completed on volume %s", self.volname) + + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') diff --git a/tests/functional/afr/heal/test_self_heal_daemon_process.py b/tests/functional/afr/heal/test_self_heal_daemon_process.py old mode 100644 new mode 100755 index 15cd43951..b66ae25d2 --- a/tests/functional/afr/heal/test_self_heal_daemon_process.py +++ b/tests/functional/afr/heal/test_self_heal_daemon_process.py @@ -32,15 +32,12 @@ from glustolibs.gluster.rebalance_ops import (rebalance_start, rebalance_status) from glustolibs.gluster.brick_libs import ( get_all_bricks, bring_bricks_offline, bring_bricks_online, - are_bricks_online, select_bricks_to_bring_offline, are_bricks_offline, - select_volume_bricks_to_bring_offline, get_online_bricks_list) + are_bricks_online, select_bricks_to_bring_offline, are_bricks_offline) from glustolibs.gluster.brick_ops import replace_brick from glustolibs.gluster.heal_libs import (get_self_heal_daemon_pid, do_bricks_exist_in_shd_volfile, is_shd_daemonized, - are_all_self_heal_daemons_are_online, - monitor_heal_completion) -from glustolibs.gluster.heal_ops import get_heal_info_summary + are_all_self_heal_daemons_are_online) from glustolibs.gluster.volume_ops import (volume_stop, volume_start, get_volume_list) from glustolibs.gluster.gluster_init import restart_glusterd @@ -48,6 +45,8 @@ from glustolibs.io.utils import validate_io_procs from glustolibs.misc.misc_libs import upload_scripts +# pylint: disable=too-many-lines + @runs_on([['replicated', 'distributed-replicated', 'dispersed', 'distributed-dispersed'], ['glusterfs', 'nfs', 'cifs']]) class 
 class SelfHealDaemonProcessTests(GlusterBaseClass):
@@ -815,231 +814,6 @@ class ImpactOfReplaceBrickForGlustershdTests(GlusterBaseClass):
         g.log.info("Successfully parsed %s file", self.glustershd)
 
 
-@runs_on([['replicated', 'distributed-replicated'],
-          ['glusterfs', 'nfs', 'cifs']])
-class SelfHealDaemonProcessTestsWithHealing(GlusterBaseClass):
-    """
-    SelfHealDaemonProcessTestsWithHealing contains tests which verifies the
-    self-heal daemon process with healing.
-    """
-    @classmethod
-    def setUpClass(cls):
-        """
-        setup volume, mount volume and initialize necessary variables
-        which is used in tests
-        """
-
-        # calling GlusterBaseClass setUpClass
-        GlusterBaseClass.setUpClass.im_func(cls)
-
-        # Setup Volume and Mount Volume
-        g.log.info("Starting to Setup Volume and Mount Volume")
-        ret = cls.setup_volume_and_mount_volume(mounts=cls.mounts)
-        if not ret:
-            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
-        g.log.info("Successful in Setup Volume and Mount Volume")
-
-        # Verfiy glustershd process releases its parent process
-        g.log.info("Verifying Self Heal Daemon process is daemonized")
-        ret = is_shd_daemonized(cls.servers)
-        if not ret:
-            raise ExecutionError("Self Heal Daemon process was still"
-                                 " holding parent process.")
-        g.log.info("Self Heal Daemon processes are online")
-
-        # upload script
-        script_abs_path = "/usr/share/glustolibs/io/scripts/file_dir_ops.py"
-        cls.script_upload_path = "/usr/share/glustolibs/io/scripts/" \
-                                 "file_dir_ops.py"
-
-        ret = upload_scripts(cls.clients, script_abs_path)
-        if not ret:
-            raise ExecutionError("Failed to upload IO scripts to clients")
-
-        cls.GLUSTERSHD = "/var/lib/glusterd/glustershd/glustershd-server.vol"
-
-    @classmethod
-    def tearDownClass(cls):
-        """
-        Clean up the volume and umount volume from client
-        """
-
-        # stopping the volume
-        g.log.info("Starting to Unmount Volume and Cleanup Volume")
-        ret = cls.unmount_volume_and_cleanup_volume(mounts=cls.mounts)
-        if not ret:
-            raise ExecutionError("Failed to Unmount Volume and Cleanup Volume")
-        g.log.info("Successful in Unmount Volume and Cleanup Volume")
-
-        # calling GlusterBaseClass tearDownClass
-        GlusterBaseClass.tearDownClass.im_func(cls)
-
-    def test_existing_glustershd_should_take_care_of_self_healing(self):
-        """
-        Test Script which verifies that the existing glustershd should take
-        care of self healing
-
-        * Create and start the Replicate volume
-        * Check the glustershd processes - Note the pids
-        * Bring down the One brick ( lets say brick1)  without affecting
-          the cluster
-        * Create 5000 files on volume
-        * bring the brick1 up which was killed in previous steps
-        * check the heal info - proactive self healing should start
-        * Bring down brick1 again
-        * wait for 60 sec and brought up the brick1
-        * Check the glustershd processes - pids should be different
-        * Monitor the heal till its complete
-
-        """
-        # pylint: disable=too-many-locals,too-many-lines,too-many-statements
-        nodes = self.servers
-
-        # check the self-heal daemon process
-        g.log.info("Starting to get self-heal daemon process on "
-                   "nodes %s", nodes)
-        ret, pids = get_self_heal_daemon_pid(nodes)
-        self.assertTrue(ret, ("Either No self heal daemon process found or "
-                              "more than One self heal daemon process "
-                              "found : %s" % pids))
-        g.log.info("Successful in getting Single self heal daemon process"
-                   " on all nodes %s", nodes)
-        glustershd_pids = pids
-
-        # select the bricks to bring offline
-        g.log.info("Selecting bricks to brought offline for volume %s",
-                   self.volname)
-        bricks_to_bring_offline = \
-            select_volume_bricks_to_bring_offline(self.mnode,
-                                                  self.volname)
-        g.log.info("Brick List to bring offline : %s",
-                   bricks_to_bring_offline)
-
-        # Bring down the selected bricks
-        g.log.info("Going to bring down the brick process "
-                   "for %s", bricks_to_bring_offline)
-        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
-        self.assertTrue(ret, ("Failed to bring down the bricks. Please "
-                              "check the log file for more details."))
-        g.log.info("Brought down the brick process "
-                   "for %s successfully", bricks_to_bring_offline)
-
-        # get the bricks which are running
-        g.log.info("getting the brick list which are online")
-        online_bricks = get_online_bricks_list(self.mnode, self.volname)
-        g.log.info("Online Bricks for volume %s : %s",
-                   self.volname, online_bricks)
-
-        # write 1MB files to the mounts
-        g.log.info("Starting IO on all mounts...")
-        g.log.info("mounts: %s", self.mounts)
-        all_mounts_procs = []
-        for mount_obj in self.mounts:
-            cmd = "for i in `seq 1 5000`;do dd if=/dev/urandom " \
-                  "of=%s/file_$i bs=1M count=1;done" % mount_obj.mountpoint
-            g.log.info(cmd)
-            proc = g.run_async(mount_obj.client_system, cmd,
-                               user=mount_obj.user)
-            all_mounts_procs.append(proc)
-
-        # Validate IO
-        self.assertTrue(
-            validate_io_procs(all_mounts_procs, self.mounts),
-            "IO failed on some of the clients"
-        )
-
-        # check the heal info
-        g.log.info("Get the pending heal info for the volume %s",
-                   self.volname)
-        heal_info = get_heal_info_summary(self.mnode, self.volname)
-        g.log.info("Successfully got heal info for the volume %s",
-                   self.volname)
-        g.log.info("Heal Info for volume %s : %s", self.volname, heal_info)
-
-        # Bring bricks online
-        g.log.info("Bring bricks: %s online", bricks_to_bring_offline)
-        ret = bring_bricks_online(self.mnode, self.volname,
-                                  bricks_to_bring_offline, 'glusterd_restart')
-        self.assertTrue(ret, ("Failed to bring bricks: %s online"
-                              % bricks_to_bring_offline))
-        g.log.info("Successfully brought all bricks: %s online",
-                   bricks_to_bring_offline)
-
-        # Wait for 90 sec to start self healing
-        time.sleep(90)
-
-        # check the heal info
-        g.log.info("Get the pending heal info for the volume %s",
-                   self.volname)
-        heal_info_after_brick_online = get_heal_info_summary(self.mnode,
-                                                             self.volname)
-        g.log.info("Successfully got heal info for the volume %s",
-                   self.volname)
-        g.log.info("Heal Info for volume %s : %s",
-                   self.volname, heal_info_after_brick_online)
-
-        # check heal pending is decreased
-        flag = False
-        for brick in online_bricks:
-            if int(heal_info_after_brick_online[brick]['numberOfEntries'])\
-                    < int(heal_info[brick]['numberOfEntries']):
-                flag = True
-                break
-
-        self.assertTrue(flag, ("Pro-active self heal is not started"))
-        g.log.info("Pro-active self heal is started")
-
-        # bring down bricks again
-        g.log.info("Going to bring down the brick process "
-                   "for %s", bricks_to_bring_offline)
-        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
-        self.assertTrue(ret, ("Failed to bring down the bricks. Please "
Please " - "check the log file for more details.")) - g.log.info("Brought down the brick process " - "for %s successfully", bricks_to_bring_offline) - - # wait for 60 sec and brought up the brick again - time.sleep(60) - g.log.info("Bring bricks: %s online", bricks_to_bring_offline) - ret = bring_bricks_online(self.mnode, self.volname, - bricks_to_bring_offline, 'glusterd_restart') - self.assertTrue(ret, ("Failed to bring bricks: %s online" - % bricks_to_bring_offline)) - g.log.info("Successfully brought all bricks: %s online", - bricks_to_bring_offline) - - # Verfiy glustershd process releases its parent process - ret = is_shd_daemonized(nodes) - self.assertTrue(ret, ("Either No self heal daemon process found or " - "more than One self heal daemon process found")) - - # check the self-heal daemon process - g.log.info("Starting to get self-heal daemon process on " - "nodes %s", nodes) - ret, pids = get_self_heal_daemon_pid(nodes) - self.assertTrue(ret, ("Either No self heal daemon process found or " - "more than One self heal daemon process " - "found : %s" % pids)) - g.log.info("Successful in getting Single self heal daemon process" - " on all nodes %s", nodes) - glustershd_pids_after_bricks_online = pids - - # compare the glustershd pids - self.assertNotEqual(glustershd_pids, - glustershd_pids_after_bricks_online, - ("self heal daemon process are same before and " - "after bringing up bricks online")) - g.log.info("EXPECTED : self heal daemon process are different before " - "and after bringing up bricks online") - - # wait for heal to complete - g.log.info("Monitoring the heal.....") - ret = monitor_heal_completion(self.mnode, self.volname) - self.assertTrue(ret, ("Heal is not completed on volume %s" - % self.volname)) - g.log.info("Heal Completed on volume %s", self.volname) - - class SelfHealDaemonProcessTestsWithMultipleVolumes(GlusterBaseClass): """ SelfHealDaemonProcessTestsWithMultipleVolumes contains tests which -- cgit