diff options
author | Vitalii Koriakov <vkoriako@redhat.com> | 2018-12-03 14:53:41 +0200 |
---|---|---|
committer | Vijay Avuthu <vavuthu@redhat.com> | 2018-12-06 09:27:05 +0000 |
commit | ef3548c2a20c8f752574e230332e6613a0995bb6 (patch) | |
tree | 3dd7fa4197cf86d08f94f633998b1c21549d4559 /tests/functional/afr/heal/test_existing_shd_should_take_care_of_self_healing.py | |
parent | 7fb5a55f55ade4077cf37fc558308d766ade5f79 (diff) |
Moved test_existing_glustershd_should_take_care_of_self_healing to separate folder
Change-Id: I1fb4497ac915c7a93f223ef4e6946eeb4dcd0e90
Signed-off-by: Vitalii Koriakov <vkoriako@redhat.com>
Diffstat (limited to 'tests/functional/afr/heal/test_existing_shd_should_take_care_of_self_healing.py')
-rwxr-xr-x | tests/functional/afr/heal/test_existing_shd_should_take_care_of_self_healing.py | 252 |
1 files changed, 252 insertions, 0 deletions
diff --git a/tests/functional/afr/heal/test_existing_shd_should_take_care_of_self_healing.py b/tests/functional/afr/heal/test_existing_shd_should_take_care_of_self_healing.py new file mode 100755 index 000000000..64f5254a5 --- /dev/null +++ b/tests/functional/afr/heal/test_existing_shd_should_take_care_of_self_healing.py @@ -0,0 +1,252 @@ +# Copyright (C) 2016-2018 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" Description: + Test Cases in this module tests the self heal daemon process. +""" + +import time +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.brick_libs import ( + bring_bricks_offline, bring_bricks_online, + select_volume_bricks_to_bring_offline, get_online_bricks_list) +from glustolibs.gluster.heal_libs import (get_self_heal_daemon_pid, + is_shd_daemonized, + monitor_heal_completion, + is_heal_complete) +from glustolibs.gluster.heal_ops import get_heal_info_summary +from glustolibs.io.utils import validate_io_procs + + +@runs_on([['replicated', 'distributed-replicated'], + ['glusterfs', 'cifs', 'nfs']]) +class SelfHealDaemonProcessTestsWithHealing(GlusterBaseClass): + """ + SelfHealDaemonProcessTestsWithHealing contains tests which verifies the + self-heal daemon process with healing. + """ + def setUp(self): + """ + setup volume, mount volume and initialize necessary variables + which is used in tests + """ + # calling GlusterBaseClass setUpClass + GlusterBaseClass.setUp.im_func(self) + + # Setup Volume and Mount Volume + g.log.info("Starting to Setup Volume and Mount Volume") + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + # Verfiy glustershd process releases its parent process + g.log.info("Verifying Self Heal Daemon process is daemonized") + ret = is_shd_daemonized(self.servers) + if not ret: + raise ExecutionError("Self Heal Daemon process was still" + " holding parent process.") + g.log.info("Self Heal Daemon processes are online") + + def tearDown(self): + """ + Clean up the volume and umount volume from client + """ + # stopping the volume + g.log.info("Starting to Unmount Volume and Cleanup Volume") + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") + g.log.info("Successful in Unmount Volume and Cleanup Volume") + + # calling GlusterBaseClass tearDownClass + GlusterBaseClass.tearDownClass.im_func(self) + + def test_existing_glustershd_should_take_care_of_self_healing(self): + """ + Test Script which verifies that the existing glustershd should take + care of self healing + + * Create and start the Replicate volume + * Check the glustershd processes - Note the pids + * Bring down the One brick ( lets say brick1) without affecting + the cluster + * Create 1000 files on volume + * bring the brick1 up which was killed in previous steps + * check the heal info - proactive self healing should start + * Bring down brick1 again + * wait for 60 sec and brought up the brick1 + * Check the glustershd processes - pids should be different + * Monitor the heal till its complete + + """ + # pylint: disable=too-many-locals,too-many-lines,too-many-statements + nodes = self.servers + + # check the self-heal daemon process + g.log.info("Starting to get self-heal daemon process on " + "nodes %s", nodes) + ret, pids = get_self_heal_daemon_pid(nodes) + self.assertTrue(ret, ("Either No self heal daemon process found or " + "more than One self heal daemon process " + "found : %s" % pids)) + g.log.info("Successful in getting Single self heal daemon process" + " on all nodes %s", nodes) + glustershd_pids = pids + + # select the bricks to bring offline + g.log.info("Selecting bricks to brought offline for volume %s", + self.volname) + bricks_to_bring_offline = \ + select_volume_bricks_to_bring_offline(self.mnode, + self.volname) + g.log.info("Brick List to bring offline : %s", + bricks_to_bring_offline) + + # Bring down the selected bricks + g.log.info("Going to bring down the brick process " + "for %s", bricks_to_bring_offline) + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, ("Failed to bring down the bricks. Please " + "check the log file for more details.")) + g.log.info("Brought down the brick process " + "for %s successfully", bricks_to_bring_offline) + + # get the bricks which are running + g.log.info("getting the brick list which are online") + online_bricks = get_online_bricks_list(self.mnode, self.volname) + g.log.info("Online Bricks for volume %s : %s", + self.volname, online_bricks) + + # write 1MB files to the mounts + g.log.info("Starting IO on all mounts...") + g.log.info("mounts: %s", self.mounts) + all_mounts_procs = [] + cmd = ("for i in `seq 1 1000`; " + "do dd if=/dev/urandom of=%s/file_$i " + "bs=1M count=1; " + "done" + % self.mounts[0].mountpoint) + g.log.info(cmd) + proc = g.run_async(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + all_mounts_procs.append(proc) + + # Validate IO + self.assertTrue( + validate_io_procs(all_mounts_procs, self.mounts), + "IO failed on some of the clients" + ) + + # check the heal info + g.log.info("Get the pending heal info for the volume %s", + self.volname) + heal_info = get_heal_info_summary(self.mnode, self.volname) + g.log.info("Successfully got heal info for the volume %s", + self.volname) + g.log.info("Heal Info for volume %s : %s", self.volname, heal_info) + + # Bring bricks online + g.log.info("Bring bricks: %s online", bricks_to_bring_offline) + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline, 'glusterd_restart') + self.assertTrue(ret, ("Failed to bring bricks: %s online" + % bricks_to_bring_offline)) + g.log.info("Successfully brought all bricks: %s online", + bricks_to_bring_offline) + + # Wait for 90 sec to start self healing + g.log.info('Waiting for 90 sec to start self healing') + time.sleep(90) + + # check the heal info + g.log.info("Get the pending heal info for the volume %s", + self.volname) + heal_info_after_brick_online = get_heal_info_summary(self.mnode, + self.volname) + g.log.info("Successfully got heal info for the volume %s", + self.volname) + g.log.info("Heal Info for volume %s : %s", + self.volname, heal_info_after_brick_online) + + # check heal pending is decreased + flag = False + for brick in online_bricks: + if int(heal_info_after_brick_online[brick]['numberOfEntries'])\ + < int(heal_info[brick]['numberOfEntries']): + flag = True + break + + self.assertTrue(flag, "Pro-active self heal is not started") + g.log.info("Pro-active self heal is started") + + # bring down bricks again + g.log.info("Going to bring down the brick process " + "for %s", bricks_to_bring_offline) + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, ("Failed to bring down the bricks. Please " + "check the log file for more details.")) + g.log.info("Brought down the brick process " + "for %s successfully", bricks_to_bring_offline) + + # wait for 60 sec and brought up the brick again + g.log.info('waiting for 60 sec and brought up the brick again') + time.sleep(60) + g.log.info("Bring bricks: %s online", bricks_to_bring_offline) + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline, 'glusterd_restart') + self.assertTrue(ret, ("Failed to bring bricks: %s online" + % bricks_to_bring_offline)) + g.log.info("Successfully brought all bricks: %s online", + bricks_to_bring_offline) + + # Verfiy glustershd process releases its parent process + ret = is_shd_daemonized(nodes) + self.assertTrue(ret, ("Either No self heal daemon process found or " + "more than One self heal daemon process found")) + + # check the self-heal daemon process + g.log.info("Starting to get self-heal daemon process on " + "nodes %s", nodes) + ret, pids = get_self_heal_daemon_pid(nodes) + self.assertTrue(ret, ("Either No self heal daemon process found or " + "more than One self heal daemon process " + "found : %s" % pids)) + g.log.info("Successful in getting Single self heal daemon process" + " on all nodes %s", nodes) + shd_pids_after_bricks_online = pids + + # compare the glustershd pids + self.assertNotEqual(glustershd_pids, + shd_pids_after_bricks_online, + ("self heal daemon process are same before and " + "after bringing up bricks online")) + g.log.info("EXPECTED : self heal daemon process are different before " + "and after bringing up bricks online") + + # wait for heal to complete + g.log.info("Monitoring the heal.....") + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, ("Heal is not completed on volume %s" + % self.volname)) + g.log.info("Heal Completed on volume %s", self.volname) + + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') |