Diffstat (limited to 'tests/functional/afr')
-rw-r--r-- | tests/functional/afr/test_replace_brick_self_heal_io_in_progress.py | 263
1 file changed, 263 insertions, 0 deletions
diff --git a/tests/functional/afr/test_replace_brick_self_heal_io_in_progress.py b/tests/functional/afr/test_replace_brick_self_heal_io_in_progress.py
new file mode 100644
index 000000000..4a69d5700
--- /dev/null
+++ b/tests/functional/afr/test_replace_brick_self_heal_io_in_progress.py
@@ -0,0 +1,263 @@
+# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on)
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.lib_utils import collect_bricks_arequal
+from glustolibs.gluster.heal_ops import trigger_heal_full
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+                                          is_heal_complete,
+                                          is_volume_in_split_brain,
+                                          is_shd_daemonized)
+from glustolibs.gluster.volume_libs import (
+    verify_all_process_of_volume_are_online,
+    wait_for_volume_process_to_be_online,
+    get_subvols)
+from glustolibs.gluster.brick_libs import (select_bricks_to_bring_offline,
+                                           bring_bricks_offline,
+                                           bring_bricks_online,
+                                           are_bricks_offline,
+                                           get_all_bricks)
+from glustolibs.misc.misc_libs import upload_scripts
+from glustolibs.io.utils import (list_all_files_and_dirs_mounts,
+                                 wait_for_io_to_complete,
+                                 validate_io_procs,
+                                 collect_mounts_arequal)
+
+
+@runs_on([['distributed-replicated', 'replicated'],
+          ['glusterfs', 'nfs', 'cifs']])
+class TestAFRSelfHeal(GlusterBaseClass):
+
+    @classmethod
+    def setUpClass(cls):
+
+        # Calling GlusterBaseClass setUpClass
+        cls.get_super_method(cls, 'setUpClass')()
+
+        # Upload IO scripts for running IO on mounts
+        cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
+                                  "file_dir_ops.py")
+        ret = upload_scripts(cls.clients, cls.script_upload_path)
+        if not ret:
+            raise ExecutionError("Failed to upload IO scripts to clients %s"
+                                 % cls.clients)
+        g.log.info("Successfully uploaded IO scripts to clients %s",
+                   cls.clients)
+
+    def setUp(self):
+
+        # Calling GlusterBaseClass setUp
+        self.get_super_method(self, 'setUp')()
+        self.all_mounts_procs, self.io_validation_complete = [], False
+
+        # Setup Volume and Mount Volume
+        ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
+        if not ret:
+            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+        g.log.info("Successful in Setup Volume and Mount Volume")
+
+    def tearDown(self):
+
+        # Check whether a failure occurred before I/O was complete
+        if not self.io_validation_complete:
+            ret = wait_for_io_to_complete(self.all_mounts_procs,
+                                          self.mounts[0])
+            if not ret:
+                raise ExecutionError("IO failed on some of the clients")
+            g.log.info("IO is successful on all mounts")
+
+        # List all files and dirs created
+        ret = list_all_files_and_dirs_mounts(self.mounts)
+        if not ret:
+            raise ExecutionError("Failed to list all files and dirs")
+        g.log.info("Listing all files and directories is successful")
+
+        # Cleanup and umount volume
+        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+        if not ret:
+            raise ExecutionError("Failed to umount the vol & cleanup Volume")
+        g.log.info("Successful in umounting the volume and Cleanup")
+
+        # Calling GlusterBaseClass teardown
+        self.get_super_method(self, 'tearDown')()
+
+    def test_replace_brick_self_heal_io_in_progress(self):
+        """
+        - Create a directory on the mount point and write files/dirs into it
+        - Create another set of files (1500 files of 10 KB each)
+        - While creation of files/dirs is in progress, kill one brick
+        - Remove the contents of the killed brick (simulating disk
+          replacement)
+        - While the I/O is still in progress, restart glusterd on the nodes
+          where disk replacement was simulated to bring the bricks back
+          online
+        - Start volume heal
+        - Wait for the I/O to complete
+        - Verify that the files are self-healed
+        - Calculate arequals of the mount point and all the bricks
+        """
+        # pylint: disable=too-many-locals,too-many-statements,too-many-branches
+        # Create dirs with files
+        g.log.info('Creating dirs with files...')
+        command = ("/usr/bin/env python %s create_deep_dirs_with_files "
+                   "-d 2 -l 2 -n 2 -f 10 %s"
+                   % (self.script_upload_path, self.mounts[0].mountpoint))
+        ret, _, err = g.run(self.mounts[0].client_system, command,
+                            user=self.mounts[0].user)
+        self.assertFalse(ret, err)
+        g.log.info("IO is successful")
+
+        # Creating another set of files (1500 files of 10 KB each)
+        self.all_mounts_procs = []
+
+        # Create files of fixed size
+        g.log.info('Creating 1500 files of 10 KB each...')
+        command = ("/usr/bin/env python %s create_files "
+                   "-f 1500 --fixed-file-size 10k %s"
+                   % (self.script_upload_path, self.mounts[0].mountpoint))
+        proc = g.run_async(self.mounts[0].client_system, command,
+                           user=self.mounts[0].user)
+        self.all_mounts_procs.append(proc)
+        self.io_validation_complete = False
+
+        # Validate IO
+        ret = validate_io_procs(self.all_mounts_procs, self.mounts[0])
+        self.assertTrue(ret, "IO failed on some of the clients")
+        self.io_validation_complete = True
+        g.log.info("IO is successful on all mounts")
+
+        # Select bricks to bring offline
+        bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
+            self.mnode, self.volname))
+        bricks_to_bring_offline = list(filter(None, (
+            bricks_to_bring_offline_dict['hot_tier_bricks'] +
+            bricks_to_bring_offline_dict['cold_tier_bricks'] +
+            bricks_to_bring_offline_dict['volume_bricks'])))
+
+        # Bring bricks offline
+        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
+        self.assertTrue(ret, 'Failed to bring bricks %s offline'
+                        % bricks_to_bring_offline)
+
+        ret = are_bricks_offline(self.mnode, self.volname,
+                                 bricks_to_bring_offline)
+        self.assertTrue(ret, 'Bricks %s are not offline'
+                        % bricks_to_bring_offline)
+        g.log.info('Bringing bricks %s offline is successful',
+                   bricks_to_bring_offline)
+
+        # Remove the content of the killed bricks
+        for brick in bricks_to_bring_offline:
+            brick_node, brick_path = brick.split(':')
+
+            # Removing files
+            command = ('cd %s ; rm -rf *' % brick_path)
+            ret, _, err = g.run(brick_node, command)
+            self.assertFalse(ret, err)
+            g.log.info('Files are deleted on brick %s', brick)
+
+        # Bring bricks online
+        ret = bring_bricks_online(self.mnode, self.volname,
+                                  bricks_to_bring_offline)
+        self.assertTrue(ret, 'Failed to bring bricks %s online'
+                        % bricks_to_bring_offline)
+        g.log.info('Bringing bricks %s online is successful',
+                   bricks_to_bring_offline)
+
+        # Wait for volume processes to be online
+        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
+        self.assertTrue(ret, "Failed to wait for volume %s processes to "
+                        "be online" % self.volname)
+        g.log.info("Successful in waiting for volume %s processes to be "
+                   "online", self.volname)
+
+        # Verify all processes of the volume are online
+        ret = verify_all_process_of_volume_are_online(self.mnode,
+                                                      self.volname)
+        self.assertTrue(ret, "Volume %s : All processes are not online"
+                        % self.volname)
+        g.log.info("Volume %s : All processes are online", self.volname)
+
+        # Wait for self-heal daemons to be online
+        ret = is_shd_daemonized(self.all_servers)
+        self.assertTrue(ret, "No self-heal daemon process found")
+        g.log.info("All self-heal daemons are online")
+
+        # Start healing
+        ret = trigger_heal_full(self.mnode, self.volname)
+        self.assertTrue(ret, 'Heal is not started')
+        g.log.info('Healing is started')
+
+        # Monitor heal completion
+        ret = monitor_heal_completion(self.mnode, self.volname)
+        self.assertTrue(ret, 'Heal has not yet completed')
+
+        # Check if heal is completed
+        ret = is_heal_complete(self.mnode, self.volname)
+        self.assertTrue(ret, 'Heal is not complete')
+        g.log.info('Heal is completed successfully')
+
+        # Check for split-brain
+        ret = is_volume_in_split_brain(self.mnode, self.volname)
+        self.assertFalse(ret, 'Volume is in split-brain state')
+        g.log.info('Volume is not in split-brain state')
+
+        # Check arequals for "replicated"
+        all_bricks = get_all_bricks(self.mnode, self.volname)
+        if self.volume_type == "replicated":
+
+            # Get arequal after bricks are online
+            ret, arequals = collect_mounts_arequal(self.mounts)
+            self.assertTrue(ret, 'Failed to get arequal')
+            g.log.info('Getting arequal after successfully bringing '
+                       'bricks online')
+            mount_point_total = arequals[0].splitlines()[-1].split(':')[-1]
+
+            # Get arequal on bricks and compare with mount_point_total
+            ret, arequals = collect_bricks_arequal(all_bricks)
+            self.assertTrue(ret, 'Failed to get arequal on bricks')
+            for arequal in arequals:
+                brick_total = arequal.splitlines()[-1].split(':')[-1]
+                self.assertEqual(mount_point_total, brick_total,
+                                 'Arequals for mountpoint and brick '
+                                 'are not equal')
+                g.log.info('Arequals for mountpoint and brick are equal')
+
+        # Check arequals for "distributed-replicated"
+        if self.volume_type == "distributed-replicated":
+
+            # Get the subvolumes
+            subvols_dict = get_subvols(self.mnode, self.volname)
+            num_subvols = len(subvols_dict['volume_subvols'])
+            g.log.info("Number of subvolumes in volume %s: %d",
+                       self.volname, num_subvols)
+
+            # Get arequals and compare
+            for i in range(num_subvols):
+
+                # Get arequal for the first brick of the subvolume
+                subvol_brick_list = subvols_dict['volume_subvols'][i]
+                ret, arequal = collect_bricks_arequal(subvol_brick_list[0])
+                self.assertTrue(ret, 'Failed to get arequal on first brick')
+                first_brick_total = arequal[0].splitlines()[-1].split(':')[-1]
+
+                # Get arequal for every brick and compare with first brick
+                ret, arequals = collect_bricks_arequal(subvol_brick_list)
+                self.assertTrue(ret, 'Failed to get arequal on bricks')
+                for arequal in arequals:
+                    brick_total = arequal.splitlines()[-1].split(':')[-1]
+                    self.assertEqual(first_brick_total, brick_total,
+                                     'Arequals for subvol and brick are '
+                                     'not equal')
+                    g.log.info('Arequals for subvol and brick are equal')
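The heal verification at the end of this test reduces each arequal report to the value after the final ':' on its last line (`arequal.splitlines()[-1].split(':')[-1]`) and asserts that every brick agrees with the mount point (or, per subvolume, with the first brick). The sketch below isolates that parse-and-compare step so it can be read on its own. It is a minimal, hedged illustration: the sample report strings and the helper names `extract_total`/`all_totals_match` are hypothetical stand-ins for the strings returned by `collect_mounts_arequal`/`collect_bricks_arequal`, not the exact tool output.

```python
# Minimal sketch of the checksum comparison used by the test.
# Assumption: each report is a multi-line string whose last line
# ends with ": <checksum>", which is what the test itself relies on.


def extract_total(report):
    """Return the checksum field from the last line of a report."""
    # Mirrors arequal.splitlines()[-1].split(':')[-1]; strip() is
    # added here only to make the standalone comparison whitespace-safe.
    return report.splitlines()[-1].split(':')[-1].strip()


def all_totals_match(reports):
    """True if every report ends with the same checksum value."""
    totals = {extract_total(report) for report in reports}
    return len(totals) == 1


if __name__ == '__main__':
    # Hypothetical reports from the two bricks of one replica pair.
    brick_a = "Entry counts\nRegular files: 1510\nTotal checksum: 3ac5e8b7"
    brick_b = "Entry counts\nRegular files: 1510\nTotal checksum: 3ac5e8b7"
    assert all_totals_match([brick_a, brick_b])
    print("bricks are consistent:", extract_total(brick_a))
```

Comparing only the final checksum field mirrors the test's approach: a single trailing value summarizes the brick's contents, so replicas that self-healed correctly must all report the same total.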