diff options
Diffstat (limited to 'tests/functional/afr')
9 files changed, 1589 insertions, 29 deletions
diff --git a/tests/functional/afr/heal/test_no_glustershd_with_distribute.py b/tests/functional/afr/heal/test_no_glustershd_with_distribute.py index d2b43bfe3..bbefe0cff 100644 --- a/tests/functional/afr/heal/test_no_glustershd_with_distribute.py +++ b/tests/functional/afr/heal/test_no_glustershd_with_distribute.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2021 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -58,7 +58,7 @@ class SelfHealDaemonProcessTestsWithMultipleVolumes(GlusterBaseClass): for volume_config in cls.volume_configs: ret = setup_volume(mnode=cls.mnode, all_servers_info=cls.all_servers_info, - volume_config=volume_config) + volume_config=volume_config, multi_vol=True) volname = volume_config['name'] if not ret: raise ExecutionError("Failed to setup Volume" diff --git a/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py b/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py new file mode 100644 index 000000000..37bd2ec52 --- /dev/null +++ b/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py @@ -0,0 +1,600 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + are_bricks_online, + get_all_bricks) +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_volume_in_split_brain, + is_heal_complete, + enable_granular_heal, + disable_granular_heal) +from glustolibs.gluster.lib_utils import (add_user, del_user, group_del, + group_add, collect_bricks_arequal) +from glustolibs.gluster.volume_ops import get_volume_options +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.io.utils import collect_mounts_arequal + + +@runs_on([['distributed-replicated', 'replicated'], ['glusterfs']]) +class TestHealWithLinkFiles(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + self.first_client = self.mounts[0].client_system + self.mountpoint = self.mounts[0].mountpoint + self.user_group_created = False + + # If test case running is test_self_heal_meta_data + # create user and group + test_name_splitted = self.id().split('.') + test_id = test_name_splitted[len(test_name_splitted) - 1] + if test_id == 'test_self_heal_meta_data': + + # Create non-root group + if not group_add(self.first_client, 'qa_all'): + raise ExecutionError("Failed to create group qa_all") + + # Create non-root users + self.users = ('qa_func', 'qa_system', 'qa_perf') + for user in self.users: + if not add_user(self.first_client, user, group='qa_all'): + raise ExecutionError("Failed to create user {}" + .format(user)) + + self.user_group_created = True + 
g.log.info("Successfully created all users.") + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to setup and mount volume") + + def tearDown(self): + + # Delete non-root users and group if created + if self.user_group_created: + + # Delete non-root users + for user in self.users: + del_user(self.first_client, user) + g.log.info("Successfully deleted all users") + + # Delete non-root group + group_del(self.first_client, 'qa_all') + + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _set_granular_heal_to_on_or_off(self, enabled=False): + """Set granular heal to ON or OFF""" + granular = get_volume_options(self.mnode, self.volname, + 'granular-entry-heal') + if enabled: + if granular['cluster.granular-entry-heal'] != 'on': + ret = enable_granular_heal(self.mnode, self.volname) + self.assertTrue(ret, + "Unable to set granular-entry-heal to on") + else: + if granular['cluster.granular-entry-heal'] == 'on': + ret = disable_granular_heal(self.mnode, self.volname) + self.assertTrue(ret, + "Unable to set granular-entry-heal to off") + + def _run_cmd(self, io_cmd, err_msg): + """Run cmd and show error message if it fails""" + cmd = ("cd {}/test_self_heal;{}".format(self.mountpoint, io_cmd)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, err_msg) + + def _create_files_and_dirs_on_mount_point(self, index, second_set=False): + """A function to create files and dirs on mount point""" + # Create a parent directory test_self_heal on mount point + if not second_set: + ret = mkdir(self.first_client, '{}/{}'.format( + self.mountpoint, 'test_self_heal')) + self.assertTrue(ret, "Failed to create dir test_self_heal") + + # Create directories and files inside directory test_self_heal + io_cmd = ("for i in `seq 1 50`; do mkdir dir.$i; dd 
if=/dev/random" + " of=file.$i count=1K bs=$i; done", + + "for i in `seq 1 100`; do mkdir dir.$i; for j in `seq 1 5`;" + " do dd if=/dev/random of=dir.$i/file.$j bs=1K count=$j" + ";done;done", + + "for i in `seq 1 10`; do mkdir l1_dir.$i; for j in `seq " + "1 5`; do mkdir l1_dir.$i/l2_dir.$j; for k in `seq 1 10`;" + " do dd if=/dev/random of=l1_dir.$i/l2_dir.$j/test.$k" + " bs=1k count=$k; done; done; done;", + + "for i in `seq 51 100`; do mkdir new_dir.$i; for j in `seq" + " 1 10`; do dd if=/dev/random of=new_dir.$i/new_file.$j " + "bs=1K count=$j; done; dd if=/dev/random of=new_file.$i" + " count=1K bs=$i; done ;") + self._run_cmd( + io_cmd[index], "Failed to create dirs and files inside") + + def _delete_files_and_dirs(self): + """Delete files and dirs from mount point""" + io_cmd = ("for i in `seq 1 50`; do rm -rf dir.$i; rm -f file.$i;done") + self._run_cmd(io_cmd, "Failed to delete dirs and files") + + def _rename_files_and_dirs(self): + """Rename files and dirs from mount point""" + io_cmd = ("for i in `seq 51 100`; do mv new_file.$i renamed_file.$i;" + " for j in `seq 1 10`; do mv new_dir.$i/new_file.$j " + "new_dir.$i/renamed_file.$j ; done ; mv new_dir.$i " + "renamed_dir.$i; done;") + self._run_cmd(io_cmd, "Failed to rename dirs and files") + + def _change_meta_deta_of_dirs_and_files(self): + """Change meta data of dirs and files""" + cmds = ( + # Change permission + "for i in `seq 1 100`; do chmod 555 dir.$i; done; " + "for i in `seq 1 50`; do for j in `seq 1 5`; do chmod 666 " + "dir.$i/file.$j; done; done; for i in `seq 51 100`; do for " + "j in `seq 1 5`;do chmod 444 dir.$i/file.$j; done; done;", + + # Change ownership + "for i in `seq 1 35`; do chown -R qa_func dir.$i; done; " + "for i in `seq 36 70`; do chown -R qa_system dir.$i; done; " + "for i in `seq 71 100`; do chown -R qa_perf dir.$i; done;", + + # Change group + "for i in `seq 1 100`; do chgrp -R qa_all dir.$i; done;") + + for io_cmd in cmds: + self._run_cmd(io_cmd, + "Failed to change meta 
data on dirs and files") + g.log.info("Successfully changed meta data on dirs and files") + + def _verify_meta_data_of_files_and_dirs(self): + """Verify meta data of files and dirs""" + cmds = ( + # Verify permissions + "for i in `seq 1 50`; do stat -c %a dir.$i | grep -F \"555\";" + " if [ $? -ne 0 ]; then exit 1; fi; for j in `seq 1 5` ; do " + "stat -c %a dir.$i/file.$j | grep -F \"666\"; if [ $? -ne 0 ]" + "; then exit 1; fi; done; done; for i in `seq 51 100`; do " + "stat -c %a dir.$i | grep -F \"555\";if [ $? -ne 0 ]; then " + "exit 1; fi; for j in `seq 1 5`; do stat -c %a dir.$i/file.$j" + " | grep -F \"444\"; if [ $? -ne 0 ]; then exit 1; fi; done;" + "done;", + + # Verify ownership + "for i in `seq 1 35`; do stat -c %U dir.$i | grep -F " + "\"qa_func\"; if [ $? -ne 0 ]; then exit 1; fi; for j in " + "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F " + "\"qa_func\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;" + " for i in `seq 36 70` ; do stat -c %U dir.$i | grep -F " + "\"qa_system\"; if [ $? -ne 0 ]; then exit 1; fi; for j in " + "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F " + "\"qa_system\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;" + " for i in `seq 71 100` ; do stat -c %U dir.$i | grep -F " + "\"qa_perf\"; if [ $? -ne 0 ]; then exit 1; fi; for j in " + "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F " + "\"qa_perf\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;", + + # Verify group + "for i in `seq 1 100`; do stat -c %G dir.$i | grep -F " + "\"qa_all\"; if [ $? -ne 0 ]; then exit 1; fi; for j in " + "`seq 1 5`; do stat -c %G dir.$i/file.$j | grep -F " + "\"qa_all\"; if [ $? 
-ne 0 ]; then exit 1; fi; done; done;") + + for io_cmd in cmds: + self._run_cmd(io_cmd, "Meta data of dirs and files not proper") + + def _set_and_remove_extended_attributes(self, remove=False): + """Set and remove extended attributes""" + # Command to set extended attribute to files and dirs + io_cmd = ("for i in `seq 1 100`; do setfattr -n trusted.name -v " + "testing_xattr_selfheal_on_dirs dir.$i; for j in `seq 1 " + "5`;do setfattr -n trusted.name -v " + "testing_xattr_selfheal_on_files dir.$i/file.$j; done; " + "done;") + err_msg = "Failed to set extended attributes to files and dirs" + if remove: + # Command to remove extended attribute set on files and dirs + io_cmd = ("for i in `seq 1 100`; do setfattr -x trusted.name " + "dir.$i; for j in `seq 1 5`; do setfattr -x " + "trusted.name dir.$i/file.$j ; done ; done ;") + err_msg = "Failed to remove extended attributes to files and dirs" + + self._run_cmd(io_cmd, err_msg) + + def _verify_if_extended_attributes_are_proper(self, remove=False): + """Verify if extended attributes are set or remove properly""" + io_cmd = ("for i in `seq 1 100`; do getfattr -n trusted.name -e text " + "dir.$i | grep -F 'testing_xattr_selfheal_on_dirs'; if [ $? " + "-ne 0 ]; then exit 1 ; fi ; for j in `seq 1 5` ; do " + "getfattr -n trusted.name -e text dir.$i/file.$j | grep -F " + "'testing_xattr_selfheal_on_files'; if [ $? -ne 0 ]; then " + "exit 1; fi; done; done;") + err_msg = "Extended attributes on files and dirs are not proper" + if remove: + io_cmd = ("for i in `seq 1 100`; do getfattr -n trusted.name -e " + "text dir.$i; if [ $? -eq 0 ]; then exit 1; fi; for j in" + " `seq 1 5`; do getfattr -n trusted.name -e text " + "dir.$i/file.$j; if [ $? 
-eq 0 ]; then exit 1; fi; done; " + "done;") + err_msg = "Extended attributes set to files and dirs not removed" + self._run_cmd(io_cmd, err_msg) + + def _remove_files_and_create_dirs_with_the_same_name(self): + """Remove files and create dirs with the same name""" + io_cmd = ("for i in `seq 1 10`; do for j in `seq 1 5`; do for k in " + "`seq 1 10`; do rm -f l1_dir.$i/l2_dir.$j/test.$k; mkdir " + "l1_dir.$i/l2_dir.$j/test.$k; done; done; done;") + self._run_cmd(io_cmd, + "Failed to remove files and create dirs with same name") + + def _verify_if_dirs_are_proper_or_not(self): + """Verify if dirs are proper or not""" + io_cmd = ("for i in `seq 1 10`; do for j in `seq 1 5`; do for k in " + "`seq 1 10`; do stat -c %F l1_dir.$i/l2_dir.$j/test.$k | " + "grep -F 'directory'; if [ $? -ne 0 ]; then exit 1; fi; " + "done; done; done;") + self._run_cmd(io_cmd, "Dirs created instead of files aren't proper") + + def _bring_bricks_offline(self): + """Brings bricks offline and confirms if they are offline""" + # Select bricks to bring offline from a replica set + subvols_dict = get_subvols(self.mnode, self.volname) + subvols = subvols_dict['volume_subvols'] + self.bricks_to_bring_offline = [] + for subvol in subvols: + self.bricks_to_bring_offline.append(subvol[0]) + + # Bring bricks offline + ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' % + self.bricks_to_bring_offline) + + ret = are_bricks_offline(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks %s are not offline' + % self.bricks_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + self.bricks_to_bring_offline) + + def _restart_volume_and_bring_all_offline_bricks_online(self): + """Restart volume and bring all offline bricks online""" + ret = bring_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline, + bring_bricks_online_methods=[ + 'volume_start_force']) + 
self.assertTrue(ret, 'Failed to bring bricks %s online' % + self.bricks_to_bring_offline) + + # Check if bricks are back online or not + ret = are_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks not online %s even after restart' % + self.bricks_to_bring_offline) + + g.log.info('Bringing bricks %s online is successful', + self.bricks_to_bring_offline) + + def _check_arequal_on_bricks_with_a_specific_arequal(self, arequal, + brick_list): + """ + Compare an initial arequal checksum with bricks from a given brick list + """ + init_val = arequal[0].splitlines()[-1].split(':')[-1] + ret, arequals = collect_bricks_arequal(brick_list) + self.assertTrue(ret, 'Failed to get arequal on bricks') + for brick_arequal in arequals: + brick_total = brick_arequal.splitlines()[-1].split(':')[-1] + self.assertEqual(init_val, brick_total, 'Arequals not matching') + + @staticmethod + def _add_dir_path_to_brick_list(brick_list): + """Add test_self_heal at the end of brick path""" + dir_brick_list = [] + for brick in brick_list: + dir_brick_list.append('{}/{}'.format(brick, 'test_self_heal')) + return dir_brick_list + + def _check_arequal_checksum_for_the_volume(self): + """ + Check if arequals of mount point and bricks + are the same. 
+ """ + if self.volume_type == "replicated": + # Check arequals for "replicated" + brick_list = get_all_bricks(self.mnode, self.volname) + dir_brick_list = self._add_dir_path_to_brick_list(brick_list) + + # Get arequal before getting bricks offline + work_dir = '{}/test_self_heal'.format(self.mountpoint) + ret, arequals = collect_mounts_arequal([self.mounts[0]], + path=work_dir) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal before getting bricks offline ' + 'is successful') + + # Get arequal on bricks and compare with mount_point_total + self._check_arequal_on_bricks_with_a_specific_arequal( + arequals, dir_brick_list) + + # Check arequals for "distributed-replicated" + if self.volume_type == "distributed-replicated": + # Get the subvolumes + subvols_dict = get_subvols(self.mnode, self.volname) + num_subvols = len(subvols_dict['volume_subvols']) + + # Get arequals and compare + for i in range(0, num_subvols): + # Get arequal for first brick + brick_list = subvols_dict['volume_subvols'][i] + dir_brick_list = self._add_dir_path_to_brick_list(brick_list) + ret, arequals = collect_bricks_arequal([dir_brick_list[0]]) + self.assertTrue(ret, 'Failed to get arequal on first brick') + + # Get arequal for every brick and compare with first brick + self._check_arequal_on_bricks_with_a_specific_arequal( + arequals, dir_brick_list) + + def _check_heal_is_completed_and_not_in_split_brain(self): + """Check if heal is completed and volume not in split brain""" + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') + + # Check if volume is in split brain or not + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + def _check_if_there_are_files_and_dirs_to_be_healed(self): + """Check if there are files and dirs to be 
healed""" + ret = is_heal_complete(self.mnode, self.volname) + self.assertFalse(ret, 'Heal is completed') + g.log.info('Heal is pending') + + def _wait_for_heal_is_completed(self): + """Check if heal is completed""" + ret = monitor_heal_completion(self.mnode, self.volname, + timeout_period=3600) + self.assertTrue(ret, 'Heal has not yet completed') + + def _check_heal_status_restart_vol_wait_and_check_data(self): + """ + Perform repetitive steps mentioned below: + 1 Check if heal info is showing all the files and dirs to be healed + 2 Bring back all brick processes which were killed + 3 Wait for heal to complete on the volume + 4 Check if heal is complete and check if volume is in split brain + 5 Collect and compare arequal-checksum according to the volume type + for bricks + """ + # Check if heal info is showing all the files and dirs to be healed + self._check_if_there_are_files_and_dirs_to_be_healed() + + # Bring back all brick processes which were killed + self._restart_volume_and_bring_all_offline_bricks_online() + + # Wait for heal to complete on the volume + self._wait_for_heal_is_completed() + + # Check if heal is complete and check if volume is in split brain + self._check_heal_is_completed_and_not_in_split_brain() + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + + def _run_test_self_heal_entry_heal(self): + """Run steps of test_self_heal_entry_heal""" + # Create a directory and create files and directories inside it on + # mount point + self._create_files_and_dirs_on_mount_point(0) + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + + # Bring down brick processes according to the volume type + self._bring_bricks_offline() + + # Create a new set of files and directories on mount point + self._create_files_and_dirs_on_mount_point(3, second_set=True) + + 
self._check_heal_status_restart_vol_wait_and_check_data() + + # Bring down brick processes according to the volume type + self._bring_bricks_offline() + + # Delete files and directories from mount point + self._delete_files_and_dirs() + + self._check_heal_status_restart_vol_wait_and_check_data() + + # Bring down brick processes according to the volume type + self._bring_bricks_offline() + + # Rename the existing files and dirs + self._rename_files_and_dirs() + + self._check_heal_status_restart_vol_wait_and_check_data() + + def test_self_heal_entry_heal(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create a directory and create files and directories inside it + on mount point. + 3. Collect and compare arequal-checksum according to the volume type + for bricks. + 4. Bring down brick processes according to the volume type. + 5. Create a new set of files and directories on mount point. + 6. Check if heal info is showing all the files and dirs to be healed. + 7. Bring back all brick processes which were killed. + 8. Wait for heal to complete on the volume. + 9. Check if heal is complete and check if volume is in split brain. + 10. Collect and compare arequal-checksum according to the volume type + for bricks. + 11. Bring down brick processes according to the volume type. + 12. Delete files and directories from mount point. + 13. Check if heal info is showing all the files and dirs to be healed. + 14. Bring back all brick processes which were killed. + 15. Wait for heal to complete on the volume. + 16. Check if heal is complete and check if volume is in split brain. + 17. Collect and compare arequal-checksum according to the volume type + for bricks. + 18. Bring down brick processes according to the volume type. + 19. Rename the existing files and dirs. + 20. Check if heal info is showing all the files and dirs to be healed. + 21. Bring back all brick processes which were killed. + 22. Wait for heal to complete on the volume. + 23. 
Check if heal is complete and check if volume is in split brain. + 24. Collect and compare arequal-checksum according to the volume type + for bricks. + + Note: + Do this test with both Granular-entry-heal set enable and disable. + """ + for value in (False, True): + if value: + # Cleanup old data from mount point + ret, _, _ = g.run(self.first_client, + 'rm -rf {}/*'.format(self.mountpoint)) + self.assertFalse(ret, 'Failed to cleanup mount point') + g.log.info("Testing with granular heal set to enabled") + self._set_granular_heal_to_on_or_off(enabled=value) + self._run_test_self_heal_entry_heal() + + def test_self_heal_meta_data(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create a directory and create files and directories inside it + on mount point. + 3. Collect and compare arequal-checksum according to the volume type + for bricks. + 4. Bring down brick processes according to the volume type. + 5. Change the meta data of files and dirs. + 6. Check if heal info is showing all the files and dirs to be healed. + 7. Bring back all brick processes which were killed. + 8. Wait for heal to complete on the volume. + 9. Check if heal is complete and check if volume is in split brain. + 10. Collect and compare arequal-checksum according to the volume type + for bricks. + 11. Verify if the meta data of files and dirs. + 12. Bring down brick processes according to the volume type. + 13. Set extended attributes on the files and dirs. + 14. Verify if the extended attributes are set properly or not. + 15. Check if heal info is showing all the files and dirs to be healed. + 16. Bring back all brick processes which were killed. + 17. Wait for heal to complete on the volume. + 18. Check if heal is complete and check if volume is in split brain. + 19. Collect and compare arequal-checksum according to the volume type + for bricks. + 20. Verify if extended attributes are consistent or not. + 21. 
Bring down brick processes according to the volume type + 22. Remove extended attributes on the files and dirs. + 23. Verify if extended attributes were removed properly. + 24. Check if heal info is showing all the files and dirs to be healed. + 25. Bring back all brick processes which were killed. + 26. Wait for heal to complete on the volume. + 27. Check if heal is complete and check if volume is in split brain. + 28. Collect and compare arequal-checksum according to the volume type + for bricks. + 29. Verify if extended attributes are removed or not. + """ + # Create a directory and create files and directories inside it + # on mount point + self._create_files_and_dirs_on_mount_point(1) + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + + # Bring down brick processes according to the volume type + self._bring_bricks_offline() + + # Change the meta data of files and dirs + self._change_meta_deta_of_dirs_and_files() + + self._check_heal_status_restart_vol_wait_and_check_data() + + # Verify if the meta data of files and dirs + self._verify_meta_data_of_files_and_dirs() + + for value in (False, True): + # Bring down brick processes according to the volume type + self._bring_bricks_offline() + + # Set or remove extended attributes on the files and dirs + self._set_and_remove_extended_attributes(remove=value) + + # Verify if the extended attributes are set properly or not + self._verify_if_extended_attributes_are_proper(remove=value) + + self._check_heal_status_restart_vol_wait_and_check_data() + + # Verify if extended attributes are consistent or not + self._verify_if_extended_attributes_are_proper(remove=value) + + def test_self_heal_of_dir_with_files_removed(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create a directory and create files and directories inside it + on mount point. + 3. 
Collect and compare arequal-checksum according to the volume type + for bricks. + 4. Bring down brick processes according to the volume type. + 5. Remove all files and create dir which have name of files. + 6. Check if heal info is showing all the files and dirs to be healed. + 7. Bring back all brick processes which were killed. + 8. Wait for heal to complete on the volume. + 9. Check if heal is complete and check if volume is in split brain. + 10. Collect and compare arequal-checksum according to the volume type + for bricks. + 11. Verify if dirs are healed properly or not. + """ + # Create a directory and create files and directories inside it + # on mount point + self._create_files_and_dirs_on_mount_point(2) + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + + # Bring down brick processes according to the volume type + self._bring_bricks_offline() + + # Remove all files and create dir which have name of files + self._remove_files_and_create_dirs_with_the_same_name() + + self._check_heal_status_restart_vol_wait_and_check_data() + + # Verify if dirs are healed properly or not + self._verify_if_dirs_are_proper_or_not() diff --git a/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py b/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py index 43b4f4edf..a449e396f 100644 --- a/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py +++ b/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py @@ -1,4 +1,4 @@ -# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2021 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -26,12 +26,14 @@ from glustolibs.gluster.brick_libs import ( select_volume_bricks_to_bring_offline, get_online_bricks_list) from glustolibs.gluster.heal_libs import ( get_self_heal_daemon_pid, is_shd_daemonized, - monitor_heal_completion, bring_self_heal_daemon_process_offline) + monitor_heal_completion, bring_self_heal_daemon_process_offline, + disable_granular_heal) from glustolibs.gluster.heal_ops import (get_heal_info_summary, trigger_heal_full) from glustolibs.io.utils import validate_io_procs from glustolibs.misc.misc_libs import upload_scripts -from glustolibs.gluster.volume_ops import set_volume_options +from glustolibs.gluster.volume_ops import (set_volume_options, + get_volume_options) from glustolibs.gluster.mount_ops import mount_volume, umount_volume @@ -99,6 +101,15 @@ class SelfHealDaemonProcessTestsWithSingleVolume(GlusterBaseClass): * heal should complete successfully """ # pylint: disable=too-many-locals,too-many-statements,too-many-lines + + # Disable granular heal if not disabled already + granular = get_volume_options(self.mnode, self.volname, + 'granular-entry-heal') + if granular['cluster.granular-entry-heal'] == 'on': + ret = disable_granular_heal(self.mnode, self.volname) + self.assertTrue(ret, + "Unable to set granular-entry-heal to off") + # Setting Volume options options = {"metadata-self-heal": "on", "entry-self-heal": "on", @@ -131,7 +142,7 @@ class SelfHealDaemonProcessTestsWithSingleVolume(GlusterBaseClass): all_mounts_procs, num_files_to_write = [], 100 for mount_obj in self.mounts: cmd = ("/usr/bin/env python %s create_files " - "-f %s --base-file-name file %s" % (self.script_upload_path, + "-f %d --base-file-name file %s" % (self.script_upload_path, num_files_to_write, mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, cmd, @@ -221,8 +232,8 @@ 
class SelfHealDaemonProcessTestsWithSingleVolume(GlusterBaseClass): all_mounts_procs = [] for mount_obj in self.mounts: - cmd = ("/usr/bin/env python %s read %s" - % (self.script_upload_path, mount_obj.mountpoint)) + cmd = ("cd %s;for i in `seq 1 5`; do ls -l;cat *; stat *; sleep 5;" + " done " % (mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) all_mounts_procs.append(proc) diff --git a/tests/functional/afr/test_add_brick_followed_by_remove_brick.py b/tests/functional/afr/test_add_brick_followed_by_remove_brick.py new file mode 100644 index 000000000..a653b792d --- /dev/null +++ b/tests/functional/afr/test_add_brick_followed_by_remove_brick.py @@ -0,0 +1,170 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.dht_test_utils import is_layout_complete +from glustolibs.gluster.glusterfile import (file_exists, + occurences_of_pattern_in_file) +from glustolibs.gluster.rebalance_ops import (rebalance_start, + wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_libs import expand_volume, shrink_volume +from glustolibs.io.utils import (validate_io_procs, wait_for_io_to_complete) +from glustolibs.misc.misc_libs import upload_scripts + + +@runs_on([['replicated'], ['glusterfs']]) +class TestAddBrickFollowedByRemoveBrick(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + cls.get_super_method(cls, 'setUpClass')() + + cls.first_client = cls.mounts[0].client_system + cls.mountpoint = cls.mounts[0].mountpoint + cls.is_io_running = False + + # Upload IO scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + if not file_exists(cls.first_client, cls.script_upload_path): + if not upload_scripts(cls.first_client, cls.script_upload_path): + raise ExecutionError( + "Failed to upload IO scripts to client %s" + % cls.first_client) + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to setup and mount volume") + + def tearDown(self): + + if self.is_io_running: + if not wait_for_io_to_complete(self.all_mounts_procs, + [self.mounts[0]]): + raise ExecutionError("IO failed on some of the clients") + + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _check_layout_of_bricks(self): + 
"""Check the layout of bricks""" + ret = is_layout_complete(self.mnode, self.volname, "/") + self.assertTrue(ret, ("Volume %s: Layout is not complete", + self.volname)) + g.log.info("Volume %s: Layout is complete", self.volname) + + def _add_brick_and_wait_for_rebalance_to_complete(self): + """Add brick and wait for rebalance to complete""" + + # Add brick to volume + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, "Failed to add brick on volume %s" + % self.volname) + + # Trigger rebalance and wait for it to complete + ret, _, _ = rebalance_start(self.mnode, self.volname, + force=True) + self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s" + % self.volname) + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=1200) + self.assertTrue(ret, "Rebalance is not yet complete on the volume " + "%s" % self.volname) + g.log.info("Rebalance successfully completed") + + self._check_layout_of_bricks() + + def _remove_brick_from_volume(self): + """Remove bricks from volume""" + # Remove bricks from the volume + ret = shrink_volume(self.mnode, self.volname, rebalance_timeout=2000) + self.assertTrue(ret, "Failed to remove-brick from volume") + g.log.info("Remove-brick rebalance successful") + + def test_add_brick_followed_by_remove_brick(self): + """ + Test case: + 1. Create a volume, start it and mount it to a client. + 2. Start I/O on volume. + 3. Add brick and trigger rebalance, wait for rebalance to complete. + (The volume which was 1x3 should now be 2x3) + 4. Add brick and trigger rebalance, wait for rebalance to complete. + (The volume which was 2x3 should now be 3x3) + 5. Remove brick from volume such that it becomes a 2x3. + 6. Remove brick from volume such that it becomes a 1x3. + 7. Wait for I/O to complete and check for any input/output errors in + both client and rebalance logs. 
+ """ + # Start I/O on mount point + self.all_mounts_procs = [] + cmd = ("/usr/bin/env python {} create_deep_dirs_with_files " + "--dirname-start-num {} --dir-depth 5 --dir-length 5 " + "--max-num-of-dirs 5 --num-of-files 5 {}" + .format(self.script_upload_path, 10, self.mountpoint)) + proc = g.run_async(self.first_client, cmd) + self.all_mounts_procs.append(proc) + self.is_io_running = True + + # Convert 1x3 to 2x3 and then convert 2x3 to 3x3 + for _ in range(0, 2): + self._add_brick_and_wait_for_rebalance_to_complete() + + # Convert 3x3 to 2x3 and then convert 2x3 to 1x3 + for _ in range(0, 2): + self._remove_brick_from_volume() + + # Validate I/O processes running on the nodes + ret = validate_io_procs(self.all_mounts_procs, [self.mounts[0]]) + self.is_io_running = False + self.assertTrue(ret, "IO failed on some of the clients") + g.log.info("IO on all mounts: Complete") + + # Check for Input/output errors in rebalance logs + particiapting_nodes = [] + for brick in get_all_bricks(self.mnode, self.volname): + node, _ = brick.split(':') + particiapting_nodes.append(node) + + for server in particiapting_nodes: + ret = occurences_of_pattern_in_file( + server, "Input/output error", + "/var/log/glusterfs/{}-rebalance.log".format(self.volname)) + self.assertEqual(ret, 0, + "[Input/output error] present in rebalance log" + " file") + + # Check for Input/output errors in client logs + ret = occurences_of_pattern_in_file( + self.first_client, "Input/output error", + "/var/log/glusterfs/mnt-{}_{}.log".format(self.volname, + self.mount_type)) + self.assertEqual(ret, 0, + "[Input/output error] present in client log file") + g.log.info("Expanding and shrinking volume successful and no I/O " + "errors see in rebalance and client logs") diff --git a/tests/functional/afr/test_afr_cli_no_splitbrain_resolution.py b/tests/functional/afr/test_afr_cli_no_splitbrain_resolution.py index ad6f336a5..1acd11faa 100644 --- a/tests/functional/afr/test_afr_cli_no_splitbrain_resolution.py +++ 
b/tests/functional/afr/test_afr_cli_no_splitbrain_resolution.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2020 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2021 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -112,17 +112,16 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("creating 5 files from mount point") all_mounts_procs = [] - for mount_obj in self.mounts: - cmd = ("/usr/bin/env python %s create_files -f 5 " - "--base-file-name test_file --fixed-file-size 1k %s" % ( - self.script_upload_path, - mount_obj.mountpoint)) - proc = g.run_async(mount_obj.client_system, cmd, - user=mount_obj.user) - all_mounts_procs.append(proc) + cmd = ("/usr/bin/env python %s create_files -f 5 " + "--base-file-name test_file --fixed-file-size 1k %s" % ( + self.script_upload_path, self.mounts[0].mountpoint)) + proc = g.run_async(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + all_mounts_procs.append(proc) + # Validate I/O g.log.info("Wait for IO to complete and validate IO.....") - ret = validate_io_procs(all_mounts_procs, self.mounts) + ret = validate_io_procs(all_mounts_procs, [self.mounts[0]]) self.assertTrue(ret, "IO failed on some of the clients") g.log.info("IO is successful on all mounts") g.log.info("Successfully created a file from mount point") @@ -149,17 +148,16 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("creating 5 new files of same name from mount point") all_mounts_procs = [] - for mount_obj in self.mounts: - cmd = ("/usr/bin/env python %s create_files -f 5 " - "--base-file-name test_file --fixed-file-size 10k %s" % ( - self.script_upload_path, - mount_obj.mountpoint)) - proc = g.run_async(mount_obj.client_system, cmd, - user=mount_obj.user) - all_mounts_procs.append(proc) + cmd = ("/usr/bin/env python %s create_files -f 5 " + "--base-file-name test_file --fixed-file-size 10k %s" % ( + 
self.script_upload_path, self.mounts[0].mountpoint)) + proc = g.run_async(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + all_mounts_procs.append(proc) + # Validate I/O g.log.info("Wait for IO to complete and validate IO.....") - ret = validate_io_procs(all_mounts_procs, self.mounts) + ret = validate_io_procs(all_mounts_procs, [self.mounts[0]]) self.assertTrue(ret, "IO failed on some of the clients") g.log.info("IO is successful on all mounts") g.log.info("Successfully created a new file of same name " @@ -225,10 +223,11 @@ class TestSelfHeal(GlusterBaseClass): fpath = (self.mounts[0].mountpoint + '/test_file' + str(fcount) + '.txt') status = get_fattr(self.mounts[0].client_system, - fpath, 'replica.split-brain-status') + fpath, 'replica.split-brain-status', + encode="text") compare_string = ("The file is not under data or metadata " "split-brain") - self.assertEqual(status.rstrip('\x00'), compare_string, + self.assertEqual(status, compare_string, "file test_file%s is under" " split-brain" % str(fcount)) g.log.info("none of the files are under split-brain") diff --git a/tests/functional/afr/test_default_granular_entry_heal.py b/tests/functional/afr/test_default_granular_entry_heal.py new file mode 100644 index 000000000..91ca25907 --- /dev/null +++ b/tests/functional/afr/test_default_granular_entry_heal.py @@ -0,0 +1,235 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from random import choice + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + are_bricks_online, get_all_bricks) +from glustolibs.gluster.glusterfile import occurences_of_pattern_in_file +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_heal_complete) +from glustolibs.gluster.lib_utils import collect_bricks_arequal +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.volume_ops import get_volume_options +from glustolibs.io.utils import collect_mounts_arequal + + +@runs_on([['distributed-replicated', 'replicated', + 'arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestDefaultGranularEntryHeal(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + self.first_client = self.mounts[0].client_system + self.mountpoint = self.mounts[0].mountpoint + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to setup and mount volume") + + def tearDown(self): + + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _bring_bricks_offline(self): + """Brings bricks offline and confirms if they are offline""" + # Select bricks to bring offline from a replica set + subvols_dict = get_subvols(self.mnode, self.volname) + subvols = subvols_dict['volume_subvols'] + self.bricks_to_bring_offline = [] + for subvol in subvols: + 
self.bricks_to_bring_offline.append(choice(subvol)) + + # Bring bricks offline + ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' % + self.bricks_to_bring_offline) + + ret = are_bricks_offline(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks %s are not offline' + % self.bricks_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + self.bricks_to_bring_offline) + + def _restart_volume_and_bring_all_offline_bricks_online(self): + """Restart volume and bring all offline bricks online""" + + ret = is_heal_complete(self.mnode, self.volname) + self.assertFalse(ret, 'Heal is completed') + g.log.info('Heal is pending') + + ret = bring_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline, + bring_bricks_online_methods=[ + 'volume_start_force']) + self.assertTrue(ret, 'Failed to bring bricks %s online' % + self.bricks_to_bring_offline) + + # Check if bricks are back online or not + ret = are_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks not online %s even after restart' % + self.bricks_to_bring_offline) + + g.log.info('Bringing bricks %s online is successful', + self.bricks_to_bring_offline) + + def _wait_for_heal_to_completed(self): + """Check if heal is completed""" + ret = monitor_heal_completion(self.mnode, self.volname, + timeout_period=3600) + self.assertTrue(ret, 'Heal has not yet completed') + + def _check_arequal_on_bricks_with_a_specific_arequal(self, arequal, + brick_list): + """ + Compare an inital arequal checksum with bricks from a given brick list + """ + init_val = arequal[0].splitlines()[-1].split(':')[-1] + ret, arequals = collect_bricks_arequal(brick_list) + self.assertTrue(ret, 'Failed to get arequal on bricks') + for brick_arequal in arequals: + brick_total = brick_arequal.splitlines()[-1].split(':')[-1] + self.assertEqual(init_val, 
brick_total, 'Arequals not matching') + + @staticmethod + def _add_dir_path_to_brick_list(brick_list): + """Add test_self_heal at the end of brick path""" + dir_brick_list = [] + for brick in brick_list: + dir_brick_list.append('{}/{}'.format(brick, 'mydir')) + return dir_brick_list + + def _check_arequal_checksum_for_the_volume(self): + """ + Check if arequals of mount point and bricks are + are the same. + """ + if self.volume_type == "replicated": + # Check arequals for "replicated" + brick_list = get_all_bricks(self.mnode, self.volname) + dir_brick_list = self._add_dir_path_to_brick_list(brick_list) + + # Get arequal before getting bricks offline + work_dir = '{}/mydir'.format(self.mountpoint) + ret, arequals = collect_mounts_arequal([self.mounts[0]], + path=work_dir) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal before getting bricks offline ' + 'is successful') + + # Get arequal on bricks and compare with mount_point_total + self._check_arequal_on_bricks_with_a_specific_arequal( + arequals, dir_brick_list) + + # Check arequals for "distributed-replicated" + if self.volume_type == "distributed-replicated": + # Get the subvolumes + subvols_dict = get_subvols(self.mnode, self.volname) + num_subvols = len(subvols_dict['volume_subvols']) + + # Get arequals and compare + for i in range(0, num_subvols): + # Get arequal for first brick + brick_list = subvols_dict['volume_subvols'][i] + dir_brick_list = self._add_dir_path_to_brick_list(brick_list) + ret, arequals = collect_bricks_arequal([dir_brick_list[0]]) + self.assertTrue(ret, 'Failed to get arequal on first brick') + + # Get arequal for every brick and compare with first brick + self._check_arequal_on_bricks_with_a_specific_arequal( + arequals, dir_brick_list) + + def test_default_granular_entry_heal(self): + """ + Test case: + 1. Create a cluster. + 2. Create volume start it and mount it. + 3. Check if cluster.granular-entry-heal is ON by default or not. + 4. 
Check /var/lib/glusterd/<volname>/info for + cluster.granular-entry-heal=on. + 5. Check if option granular-entry-heal is present in the + volume graph or not. + 6. Kill one or two bricks of the volume depending on volume type. + 7. Create all types of files on the volume like text files, hidden + files, link files, dirs, char device, block device and so on. + 8. Bring back the killed brick by restarting the volume. + 9. Wait for heal to complete. + 10. Check arequal-checksum of all the bricks and see if it's proper or + not. + """ + # Check if cluster.granular-entry-heal is ON by default or not + ret = get_volume_options(self.mnode, self.volname, + 'granular-entry-heal') + self.assertEqual(ret['cluster.granular-entry-heal'], 'on', + "Value of cluster.granular-entry-heal not on " + "by default") + + # Check var/lib/glusterd/<volname>/info for + # cluster.granular-entry-heal=on + ret = occurences_of_pattern_in_file(self.mnode, + 'cluster.granular-entry-heal=on', + '/var/lib/glusterd/vols/{}/info' + .format(self.volname)) + self.assertEqual(ret, 1, "Failed get cluster.granular-entry-heal=on in" + " info file") + + # Check if option granular-entry-heal is present in the + # volume graph or not + ret = occurences_of_pattern_in_file(self.first_client, + 'option granular-entry-heal on', + "/var/log/glusterfs/mnt-{}_{}.log" + .format(self.volname, + self.mount_type)) + self.assertTrue(ret > 0, + "Failed to find granular-entry-heal in volume graph") + g.log.info("granular-entry-heal properly set to ON by default") + + # Kill one or two bricks of the volume depending on volume type + self._bring_bricks_offline() + + # Create all types of files on the volume like text files, hidden + # files, link files, dirs, char device, block device and so on + cmd = ("cd {};mkdir mydir;cd mydir;mkdir dir;mkdir .hiddendir;" + "touch file;touch .hiddenfile;mknod blockfile b 1 5;" + "mknod charfile b 1 5; mkfifo pipefile;touch fileforhardlink;" + "touch fileforsoftlink;ln fileforhardlink 
hardlinkfile;" + "ln -s fileforsoftlink softlinkfile".format(self.mountpoint)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to create files of all types") + + # Bring back the killed brick by restarting the volume Bricks should + # be online again + self._restart_volume_and_bring_all_offline_bricks_online() + + # Wait for heal to complete + self._wait_for_heal_to_completed() + + # Check arequal-checksum of all the bricks and see if it's proper or + # not + self._check_arequal_checksum_for_the_volume() diff --git a/tests/functional/afr/test_self_heal_with_expand_volume.py b/tests/functional/afr/test_self_heal_with_expand_volume.py new file mode 100644 index 000000000..d5b6d5d43 --- /dev/null +++ b/tests/functional/afr/test_self_heal_with_expand_volume.py @@ -0,0 +1,221 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from random import choice + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + are_bricks_online, get_all_bricks) +from glustolibs.gluster.glusterfile import (set_file_permissions, + occurences_of_pattern_in_file) +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_heal_complete) +from glustolibs.gluster.rebalance_ops import ( + rebalance_start, wait_for_rebalance_to_complete) +from glustolibs.gluster.lib_utils import (add_user, del_user) +from glustolibs.gluster.volume_libs import (get_subvols, expand_volume) + + +@runs_on([['distributed-replicated'], ['glusterfs']]) +class TestHealWithExpandVolume(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + self.first_client = self.mounts[0].client_system + self.mountpoint = self.mounts[0].mountpoint + + # Create non-root users + self.users = ('qa_user', 'qa_admin') + for user in self.users: + if not add_user(self.first_client, user): + raise ExecutionError("Failed to create non-root user {}" + .format(user)) + g.log.info("Successfully created non-root users") + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to setup and mount volume") + + def tearDown(self): + + # Delete non-root users + for user in self.users: + del_user(self.first_client, user) + ret, _, _ = g.run(self.first_client, + "rm -rf /home/{}".format(user)) + if ret: + raise ExecutionError("Failed to remove home dir of " + "non-root user") + g.log.info("Successfully deleted all users") + + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def 
_bring_bricks_offline(self): + """Brings bricks offline and confirms if they are offline""" + # Select bricks to bring offline from a replica set + subvols_dict = get_subvols(self.mnode, self.volname) + subvols = subvols_dict['volume_subvols'] + self.bricks_to_bring_offline = [] + self.bricks_to_bring_offline.append(choice(subvols[0])) + + # Bring bricks offline + ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' % + self.bricks_to_bring_offline) + + ret = are_bricks_offline(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks %s are not offline' + % self.bricks_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + self.bricks_to_bring_offline) + + def _restart_volume_and_bring_all_offline_bricks_online(self): + """Restart volume and bring all offline bricks online""" + ret = bring_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline, + bring_bricks_online_methods=[ + 'volume_start_force']) + self.assertTrue(ret, 'Failed to bring bricks %s online' % + self.bricks_to_bring_offline) + + # Check if bricks are back online or not + ret = are_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks not online %s even after restart' % + self.bricks_to_bring_offline) + + g.log.info('Bringing bricks %s online is successful', + self.bricks_to_bring_offline) + + def _wait_for_heal_to_completed(self): + """Check if heal is completed""" + ret = monitor_heal_completion(self.mnode, self.volname, + timeout_period=3600) + self.assertTrue(ret, 'Heal has not yet completed') + + def _check_if_there_are_files_to_be_healed(self): + """Check if there are files and dirs to be healed""" + ret = is_heal_complete(self.mnode, self.volname) + self.assertFalse(ret, 'Heal is completed') + g.log.info('Heal is pending') + + def _expand_volume_and_wait_for_rebalance_to_complete(self): + """Expand 
volume and wait for rebalance to complete""" + # Add brick to volume + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, "Failed to add brick on volume %s" + % self.volname) + + # Trigger rebalance and wait for it to complete + ret, _, _ = rebalance_start(self.mnode, self.volname, + force=True) + self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s" + % self.volname) + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=6000) + self.assertTrue(ret, "Rebalance is not yet complete on the volume " + "%s" % self.volname) + g.log.info("Rebalance successfully completed") + + def test_self_heal_and_add_brick_with_data_from_diff_users(self): + """ + Test case: + 1. Created a 2X3 volume. + 2. Mount the volume using FUSE and give 777 permissions to the mount. + 3. Added a new user. + 4. Login as new user and created 100 files from the new user: + for i in {1..100};do dd if=/dev/urandom of=$i bs=1024 count=1;done + 5. Kill a brick which is part of the volume. + 6. On the mount, login as root user and create 1000 files: + for i in {1..1000};do dd if=/dev/urandom of=f$i bs=10M count=1;done + 7. On the mount, login as new user, and copy existing data to + the mount. + 8. Start volume using force. + 9. While heal is in progress, add-brick and start rebalance. + 10. Wait for rebalance and heal to complete, + 11. Check for MSGID: 108008 errors in rebalance logs. 
+ """ + # Change permissions of mount point to 777 + ret = set_file_permissions(self.first_client, self.mountpoint, + '-R 777') + self.assertTrue(ret, "Unable to change mount point permissions") + g.log.info("Mount point permissions set to 777") + + # Create 100 files from non-root user + cmd = ("su -l %s -c 'cd %s; for i in {1..100};do dd if=/dev/urandom " + "of=nonrootfile$i bs=1024 count=1; done'" % (self.users[0], + self.mountpoint)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to create files from non-root user") + + # Kill one brick which is part of the volume + self._bring_bricks_offline() + + # Create 1000 files from root user + cmd = ("cd %s; for i in {1..1000};do dd if=/dev/urandom of=rootfile$i" + " bs=10M count=1;done" % self.mountpoint) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to creare files from root user") + + # On the mount, login as new user, and copy existing data to + # the mount + cmd = ("su -l %s -c 'wget https://cdn.kernel.org/pub/linux/kernel/" + "v5.x/linux-5.4.54.tar.xz; tar -xvf linux-5.4.54.tar.xz;" + "cd %s; cp -r ~/ .;'" % (self.users[1], self.mountpoint)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to copy files from non-root user") + + # Check if there are files to be healed + self._check_if_there_are_files_to_be_healed() + + # Start the vol using force + self._restart_volume_and_bring_all_offline_bricks_online() + + # Add bricks to volume and wait for heal to complete + self._expand_volume_and_wait_for_rebalance_to_complete() + + # Wait for heal to complete + self._wait_for_heal_to_completed() + + # Check for MSGID: 108008 errors in rebalance logs + particiapting_nodes = [] + for brick in get_all_bricks(self.mnode, self.volname): + node, _ = brick.split(':') + particiapting_nodes.append(node) + + for server in particiapting_nodes: + ret = occurences_of_pattern_in_file( + server, "MSGID: 108008", + 
"/var/log/glusterfs/{}-rebalance.log".format(self.volname)) + self.assertEqual(ret, 0, + "[Input/output error] present in rebalance log" + " file") + g.log.info("Expanding volume successful and no MSGID: 108008 " + "errors see in rebalance logs") diff --git a/tests/functional/afr/test_split_brain_with_hard_link_file.py b/tests/functional/afr/test_split_brain_with_hard_link_file.py new file mode 100644 index 000000000..a8248fb72 --- /dev/null +++ b/tests/functional/afr/test_split_brain_with_hard_link_file.py @@ -0,0 +1,175 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 

# pylint: disable=too-many-statements, too-many-locals, unused-variable
from glusto.core import Glusto as g

from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.brick_libs import (get_all_bricks,
                                           bring_bricks_offline,
                                           bring_bricks_online,
                                           are_bricks_offline)
from glustolibs.gluster.heal_ops import trigger_heal
from glustolibs.gluster.heal_libs import (is_volume_in_split_brain,
                                          monitor_heal_completion,
                                          is_heal_complete)

from glustolibs.gluster.volume_ops import set_volume_options
from glustolibs.gluster.glusterfile import create_link_file


@runs_on([['distributed-replicated'], ['glusterfs']])
class TestSelfHeal(GlusterBaseClass):
    """Self-heal of renamed hardlinks after rolling brick outages.

    Hardlinks of one file are renamed while a different brick is offline
    for each rename; with the self-heal daemon re-enabled the volume must
    heal completely and must not end up in split-brain.
    """

    @classmethod
    def setUpClass(cls):
        """Set up and mount a 2 x 3 distributed-replicated volume.

        Raises:
            ExecutionError: if volume setup or mounting fails.
        """
        # Calling GlusterBaseClass setUpClass
        cls.get_super_method(cls, 'setUpClass')()

        # Override Volumes
        if cls.volume_type == "distributed-replicated":
            # Define x3 distributed-replicated volume
            cls.volume['voltype'] = {
                'type': 'distributed-replicated',
                'dist_count': 2,
                'replica_count': 3,
                'transport': 'tcp'}

        # Setup Volume and Mount Volume
        ret = cls.setup_volume_and_mount_volume(cls.mounts)
        if not ret:
            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
        g.log.info("Successful in Setup Volume and Mount Volume")

    @classmethod
    def tearDownClass(cls):
        """Unmount and clean up the volume created in setUpClass.

        Raises:
            ExecutionError: if unmounting or volume cleanup fails.
        """
        # Cleanup Volume
        ret = cls.unmount_volume_and_cleanup_volume(cls.mounts)
        if not ret:
            # The original message said "Failed to create volume", which
            # pointed at the wrong phase when a cleanup failed.
            raise ExecutionError("Failed to unmount and cleanup volume %s"
                                 % cls.volname)
        g.log.info("Successful in cleaning up Volume %s", cls.volname)

        cls.get_super_method(cls, 'tearDownClass')()

    def _test_brick_down_with_file_rename(self, pfile, rfile, brick):
        """Rename a file on the mount while one brick is offline.

        Brings `brick` offline, verifies it is offline, renames `pfile` to
        `rfile` (both relative to the first mount point) from the first
        client, and finally brings `brick` back online.

        Args:
            pfile (str): current file name on the mount point.
            rfile (str): new file name on the mount point.
            brick (str): brick to keep offline during the rename.
        """
        # Bring brick offline
        g.log.info('Bringing brick %s offline', brick)
        ret = bring_bricks_offline(self.volname, brick)
        self.assertTrue(ret, 'Failed to bring brick %s offline'
                        % brick)

        ret = are_bricks_offline(self.mnode, self.volname,
                                 [brick])
        self.assertTrue(ret, 'Brick %s is not offline'
                        % brick)
        g.log.info('Bringing brick %s offline is successful',
                   brick)

        # Rename file
        cmd = ("mv %s/%s %s/%s"
               % (self.mounts[0].mountpoint, pfile,
                  self.mounts[0].mountpoint, rfile))
        ret, _, _ = g.run(self.clients[0], cmd)
        self.assertEqual(ret, 0, "rename of file failed")

        # Bring brick back online
        g.log.info('Bringing brick %s online', brick)
        ret = bring_bricks_online(self.mnode, self.volname,
                                  brick)
        self.assertTrue(ret, 'Failed to bring brick %s online' %
                        brick)
        g.log.info('Bringing brick %s online is successful', brick)

    def test_afr_heal_with_brickdown_hardlink(self):
        """
        Steps:
        1. Create 2 * 3 distribute replicate volume and disable all heals
        2. Create a file and 3 hardlinks (HLINK1..HLINK3) to it from the
           fuse mount.
        3. Kill the 4th brick (bricks_list[3]) and rename HLINK1 to
           NEW-HLINK1 (the name is intended to hash to replicate-1).
        4. Likewise rename HLINK2 and HLINK3, killing the 5th and 6th
           brick respectively each time, i.e. a different brick of the
           2nd replica is down each time.
        5. Now enable shd and let selfheals complete.
        6. Heal should complete without split-brains.
        """
        bricks_list = get_all_bricks(self.mnode, self.volname)
        # The renames below address bricks 3..5, so fail with a clear
        # message instead of an IndexError/TypeError further down.
        self.assertIsNotNone(bricks_list, "Unable to get the bricks list")
        self.assertGreaterEqual(len(bricks_list), 6,
                                "Expected at least 6 bricks, got %s"
                                % bricks_list)

        options = {"metadata-self-heal": "off",
                   "entry-self-heal": "off",
                   "data-self-heal": "off",
                   "self-heal-daemon": "off"}
        g.log.info("setting options %s", options)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set volume option %s for "
                              "volume %s" % (options, self.volname)))
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        cmd = ("touch %s/FILE" % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.clients[0], cmd)
        self.assertEqual(ret, 0, "file creation failed")

        # Creating a hardlink for the file created
        for i in range(1, 4):
            ret = create_link_file(self.clients[0],
                                   '{}/FILE'.format(self.mounts[0].mountpoint),
                                   '{}/HLINK{}'.format
                                   (self.mounts[0].mountpoint, i))
            self.assertTrue(ret, "Unable to create hard link file ")

        # For HLINK1..HLINK3: bring one brick (bricks_list[3], [4], [5] in
        # turn) offline, rename the hardlink, and bring the brick back online
        for link_num, brick in enumerate(bricks_list[3:6], start=1):
            self._test_brick_down_with_file_rename(
                "HLINK%d" % link_num, "NEW-HLINK%d" % link_num, brick)

        # Setting options
        options = {"self-heal-daemon": "on"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options %s' % options)
        g.log.info("Option 'self-heal-daemon' is set to 'on' successfully")

        # Start healing
        ret = trigger_heal(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not started')
        g.log.info('Healing is started')

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')

        # Check data on mount point
        cmd = ("ls %s" % (self.mounts[0].mountpoint))
        ret, _, _ = g.run(self.clients[0], cmd)
        self.assertEqual(ret, 0, "failed to fetch data from mount point")


# diff --git a/tests/functional/afr/test_split_brain_with_node_reboot.py b/tests/functional/afr/test_split_brain_with_node_reboot.py
# new file mode 100644
# index 000000000..9b630ba75
# --- /dev/null
# +++ b/tests/functional/afr/test_split_brain_with_node_reboot.py
# @@ -0,0 +1,149 @@
#
# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

# pylint: disable=too-many-statements, too-many-locals
from unittest import SkipTest
from glusto.core import Glusto as g
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.misc.misc_libs import upload_scripts
from glustolibs.gluster.glusterdir import mkdir
from glustolibs.gluster.heal_libs import (monitor_heal_completion,
                                          is_heal_complete)
from glustolibs.io.utils import (run_linux_untar, run_crefi,
                                 wait_for_io_to_complete)


@runs_on([['replicated', 'distributed-replicated'], ['glusterfs']])
class TestSelfHeal(GlusterBaseClass):
    """Self-heal must complete after server reboots under client I/O."""

    @classmethod
    def setUpClass(cls):
        """Upload the I/O script, then set up and mount the volume.

        Raises:
            SkipTest: if fewer than 4 clients/mounts are available.
            ExecutionError: if script upload or volume setup/mount fails.
        """
        # Calling GlusterBaseClass setUpClass
        cls.get_super_method(cls, 'setUpClass')()

        # The test runs workloads from clients/mounts 0..3 (untar, du, ls
        # and crefi), so 4 of each are needed; with only 3 the test would
        # die with an IndexError instead of being skipped.
        if len(cls.clients) < 4 or len(cls.mounts) < 4:
            raise SkipTest("This test requires atleast 4 clients")

        # Upload io scripts for running IO on mounts
        cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
                                  "file_dir_ops.py")
        ret = upload_scripts(cls.clients, cls.script_upload_path)
        if not ret:
            raise ExecutionError("Failed to upload IO scripts "
                                 "to clients %s" % cls.clients)
        g.log.info("Successfully uploaded IO scripts to clients %s",
                   cls.clients)

        # Setup Volume and Mount Volume
        ret = cls.setup_volume_and_mount_volume(cls.mounts, True)
        if not ret:
            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
        g.log.info("Successful in Setup Volume and Mount Volume")

        cls.list_of_io_processes = []
        cls.is_io_running = False

    def tearDown(self):
        """Wait for outstanding I/O, then unmount and clean up the volume.

        Raises:
            ExecutionError: if waiting for I/O or the cleanup fails.
        """
        # If I/O processes are running wait from them to complete
        if self.is_io_running:
            if not wait_for_io_to_complete(self.list_of_io_processes,
                                           self.mounts):
                raise ExecutionError("Failed to wait for I/O to complete")

        # Unmounting and cleaning volume. Every mount created in
        # setUpClass is unmounted (not only the first one) so no stale
        # mounts are left behind on the other clients.
        ret = self.unmount_volume_and_cleanup_volume(self.mounts)
        if not ret:
            raise ExecutionError("Unable to delete volume %s"
                                 % self.volname)

        self.get_super_method(self, 'tearDown')()

    def test_afr_node_reboot_self_heal(self):
        """
        Steps:
        1. Create *3 replica volume
        2. Mount the volume on 4 clients
        3. Run following workload from clients
        Client 1: Linux Untars
        Client 2: Lookups du
        Client 3: Lookups ls
        4. Create a directory on mount point
        5. Create deep dirs and file in the directory created at step 4
           (crefi, from client 4)
        6. Perform node reboot
        7. Check for heal status
        8. Reboot another node
        9. Check for heal status
        """

        # Create a dir to start untar
        self.linux_untar_dir = "{}/{}".format(self.mounts[0].mountpoint,
                                              "linuxuntar")
        ret = mkdir(self.clients[0], self.linux_untar_dir)
        self.assertTrue(ret, "Failed to create dir linuxuntar for untar")

        # Start linux untar on dir linuxuntar from client 1
        ret = run_linux_untar(self.clients[0], self.mounts[0].mountpoint,
                              dirs=('linuxuntar',))
        self.list_of_io_processes += ret
        self.is_io_running = True

        # Run lookup operation du from client 2
        cmd = ("cd {}; for i in `seq 1 1000000`;do du -sh; done"
               .format(self.mounts[1].mountpoint))
        ret = g.run_async(self.mounts[1].client_system, cmd)
        self.list_of_io_processes += [ret]

        # Run lookup operation ls from client 3
        cmd = ("cd {}; for i in `seq 1 1000000`;do ls -laRt; done"
               .format(self.mounts[2].mountpoint))
        ret = g.run_async(self.mounts[2].client_system, cmd)
        self.list_of_io_processes += [ret]

        # Create a dir to start crefi tool (kept in a local so the untar
        # directory attribute is not overwritten)
        crefi_dir = "{}/{}".format(self.mounts[3].mountpoint, "crefi")
        ret = mkdir(self.clients[3], crefi_dir)
        self.assertTrue(ret, "Failed to create dir for crefi")

        # Create deep dirs and files on mount point from client 4
        list_of_fops = ("create", "rename", "chmod", "chown", "chgrp",
                        "hardlink", "truncate", "setxattr")
        for fops in list_of_fops:
            ret = run_crefi(self.clients[3],
                            crefi_dir, 10, 3, 3, thread=4,
                            random_size=True, fop=fops, minfs=0,
                            maxfs=102400, multi=True, random_filename=True)
            self.assertTrue(ret, "crefi failed during {}".format(fops))
            g.log.info("crefi PASSED FOR fop %s", fops)
        g.log.info("IOs were successful using crefi")

        for server_num in (1, 2):
            # Perform node reboot for servers
            g.log.info("Rebooting %s", self.servers[server_num])
            # NOTE(review): the reboot is only dispatched asynchronously;
            # this assert presumably checks just that a handle came back,
            # not that the node went down or came back - confirm whether
            # an explicit wait for the node is wanted here.
            ret = g.run_async(self.servers[server_num], "reboot")
            self.assertTrue(ret, 'Failed to reboot node')

            # Monitor heal completion
            ret = monitor_heal_completion(self.mnode, self.volname)
            self.assertTrue(ret, 'Heal has not yet completed')

            # Check if heal is completed
            ret = is_heal_complete(self.mnode, self.volname)
            self.assertTrue(ret, 'Heal is not complete')
            g.log.info('Heal is completed successfully')