diff options
Diffstat (limited to 'tests')
52 files changed, 7152 insertions, 181 deletions
diff --git a/tests/functional/afr/heal/test_dir_time_stamp_restoration.py b/tests/functional/afr/heal/test_dir_time_stamp_restoration.py new file mode 100644 index 000000000..6a4ef2a19 --- /dev/null +++ b/tests/functional/afr/heal/test_dir_time_stamp_restoration.py @@ -0,0 +1,160 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +Description: + Check if parent directory timestamps are restored after an entry heal. 
+""" +from glusto.core import Glusto as g + +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import ( + bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + select_volume_bricks_to_bring_offline, + get_all_bricks) +from glustolibs.gluster.heal_ops import trigger_heal +from glustolibs.gluster.glusterdir import (mkdir, rmdir) +from glustolibs.gluster.glusterfile import (get_fattr, get_file_stat) +from glustolibs.gluster.volume_libs import set_volume_options +from glustolibs.gluster.heal_libs import monitor_heal_completion + + +@runs_on([['replicated'], + ['glusterfs']]) +class TestDirTimeStampRestore(GlusterBaseClass): + + def setUp(self): + self.get_super_method(self, 'setUp')() + + ret = self.setup_volume_and_mount_volume(mounts=self.mounts, + volume_create_force=False) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + self.bricks_list = get_all_bricks(self.mnode, self.volname) + + def tearDown(self): + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + self.get_super_method(self, 'tearDown')() + + def are_mdata_xattrs_equal(self): + """Check if atime/mtime/ctime in glusterfs.mdata xattr are identical""" + timestamps = [] + for brick_path in self.bricks_list: + server, brick = brick_path.split(':') + fattr = get_fattr(server, '%s/%s' % (brick, "dir1"), + 'trusted.glusterfs.mdata') + self.assertIsNotNone(fattr, 'Unable to get mdata xattr') + timestamps.append(fattr) + + g.log.debug("mdata list = %s", ''.join(map(str, timestamps))) + return timestamps.count(timestamps[0]) == len(timestamps) + + def are_stat_timestamps_equal(self): + """Check if atime/mtime/ctime in stat info 
are identical""" + timestamps = [] + for brick_path in self.bricks_list: + server, brick = brick_path.split(':') + stat_data = get_file_stat(server, "%s/dir1" % brick) + ts_string = "{}-{}-{}".format(stat_data['epoch_atime'], + stat_data['epoch_mtime'], + stat_data['epoch_ctime']) + timestamps.append(ts_string) + + g.log.debug("stat list = %s", ''.join(map(str, timestamps))) + return timestamps.count(timestamps[0]) == len(timestamps) + + def perform_test(self, ctime): + """ + Testcase steps: + 1. Enable/disable features,ctime based on function argument. + 2. Create a directory on the mount point. + 3. Kill a brick and create a file inside the directory. + 4. Bring the brick online. + 5. Trigger heal and wait for its completion. + 6. Verify that the atime, mtime and ctime of the directory are same on + all bricks of the replica. + """ + if ctime: + option = {'features.ctime': 'on'} + else: + option = {'features.ctime': 'off'} + ret = set_volume_options(self.mnode, self.volname, option) + self.assertTrue(ret, 'failed to set option %s on %s' + % (option, self.volume)) + + client, m_point = (self.mounts[0].client_system, + self.mounts[0].mountpoint) + + dirpath = '{}/dir1'.format(m_point) + ret = mkdir(client, dirpath) + self.assertTrue(ret, 'Unable to create a directory from mount point') + + bricks_to_bring_offline = select_volume_bricks_to_bring_offline( + self.mnode, self.volname) + self.assertIsNotNone(bricks_to_bring_offline, "List is empty") + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} offline'. + format(bricks_to_bring_offline)) + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks {} are not offline'. 
+ format(bricks_to_bring_offline)) + + cmd = 'touch {}/file1'.format(dirpath) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, 'Unable to create file from mount point') + + ret = bring_bricks_online( + self.mnode, self.volname, + bricks_to_bring_offline, + bring_bricks_online_methods=['volume_start_force']) + self.assertTrue(ret, 'Failed to bring bricks {} online'.format + (bricks_to_bring_offline)) + ret = trigger_heal(self.mnode, self.volname) + self.assertTrue(ret, 'Starting heal failed') + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + if ctime: + ret = self.are_mdata_xattrs_equal() + self.assertTrue(ret, "glusterfs.mdata mismatch for {}" + .format(dirpath)) + else: + ret = self.are_stat_timestamps_equal() + self.assertTrue(ret, "stat mismatch for {}".format(dirpath)) + + ret = rmdir(client, dirpath, force=True) + self.assertTrue(ret, 'Unable to delete directory from mount point') + + def test_dir_time_stamp_restoration(self): + """ + Create pending entry self-heal on a replica volume and verify that + after the heal is complete, the atime, mtime and ctime of the parent + directory are identical on all bricks of the replica. + + The test is run with features.ctime enabled as well as disabled. + """ + self.perform_test(ctime=True) + self.perform_test(ctime=False) diff --git a/tests/functional/afr/heal/test_no_glustershd_with_distribute.py b/tests/functional/afr/heal/test_no_glustershd_with_distribute.py index d2b43bfe3..bbefe0cff 100644 --- a/tests/functional/afr/heal/test_no_glustershd_with_distribute.py +++ b/tests/functional/afr/heal/test_no_glustershd_with_distribute.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2021 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -58,7 +58,7 @@ class SelfHealDaemonProcessTestsWithMultipleVolumes(GlusterBaseClass): for volume_config in cls.volume_configs: ret = setup_volume(mnode=cls.mnode, all_servers_info=cls.all_servers_info, - volume_config=volume_config) + volume_config=volume_config, multi_vol=True) volname = volume_config['name'] if not ret: raise ExecutionError("Failed to setup Volume" diff --git a/tests/functional/afr/heal/test_self_heal_with_link_files.py b/tests/functional/afr/heal/test_self_heal_with_link_files.py new file mode 100644 index 000000000..d029c3d9e --- /dev/null +++ b/tests/functional/afr/heal/test_self_heal_with_link_files.py @@ -0,0 +1,405 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from random import choice + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + are_bricks_online, + get_all_bricks) +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_volume_in_split_brain, + is_heal_complete) +from glustolibs.gluster.lib_utils import collect_bricks_arequal +from glustolibs.gluster.volume_libs import (get_subvols, + replace_brick_from_volume) +from glustolibs.io.utils import collect_mounts_arequal + + +@runs_on([['distributed-replicated', 'replicated'], ['glusterfs']]) +class TestHealWithLinkFiles(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to setup and mount volume") + + self.first_client = self.mounts[0].client_system + self.mountpoint = self.mounts[0].mountpoint + + def tearDown(self): + + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _create_files_and_dirs_on_mount_point(self, second_attempt=False): + """A function to create files and dirs on mount point""" + # Create a parent directory test_link_self_heal on mount point + if not second_attempt: + ret = mkdir(self.first_client, + '{}/{}'.format(self.mountpoint, + 'test_link_self_heal')) + self.assertTrue(ret, "Failed to create dir test_link_self_heal") + + # Create dirctories and files inside directory test_link_self_heal + io_cmd = ("for i in `seq 1 5`; do mkdir dir.$i; " + "for j in `seq 1 10`; do dd if=/dev/random " + "of=dir.$i/file.$j bs=1k count=$j; done; done") + if 
second_attempt: + io_cmd = ("for i in `seq 1 5` ; do for j in `seq 1 10`; " + "do dd if=/dev/random of=sym_link_dir.$i/" + "new_file.$j bs=1k count=$j; done; done ") + cmd = ("cd {}/test_link_self_heal;{}".format(self.mountpoint, io_cmd)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to create dirs and files inside") + + def _create_soft_links_to_directories(self): + """Create soft links to directories""" + cmd = ("cd {}/test_link_self_heal; for i in `seq 1 5`; do ln -s " + "dir.$i sym_link_dir.$i; done".format(self.mountpoint)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to create soft links to dirs") + + def _verify_soft_links_to_dir(self, option=0): + """Verify soft links to dir""" + + cmd_list = [ + ("for i in `seq 1 5`; do stat -c %F sym_link_dir.$i | " + "grep -F 'symbolic link'; if [ $? -ne 0 ]; then exit 1;" + " fi ; done; for i in `seq 1 5` ; do readlink sym_link_dir.$i | " + "grep \"dir.$i\"; if [ $? -ne 0 ]; then exit 1; fi; done; "), + ("for i in `seq 1 5`; do for j in `seq 1 10`; do ls " + "dir.$i/new_file.$j; if [ $? -ne 0 ]; then exit 1; fi; done; " + "done")] + + # Generate command to check according to option + if option == 2: + verify_cmd = "".join(cmd_list) + else: + verify_cmd = cmd_list[option] + + cmd = ("cd {}/test_link_self_heal; {}".format(self.mountpoint, + verify_cmd)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Symlinks aren't proper") + + def _create_hard_links_to_files(self, second_attempt=False): + """Create hard links to files""" + io_cmd = ("for i in `seq 1 5`;do for j in `seq 1 10`;do ln " + "dir.$i/file.$j dir.$i/link_file.$j;done; done") + if second_attempt: + io_cmd = ("for i in `seq 1 5`; do mkdir new_dir.$i; for j in " + "`seq 1 10`; do ln dir.$i/file.$j new_dir.$i/new_file." 
+ "$j;done; done;") + + cmd = ("cd {}/test_link_self_heal;{}".format(self.mountpoint, io_cmd)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to create hard links to files") + + def _verify_hard_links_to_files(self, second_set=False): + """Verify if hard links to files""" + file_to_compare = "dir.$i/link_file.$j" + if second_set: + file_to_compare = "new_dir.$i/new_file.$j" + + cmd = ("cd {}/test_link_self_heal;for i in `seq 1 5`; do for j in `seq" + " 1 10`;do if [ `stat -c %i dir.$i/file.$j` -ne `stat -c %i " + "{}` ];then exit 1; fi; done; done" + .format(self.mountpoint, file_to_compare)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to verify hard links to files") + + def _bring_bricks_offline(self): + """Brings bricks offline and confirms if they are offline""" + # Select bricks to bring offline from a replica set + subvols_dict = get_subvols(self.mnode, self.volname) + subvols = subvols_dict['volume_subvols'] + self.bricks_to_bring_offline = [] + for subvol in subvols: + self.bricks_to_bring_offline.append(subvol[0]) + + # Bring bricks offline + ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' % + self.bricks_to_bring_offline) + + ret = are_bricks_offline(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks %s are not offline' + % self.bricks_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + self.bricks_to_bring_offline) + + def _restart_volume_and_bring_all_offline_bricks_online(self): + """Restart volume and bring all offline bricks online""" + ret = bring_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline, + bring_bricks_online_methods=[ + 'volume_start_force']) + self.assertTrue(ret, 'Failed to bring bricks %s online' % + self.bricks_to_bring_offline) + + # Check if bricks are back online or not + ret = are_bricks_online(self.mnode, 
self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks not online %s even after restart' % + self.bricks_to_bring_offline) + + g.log.info('Bringing bricks %s online is successful', + self.bricks_to_bring_offline) + + def _check_arequal_on_bricks_with_a_specific_arequal(self, arequal, + brick_list): + """ + Compare an inital arequal checksum with bricks from a given brick list + """ + init_val = arequal[0].splitlines()[-1].split(':')[-1] + ret, arequals = collect_bricks_arequal(brick_list) + self.assertTrue(ret, 'Failed to get arequal on bricks') + for brick_arequal in arequals: + brick_total = brick_arequal.splitlines()[-1].split(':')[-1] + self.assertEqual(init_val, brick_total, 'Arequals not matching') + + def _check_arequal_checksum_for_the_volume(self): + """ + Check if arequals of mount point and bricks are + are the same. + """ + if self.volume_type == "replicated": + # Check arequals for "replicated" + brick_list = get_all_bricks(self.mnode, self.volname) + + # Get arequal before getting bricks offline + ret, arequals = collect_mounts_arequal([self.mounts[0]]) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal before getting bricks offline ' + 'is successful') + + # Get arequal on bricks and compare with mount_point_total + self._check_arequal_on_bricks_with_a_specific_arequal( + arequals, brick_list) + + # Check arequals for "distributed-replicated" + if self.volume_type == "distributed-replicated": + # Get the subvolumes + subvols_dict = get_subvols(self.mnode, self.volname) + num_subvols = len(subvols_dict['volume_subvols']) + + # Get arequals and compare + for i in range(0, num_subvols): + # Get arequal for first brick + brick_list = subvols_dict['volume_subvols'][i] + ret, arequals = collect_bricks_arequal([brick_list[0]]) + self.assertTrue(ret, 'Failed to get arequal on first brick') + + # Get arequal for every brick and compare with first brick + self._check_arequal_on_bricks_with_a_specific_arequal( + 
arequals, brick_list) + + def _check_heal_is_completed_and_not_in_split_brain(self): + """Check if heal is completed and volume not in split brain""" + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') + + # Check if volume is in split brian or not + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + def _check_if_there_are_files_and_dirs_to_be_healed(self): + """Check if there are files and dirs to be healed""" + ret = is_heal_complete(self.mnode, self.volname) + self.assertFalse(ret, 'Heal is completed') + g.log.info('Heal is pending') + + def _wait_for_heal_is_completed(self): + """Check if heal is completed""" + ret = monitor_heal_completion(self.mnode, self.volname, + timeout_period=3600) + self.assertTrue(ret, 'Heal has not yet completed') + + def _replace_one_random_brick(self): + """Replace one random brick from the volume""" + brick = choice(get_all_bricks(self.mnode, self.volname)) + ret = replace_brick_from_volume(self.mnode, self.volname, + self.servers, self.all_servers_info, + src_brick=brick) + self.assertTrue(ret, "Failed to replace brick %s " % brick) + g.log.info("Successfully replaced brick %s", brick) + + def test_self_heal_of_hard_links(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create a directory and create files and directories inside it + on mount point. + 3. Collect and compare arequal-checksum according to the volume type + for bricks. + 4. Bring down brick processes accoding to the volume type. + 5. Create hard links for the files created in step 2. + 6. Check if heal info is showing all the files and dirs to be healed. + 7. Bring brack all brick processes which were killed. + 8. Wait for heal to complete on the volume. + 9. 
Check if heal is complete and check if volume is in split brain. + 10. Collect and compare arequal-checksum according to the volume type + for bricks. + 11. Verify if hard links are proper or not. + 12. Do a lookup on mount point. + 13. Bring down brick processes according to the volume type. + 14. Create a second set of hard links to the files. + 15. Check if heal info is showing all the files and dirs to be healed. + 16. Bring back all brick processes which were killed. + 17. Wait for heal to complete on the volume. + 18. Check if heal is complete and check if volume is in split brain. + 19. Collect and compare arequal-checksum according to the volume type + for bricks. + 20. Verify both sets of hard links are proper or not. + 21. Do a lookup on mount point. + 22. Pick a random brick and replace it. + 23. Wait for heal to complete on the volume. + 24. Check if heal is complete and check if volume is in split brain. + 25. Collect and compare arequal-checksum according to the volume type + for bricks. + 26. Verify both sets of hard links are proper or not. + 27. Do a lookup on mount point. 
+ """ + # Create a directory and create files and directories inside it + # on mount point + self._create_files_and_dirs_on_mount_point() + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + for attempt in (False, True): + + # Bring down brick processes accoding to the volume type + self._bring_bricks_offline() + + # Create hardlinks for the files created in step 2 + self._create_hard_links_to_files(second_attempt=attempt) + + # Check if heal info is showing all the files and dirs to + # be healed + self._check_if_there_are_files_and_dirs_to_be_healed() + + # Bring back all brick processes which were killed + self._restart_volume_and_bring_all_offline_bricks_online() + + # Wait for heal to complete on the volume + self._wait_for_heal_is_completed() + + # Check if heal is complete and check if volume is in split brain + self._check_heal_is_completed_and_not_in_split_brain() + + # Collect and compare arequal-checksum according to the volume + # type for bricks + self._check_arequal_checksum_for_the_volume() + + # Verify if hard links are proper or not + self._verify_hard_links_to_files() + if attempt: + self._verify_hard_links_to_files(second_set=attempt) + + # Pick a random brick and replace it + self._replace_one_random_brick() + + # Wait for heal to complete on the volume + self._wait_for_heal_is_completed() + + # Check if heal is complete and check if volume is in split brain + self._check_heal_is_completed_and_not_in_split_brain() + + # Collect and compare arequal-checksum according to the volume + # type for bricks + self._check_arequal_checksum_for_the_volume() + + # Verify if hard links are proper or not + self._verify_hard_links_to_files() + self._verify_hard_links_to_files(second_set=True) + + def test_self_heal_of_soft_links(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. 
Create a directory and create files and directories inside it + on mount point. + 3. Collect and compare arequal-checksum according to the volume type + for bricks. + 4. Bring down brick processes accoding to the volume type. + 5. Create soft links for the dirs created in step 2. + 6. Verify if soft links are proper or not. + 7. Add files through the soft links. + 8. Verify if the soft links are proper or not. + 9. Check if heal info is showing all the files and dirs to be healed. + 10. Bring brack all brick processes which were killed. + 11. Wait for heal to complete on the volume. + 12. Check if heal is complete and check if volume is in split brain. + 13. Collect and compare arequal-checksum according to the volume type + for bricks. + 14. Verify if soft links are proper or not. + 15. Do a lookup on mount point. + """ + # Create a directory and create files and directories inside it + # on mount point + self._create_files_and_dirs_on_mount_point() + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + + # Bring down brick processes accoding to the volume type + self._bring_bricks_offline() + + # Create soft links for the dirs created in step 2 + self._create_soft_links_to_directories() + + # Verify if soft links are proper or not + self._verify_soft_links_to_dir() + + # Add files through the soft links + self._create_files_and_dirs_on_mount_point(second_attempt=True) + + # Verify if the soft links are proper or not + self._verify_soft_links_to_dir(option=1) + + # Check if heal info is showing all the files and dirs to + # be healed + self._check_if_there_are_files_and_dirs_to_be_healed() + + # Bring back all brick processes which were killed + self._restart_volume_and_bring_all_offline_bricks_online() + + # Wait for heal to complete on the volume + self._wait_for_heal_is_completed() + + # Check if heal is complete and check if volume is in split brain + 
self._check_heal_is_completed_and_not_in_split_brain() + + # Verify if soft links are proper or not + self._verify_soft_links_to_dir(option=2) diff --git a/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py b/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py new file mode 100644 index 000000000..37bd2ec52 --- /dev/null +++ b/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py @@ -0,0 +1,600 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + are_bricks_online, + get_all_bricks) +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_volume_in_split_brain, + is_heal_complete, + enable_granular_heal, + disable_granular_heal) +from glustolibs.gluster.lib_utils import (add_user, del_user, group_del, + group_add, collect_bricks_arequal) +from glustolibs.gluster.volume_ops import get_volume_options +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.io.utils import collect_mounts_arequal + + +@runs_on([['distributed-replicated', 'replicated'], ['glusterfs']]) +class TestHealWithLinkFiles(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + self.first_client = self.mounts[0].client_system + self.mountpoint = self.mounts[0].mountpoint + self.user_group_created = False + + # If test case running is test_self_heal_meta_data + # create user and group + test_name_splitted = self.id().split('.') + test_id = test_name_splitted[len(test_name_splitted) - 1] + if test_id == 'test_self_heal_meta_data': + + # Create non-root group + if not group_add(self.first_client, 'qa_all'): + raise ExecutionError("Failed to create group qa_all") + + # Create non-root users + self.users = ('qa_func', 'qa_system', 'qa_perf') + for user in self.users: + if not add_user(self.first_client, user, group='qa_all'): + raise ExecutionError("Failed to create user {}" + .format(user)) + + self.user_group_created = True + g.log.info("Successfully created all users.") + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to setup and mount volume") + + def tearDown(self): + + # Delete 
non-root users and group if created + if self.user_group_created: + + # Delete non-root users + for user in self.users: + del_user(self.first_client, user) + g.log.info("Successfully deleted all users") + + # Delete non-root group + group_del(self.first_client, 'qa_all') + + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _set_granular_heal_to_on_or_off(self, enabled=False): + """Set granular heal to ON or OFF""" + granular = get_volume_options(self.mnode, self.volname, + 'granular-entry-heal') + if enabled: + if granular['cluster.granular-entry-heal'] != 'on': + ret = enable_granular_heal(self.mnode, self.volname) + self.assertTrue(ret, + "Unable to set granular-entry-heal to on") + else: + if granular['cluster.granular-entry-heal'] == 'on': + ret = disable_granular_heal(self.mnode, self.volname) + self.assertTrue(ret, + "Unable to set granular-entry-heal to off") + + def _run_cmd(self, io_cmd, err_msg): + """Run cmd and show error message if it fails""" + cmd = ("cd {}/test_self_heal;{}".format(self.mountpoint, io_cmd)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, err_msg) + + def _create_files_and_dirs_on_mount_point(self, index, second_set=False): + """A function to create files and dirs on mount point""" + # Create a parent directory test_self_heal on mount point + if not second_set: + ret = mkdir(self.first_client, '{}/{}'.format( + self.mountpoint, 'test_self_heal')) + self.assertTrue(ret, "Failed to create dir test_self_heal") + + # Create dirctories and files inside directory test_self_heal + io_cmd = ("for i in `seq 1 50`; do mkdir dir.$i; dd if=/dev/random" + " of=file.$i count=1K bs=$i; done", + + "for i in `seq 1 100`; do mkdir dir.$i; for j in `seq 1 5`;" + " do dd if=/dev/random of=dir.$i/file.$j bs=1K count=$j" + ";done;done", + + "for i in `seq 1 10`; do mkdir 
l1_dir.$i; for j in `seq " + "1 5`; do mkdir l1_dir.$i/l2_dir.$j; for k in `seq 1 10`;" + " do dd if=/dev/random of=l1_dir.$i/l2_dir.$j/test.$k" + " bs=1k count=$k; done; done; done;", + + "for i in `seq 51 100`; do mkdir new_dir.$i; for j in `seq" + " 1 10`; do dd if=/dev/random of=new_dir.$i/new_file.$j " + "bs=1K count=$j; done; dd if=/dev/random of=new_file.$i" + " count=1K bs=$i; done ;") + self._run_cmd( + io_cmd[index], "Failed to create dirs and files inside") + + def _delete_files_and_dirs(self): + """Delete files and dirs from mount point""" + io_cmd = ("for i in `seq 1 50`; do rm -rf dir.$i; rm -f file.$i;done") + self._run_cmd(io_cmd, "Failed to delete dirs and files") + + def _rename_files_and_dirs(self): + """Rename files and dirs from mount point""" + io_cmd = ("for i in `seq 51 100`; do mv new_file.$i renamed_file.$i;" + " for j in `seq 1 10`; do mv new_dir.$i/new_file.$j " + "new_dir.$i/renamed_file.$j ; done ; mv new_dir.$i " + "renamed_dir.$i; done;") + self._run_cmd(io_cmd, "Failed to rename dirs and files") + + def _change_meta_deta_of_dirs_and_files(self): + """Change meta data of dirs and files""" + cmds = ( + # Change permission + "for i in `seq 1 100`; do chmod 555 dir.$i; done; " + "for i in `seq 1 50`; do for j in `seq 1 5`; do chmod 666 " + "dir.$i/file.$j; done; done; for i in `seq 51 100`; do for " + "j in `seq 1 5`;do chmod 444 dir.$i/file.$j; done; done;", + + # Change ownership + "for i in `seq 1 35`; do chown -R qa_func dir.$i; done; " + "for i in `seq 36 70`; do chown -R qa_system dir.$i; done; " + "for i in `seq 71 100`; do chown -R qa_perf dir.$i; done;", + + # Change group + "for i in `seq 1 100`; do chgrp -R qa_all dir.$i; done;") + + for io_cmd in cmds: + self._run_cmd(io_cmd, + "Failed to change meta data on dirs and files") + g.log.info("Successfully changed meta data on dirs and files") + + def _verify_meta_data_of_files_and_dirs(self): + """Verify meta data of files and dirs""" + cmds = ( + # Verify permissions + "for i 
in `seq 1 50`; do stat -c %a dir.$i | grep -F \"555\";" + " if [ $? -ne 0 ]; then exit 1; fi; for j in `seq 1 5` ; do " + "stat -c %a dir.$i/file.$j | grep -F \"666\"; if [ $? -ne 0 ]" + "; then exit 1; fi; done; done; for i in `seq 51 100`; do " + "stat -c %a dir.$i | grep -F \"555\";if [ $? -ne 0 ]; then " + "exit 1; fi; for j in `seq 1 5`; do stat -c %a dir.$i/file.$j" + " | grep -F \"444\"; if [ $? -ne 0 ]; then exit 1; fi; done;" + "done;", + + # Verify ownership + "for i in `seq 1 35`; do stat -c %U dir.$i | grep -F " + "\"qa_func\"; if [ $? -ne 0 ]; then exit 1; fi; for j in " + "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F " + "\"qa_func\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;" + " for i in `seq 36 70` ; do stat -c %U dir.$i | grep -F " + "\"qa_system\"; if [ $? -ne 0 ]; then exit 1; fi; for j in " + "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F " + "\"qa_system\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;" + " for i in `seq 71 100` ; do stat -c %U dir.$i | grep -F " + "\"qa_perf\"; if [ $? -ne 0 ]; then exit 1; fi; for j in " + "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F " + "\"qa_perf\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;", + + # Verify group + "for i in `seq 1 100`; do stat -c %G dir.$i | grep -F " + "\"qa_all\"; if [ $? -ne 0 ]; then exit 1; fi; for j in " + "`seq 1 5`; do stat -c %G dir.$i/file.$j | grep -F " + "\"qa_all\"; if [ $? 
-ne 0 ]; then exit 1; fi; done; done;") + + for io_cmd in cmds: + self._run_cmd(io_cmd, "Meta data of dirs and files not proper") + + def _set_and_remove_extended_attributes(self, remove=False): + """Set and remove extended attributes""" + # Command to set extended attribute to files and dirs + io_cmd = ("for i in `seq 1 100`; do setfattr -n trusted.name -v " + "testing_xattr_selfheal_on_dirs dir.$i; for j in `seq 1 " + "5`;do setfattr -n trusted.name -v " + "testing_xattr_selfheal_on_files dir.$i/file.$j; done; " + "done;") + err_msg = "Failed to set extended attributes to files and dirs" + if remove: + # Command to remove extended attribute set on files and dirs + io_cmd = ("for i in `seq 1 100`; do setfattr -x trusted.name " + "dir.$i; for j in `seq 1 5`; do setfattr -x " + "trusted.name dir.$i/file.$j ; done ; done ;") + err_msg = "Failed to remove extended attributes to files and dirs" + + self._run_cmd(io_cmd, err_msg) + + def _verify_if_extended_attributes_are_proper(self, remove=False): + """Verify if extended attributes are set or remove properly""" + io_cmd = ("for i in `seq 1 100`; do getfattr -n trusted.name -e text " + "dir.$i | grep -F 'testing_xattr_selfheal_on_dirs'; if [ $? " + "-ne 0 ]; then exit 1 ; fi ; for j in `seq 1 5` ; do " + "getfattr -n trusted.name -e text dir.$i/file.$j | grep -F " + "'testing_xattr_selfheal_on_files'; if [ $? -ne 0 ]; then " + "exit 1; fi; done; done;") + err_msg = "Extended attributes on files and dirs are not proper" + if remove: + io_cmd = ("for i in `seq 1 100`; do getfattr -n trusted.name -e " + "text dir.$i; if [ $? -eq 0 ]; then exit 1; fi; for j in" + " `seq 1 5`; do getfattr -n trusted.name -e text " + "dir.$i/file.$j; if [ $? 
-eq 0 ]; then exit 1; fi; done; " + "done;") + err_msg = "Extended attributes set to files and dirs not removed" + self._run_cmd(io_cmd, err_msg) + + def _remove_files_and_create_dirs_with_the_same_name(self): + """Remove files and create dirs with the same name""" + io_cmd = ("for i in `seq 1 10`; do for j in `seq 1 5`; do for k in " + "`seq 1 10`; do rm -f l1_dir.$i/l2_dir.$j/test.$k; mkdir " + "l1_dir.$i/l2_dir.$j/test.$k; done; done; done;") + self._run_cmd(io_cmd, + "Failed to remove files and create dirs with same name") + + def _verify_if_dirs_are_proper_or_not(self): + """Verify if dirs are proper or not""" + io_cmd = ("for i in `seq 1 10`; do for j in `seq 1 5`; do for k in " + "`seq 1 10`; do stat -c %F l1_dir.$i/l2_dir.$j/test.$k | " + "grep -F 'directory'; if [ $? -ne 0 ]; then exit 1; fi; " + "done; done; done;") + self._run_cmd(io_cmd, "Dirs created instead of files aren't proper") + + def _bring_bricks_offline(self): + """Brings bricks offline and confirms if they are offline""" + # Select bricks to bring offline from a replica set + subvols_dict = get_subvols(self.mnode, self.volname) + subvols = subvols_dict['volume_subvols'] + self.bricks_to_bring_offline = [] + for subvol in subvols: + self.bricks_to_bring_offline.append(subvol[0]) + + # Bring bricks offline + ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' % + self.bricks_to_bring_offline) + + ret = are_bricks_offline(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks %s are not offline' + % self.bricks_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + self.bricks_to_bring_offline) + + def _restart_volume_and_bring_all_offline_bricks_online(self): + """Restart volume and bring all offline bricks online""" + ret = bring_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline, + bring_bricks_online_methods=[ + 'volume_start_force']) +
self.assertTrue(ret, 'Failed to bring bricks %s online' % + self.bricks_to_bring_offline) + + # Check if bricks are back online or not + ret = are_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks not online %s even after restart' % + self.bricks_to_bring_offline) + + g.log.info('Bringing bricks %s online is successful', + self.bricks_to_bring_offline) + + def _check_arequal_on_bricks_with_a_specific_arequal(self, arequal, + brick_list): + """ + Compare an initial arequal checksum with bricks from a given brick list + """ + init_val = arequal[0].splitlines()[-1].split(':')[-1] + ret, arequals = collect_bricks_arequal(brick_list) + self.assertTrue(ret, 'Failed to get arequal on bricks') + for brick_arequal in arequals: + brick_total = brick_arequal.splitlines()[-1].split(':')[-1] + self.assertEqual(init_val, brick_total, 'Arequals not matching') + + @staticmethod + def _add_dir_path_to_brick_list(brick_list): + """Add test_self_heal at the end of brick path""" + dir_brick_list = [] + for brick in brick_list: + dir_brick_list.append('{}/{}'.format(brick, 'test_self_heal')) + return dir_brick_list + + def _check_arequal_checksum_for_the_volume(self): + """ + Check if arequals of mount point and bricks + are the same.
+ """ + if self.volume_type == "replicated": + # Check arequals for "replicated" + brick_list = get_all_bricks(self.mnode, self.volname) + dir_brick_list = self._add_dir_path_to_brick_list(brick_list) + + # Get arequal before getting bricks offline + work_dir = '{}/test_self_heal'.format(self.mountpoint) + ret, arequals = collect_mounts_arequal([self.mounts[0]], + path=work_dir) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal before getting bricks offline ' + 'is successful') + + # Get arequal on bricks and compare with mount_point_total + self._check_arequal_on_bricks_with_a_specific_arequal( + arequals, dir_brick_list) + + # Check arequals for "distributed-replicated" + if self.volume_type == "distributed-replicated": + # Get the subvolumes + subvols_dict = get_subvols(self.mnode, self.volname) + num_subvols = len(subvols_dict['volume_subvols']) + + # Get arequals and compare + for i in range(0, num_subvols): + # Get arequal for first brick + brick_list = subvols_dict['volume_subvols'][i] + dir_brick_list = self._add_dir_path_to_brick_list(brick_list) + ret, arequals = collect_bricks_arequal([dir_brick_list[0]]) + self.assertTrue(ret, 'Failed to get arequal on first brick') + + # Get arequal for every brick and compare with first brick + self._check_arequal_on_bricks_with_a_specific_arequal( + arequals, dir_brick_list) + + def _check_heal_is_completed_and_not_in_split_brain(self): + """Check if heal is completed and volume not in split brain""" + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') + + # Check if volume is in split brain or not + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + def _check_if_there_are_files_and_dirs_to_be_healed(self): + """Check if there are files and dirs to be
healed""" + ret = is_heal_complete(self.mnode, self.volname) + self.assertFalse(ret, 'Heal is completed') + g.log.info('Heal is pending') + + def _wait_for_heal_is_completed(self): + """Wait for heal to complete""" + ret = monitor_heal_completion(self.mnode, self.volname, + timeout_period=3600) + self.assertTrue(ret, 'Heal has not yet completed') + + def _check_heal_status_restart_vol_wait_and_check_data(self): + """ + Perform repetitive steps mentioned below: + 1 Check if heal info is showing all the files and dirs to be healed + 2 Bring back all brick processes which were killed + 3 Wait for heal to complete on the volume + 4 Check if heal is complete and check if volume is in split brain + 5 Collect and compare arequal-checksum according to the volume type + for bricks + """ + # Check if heal info is showing all the files and dirs to be healed + self._check_if_there_are_files_and_dirs_to_be_healed() + + # Bring back all brick processes which were killed + self._restart_volume_and_bring_all_offline_bricks_online() + + # Wait for heal to complete on the volume + self._wait_for_heal_is_completed() + + # Check if heal is complete and check if volume is in split brain + self._check_heal_is_completed_and_not_in_split_brain() + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + + def _run_test_self_heal_entry_heal(self): + """Run steps of test_self_heal_entry_heal""" + # Create a directory and create files and directories inside it on + # mount point + self._create_files_and_dirs_on_mount_point(0) + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + + # Bring down brick processes according to the volume type + self._bring_bricks_offline() + + # Create a new set of files and directories on mount point + self._create_files_and_dirs_on_mount_point(3, second_set=True) + +
self._check_heal_status_restart_vol_wait_and_check_data() + + # Bring down brick processes according to the volume type + self._bring_bricks_offline() + + # Delete files and directories from mount point + self._delete_files_and_dirs() + + self._check_heal_status_restart_vol_wait_and_check_data() + + # Bring down brick processes according to the volume type + self._bring_bricks_offline() + + # Rename the existing files and dirs + self._rename_files_and_dirs() + + self._check_heal_status_restart_vol_wait_and_check_data() + + def test_self_heal_entry_heal(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create a directory and create files and directories inside it + on mount point. + 3. Collect and compare arequal-checksum according to the volume type + for bricks. + 4. Bring down brick processes according to the volume type. + 5. Create a new set of files and directories on mount point. + 6. Check if heal info is showing all the files and dirs to be healed. + 7. Bring back all brick processes which were killed. + 8. Wait for heal to complete on the volume. + 9. Check if heal is complete and check if volume is in split brain. + 10. Collect and compare arequal-checksum according to the volume type + for bricks. + 11. Bring down brick processes according to the volume type. + 12. Delete files and directories from mount point. + 13. Check if heal info is showing all the files and dirs to be healed. + 14. Bring back all brick processes which were killed. + 15. Wait for heal to complete on the volume. + 16. Check if heal is complete and check if volume is in split brain. + 17. Collect and compare arequal-checksum according to the volume type + for bricks. + 18. Bring down brick processes according to the volume type. + 19. Rename the existing files and dirs. + 20. Check if heal info is showing all the files and dirs to be healed. + 21. Bring back all brick processes which were killed. + 22. Wait for heal to complete on the volume. + 23.
Check if heal is complete and check if volume is in split brain. + 24. Collect and compare arequal-checksum according to the volume type + for bricks. + + Note: + Run this test with granular-entry-heal both disabled and enabled. + """ + for value in (False, True): + if value: + # Cleanup old data from mount point + ret, _, _ = g.run(self.first_client, + 'rm -rf {}/*'.format(self.mountpoint)) + self.assertFalse(ret, 'Failed to cleanup mount point') + g.log.info("Testing with granular heal set to enabled") + self._set_granular_heal_to_on_or_off(enabled=value) + self._run_test_self_heal_entry_heal() + + def test_self_heal_meta_data(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create a directory and create files and directories inside it + on mount point. + 3. Collect and compare arequal-checksum according to the volume type + for bricks. + 4. Bring down brick processes according to the volume type. + 5. Change the meta data of files and dirs. + 6. Check if heal info is showing all the files and dirs to be healed. + 7. Bring back all brick processes which were killed. + 8. Wait for heal to complete on the volume. + 9. Check if heal is complete and check if volume is in split brain. + 10. Collect and compare arequal-checksum according to the volume type + for bricks. + 11. Verify the meta data of files and dirs. + 12. Bring down brick processes according to the volume type. + 13. Set extended attributes on the files and dirs. + 14. Verify if the extended attributes are set properly or not. + 15. Check if heal info is showing all the files and dirs to be healed. + 16. Bring back all brick processes which were killed. + 17. Wait for heal to complete on the volume. + 18. Check if heal is complete and check if volume is in split brain. + 19. Collect and compare arequal-checksum according to the volume type + for bricks. + 20. Verify if extended attributes are consistent or not. + 21.
Bring down brick processes according to the volume type. + 22. Remove extended attributes on the files and dirs. + 23. Verify if extended attributes were removed properly. + 24. Check if heal info is showing all the files and dirs to be healed. + 25. Bring back all brick processes which were killed. + 26. Wait for heal to complete on the volume. + 27. Check if heal is complete and check if volume is in split brain. + 28. Collect and compare arequal-checksum according to the volume type + for bricks. + 29. Verify if extended attributes are removed or not. + """ + # Create a directory and create files and directories inside it + # on mount point + self._create_files_and_dirs_on_mount_point(1) + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + + # Bring down brick processes according to the volume type + self._bring_bricks_offline() + + # Change the meta data of files and dirs + self._change_meta_deta_of_dirs_and_files() + + self._check_heal_status_restart_vol_wait_and_check_data() + + # Verify the meta data of files and dirs + self._verify_meta_data_of_files_and_dirs() + + for value in (False, True): + # Bring down brick processes according to the volume type + self._bring_bricks_offline() + + # Set or remove extended attributes on the files and dirs + self._set_and_remove_extended_attributes(remove=value) + + # Verify if the extended attributes are set properly or not + self._verify_if_extended_attributes_are_proper(remove=value) + + self._check_heal_status_restart_vol_wait_and_check_data() + + # Verify if extended attributes are consistent or not + self._verify_if_extended_attributes_are_proper(remove=value) + + def test_self_heal_of_dir_with_files_removed(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create a directory and create files and directories inside it + on mount point. + 3.
Collect and compare arequal-checksum according to the volume type + for bricks. + 4. Bring down brick processes according to the volume type. + 5. Remove all files and create dirs with the same names as the files. + 6. Check if heal info is showing all the files and dirs to be healed. + 7. Bring back all brick processes which were killed. + 8. Wait for heal to complete on the volume. + 9. Check if heal is complete and check if volume is in split brain. + 10. Collect and compare arequal-checksum according to the volume type + for bricks. + 11. Verify if dirs are healed properly or not. + """ + # Create a directory and create files and directories inside it + # on mount point + self._create_files_and_dirs_on_mount_point(2) + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + + # Bring down brick processes according to the volume type + self._bring_bricks_offline() + + # Remove all files and create dirs with the same names as the files + self._remove_files_and_create_dirs_with_the_same_name() + + self._check_heal_status_restart_vol_wait_and_check_data() + + # Verify if dirs are healed properly or not + self._verify_if_dirs_are_proper_or_not() diff --git a/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py b/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py index 43b4f4edf..a449e396f 100644 --- a/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py +++ b/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py @@ -1,4 +1,4 @@ -# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2021 Red Hat, Inc.
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -26,12 +26,14 @@ from glustolibs.gluster.brick_libs import ( select_volume_bricks_to_bring_offline, get_online_bricks_list) from glustolibs.gluster.heal_libs import ( get_self_heal_daemon_pid, is_shd_daemonized, - monitor_heal_completion, bring_self_heal_daemon_process_offline) + monitor_heal_completion, bring_self_heal_daemon_process_offline, + disable_granular_heal) from glustolibs.gluster.heal_ops import (get_heal_info_summary, trigger_heal_full) from glustolibs.io.utils import validate_io_procs from glustolibs.misc.misc_libs import upload_scripts -from glustolibs.gluster.volume_ops import set_volume_options +from glustolibs.gluster.volume_ops import (set_volume_options, + get_volume_options) from glustolibs.gluster.mount_ops import mount_volume, umount_volume @@ -99,6 +101,15 @@ class SelfHealDaemonProcessTestsWithSingleVolume(GlusterBaseClass): * heal should complete successfully """ # pylint: disable=too-many-locals,too-many-statements,too-many-lines + + # Disable granular heal if not disabled already + granular = get_volume_options(self.mnode, self.volname, + 'granular-entry-heal') + if granular['cluster.granular-entry-heal'] == 'on': + ret = disable_granular_heal(self.mnode, self.volname) + self.assertTrue(ret, + "Unable to set granular-entry-heal to on") + # Setting Volume options options = {"metadata-self-heal": "on", "entry-self-heal": "on", @@ -131,7 +142,7 @@ class SelfHealDaemonProcessTestsWithSingleVolume(GlusterBaseClass): all_mounts_procs, num_files_to_write = [], 100 for mount_obj in self.mounts: cmd = ("/usr/bin/env python %s create_files " - "-f %s --base-file-name file %s" % (self.script_upload_path, + "-f %d --base-file-name file %s" % (self.script_upload_path, num_files_to_write, mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, cmd, @@ -221,8 +232,8 @@ 
class SelfHealDaemonProcessTestsWithSingleVolume(GlusterBaseClass): all_mounts_procs = [] for mount_obj in self.mounts: - cmd = ("/usr/bin/env python %s read %s" - % (self.script_upload_path, mount_obj.mountpoint)) + cmd = ("cd %s;for i in `seq 1 5`; do ls -l;cat *; stat *; sleep 5;" + " done " % (mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) all_mounts_procs.append(proc) diff --git a/tests/functional/afr/test_add_brick_followed_by_remove_brick.py b/tests/functional/afr/test_add_brick_followed_by_remove_brick.py new file mode 100644 index 000000000..a653b792d --- /dev/null +++ b/tests/functional/afr/test_add_brick_followed_by_remove_brick.py @@ -0,0 +1,170 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.dht_test_utils import is_layout_complete +from glustolibs.gluster.glusterfile import (file_exists, + occurences_of_pattern_in_file) +from glustolibs.gluster.rebalance_ops import (rebalance_start, + wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_libs import expand_volume, shrink_volume +from glustolibs.io.utils import (validate_io_procs, wait_for_io_to_complete) +from glustolibs.misc.misc_libs import upload_scripts + + +@runs_on([['replicated'], ['glusterfs']]) +class TestAddBrickFollowedByRemoveBrick(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + cls.get_super_method(cls, 'setUpClass')() + + cls.first_client = cls.mounts[0].client_system + cls.mountpoint = cls.mounts[0].mountpoint + cls.is_io_running = False + + # Upload IO scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + if not file_exists(cls.first_client, cls.script_upload_path): + if not upload_scripts(cls.first_client, cls.script_upload_path): + raise ExecutionError( + "Failed to upload IO scripts to client %s" + % cls.first_client) + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to setup and mount volume") + + def tearDown(self): + + if self.is_io_running: + if not wait_for_io_to_complete(self.all_mounts_procs, + [self.mounts[0]]): + raise ExecutionError("IO failed on some of the clients") + + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _check_layout_of_bricks(self): + 
"""Check the layout of bricks""" + ret = is_layout_complete(self.mnode, self.volname, "/") + self.assertTrue(ret, ("Volume %s: Layout is not complete", + self.volname)) + g.log.info("Volume %s: Layout is complete", self.volname) + + def _add_brick_and_wait_for_rebalance_to_complete(self): + """Add brick and wait for rebalance to complete""" + + # Add brick to volume + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, "Failed to add brick on volume %s" + % self.volname) + + # Trigger rebalance and wait for it to complete + ret, _, _ = rebalance_start(self.mnode, self.volname, + force=True) + self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s" + % self.volname) + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=1200) + self.assertTrue(ret, "Rebalance is not yet complete on the volume " + "%s" % self.volname) + g.log.info("Rebalance successfully completed") + + self._check_layout_of_bricks() + + def _remove_brick_from_volume(self): + """Remove bricks from volume""" + # Remove bricks from the volume + ret = shrink_volume(self.mnode, self.volname, rebalance_timeout=2000) + self.assertTrue(ret, "Failed to remove-brick from volume") + g.log.info("Remove-brick rebalance successful") + + def test_add_brick_followed_by_remove_brick(self): + """ + Test case: + 1. Create a volume, start it and mount it to a client. + 2. Start I/O on volume. + 3. Add brick and trigger rebalance, wait for rebalance to complete. + (The volume which was 1x3 should now be 2x3) + 4. Add brick and trigger rebalance, wait for rebalance to complete. + (The volume which was 2x3 should now be 3x3) + 5. Remove brick from volume such that it becomes a 2x3. + 6. Remove brick from volume such that it becomes a 1x3. + 7. Wait for I/O to complete and check for any input/output errors in + both client and rebalance logs. 
+ """ + # Start I/O on mount point + self.all_mounts_procs = [] + cmd = ("/usr/bin/env python {} create_deep_dirs_with_files " + "--dirname-start-num {} --dir-depth 5 --dir-length 5 " + "--max-num-of-dirs 5 --num-of-files 5 {}" + .format(self.script_upload_path, 10, self.mountpoint)) + proc = g.run_async(self.first_client, cmd) + self.all_mounts_procs.append(proc) + self.is_io_running = True + + # Convert 1x3 to 2x3 and then convert 2x3 to 3x3 + for _ in range(0, 2): + self._add_brick_and_wait_for_rebalance_to_complete() + + # Convert 3x3 to 2x3 and then convert 2x3 to 1x3 + for _ in range(0, 2): + self._remove_brick_from_volume() + + # Validate I/O processes running on the nodes + ret = validate_io_procs(self.all_mounts_procs, [self.mounts[0]]) + self.is_io_running = False + self.assertTrue(ret, "IO failed on some of the clients") + g.log.info("IO on all mounts: Complete") + + # Check for Input/output errors in rebalance logs + particiapting_nodes = [] + for brick in get_all_bricks(self.mnode, self.volname): + node, _ = brick.split(':') + particiapting_nodes.append(node) + + for server in particiapting_nodes: + ret = occurences_of_pattern_in_file( + server, "Input/output error", + "/var/log/glusterfs/{}-rebalance.log".format(self.volname)) + self.assertEqual(ret, 0, + "[Input/output error] present in rebalance log" + " file") + + # Check for Input/output errors in client logs + ret = occurences_of_pattern_in_file( + self.first_client, "Input/output error", + "/var/log/glusterfs/mnt-{}_{}.log".format(self.volname, + self.mount_type)) + self.assertEqual(ret, 0, + "[Input/output error] present in client log file") + g.log.info("Expanding and shrinking volume successful and no I/O " + "errors see in rebalance and client logs") diff --git a/tests/functional/afr/test_afr_cli_no_splitbrain_resolution.py b/tests/functional/afr/test_afr_cli_no_splitbrain_resolution.py index ad6f336a5..1acd11faa 100644 --- a/tests/functional/afr/test_afr_cli_no_splitbrain_resolution.py +++ 
b/tests/functional/afr/test_afr_cli_no_splitbrain_resolution.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2020 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2021 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -112,17 +112,16 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("creating 5 files from mount point") all_mounts_procs = [] - for mount_obj in self.mounts: - cmd = ("/usr/bin/env python %s create_files -f 5 " - "--base-file-name test_file --fixed-file-size 1k %s" % ( - self.script_upload_path, - mount_obj.mountpoint)) - proc = g.run_async(mount_obj.client_system, cmd, - user=mount_obj.user) - all_mounts_procs.append(proc) + cmd = ("/usr/bin/env python %s create_files -f 5 " + "--base-file-name test_file --fixed-file-size 1k %s" % ( + self.script_upload_path, self.mounts[0].mountpoint)) + proc = g.run_async(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + all_mounts_procs.append(proc) + # Validate I/O g.log.info("Wait for IO to complete and validate IO.....") - ret = validate_io_procs(all_mounts_procs, self.mounts) + ret = validate_io_procs(all_mounts_procs, [self.mounts[0]]) self.assertTrue(ret, "IO failed on some of the clients") g.log.info("IO is successful on all mounts") g.log.info("Successfully created a file from mount point") @@ -149,17 +148,16 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("creating 5 new files of same name from mount point") all_mounts_procs = [] - for mount_obj in self.mounts: - cmd = ("/usr/bin/env python %s create_files -f 5 " - "--base-file-name test_file --fixed-file-size 10k %s" % ( - self.script_upload_path, - mount_obj.mountpoint)) - proc = g.run_async(mount_obj.client_system, cmd, - user=mount_obj.user) - all_mounts_procs.append(proc) + cmd = ("/usr/bin/env python %s create_files -f 5 " + "--base-file-name test_file --fixed-file-size 10k %s" % ( + 
self.script_upload_path, self.mounts[0].mountpoint)) + proc = g.run_async(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + all_mounts_procs.append(proc) + # Validate I/O g.log.info("Wait for IO to complete and validate IO.....") - ret = validate_io_procs(all_mounts_procs, self.mounts) + ret = validate_io_procs(all_mounts_procs, [self.mounts[0]]) self.assertTrue(ret, "IO failed on some of the clients") g.log.info("IO is successful on all mounts") g.log.info("Successfully created a new file of same name " @@ -225,10 +223,11 @@ class TestSelfHeal(GlusterBaseClass): fpath = (self.mounts[0].mountpoint + '/test_file' + str(fcount) + '.txt') status = get_fattr(self.mounts[0].client_system, - fpath, 'replica.split-brain-status') + fpath, 'replica.split-brain-status', + encode="text") compare_string = ("The file is not under data or metadata " "split-brain") - self.assertEqual(status.rstrip('\x00'), compare_string, + self.assertEqual(status, compare_string, "file test_file%s is under" " split-brain" % str(fcount)) g.log.info("none of the files are under split-brain") diff --git a/tests/functional/afr/test_default_granular_entry_heal.py b/tests/functional/afr/test_default_granular_entry_heal.py new file mode 100644 index 000000000..91ca25907 --- /dev/null +++ b/tests/functional/afr/test_default_granular_entry_heal.py @@ -0,0 +1,235 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from random import choice + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + are_bricks_online, get_all_bricks) +from glustolibs.gluster.glusterfile import occurences_of_pattern_in_file +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_heal_complete) +from glustolibs.gluster.lib_utils import collect_bricks_arequal +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.volume_ops import get_volume_options +from glustolibs.io.utils import collect_mounts_arequal + + +@runs_on([['distributed-replicated', 'replicated', + 'arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestDefaultGranularEntryHeal(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + self.first_client = self.mounts[0].client_system + self.mountpoint = self.mounts[0].mountpoint + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to setup and mount volume") + + def tearDown(self): + + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _bring_bricks_offline(self): + """Brings bricks offline and confirms if they are offline""" + # Select bricks to bring offline from a replica set + subvols_dict = get_subvols(self.mnode, self.volname) + subvols = subvols_dict['volume_subvols'] + self.bricks_to_bring_offline = [] + for subvol in subvols: + 
self.bricks_to_bring_offline.append(choice(subvol)) + + # Bring bricks offline + ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' % + self.bricks_to_bring_offline) + + ret = are_bricks_offline(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks %s are not offline' + % self.bricks_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + self.bricks_to_bring_offline) + + def _restart_volume_and_bring_all_offline_bricks_online(self): + """Restart volume and bring all offline bricks online""" + + ret = is_heal_complete(self.mnode, self.volname) + self.assertFalse(ret, 'Heal is completed') + g.log.info('Heal is pending') + + ret = bring_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline, + bring_bricks_online_methods=[ + 'volume_start_force']) + self.assertTrue(ret, 'Failed to bring bricks %s online' % + self.bricks_to_bring_offline) + + # Check if bricks are back online or not + ret = are_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks not online %s even after restart' % + self.bricks_to_bring_offline) + + g.log.info('Bringing bricks %s online is successful', + self.bricks_to_bring_offline) + + def _wait_for_heal_to_completed(self): + """Wait for heal to complete""" + ret = monitor_heal_completion(self.mnode, self.volname, + timeout_period=3600) + self.assertTrue(ret, 'Heal has not yet completed') + + def _check_arequal_on_bricks_with_a_specific_arequal(self, arequal, + brick_list): + """ + Compare an initial arequal checksum with bricks from a given brick list + """ + init_val = arequal[0].splitlines()[-1].split(':')[-1] + ret, arequals = collect_bricks_arequal(brick_list) + self.assertTrue(ret, 'Failed to get arequal on bricks') + for brick_arequal in arequals: + brick_total = brick_arequal.splitlines()[-1].split(':')[-1] + self.assertEqual(init_val,
brick_total, 'Arequals not matching') + + @staticmethod + def _add_dir_path_to_brick_list(brick_list): + """Add test_self_heal at the end of brick path""" + dir_brick_list = [] + for brick in brick_list: + dir_brick_list.append('{}/{}'.format(brick, 'mydir')) + return dir_brick_list + + def _check_arequal_checksum_for_the_volume(self): + """ + Check if arequals of mount point and bricks are + are the same. + """ + if self.volume_type == "replicated": + # Check arequals for "replicated" + brick_list = get_all_bricks(self.mnode, self.volname) + dir_brick_list = self._add_dir_path_to_brick_list(brick_list) + + # Get arequal before getting bricks offline + work_dir = '{}/mydir'.format(self.mountpoint) + ret, arequals = collect_mounts_arequal([self.mounts[0]], + path=work_dir) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal before getting bricks offline ' + 'is successful') + + # Get arequal on bricks and compare with mount_point_total + self._check_arequal_on_bricks_with_a_specific_arequal( + arequals, dir_brick_list) + + # Check arequals for "distributed-replicated" + if self.volume_type == "distributed-replicated": + # Get the subvolumes + subvols_dict = get_subvols(self.mnode, self.volname) + num_subvols = len(subvols_dict['volume_subvols']) + + # Get arequals and compare + for i in range(0, num_subvols): + # Get arequal for first brick + brick_list = subvols_dict['volume_subvols'][i] + dir_brick_list = self._add_dir_path_to_brick_list(brick_list) + ret, arequals = collect_bricks_arequal([dir_brick_list[0]]) + self.assertTrue(ret, 'Failed to get arequal on first brick') + + # Get arequal for every brick and compare with first brick + self._check_arequal_on_bricks_with_a_specific_arequal( + arequals, dir_brick_list) + + def test_default_granular_entry_heal(self): + """ + Test case: + 1. Create a cluster. + 2. Create volume start it and mount it. + 3. Check if cluster.granular-entry-heal is ON by default or not. + 4. 
Check /var/lib/glusterd/<volname>/info for + cluster.granular-entry-heal=on. + 5. Check if option granular-entry-heal is present in the + volume graph or not. + 6. Kill one or two bricks of the volume depending on volume type. + 7. Create all types of files on the volume like text files, hidden + files, link files, dirs, char device, block device and so on. + 8. Bring back the killed brick by restarting the volume. + 9. Wait for heal to complete. + 10. Check arequal-checksum of all the bricks and see if it's proper or + not. + """ + # Check if cluster.granular-entry-heal is ON by default or not + ret = get_volume_options(self.mnode, self.volname, + 'granular-entry-heal') + self.assertEqual(ret['cluster.granular-entry-heal'], 'on', + "Value of cluster.granular-entry-heal not on " + "by default") + + # Check var/lib/glusterd/<volname>/info for + # cluster.granular-entry-heal=on + ret = occurences_of_pattern_in_file(self.mnode, + 'cluster.granular-entry-heal=on', + '/var/lib/glusterd/vols/{}/info' + .format(self.volname)) + self.assertEqual(ret, 1, "Failed get cluster.granular-entry-heal=on in" + " info file") + + # Check if option granular-entry-heal is present in the + # volume graph or not + ret = occurences_of_pattern_in_file(self.first_client, + 'option granular-entry-heal on', + "/var/log/glusterfs/mnt-{}_{}.log" + .format(self.volname, + self.mount_type)) + self.assertTrue(ret > 0, + "Failed to find granular-entry-heal in volume graph") + g.log.info("granular-entry-heal properly set to ON by default") + + # Kill one or two bricks of the volume depending on volume type + self._bring_bricks_offline() + + # Create all types of files on the volume like text files, hidden + # files, link files, dirs, char device, block device and so on + cmd = ("cd {};mkdir mydir;cd mydir;mkdir dir;mkdir .hiddendir;" + "touch file;touch .hiddenfile;mknod blockfile b 1 5;" + "mknod charfile b 1 5; mkfifo pipefile;touch fileforhardlink;" + "touch fileforsoftlink;ln fileforhardlink 
hardlinkfile;" + "ln -s fileforsoftlink softlinkfile".format(self.mountpoint)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to create files of all types") + + # Bring back the killed brick by restarting the volume Bricks should + # be online again + self._restart_volume_and_bring_all_offline_bricks_online() + + # Wait for heal to complete + self._wait_for_heal_to_completed() + + # Check arequal-checksum of all the bricks and see if it's proper or + # not + self._check_arequal_checksum_for_the_volume() diff --git a/tests/functional/afr/test_self_heal_with_expand_volume.py b/tests/functional/afr/test_self_heal_with_expand_volume.py new file mode 100644 index 000000000..d5b6d5d43 --- /dev/null +++ b/tests/functional/afr/test_self_heal_with_expand_volume.py @@ -0,0 +1,221 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
from random import choice

from glusto.core import Glusto as g
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.brick_libs import (bring_bricks_offline,
                                           bring_bricks_online,
                                           are_bricks_offline,
                                           are_bricks_online, get_all_bricks)
from glustolibs.gluster.glusterfile import (set_file_permissions,
                                            occurences_of_pattern_in_file)
from glustolibs.gluster.heal_libs import (monitor_heal_completion,
                                          is_heal_complete)
from glustolibs.gluster.rebalance_ops import (
    rebalance_start, wait_for_rebalance_to_complete)
from glustolibs.gluster.lib_utils import (add_user, del_user)
from glustolibs.gluster.volume_libs import (get_subvols, expand_volume)


@runs_on([['distributed-replicated'], ['glusterfs']])
class TestHealWithExpandVolume(GlusterBaseClass):
    """Self heal combined with add-brick/rebalance on multi-user data."""

    def setUp(self):
        """Create the non-root users, then set up and mount the volume."""
        self.get_super_method(self, 'setUp')()

        self.first_client = self.mounts[0].client_system
        self.mountpoint = self.mounts[0].mountpoint

        # Create non-root users
        self.users = ('qa_user', 'qa_admin')
        for user in self.users:
            if not add_user(self.first_client, user):
                raise ExecutionError("Failed to create non-root user {}"
                                     .format(user))
        g.log.info("Successfully created non-root users")

        # Setup Volume
        if not self.setup_volume_and_mount_volume([self.mounts[0]]):
            raise ExecutionError("Failed to setup and mount volume")

    def tearDown(self):
        """Remove the test users and their home dirs, then clean the volume."""
        # Delete non-root users
        for user in self.users:
            del_user(self.first_client, user)
            ret, _, _ = g.run(self.first_client,
                              "rm -rf /home/{}".format(user))
            if ret:
                raise ExecutionError("Failed to remove home dir of "
                                     "non-root user")
        g.log.info("Successfully deleted all users")

        if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]):
            raise ExecutionError("Failed to cleanup Volume")

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()

    def _bring_bricks_offline(self):
        """Brings bricks offline and confirms if they are offline"""
        # Select a single random brick from the first replica set
        subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
        self.bricks_to_bring_offline = [choice(subvols[0])]

        # Bring bricks offline
        ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline)
        self.assertTrue(ret, 'Failed to bring bricks %s offline' %
                        self.bricks_to_bring_offline)

        ret = are_bricks_offline(self.mnode, self.volname,
                                 self.bricks_to_bring_offline)
        self.assertTrue(ret, 'Bricks %s are not offline'
                        % self.bricks_to_bring_offline)
        g.log.info('Bringing bricks %s offline is successful',
                   self.bricks_to_bring_offline)

    def _restart_volume_and_bring_all_offline_bricks_online(self):
        """Restart volume and bring all offline bricks online"""
        ret = bring_bricks_online(self.mnode, self.volname,
                                  self.bricks_to_bring_offline,
                                  bring_bricks_online_methods=[
                                      'volume_start_force'])
        self.assertTrue(ret, 'Failed to bring bricks %s online' %
                        self.bricks_to_bring_offline)

        # Check if bricks are back online or not
        ret = are_bricks_online(self.mnode, self.volname,
                                self.bricks_to_bring_offline)
        self.assertTrue(ret, 'Bricks not online %s even after restart' %
                        self.bricks_to_bring_offline)

        g.log.info('Bringing bricks %s online is successful',
                   self.bricks_to_bring_offline)

    def _wait_for_heal_to_completed(self):
        """Check if heal is completed"""
        ret = monitor_heal_completion(self.mnode, self.volname,
                                      timeout_period=3600)
        self.assertTrue(ret, 'Heal has not yet completed')

    def _check_if_there_are_files_to_be_healed(self):
        """Check if there are files and dirs to be healed"""
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertFalse(ret, 'Heal is completed')
        g.log.info('Heal is pending')

    def _expand_volume_and_wait_for_rebalance_to_complete(self):
        """Expand volume and wait for rebalance to complete"""
        # Add brick to volume
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, "Failed to add brick on volume %s"
                        % self.volname)

        # Trigger rebalance and wait for it to complete
        ret, _, _ = rebalance_start(self.mnode, self.volname,
                                    force=True)
        self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s"
                         % self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
                                             timeout=6000)
        self.assertTrue(ret, "Rebalance is not yet complete on the volume "
                             "%s" % self.volname)
        g.log.info("Rebalance successfully completed")

    def test_self_heal_and_add_brick_with_data_from_diff_users(self):
        """
        Test case:
        1. Created a 2X3 volume.
        2. Mount the volume using FUSE and give 777 permissions to the mount.
        3. Added a new user.
        4. Login as new user and created 100 files from the new user:
           for i in {1..100};do dd if=/dev/urandom of=$i bs=1024 count=1;done
        5. Kill a brick which is part of the volume.
        6. On the mount, login as root user and create 1000 files:
           for i in {1..1000};do dd if=/dev/urandom of=f$i bs=10M count=1;done
        7. On the mount, login as new user, and copy existing data to
           the mount.
        8. Start volume using force.
        9. While heal is in progress, add-brick and start rebalance.
        10. Wait for rebalance and heal to complete,
        11. Check for MSGID: 108008 errors in rebalance logs.
        """
        # Change permissions of mount point to 777
        ret = set_file_permissions(self.first_client, self.mountpoint,
                                   '-R 777')
        self.assertTrue(ret, "Unable to change mount point permissions")
        g.log.info("Mount point permissions set to 777")

        # Create 100 files from non-root user
        cmd = ("su -l %s -c 'cd %s; for i in {1..100};do dd if=/dev/urandom "
               "of=nonrootfile$i bs=1024 count=1; done'" % (self.users[0],
                                                            self.mountpoint))
        ret, _, _ = g.run(self.first_client, cmd)
        self.assertFalse(ret, "Failed to create files from non-root user")

        # Kill one brick which is part of the volume
        self._bring_bricks_offline()

        # Create 1000 files from root user
        cmd = ("cd %s; for i in {1..1000};do dd if=/dev/urandom of=rootfile$i"
               " bs=10M count=1;done" % self.mountpoint)
        ret, _, _ = g.run(self.first_client, cmd)
        self.assertFalse(ret, "Failed to creare files from root user")

        # On the mount, login as new user, and copy existing data to
        # the mount
        cmd = ("su -l %s -c 'wget https://cdn.kernel.org/pub/linux/kernel/"
               "v5.x/linux-5.4.54.tar.xz; tar -xvf linux-5.4.54.tar.xz;"
               "cd %s; cp -r ~/ .;'" % (self.users[1], self.mountpoint))
        ret, _, _ = g.run(self.first_client, cmd)
        self.assertFalse(ret, "Failed to copy files from non-root user")

        # Check if there are files to be healed
        self._check_if_there_are_files_to_be_healed()

        # Start the vol using force
        self._restart_volume_and_bring_all_offline_bricks_online()

        # Add bricks to volume and wait for rebalance to complete
        self._expand_volume_and_wait_for_rebalance_to_complete()

        # Wait for heal to complete
        self._wait_for_heal_to_completed()

        # Check for MSGID: 108008 errors in rebalance logs.
        # Several bricks may live on the same node; a set makes sure each
        # node's rebalance log is inspected only once.
        participating_nodes = sorted(
            {brick.split(':')[0]
             for brick in get_all_bricks(self.mnode, self.volname)})

        for server in participating_nodes:
            ret = occurences_of_pattern_in_file(
                server, "MSGID: 108008",
                "/var/log/glusterfs/{}-rebalance.log".format(self.volname))
            self.assertEqual(ret, 0,
                             "[Input/output error] present in rebalance log"
                             " file")
        g.log.info("Expanding volume successful and no MSGID: 108008 "
                   "errors see in rebalance logs")
# pylint: disable=too-many-statements, too-many-locals, unused-variable
from glusto.core import Glusto as g

from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.brick_libs import (get_all_bricks,
                                           bring_bricks_offline,
                                           bring_bricks_online,
                                           are_bricks_offline)
from glustolibs.gluster.heal_ops import trigger_heal
from glustolibs.gluster.heal_libs import (is_volume_in_split_brain,
                                          monitor_heal_completion,
                                          is_heal_complete)

from glustolibs.gluster.volume_ops import set_volume_options
from glustolibs.gluster.glusterfile import create_link_file


@runs_on([['distributed-replicated'], ['glusterfs']])
class TestSelfHeal(GlusterBaseClass):
    """Renaming hard links with bricks down must not cause split-brain."""

    @classmethod
    def setUpClass(cls):
        """Force a 2 x 3 distributed-replicated volume and mount it."""
        # Calling GlusterBaseClass setUpClass
        cls.get_super_method(cls, 'setUpClass')()

        # Override Volumes
        if cls.volume_type == "distributed-replicated":
            # Define x3 distributed-replicated volume
            cls.volume['voltype'] = {
                'type': 'distributed-replicated',
                'dist_count': 2,
                'replica_count': 3,
                'transport': 'tcp'}

        # Setup Volume and Mount Volume
        ret = cls.setup_volume_and_mount_volume(cls.mounts)
        if not ret:
            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
        g.log.info("Successful in Setup Volume and Mount Volume")

    @classmethod
    def tearDownClass(cls):
        """Unmount and clean up the volume."""
        # Cleanup Volume
        ret = cls.unmount_volume_and_cleanup_volume(cls.mounts)
        if not ret:
            # The failing step here is the cleanup, not volume creation
            raise ExecutionError("Failed to unmount and cleanup volume")
        g.log.info("Successful in cleaning up Volume %s", cls.volname)

        cls.get_super_method(cls, 'tearDownClass')()

    def _test_brick_down_with_file_rename(self, pfile, rfile, brick):
        """Rename pfile to rfile on the mount while `brick` is offline.

        The brick is brought offline, the rename is done from the first
        client, and the brick is then brought back online.
        """
        # Bring brick offline. The brick is passed as a one-element list,
        # the same form used for are_bricks_offline below.
        g.log.info('Bringing brick %s offline', brick)
        ret = bring_bricks_offline(self.volname, [brick])
        self.assertTrue(ret, 'Failed to bring brick %s offline'
                        % brick)

        ret = are_bricks_offline(self.mnode, self.volname,
                                 [brick])
        self.assertTrue(ret, 'Brick %s is not offline'
                        % brick)
        g.log.info('Bringing brick %s offline is successful',
                   brick)

        # Rename file
        cmd = ("mv %s/%s %s/%s"
               % (self.mounts[0].mountpoint, pfile,
                  self.mounts[0].mountpoint, rfile))
        ret, _, _ = g.run(self.clients[0], cmd)
        self.assertEqual(ret, 0, "rename of file failed")

        # Bring brick back online
        g.log.info('Bringing brick %s online', brick)
        ret = bring_bricks_online(self.mnode, self.volname,
                                  [brick])
        self.assertTrue(ret, 'Failed to bring brick %s online' %
                        brick)
        g.log.info('Bringing brick %s online is successful', brick)

    def test_afr_heal_with_brickdown_hardlink(self):
        """
        Steps:
        1. Create 2 * 3 distribute replicate volume and disable all heals
        2. Create a file and 3 hardlinks to it from fuse mount.
        3. Kill brick4, rename HLINK1 to an appropriate name so that
           it gets hashed to replicate-1
        4. Likewise rename HLINK2 and HLINK3 as well, killing brick5 and
           brick6 respectively each time. i.e. a different brick of the 2nd
           replica is down each time.
        5. Now enable shd and let selfheals complete.
        6. Heal should complete without split-brains.
        """
        bricks_list = get_all_bricks(self.mnode, self.volname)
        options = {"metadata-self-heal": "off",
                   "entry-self-heal": "off",
                   "data-self-heal": "off",
                   "self-heal-daemon": "off"}
        g.log.info("setting options %s", options)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set volume option %s for "
                              "volume %s" % (options, self.volname)))
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        cmd = ("touch %s/FILE" % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.clients[0], cmd)
        self.assertEqual(ret, 0, "file creation failed")

        # Creating hardlinks HLINK1..HLINK3 for the file created
        for i in range(1, 4):
            ret = create_link_file(self.clients[0],
                                   '{}/FILE'.format(self.mounts[0].mountpoint),
                                   '{}/HLINK{}'.format
                                   (self.mounts[0].mountpoint, i))
            self.assertTrue(ret, "Unable to create hard link file ")

        # bricks_list[3:6] are the 4th, 5th and 6th bricks, i.e. the bricks
        # of the second replica set. Take a different one down per rename.
        renames = (("HLINK1", "NEW-HLINK1", bricks_list[3]),
                   ("HLINK2", "NEW-HLINK2", bricks_list[4]),
                   ("HLINK3", "NEW-HLINK3", bricks_list[5]))
        for old_name, new_name, brick in renames:
            self._test_brick_down_with_file_rename(old_name, new_name, brick)

        # Setting options
        options = {"self-heal-daemon": "on"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options %s' % options)
        g.log.info("Option 'self-heal-daemon' is set to 'on' successfully")

        # Start healing
        ret = trigger_heal(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not started')
        g.log.info('Healing is started')

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')

        # Check data on mount point
        cmd = ("ls %s" % (self.mounts[0].mountpoint))
        ret, _, _ = g.run(self.clients[0], cmd)
        self.assertEqual(ret, 0, "failed to fetch data from mount point")
# pylint: disable=too-many-statements, too-many-locals
from unittest import SkipTest
from glusto.core import Glusto as g
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.misc.misc_libs import upload_scripts
from glustolibs.gluster.glusterdir import mkdir
from glustolibs.gluster.heal_libs import (monitor_heal_completion,
                                          is_heal_complete)
from glustolibs.io.utils import (run_linux_untar, run_crefi,
                                 wait_for_io_to_complete)


@runs_on([['replicated', 'distributed-replicated'], ['glusterfs']])
class TestSelfHeal(GlusterBaseClass):
    """Heal must complete after server reboots under a mixed workload."""

    @classmethod
    def setUpClass(cls):
        """Upload the IO script, then set up and mount the volume."""
        # Calling GlusterBaseClass setUpClass
        cls.get_super_method(cls, 'setUpClass')()

        # The test indexes clients[3] and mounts[3], so four clients and
        # four mounts are needed; fewer would raise IndexError mid-test.
        if len(cls.clients) < 4 or len(cls.mounts) < 4:
            raise SkipTest("This test requires atleast 4 clients")

        # Upload io scripts for running IO on mounts
        cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
                                  "file_dir_ops.py")
        ret = upload_scripts(cls.clients, cls.script_upload_path)
        if not ret:
            raise ExecutionError("Failed to upload IO scripts "
                                 "to clients %s" % cls.clients)
        g.log.info("Successfully uploaded IO scripts to clients %s",
                   cls.clients)

        # Setup Volume and Mount Volume
        ret = cls.setup_volume_and_mount_volume(cls.mounts, True)
        if not ret:
            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
        g.log.info("Successful in Setup Volume and Mount Volume")

        cls.list_of_io_processes = []
        cls.is_io_running = False

    def tearDown(self):
        """Wait for outstanding IO, then unmount every mount and clean up."""
        # If I/O processes are running wait from them to complete
        if self.is_io_running:
            if not wait_for_io_to_complete(self.list_of_io_processes,
                                           self.mounts):
                raise ExecutionError("Failed to wait for I/O to complete")

        # setUpClass mounted all of self.mounts, so all of them have to be
        # unmounted here, not just the first one.
        ret = self.unmount_volume_and_cleanup_volume(self.mounts)
        if not ret:
            raise ExecutionError("Unable to delete volume %s" % self.volname)

        self.get_super_method(self, 'tearDown')()

    def test_afr_node_reboot_self_heal(self):
        """
        Steps:
        1. Create *3 replica volume
        2. Mount the volume on 4 clients
        3. Run following workload from clients
           Client 1: Linux Untars
           Client 2: Lookups du
           Client 3: Lookups ls
           Client 4: crefi file operations
        4. Create a directory on mount point
        5. Create deep dirs and file in the directory created at step 4
        6. Perform node reboot
        7. Check for heal status
        8. Reboot another node
        9. Check for heal status
        """

        # Create a dir to start untar
        self.linux_untar_dir = "{}/{}".format(self.mounts[0].mountpoint,
                                              "linuxuntar")
        ret = mkdir(self.clients[0], self.linux_untar_dir)
        self.assertTrue(ret, "Failed to create dir linuxuntar for untar")

        # Start linux untar on dir linuxuntar from client 1
        ret = run_linux_untar(self.clients[0], self.mounts[0].mountpoint,
                              dirs=tuple(['linuxuntar']))
        self.list_of_io_processes += ret
        self.is_io_running = True

        # Run lookup operation du from client 2
        cmd = ("cd {}; for i in `seq 1 1000000`;do du -sh; done"
               .format(self.mounts[1].mountpoint))
        ret = g.run_async(self.mounts[1].client_system, cmd)
        self.list_of_io_processes += [ret]

        # Run lookup operation ls from client 3
        cmd = ("cd {}; for i in `seq 1 1000000`;do ls -laRt; done"
               .format(self.mounts[2].mountpoint))
        ret = g.run_async(self.mounts[2].client_system, cmd)
        self.list_of_io_processes += [ret]

        # Create a dir to start crefi tool
        crefi_dir = "{}/{}".format(self.mounts[3].mountpoint, "crefi")
        ret = mkdir(self.clients[3], crefi_dir)
        self.assertTrue(ret, "Failed to create dir for crefi")

        # Create deep dirs and files on mount point from client 4
        list_of_fops = ("create", "rename", "chmod", "chown", "chgrp",
                        "hardlink", "truncate", "setxattr")
        for fops in list_of_fops:
            ret = run_crefi(self.clients[3],
                            crefi_dir, 10, 3, 3, thread=4,
                            random_size=True, fop=fops, minfs=0,
                            maxfs=102400, multi=True, random_filename=True)
            self.assertTrue(ret, "crefi failed during {}".format(fops))
            g.log.info("crefi PASSED FOR fop %s", fops)
        g.log.info("IOs were successful using crefi")

        for server_num in (1, 2):
            # Perform node reboot for servers
            g.log.info("Rebooting %s", self.servers[server_num])
            # NOTE(review): run_async returns a process handle, so this
            # assertion presumably never fails, and nothing here waits for
            # the node to come back before heal is monitored - confirm.
            ret = g.run_async(self.servers[server_num], "reboot")
            self.assertTrue(ret, 'Failed to reboot node')

            # Monitor heal completion
            ret = monitor_heal_completion(self.mnode, self.volname)
            self.assertTrue(ret, 'Heal has not yet completed')

            # Check if heal is completed
            ret = is_heal_complete(self.mnode, self.volname)
            self.assertTrue(ret, 'Heal is not complete')
            g.log.info('Heal is completed successfully')
# pylint: disable=too-many-statements, too-many-locals
import time
from glusto.core import Glusto as g
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.heal_ops import trigger_heal
from glustolibs.gluster.heal_libs import (is_volume_in_split_brain,
                                          is_heal_complete)
from glustolibs.gluster.brick_libs import (bring_bricks_offline,
                                           bring_bricks_online,
                                           are_bricks_offline,
                                           get_all_bricks,
                                           are_bricks_online)
from glustolibs.gluster.heal_libs import (
    monitor_heal_completion, are_all_self_heal_daemons_are_online)


@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']])
class TestBrickDownHeal(GlusterBaseClass):
    """Cycling bricks down and up must not leave the volume in split-brain."""

    @classmethod
    def setUpClass(cls):
        """Set up and mount the volume once for the class."""
        # Calling GlusterBaseClass setUpClass
        cls.get_super_method(cls, 'setUpClass')()

        # Setup Volume and Mount Volume
        ret = cls.setup_volume_and_mount_volume(cls.mounts, True)
        if not ret:
            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")

    @classmethod
    def tearDownClass(cls):
        """
        Cleanup Volume
        """
        ret = cls.unmount_volume_and_cleanup_volume(cls.mounts)
        if not ret:
            # The failing step here is the cleanup, not volume creation
            raise ExecutionError("Failed to unmount and cleanup volume")

        cls.get_super_method(cls, 'tearDownClass')()

    def test_brick_down_heal(self):
        """
        - Run IO's from client on a single file
        - Now bring down bricks in cyclic order
        - kill brick 1, sleep for 5 seconds, bring brick 1 up, wait for 10s
        - Now repeat the previous step for brick 2 and brick 3
        - Repeat the cycle a few times
        - Trigger heal, check for split brain using command
        """
        # Write IO's
        # NOTE(review): the async process is only stored; its exit status
        # is never collected or validated in this test - confirm intended.
        self.all_mounts_procs = []
        cmd = ("for i in `seq 1 10`;"
               "do dd if=/dev/urandom of=%s/file$i bs=1K count=1;"
               "done" % self.mounts[0].mountpoint)
        proc = g.run_async(self.mounts[0].client_system, cmd)
        self.all_mounts_procs.append(proc)

        # Killing bricks in cyclic order
        bricks_list = get_all_bricks(self.mnode, self.volname)

        # Total number of cyclic brick-down cycles to be executed
        number_of_cycles = 3
        for _ in range(number_of_cycles):
            for brick in bricks_list:
                # Bring brick offline
                g.log.info('Bringing bricks %s offline', brick)
                ret = bring_bricks_offline(self.volname, [brick])
                self.assertTrue(ret, ("Failed to bring bricks %s offline"
                                      % brick))

                ret = are_bricks_offline(self.mnode, self.volname, [brick])
                self.assertTrue(ret, 'Bricks %s are not offline' % brick)
                g.log.info('Bringing bricks %s offline is successful', brick)

                # Introducing 5 second sleep when brick is down
                g.log.info("Waiting for 5 seconds, with ongoing IO while "
                           "brick %s is offline", brick)
                time.sleep(5)

                # Bring brick online
                g.log.info('Bringing bricks %s online', brick)
                ret = bring_bricks_online(self.mnode, self.volname, [brick])
                self.assertTrue(ret, ("Failed to bring bricks %s online "
                                      % brick))
                g.log.info('Bricks %s are online', brick)

                # Introducing 10 second sleep when brick is up
                g.log.info("Waiting for 10 seconds,when "
                           "brick %s is online", brick)
                time.sleep(10)

                # Check if bricks are online
                ret = are_bricks_online(self.mnode, self.volname, bricks_list)
                self.assertTrue(ret, 'Bricks %s are not online' % bricks_list)
                g.log.info('Bricks %s are online', bricks_list)

        # Check daemons
        g.log.info('Checking daemons...')
        ret = are_all_self_heal_daemons_are_online(self.mnode,
                                                   self.volname)
        self.assertTrue(ret, ("Some of the self-heal Daemons are "
                              "offline"))
        g.log.info('All self-heal Daemons are online')

        # Trigger self heal
        ret = trigger_heal(self.mnode, self.volname)
        self.assertTrue(ret, 'Unable to trigger heal on volume')

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')
from glusto.core import Glusto as g

from glustolibs.gluster.brick_libs import (bring_bricks_offline,
                                           bring_bricks_online,
                                           get_online_bricks_list)
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.glusterdir import mkdir
from glustolibs.gluster.heal_libs import (
    is_heal_complete, is_volume_in_split_brain, monitor_heal_completion,
    wait_for_self_heal_daemons_to_be_online)
from glustolibs.gluster.heal_ops import (disable_self_heal_daemon,
                                         enable_self_heal_daemon, trigger_heal)
from glustolibs.gluster.lib_utils import (add_user, collect_bricks_arequal,
                                          del_user, group_add, group_del)
from glustolibs.gluster.volume_libs import get_subvols
from glustolibs.io.utils import list_all_files_and_dirs_mounts


@runs_on([['arbiter', 'replicated'], ['glusterfs']])
class TestMetadataAndDataHeal(GlusterBaseClass):
    '''Description: Verify shd heals files after performing metadata and data
    operations while a brick was down'''

    def _dac_helper(self, host, option):
        '''Helper for creating, deleting users and groups

        Args:
            host (str): Node on which the groups and the user are managed.
            option (str): Either `create` or `delete`; any other value is
                          a no-op.

        Raises:
            ExecutionError: If a group or the user can't be created/deleted.
        '''

        # Permission/Ownership changes required only for `test_metadata..`
        # tests, using random group and usernames
        if 'metadata' not in self.test_dir:
            return

        if option == 'create':
            # Groups
            for group in ('qa_func', 'qa_system'):
                if not group_add(host, group):
                    raise ExecutionError('Unable to {} group {} on '
                                         '{}'.format(option, group, host))

            # User
            if not add_user(host, 'qa_all', group='qa_func'):
                raise ExecutionError('Unable to {} user {} under {} on '
                                     '{}'.format(option, 'qa_all', 'qa_func',
                                                 host))
        elif option == 'delete':
            # Groups
            for group in ('qa_func', 'qa_system'):
                if not group_del(host, group):
                    raise ExecutionError('Unable to {} group {} on '
                                         '{}'.format(option, group, host))

            # User
            if not del_user(host, 'qa_all'):
                raise ExecutionError('Unable to {} user on {}'.format(
                    option, host))

    def setUp(self):
        '''Setup and mount the volume on a single client and create the
        users/groups needed by the metadata tests'''
        self.get_super_method(self, 'setUp')()

        # A single mount is enough for all the tests
        self.mounts = self.mounts[0:1]
        self.client = self.mounts[0].client_system

        # Use testcase name as test directory
        self.test_dir = self.id().split('.')[-1]
        self.fqpath = self.mounts[0].mountpoint + '/' + self.test_dir

        if not self.setup_volume_and_mount_volume(mounts=self.mounts):
            raise ExecutionError('Failed to setup and mount '
                                 '{}'.format(self.volname))

        # Create group and user names required for the test
        self._dac_helper(host=self.client, option='create')

    def tearDown(self):
        '''Remove the users/groups created in setUp, unmount and cleanup the
        volume'''
        # Delete group and user names created as part of setup
        self._dac_helper(host=self.client, option='delete')

        if not self.unmount_volume_and_cleanup_volume(mounts=self.mounts):
            raise ExecutionError('Not able to unmount and cleanup '
                                 '{}'.format(self.volname))

        self.get_super_method(self, 'tearDown')()

    def _perform_io_and_disable_self_heal(self):
        '''Refactor of steps common to all tests: Perform IO, disable heal'''
        ret = mkdir(self.client, self.fqpath)
        self.assertTrue(ret,
                        'Directory creation failed on {}'.format(self.client))

        # Prefix of the data generator; the callers append the size to it
        self.io_cmd = 'cat /dev/urandom | tr -dc [:space:][:print:] | head -c '

        # Create 6 dir's, 6 files and 6 files in each subdir with 10K data
        file_io = ('''cd {0}; for i in `seq 1 6`;
                    do mkdir dir.$i; {1} 10K > file.$i;
                    for j in `seq 1 6`;
                    do {1} 10K > dir.$i/file.$j; done;
                    done;'''.format(self.fqpath, self.io_cmd))
        ret, _, err = g.run(self.client, file_io)
        self.assertEqual(ret, 0, 'Unable to create directories and data files')
        self.assertFalse(err, '{0} failed with {1}'.format(file_io, err))

        # Disable self heal daemon
        self.assertTrue(disable_self_heal_daemon(self.mnode, self.volname),
                        'Disabling self-heal-daemon falied')

    def _perform_brick_ops_and_enable_self_heal(self, op_type):
        '''Refactor of steps common to all tests: Brick down and perform
        metadata/data operations

        Args:
            op_type (str): Key into `self.op_cmd`, `metadata` or `data`.
        '''
        # First brick in the subvol will always be online and used for self
        # heal, so make keys match brick index
        self.op_cmd = {
            # Metadata Operations (owner and permission changes)
            'metadata': {
                2:
                '''cd {0}; for i in `seq 1 3`; do chown -R qa_all:qa_func \
                dir.$i file.$i; chmod -R 555 dir.$i file.$i; done;''',
                3:
                '''cd {0}; for i in `seq 1 3`; do chown -R :qa_system \
                dir.$i file.$i; chmod -R 777 dir.$i file.$i; done;''',
                # 4 - Will be used for final data consistency check
                4:
                '''cd {0}; for i in `seq 1 6`; do chown -R qa_all:qa_system \
                dir.$i file.$i; chmod -R 777 dir.$i file.$i; done;''',
            },
            # Data Operations (append data to the files)
            'data': {
                2:
                '''cd {0}; for i in `seq 1 3`;
                    do {1} 2K >> file.$i;
                    for j in `seq 1 3`;
                    do {1} 2K >> dir.$i/file.$j; done;
                    done;''',
                3:
                '''cd {0}; for i in `seq 1 3`;
                    do {1} 3K >> file.$i;
                    for j in `seq 1 3`;
                    do {1} 3K >> dir.$i/file.$j; done;
                    done;''',
                # 4 - Will be used for final data consistency check
                4:
                '''cd {0}; for i in `seq 1 6`;
                    do {1} 4K >> file.$i;
                    for j in `seq 1 6`;
                    do {1} 4K >> dir.$i/file.$j; done;
                    done;''',
            },
        }
        bricks = get_online_bricks_list(self.mnode, self.volname)
        self.assertIsNotNone(bricks,
                             'Not able to get list of bricks in the volume')

        # Make first brick always online and start operations from second brick
        for index, brick in enumerate(bricks[1:], start=2):

            # Bring brick offline; report the brick actually being acted on
            ret = bring_bricks_offline(self.volname, brick)
            self.assertTrue(ret, 'Unable to bring {} offline'.format(brick))

            # Perform metadata/data operation
            cmd = self.op_cmd[op_type][index].format(self.fqpath, self.io_cmd)
            ret, _, err = g.run(self.client, cmd)
            self.assertEqual(ret, 0, '{0} failed with {1}'.format(cmd, err))
            self.assertFalse(err, '{0} failed with {1}'.format(cmd, err))

            # Bring brick online and make sure it really came back, otherwise
            # the next iteration would run with two bricks down
            ret = bring_bricks_online(
                self.mnode,
                self.volname,
                brick,
                bring_bricks_online_methods='volume_start_force')
            self.assertTrue(ret, 'Unable to bring {} online'.format(brick))

        # Assert metadata/data operations resulted in pending heals
        self.assertFalse(is_heal_complete(self.mnode, self.volname),
                         'Pending heals are expected after brick operations')

        # Enable and wait self heal daemon to be online
        self.assertTrue(enable_self_heal_daemon(self.mnode, self.volname),
                        'Enabling self heal daemon failed')
        self.assertTrue(
            wait_for_self_heal_daemons_to_be_online(self.mnode, self.volname),
            'Not all self heal daemons are online')

    def _validate_heal_completion_and_arequal(self, op_type):
        '''Refactor of steps common to all tests: Validate heal from heal
        commands, verify arequal, perform IO and verify arequal after IO

        Args:
            op_type (str): Key into `self.op_cmd`, `metadata` or `data`.
        '''

        # Validate heal completion
        self.assertTrue(monitor_heal_completion(self.mnode, self.volname),
                        'Self heal is not completed within timeout')
        self.assertFalse(
            is_volume_in_split_brain(self.mnode, self.volname),
            'Volume is in split brain even after heal completion')

        subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
        self.assertTrue(subvols, 'Not able to get list of subvols')

        # For arbiter volumes the last brick of a subvol is left out of the
        # checksum comparison
        arbiter = 'arbiter' in self.volume_type
        stop = len(subvols[0]) - 1 if arbiter else len(subvols[0])

        # Validate arequal
        self._validate_arequal_and_perform_lookup(subvols, stop)

        # Perform some additional metadata/data operations
        cmd = self.op_cmd[op_type][4].format(self.fqpath, self.io_cmd)
        ret, _, err = g.run(self.client, cmd)
        self.assertEqual(ret, 0, '{0} failed with {1}'.format(cmd, err))
        self.assertFalse(err, '{0} failed with {1}'.format(cmd, err))

        # Validate arequal after additional operations
        self._validate_arequal_and_perform_lookup(subvols, stop)

    def _validate_arequal_and_perform_lookup(self, subvols, stop):
        '''Refactor of steps common to all tests: Validate arequal from bricks
        backend and perform a lookup of all files from mount

        Args:
            subvols (list): List of subvols, each a list of bricks.
            stop (int): Number of leading bricks of every subvol to compare.
        '''
        for subvol in subvols:
            ret, arequal = collect_bricks_arequal(subvol[0:stop])
            self.assertTrue(
                ret, 'Unable to get `arequal` checksum on '
                '{}'.format(subvol[0:stop]))
            self.assertEqual(
                len(set(arequal)), 1, 'Mismatch of `arequal` '
                'checksum among {} is identified'.format(subvol[0:stop]))

        # Perform a lookup of all files and directories on mounts
        self.assertTrue(list_all_files_and_dirs_mounts(self.mounts),
                        'Failed to list all files and dirs from mount')

    def test_metadata_heal_from_shd(self):
        '''Description: Verify files heal after switching on `self-heal-daemon`
        when metadata operations are performed while a brick was down

        Steps:
        1. Create, mount and run IO on volume
        2. Set `self-heal-daemon` to `off`, cyclic brick down and perform
           metadata operations
        3. Set `self-heal-daemon` to `on` and wait for heal completion
        4. Validate arequal checksum on backend bricks
        '''
        op_type = 'metadata'
        self._perform_io_and_disable_self_heal()
        self._perform_brick_ops_and_enable_self_heal(op_type=op_type)
        self._validate_heal_completion_and_arequal(op_type=op_type)
        g.log.info('Pass: Verification of metadata heal after switching on '
                   '`self heal daemon` is complete')

    def test_metadata_heal_from_heal_cmd(self):
        '''Description: Verify files heal after triggering heal command when
        metadata operations are performed while a brick was down

        Steps:
        1. Create, mount and run IO on volume
        2. Set `self-heal-daemon` to `off`, cyclic brick down and perform
           metadata operations
        3. Set `self-heal-daemon` to `on`, invoke `gluster vol <vol> heal`
        4. Validate arequal checksum on backend bricks
        '''
        op_type = 'metadata'
        self._perform_io_and_disable_self_heal()
        self._perform_brick_ops_and_enable_self_heal(op_type=op_type)

        # Invoke `glfsheal`
        self.assertTrue(trigger_heal(self.mnode, self.volname),
                        'Unable to trigger index heal on the volume')

        self._validate_heal_completion_and_arequal(op_type=op_type)
        g.log.info(
            'Pass: Verification of metadata heal via `glfsheal` is complete')

    def test_data_heal_from_shd(self):
        '''Description: Verify files heal after switching on `self-heal-daemon`
        when data operations are performed while a brick was down

        Steps:
        1. Create, mount and run IO on volume
        2. Set `self-heal-daemon` to `off`, cyclic brick down and perform data
           operations
        3. Set `self-heal-daemon` to `on` and wait for heal completion
        4. Validate arequal checksum on backend bricks
        '''
        op_type = 'data'
        self._perform_io_and_disable_self_heal()
        self._perform_brick_ops_and_enable_self_heal(op_type=op_type)
        self._validate_heal_completion_and_arequal(op_type=op_type)
        g.log.info('Pass: Verification of data heal after switching on '
                   '`self heal daemon` is complete')
""" Description:
    Test cases in this module tests the authentication allow feature
"""
from glusto.core import Glusto as g
from glustolibs.gluster.gluster_base_class import (GlusterBaseClass,
                                                   runs_on)
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.auth_ops import set_auth_allow
from glustolibs.gluster.brick_libs import (get_all_bricks,
                                           bring_bricks_offline,
                                           bring_bricks_online,
                                           are_bricks_offline)
from glustolibs.gluster.heal_ops import trigger_heal
from glustolibs.gluster.heal_libs import (monitor_heal_completion,
                                          is_heal_complete)


@runs_on([['distributed-replicated', 'distributed-dispersed'], ['glusterfs']])
class FuseAuthAllow(GlusterBaseClass):
    """
    Tests to verify auth.allow feature on fuse mount.
    """
    @classmethod
    def setUpClass(cls):
        """
        Create and start volume
        """
        cls.get_super_method(cls, 'setUpClass')()
        # Create and start volume
        ret = cls.setup_volume()
        if not ret:
            raise ExecutionError("Failed to setup "
                                 "and start volume %s" % cls.volname)

    def _authenticated_mount(self, mount_obj):
        """
        Mount volume on authenticated client

        Args:
            mount_obj(obj): Object of GlusterMount class
        """
        # Mount volume
        ret = mount_obj.mount()
        self.assertTrue(ret, ("Failed to mount %s on client %s" %
                              (mount_obj.volname,
                               mount_obj.client_system)))
        g.log.info("Successfully mounted %s on client %s", mount_obj.volname,
                   mount_obj.client_system)

        # Verify mount
        ret = mount_obj.is_mounted()
        self.assertTrue(ret, ("%s is not mounted on client %s"
                              % (mount_obj.volname, mount_obj.client_system)))
        g.log.info("Verified: %s is mounted on client %s",
                   mount_obj.volname, mount_obj.client_system)

    def _brick_down_heal(self):
        """
        Create files on the mount, cycle one brick offline/online, trigger
        heal and wait for it to complete
        """
        # Create files on mount point using dd command
        cmd = ('cd %s;for i in {1..10};'
               'do dd if=/dev/urandom bs=1024 count=1 of=file$i;done;'
               % (self.mounts[0].mountpoint))
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to createfiles on mountpoint")
        g.log.info("Successfully created files on mountpoint")

        # Bring brick1 offline
        bricks_list = get_all_bricks(self.mnode, self.volname)
        # Fail with a clear message instead of a TypeError on indexing below
        self.assertIsNotNone(bricks_list, 'Failed to get the list of bricks')
        ret = bring_bricks_offline(self.volname, bricks_list[1])
        self.assertTrue(ret, 'Failed to bring brick1 offline')
        ret = are_bricks_offline(self.mnode, self.volname,
                                 [bricks_list[1]])
        self.assertTrue(ret, 'Brick1 is not offline')
        g.log.info('Bringing brick1 offline is successful')

        # Bring brick1 back online using "gluster volume start force", as
        # described in the test steps
        ret = bring_bricks_online(self.mnode, self.volname,
                                  [bricks_list[1]],
                                  bring_bricks_online_methods=[
                                      'volume_start_force'])
        self.assertTrue(ret, 'Failed to bring brick1 online')
        g.log.info('Bringing brick1 online is successful')

        # Start healing
        ret = trigger_heal(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not started')
        g.log.info('Healing is started')

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

    def test_auth_allow_with_heal(self):
        """
        Validating the FUSE authentication volume options with Heal.
        Steps:
        1. Setup and start volume
        2. Set auth.allow on volume for client1 using ip of client1
        3. Mount volume on client1.
        4. Create files on mount point using dd command
        5. Bring down one brick of the volume
        6. Bring the brick back up after few seconds using
        "gluster volume start force"
        7. Start volume heal by using gluster volume heal
        8. See the heal status using gluster volume heal info
        9. Set auth.allow on volume for client1 using hostname of client1.
        10. Repeat steps from 3 to 9
        """
        # Setting authentication on volume for client1 using ip
        auth_dict = {'all': [self.mounts[0].client_system]}
        ret = set_auth_allow(self.volname, self.mnode, auth_dict)
        self.assertTrue(ret, "Failed to set authentication")

        # Mounting volume on client1
        self._authenticated_mount(self.mounts[0])

        # Create files,bring brick down and check heal
        self._brick_down_heal()

        # Unmount volume from client1
        ret = self.mounts[0].unmount()
        self.assertTrue(ret, ("Failed to unmount volume %s from client %s"
                              % (self.volname, self.mounts[0].client_system)))

        # Obtain hostname of client1
        ret, hostname_client1, _ = g.run(self.mounts[0].client_system,
                                         "hostname")
        self.assertEqual(ret, 0, ("Failed to obtain hostname of client %s"
                                  % self.mounts[0].client_system))
        g.log.info("Obtained hostname of client. IP- %s, hostname- %s",
                   self.mounts[0].client_system, hostname_client1.strip())

        # Setting authentication on volume for client1 using hostname
        auth_dict = {'all': [hostname_client1.strip()]}
        ret = set_auth_allow(self.volname, self.mnode, auth_dict)
        self.assertTrue(ret, "Failed to set authentication")

        # Mounting volume on client1
        self._authenticated_mount(self.mounts[0])

        # Create files,bring brick down and check heal
        self._brick_down_heal()

    def tearDown(self):
        """
        Unmount the client (if still mounted) and cleanup volume

        NOTE: the volume is created once in setUpClass but removed here, which
        works because this class holds a single test method.
        """
        # The test leaves the volume mounted on client1 when it ends (or when
        # it fails midway); release the mount before removing the volume
        mount_obj = self.mounts[0]
        if mount_obj.is_mounted() and not mount_obj.unmount():
            raise ExecutionError("Failed to unmount volume %s from client %s"
                                 % (self.volname, mount_obj.client_system))

        ret = self.cleanup_volume()
        if not ret:
            raise ExecutionError("Failed to cleanup volume.")

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()
- if self.volume_type == 'distributed-dispersed': - for brick_path in get_all_bricks(self.mnode, self.volname): - node, path = brick_path.split(':') - ret, out, _ = g.run(node, 'find {}/'.format(path)) - g.log.info(out) - for filedir in out.split('\n'): - ret, out, _ = g.run(node, 'ls -l {}'.format(filedir)) - g.log.info("Return value for ls -l command: %s", ret) - g.log.info(out) - ret = get_fattr_list(node, filedir, encode_hex=True) - g.log.info(ret) - # Shrinking volume by removing bricks from volume when IO in progress ret = shrink_volume(self.mnode, self.volname) - # Temporary code: - # Additional checks to gather infomartion from all - # servers for Bug 1810901. - if not ret and self.volume_type == 'distributed-dispersed': - for brick_path in get_all_bricks(self.mnode, self.volname): - node, path = brick_path.split(':') - ret, out, _ = g.run(node, 'find {}/'.format(path)) - g.log.info(out) - for filedir in out.split('\n'): - ret, out, _ = g.run(node, 'ls -l {}'.format(filedir)) - g.log.info("Return value for ls -l command: %s", ret) - g.log.info(out) - ret = get_fattr_list(node, filedir, encode_hex=True) - g.log.info(ret) - self.assertTrue(ret, ("Failed to shrink the volume when IO in " "progress on volume %s", self.volname)) g.log.info("Shrinking volume when IO in progress is successful on " diff --git a/tests/functional/dht/test_accessing_file_when_dht_layout_is_stale.py b/tests/functional/dht/test_accessing_file_when_dht_layout_is_stale.py new file mode 100644 index 000000000..e7f89d84e --- /dev/null +++ b/tests/functional/dht/test_accessing_file_when_dht_layout_is_stale.py @@ -0,0 +1,181 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. 
from glusto.core import Glusto as g

from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.glusterdir import mkdir
from glustolibs.gluster.glusterfile import get_fattr, set_fattr
from glustolibs.gluster.volume_libs import get_subvols
from glustolibs.io.utils import collect_mounts_arequal


# pylint: disable=too-many-locals
@runs_on([['distributed'], ['glusterfs']])
class TestAccessFileStaleLayout(GlusterBaseClass):
    '''Verify a file stays accessible after the dht layout of its parent
    directory has been swapped between the two subvols'''

    def setUp(self):
        '''Create a 2-brick distribute volume and mount it'''
        self.get_super_method(self, 'setUp')()

        self.volume['voltype']['dist_count'] = 2
        ret = self.setup_volume_and_mount_volume(self.mounts)
        if not ret:
            raise ExecutionError('Failed to setup and mount volume')

    def tearDown(self):
        '''Unmount and cleanup the volume'''
        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
        if not ret:
            raise ExecutionError('Failed to umount and cleanup Volume')

        self.get_super_method(self, 'tearDown')()

    def _get_brick_node_and_path(self):
        '''Yields list containing brick node and path from first brick of each
        subvol
        '''
        subvols = get_subvols(self.mnode, self.volname)
        for subvol in subvols['volume_subvols']:
            # `self.dir_path` is set by the test before this generator is used
            subvol[0] += self.dir_path
            yield subvol[0].split(':')

    def _assert_file_lookup(self, node, fqpath, when, result):
        '''Perform `stat` on `fqpath` from `node` and validate against `result`

        Args:
            node (str): Node on which `stat` is run.
            fqpath (str): Absolute path on the brick backend.
            when (str): `before` or `after`, used only in the failure message.
            result (bool): True if the lookup is expected to succeed.
        '''
        cmd = ('stat {}'.format(fqpath))
        ret, _, _ = g.run(node, cmd)
        assert_method = self.assertNotEqual
        assert_msg = 'fail'
        if result:
            assert_method = self.assertEqual
            assert_msg = 'pass'
        assert_method(
            ret, 0, 'Lookup on {} from {} should {} {} layout '
            'change'.format(fqpath, node, assert_msg, when))

    def test_accessing_file_when_dht_layout_is_stale(self):
        '''
        Description : Checks if a file can be opened and accessed if the dht
        layout has become stale.

        Steps:
        1. Create, start and mount a volume consisting 2 subvols on 2 clients
        2. Create a dir `dir` and file `dir/file` from client0
        3. Take note of layouts of `brick1`/dir and `brick2`/dir of the volume
        4. Validate for success lookup from only one brick path
        5. Re-assign layouts ie., brick1/dir to brick2/dir and vice-versa
        6. Remove `dir/file` from client0 and recreate same file from client0
           and client1
        7. Validate for success lookup from only one brick path (as layout is
           changed file creation path will be changed)
        8. Validate checksum is matched from both the clients
        '''

        # The final checksum comparison needs exactly two mounts; fail early
        # with a clear message rather than a misleading checksum mismatch
        self.assertGreaterEqual(len(self.mounts), 2,
                                'Test requires mounts on two clients')

        # Will be used in _get_brick_node_and_path
        self.dir_path = '/dir'

        # Will be used in argument to _assert_file_lookup
        file_name = '/file'

        dir_path = self.mounts[0].mountpoint + self.dir_path
        file_path = dir_path + file_name

        client0, client1 = self.clients[0], self.clients[1]
        fattr = 'trusted.glusterfs.dht'
        io_cmd = ('cat /dev/urandom | tr -dc [:space:][:print:] | '
                  'head -c 1K > {}'.format(file_path))

        # Create a dir from client0
        ret = mkdir(client0, dir_path)
        self.assertTrue(ret, 'Unable to create a directory from mount point')

        # Touch a file with data from client0
        ret, _, _ = g.run(client0, io_cmd)
        self.assertEqual(ret, 0, 'Failed to create a file on mount')

        # Yields `node` and `brick-path` from first brick of each subvol
        gen = self._get_brick_node_and_path()

        # Take note of newly created directory's layout from org_subvol1
        node1, fqpath1 = next(gen)
        layout1 = get_fattr(node1, fqpath1, fattr)
        self.assertIsNotNone(layout1,
                             '{} is not present on {}'.format(fattr, fqpath1))

        # Lookup on file from node1 should fail as `dir/file` will always get
        # hashed to node2 in a 2-brick distribute volume by default
        self._assert_file_lookup(node1,
                                 fqpath1 + file_name,
                                 when='before',
                                 result=False)

        # Take note of newly created directory's layout from org_subvol2
        node2, fqpath2 = next(gen)
        layout2 = get_fattr(node2, fqpath2, fattr)
        self.assertIsNotNone(layout2,
                             '{} is not present on {}'.format(fattr, fqpath2))

        # Lookup on file from node2 should pass
        self._assert_file_lookup(node2,
                                 fqpath2 + file_name,
                                 when='before',
                                 result=True)

        # Set org_subvol2 directory layout to org_subvol1 and vice-versa
        for node, fqpath, layout, vol in ((node1, fqpath1, layout2, (2, 1)),
                                          (node2, fqpath2, layout1, (1, 2))):
            ret = set_fattr(node, fqpath, fattr, layout)
            self.assertTrue(
                ret, 'Failed to set layout of org_subvol{} on '
                'brick {} of org_subvol{}'.format(vol[0], fqpath, vol[1]))

        # Remove file after layout change from client0
        cmd = 'rm -f {}'.format(file_path)
        ret, _, _ = g.run(client0, cmd)
        self.assertEqual(ret, 0, 'Failed to delete file after layout change')

        # Create file with same name as above after layout change from client0
        # and client1
        for client in (client0, client1):
            ret, _, _ = g.run(client, io_cmd)
            self.assertEqual(
                ret, 0, 'Failed to create file from '
                '{} after layout change'.format(client))

        # After layout change lookup on file from node1 should pass
        self._assert_file_lookup(node1,
                                 fqpath1 + file_name,
                                 when='after',
                                 result=True)

        # After layout change lookup on file from node2 should fail
        self._assert_file_lookup(node2,
                                 fqpath2 + file_name,
                                 when='after',
                                 result=False)

        # Take note of checksum from client0 and client1; only the first two
        # mounts are compared so extra mounts can't overflow the list
        checksums = [None] * 2
        for index, mount in enumerate(self.mounts[:2]):
            ret, checksums[index] = collect_mounts_arequal(mount, dir_path)
            self.assertTrue(
                ret, 'Failed to get arequal on client {}'.format(
                    mount.client_system))

        # Validate no checksum mismatch
        self.assertEqual(checksums[0], checksums[1],
                         'Checksum mismatch between client0 and client1')

        g.log.info('Pass: Test accessing file on stale layout is complete.')
from random import choice
from glusto.core import Glusto as g
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.brick_libs import get_all_bricks, bring_bricks_online
from glustolibs.gluster.heal_libs import monitor_heal_completion
from glustolibs.gluster.rebalance_ops import (
    rebalance_start, wait_for_rebalance_to_complete)
from glustolibs.gluster.volume_libs import expand_volume
from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs,
                                 wait_for_io_to_complete)
from glustolibs.misc.misc_libs import kill_process


@runs_on([['distributed-replicated', 'distributed-arbiter'], ['glusterfs']])
class TestAddBrickRebalanceWithSelfHeal(GlusterBaseClass):
    """Add brick and rebalance while self heal is pending on the volume"""

    def setUp(self):
        """Setup the volume and mount it on the first client"""
        self.get_super_method(self, 'setUp')()

        # Setup Volume
        if not self.setup_volume_and_mount_volume([self.mounts[0]]):
            raise ExecutionError("Failed to Setup and mount volume")

        # Set while async I/O started by the test has not been validated yet
        self.is_io_running = False

    def tearDown(self):
        """Wait for outstanding I/O, then unmount and cleanup the volume"""
        # If I/O processes are running wait for it to complete
        if self.is_io_running:
            if not wait_for_io_to_complete(self.list_of_io_processes,
                                           [self.mounts[0]]):
                raise ExecutionError("Failed to wait for I/O to complete")

        if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]):
            raise ExecutionError("Failed to Cleanup Volume")

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()

    def test_add_brick_rebalance_with_self_heal_in_progress(self):
        """
        Test case:
        1. Create a volume, start it and mount it.
        2. Start creating a few files on mount point.
        3. While file creation is going on, kill one of the bricks
           in the replica pair.
        4. After file creation is complete collect arequal checksum
           on mount point.
        5. Bring back the brick online by starting volume with force.
        6. Check if all bricks are online and if heal is in progress.
        7. Add bricks to the volume and start rebalance.
        8. Wait for rebalance and heal to complete on volume.
        9. Collect arequal checksum on mount point and compare
           it with the one taken in step 4.
        """
        # Start I/O from mount point in the background
        cmd = ("cd %s; for i in {1..1000} ; do "
               "dd if=/dev/urandom of=file$i bs=10M count=1; done"
               % self.mounts[0].mountpoint)
        self.list_of_io_processes = [
            g.run_async(self.mounts[0].client_system, cmd)]
        # tearDown reads this flag to decide whether to wait for the I/O
        self.is_io_running = True

        # Get a list of all the bricks to kill brick
        brick_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(brick_list, "Empty present brick list")

        # Kill the brick process (glusterfsd) on the node hosting a randomly
        # picked brick
        brick = choice(brick_list)
        node, _ = brick.split(":")
        ret = kill_process(node, process_names="glusterfsd")
        self.assertTrue(ret, "Failed to kill brick process of brick %s"
                        % brick)

        # Validate if I/O was successful or not.
        ret = validate_io_procs(self.list_of_io_processes, [self.mounts[0]])
        self.assertTrue(ret, "IO failed on some of the clients")
        self.is_io_running = False

        # Collect arequal checksum before ops
        ret, arequal_checksum_before = collect_mounts_arequal(self.mounts[0])
        self.assertTrue(ret, "Failed to collect arequal checksum before ops")

        # Bring back the brick online by starting volume with force
        ret = bring_bricks_online(self.mnode, self.volname, brick_list,
                                  bring_bricks_online_methods=[
                                      'volume_start_force'])
        self.assertTrue(ret, "Error in bringing back brick online")
        g.log.info('All bricks are online now')

        # Add brick to volume
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, "Failed to add brick on volume %s"
                        % self.volname)

        # Trigger rebalance and wait for it to complete
        ret, _, _ = rebalance_start(self.mnode, self.volname,
                                    force=True)
        self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s"
                         % self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
                                             timeout=1200)
        self.assertTrue(ret, "Rebalance is not yet complete on the volume "
                             "%s" % self.volname)
        g.log.info("Rebalance successfully completed")

        # Wait for heal to complete
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, "heal has not yet completed")
        g.log.info("Self heal completed")

        # Check for data loss by comparing arequal before and after ops
        ret, arequal_checksum_after = collect_mounts_arequal(self.mounts[0])
        self.assertTrue(ret, "Failed to collect arequal checksum after ops")
        self.assertEqual(arequal_checksum_before, arequal_checksum_after,
                         "arequal checksum is NOT MATCHNG")
        g.log.info("arequal checksum is SAME")
+ +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.rebalance_ops import ( + rebalance_start, wait_for_rebalance_to_complete) +from glustolibs.gluster.glusterfile import get_md5sum +from glustolibs.gluster.volume_libs import expand_volume +from glustolibs.io.utils import (validate_io_procs, wait_for_io_to_complete) + + +@runs_on([['distributed-replicated', 'distributed-arbiter'], ['glusterfs']]) +class TestAddBrickRebalanceWithSymlinkPointingOutOfGluster(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to Setup and mount volume") + + self.is_io_running = False + + def tearDown(self): + + # Remove the temporary dir created for test + ret, _, _ = g.run(self.mounts[0].client_system, "rm -rf /mnt/tmp/") + if ret: + raise ExecutionError("Failed to remove /mnt/tmp create for test") + + # If I/O processes are running wait for it to complete + if self.is_io_running: + if not wait_for_io_to_complete(self.list_of_io_processes, + [self.mounts[0]]): + raise ExecutionError("Failed to wait for I/O to complete") + + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to Cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_add_brick_rebalance_with_symlink_pointing_out_of_volume(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create symlinks on the volume such that the files for the symlink + are outside the volume. + 3. Once all the symlinks are create a data file using dd: + dd if=/dev/urandom of=FILE bs=1024 count=100 + 4. Start copying the file's data to all the symlink. + 5. 
When data is getting copied to all files through symlink add brick + and start rebalance. + 6. Once rebalance is complete check the md5sum of each file through + symlink and compare if it's same as the original file. + """ + # Create symlinks on volume pointing outside volume + cmd = ("cd %s; mkdir -p /mnt/tmp;for i in {1..100};do " + "touch /mnt/tmp/file$i; ln -sf /mnt/tmp/file$i link$i;done" + % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertFalse( + ret, "Failed to create symlinks pointing outside volume") + + # Create a data file using dd inside mount point + cmd = ("cd %s; dd if=/dev/urandom of=FILE bs=1024 count=100" + % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertFalse(ret, "Failed to create data file on mount point") + + # Start copying data from file to symlinks + cmd = ("cd %s;for i in {1..100};do cat FILE >> link$i;done" + % self.mounts[0].mountpoint) + self.list_of_io_processes = [ + g.run_async(self.mounts[0].client_system, cmd)] + self.is_copy_running = True + + # Add brick to volume + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, "Failed to add brick on volume %s" + % self.volname) + + # Trigger rebalance and wait for it to complete + ret, _, _ = rebalance_start(self.mnode, self.volname, + force=True) + self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s" + % self.volname) + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=1200) + self.assertTrue(ret, "Rebalance is not yet complete on the volume " + "%s" % self.volname) + g.log.info("Rebalance successfully completed") + + # Validate if I/O was successful or not.
+ ret = validate_io_procs(self.list_of_io_processes, self.mounts) + self.assertTrue(ret, "IO failed on some of the clients") + self.is_copy_running = False + + # Get md5sum of the original file and compare it with that of + # all files through the symlink + original_file_md5sum = get_md5sum(self.mounts[0].client_system, + "{}/FILE".format( + self.mounts[0].mountpoint)) + self.assertIsNotNone(original_file_md5sum, + 'Failed to get md5sum of original file') + for number in range(1, 101): + symlink_md5sum = get_md5sum(self.mounts[0].client_system, + "{}/link{}".format( + self.mounts[0].mountpoint, number)) + self.assertEqual(original_file_md5sum.split(' ')[0], + symlink_md5sum.split(' ')[0], + "Original file and symlink checksum not equal" + " for link%s" % number) + g.log.info("Symlink and original file checksum same on all symlinks") diff --git a/tests/functional/dht/test_add_brick_replace_brick_fix_layout.py b/tests/functional/dht/test_add_brick_replace_brick_fix_layout.py index 66f39ff24..783ca1800 100644 --- a/tests/functional/dht/test_add_brick_replace_brick_fix_layout.py +++ b/tests/functional/dht/test_add_brick_replace_brick_fix_layout.py @@ -82,7 +82,7 @@ class TestAddBrickReplaceBrickFixLayout(GlusterBaseClass): 1. Create a volume, start it and mount it. 2. Create files and dirs on the mount point. 3. Add bricks to the volume. - 4. Replace 2 old brick to the volume. + 4. Replace 2 old bricks to the volume. 5. Trigger rebalance fix layout and wait for it to complete. 6. Check layout on all the bricks through trusted.glusterfs.dht. """ diff --git a/tests/functional/dht/test_rebalance_multiple_expansions.py b/tests/functional/dht/test_rebalance_multiple_expansions.py new file mode 100644 index 000000000..e96d88d56 --- /dev/null +++ b/tests/functional/dht/test_rebalance_multiple_expansions.py @@ -0,0 +1,100 @@ +# Copyright (C) 2021 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.rebalance_ops import ( + rebalance_start, wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_libs import expand_volume +from glustolibs.io.utils import collect_mounts_arequal + + +@runs_on([['distributed', 'distributed-replicated'], + ['glusterfs']]) +class TestRebalanceMultipleExpansions(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to Setup and mount volume") + + self.first_client = self.mounts[0].client_system + + def tearDown(self): + + # Unmount and clean volume + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to Cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_rebalance_multiple_expansions(self): + """ + Test case: + 1. Create a volume, start it and mount it + 2. Create some file on mountpoint + 3. Collect arequal checksum on mount point pre-rebalance + 4. 
Do the following 3 times: + 5. Expand the volume + 6. Start rebalance and wait for it to finish + 7. Collect arequal checksum on mount point post-rebalance + and compare with value from step 3 + """ + + # Create some file on mountpoint + cmd = ("cd %s; for i in {1..500} ; do " + "dd if=/dev/urandom of=file$i bs=10M count=1; done" + % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.first_client, cmd) + self.assertEqual(ret, 0, "IO failed on volume %s" + % self.volname) + + # Collect arequal checksum before rebalance + arequal_checksum_before = collect_mounts_arequal(self.mounts[0]) + + for _ in range(3): + # Add brick to volume + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, "Failed to add brick on volume %s" + % self.volname) + + # Trigger rebalance and wait for it to complete + ret, _, _ = rebalance_start(self.mnode, self.volname, + force=True) + self.assertEqual(ret, 0, "Failed to start rebalance on " + "volume %s" % self.volname) + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=1200) + self.assertTrue(ret, "Rebalance is not yet complete on the volume " + "%s" % self.volname) + g.log.info("Rebalance successfully completed") + + # Collect arequal checksum after rebalance + arequal_checksum_after = collect_mounts_arequal(self.mounts[0]) + + # Check for data loss by comparing arequal before and after + # rebalance + self.assertEqual(arequal_checksum_before, arequal_checksum_after, + "arequal checksum is NOT MATCHNG") + g.log.info("arequal checksum is SAME") diff --git a/tests/functional/dht/test_rebalance_multiple_shrinks.py b/tests/functional/dht/test_rebalance_multiple_shrinks.py new file mode 100644 index 000000000..a95cdf141 --- /dev/null +++ b/tests/functional/dht/test_rebalance_multiple_shrinks.py @@ -0,0 +1,87 @@ +# Copyright (C) 2021 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import shrink_volume +from glustolibs.io.utils import collect_mounts_arequal + + +@runs_on([['distributed'], ['glusterfs']]) +class TestRebalanceMultipleShrinks(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Changing dist_count to 6 + self.volume['voltype']['dist_count'] = 6 + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to Setup and mount volume") + + self.first_client = self.mounts[0].client_system + + def tearDown(self): + + # Unmount and clean volume + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to Cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_rebalance_multiple_shrinks(self): + """ + Test case: + 1. Modify the distribution count of a volume + 2. Create a volume, start it and mount it + 3. Create some file on mountpoint + 4. Collect arequal checksum on mount point pre-rebalance + 5. Do the following 3 times: + 6. 
Shrink the volume + 7. Collect arequal checksum on mount point post-rebalance + and compare with value from step 4 + """ + + # Create some file on mountpoint + cmd = ("cd %s; for i in {1..500} ; do " + "dd if=/dev/urandom of=file$i bs=10M count=1; done" + % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.first_client, cmd) + self.assertEqual(ret, 0, "IO failed on volume %s" + % self.volname) + + # Collect arequal checksum before rebalance + arequal_checksum_before = collect_mounts_arequal(self.mounts[0]) + + for _ in range(3): + # Shrink volume + ret = shrink_volume(self.mnode, self.volname, + rebalance_timeout=16000) + self.assertTrue(ret, "Failed to remove-brick from volume") + g.log.info("Remove-brick rebalance successful") + + # Collect arequal checksum after rebalance + arequal_checksum_after = collect_mounts_arequal(self.mounts[0]) + + # Check for data loss by comparing arequal before and after + # rebalance + self.assertEqual(arequal_checksum_before, arequal_checksum_after, + "arequal checksum is NOT MATCHNG") + g.log.info("arequal checksum is SAME") diff --git a/tests/functional/dht/test_rebalance_nested_dir.py b/tests/functional/dht/test_rebalance_nested_dir.py new file mode 100644 index 000000000..77f099ad3 --- /dev/null +++ b/tests/functional/dht/test_rebalance_nested_dir.py @@ -0,0 +1,99 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.rebalance_ops import ( + rebalance_start, wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_libs import expand_volume +from glustolibs.io.utils import collect_mounts_arequal + + +@runs_on([['distributed', 'distributed-replicated'], + ['glusterfs']]) +class TestRebalanceNestedDir(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to Setup and mount volume") + + self.first_client = self.mounts[0].client_system + + def tearDown(self): + + # Unmount and clean volume + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to Cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_rebalance_nested_dir(self): + """ + Test case: + 1. Create a volume, start it and mount it + 2. On mount point, create a large nested dir structure with + files in the inner-most dir + 3. Collect arequal checksum on mount point pre-rebalance + 4. Expand the volume + 5. Start rebalance and wait for it to finish + 6. 
Collect arequal checksum on mount point post-rebalance + and compare with value from step 3 + """ + + # create a large nested dir structure with files in the inner-most dir + cmd = ("cd %s; for i in {1..100} ; do mkdir $i; cd $i; done;" + "for j in {1..100} ; do " + "dd if=/dev/urandom of=file$j bs=10M count=1; done" + % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.first_client, cmd) + self.assertEqual(ret, 0, "IO failed on volume %s" + % self.volname) + + # Collect arequal checksum before rebalance + arequal_checksum_before = collect_mounts_arequal(self.mounts[0]) + + # Add brick to volume + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, "Failed to add brick on volume %s" + % self.volname) + + # Trigger rebalance and wait for it to complete + ret, _, _ = rebalance_start(self.mnode, self.volname, + force=True) + self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s" + % self.volname) + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=1200) + self.assertTrue(ret, "Rebalance is not yet complete on the volume " + "%s" % self.volname) + g.log.info("Rebalance successfully completed") + + # Collect arequal checksum after rebalance + arequal_checksum_after = collect_mounts_arequal(self.mounts[0]) + + # Check for data loss by comparing arequal before and after rebalance + self.assertEqual(arequal_checksum_before, arequal_checksum_after, + "arequal checksum is NOT MATCHNG") + g.log.info("arequal checksum is SAME") diff --git a/tests/functional/dht/test_rebalance_peer_probe.py b/tests/functional/dht/test_rebalance_peer_probe.py new file mode 100644 index 000000000..7ffc9ca63 --- /dev/null +++ b/tests/functional/dht/test_rebalance_peer_probe.py @@ -0,0 +1,130 @@ +# Copyright (C) 2020 Red Hat, Inc.
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from time import sleep + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.rebalance_ops import ( + rebalance_start, wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_libs import expand_volume +from glustolibs.io.utils import collect_mounts_arequal +from glustolibs.gluster.peer_ops import (peer_probe_servers, peer_detach) + + +@runs_on([['distributed'], ['glusterfs']]) +class TestRebalancePeerProbe(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to Setup and mount volume") + + self.first_client = self.mounts[0].client_system + self.is_peer_detached = False + + def tearDown(self): + + # Unmount and clean volume + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to Cleanup Volume") + + # Probe detached node in case it's still detached + if self.is_peer_detached: + if not peer_probe_servers(self.mnode, self.servers[5]): + raise ExecutionError("Failed to probe detached " + "servers %s" % self.servers) + 
g.log.info("Peer probe success for detached " + "servers %s", self.servers) + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_rebalance_peer_probe(self): + """ + Test case: + 1. Detach a peer + 2. Create a volume, start it and mount it + 3. Start creating a few files on mount point + 4. Collect arequal checksum on mount point pre-rebalance + 5. Expand the volume + 6. Start rebalance + 7. While rebalance is going, probe a peer and check if + the peer was probed successfully + 8. Collect arequal checksum on mount point post-rebalance + and compare with value from step 4 + """ + + # Detach a peer + ret, _, _ = peer_detach(self.mnode, self.servers[5]) + self.assertEqual(ret, 0, "Failed to detach peer %s" + % self.servers[5]) + + self.is_peer_detached = True + + # Start I/O from mount point and wait for it to complete + cmd = ("cd %s; for i in {1..1000} ; do " + "dd if=/dev/urandom of=file$i bs=10M count=1; done" + % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.first_client, cmd) + self.assertEqual(ret, 0, "IO failed on volume %s" + % self.volname) + + # Collect arequal checksum before rebalance + arequal_checksum_before = collect_mounts_arequal(self.mounts[0]) + + # Add brick to volume + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, "Failed to add brick on volume %s" + % self.volname) + + # Trigger rebalance and wait for it to complete + ret, _, _ = rebalance_start(self.mnode, self.volname, + force=True) + self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s" + % self.volname) + + # Let rebalance run for a while + sleep(5) + + # Add new node to the cluster + ret = peer_probe_servers(self.mnode, self.servers[5]) + self.assertTrue(ret, "Failed to peer probe server : %s" + % self.servers[5]) + g.log.info("Peer probe success for %s and all peers are in " + "connected state", self.servers[5]) + + self.is_peer_detached = False + + # Wait for
rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=1200) + self.assertTrue(ret, "Rebalance is not yet complete on the volume " + "%s" % self.volname) + g.log.info("Rebalance successfully completed") + + # Collect arequal checksum after rebalance + arequal_checksum_after = collect_mounts_arequal(self.mounts[0]) + + # Check for data loss by comparing arequal before and after rebalance + self.assertEqual(arequal_checksum_before, arequal_checksum_after, + "arequal checksum is NOT MATCHNG") + g.log.info("arequal checksum is SAME") diff --git a/tests/functional/dht/test_rebalance_preserve_user_permissions.py b/tests/functional/dht/test_rebalance_preserve_user_permissions.py index 6bffeb8d7..59327f329 100644 --- a/tests/functional/dht/test_rebalance_preserve_user_permissions.py +++ b/tests/functional/dht/test_rebalance_preserve_user_permissions.py @@ -25,7 +25,6 @@ from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on from glustolibs.gluster.rebalance_ops import ( rebalance_start, - get_rebalance_status, wait_for_rebalance_to_complete) from glustolibs.gluster.volume_libs import ( expand_volume, @@ -40,9 +39,7 @@ from glustolibs.gluster.glusterfile import ( @runs_on([['distributed', 'distributed-replicated'], ['glusterfs']]) class TestRebalancePreserveUserPermissions(GlusterBaseClass): - def setUp(self): - self.get_super_method(self, 'setUp')() # Creating Volume and mounting the volume @@ -60,7 +57,6 @@ class TestRebalancePreserveUserPermissions(GlusterBaseClass): raise ExecutionError("Failed to add user") def tearDown(self): - ret = del_user(self.client, self.user) if not ret: raise ExecutionError("Failed to delete user") @@ -73,12 +69,45 @@ class TestRebalancePreserveUserPermissions(GlusterBaseClass): self.get_super_method(self, 'tearDown')() + def _start_rebalance_and_wait(self): + """Start rebalance and wait""" + # Start Rebalance + ret, _, _ = 
rebalance_start(self.mnode, self.volname) + self.assertEqual(ret, 0, ("Failed to start rebalance on the volume " + "%s", self.volname)) + g.log.info("Successfully started rebalance on the volume %s", + self.volname) + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname) + self.assertTrue(ret, ("Rebalance is not yet complete on the volume " + "%s", self.volname)) + g.log.info("Rebalance is successfully complete on the volume %s", + self.volname) + + def _get_arequal_and_check_if_equal_to_before(self): + """Check if arequal checksum is equal or not""" + self.arequal_checksum_after = collect_mounts_arequal(self.mounts[0]) + self.assertEqual( + self.arequal_checksum_before, self.arequal_checksum_after, + "arequal checksum is NOT MATCHNG") + g.log.info("arequal checksum is SAME") + + def _logged_vol_info(self): + """Log volume info and status""" + ret = log_volume_info_and_status(self.mnode, self.volname) + self.assertTrue(ret, ("Logging volume info and status failed on " + "volume %s", self.volname)) + def _check_user_permission(self): """ Verify permissions on MP and file """ stat_mp_dict = get_file_stat(self.client, self.mountpoint) - self.assertEqual(stat_mp_dict['access'], '777', "Expected 777 " + self.assertIsNotNone(stat_mp_dict, "stat on %s failed" + % self.mountpoint) + self.assertEqual(stat_mp_dict['access'], '777', + "Expected 777 " "but found %s" % stat_mp_dict['access']) g.log.info("File permissions for mountpoint is 777 as expected") @@ -92,9 +121,9 @@ class TestRebalancePreserveUserPermissions(GlusterBaseClass): self.assertEqual(stat_dict['groupname'], self.user, "Expected %s but found %s" % (self.user, stat_dict['groupname'])) - g.log.info("User and Group are 'glusto_user' as expected") + g.log.info("User and Group are %s as expected", self.user) - def test_rebalance_preserve_user_permissions(self): + def _testcase(self, number_of_expands=1): """ Test case: 1. 
Create a volume start it and mount on the client. @@ -102,7 +131,7 @@ class TestRebalancePreserveUserPermissions(GlusterBaseClass): 3. Add new user to the client. 4. As the new user create dirs/files. 5. Compute arequal checksum and check permission on / and subdir. - 6. Add brick into the volume and start rebalance. + 6. expand cluster according to number_of_expands and start rebalance. 7. After rebalance is completed: 7.1 check arequal checksum 7.2 verfiy no change in / and sub dir permissions. @@ -126,57 +155,24 @@ class TestRebalancePreserveUserPermissions(GlusterBaseClass): # check permission on / and subdir self._check_user_permission() - # Log the volume info and status before rebalance - ret = log_volume_info_and_status(self.mnode, self.volname) - self.assertTrue(ret, ("Logging volume info and status failed on " - "volume %s", self.volname)) - - # Get arequal checksum before starting fix-layout - g.log.info("Getting arequal checksum before rebalance") - arequal_cksum_pre_rebalance = collect_mounts_arequal(self.mounts[0]) - - # Expand the volume - ret = expand_volume(self.mnode, self.volname, self.servers, - self.all_servers_info) - self.assertTrue(ret, ("Failed to expand the volume %s", self.volname)) - g.log.info("Expanding volume is successful on " - "volume %s", self.volname) - - # Log the volume info after expanding volume. 
- ret = log_volume_info_and_status(self.mnode, self.volname) - self.assertTrue(ret, ("Logging volume info and status failed on " - "volume %s", self.volname)) + # get arequal checksum before expand + self.arequal_checksum_before = collect_mounts_arequal(self.mounts[0]) - # Start Rebalance - ret, _, _ = rebalance_start(self.mnode, self.volname) - self.assertEqual(ret, 0, ("Failed to start rebalance on the volume " - "%s", self.volname)) - g.log.info("Successfully started rebalance on the volume %s", - self.volname) + self._logged_vol_info() - # Check rebalance is in progress - rebalance_status = get_rebalance_status(self.mnode, self.volname) - ret = rebalance_status['aggregate']['statusStr'] - self.assertEqual(ret, "in progress", ("Rebalance is not in " - "'in progress' state, either " - "rebalance is in completed state" - " or failed to get rebalance " - "status")) - g.log.info("Rebalance is in 'in progress' state") + # expand the volume + for i in range(number_of_expands): + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, ("Failed to expand iter %d volume %s", + i, self.volname)) - # Wait for rebalance to complete - ret = wait_for_rebalance_to_complete(self.mnode, self.volname) - self.assertTrue(ret, ("Rebalance is not yet complete on the volume " - "%s", self.volname)) - g.log.info("Rebalance is successfully complete on the volume %s", - self.volname) + self._logged_vol_info() + # Start Rebalance and wait for completion + self._start_rebalance_and_wait() - # Compare arequals checksum pre/post rebalance - arequal_cksum_post_rebalance = collect_mounts_arequal(self.mounts[0]) - self.assertEqual(arequal_cksum_pre_rebalance, - arequal_cksum_post_rebalance, - "arequal checksum is NOT MATCHNG") - g.log.info("arequal checksum is SAME") + # compare arequals checksum before and after rebalance + self._get_arequal_and_check_if_equal_to_before() # permissions check on / and sub dir self._check_user_permission() @@ 
-190,3 +186,9 @@ class TestRebalancePreserveUserPermissions(GlusterBaseClass): self.assertEqual(ret, 0, ("User %s failed to create files", self.user)) g.log.info("IO as %s is successful", self.user) + + def test_rebalance_preserve_user_permissions(self): + self._testcase() + + def test_rebalance_preserve_user_permissions_multi_expands(self): + self._testcase(2) diff --git a/tests/functional/dht/test_rebalance_two_volumes.py b/tests/functional/dht/test_rebalance_two_volumes.py new file mode 100644 index 000000000..c96f75586 --- /dev/null +++ b/tests/functional/dht/test_rebalance_two_volumes.py @@ -0,0 +1,163 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.rebalance_ops import ( + rebalance_start, wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_libs import expand_volume +from glustolibs.io.utils import collect_mounts_arequal +from glustolibs.gluster.mount_ops import mount_volume +from glustolibs.gluster.volume_ops import (volume_create, volume_start, + volume_stop, volume_delete) +from glustolibs.gluster.lib_utils import form_bricks_list + + +@runs_on([['distributed', 'distributed-replicated'], ['glusterfs']]) +class TestRebalanceTwoVolumes(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to Setup and mount volume") + + self.first_client = self.mounts[0].client_system + + self.second_vol_name = "second_volume" + self.second_mountpoint = "/mnt/{}".format(self.second_vol_name) + self.is_second_volume_created = False + + def tearDown(self): + + # Unmount and clean volume + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to Cleanup Volume") + + if self.is_second_volume_created: + # Stop the 2nd volume + ret, _, _ = volume_stop(self.mnode, self.second_vol_name) + self.assertEqual(ret, 0, ("volume stop failed for %s" + % self.second_vol_name)) + g.log.info("Volume %s stopped", self.second_vol_name) + + # Delete the 2nd volume + ret = volume_delete(self.mnode, self.second_vol_name) + self.assertTrue(ret, ("Failed to cleanup the Volume " + "%s", self.second_vol_name)) + g.log.info("Volume deleted successfully : %s", + self.second_vol_name) + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_rebalance_two_volumes(self): + """ + Test case: + 1. 
Create a volume, start it and mount it + 2. Create a 2nd volume, start it and mount it + 3. Create files on mount points + 4. Collect arequal checksum on mount point pre-rebalance + 5. Expand the volumes + 6. Start rebalance simultaneously on the 2 volumes + 7. Wait for rebalance to complete + 8. Collect arequal checksum on mount point post-rebalance + and compare with value from step 4 + """ + + # Get brick list + bricks_list = form_bricks_list(self.mnode, self.volname, 3, + self.servers, self.all_servers_info) + self.assertIsNotNone(bricks_list, "Bricks list is None") + + # Create 2nd volume + ret, _, _ = volume_create(self.mnode, self.second_vol_name, + bricks_list) + self.assertEqual(ret, 0, ("Failed to create volume %s") % ( + self.second_vol_name)) + g.log.info("Volume %s created successfully", self.second_vol_name) + + # Start 2nd volume + ret, _, _ = volume_start(self.mnode, self.second_vol_name) + self.assertEqual(ret, 0, ("Failed to start volume %s") % ( + self.second_vol_name)) + g.log.info("Started volume %s", self.second_vol_name) + + self.is_second_volume_created = True + + # Mount 2nd volume + for mount_obj in self.mounts: + ret, _, _ = mount_volume(self.second_vol_name, + mtype=self.mount_type, + mpoint=self.second_mountpoint, + mserver=self.mnode, + mclient=mount_obj.client_system) + self.assertEqual(ret, 0, ("Failed to mount volume %s") % ( + self.second_vol_name)) + g.log.info("Volume mounted successfully : %s", + self.second_vol_name) + + # Start I/O from mount point for volume 1 and wait for it to complete + cmd = ("cd %s; for i in {1..1000} ; do " + "dd if=/dev/urandom of=file$i bs=10M count=1; done" + % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.first_client, cmd) + self.assertEqual(ret, 0, "IO failed on volume %s" + % self.volname) + + # Start I/O from mount point for volume 2 and wait for it to complete + cmd = ("cd %s; for i in {1..1000} ; do " + "dd if=/dev/urandom of=file$i bs=10M count=1; done" + % self.second_mountpoint) + 
ret, _, _ = g.run(self.first_client, cmd) + self.assertEqual(ret, 0, "IO failed on volume %s" + % self.second_vol_name) + + # Collect arequal checksum before rebalance + arequal_checksum_before = collect_mounts_arequal(self.mounts[0]) + + # Add bricks to volumes + for volume in (self.volname, self.second_vol_name): + ret = expand_volume(self.mnode, volume, self.servers, + self.all_servers_info) + self.assertTrue(ret, "Failed to add brick on volume %s" + % volume) + + # Trigger rebalance + for volume in (self.volname, self.second_vol_name): + ret, _, _ = rebalance_start(self.mnode, volume, + force=True) + self.assertEqual(ret, 0, "Failed to start rebalance on the" + " volume %s" % volume) + + # Wait for rebalance to complete + for volume in (self.volname, self.second_vol_name): + ret = wait_for_rebalance_to_complete(self.mnode, volume, + timeout=1200) + self.assertTrue(ret, "Rebalance is not yet complete on the volume" + " %s" % volume) + g.log.info("Rebalance successfully completed") + + # Collect arequal checksum after rebalance + arequal_checksum_after = collect_mounts_arequal(self.mounts[0]) + + # Check for data loss by comparing arequal before and after rebalance + self.assertEqual(arequal_checksum_before, arequal_checksum_after, + "arequal checksum is NOT MATCHNG") + g.log.info("arequal checksum is SAME") diff --git a/tests/functional/dht/test_rebalance_with_acl_set_to_files.py b/tests/functional/dht/test_rebalance_with_acl_set_to_files.py new file mode 100644 index 000000000..d290ae56a --- /dev/null +++ b/tests/functional/dht/test_rebalance_with_acl_set_to_files.py @@ -0,0 +1,129 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. 
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

from glusto.core import Glusto as g
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.glusterfile import set_acl, get_acl
from glustolibs.gluster.lib_utils import add_user, del_user
from glustolibs.gluster.mount_ops import mount_volume
from glustolibs.gluster.rebalance_ops import (
    rebalance_start, wait_for_rebalance_to_complete)
from glustolibs.gluster.volume_libs import expand_volume
from glustolibs.io.utils import collect_mounts_arequal


@runs_on([['distributed-replicated', 'distributed-arbiter', 'distributed',
           'replicated', 'arbiter', 'distributed-dispersed',
           'dispersed'], ['glusterfs']])
class TestRebalanceWithAclSetToFiles(GlusterBaseClass):
    """Check that ACLs set on files are unchanged by add-brick + rebalance."""

    def setUp(self):
        """Set up the volume, mount it with the acl option, add user joker."""
        self.get_super_method(self, 'setUp')()

        # Setup Volume
        if not self.setup_volume():
            raise ExecutionError("Failed to Setup volume")

        self.first_client = self.mounts[0].client_system
        self.mount_point = self.mounts[0].mountpoint

        # Mount volume with -o acl option
        ret, _, _ = mount_volume(self.volname, self.mount_type,
                                 self.mount_point, self.mnode,
                                 self.first_client, options='acl')
        if ret:
            raise ExecutionError("Failed to mount volume")

        # Create a non-root user
        if not add_user(self.first_client, 'joker'):
            raise ExecutionError("Failed to create user joker")

    def tearDown(self):
        """Remove user joker, then unmount and clean up the volume.

        Both cleanup steps are always attempted; a failed user removal no
        longer prevents the volume from being unmounted and cleaned up.

        Raises:
            ExecutionError: if either cleanup step reports failure.
        """
        # Remove non-root user created for test
        user_removed = del_user(self.first_client, 'joker')

        # Run the volume cleanup before reporting any failure
        volume_cleaned = self.unmount_volume_and_cleanup_volume(
            [self.mounts[0]])

        if not user_removed:
            raise ExecutionError("Failed to remove user joker")
        if not volume_cleaned:
            raise ExecutionError("Failed to Cleanup Volume")

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()

    def _check_acl_set_to_files(self):
        """Check acl values set to files"""
        for number in range(1, 11):
            ret = get_acl(self.first_client, self.mount_point,
                          'file{}'.format(str(number)))
            self.assertIn('user:joker:rwx', ret['rules'],
                          "Rule not present in getfacl output")

    def test_add_brick_rebalance_with_acl_set_to_files(self):
        """
        Test case:
        1. Create a volume, start it and mount it to a client.
        2. Create 10 files on the mount point and set acls on the files.
        3. Check the acl value and collect arequal-checksum.
        4. Add bricks to the volume and start rebalance.
        5. Check the value of acl(it should be same as step 3),
           collect and compare arequal-checksum with the one collected
           in step 3
        """
        # Create 10 files on the mount point.
        cmd = ("cd {}; for i in `seq 1 10`;do touch file$i;done"
               .format(self.mount_point))
        ret, _, _ = g.run(self.first_client, cmd)
        self.assertFalse(ret, "Failed to create files on mount point")

        for number in range(1, 11):
            ret = set_acl(self.first_client, 'u:joker:rwx', '{}/file{}'
                          .format(self.mount_point, str(number)))
            self.assertTrue(ret, "Failed to set acl on files")

        # Collect arequal on mount point and check acl value
        arequal_checksum_before = collect_mounts_arequal(self.mounts[0])
        self._check_acl_set_to_files()
        g.log.info("Files created and acl set to files properly")

        # Add brick to volume
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, "Failed to add brick on volume %s"
                        % self.volname)

        # Trigger rebalance
        ret, _, _ = rebalance_start(self.mnode, self.volname,
                                    force=True)
        self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s"
                         % self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
                                             timeout=1200)
        self.assertTrue(ret, "Rebalance is not yet complete on the volume "
                             "%s" % self.volname)
        g.log.info("Rebalance successfully completed")

        # Check acl value if it's same as before rebalance
        self._check_acl_set_to_files()

        # Check for data loss by comparing arequal before and after ops
        arequal_checksum_after = collect_mounts_arequal(self.mounts[0])
        self.assertEqual(arequal_checksum_before, arequal_checksum_after,
                         "arequal checksum is NOT MATCHNG")
        g.log.info("arequal checksum and acl value are SAME")


# Scraped patch scaffolding that shares these source lines (kept verbatim):
# diff --git a/tests/functional/dht/test_time_taken_for_ls.py b/tests/functional/dht/test_time_taken_for_ls.py new file mode 100644 index 000000000..7c9653999 --- /dev/null +++ b/tests/functional/dht/test_time_taken_for_ls.py @@ -0,0 +1,105 @@ +# Copyright (C) 2020 Red Hat, Inc.
# <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

from glusto.core import Glusto as g
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.exceptions import ExecutionError


@runs_on([['distributed-replicated', 'distributed-arbiter',
           'distributed-dispersed'], ['glusterfs']])
class TestTimeForls(GlusterBaseClass):
    """Check that ls on a mount with 2000 directories finishes in 10 sec."""

    def setUp(self):
        """Set up and mount the volume and initialise the I/O bookkeeping."""
        self.get_super_method(self, 'setUp')()

        # Setup Volume
        if not self.setup_volume_and_mount_volume(self.mounts):
            raise ExecutionError("Failed to Setup and mount volume")

        self.is_io_running = False
        # Created here so _validate_io() can always iterate over it
        self.proc_list = []

    def tearDown(self):
        """Reap any I/O still running, then unmount and clean up."""
        if self.is_io_running:
            self._validate_io()

        if not self.unmount_volume_and_cleanup_volume(self.mounts):
            raise ExecutionError("Failed to Cleanup Volume")

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()

    def _validate_io(self):
        """Validate I/O threads running on mount point

        Returns:
            bool: False as soon as one async command in self.proc_list
                returns a non-zero exit status, True otherwise.
        """
        for proc in self.proc_list:
            try:
                ret, _, _ = proc.async_communicate()
            except ValueError:
                # A ValueError from async_communicate() is treated as
                # success, exactly as before
                continue
            if ret:
                return False
        return True

    def test_time_taken_for_ls(self):
        """
        Test case:
        1. Create a volume of type distributed-replicated or
           distributed-arbiter or distributed-dispersed and start it.
        2. Mount the volume to clients and create 2000 directories
           and 10 files inside each directory.
        3. Wait for I/O to complete on mount point and perform ls
           (ls should complete within 10 seconds).
        """
        # Creating 2000 directories on the mount point
        ret, _, _ = g.run(self.mounts[0].client_system,
                          "cd %s; for i in {1..2000};do mkdir dir$i;done"
                          % self.mounts[0].mountpoint)
        self.assertFalse(ret, 'Failed to create 2000 dirs on mount point')

        # Create 10 files inside each directory. The 2000 directories are
        # split into 20 brace-expansion ranges of 100 ('{1..100}' ...
        # '{1901..2000}'), generated so that no range can be left out.
        dir_ranges = ['{%d..%d}' % (first, first + 99)
                      for first in range(1, 2001, 100)]

        # One async job per range, handed to the mounts round-robin; the
        # modulo keeps the index valid for any number of mounts.
        self.proc_list = []
        for index, dir_range in enumerate(dir_ranges):
            mount_obj = self.mounts[index % len(self.mounts)]
            proc = g.run_async(mount_obj.client_system,
                               "cd %s;for i in %s;do "
                               "touch dir$i/file{1..10};done"
                               % (mount_obj.mountpoint, dir_range))
            self.proc_list.append(proc)
        self.is_io_running = True

        # Check if I/O is successful or not
        ret = self._validate_io()
        self.assertTrue(ret, "Failed to create Files and dirs on mount point")
        self.is_io_running = False
        g.log.info("Successfully created files and dirs needed for the test")

        # Run ls on mount point which should get completed within 10 seconds
        ret, _, _ = g.run(self.mounts[0].client_system,
                          "cd %s; timeout 10 ls"
                          % self.mounts[0].mountpoint)
        self.assertFalse(ret, 'ls taking more than 10 seconds')
        g.log.info("ls completed in under 10 seconds")


# Scraped patch scaffolding that shares these source lines (kept verbatim):
# diff --git a/tests/functional/dht/test_verify_permissions_on_root_dir_when_brick_down.py b/tests/functional/dht/test_verify_permissions_on_root_dir_when_brick_down.py new file mode 100644 index 000000000..f6228c122 --- /dev/null +++
# Scraped patch scaffolding that shares these source lines (kept verbatim):
# b/tests/functional/dht/test_verify_permissions_on_root_dir_when_brick_down.py @@ -0,0 +1,134 @@
#
# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

from glusto.core import Glusto as g

from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.glusterfile import set_file_permissions
from glustolibs.gluster.brick_libs import (get_all_bricks,
                                           bring_bricks_offline,
                                           bring_bricks_online)


@runs_on([['distributed', 'distributed-replicated', 'distributed-dispersed',
           'distributed-arbiter'],
          ['glusterfs']])
class TestVerifyPermissionChanges(GlusterBaseClass):
    """Check root dir permission changes while one brick is offline."""

    def setUp(self):
        """
        Setup and mount volume
        """
        self.get_super_method(self, 'setUp')()

        # Setup Volume
        if not self.setup_volume_and_mount_volume(mounts=[self.mounts[0]]):
            raise ExecutionError("Failed to Setup and Mount Volume")

    def _set_root_dir_permission(self, permission):
        """ Sets the root dir permission to the given value"""
        # Use the client of mounts[0]: that is the mount setUp created and
        # the one every other helper in this class reads from.
        ret = set_file_permissions(self.mounts[0].client_system,
                                   self.mounts[0].mountpoint, permission)
        self.assertTrue(ret, "Failed to set root dir permissions")

    def _get_dir_permissions(self, host, directory):
        """ Returns the `stat -c "%a"` output for directory on host"""
        cmd = 'stat -c "%a" {}'.format(directory)
        ret, out, _ = g.run(host, cmd)
        self.assertEqual(ret, 0, "Failed to get permission on {}".format(host))
        return out.strip()

    def _get_root_dir_permission(self, expected=None):
        """ Returns the root dir permission

        When expected is given, asserts that the permission equals it and
        returns True instead of the permission string.
        """
        permission = self._get_dir_permissions(self.mounts[0].client_system,
                                               self.mounts[0].mountpoint)
        if not expected:
            return permission.strip()
        self.assertEqual(permission, expected, "The permissions doesn't match")
        return True

    def _bring_a_brick_offline(self):
        """ Brings the last brick of the volume offline and returns it"""
        brick_to_kill = get_all_bricks(self.mnode, self.volname)[-1]
        ret = bring_bricks_offline(self.volname, brick_to_kill)
        self.assertTrue(ret, "Failed to bring brick offline")
        return brick_to_kill

    def _bring_back_brick_online(self, brick):
        """ Brings the given offline brick of the volume back online"""
        ret = bring_bricks_online(self.mnode, self.volname, brick)
        self.assertTrue(ret, "Failed to bring brick online")

    def _verify_mount_dir_and_brick_dir_permissions(self, expected,
                                                    down_brick=None):
        """ Verifies the mount directory and brick dir permissions are same

        Args:
            expected (str): permission expected on the mount point and on
                every checked brick directory, e.g. "755".
            down_brick (str): optional "host:path" brick to leave out of
                the check; when None every brick is checked.
        """
        # Get root dir permission and verify
        self._get_root_dir_permission(expected)

        # Verify brick dir permission on every brick except the offline one.
        # The full "host:path" entry is compared, so only that brick is
        # skipped, and with no offline brick nothing is skipped.
        for brick in get_all_bricks(self.mnode, self.volname):
            if brick == down_brick:
                continue
            brick_node, brick_path = brick.split(":")
            actual_perm = self._get_dir_permissions(brick_node, brick_path)
            self.assertEqual(actual_perm, expected,
                             "The permissions are not same")

    def test_verify_root_dir_permission_changes(self):
        """
        1. create pure dist volume
        2. mount on client
        3. Checked default permission (should be 755)
        4. Change the permission to 444 and verify
        5. Kill a brick
        6. Change root permission to 755
        7. Verify permission changes on all bricks, except down brick
        8. Bring back the brick and verify the changes are reflected
        """

        # Verify the default permission on root dir is 755
        self._verify_mount_dir_and_brick_dir_permissions("755")

        # Change root permission to 444
        self._set_root_dir_permission("444")

        # Verify the changes were successful
        self._verify_mount_dir_and_brick_dir_permissions("444")

        # Kill a brick
        offline_brick = self._bring_a_brick_offline()

        # Change root permission to 755
        self._set_root_dir_permission("755")

        # Verify the permission changed to 755 on mount and brick dirs
        self._verify_mount_dir_and_brick_dir_permissions("755", offline_brick)

        # Bring brick online
        self._bring_back_brick_online(offline_brick)

        # Verify the permission changed to 755 on mount and brick dirs
        self._verify_mount_dir_and_brick_dir_permissions("755")

    def tearDown(self):
        """Unmount and clean up the volume."""
        # Unmount and cleanup original volume
        if not self.unmount_volume_and_cleanup_volume(mounts=[self.mounts[0]]):
            raise ExecutionError("Failed to umount the vol & cleanup Volume")
        g.log.info("Successful in umounting the volume and Cleanup")

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()


# Scraped patch scaffolding that shares these source lines (kept verbatim):
# diff --git a/tests/functional/disperse/test_ec_check_lock_granted_to_2_different_client.py b/tests/functional/disperse/test_ec_check_lock_granted_to_2_different_client.py new file mode 100755 index 000000000..dd5f3b6da --- /dev/null +++ b/tests/functional/disperse/test_ec_check_lock_granted_to_2_different_client.py @@ -0,0 +1,135 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.


import time
import itertools
from glusto.core import Glusto as g
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on)
from glustolibs.misc.misc_libs import upload_scripts
from glustolibs.gluster.volume_ops import (set_volume_options,
                                           get_volume_options)


@runs_on([['dispersed', 'distributed-dispersed'], ['glusterfs']])
class EcVerifyLock(GlusterBaseClass):
    """Check file lock hand-over between two clients of a disperse volume."""

    @classmethod
    def setUpClass(cls):
        """Upload the file_lock.py helper script to all clients."""
        # Calling GlusterBaseClass setUpClass
        cls.get_super_method(cls, 'setUpClass')()
        cls.script = "/usr/share/glustolibs/io/scripts/file_lock.py"
        if not upload_scripts(cls.clients, [cls.script]):
            raise ExecutionError("Failed to upload IO scripts to clients %s"
                                 % cls.clients)
        g.log.info("Successfully uploaded IO scripts to clients %s",
                   cls.clients)

    def setUp(self):
        """Set up and mount the volume."""
        # Calling GlusterBaseClass setUp, mirroring the base class tearDown
        # call made in tearDown below
        self.get_super_method(self, 'setUp')()

        # Setup Volume and Mount Volume
        if not self.setup_volume_and_mount_volume(mounts=self.mounts):
            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
        g.log.info("Successful in Setup Volume and Mount Volume")

    def test_verify_lock_granted_from_2_clients(self):
        """
        - Create disperse volume and mount it to 2 clients
        - Create file from 1 client on mount point
        - Take lock from client 1 => Lock is acquired
        - Try taking lock from client 2=> Lock is blocked (as already
          being taken by client 1)
        - Release lock from client1=> Lock is released
        - Take lock from client2
        - Again try taking lock from client 1
        - verify test with once, by disabling eagerlock and other eager lock
          and once by leaving eager and other eagerlock enabled(by default)
        """
        mpoint = self.mounts[0].mountpoint

        # Create a file on client 1
        cmd = "touch {}/test_file".format(mpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to create file on client 1")

        # Verifying OCL as ON
        option = "optimistic-change-log"
        option_dict = get_volume_options(self.mnode, self.volname, option)
        self.assertIsNotNone(option_dict, ("Failed to get %s volume option"
                                           " for volume %s"
                                           % (option, self.volname)))
        self.assertEqual(option_dict['disperse.optimistic-change-log'], 'on',
                         ("%s is not ON for volume %s" % (option,
                                                          self.volname)))
        g.log.info("Succesfully verified %s value for volume %s",
                   option, self.volname)

        # Repeat the test with eager-lock and other-eager-lock 'on' & 'off'
        for lock_status in ('on', 'off'):
            options = {'disperse.eager-lock': lock_status,
                       'disperse.other-eager-lock': lock_status}
            ret = set_volume_options(self.mnode, self.volname, options)

            self.assertTrue(ret, ("failed to set eagerlock and other "
                                  "eagerlock value as %s " % lock_status))
            g.log.info("Successfully set eagerlock and other eagerlock value"
                       " to %s", lock_status)

            # Repeat the test for both the combinations of clients
            for client_1, client_2 in itertools.permutations(
                    [self.mounts[0].client_system,
                     self.mounts[1].client_system], r=2):
                # Get lock to file from one client
                lock_cmd = ("/usr/bin/env python {} -f {}/"
                            "test_file -t 30".format(self.script, mpoint))
                proc = g.run_async(client_1, lock_cmd)
                time.sleep(5)

                # As the lock is been acquired by one client,
                # try to get lock from the other
                ret, _, _ = g.run(client_2, lock_cmd)
                self.assertEqual(ret, 1, ("Unexpected: {} acquired the lock "
                                          "before been released by {}"
                                          .format(client_2, client_1)))
                g.log.info("Expected : Lock can't be acquired by %s before "
                           "being released by %s", client_2, client_1)

                # Wait for first client to release the lock.
                ret, _, _ = proc.async_communicate()
                # The message is %-formatted here; a bare tuple would be
                # shown as a tuple repr on failure
                self.assertEqual(ret, 0, ("File lock process failed on %s:%s"
                                          % (client_1, mpoint)))

                # Try taking the lock from other client and releasing it
                lock_cmd = ("/usr/bin/env python {} -f "
                            "{}/test_file -t 1".format(self.script, mpoint))
                ret, _, _ = g.run(client_2, lock_cmd)
                self.assertEqual(ret, 0,
                                 ("Unexpected:{} Can't acquire the lock even "
                                  "after its been released by {}"
                                  .format(client_2, client_1)))
                g.log.info("Successful, Lock acquired by %s after being "
                           "released by %s", client_2, client_1)

    def tearDown(self):
        """Unmount and clean up the volume."""
        # Stopping the volume
        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
        if not ret:
            raise ExecutionError("Failed to Unmount Volume and Cleanup "
                                 "Volume")
        g.log.info("Successful in Unmount Volume and Cleanup Volume")

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()


# Scraped patch scaffolding that shares these source lines (kept verbatim):
# diff --git a/tests/functional/glusterd/test_add_brick_when_quorum_not_met.py b/tests/functional/glusterd/test_add_brick_when_quorum_not_met.py index 2cb21227b..0e0a58842 100644 --- a/tests/functional/glusterd/test_add_brick_when_quorum_not_met.py +++ b/tests/functional/glusterd/test_add_brick_when_quorum_not_met.py @@ -20,6 +20,7 @@ from glustolibs.gluster.gluster_base_class import runs_on, GlusterBaseClass from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.volume_libs import setup_volume from glustolibs.gluster.volume_ops import (set_volume_options, + volume_reset, get_volume_status) from glustolibs.gluster.gluster_init import (stop_glusterd, start_glusterd, is_glusterd_running) @@ -62,13 +63,12 @@ class TestAddBrickWhenQuorumNotMet(GlusterBaseClass): % self.volname) g.log.info("Volume deleted successfully : %s", self.volname) - # Setting quorum ratio to 51% - ret = set_volume_options(self.mnode, 'all', - {'cluster.server-quorum-ratio': '51%'}) + # Reset
Cluster options + ret = volume_reset(self.mnode, 'all') if not ret: - raise ExecutionError("Failed to set server quorum ratio on %s" + raise ExecutionError("Failed to reset cluster options on %s" % self.volname) - g.log.info("Able to set server quorum ratio successfully on %s", + g.log.info("Cluster options reset successfully on %s", self.servers) self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/glusterd/test_default_max_bricks_per_process.py b/tests/functional/glusterd/test_default_max_bricks_per_process.py new file mode 100644 index 000000000..b20c1bccd --- /dev/null +++ b/tests/functional/glusterd/test_default_max_bricks_per_process.py @@ -0,0 +1,100 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 

""" Description:
        Default max bricks per-process should be 250
"""

from glusto.core import Glusto as g
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.volume_ops import (get_volume_options,
                                           reset_volume_option,
                                           set_volume_options)


@runs_on([['distributed', 'replicated', 'distributed-replicated',
           'dispersed', 'distributed-dispersed', 'arbiter',
           'distributed-arbiter'], ['glusterfs']])
class TestDefaultMaxBricksPerProcess(GlusterBaseClass):
    """Compare cluster.max-bricks-per-process across option changes."""

    def setUp(self):
        """Create the volume used by the test."""
        # calling GlusterBaseClass setUp
        self.get_super_method(self, 'setUp')()

        if not self.setup_volume():
            raise ExecutionError("Volume creation failed: %s"
                                 % self.volname)
        g.log.info("Volume created successfully : %s", self.volname)

    def tearDown(self):
        """Delete the volume created in setUp."""
        # Cleaning up the volume
        if not self.cleanup_volume():
            raise ExecutionError("Failed to cleanup the volume %s"
                                 % self.volname)
        g.log.info("Volume deleted successfully: %s", self.volname)

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()

    def _max_bricks_per_process(self):
        """Return cluster.max-bricks-per-process read from the 'all' options.

        Fails the test when the volume get command returns None.
        """
        cluster_options = get_volume_options(self.mnode, 'all')
        self.assertIsNotNone(cluster_options,
                             "Failed to execute the volume get command")
        return cluster_options['cluster.max-bricks-per-process']

    def test_default_max_bricks_per_process(self):
        """
        Test Case:
        1) Create a volume and start it.
        2) Fetch the max bricks per process value
        3) Reset the volume options
        4) Fetch the max bricks per process value
        5) Compare the value fetched in last step with the initial value
        6) Enable brick-multiplexing in the cluster
        7) Fetch the max bricks per process value
        8) Compare the value fetched in last step with the initial value
        """
        # Remember the value seen before any option is touched
        initial_value = self._max_bricks_per_process()
        g.log.info("Successfully fetched the max bricks per-process value")

        # Reset the volume options
        status, _, _ = reset_volume_option(self.mnode, 'all', 'all')
        self.assertEqual(status, 0, "Failed to reset the volumes")
        g.log.info("Volumes reset was successful")

        # The value after the reset must match the initial one
        value_after_reset = self._max_bricks_per_process()
        self.assertEqual(initial_value, value_after_reset,
                         "Unexpected: Max bricks per-process value is not"
                         " equal")

        # Enable brick-multiplex in the cluster
        multiplex_enabled = set_volume_options(
            self.mnode, 'all', {'cluster.brick-multiplex': 'enable'})
        self.assertTrue(multiplex_enabled,
                        "Failed to enable brick-multiplex for the cluster")
        g.log.info("Successfully enabled brick-multiplex in the cluster")

        # The value with brick-multiplex enabled must match the initial one
        value_with_multiplex = self._max_bricks_per_process()
        self.assertEqual(initial_value, value_with_multiplex,
                         "Unexpected: Max bricks per-process value is not"
                         " equal")


# Scraped patch scaffolding that shares these source lines (kept verbatim):
# diff --git a/tests/functional/glusterd/test_default_ping_timer_and_epoll_thread_count.py b/tests/functional/glusterd/test_default_ping_timer_and_epoll_thread_count.py index
4127213b0..4ffe047d3 100644 --- a/tests/functional/glusterd/test_default_ping_timer_and_epoll_thread_count.py +++ b/tests/functional/glusterd/test_default_ping_timer_and_epoll_thread_count.py @@ -57,16 +57,16 @@ class TestPingTimerAndEpollThreadCountDefaultValue(GlusterBaseClass): # Shell Script to be run for epoll thread count script = """ - #!/bin/bash - function nepoll () - { - local pid=$1; - for i in $(ls /proc/$pid/task); - do - cat /proc/$pid/task/$i/stack | grep epoll_wait; - done - } - """ + #!/bin/bash + function nepoll () + { + local pid=$1; + for i in $(ls /proc/$pid/task); + do + cat /proc/$pid/task/$i/stack | grep -i 'sys_epoll_wait'; + done + } + """ # Execute the shell script cmd = "echo '{}' > test.sh;".format(script) diff --git a/tests/functional/glusterd/test_gluster_detect_drop_of_outbound_traffic.py b/tests/functional/glusterd/test_gluster_detect_drop_of_outbound_traffic.py new file mode 100644 index 000000000..1a45d5c82 --- /dev/null +++ b/tests/functional/glusterd/test_gluster_detect_drop_of_outbound_traffic.py @@ -0,0 +1,115 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 

""" Description:
        Gluster should detect drop of outbound traffic as network failure
"""

from glusto.core import Glusto as g
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.peer_ops import nodes_from_pool_list, get_peer_status
from glustolibs.gluster.volume_ops import volume_status


@runs_on([['distributed', 'replicated', 'distributed-replicated',
           'dispersed', 'distributed-dispersed', 'arbiter',
           'distributed-arbiter'], ['glusterfs']])
class TestGlusterDetectDropOfOutboundTrafficAsNetworkFailure(GlusterBaseClass):
    """Check CLI behaviour when outbound glusterd traffic is dropped."""

    def setUp(self):
        """Create the volume and initialise the iptables bookkeeping flag."""
        # calling GlusterBaseClass setUp
        self.get_super_method(self, 'setUp')()

        # tearDown reads this flag, so it is initialised here rather than
        # inside the test method
        self.iptablerule_set = False

        ret = self.setup_volume()
        if not ret:
            raise ExecutionError("Volume creation failed: %s"
                                 % self.volname)
        g.log.info("Volume created successfully : %s", self.volname)

    def tearDown(self):
        """Remove the iptables rule if the test added it, then clean up."""
        # Removing the iptable rule, if set previously
        if self.iptablerule_set:
            cmd = "iptables -D OUTPUT -p tcp -m tcp --dport 24007 -j DROP"
            ret, _, _ = g.run(self.servers[1], cmd)
            if ret:
                raise ExecutionError("Failed to remove the iptable rule"
                                     " for glusterd")

        # Cleaning up the volume
        ret = self.cleanup_volume()
        if not ret:
            raise ExecutionError("Failed to cleanup the volume %s"
                                 % self.volname)
        g.log.info("Volume deleted successfully: %s", self.volname)

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()

    def test_gluster_detect_drop_of_out_traffic_as_network_failure(self):
        """
        Test Case:
        1) Create a volume and start it.
        2) Add an iptable rule to drop outbound glusterd traffic
        3) Check if the rule is added in iptables list
        4) Execute few Gluster CLI commands like volume status, peer status
        5) Gluster CLI commands should fail with suitable error message
        """
        # Set iptable rule on one node to drop outbound glusterd traffic
        cmd = "iptables -I OUTPUT -p tcp --dport 24007 -j DROP"
        ret, _, _ = g.run(self.servers[1], cmd)
        self.assertEqual(ret, 0, "Failed to set iptable rule on the node: %s"
                         % self.servers[1])
        g.log.info("Successfully added the rule to iptable")

        # Update iptablerule_set to true so tearDown removes the rule
        self.iptablerule_set = True

        # Confirm if the iptable rule was added successfully
        iptable_rule = "'OUTPUT -p tcp -m tcp --dport 24007 -j DROP'"
        cmd = "iptables -S OUTPUT | grep %s" % iptable_rule
        ret, _, _ = g.run(self.servers[1], cmd)
        self.assertEqual(ret, 0, "Failed to get the rule from iptable")

        # Fetch number of nodes in the pool, except localhost
        pool_list = nodes_from_pool_list(self.mnode)
        peers_count = len(pool_list) - 1

        # Gluster CLI commands should fail
        # Check volume status command
        ret, _, err = volume_status(self.servers[1])
        self.assertEqual(ret, 2, "Unexpected: gluster volume status command"
                         " did not return any error")

        status_err_count = err.count("Staging failed on")
        self.assertEqual(status_err_count, peers_count, "Unexpected: No. of"
                         " nodes on which vol status cmd failed is not equal"
                         " to peers_count value")
        g.log.info("Volume status command failed with expected error message")

        # Check peer status command and all peers are in 'Disconnected' state
        peer_list = get_peer_status(self.servers[1])
        # Guard the loop below against a missing result, which would
        # otherwise surface as a TypeError instead of a test failure
        self.assertIsNotNone(peer_list, "Failed to get the peer status")

        for peer in peer_list:
            self.assertEqual(int(peer["connected"]), 0, "Unexpected: All"
                             " the peers are not in 'Disconnected' state")
            self.assertEqual(peer["stateStr"], "Peer in Cluster", "Unexpected:"
                             " All the peers not in 'Peer in Cluster' state")

        g.log.info("Peer status command listed all the peers in the "
                   "expected state")


# Scraped patch scaffolding that shares these source lines (kept verbatim):
# diff --git a/tests/functional/glusterd/test_gluster_volume_status_xml_dump.py b/tests/functional/glusterd/test_gluster_volume_status_xml_dump.py new file mode 100644 index 000000000..eacc0b3c5 --- /dev/null +++ b/tests/functional/glusterd/test_gluster_volume_status_xml_dump.py @@ -0,0 +1,106 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

"""
Description:
    Test that the output of gluster volume status stays consistent across
    repeated calls while one of the volumes is stopped
"""
from time import sleep

from glusto.core import Glusto as g
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.lib_utils import form_bricks_list
from glustolibs.gluster.volume_libs import cleanup_volume
from glustolibs.gluster.volume_ops import (
    volume_stop, get_volume_status,
    volume_create, volume_start
)


@runs_on([['distributed-arbiter'],
          ['glusterfs']])
class GetVolumeStatusXmlDump(GlusterBaseClass):
    """Compare repeated volume status results with one volume stopped."""

    def setUp(self):
        """Create and start a second volume, then set up the main volume

        Raises:
            ExecutionError: if creating or starting the second volume, or
                setting up and mounting the main volume, fails.
        """
        # Calling GlusterBaseClass setUp
        self.get_super_method(self, 'setUp')()

        # Fetching all the parameters for volume_create
        list_of_three_servers = list(self.servers[0:3])
        server_info_for_three_nodes = {
            server: self.all_servers_info[server]
            for server in list_of_three_servers}

        bricks_list = form_bricks_list(
            self.mnode, self.volname, 3, list_of_three_servers,
            server_info_for_three_nodes)

        # Creating 2nd volume
        self.volname_2 = "test_volume"
        ret, _, _ = volume_create(self.mnode, self.volname_2,
                                  bricks_list)
        # Reported as ExecutionError, like the other setup failures below
        if ret:
            raise ExecutionError("Volume creation failed")
        g.log.info("Volume %s created successfully", self.volname_2)

        ret, _, _ = volume_start(self.mnode, self.volname_2)
        if ret:
            raise ExecutionError(
                "Failed to start volume {}".format(self.volname_2))

        # Setup and mount the volume
        ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
        if not ret:
            raise ExecutionError("Failed to setup volume and mount it")

    def test_gluster_volume_status_xml_dump(self):
        """
        Steps:
        1. stop one of the volume
            (i.e) gluster volume stop <vol-name>
        2. Get the status of the volumes with --xml dump
            XML dump should be consistent
        """
        ret, _, _ = volume_stop(self.mnode, volname=self.volname_2,
                                force=True)
        self.assertFalse(ret,
                         "Failed to stop volume '{}'".format(
                             self.volname_2))

        # Reference status that every later sample is compared against
        out = get_volume_status(self.mnode)
        self.assertIsNotNone(
            out, "Failed to get volume status on {}".format(self.mnode))

        # Sample the status four more times, two seconds apart
        for _ in range(4):
            sleep(2)
            out1 = get_volume_status(self.mnode)
            self.assertIsNotNone(
                out1, "Failed to get volume status on {}".format(
                    self.mnode))
            self.assertEqual(out1, out)

    def tearDown(self):
        """tear Down Callback"""
        ret = cleanup_volume(self.mnode, self.volname_2)
        if not ret:
            raise ExecutionError(
                "Failed to remove volume '{}'".format(self.volname_2))

        # Unmount volume and cleanup.
        ret = self.unmount_volume_and_cleanup_volume(self.mounts)
        if not ret:
            raise ExecutionError("Failed to Unmount and Cleanup volume")
        g.log.info("Successful in unmount and cleanup operations")

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()


# Scraped patch scaffolding that shares these source lines (kept verbatim):
# diff --git a/tests/functional/glusterd/test_glusterd_default_volume_behavior_quorum_options.py b/tests/functional/glusterd/test_glusterd_default_volume_behavior_quorum_options.py index 71a47064f..b2652a4ea 100644 --- a/tests/functional/glusterd/test_glusterd_default_volume_behavior_quorum_options.py +++ b/tests/functional/glusterd/test_glusterd_default_volume_behavior_quorum_options.py @@ -22,7 +22,9 @@ Description: from glusto.core import Glusto as g from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on -from glustolibs.gluster.volume_ops import get_volume_options +from glustolibs.gluster.volume_ops import ( + get_volume_options, + volume_reset) from glustolibs.gluster.gluster_init import ( stop_glusterd, start_glusterd, @@ -30,6 +32,7 @@ from glustolibs.gluster.gluster_init import ( wait_for_glusterd_to_start) from
glustolibs.gluster.brick_libs import get_all_bricks from glustolibs.gluster.brickmux_ops import get_brick_processes_count +from glustolibs.gluster.peer_ops import wait_for_peers_to_connect @runs_on([['replicated', 'arbiter', 'dispersed', 'distributed', @@ -55,7 +58,8 @@ class TestGlusterDDefaultVolumeBehaviorQuorumOptions(GlusterBaseClass): else: ret = get_volume_options(self.mnode, 'all', option_name) self.assertIsNotNone(ret, "The %s option is not present" % option_name) - self.assertEqual(ret[option_name], option_value, + value = (ret[option_name]).split() + self.assertEqual(value[0], option_value, ("Volume option for %s is not equal to %s" % (option_name, option_value))) g.log.info("Volume option %s is equal to the expected value %s", @@ -81,10 +85,10 @@ class TestGlusterDDefaultVolumeBehaviorQuorumOptions(GlusterBaseClass): 4. There shouldn't be any effect to the running glusterfsd processes. """ - # Check that quorum options are not set by default. + # Check the default quorum options are correct. self._validate_vol_options('cluster.server-quorum-type', 'off') self._validate_vol_options('cluster.server-quorum-ratio', - '51 (DEFAULT)', True) + '51', True) # Get the count of number of glusterfsd processes running. count_before_glusterd_kill = self._get_total_brick_processes_count() @@ -119,11 +123,22 @@ class TestGlusterDDefaultVolumeBehaviorQuorumOptions(GlusterBaseClass): def tearDown(self): """tear Down Callback""" + # Wait for peers to connect. + ret = wait_for_peers_to_connect(self.mnode, self.servers, 50) + if not ret: + raise ExecutionError("Peers are not in connected state.") + # Unmount volume and cleanup. ret = self.cleanup_volume() if not ret: raise ExecutionError("Failed to Unmount and Cleanup volume") g.log.info("Successful in unmount and cleanup operations") + # Reset the cluster options. 
+ ret = volume_reset(self.mnode, "all") + if not ret: + raise ExecutionError("Failed to Reset the cluster options.") + g.log.info("Successfully reset cluster options.") + # Calling GlusterBaseClass tearDown self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/glusterd/test_glusterd_gluster_process_stop_start_cycle.py b/tests/functional/glusterd/test_glusterd_gluster_process_stop_start_cycle.py new file mode 100644 index 000000000..3eb3518d2 --- /dev/null +++ b/tests/functional/glusterd/test_glusterd_gluster_process_stop_start_cycle.py @@ -0,0 +1,123 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +Description: + Checking gluster processes stop and start cycle. 
+""" + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.volume_libs import ( + cleanup_volume, + wait_for_volume_process_to_be_online, + setup_volume) +from glustolibs.gluster.gluster_init import ( + start_glusterd, + wait_for_glusterd_to_start) +from glustolibs.gluster.peer_ops import wait_for_peers_to_connect + + +@runs_on([['distributed', 'replicated', 'arbiter', 'dispersed', + 'distributed-replicated', 'distributed-arbiter', + 'distributed-dispersed'], ['glusterfs']]) +class TestGlusterdStartStopCycle(GlusterBaseClass): + """ Testing Glusterd stop and start cycle """ + + def _wait_for_gluster_process_online_state(self): + """ + Function which waits for the glusterfs processes to come up + """ + # Wait for glusterd to be online and validate it's running. + self.assertTrue(wait_for_glusterd_to_start(self.servers), + "glusterd not up on the desired nodes.") + g.log.info("Glusterd is up and running on desired nodes.") + + # Wait for peers to connect + ret = wait_for_peers_to_connect(self.mnode, self.servers, 50) + self.assertTrue(ret, "Peers not in connected state.") + g.log.info("Peers in connected state.") + + # Wait for all volume processes to be online + ret = wait_for_volume_process_to_be_online(self.mnode, + self.volname, + timeout=600) + self.assertTrue(ret, ("All volume processes not up.")) + g.log.info("All volume processes are up.") + + def test_glusterd_start_stop_cycle(self): + """ + Test Glusterd stop-start cycle of gluster processes. + 1. Create a gluster volume. + 2. Kill all gluster related processes. + 3. Start glusterd service. + 4. Verify that all gluster processes are up. + 5. Repeat the above steps 5 times. 
+ """ + # Create and start a volume + ret = setup_volume(self.mnode, self.all_servers_info, self.volume) + self.assertTrue(ret, "Failed to create and start volume") + + for _ in range(5): + killed_gluster_process_count = [] + # Kill gluster processes in all servers + for server in self.servers: + cmd = ('pkill --signal 9 -c -e "(glusterd|glusterfsd|glusterfs' + ')"|tail -1') + ret, out, err = g.run(server, cmd) + self.assertEqual(ret, 0, err) + killed_gluster_process_count.append(int(out)) + + # Start glusterd on all servers. + ret = start_glusterd(self.servers) + self.assertTrue(ret, ("Failed to restart glusterd on desired" + " nodes.")) + g.log.info("Glusterd started on desired nodes.") + + # Wait for gluster processes to come up. + self._wait_for_gluster_process_online_state() + + spawned_gluster_process_count = [] + # Get number of gluster processes spawned in all server + for server in self.servers: + cmd = ('pgrep -c "(glusterd|glusterfsd|glusterfs)"') + ret, out, err = g.run(server, cmd) + self.assertEqual(ret, 0, err) + spawned_gluster_process_count.append(int(out)) + + # Compare process count in each server. 
+ for index, server in enumerate(self.servers): + self.assertEqual(killed_gluster_process_count[index], + spawned_gluster_process_count[index], + ("All processes not up and running on %s", + server)) + + def tearDown(self): + """ tear Down Callback """ + # Wait for peers to connect + ret = wait_for_peers_to_connect(self.mnode, self.servers, 50) + if not ret: + raise ExecutionError("Peers are not in connected state.") + + # Cleanup the volume + ret = cleanup_volume(self.mnode, self.volname) + if not ret: + raise ExecutionError("Failed to cleanup volume") + g.log.info("Successfully cleaned up the volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/glusterd/test_glusterd_memory_consumption_increase.py b/tests/functional/glusterd/test_glusterd_memory_consumption_increase.py new file mode 100644 index 000000000..92c48da6f --- /dev/null +++ b/tests/functional/glusterd/test_glusterd_memory_consumption_increase.py @@ -0,0 +1,207 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +""" Description: + Increase in glusterd memory consumption on repetitive operations + for 100 volumes +""" + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass +from glustolibs.gluster.volume_ops import (volume_stop, volume_delete, + get_volume_list, + volume_start) +from glustolibs.gluster.gluster_init import (restart_glusterd, + wait_for_glusterd_to_start) +from glustolibs.gluster.volume_libs import (bulk_volume_creation, + cleanup_volume) +from glustolibs.gluster.volume_ops import set_volume_options + + +class TestGlusterMemoryConsumptionIncrease(GlusterBaseClass): + def tearDown(self): + # Clean up all volumes + if self.volume_present: + vol_list = get_volume_list(self.mnode) + if vol_list is None: + raise ExecutionError("Failed to get the volume list") + + for volume in vol_list: + ret = cleanup_volume(self.mnode, volume) + if not ret: + raise ExecutionError("Unable to delete volume %s" % volume) + g.log.info("Volume deleted successfully : %s", volume) + + # Disable multiplex + ret = set_volume_options(self.mnode, 'all', + {'cluster.brick-multiplex': 'disable'}) + self.assertTrue(ret, "Failed to enable brick-multiplex" + " for the cluster") + + # Calling baseclass tearDown method + self.get_super_method(self, 'tearDown')() + + def _volume_operations_in_loop(self): + """ Create, start, stop and delete 100 volumes in a loop """ + # Create and start 100 volumes in a loop + self.volume_config = { + 'name': 'volume-', + 'servers': self.servers, + 'voltype': {'type': 'distributed-replicated', + 'dist_count': 2, + 'replica_count': 3}, + } + + ret = bulk_volume_creation(self.mnode, 100, self.all_servers_info, + self.volume_config, "", False, True) + self.assertTrue(ret, "Failed to create volumes") + + self.volume_present = True + + g.log.info("Successfully created all the volumes") + + # Start 100 volumes in loop + for i in range(100): + self.volname = 
"volume-%d" % i + ret, _, _ = volume_start(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to start volume: %s" + % self.volname) + + g.log.info("Successfully started all the volumes") + + # Stop 100 volumes in loop + for i in range(100): + self.volname = "volume-%d" % i + ret, _, _ = volume_stop(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to stop volume: %s" + % self.volname) + + g.log.info("Successfully stopped all the volumes") + + # Delete 100 volumes in loop + for i in range(100): + self.volname = "volume-%d" % i + ret = volume_delete(self.mnode, self.volname) + self.assertTrue(ret, "Failed to delete volume: %s" + % self.volname) + + self.volume_present = False + + g.log.info("Successfully deleted all the volumes") + + def _memory_consumption_for_all_nodes(self, pid_list): + """Fetch the memory consumption by glusterd process for + all the nodes + """ + memory_consumed_list = [] + for i, server in enumerate(self.servers): + # Get the memory consumption of glusterd in each node + cmd = "top -b -n 1 -p %d | awk 'FNR==8 {print $6}'" % pid_list[i] + ret, mem, _ = g.run(server, cmd) + self.assertEqual(ret, 0, "Failed to get the memory usage of" + " glusterd process") + mem = int(mem)//1024 + memory_consumed_list.append(mem) + + return memory_consumed_list + + def test_glusterd_memory_consumption_increase(self): + """ + Test Case: + 1) Enable brick-multiplex and set max-bricks-per-process to 3 in + the cluster + 2) Get the glusterd memory consumption + 3) Perform create,start,stop,delete operation for 100 volumes + 4) Check glusterd memory consumption, it should not increase by + more than 50MB + 5) Repeat steps 3-4 for two more time + 6) Check glusterd memory consumption it should not increase by + more than 10MB + """ + # pylint: disable=too-many-locals + # Restarting glusterd to refresh its memory consumption + ret = restart_glusterd(self.servers) + self.assertTrue(ret, "Restarting glusterd failed") + + # check if glusterd is running 
post reboot + ret = wait_for_glusterd_to_start(self.servers) + self.assertTrue(ret, "Glusterd service is not running post reboot") + + # Enable brick-multiplex, set max-bricks-per-process to 3 in cluster + for key, value in (('cluster.brick-multiplex', 'enable'), + ('cluster.max-bricks-per-process', '3')): + ret = set_volume_options(self.mnode, 'all', {key: value}) + self.assertTrue(ret, "Failed to set {} to {} " + " for the cluster".format(key, value)) + + # Get the pidof of glusterd process + pid_list = [] + for server in self.servers: + # Get the pidof of glusterd process + cmd = "pidof glusterd" + ret, pid, _ = g.run(server, cmd) + self.assertEqual(ret, 0, "Failed to get the pid of glusterd") + pid = int(pid) + pid_list.append(pid) + + # Fetch the list of memory consumed in all the nodes + mem_consumed_list = self._memory_consumption_for_all_nodes(pid_list) + + # Perform volume operations for 100 volumes for first time + self._volume_operations_in_loop() + + # Fetch the list of memory consumed in all the nodes after 1 iteration + mem_consumed_list_1 = self._memory_consumption_for_all_nodes(pid_list) + + for i, mem in enumerate(mem_consumed_list_1): + condition_met = False + if mem - mem_consumed_list[i] <= 50: + condition_met = True + + self.assertTrue(condition_met, "Unexpected: Memory consumption" + " glusterd increased more than the expected" + " of value") + + # Perform volume operations for 100 volumes for second time + self._volume_operations_in_loop() + + # Fetch the list of memory consumed in all the nodes after 2 iterations + mem_consumed_list_2 = self._memory_consumption_for_all_nodes(pid_list) + + for i, mem in enumerate(mem_consumed_list_2): + condition_met = False + if mem - mem_consumed_list_1[i] <= 10: + condition_met = True + + self.assertTrue(condition_met, "Unexpected: Memory consumption" + " glusterd increased more than the expected" + " of value") + + # Perform volume operations for 100 volumes for third time + 
self._volume_operations_in_loop() + + # Fetch the list of memory consumed in all the nodes after 3 iterations + mem_consumed_list_3 = self._memory_consumption_for_all_nodes(pid_list) + + for i, mem in enumerate(mem_consumed_list_3): + condition_met = False + if mem - mem_consumed_list_2[i] <= 10: + condition_met = True + + self.assertTrue(condition_met, "Unexpected: Memory consumption" + " glusterd increased more than the expected" + " of value") diff --git a/tests/functional/glusterd/test_glusterd_quorum_command.py b/tests/functional/glusterd/test_glusterd_quorum_command.py index c51293e38..034d626b3 100644 --- a/tests/functional/glusterd/test_glusterd_quorum_command.py +++ b/tests/functional/glusterd/test_glusterd_quorum_command.py @@ -24,6 +24,7 @@ from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on from glustolibs.gluster.volume_ops import ( set_volume_options, + volume_reset, get_volume_options) @@ -93,5 +94,11 @@ class TestGlusterDQuorumCLICommands(GlusterBaseClass): raise ExecutionError("Failed to unmount and cleanup volume") g.log.info("Successful in unmount and cleanup of volume") + # Reset the cluster options. + ret = volume_reset(self.mnode, "all") + if not ret: + raise ExecutionError("Failed to Reset the cluster options.") + g.log.info("Successfully reset cluster options.") + # Calling GlusterBaseClass tearDown self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/glusterd/test_probe_glusterd_down.py b/tests/functional/glusterd/test_probe_glusterd_down.py index 3705904a9..c851bf104 100644 --- a/tests/functional/glusterd/test_probe_glusterd_down.py +++ b/tests/functional/glusterd/test_probe_glusterd_down.py @@ -1,4 +1,4 @@ -# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2020-2021 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,17 +14,14 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -from time import sleep - from glusto.core import Glusto as g from glustolibs.gluster.gluster_base_class import GlusterBaseClass from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.peer_ops import peer_probe from glustolibs.gluster.lib_utils import is_core_file_created from glustolibs.gluster.peer_ops import peer_detach, is_peer_connected -from glustolibs.gluster.gluster_init import (stop_glusterd, start_glusterd, - wait_for_glusterd_to_start) -from glustolibs.misc.misc_libs import are_nodes_online +from glustolibs.gluster.gluster_init import stop_glusterd, start_glusterd +from glustolibs.misc.misc_libs import bring_down_network_interface class PeerProbeWhenGlusterdDown(GlusterBaseClass): @@ -57,7 +54,7 @@ class PeerProbeWhenGlusterdDown(GlusterBaseClass): ret, test_timestamp, _ = g.run_local('date +%s') test_timestamp = test_timestamp.strip() - # detach one of the nodes which is part of the cluster + # Detach one of the nodes which is part of the cluster g.log.info("detaching server %s ", self.servers[1]) ret, _, err = peer_detach(self.mnode, self.servers[1]) msg = 'peer detach: failed: %s is not part of cluster\n' \ @@ -66,12 +63,12 @@ class PeerProbeWhenGlusterdDown(GlusterBaseClass): self.assertEqual(err, msg, "Failed to detach %s " % (self.servers[1])) - # bring down glusterd of the server which has been detached + # Bring down glusterd of the server which has been detached g.log.info("Stopping glusterd on %s ", self.servers[1]) ret = stop_glusterd(self.servers[1]) self.assertTrue(ret, "Fail to stop glusterd on %s " % self.servers[1]) - # trying to peer probe the node whose glusterd was stopped using its IP + # 
Trying to peer probe the node whose glusterd was stopped using IP g.log.info("Peer probing %s when glusterd down ", self.servers[1]) ret, _, err = peer_probe(self.mnode, self.servers[1]) self.assertNotEqual(ret, 0, "Peer probe should not pass when " @@ -79,7 +76,7 @@ class PeerProbeWhenGlusterdDown(GlusterBaseClass): self.assertEqual(err, "peer probe: failed: Probe returned with " "Transport endpoint is not connected\n") - # trying to peer probe the same node with hostname + # Trying to peer probe the same node with hostname g.log.info("Peer probing node %s using hostname with glusterd down ", self.servers[1]) hostname = g.run(self.servers[1], "hostname") @@ -89,27 +86,24 @@ class PeerProbeWhenGlusterdDown(GlusterBaseClass): self.assertEqual(err, "peer probe: failed: Probe returned with" " Transport endpoint is not connected\n") - # start glusterd again for the next set of test steps + # Start glusterd again for the next set of test steps g.log.info("starting glusterd on %s ", self.servers[1]) ret = start_glusterd(self.servers[1]) self.assertTrue(ret, "glusterd couldn't start successfully on %s" % self.servers[1]) - # reboot a server and then trying to peer probe at the time of reboot - g.log.info("Rebooting %s and checking peer probe", self.servers[1]) - reboot = g.run_async(self.servers[1], "reboot") - - # Mandatory sleep for 3 seconds to make sure node is in halted state - sleep(3) + # Bring down the network for sometime + network_status = bring_down_network_interface(self.servers[1], 150) # Peer probing the node using IP when it is still not online - g.log.info("Peer probing node %s which has been issued a reboot ", + g.log.info("Peer probing node %s when network is down", self.servers[1]) ret, _, err = peer_probe(self.mnode, self.servers[1]) self.assertNotEqual(ret, 0, "Peer probe passed when it was expected to" " fail") - self.assertEqual(err, "peer probe: failed: Probe returned with " - "Transport endpoint is not connected\n") + 
self.assertEqual(err.split("\n")[0], "peer probe: failed: Probe " + "returned with Transport endpoint" + " is not connected") # Peer probing the node using hostname when it is still not online g.log.info("Peer probing node %s using hostname which is still " @@ -118,35 +112,21 @@ class PeerProbeWhenGlusterdDown(GlusterBaseClass): ret, _, err = peer_probe(self.mnode, hostname[1].strip()) self.assertNotEqual(ret, 0, "Peer probe should not pass when node " "has not come online") - self.assertEqual(err, "peer probe: failed: Probe returned with " - "Transport endpoint is not connected\n") + self.assertEqual(err.split("\n")[0], "peer probe: failed: Probe " + "returned with Transport endpoint" + " is not connected") + + ret, _, _ = network_status.async_communicate() + if ret != 0: + g.log.error("Failed to perform network interface ops") - ret, _, _ = reboot.async_communicate() - self.assertEqual(ret, 255, "reboot failed") - - # Validate if rebooted node is online or not - count = 0 - while count < 40: - sleep(15) - ret, _ = are_nodes_online(self.servers[1]) - if ret: - g.log.info("Node %s is online", self.servers[1]) - break - count += 1 - self.assertTrue(ret, "Node in test not yet online") - - # check if glusterd is running post reboot - ret = wait_for_glusterd_to_start(self.servers[1], - glusterd_start_wait_timeout=120) - self.assertTrue(ret, "Glusterd service is not running post reboot") - - # peer probe the node must pass + # Peer probe the node must pass g.log.info("peer probing node %s", self.servers[1]) ret, _, err = peer_probe(self.mnode, self.servers[1]) self.assertEqual(ret, 0, "Peer probe has failed unexpectedly with " "%s " % err) - # checking if core file created in "/", "/tmp" and "/var/log/core" + # Checking if core file created in "/", "/tmp" and "/var/log/core" ret = is_core_file_created(self.servers, test_timestamp) self.assertTrue(ret, "core file found") diff --git a/tests/functional/glusterd/test_rebalance_start_not_failed_with_socket_path_too_long.py 
b/tests/functional/glusterd/test_rebalance_start_not_failed_with_socket_path_too_long.py new file mode 100644 index 000000000..87cab40d0 --- /dev/null +++ b/tests/functional/glusterd/test_rebalance_start_not_failed_with_socket_path_too_long.py @@ -0,0 +1,173 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+""" +Description: + Test Rebalance should start successfully if name of volume more than 108 + chars +""" + +from glusto.core import Glusto as g +from glustolibs.gluster.brick_ops import add_brick +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.lib_utils import form_bricks_list +from glustolibs.gluster.mount_ops import umount_volume, mount_volume +from glustolibs.gluster.rebalance_ops import ( + rebalance_start, + wait_for_rebalance_to_complete +) +from glustolibs.gluster.volume_libs import ( + volume_start, + cleanup_volume +) +from glustolibs.gluster.volume_ops import volume_create, get_volume_list +from glustolibs.io.utils import run_linux_untar + + +class TestLookupDir(GlusterBaseClass): + def tearDown(self): + cmd = ("sed -i '/transport.socket.bind-address/d'" + " /etc/glusterfs/glusterd.vol") + ret, _, _ = g.run(self.mnode, cmd) + if ret: + raise ExecutionError("Failed to remove entry from 'glusterd.vol'") + for mount_dir in self.mount: + ret = umount_volume(self.clients[0], mount_dir) + if not ret: + raise ExecutionError("Failed to cleanup Volume") + + vol_list = get_volume_list(self.mnode) + if vol_list is not None: + for volume in vol_list: + ret = cleanup_volume(self.mnode, volume) + if not ret: + raise ExecutionError("Failed to cleanup volume") + g.log.info("Volume deleted successfully : %s", volume) + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_rebalance_start_not_fail(self): + """ + 1. On Node N1, Add "transport.socket.bind-address N1" in the + /etc/glusterfs/glusterd.vol + 2. Create a replicate (1X3) and disperse (4+2) volumes with + name more than 108 chars + 3. Mount the both volumes using node 1 where you added the + "transport.socket.bind-address" and start IO(like untar) + 4. Perform add-brick on replicate volume 3-bricks + 5. 
Start rebalance on replicated volume + 6. Perform add-brick for disperse volume 6 bricks + 7. Start rebalance of disperse volume + """ + cmd = ("sed -i 's/end-volume/option " + "transport.socket.bind-address {}\\n&/g' " + "/etc/glusterfs/glusterd.vol".format(self.mnode)) + disperse = ("disperse_e4upxjmtre7dl4797wedbp7r3jr8equzvmcae9f55t6z1" + "ffhrlk40jtnrzgo4n48fjf6b138cttozw3c6of3ze71n9urnjkshoi") + replicate = ("replicate_e4upxjmtre7dl4797wedbp7r3jr8equzvmcae9f55t6z1" + "ffhrlk40tnrzgo4n48fjf6b138cttozw3c6of3ze71n9urnjskahn") + + volnames = (disperse, replicate) + for volume, vol_name in ( + ("disperse", disperse), ("replicate", replicate)): + + bricks_list = form_bricks_list(self.mnode, volume, + 6 if volume == "disperse" else 3, + self.servers, + self.all_servers_info) + if volume == "replicate": + ret, _, _ = volume_create(self.mnode, replicate, + bricks_list, + replica_count=3) + + else: + ret, _, _ = volume_create( + self.mnode, disperse, bricks_list, force=True, + disperse_count=6, redundancy_count=2) + + self.assertFalse( + ret, + "Unexpected: Volume create '{}' failed ".format(vol_name)) + ret, _, _ = volume_start(self.mnode, vol_name) + self.assertFalse(ret, "Failed to start volume") + + # Add entry in 'glusterd.vol' + ret, _, _ = g.run(self.mnode, cmd) + self.assertFalse( + ret, "Failed to add entry in 'glusterd.vol' file") + + self.list_of_io_processes = [] + + # mount volume + self.mount = ("/mnt/replicated_mount", "/mnt/disperse_mount") + for mount_dir, volname in zip(self.mount, volnames): + ret, _, _ = mount_volume( + volname, "glusterfs", mount_dir, self.mnode, + self.clients[0]) + self.assertFalse( + ret, "Failed to mount the volume '{}'".format(mount_dir)) + + # Run IO + # Create a dir to start untar + # for mount_point in self.mount: + self.linux_untar_dir = "{}/{}".format(mount_dir, "linuxuntar") + ret = mkdir(self.clients[0], self.linux_untar_dir) + self.assertTrue(ret, "Failed to create dir linuxuntar for untar") + + # Start linux untar on 
dir linuxuntar + ret = run_linux_untar(self.clients[:1], mount_dir, + dirs=tuple(['linuxuntar'])) + self.list_of_io_processes += ret + self.is_io_running = True + + # Add Brick to replicate Volume + bricks_list = form_bricks_list( + self.mnode, replicate, 3, + self.servers, self.all_servers_info, "replicate") + ret, _, _ = add_brick( + self.mnode, replicate, bricks_list, force=True) + self.assertFalse(ret, "Failed to add-brick '{}'".format(replicate)) + + # Trigger Rebalance on the volume + ret, _, _ = rebalance_start(self.mnode, replicate) + self.assertFalse( + ret, "Failed to start rebalance on the volume '{}'".format( + replicate)) + + # Add Brick to disperse Volume + bricks_list = form_bricks_list( + self.mnode, disperse, 6, + self.servers, self.all_servers_info, "disperse") + + ret, _, _ = add_brick( + self.mnode, disperse, bricks_list, force=True) + self.assertFalse(ret, "Failed to add-brick '{}'".format(disperse)) + + # Trigger Rebalance on the volume + ret, _, _ = rebalance_start(self.mnode, disperse) + self.assertFalse( + ret, + "Failed to start rebalance on the volume {}".format(disperse)) + + # Check if Rebalance is completed on both the volume + for volume in (replicate, disperse): + ret = wait_for_rebalance_to_complete( + self.mnode, volume, timeout=600) + self.assertTrue( + ret, "Rebalance is not Compleated on Volume '{}'".format( + volume)) diff --git a/tests/functional/glusterd/test_reserve_limt_change_while_rebalance.py b/tests/functional/glusterd/test_reserve_limt_change_while_rebalance.py new file mode 100644 index 000000000..2a7aacdac --- /dev/null +++ b/tests/functional/glusterd/test_reserve_limt_change_while_rebalance.py @@ -0,0 +1,127 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. 
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.glusterdir import mkdir
from glustolibs.gluster.rebalance_ops import (
    rebalance_start,
    rebalance_stop,
    wait_for_rebalance_to_complete
)
from glustolibs.gluster.volume_libs import expand_volume
from glustolibs.gluster.volume_ops import set_volume_options
from glustolibs.io.utils import run_linux_untar


@runs_on([['distributed-replicated'], ['glusterfs']])
class TestReserveLimitChangeWhileRebalance(GlusterBaseClass):
    """Change storage.reserve while a rebalance is running on the volume."""

    def _set_vol_option(self, option):
        """Set a volume option and fail the test if it cannot be set.

        Args:
            option (dict): option name to value mapping handed to
                set_volume_options().
        """
        ret = set_volume_options(self.mnode, self.volname, option)
        self.assertTrue(ret, "Failed to set option %s on volume %s"
                        % (option, self.volname))

    @classmethod
    def setUpClass(cls):
        """Create and mount the volume once for the whole class."""
        # Calling GlusterBaseClass setUpClass
        cls.get_super_method(cls, 'setUpClass')()

        # Set I/O flag to false
        cls.is_io_running = False

        # Setup Volume and Mount Volume
        ret = cls.setup_volume_and_mount_volume(mounts=cls.mounts)
        if not ret:
            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")

    def tearDown(self):
        """Wait for the rebalance to finish, then unmount and delete the
        volume.

        Raises:
            ExecutionError: if the rebalance does not complete within the
                timeout or the volume cannot be cleaned up.
        """
        if not wait_for_rebalance_to_complete(
                self.mnode, self.volname, timeout=300):
            raise ExecutionError(
                "Failed to complete rebalance on volume '{}'".format(
                    self.volname))

        # Unmounting and cleaning volume. setUpClass mounted every entry of
        # cls.mounts, so unmount all of them and not only the first one.
        ret = self.unmount_volume_and_cleanup_volume(self.mounts)
        if not ret:
            raise ExecutionError("Unable to delete volume %s" % self.volname)

        self.get_super_method(self, 'tearDown')()

    def test_reserve_limt_change_while_rebalance(self):
        """
        1) Create a distributed-replicated volume and start it.
        2) Enable storage.reserve option on the volume using below command,
           gluster volume set storage.reserve 50
        3) Mount the volume on a client
        4) Add some data on the mount point (should be within reserve limits)
        5) Now, add-brick and trigger rebalance.
           While rebalance is in-progress change the reserve limit to a lower
           value say (30)
        6) Stop the rebalance
        7) Reset the storage reserve value to 50 as in step 2
        8) Trigger rebalance
        9) While rebalance in-progress change the reserve limit to a higher
           value say (70)
        """

        # Setting storage.reserve 50
        self._set_vol_option({"storage.reserve": "50"})

        self.list_of_io_processes = []
        # Create a dir to start untar
        self.linux_untar_dir = "{}/{}".format(self.mounts[0].mountpoint,
                                              "linuxuntar")
        ret = mkdir(self.clients[0], self.linux_untar_dir)
        self.assertTrue(ret, "Failed to create dir linuxuntar for untar")

        # Start linux untar on dir linuxuntar
        ret = run_linux_untar(self.clients[0], self.mounts[0].mountpoint,
                              dirs=tuple(['linuxuntar']))
        self.list_of_io_processes += ret
        self.is_io_running = True

        # Add bricks to the volume
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, "Failed to add bricks to volume %s"
                        % self.volname)

        # Trigger rebalance on the volume
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s"
                         % self.volname)

        # Setting storage.reserve 30
        self._set_vol_option({"storage.reserve": "30"})

        # Stopping Rebalance
        ret, _, _ = rebalance_stop(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to stop rebalance on the volume %s"
                         % self.volname)

        # Resetting storage.reserve to 50, as described in step 7 of the
        # test plan above (the value "500" used previously contradicted it)
        self._set_vol_option({"storage.reserve": "50"})

        # Trigger rebalance on the volume
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s"
                         % self.volname)

        # Setting storage.reserve 70
        self._set_vol_option({"storage.reserve": "70"})


# diff --git a/tests/functional/glusterd/test_reserved_port_range_for_gluster.py b/tests/functional/glusterd/test_reserved_port_range_for_gluster.py
# new file mode 100644
# index 000000000..b03c74884
# --- /dev/null
# +++ b/tests/functional/glusterd/test_reserved_port_range_for_gluster.py
# @@ -0,0 +1,152 @@
# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

""" Description:
    Setting reserved port range for gluster
"""

from random import choice
from glusto.core import Glusto as g
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass
from glustolibs.gluster.volume_ops import (volume_create, volume_start,
                                           get_volume_list)
from glustolibs.gluster.volume_libs import cleanup_volume
from glustolibs.gluster.lib_utils import get_servers_bricks_dict
from glustolibs.gluster.gluster_init import restart_glusterd
from glustolibs.gluster.peer_ops import wait_for_peers_to_connect


class TestReservedPortRangeForGluster(GlusterBaseClass):
    """Volume start must fail once the configured port range is used up."""

    def tearDown(self):
        """Restore max-port if the test left it modified and delete every
        volume the test created.

        Raises:
            ExecutionError: if glusterd cannot be restarted, the volume list
                cannot be fetched or a volume cannot be deleted.
        """
        # Reset port range if some test fails. getattr() is used because the
        # flag only exists once the test has edited glusterd.vol; a failure
        # before that point must not surface as an AttributeError here.
        if getattr(self, 'port_range_changed', False):
            cmd = "sed -i 's/49200/60999/' /etc/glusterfs/glusterd.vol"
            ret, _, _ = g.run(self.mnode, cmd)
            self.assertEqual(ret, 0, "Failed to set the max-port back to"
                             " 60999 in glusterd.vol file")
            self.port_range_changed = False

            # The test restarts glusterd after every edit of glusterd.vol;
            # do the same here so the restored value is the one in effect
            # for whatever runs next.
            if not restart_glusterd(self.mnode):
                raise ExecutionError("Failed to restart glusterd on %s"
                                     % self.mnode)
            if not wait_for_peers_to_connect(self.servers[1], self.servers):
                raise ExecutionError("All the peers are not in connected "
                                     "state")

        # clean up all volumes
        vol_list = get_volume_list(self.mnode)
        if vol_list is None:
            raise ExecutionError("Failed to get the volume list")

        for volume in vol_list:
            ret = cleanup_volume(self.mnode, volume)
            if not ret:
                raise ExecutionError("Unable to delete volume %s" % volume)
            g.log.info("Volume deleted successfully : %s", volume)

        # Calling baseclass tearDown method
        self.get_super_method(self, 'tearDown')()

    def test_reserved_port_range_for_gluster(self):
        """
        Test Case:
        1) Set the max-port option in glusterd.vol file to 49200
        2) Restart glusterd on one of the node
        3) Create 50 volumes in a loop
        4) Try to start the 50 volumes in a loop
        5) Confirm that the 50th volume failed to start
        6) Confirm the error message, due to which volume failed to start
        7) Set the max-port option in glusterd.vol file back to default value
        8) Restart glusterd on the same node
        9) Starting the 50th volume should succeed now
        """
        # Set max port number as 49200 in glusterd.vol file
        cmd = "sed -i 's/60999/49200/' /etc/glusterfs/glusterd.vol"
        ret, _, _ = g.run(self.mnode, cmd)
        self.assertEqual(ret, 0, "Failed to set the max-port to 49200 in"
                         " glusterd.vol file")

        self.port_range_changed = True

        # Restart glusterd
        ret = restart_glusterd(self.mnode)
        self.assertTrue(ret, "Failed to restart glusterd")
        g.log.info("Successfully restarted glusterd on node: %s", self.mnode)

        # Check node on which glusterd was restarted is back to 'Connected'
        # state from any other peer
        ret = wait_for_peers_to_connect(self.servers[1], self.servers)
        self.assertTrue(ret, "All the peers are not in connected state")

        # Fetch the available bricks dict
        bricks_dict = get_servers_bricks_dict(self.servers,
                                              self.all_servers_info)
        self.assertIsNotNone(bricks_dict, "Failed to get the bricks dict")

        # Create 50 volumes in a loop, one brick per server for each volume
        for i in range(1, 51):
            self.volname = "volume-%d" % i
            bricks_list = []
            for j, (key, value) in enumerate(bricks_dict.items(), start=1):
                brick = choice(value)
                brick = "{}:{}/{}_brick-{}".format(key, brick,
                                                   self.volname, j)
                bricks_list.append(brick)

            ret, _, _ = volume_create(self.mnode, self.volname, bricks_list)
            self.assertEqual(ret, 0, "Failed to create volume: %s"
                             % self.volname)
            g.log.info("Successfully created volume: %s", self.volname)

        # Try to start 50 volumes in loop, stopping at the first failure
        for i in range(1, 51):
            self.volname = "volume-%d" % i
            ret, _, err = volume_start(self.mnode, self.volname)
            if ret:
                break
        g.log.info("Successfully started all the volumes until volume: %s",
                   self.volname)

        # Confirm if the 50th volume failed to start
        self.assertEqual(i, 50, "Failed to start the volumes volume-1 to"
                         " volume-49 in a loop")
        # i == 50 alone is also true when every start succeeded, so check
        # the return code of the last start explicitly
        self.assertNotEqual(ret, 0, "Unexpected: volume-50 started although"
                            " the port range should be exhausted")

        # Confirm the error message on volume start fail
        err_msg = ("volume start: volume-50: failed: Commit failed on"
                   " localhost. Please check log file for details.")
        self.assertEqual(err.strip(), err_msg, "Volume start failed with"
                         " a different error message")

        # Confirm the error message from the log file
        cmd = ("cat /var/log/glusterfs/glusterd.log | %s"
               % "grep -i 'All the ports in the range are exhausted' | wc -l")
        ret, out, _ = g.run(self.mnode, cmd)
        self.assertEqual(ret, 0, "Failed to 'grep' the glusterd.log file")
        # 'wc -l' output ends with a newline; strip it, otherwise the
        # comparison with "0" could never be equal and the check is vacuous
        self.assertNotEqual(out.strip(), "0", "Volume start didn't fail with"
                            " expected error message")

        # Set max port number back to default value in glusterd.vol file
        cmd = "sed -i 's/49200/60999/' /etc/glusterfs/glusterd.vol"
        ret, _, _ = g.run(self.mnode, cmd)
        self.assertEqual(ret, 0, "Failed to set the max-port back to 60999 in"
                         " glusterd.vol file")

        self.port_range_changed = False

        # Restart glusterd on the same node
        ret = restart_glusterd(self.mnode)
        self.assertTrue(ret, "Failed to restart glusterd")
        g.log.info("Successfully restarted glusterd on node: %s", self.mnode)

        # Starting the 50th volume should succeed now
        self.volname = "volume-%d" % i
        ret, _, _ = volume_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to start volume: %s" % self.volname)


# diff --git a/tests/functional/glusterd/test_verify_df_output.py b/tests/functional/glusterd/test_verify_df_output.py
# new file mode 100644
# index 000000000..4eac9193b
# --- /dev/null
# +++ b/tests/functional/glusterd/test_verify_df_output.py
# @@ -0,0 +1,171 @@
# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.


from glusto.core import Glusto as g
from glustolibs.gluster.gluster_base_class import (GlusterBaseClass,
                                                   runs_on)
from glustolibs.gluster.heal_libs import monitor_heal_completion
from glustolibs.io.utils import validate_io_procs
from glustolibs.misc.misc_libs import upload_scripts
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.volume_libs import (replace_brick_from_volume,
                                            shrink_volume, expand_volume)
from glustolibs.gluster.brick_libs import get_all_bricks


@runs_on([['distributed-dispersed', 'distributed-replicated',
           'distributed-arbiter', 'dispersed', 'replicated',
           'arbiter'],
          ['glusterfs']])
class VerifyDFWithReplaceBrick(GlusterBaseClass):
    """Check the size reported by df -h across replace/add/remove brick."""

    @classmethod
    def setUpClass(cls):
        """Upload the IO script to all clients.

        Raises:
            ExecutionError: if the script upload fails.
        """
        # Calling GlusterBaseClass setUpClass
        cls.get_super_method(cls, 'setUpClass')()

        # Upload io scripts for running IO on mounts
        cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
                                  "file_dir_ops.py")
        if not upload_scripts(cls.clients, [cls.script_upload_path]):
            raise ExecutionError("Failed to upload IO scripts to clients %s"
                                 % cls.clients)
        g.log.info("Successfully uploaded IO scripts to clients %s",
                   cls.clients)

    def setUp(self):
        """Create and mount the volume for each test."""
        # Calling GlusterBaseClass setUp
        self.get_super_method(self, 'setUp')()

        # Setup Volume and Mount Volume
        if not self.setup_volume_and_mount_volume(mounts=self.mounts):
            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
        g.log.info("Successful in Setup Volume and Mount Volume")

    def _perform_io_and_validate(self):
        """ Performs IO on the mount points and validates it"""
        all_mounts_procs, count = [], 1
        for mount_obj in self.mounts:
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d --dir-depth 2 "
                   "--dir-length 3 --max-num-of-dirs 3 "
                   "--num-of-files 2 %s" % (
                       self.script_upload_path, count,
                       mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
            # Each mount gets its own --dirname-start-num value
            count = count + 10

        # Validating IO's on mount point and waiting to complete
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated IO's")

    def _replace_bricks_and_wait_for_heal_completion(self):
        """ Replaces all the bricks and waits for the heal to complete"""
        existing_bricks = get_all_bricks(self.mnode, self.volname)
        for brick_to_replace in existing_bricks:
            ret = replace_brick_from_volume(self.mnode, self.volname,
                                            self.servers,
                                            self.all_servers_info,
                                            src_brick=brick_to_replace)
            self.assertTrue(ret,
                            "Replace of %s failed" % brick_to_replace)
            g.log.info("Replace of brick %s successful for volume %s",
                       brick_to_replace, self.volname)

            # Monitor heal completion before replacing the next brick
            ret = monitor_heal_completion(self.mnode, self.volname)
            self.assertTrue(ret, 'Heal has not yet completed')
            g.log.info('Heal has completed successfully')

    def _get_mount_size_from_df_h_output(self):
        """ Extracts the mount size from the df -h output

        Returns:
            float: second column of the first 'df -h' line matching the
                volume name, with its last character removed.
        """
        # awk picks the second column; sed drops the last character of it.
        # NOTE(review): that last character is presumably the unit suffix,
        # so values are only comparable while df reports the same unit
        # before and after the operation - confirm.
        split_cmd = " | awk '{split($0,a,\" \");print a[2]}' | sed 's/.$//'"
        cmd = ("cd {};df -h | grep {} {}".format(self.mounts[0].mountpoint,
                                                 self.volname, split_cmd))
        ret, mount_size, _ = g.run(self.clients[0], cmd)
        self.assertEqual(ret, 0, "Failed to extract mount size")
        return float(mount_size.split("\n")[0])

    def test_verify_df_output_when_brick_replaced(self):
        """
        - Take the output of df -h.
        - Replace any one brick for the volumes.
        - Wait till the heal is completed
        - Repeat steps 1, 2 and 3 for all bricks for all volumes.
        - Check if there are any inconsistencies in the output of df -h
        - Remove bricks from volume and check output of df -h
        - Add bricks to volume and check output of df -h
        """

        # Perform some IO on the mount point
        self._perform_io_and_validate()

        # Get the mount size from df -h output
        initial_mount_size = self._get_mount_size_from_df_h_output()

        # Replace all the bricks and wait till the heal completes
        self._replace_bricks_and_wait_for_heal_completion()

        # Get df -h output after brick replace
        mount_size_after_replace = self._get_mount_size_from_df_h_output()

        # Verify the mount point size remains the same after brick replace
        self.assertEqual(initial_mount_size, mount_size_after_replace,
                         "The mount sizes before and after replace bricks "
                         "are not same")

        # Add bricks
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info, force=True)
        self.assertTrue(ret, "Failed to add-brick to volume")

        # Get df -h output after volume expand
        mount_size_after_expand = self._get_mount_size_from_df_h_output()

        # Verify df -h output returns greater value
        self.assertGreater(mount_size_after_expand, initial_mount_size,
                           "The mount size has not increased after expanding")

        # Remove bricks
        ret = shrink_volume(self.mnode, self.volname, force=True)
        # The message must be one formatted string; passing a tuple as msg
        # would print the raw tuple and never interpolate the volume name
        self.assertTrue(ret, "Remove brick operation failed on %s"
                        % self.volname)
        g.log.info("Remove brick operation is successful on "
                   "volume %s", self.volname)

        # Get df -h output after volume shrink
        mount_size_after_shrink = self._get_mount_size_from_df_h_output()

        # Verify the df -h output returns smaller value
        self.assertGreater(mount_size_after_expand, mount_size_after_shrink,
                           "The mount size has not reduced after shrinking")

    def tearDown(self):
        """
        Cleanup and umount volume
        """
        # Cleanup and umount volume
        if not self.unmount_volume_and_cleanup_volume(mounts=self.mounts):
            raise ExecutionError("Failed to umount the vol & cleanup Volume")
        g.log.info("Successful in umounting the volume and Cleanup")

        # Calling GlusterBaseClass teardown
        self.get_super_method(self, 'tearDown')()


# diff --git a/tests/functional/glusterd/test_volume_set_when_glusterd_stopped_on_one_node.py b/tests/functional/glusterd/test_volume_set_when_glusterd_stopped_on_one_node.py
# new file mode 100644
# index 000000000..d99fa185f
# --- /dev/null
# +++ b/tests/functional/glusterd/test_volume_set_when_glusterd_stopped_on_one_node.py
# @@ -0,0 +1,193 @@
# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

""" Description:
    Volume set operation when glusterd is stopped on one node
"""

from random import choice
from time import sleep
from glusto.core import Glusto as g
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.volume_ops import (
    set_volume_options, get_volume_info)
from glustolibs.gluster.brick_libs import get_online_bricks_list
from glustolibs.gluster.gluster_init import (
    start_glusterd, stop_glusterd, wait_for_glusterd_to_start)
from glustolibs.misc.misc_libs import upload_scripts
from glustolibs.io.utils import validate_io_procs


@runs_on([['distributed', 'replicated', 'distributed-replicated',
           'dispersed', 'distributed-dispersed'], ['glusterfs']])
class TestVolumeSetWhenGlusterdStoppedOnOneNode(GlusterBaseClass):
    """Volume set must work, and sync later, while one glusterd is down."""

    @classmethod
    def setUpClass(cls):
        """Upload the IO script to all clients.

        Raises:
            ExecutionError: if the script upload fails.
        """
        cls.get_super_method(cls, 'setUpClass')()

        # Uploading file_dir script in all client directories
        cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
                                  "file_dir_ops.py")
        ret = upload_scripts(cls.clients, cls.script_upload_path)
        if not ret:
            raise ExecutionError("Failed to upload IO scripts to clients %s"
                                 % cls.clients)
        g.log.info("Successfully uploaded IO scripts to clients %s",
                   cls.clients)

    def setUp(self):
        """Create and mount the volume for each test."""
        self.get_super_method(self, 'setUp')()
        # Creating Volume and mounting volume.
        ret = self.setup_volume_and_mount_volume(self.mounts)
        if not ret:
            raise ExecutionError("Volume creation or mount failed: %s"
                                 % self.volname)
        g.log.info("Volume created and mounted successfully : %s",
                   self.volname)

    def tearDown(self):
        """Bring glusterd back up if the test left it stopped, then unmount
        and delete the volume.

        Raises:
            ExecutionError: if the volume cannot be cleaned up.
        """
        # Check if a node is still down. getattr() is used because the flag
        # is only set after glusterd was stopped; a failure before that
        # point must not raise AttributeError and skip the cleanup below.
        if getattr(self, 'glusterd_is_stopped', False):
            ret = start_glusterd(self.random_server)
            self.assertTrue(ret, "Failed to start glusterd on %s"
                            % self.random_server)
            g.log.info("Successfully started glusterd on node: %s",
                       self.random_server)

            # Waiting for glusterd to start completely
            ret = wait_for_glusterd_to_start(self.random_server)
            self.assertTrue(ret, "glusterd is not running on %s"
                            % self.random_server)
            g.log.info("glusterd is started and running on %s",
                       self.random_server)

        # Unmounting and cleaning volume.
        ret = self.unmount_volume_and_cleanup_volume(self.mounts)
        if not ret:
            raise ExecutionError("Unable to delete volume %s" % self.volname)
        g.log.info("Volume deleted successfully : %s", self.volname)

        self.get_super_method(self, 'tearDown')()

    def test_volume_set_when_glusterd_stopped_on_one_node(self):
        """
        Test Case:
        1) Setup and mount a volume on client.
        2) Stop glusterd on a random server.
        3) Start IO on mount points
        4) Set an option on the volume
        5) Start glusterd on the stopped node.
        6) Verify all the bricks are online after starting glusterd.
        7) Check if the volume info is synced across the cluster.
        """
        # Fetching the bricks list and storing it for later use
        list1 = get_online_bricks_list(self.mnode, self.volname)
        self.assertIsNotNone(list1, "Failed to get the list of online bricks "
                             "for volume: %s" % self.volname)

        # Fetching a random server from list (never the first server,
        # which this class uses as self.mnode for the volume commands).
        self.random_server = choice(self.servers[1:])

        # Stopping glusterd on one node.
        ret = stop_glusterd(self.random_server)
        self.assertTrue(ret, "Failed to stop glusterd on one node.")
        g.log.info("Successfully stopped glusterd on one node.")

        self.glusterd_is_stopped = True

        # Start IO on mount points.
        self.all_mounts_procs = []
        counter = 1
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dir-depth 4 "
                   "--dir-length 6 "
                   "--dirname-start-num %d "
                   "--max-num-of-dirs 3 "
                   "--num-of-files 5 %s" % (
                       self.script_upload_path,
                       counter, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            counter += 1

        # Validate IO
        self.assertTrue(
            validate_io_procs(self.all_mounts_procs, self.mounts),
            "IO failed on some of the clients"
        )
        g.log.info("IO validation complete.")

        # set a option on volume, stat-prefetch on
        self.options = {"stat-prefetch": "on"}
        ret = set_volume_options(self.mnode, self.volname, self.options)
        self.assertTrue(ret, ("Failed to set option stat-prefetch to on "
                              "for the volume %s" % self.volname))
        g.log.info("Succeeded in setting stat-prefetch option to on "
                   "for the volume %s", self.volname)

        # start glusterd on the node where glusterd is stopped
        ret = start_glusterd(self.random_server)
        self.assertTrue(ret, "Failed to start glusterd on %s"
                        % self.random_server)
        g.log.info("Successfully started glusterd on node: %s",
                   self.random_server)

        # Waiting for glusterd to start completely
        ret = wait_for_glusterd_to_start(self.random_server)
        self.assertTrue(ret, "glusterd is not running on %s"
                        % self.random_server)
        g.log.info("glusterd is started and running on %s", self.random_server)

        self.glusterd_is_stopped = False

        # Confirm if all the bricks are online or not; poll up to 10 times
        # with a 2 second pause between attempts
        count = 0
        while count < 10:
            list2 = get_online_bricks_list(self.mnode, self.volname)
            if list1 == list2:
                break
            sleep(2)
            count += 1

        self.assertListEqual(list1, list2, "Unexpected: All the bricks in the "
                             "volume are not online")
        g.log.info("All the bricks in the volume are back online")

        # volume info should be synced across the cluster
        out1 = get_volume_info(self.mnode, self.volname)
        self.assertIsNotNone(out1, "Failed to get the volume info from %s"
                             % self.mnode)
        g.log.info("Getting volume info from %s is success", self.mnode)

        # Poll the restarted node up to 60 times, 2 seconds apart, until its
        # volume info matches the one fetched from mnode
        count = 0
        while count < 60:
            out2 = get_volume_info(self.random_server, self.volname)
            self.assertIsNotNone(out2, "Failed to get the volume info from %s"
                                 % self.random_server)
            if out1 == out2:
                break
            sleep(2)
            count += 1

        self.assertDictEqual(out1, out2, "Volume info is not synced in the "
                             "restarted node")
        g.log.info("Volume info is successfully synced across the cluster")


# diff --git a/tests/functional/glusterd/test_xml_dump_of_gluster_volume_status_during_rebalance.py b/tests/functional/glusterd/test_xml_dump_of_gluster_volume_status_during_rebalance.py
# new file mode 100644
# index 000000000..5712dcf32
# --- /dev/null
# +++ b/tests/functional/glusterd/test_xml_dump_of_gluster_volume_status_during_rebalance.py
# @@ -0,0 +1,185 @@
# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.


from glusto.core import Glusto as g
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.gluster_init import (
    stop_glusterd, start_glusterd,
    is_glusterd_running
)
from glustolibs.gluster.lib_utils import form_bricks_list
from glustolibs.gluster.peer_ops import wait_for_peers_to_connect
from glustolibs.gluster.rebalance_ops import (
    get_rebalance_status,
    rebalance_start
)
from glustolibs.gluster.volume_libs import (
    cleanup_volume
)
from glustolibs.gluster.volume_ops import (
    volume_stop, volume_create, volume_start, get_volume_status
)
from glustolibs.io.utils import (
    list_all_files_and_dirs_mounts,
    wait_for_io_to_complete
)
from glustolibs.misc.misc_libs import upload_scripts


@runs_on([['distributed-replicated'], ['glusterfs']])
class XmlDumpGlusterVolumeStatus(GlusterBaseClass):
    """
    xml Dump of gluster volume status during rebalance, when one gluster
    node is down
    """

    @classmethod
    def setUpClass(cls):
        """Create and mount the volume, then run IO on every mount and wait
        for it to finish.

        Raises:
            ExecutionError: if volume setup, script upload, IO or the final
                file listing fails.
        """
        # Calling GlusterBaseClass setUpClass
        cls.get_super_method(cls, 'setUpClass')()

        # Setup Volume and Mount Volume
        ret = cls.setup_volume_and_mount_volume(mounts=cls.mounts)
        if not ret:
            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")

        cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
                                  "file_dir_ops.py")
        ret = upload_scripts(cls.clients, cls.script_upload_path)
        if not ret:
            raise ExecutionError("Failed to upload IO scripts to clients %s" %
                                 cls.clients)
        g.log.info("Successfully uploaded IO scripts to clients %s",
                   cls.clients)

        # Start IO on mounts. The flag is set before the loop so that it
        # exists even when there are no mounts to iterate over.
        cls.all_mounts_procs = []
        cls.io_validation_complete = False
        for index, mount_obj in enumerate(cls.mounts, start=1):
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 1 "
                   "--dir-length 5 "
                   "--max-num-of-dirs 10 "
                   "--num-of-files 60 %s" % (
                       cls.script_upload_path,
                       index + 10, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            cls.all_mounts_procs.append(proc)

        # Wait for IO to complete
        if not cls.io_validation_complete:
            g.log.info("Wait for IO to complete")
            ret = wait_for_io_to_complete(cls.all_mounts_procs, cls.mounts)
            if not ret:
                raise ExecutionError("IO failed on some of the clients")

            ret = list_all_files_and_dirs_mounts(cls.mounts)
            if not ret:
                raise ExecutionError("Failed to list all files and dirs")

    def test_xml_dump_of_gluster_volume_status_during_rebalance(self):
        """
        1. Create a trusted storage pool by peer probing the node
        2. Create a distributed-replicated volume
        3. Start the volume and fuse mount the volume and start IO
        4. Create another replicated volume and start it and stop it
        5. Start rebalance on the volume
        6. While rebalance in progress, stop glusterd on one of the nodes
           in the Trusted Storage pool.
        7. Get the status of the volumes with --xml dump
        """
        self.volname_2 = "test_volume_2"

        # create volume
        # Fetching all the parameters for volume_create
        list_of_three_servers = list(self.servers[:3])
        server_info_for_three_nodes = {
            server: self.all_servers_info[server]
            for server in list_of_three_servers}

        bricks_list = form_bricks_list(self.mnode, self.volname,
                                       3, list_of_three_servers,
                                       server_info_for_three_nodes)
        # Creating volumes using 3 servers
        ret, _, _ = volume_create(self.mnode, self.volname_2,
                                  bricks_list, force=True)
        self.assertFalse(ret, "Volume creation failed")
        g.log.info("Volume %s created successfully", self.volname_2)
        ret, _, _ = volume_start(self.mnode, self.volname_2)
        self.assertFalse(
            ret, "Failed to start volume {}".format(self.volname_2))
        ret, _, _ = volume_stop(self.mnode, self.volname_2)
        self.assertFalse(
            ret, "Failed to stop volume {}".format(self.volname_2))

        # Start Rebalance
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        # The message must be one formatted string; a tuple passed as msg is
        # printed raw and the volume name is never interpolated
        self.assertEqual(ret, 0, "Failed to start rebalance on the volume "
                         "%s" % self.volname)

        # Get rebalance status
        status_info = get_rebalance_status(self.mnode, self.volname)
        self.assertIsNotNone(status_info, "Failed to get the rebalance "
                             "status of volume %s" % self.volname)
        status = status_info['aggregate']['statusStr']

        self.assertIn('in progress', status,
                      "Rebalance process is not running")
        g.log.info("Rebalance process is running")

        # Stop glusterd
        ret = stop_glusterd(self.servers[2])
        self.assertTrue(ret, "Failed to stop glusterd")

        # Rebalance status as printed by the plain-text CLI output
        ret, out, _ = g.run(
            self.mnode,
            "gluster v status | grep -A 4 'Rebalance' | awk 'NR==3{print "
            "$3,$4}'")
        self.assertEqual(ret, 0, "Failed to get the rebalance status from "
                         "'gluster v status'")

        # Rebalance status as returned by get_volume_status(); it has to
        # show up in the CLI text gathered above
        vol_status = get_volume_status(self.mnode, self.volname,
                                       options="tasks")
        self.assertIsNotNone(vol_status, "Failed to get the task status of "
                             "volume %s" % self.volname)
        rebalance_status = (
            vol_status[self.volname]['task_status'][0]['statusStr'])
        self.assertIn(rebalance_status, out.replace("\n", ""))

    def tearDown(self):
        """Restart glusterd where needed, wait for the peers and delete both
        volumes.

        Raises:
            ExecutionError: if glusterd cannot be started, the peers do not
                connect or a volume cannot be cleaned up.
        """
        # A truthy return value is treated as "glusterd is not running on
        # every server", in which case it is started everywhere
        ret = is_glusterd_running(self.servers)
        if ret:
            ret = start_glusterd(self.servers)
            if not ret:
                raise ExecutionError("Failed to start glusterd on %s"
                                     % self.servers)
            g.log.info("Glusterd started successfully on %s", self.servers)

        # Checking for peer status from every node
        for server in self.servers:
            ret = wait_for_peers_to_connect(server, self.servers)
            if not ret:
                raise ExecutionError("Servers are not in peer probed state")

        ret = cleanup_volume(self.mnode, self.volname_2)
        if not ret:
            raise ExecutionError(
                "Unable to delete volume %s" % self.volname_2)
        # Unmount and cleanup original volume
        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
        if not ret:
            raise ExecutionError("Failed to umount the vol & cleanup Volume")

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()


# diff --git a/tests/functional/glusterfind/test_gfind_list_cli.py b/tests/functional/glusterfind/test_gfind_list_cli.py
# new file mode 100644
# index 000000000..bfc27da97
# --- /dev/null
# +++ b/tests/functional/glusterfind/test_gfind_list_cli.py
# @@ -0,0 +1,111 @@
# Copyright (C)
2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.glusterfind_ops import (gfind_list, gfind_create, + gfind_delete) + + +@runs_on([['distributed-replicated', 'distributed-arbiter', + 'distributed-dispersed', 'distributed', 'arbiter', + 'dispersed', 'replicated'], ['glusterfs']]) +class TestGlusterFindListCLI(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Setup Volume + if not self.setup_volume(): + raise ExecutionError("Failed to Setup_Volume %s" % self.volname) + + def tearDown(self): + + # Cleanup glusterfind session and volume + ret, _, _ = gfind_delete(self.mnode, self.volname, self.session) + if ret: + raise ExecutionError("Failed to delete session '%s'" + % self.session) + + if not self.cleanup_volume(): + raise ExecutionError("Failed to Cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _check_glusterfind_list_output(self, out): + """Check if glusterfind list output is proper or not.""" + out = list( + filter(None, list(filter(None, out.split("\n")))[2].split(" "))) + 
self.assertEqual(out[0], self.session, + "Unexpected: Session name not poper in output") + self.assertEqual(out[1], self.volname, + "Unecpected: Volume name not proper in output") + + def test_gfind_list_cli(self): + """ + Verifying the glusterfind list command functionality with valid + and invalid values for the required and optional parameters. + + * Create a volume + * Create a session on the volume and call glusterfind list with the + following combinations: + - Valid values for optional parameters + - Invalid values for optional parameters + + NOTE: + There are no required parameters for glusterfind list command. + """ + # Creating a glusterfind session + self.session = "session1" + ret, _, _ = gfind_create(self.mnode, self.volname, self.session) + self.assertEqual(ret, 0, "Glusterfind session creation for the " + "volume %s failed" % self.volname) + + # Checking output of glusterfind list + ret, out, _ = gfind_list(self.mnode) + self.assertEqual(ret, 0, "Glusterfind list failed") + self._check_glusterfind_list_output(out) + g.log.info("glusterfind list cmd validation without any param passed") + + # Check output for glusterfind list with valid and invalid volume name + for volume, expected_value, validation in ((self.volname, 0, 'valid'), + ("abc", 1, 'invalid')): + ret, out, _ = gfind_list(self.mnode, volname=volume) + self.assertEqual(ret, expected_value, + "Glusterfind list --volume check with %s " + "parameter failed" % validation) + if not ret: + self._check_glusterfind_list_output(out) + g.log.info("glusterind list cmd check with --volume param passed") + + # Check output for glusterfind list with valid and invalid session name + for session, expected_value, validation in ((self.session, 0, 'valid'), + ("abc", 1, 'invalid')): + ret, out, _ = gfind_list(self.mnode, sessname=session) + self.assertEqual(ret, expected_value, + "Glusterfind list --session check with %s " + "parameter failed" % validation) + if not ret: + 
self._check_glusterfind_list_output(out) + g.log.info("glusterfind list cmd check with --session param passed") + + # Check output of glusterfind list with debug parameter + ret, _, _ = gfind_list(self.mnode, debug=True) + self.assertEqual(ret, 0, "Glusterfind list --debug parameter failed") + g.log.info("glusterfind list cmd check with --debug param passed") diff --git a/tests/functional/glusterfind/test_glusterfind_when_brick_down.py b/tests/functional/glusterfind/test_glusterfind_when_brick_down.py new file mode 100644 index 000000000..de1ebaf23 --- /dev/null +++ b/tests/functional/glusterfind/test_glusterfind_when_brick_down.py @@ -0,0 +1,219 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +""" +Description: + Test Glusterfind when brick is down +""" + +from random import choice +from time import sleep +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.volume_ops import set_volume_options +from glustolibs.gluster.peer_ops import wait_for_peers_to_connect +from glustolibs.gluster.lib_utils import list_files +from glustolibs.gluster.volume_libs import volume_start +from glustolibs.gluster.glusterfile import ( + file_exists, + remove_file, + check_if_pattern_in_file) +from glustolibs.gluster.glusterfind_ops import ( + gfind_create, + gfind_list, + gfind_pre, + gfind_post, + gfind_delete) +from glustolibs.gluster.brick_libs import ( + get_all_bricks, + bring_bricks_offline) + + +@runs_on([["replicated", "distributed-replicated", "dispersed", + "distributed", "distributed-dispersed"], + ["glusterfs"]]) +class TestGlusterFindBrickDown(GlusterBaseClass): + """ + Test glusterfind operation when a brick is down. 
+ """ + + def setUp(self): + """ + setup volume and mount volume + Initiate necessary variables + """ + + # calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + self.file_limit = 0 + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume %s" % self.volname) + g.log.info("Successful in Setup Volume %s", self.volname) + self.session = "test-session-%s" % self.volname + self.outfiles = [("/tmp/test-outfile-%s-%s.txt" + % (self.volname, i))for i in range(0, 2)] + + # Set the changelog rollover-time to 1 second + # This needs to be done in order for glusterfind to keep checking + # for changes in the mount point + option = {'changelog.rollover-time': '1'} + ret = set_volume_options(self.mnode, self.volname, option) + if not ret: + raise ExecutionError("Failed to set the volume option %s for %s" + % (option, self.volname)) + g.log.info("Successfully set the volume option for the volume %s", + self.volname) + + def _perform_io_and_validate_presence_of_files(self): + """ + Function to perform the IO and validate the presence of files. 
+ """ + self.file_limit += 10 + # Starting IO on the mounts + cmd = ("cd %s ; touch file{%d..%d}" % (self.mounts[0].mountpoint, + self.file_limit-10, + self.file_limit)) + + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to create files on mountpoint") + g.log.info("Files created successfully on mountpoint") + + # Gather the list of files from the mount point + files = list_files(self.mounts[0].client_system, + self.mounts[0].mountpoint) + self.assertIsNotNone(files, "Failed to get the list of files") + g.log.info("Successfully gathered the list of files from mount point") + + # Check if the files exist + for filename in files: + ret = file_exists(self.mounts[0].client_system, filename) + self.assertTrue(ret, ("Unexpected: File '%s' does not exist" + % filename)) + g.log.info("Successfully validated existence of '%s'", filename) + + def _perform_glusterfind_pre_and_validate_outfile(self): + """ + Function to perform glusterfind pre and validate outfile + """ + # Perform glusterfind pre for the session + ret, _, _ = gfind_pre(self.mnode, self.volname, self.session, + self.outfiles[0], full=True, noencode=True, + debug=True) + self.assertEqual(ret, 0, ("Failed to perform glusterfind pre")) + g.log.info("Successfully performed glusterfind pre") + + # Check if the outfile exists + ret = file_exists(self.mnode, self.outfiles[0]) + self.assertTrue(ret, ("Unexpected: File '%s' does not exist" + % self.outfiles[0])) + g.log.info("Successfully validated existence of '%s'", + self.outfiles[0]) + + # Check if all the files are listed in the outfile + for i in range(1, self.file_limit+1): + ret = check_if_pattern_in_file(self.mnode, "file%s" % i, + self.outfiles[0]) + self.assertEqual(ret, 0, ("File 'file%s' not listed in %s" + % (i, self.outfiles[0]))) + g.log.info("File 'file%s' listed in %s", i, self.outfiles[0]) + + def test_gfind_when_brick_down(self): + """ + Verifying the glusterfind functionality when a brick is down. + + 1. 
Create a volume + 2. Create a session on the volume + 3. Create various files from mount point + 4. Bring down brick process on one of the node + 5. Perform glusterfind pre + 6. Perform glusterfind post + 7. Check the contents of outfile + """ + + # pylint: disable=too-many-statements + # Create a session for the volume + ret, _, _ = gfind_create(self.mnode, self.volname, self.session) + self.assertEqual(ret, 0, ("Unexpected: Creation of a session for the " + "volume %s failed" % self.volname)) + g.log.info("Successfully created a session for the volume %s", + self.volname) + + # Perform glusterfind list to check if session exists + _, out, _ = gfind_list(self.mnode, volname=self.volname, + sessname=self.session) + self.assertNotEqual(out, "No sessions found.", + "Failed to list the glusterfind session") + g.log.info("Successfully listed the glusterfind session") + + self._perform_io_and_validate_presence_of_files() + + # Wait for changelog to get updated + sleep(2) + + # Bring one of the brick down. + brick_list = get_all_bricks(self.mnode, self.volname) + ret = bring_bricks_offline(self.volname, choice(brick_list)) + self.assertTrue(ret, "Failed to bring down the brick.") + g.log.info("Succesfully brought down one brick.") + + self._perform_glusterfind_pre_and_validate_outfile() + + # Perform glusterfind post for the session + ret, _, _ = gfind_post(self.mnode, self.volname, self.session) + self.assertEqual(ret, 0, ("Failed to perform glusterfind post")) + g.log.info("Successfully performed glusterfind post") + + # Bring the brick process up. 
+ ret = volume_start(self.mnode, self.volname, force=True) + self.assertTrue(ret, "Failed to start the volume.") + g.log.info("Successfully started the volume.") + + def tearDown(self): + """ + tearDown for every test + Clean up and unmount the volume + """ + # calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + # Delete the glusterfind sessions + ret, _, _ = gfind_delete(self.mnode, self.volname, self.session) + if ret: + raise ExecutionError("Failed to delete session %s" % self.session) + g.log.info("Successfully deleted session %s", self.session) + + # Remove the outfiles created during 'glusterfind pre' + for out in self.outfiles: + ret = remove_file(self.mnode, out, force=True) + if not ret: + raise ExecutionError("Failed to remove the outfile %s" % out) + g.log.info("Successfully removed the outfiles") + + # Wait for the peers to be connected. + ret = wait_for_peers_to_connect(self.mnode, self.servers, 100) + if not ret: + raise ExecutionError("Peers are not in connected state.") + + # Cleanup the volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Cleanup Volume") + g.log.info("Successful in Cleanup Volume") diff --git a/tests/functional/glusterfind/test_glusterfind_when_node_down.py b/tests/functional/glusterfind/test_glusterfind_when_node_down.py new file mode 100644 index 000000000..1d8b2572a --- /dev/null +++ b/tests/functional/glusterfind/test_glusterfind_when_node_down.py @@ -0,0 +1,280 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +Description: + Test Glusterfind when node is down +""" + +from random import choice +from time import sleep +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.volume_ops import set_volume_options +from glustolibs.gluster.peer_ops import wait_for_peers_to_connect +from glustolibs.gluster.lib_utils import list_files +from glustolibs.gluster.glusterfile import ( + file_exists, + remove_file, + check_if_pattern_in_file) +from glustolibs.gluster.glusterfind_ops import ( + gfind_create, + gfind_list, + gfind_pre, + gfind_post, + gfind_delete) +from glustolibs.gluster.gluster_init import ( + stop_glusterd, + start_glusterd, + wait_for_glusterd_to_start) +from glustolibs.misc.misc_libs import ( + reboot_nodes, + are_nodes_online) + + +@runs_on([["replicated", "distributed-replicated", "dispersed", + "distributed", "distributed-dispersed"], + ["glusterfs"]]) +class TestGlusterFindNodeDown(GlusterBaseClass): + """ + Test glusterfind operation when a node is down. 
+ """ + + def setUp(self): + """ + setup volume and mount volume + Initiate necessary variables + """ + + # calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + self.file_limit = 0 + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume %s" % self.volname) + g.log.info("Successful in Setup Volume %s", self.volname) + self.session = "test-session-%s" % self.volname + self.outfiles = [("/tmp/test-outfile-%s-%s.txt" + % (self.volname, i))for i in range(0, 2)] + + # Set the changelog rollover-time to 1 second + # This needs to be done in order for glusterfind to keep checking + # for changes in the mount point + option = {'changelog.rollover-time': '1'} + ret = set_volume_options(self.mnode, self.volname, option) + if not ret: + raise ExecutionError("Failed to set the volume option %s for %s" + % (option, self.volname)) + g.log.info("Successfully set the volume option for the volume %s", + self.volname) + + def _perform_io_and_validate_presence_of_files(self): + """ + Function to perform the IO and validate the presence of files. 
+ """ + self.file_limit += 10 + # Starting IO on the mounts + cmd = ("cd %s ; touch file{%d..%d}" % (self.mounts[0].mountpoint, + self.file_limit-10, + self.file_limit)) + + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to create files on mountpoint") + g.log.info("Files created successfully on mountpoint") + + # Gather the list of files from the mount point + files = list_files(self.mounts[0].client_system, + self.mounts[0].mountpoint) + self.assertIsNotNone(files, "Failed to get the list of files") + g.log.info("Successfully gathered the list of files from mount point") + + # Check if the files exist + for filename in files: + ret = file_exists(self.mounts[0].client_system, filename) + self.assertTrue(ret, ("Unexpected: File '%s' does not exist" + % filename)) + g.log.info("Successfully validated existence of '%s'", filename) + + def _perform_glusterfind_pre_and_validate_outfile(self): + """ + Function to perform glusterfind pre and validate outfile + """ + # Perform glusterfind pre for the session + ret, _, _ = gfind_pre(self.mnode, self.volname, self.session, + self.outfiles[0], full=True, noencode=True, + debug=True) + self.assertEqual(ret, 0, ("Failed to perform glusterfind pre")) + g.log.info("Successfully performed glusterfind pre") + + # Check if the outfile exists + ret = file_exists(self.mnode, self.outfiles[0]) + self.assertTrue(ret, ("Unexpected: File '%s' does not exist" + % self.outfiles[0])) + g.log.info("Successfully validated existence of '%s'", + self.outfiles[0]) + + # Check if all the files are listed in the outfile + for i in range(1, self.file_limit+1): + ret = check_if_pattern_in_file(self.mnode, "file%s" % i, + self.outfiles[0]) + self.assertEqual(ret, 0, ("File 'file%s' not listed in %s" + % (i, self.outfiles[0]))) + g.log.info("File 'file%s' listed in %s", i, self.outfiles[0]) + + def test_gfind_when_node_down(self): + """ + Verifying the glusterfind functionality when node is down. + + 1. 
Create a volume + 2. Create a session on the volume + 3. Create various files from mount point + 4. Bring down glusterd on one of the nodes + 5. Perform glusterfind pre + 6. Perform glusterfind post + 7. Check the contents of outfile + 8. Create more files from mountpoint + 9. Reboot one of the nodes + 10. Perform glusterfind pre + 11. Perform glusterfind post + 12. Check the contents of outfile + """ + + # pylint: disable=too-many-statements + # Create a session for the volume + ret, _, _ = gfind_create(self.mnode, self.volname, self.session) + self.assertEqual(ret, 0, ("Unexpected: Creation of a session for the " + "volume %s failed" % self.volname)) + g.log.info("Successfully created a session for the volume %s", + self.volname) + + # Perform glusterfind list to check if session exists + _, out, _ = gfind_list(self.mnode, volname=self.volname, + sessname=self.session) + self.assertNotEqual(out, "No sessions found.", + "Failed to list the glusterfind session") + g.log.info("Successfully listed the glusterfind session") + + self._perform_io_and_validate_presence_of_files() + + # Wait for changelog to get updated + sleep(2) + + # Bring one of the nodes down. + self.random_server = choice(self.servers[1:]) + ret = stop_glusterd(self.random_server) + self.assertTrue(ret, "Failed to stop glusterd on one node.") + g.log.info("Succesfully stopped glusterd on one node.") + + self._perform_glusterfind_pre_and_validate_outfile() + + # Perform glusterfind post for the session + ret, _, _ = gfind_post(self.mnode, self.volname, self.session) + self.assertEqual(ret, 0, ("Failed to perform glusterfind post")) + g.log.info("Successfully performed glusterfind post") + + # Bring glusterd which was downed on a random node, up. + ret = start_glusterd(self.random_server) + self.assertTrue(ret, "Failed to start glusterd on %s" + % self.random_server) + g.log.info("Successfully started glusterd on node : %s", + self.random_server) + + # Waiting for glusterd to start completely. 
+ ret = wait_for_glusterd_to_start(self.random_server) + self.assertTrue(ret, "glusterd is not running on %s" + % self.random_server) + g.log.info("glusterd is started and running on %s", + self.random_server) + + self._perform_io_and_validate_presence_of_files() + + # Perform IO + self._perform_io_and_validate_presence_of_files() + + # Wait for changelog to get updated + sleep(2) + + # Reboot one of the nodes. + self.random_server = choice(self.servers[1:]) + ret = reboot_nodes(self.random_server) + self.assertTrue(ret, "Failed to reboot the said node.") + g.log.info("Successfully started reboot process on one node.") + + self._perform_glusterfind_pre_and_validate_outfile() + + # Perform glusterfind post for the session + ret, _, _ = gfind_post(self.mnode, self.volname, self.session) + self.assertEqual(ret, 0, ("Failed to perform glusterfind post")) + g.log.info("Successfully performed glusterfind post") + + # Gradual sleep backoff till the node has rebooted. + counter = 0 + timeout = 300 + ret = False + while counter < timeout: + ret, _ = are_nodes_online(self.random_server) + if not ret: + g.log.info("Node's offline, Retrying after 5 seconds ...") + sleep(5) + counter += 5 + else: + ret = True + break + self.assertTrue(ret, "Node is still offline.") + g.log.info("Rebooted node is online") + + # Wait for glusterd to start completely + ret = wait_for_glusterd_to_start(self.random_server) + self.assertTrue(ret, "glusterd is not running on %s" + % self.random_server) + g.log.info("glusterd is started and running on %s", + self.random_server) + + def tearDown(self): + """ + tearDown for every test + Clean up and unmount the volume + """ + # calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + # Delete the glusterfind sessions + ret, _, _ = gfind_delete(self.mnode, self.volname, self.session) + if ret: + raise ExecutionError("Failed to delete session %s" % self.session) + g.log.info("Successfully deleted session %s", self.session) + + # 
Remove the outfiles created during 'glusterfind pre' + for out in self.outfiles: + ret = remove_file(self.mnode, out, force=True) + if not ret: + raise ExecutionError("Failed to remove the outfile %s" % out) + g.log.info("Successfully removed the outfiles") + + # Wait for the peers to be connected. + ret = wait_for_peers_to_connect(self.mnode, self.servers, 100) + if not ret: + raise ExecutionError("Peers are not in connected state.") + + # Cleanup the volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Cleanup Volume") + g.log.info("Successful in Cleanup Volume") diff --git a/tests/functional/resource_leak/test_memory_leaks_with_files_delete.py b/tests/functional/resource_leak/test_memory_leaks_with_files_delete.py new file mode 100644 index 000000000..ab29fdbe7 --- /dev/null +++ b/tests/functional/resource_leak/test_memory_leaks_with_files_delete.py @@ -0,0 +1,113 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.glusterdir import get_dir_contents +from glustolibs.io.memory_and_cpu_utils import ( + wait_for_logging_processes_to_stop) +from glustolibs.gluster.brick_libs import get_all_bricks + + +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestMemoryLeakWithRm(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Set test_id for get gathering + self.test_id = self.id() + + # Set I/O flag to false + self.is_io_running = False + + # Creating Volume and mounting the volume + ret = self.setup_volume_and_mount_volume(self.mounts) + if not ret: + raise ExecutionError("Volume creation or mount failed: %s" + % self.volname) + + def tearDown(self): + + # Unmounting and cleaning volume + ret = self.unmount_volume_and_cleanup_volume(self.mounts) + if not ret: + raise ExecutionError("Unable to delete volume %s" % self.volname) + + self.get_super_method(self, 'tearDown')() + + def test_memory_leak_with_rm(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create 10,000 files each of size 200K + 3. Delete the files created at step 2 + 4. Check if the files are deleted from backend + 5. Check if there are any memory leaks and OOM killers. 
+ """ + # Start monitoring resource usage on servers and clients + monitor_proc_dict = self.start_memory_and_cpu_usage_logging( + self.test_id, count=30) + self.assertIsNotNone(monitor_proc_dict, + "Failed to start monitoring on servers and " + "clients") + # Create files on mount point + cmd = ('cd %s;for i in {1..10000};' + 'do dd if=/dev/urandom bs=200K count=1 of=file$i;done;' + 'rm -rf %s/file*' + % (self.mounts[0].mountpoint, self.mounts[0].mountpoint)) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to create and delete files on" + " mountpoint") + g.log.info("Successfully created and removed files on mountpoint") + + # Delete files from mount point and check if all files + # are deleted or not from mount point as well as backend bricks. + ret, _, _ = g.run(self.clients[0], + "rm -rf {}/*".format(self.mounts[0].mountpoint)) + self.assertFalse(ret, "rm -rf * failed on mount point") + + ret = get_dir_contents(self.clients[0], + "{}/".format(self.mounts[0].mountpoint)) + self.assertEqual(ret, [], "Unexpected: Files and directories still " + "seen from mount point") + + for brick in get_all_bricks(self.mnode, self.volname): + node, brick_path = brick.split(":") + ret = get_dir_contents(node, "{}/".format(brick_path)) + self.assertEqual(ret, [], "Unexpected: Files and dirs still seen " + "on brick %s on node %s" % (brick_path, node)) + g.log.info("rm -rf * on mount point successful") + + # Wait for monitoring processes to complete + ret = wait_for_logging_processes_to_stop(monitor_proc_dict, + cluster=True) + self.assertTrue(ret, + "ERROR: Failed to stop monitoring processes") + + # Check if there are any memory leaks and OOM killers + ret = self.check_for_memory_leaks_and_oom_kills_on_servers( + self.test_id) + self.assertFalse(ret, + "Memory leak and OOM kills check failed on servers") + + ret = self.check_for_memory_leaks_and_oom_kills_on_clients( + self.test_id) + self.assertFalse(ret, + "Memory leak and OOM kills 
check failed on clients") + g.log.info("No memory leaks or OOM kills found on serves and clients") diff --git a/tests/functional/resource_leak/test_verify_gluster_memleak_with_management_encryption.py b/tests/functional/resource_leak/test_verify_gluster_memleak_with_management_encryption.py new file mode 100644 index 000000000..25f8325df --- /dev/null +++ b/tests/functional/resource_leak/test_verify_gluster_memleak_with_management_encryption.py @@ -0,0 +1,231 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + +from datetime import datetime, timedelta +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.lib_utils import get_usable_size_per_disk +from glustolibs.gluster.volume_libs import (get_subvols, bulk_volume_creation, + volume_stop, volume_start, + set_volume_options) +from glustolibs.io.memory_and_cpu_utils import ( + wait_for_logging_processes_to_stop) +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.io.utils import validate_io_procs +from glustolibs.gluster.brickmux_ops import (enable_brick_mux, + disable_brick_mux, + is_brick_mux_enabled) +from glustolibs.gluster.mount_ops import mount_volume, umount_volume + + +@runs_on([['distributed-replicated'], ['glusterfs']]) +class TestMemLeakAfterMgmntEncrypEnabled(GlusterBaseClass): + + def setUp(self): + """ + Setup and mount volume or raise ExecutionError + """ + self.get_super_method(self, 'setUp')() + self.test_id = self.id() + # Setup Volume + self.volume['dist_count'] = 2 + self.volume['replica_count'] = 3 + + ret = self.setup_volume_and_mount_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Failed to Setup and Mount Volume") + + # Disable I/O encryption + self._disable_io_encryption() + + def tearDown(self): + # Disable brick_mux + if is_brick_mux_enabled(self.mnode): + ret = disable_brick_mux(self.mnode) + self.assertTrue(ret, "Failed to brick multiplex") + g.log.info("Disable brick multiplex") + + # Unmount and cleanup original volume + ret = self.unmount_volume_and_cleanup_volume(mounts=[self.mounts[0]]) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _run_io(self): + """ Run IO and fill vol upto ~88%""" + bricks = 
get_all_bricks(self.mnode, self.volname) + usable_size = int(get_usable_size_per_disk(bricks[0]) * 0.88) + + self.procs = [] + counter = 1 + for _ in get_subvols(self.mnode, self.volname)['volume_subvols']: + filename = "{}/test_file_{}".format(self.mounts[0].mountpoint, + str(counter)) + proc = g.run_async(self.mounts[0].client_system, + "fallocate -l {}G {}".format(usable_size, + filename)) + self.procs.append(proc) + counter += 1 + + def _perform_gluster_v_heal_for_12_hrs(self): + """ Run 'gluster v heal info' for 12 hours""" + # Perform gluster heal info for 12 hours + end_time = datetime.now() + timedelta(hours=12) + while True: + curr_time = datetime.now() + cmd = "gluster volume heal %s info" % self.volname + ret, _, _ = g.run(self.mnode, cmd) + self.assertEqual(ret, 0, "Failed to execute heal info cmd") + if curr_time > end_time: + g.log.info("Successfully ran for 12 hours. Checking for " + "memory leaks") + break + + def _verify_memory_leak(self): + """ Verify that no memory leaks or OOM kills are reported """ + + ret = self.check_for_memory_leaks_and_oom_kills_on_servers( + self.test_id) + self.assertFalse(ret, + "Memory leak and OOM kills check failed on servers") + + ret = self.check_for_memory_leaks_and_oom_kills_on_clients( + self.test_id) + self.assertFalse(ret, + "Memory leak and OOM kills check failed on clients") + + def _disable_io_encryption(self): + """ Disables IO encryption """ + # UnMount Volume + g.log.info("Starting to Unmount Volume %s", self.volname) + ret, _, _ = umount_volume(self.mounts[0].client_system, + self.mounts[0].mountpoint, + mtype=self.mount_type) + self.assertEqual(ret, 0, "Failed to Unmount volume") + + # Stop Volume + ret, _, _ = volume_stop(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to Stop volume") + + # Disable server and client SSL usage + options = {"server.ssl": "off", + "client.ssl": "off"} + ret = set_volume_options(self.mnode, self.volname, options) + self.assertTrue(ret, "Failed to set volume options") + + # Start Volume 
+ ret, _, _ = volume_start(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to Start volume") + + # Mount Volume + ret, _, _ = mount_volume(self.volname, mtype=self.mount_type, + mpoint=self.mounts[0].mountpoint, + mserver=self.mnode, + mclient=self.mounts[0].client_system) + self.assertEqual(ret, 0, "Failed to mount the volume back") + + def test_mem_leak_on_gluster_procs_with_management_encrpytion(self): + """ + Steps: + 1) Enable management encryption on the cluster. + 2) Create a 2X3 volume. + 3) Mount the volume using FUSE on a client node. + 4) Start doing IO on the mount (ran IO till the volume is ~88% full) + 5) Simultaneously start collecting the memory usage for + 'glusterfsd' process. + 6) Issue the command "# gluster v heal <volname> info" continuously + in a loop. + """ + # Run IO + self._run_io() + + # Start monitoring resource usage on servers and clients + # default interval = 60 sec, count = 780 (60 *12) => for 12 hrs + monitor_proc_dict = self.start_memory_and_cpu_usage_logging( + self.test_id, count=780) + self.assertIsNotNone(monitor_proc_dict, + "Failed to start monitoring on servers and " + "clients") + + ret = validate_io_procs(self.procs, self.mounts) + self.assertTrue(ret, "IO Failed") + + self._perform_gluster_v_heal_for_12_hrs() + + # Wait for monitoring processes to complete + ret = wait_for_logging_processes_to_stop(monitor_proc_dict, + cluster=True) + self.assertTrue(ret, "ERROR: Failed to stop monitoring processes") + + # Check if there are any memory leaks and OOM killers + self._verify_memory_leak() + g.log.info("No memory leaks/OOM kills found on serves and clients") + + def test_mem_leak_on_gluster_procs_with_brick_multiplex(self): + """ + Steps: + 1) Enable cluster.brick-multiplex + 2) Enable SSL on management layer + 3) Start creating volumes + 4) Mount a volume and starting I/O + 5) Monitor the memory consumption by glusterd process + """ + + # Enable cluster.brick-mulitplex + ret = enable_brick_mux(self.mnode) + 
self.assertTrue(ret, "Failed to enable brick-multiplex") + + # Verify the operation + ret = is_brick_mux_enabled(self.mnode) + self.assertTrue(ret, "Brick mux enble op not successful") + + # Create few volumes + self.volume['replica_count'] = 3 + ret = bulk_volume_creation(self.mnode, 20, self.all_servers_info, + self.volume, is_force=True) + + self.assertTrue(ret, "Failed to create bulk volume") + + # Run IO + self._run_io() + + # Start memory usage logging + monitor_proc_dict = self.start_memory_and_cpu_usage_logging( + self.test_id, count=60) + self.assertIsNotNone(monitor_proc_dict, + "Failed to start monitoring on servers and " + "clients") + + ret = validate_io_procs(self.procs, self.mounts) + self.assertTrue(ret, "IO Failed") + + # Wait for monitoring processes to complete + ret = wait_for_logging_processes_to_stop(monitor_proc_dict, + cluster=True) + self.assertTrue(ret, "ERROR: Failed to stop monitoring processes") + + # Check if there are any memory leaks and OOM killers + self._verify_memory_leak() + g.log.info("No memory leaks/OOM kills found on serves and clients") + + # Disable Brick multiplex + ret = disable_brick_mux(self.mnode) + self.assertTrue(ret, "Failed to brick multiplex") diff --git a/tests/functional/resource_leak/test_verify_gluster_memleak_with_ssl.py b/tests/functional/resource_leak/test_verify_gluster_memleak_with_ssl.py new file mode 100644 index 000000000..1d7edbe32 --- /dev/null +++ b/tests/functional/resource_leak/test_verify_gluster_memleak_with_ssl.py @@ -0,0 +1,128 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. 
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.


from datetime import datetime, timedelta
from glusto.core import Glusto as g
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.lib_utils import get_usable_size_per_disk
from glustolibs.gluster.volume_libs import get_subvols
from glustolibs.io.memory_and_cpu_utils import (
    wait_for_logging_processes_to_stop)
from glustolibs.gluster.brick_libs import get_all_bricks
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.io.utils import validate_io_procs


@runs_on([['distributed-replicated'], ['glusterfs']])
class TestMemLeakAfterSSLEnabled(GlusterBaseClass):
    """Long-running check for memory leaks / OOM kills.

    Fills the mounted volume with fallocate'd files, then issues
    "gluster volume heal <volname> info" in a loop for 12 hours while
    memory and CPU usage is being logged, and finally checks servers and
    clients for memory leaks and OOM kills.
    """

    def setUp(self):
        """
        Setup and mount volume or raise ExecutionError
        """
        self.get_super_method(self, 'setUp')()
        self.test_id = self.id()
        # Setup Volume
        self.volume['dist_count'] = 2
        self.volume['replica_count'] = 3
        ret = self.setup_volume_and_mount_volume(self.mounts)
        if not ret:
            g.log.error("Failed to Setup and Mount Volume")
            raise ExecutionError("Failed to Setup and Mount Volume")

    def tearDown(self):
        """
        Unmount and cleanup the volume or raise ExecutionError
        """
        # Unmount and cleanup original volume
        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
        if not ret:
            raise ExecutionError("Failed to umount the vol & cleanup Volume")
        g.log.info("Successful in umounting the volume and Cleanup")

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()

    def test_mem_leak_on_gluster_procs_after_ssl_enabled(self):
        """
        Steps:
        Scenario 1:
        1) Enable management encryption on the cluster.
        2) Create a 2X3 volume.
        3) Mount the volume using FUSE on a client node.
        4) Start doing IO on the mount (ran IO till the volume is ~88% full)
        5) Simultaneously start collecting the memory usage for
           'glusterfsd' process.
        6) Issue the command "# gluster v heal <volname> info" continuously
           in a loop.

        NOTE(review): nothing in this method enables management
        encryption (step 1); presumably the cluster is expected to be
        configured for it beforehand - confirm.
        """

        # Fill the vol approx 88%
        bricks = get_all_bricks(self.mnode, self.volname)
        # Fail with a clear message instead of a TypeError/IndexError on
        # bricks[0] when no brick list could be obtained.
        self.assertTrue(bricks, "Failed to get the brick list")
        usable_size = get_usable_size_per_disk(bricks[0])
        self.assertIsNotNone(usable_size,
                             "Failed to get usable size of brick")
        usable_size = int(usable_size * 0.88)

        # One fallocate'd file per subvolume, created in parallel from the
        # first mount; file names are numbered starting at 1.
        procs = []
        subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
        for counter, _ in enumerate(subvols, start=1):
            filename = "{}/test_file_{}".format(self.mounts[0].mountpoint,
                                                counter)
            cmd = "fallocate -l {}G {}".format(usable_size, filename)
            procs.append(g.run_async(self.mounts[0].client_system, cmd))

        # Start monitoring resource usage on servers and clients.
        # The logging interval is left at its default (60 sec per the
        # original note), so 12 hrs of heal info needs 60 * 12 = 720
        # samples; count=780 corresponds to ~13 hrs.
        # NOTE(review): the extra 60 samples are presumably headroom for
        # the IO validation that runs before the loop - confirm.
        monitor_proc_dict = self.start_memory_and_cpu_usage_logging(
            self.test_id, count=780)
        self.assertIsNotNone(monitor_proc_dict,
                             "Failed to start monitoring on servers and "
                             "clients")

        ret = validate_io_procs(procs, self.mounts)
        self.assertTrue(ret, "IO Failed")

        # Perform gluster heal info for 12 hours. The command string does
        # not change between iterations, so it is built once up front.
        end_time = datetime.now() + timedelta(hours=12)
        cmd = "gluster volume heal %s info" % self.volname
        while datetime.now() <= end_time:
            ret, _, _ = g.run(self.mnode, cmd)
            self.assertEqual(ret, 0, "Failed to execute heal info cmd")
        g.log.info("Successfully ran for 12 hours. Checking for "
                   "memory leaks")

        # Wait for monitoring processes to complete
        ret = wait_for_logging_processes_to_stop(monitor_proc_dict,
                                                 cluster=True)
        self.assertTrue(ret,
                        "ERROR: Failed to stop monitoring processes")

        # Check if there are any memory leaks and OOM killers
        ret = self.check_for_memory_leaks_and_oom_kills_on_servers(
            self.test_id)
        self.assertFalse(ret,
                         "Memory leak and OOM kills check failed on servers")

        ret = self.check_for_memory_leaks_and_oom_kills_on_clients(
            self.test_id)
        self.assertFalse(ret,
                         "Memory leak and OOM kills check failed on clients")
        g.log.info(
            "No memory leaks/OOM kills found on serves and clients")