Diffstat (limited to 'tests/functional/afr/heal')
10 files changed, 1768 insertions, 171 deletions
diff --git a/tests/functional/afr/heal/test_afr_self_heal_add_brick_rebalance.py b/tests/functional/afr/heal/test_afr_self_heal_add_brick_rebalance.py new file mode 100644 index 000000000..df05dd86c --- /dev/null +++ b/tests/functional/afr/heal/test_afr_self_heal_add_brick_rebalance.py @@ -0,0 +1,199 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from time import sleep +from random import sample + +from glusto.core import Glusto as g +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + are_bricks_offline) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.heal_libs import (is_volume_in_split_brain, + monitor_heal_completion) +from glustolibs.gluster.lib_utils import collect_bricks_arequal +from glustolibs.gluster.rebalance_ops import (rebalance_start, + wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_ops import volume_start +from glustolibs.gluster.volume_libs import ( + verify_all_process_of_volume_are_online, get_subvols, expand_volume, + wait_for_volume_process_to_be_online) +from glustolibs.io.utils import (validate_io_procs, + list_all_files_and_dirs_mounts, + wait_for_io_to_complete) +from glustolibs.misc.misc_libs import upload_scripts + + +@runs_on([['arbiter', 'distributed-arbiter', 'replicated', + 'distributed-replicated'], ['glusterfs']]) +class TestAfrSelfHealAddBrickRebalance(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" % + cls.clients) + g.log.info("Successfully uploaded IO scripts to clients % s", + cls.clients) + + def setUp(self): + + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + if not self.setup_volume_and_mount_volume(self.mounts): + raise ExecutionError("Unable to setup and mount volume") + + def tearDown(self): + + # Wait if any IOs are pending from the test + if self.all_mounts_procs: + ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts) + if ret: + raise ExecutionError( + "Wait for IO completion failed on some of the clients") + + # Unmount and cleanup the volume + if not self.unmount_volume_and_cleanup_volume(self.mounts): + raise ExecutionError("Unable to unmount and cleanup volume") + + # Calling GlusterBaseClass Teardown + self.get_super_method(self, 'tearDown')() + + def test_afr_self_heal_add_brick_rebalance(self): + """ + Test Steps: + 1. Create a replicated/distributed-replicate volume and mount it + 2. 
Start IO from the clients + 3. Bring down a brick from the subvol and validate it is offline + 4. Bring back the brick online and wait for heal to complete + 5. Once the heal is completed, expand the volume. + 6. Trigger rebalance and wait for rebalance to complete + 7. Validate IO, no errors during the steps performed from step 2 + 8. Check arequal of the subvol and all the brick in the same subvol + should have same checksum + """ + # Start IO from the clients + self.all_mounts_procs = [] + for count, mount_obj in enumerate(self.mounts): + g.log.info("Starting IO on %s:%s", mount_obj.client_system, + mount_obj.mountpoint) + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d --dir-depth 3 --dir-length 5 " + "--max-num-of-dirs 5 --num-of-files 30 %s" % ( + self.script_upload_path, count, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + self.all_mounts_procs.append(proc) + + # List a brick in each subvol and bring them offline + subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + brick_to_bring_offline = [] + for subvol in subvols: + self.assertTrue(subvol, "List is empty") + brick_to_bring_offline.extend(sample(subvol, 1)) + + ret = bring_bricks_offline(self.volname, brick_to_bring_offline) + self.assertTrue(ret, "Unable to bring brick: {} offline".format( + brick_to_bring_offline)) + + # Validate the brick is offline + ret = are_bricks_offline(self.mnode, self.volname, + brick_to_bring_offline) + self.assertTrue(ret, "Brick:{} is still online".format( + brick_to_bring_offline)) + + # Wait for 10 seconds for IO to be generated + sleep(10) + + # Start volume with force to bring all bricks online + ret, _, _ = volume_start(self.mnode, self.volname, force=True) + self.assertEqual(ret, 0, "Volume start with force failed") + g.log.info("Volume: %s started successfully", self.volname) + + # Verify volume's all process are online + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, ("Volume %s : All process are not online", + self.volname)) + + # Monitor heal completion + self.assertTrue(monitor_heal_completion(self.mnode, self.volname, + interval_check=10), + "Heal failed after 20 mins") + + # Check are there any files in split-brain and heal completion + self.assertFalse(is_volume_in_split_brain(self.mnode, self.volname), + "Some files are in split brain for " + "volume: {}".format(self.volname)) + + # Expanding volume by adding bricks to the volume when IO in progress + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, ("Failed to expand the volume when IO in " + "progress on volume %s", self.volname)) + + # Wait for volume processes to be online + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to wait for volume %s processes to " + "be online", self.volname)) + + # Start Rebalance + ret, _, _ = rebalance_start(self.mnode, self.volname) + self.assertEqual(ret, 0, ("Failed to start rebalance on the volume " + "%s", self.volname)) + g.log.info("Successfully started rebalance on the " + "volume %s", self.volname) + + # Without sleep the next step will fail with Glusterd Syncop locking. 
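        # [Illustrative sketch, not part of the patch] glusterd serializes
        # volume operations behind a cluster-wide (syncop) lock, so querying
        # rebalance status immediately after rebalance_start can fail with
        # "Another transaction is in progress". A short retry loop is a
        # hedged alternative to the fixed sleep used below; it assumes only
        # g.run and the stock gluster CLI:
        for _ in range(5):
            ret, _, _ = g.run(self.mnode,
                              "gluster volume rebalance %s status"
                              % self.volname)
            if ret == 0:
                break
            sleep(2)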
+ sleep(2) + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=1800) + self.assertTrue(ret, ("Rebalance is not yet complete on the volume " + "%s", self.volname)) + g.log.info("Rebalance is successfully complete on " + "the volume %s", self.volname) + + # Validate IO + ret = validate_io_procs(self.all_mounts_procs, self.mounts) + self.io_validation_complete = True + self.assertTrue(ret, "IO failed on some of the clients") + self.all_mounts_procs *= 0 + + # List all files and dirs created + ret = list_all_files_and_dirs_mounts(self.mounts) + self.assertTrue(ret, "Failed to list all files and dirs") + + # Check arequal checksum of all the bricks is same + for subvol in subvols: + ret, arequal_from_the_bricks = collect_bricks_arequal(subvol) + self.assertTrue(ret, "Arequal is collected successfully across " + "the bricks in the subvol {}".format(subvol)) + cmd = len(set(arequal_from_the_bricks)) + if (self.volume_type == "arbiter" or + self.volume_type == "distributed-arbiter"): + cmd = len(set(arequal_from_the_bricks[:2])) + self.assertEqual(cmd, 1, "Arequal" + " is same on all the bricks in the subvol") diff --git a/tests/functional/afr/heal/test_dir_time_stamp_restoration.py b/tests/functional/afr/heal/test_dir_time_stamp_restoration.py new file mode 100644 index 000000000..6a4ef2a19 --- /dev/null +++ b/tests/functional/afr/heal/test_dir_time_stamp_restoration.py @@ -0,0 +1,160 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-131 USA. + +""" +Description: + Check if parent directory timestamps are restored after an entry heal. 
+""" +from glusto.core import Glusto as g + +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import ( + bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + select_volume_bricks_to_bring_offline, + get_all_bricks) +from glustolibs.gluster.heal_ops import trigger_heal +from glustolibs.gluster.glusterdir import (mkdir, rmdir) +from glustolibs.gluster.glusterfile import (get_fattr, get_file_stat) +from glustolibs.gluster.volume_libs import set_volume_options +from glustolibs.gluster.heal_libs import monitor_heal_completion + + +@runs_on([['replicated'], + ['glusterfs']]) +class TestDirTimeStampRestore(GlusterBaseClass): + + def setUp(self): + self.get_super_method(self, 'setUp')() + + ret = self.setup_volume_and_mount_volume(mounts=self.mounts, + volume_create_force=False) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + self.bricks_list = get_all_bricks(self.mnode, self.volname) + + def tearDown(self): + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + self.get_super_method(self, 'tearDown')() + + def are_mdata_xattrs_equal(self): + """Check if atime/mtime/ctime in glusterfs.mdata xattr are identical""" + timestamps = [] + for brick_path in self.bricks_list: + server, brick = brick_path.split(':') + fattr = get_fattr(server, '%s/%s' % (brick, "dir1"), + 'trusted.glusterfs.mdata') + self.assertIsNotNone(fattr, 'Unable to get mdata xattr') + timestamps.append(fattr) + + g.log.debug("mdata list = %s", ''.join(map(str, timestamps))) + return timestamps.count(timestamps[0]) == len(timestamps) + + def are_stat_timestamps_equal(self): + """Check if atime/mtime/ctime in stat info are identical""" + timestamps = [] + for brick_path in self.bricks_list: + server, brick = brick_path.split(':') + stat_data = get_file_stat(server, "%s/dir1" % brick) + ts_string = "{}-{}-{}".format(stat_data['epoch_atime'], + stat_data['epoch_mtime'], + stat_data['epoch_ctime']) + timestamps.append(ts_string) + + g.log.debug("stat list = %s", ''.join(map(str, timestamps))) + return timestamps.count(timestamps[0]) == len(timestamps) + + def perform_test(self, ctime): + """ + Testcase steps: + 1. Enable/disable features,ctime based on function argument. + 2. Create a directory on the mount point. + 3. Kill a brick and create a file inside the directory. + 4. Bring the brick online. + 5. Trigger heal and wait for its completion. + 6. Verify that the atime, mtime and ctime of the directory are same on + all bricks of the replica. 
+ """ + if ctime: + option = {'features.ctime': 'on'} + else: + option = {'features.ctime': 'off'} + ret = set_volume_options(self.mnode, self.volname, option) + self.assertTrue(ret, 'failed to set option %s on %s' + % (option, self.volume)) + + client, m_point = (self.mounts[0].client_system, + self.mounts[0].mountpoint) + + dirpath = '{}/dir1'.format(m_point) + ret = mkdir(client, dirpath) + self.assertTrue(ret, 'Unable to create a directory from mount point') + + bricks_to_bring_offline = select_volume_bricks_to_bring_offline( + self.mnode, self.volname) + self.assertIsNotNone(bricks_to_bring_offline, "List is empty") + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} offline'. + format(bricks_to_bring_offline)) + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks {} are not offline'. + format(bricks_to_bring_offline)) + + cmd = 'touch {}/file1'.format(dirpath) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, 'Unable to create file from mount point') + + ret = bring_bricks_online( + self.mnode, self.volname, + bricks_to_bring_offline, + bring_bricks_online_methods=['volume_start_force']) + self.assertTrue(ret, 'Failed to bring bricks {} online'.format + (bricks_to_bring_offline)) + ret = trigger_heal(self.mnode, self.volname) + self.assertTrue(ret, 'Starting heal failed') + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + if ctime: + ret = self.are_mdata_xattrs_equal() + self.assertTrue(ret, "glusterfs.mdata mismatch for {}" + .format(dirpath)) + else: + ret = self.are_stat_timestamps_equal() + self.assertTrue(ret, "stat mismatch for {}".format(dirpath)) + + ret = rmdir(client, dirpath, force=True) + self.assertTrue(ret, 'Unable to delete directory from mount point') + + def test_dir_time_stamp_restoration(self): + """ + Create pending entry self-heal on a replica volume and verify that + after the heal is complete, the atime, mtime and ctime of the parent + directory are identical on all bricks of the replica. + + The test is run with features.ctime enabled as well as disabled. + """ + self.perform_test(ctime=True) + self.perform_test(ctime=False) diff --git a/tests/functional/afr/heal/test_heal_for_conservative_merge_with_two_bricks_blame.py b/tests/functional/afr/heal/test_heal_for_conservative_merge_with_two_bricks_blame.py new file mode 100644 index 000000000..163596bb7 --- /dev/null +++ b/tests/functional/afr/heal/test_heal_for_conservative_merge_with_two_bricks_blame.py @@ -0,0 +1,175 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from time import sleep + +from glusto.core import Glusto as g +from glustolibs.gluster.brick_libs import (get_all_bricks, are_bricks_offline, + bring_bricks_offline, + get_online_bricks_list, + are_bricks_online) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.gluster_init import restart_glusterd +from glustolibs.gluster.glusterfile import set_fattr, get_fattr +from glustolibs.gluster.heal_libs import (is_volume_in_split_brain, + monitor_heal_completion) +from glustolibs.gluster.lib_utils import collect_bricks_arequal + + +@runs_on([['replicated'], ['glusterfs']]) +class TestHealForConservativeMergeWithTwoBricksBlame(GlusterBaseClass): + + def setUp(self): + # calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup volume and mount it. + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + def tearDown(self): + # Unmount and cleanup the volume + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Unable to unmount and cleanup volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _bring_brick_offline_and_check(self, brick): + """Brings brick offline an checks if it is offline or not""" + ret = bring_bricks_offline(self.volname, [brick]) + self.assertTrue(ret, "Unable to bring brick: {} offline".format(brick)) + + # Validate the brick is offline + ret = are_bricks_offline(self.mnode, self.volname, [brick]) + self.assertTrue(ret, "Brick:{} is still online".format(brick)) + + def _get_fattr_for_the_brick(self, brick): + """Get xattr of trusted.afr.volname-client-0 for the given brick""" + host, fqpath = brick.split(":") + fqpath = fqpath + "/dir1" + fattr = "trusted.afr.{}-client-0".format(self.volname) + return get_fattr(host, fqpath, fattr, encode="hex") + + def _check_peers_status(self): + """Validates peers are connected or not""" + count = 0 + while count < 4: + if self.validate_peers_are_connected(): + return + sleep(5) + count += 1 + self.fail("Peers are not in connected state") + + def test_heal_for_conservative_merge_with_two_bricks_blame(self): + """ + 1) Create 1x3 volume and fuse mount the volume + 2) On mount created a dir dir1 + 3) Pkill glusterfsd on node n1 (b2 on node2 and b3 and node3 up) + 4) touch f{1..10} on the mountpoint + 5) b2 and b3 xattrs would be blaming b1 as files are created while + b1 is down + 6) Reset the b3 xattrs to NOT blame b1 by using setattr + 7) Now pkill glusterfsd of b2 on node2 + 8) Restart glusterd on node1 to bring up b1 + 9) Now bricks b1 online , b2 down, b3 online + 10) touch x{1..10} under dir1 itself + 11) Again reset xattr on node3 of b3 so that it doesn't blame b2, + as done for b1 in step 6 + 12) Do restart glusterd on node2 hosting b2 to bring all bricks online + 13) Check for heal info, split-brain and arequal for the bricks + """ + # pylint: disable=too-many-locals + # Create dir `dir1/` on mountpont + path = self.mounts[0].mountpoint + "/dir1" + ret = mkdir(self.mounts[0].client_system, path, parents=True) + self.assertTrue(ret, "Directory {} creation failed".format(path)) + + all_bricks = get_all_bricks(self.mnode, self.volname) + self.assertIsNotNone(all_bricks, "Unable to fetch bricks of volume") + brick1, brick2, brick3 = all_bricks + + # Bring first brick offline + 
self._bring_brick_offline_and_check(brick1) + + # touch f{1..10} files on the mountpoint + cmd = ("cd {mpt}; for i in `seq 1 10`; do touch f$i" + "; done".format(mpt=path)) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Unable to create files on mountpoint") + + # Check b2 and b3 xattrs are blaming b1 and are same + self.assertEqual(self._get_fattr_for_the_brick(brick2), + self._get_fattr_for_the_brick(brick3), + "Both the bricks xattrs are not blaming " + "brick: {}".format(brick1)) + + # Reset the xattrs of dir1 on b3 for brick b1 + first_xattr_to_reset = "trusted.afr.{}-client-0".format(self.volname) + xattr_value = "0x000000000000000000000000" + host, brick_path = brick3.split(":") + brick_path = brick_path + "/dir1" + ret = set_fattr(host, brick_path, first_xattr_to_reset, xattr_value) + self.assertTrue(ret, "Unable to set xattr for the directory") + + # Kill brick2 on the node2 + self._bring_brick_offline_and_check(brick2) + + # Restart glusterd on node1 to bring the brick1 online + self.assertTrue(restart_glusterd([brick1.split(":")[0]]), "Unable to " + "restart glusterd") + # checking for peer status post glusterd restart + self._check_peers_status() + + # Check if the brick b1 on node1 is online or not + online_bricks = get_online_bricks_list(self.mnode, self.volname) + self.assertIsNotNone(online_bricks, "Unable to fetch online bricks") + self.assertIn(brick1, online_bricks, "Brick:{} is still offline after " + "glusterd restart".format(brick1)) + + # Create 10 files under dir1 naming x{1..10} + cmd = ("cd {mpt}; for i in `seq 1 10`; do touch x$i" + "; done".format(mpt=path)) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Unable to create files on mountpoint") + + # Reset the xattrs from brick3 on to brick2 + second_xattr_to_reset = "trusted.afr.{}-client-1".format(self.volname) + ret = set_fattr(host, brick_path, second_xattr_to_reset, xattr_value) + self.assertTrue(ret, "Unable to set xattr for the directory") + + # Bring brick2 online + self.assertTrue(restart_glusterd([brick2.split(":")[0]]), "Unable to " + "restart glusterd") + self._check_peers_status() + + self.assertTrue(are_bricks_online(self.mnode, self.volname, [brick2])) + + # Check are there any files in split-brain and heal completion + self.assertFalse(is_volume_in_split_brain(self.mnode, self.volname), + "Some files are in split brain for " + "volume: {}".format(self.volname)) + self.assertTrue(monitor_heal_completion(self.mnode, self.volname), + "Conservative merge of files failed") + + # Check arequal checksum of all the bricks is same + ret, arequal_from_the_bricks = collect_bricks_arequal(all_bricks) + self.assertTrue(ret, "Arequal is collected successfully across the" + " bricks in the subvol {}".format(all_bricks)) + self.assertEqual(len(set(arequal_from_the_bricks)), 1, "Arequal is " + "same on all the bricks in the subvol") diff --git a/tests/functional/afr/heal/test_heal_info_no_hang.py b/tests/functional/afr/heal/test_heal_info_no_hang.py new file mode 100644 index 000000000..82f8b0598 --- /dev/null +++ b/tests/functional/afr/heal/test_heal_info_no_hang.py @@ -0,0 +1,162 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-131 USA. + +""" +Description: + heal info completes when there is ongoing I/O and a lot of pending heals. +""" +import random +from glusto.core import Glusto as g + +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + get_all_bricks) +from glustolibs.gluster.heal_ops import trigger_heal +from glustolibs.io.utils import run_linux_untar +from glustolibs.gluster.glusterdir import mkdir + + +@runs_on([['distributed-replicated'], + ['glusterfs']]) +class TestHealInfoNoHang(GlusterBaseClass): + + def setUp(self): + self.get_super_method(self, 'setUp')() + + self.is_io_running = False + + ret = self.setup_volume_and_mount_volume(mounts=self.mounts, + volume_create_force=False) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + self.bricks_list = get_all_bricks(self.mnode, self.volname) + self.subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + + def tearDown(self): + if self.is_io_running: + if not self._wait_for_untar_completion(): + g.log.error("I/O failed to stop on clients") + + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + self.get_super_method(self, 'tearDown')() + + def _wait_for_untar_completion(self): + """Wait for the kernel untar to complete""" + has_process_stopped = [] + for proc in self.list_of_io_processes: + try: + ret, _, _ = proc.async_communicate() + if not ret: + has_process_stopped.append(False) + has_process_stopped.append(True) + except ValueError: + has_process_stopped.append(True) + return all(has_process_stopped) + + def _does_heal_info_complete_within_timeout(self): + """Check if heal info CLI completes within a specific timeout""" + # We are just assuming 1 entry takes one second to process, which is + # a very high number but some estimate is better than a random magic + # value for timeout. + timeout = self.num_entries * 1 + + # heal_info_data = get_heal_info(self.mnode, self.volname) + cmd = "timeout %s gluster volume heal %s info" % (timeout, + self.volname) + ret, _, _ = g.run(self.mnode, cmd) + if ret: + return False + return True + + def test_heal_info_no_hang(self): + """ + Testcase steps: + 1. Start kernel untar on the mount + 2. While untar is going on, kill a brick of the replica. + 3. Wait for the untar to be over, resulting in pending heals. + 4. Get the approx. number of pending heals and save it + 5. Bring the brick back online. + 6. Trigger heal + 7. Run more I/Os with dd command + 8. Run heal info command and check that it completes successfully under + a timeout that is based on the no. of heals in step 4. 
+ """ + self.list_of_io_processes = [] + self.linux_untar_dir = "{}/{}".format(self.mounts[0].mountpoint, + "linuxuntar") + ret = mkdir(self.clients[0], self.linux_untar_dir) + self.assertTrue(ret, "Failed to create dir linuxuntar for untar") + + # Start linux untar on dir linuxuntar + ret = run_linux_untar(self.clients[0], self.mounts[0].mountpoint, + dirs=tuple(['linuxuntar'])) + self.list_of_io_processes += ret + self.is_io_running = True + + # Kill brick resulting in heal backlog. + brick_to_bring_offline = random.choice(self.bricks_list) + ret = bring_bricks_offline(self.volname, brick_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' + % brick_to_bring_offline) + ret = are_bricks_offline(self.mnode, self.volname, + [brick_to_bring_offline]) + self.assertTrue(ret, 'Bricks %s are not offline' + % brick_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + brick_to_bring_offline) + + ret = self._wait_for_untar_completion() + self.assertFalse(ret, "IO didn't complete or failed on client") + self.is_io_running = False + + # Get approx. no. of entries to be healed. + cmd = ("gluster volume heal %s statistics heal-count | grep Number " + "| awk '{sum+=$4} END {print sum/2}'" % self.volname) + ret, self.num_entries, _ = g.run(self.mnode, cmd) + self.assertEqual(ret, 0, "Failed to get heal-count statistics") + + # Restart the down bricks + ret = bring_bricks_online(self.mnode, self.volname, + brick_to_bring_offline) + self.assertTrue(ret, 'Failed to bring brick %s online' % + brick_to_bring_offline) + g.log.info('Bringing brick %s online is successful', + brick_to_bring_offline) + # Trigger heal + ret = trigger_heal(self.mnode, self.volname) + self.assertTrue(ret, 'Starting heal failed') + g.log.info('Index heal launched') + + # Run more I/O + cmd = ("for i in `seq 1 10`; do dd if=/dev/urandom of=%s/file_$i " + "bs=1M count=100; done" % self.mounts[0].mountpoint) + ret = g.run_async(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + + # Get heal info + ret = self._does_heal_info_complete_within_timeout() + self.assertTrue(ret, 'Heal info timed out') + g.log.info('Heal info completed succesfully') diff --git a/tests/functional/afr/heal/test_no_glustershd_with_distribute.py b/tests/functional/afr/heal/test_no_glustershd_with_distribute.py index d2b43bfe3..bbefe0cff 100644 --- a/tests/functional/afr/heal/test_no_glustershd_with_distribute.py +++ b/tests/functional/afr/heal/test_no_glustershd_with_distribute.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2021 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -58,7 +58,7 @@ class SelfHealDaemonProcessTestsWithMultipleVolumes(GlusterBaseClass): for volume_config in cls.volume_configs: ret = setup_volume(mnode=cls.mnode, all_servers_info=cls.all_servers_info, - volume_config=volume_config) + volume_config=volume_config, multi_vol=True) volname = volume_config['name'] if not ret: raise ExecutionError("Failed to setup Volume" diff --git a/tests/functional/afr/heal/test_self_heal.py b/tests/functional/afr/heal/test_self_heal.py index 6bbcccdfc..4fb6dea7e 100755 --- a/tests/functional/afr/heal/test_self_heal.py +++ b/tests/functional/afr/heal/test_self_heal.py @@ -16,14 +16,12 @@ # pylint: disable=too-many-lines from glusto.core import Glusto as g - from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) from glustolibs.gluster.exceptions import ExecutionError -from glustolibs.gluster.volume_ops import set_volume_options +from glustolibs.gluster.volume_ops import get_volume_options from glustolibs.gluster.volume_libs import ( verify_all_process_of_volume_are_online, wait_for_volume_process_to_be_online) -from glustolibs.gluster.volume_libs import expand_volume from glustolibs.gluster.brick_libs import (select_bricks_to_bring_offline, bring_bricks_offline, bring_bricks_online, @@ -34,8 +32,6 @@ from glustolibs.gluster.heal_libs import ( is_heal_complete, is_volume_in_split_brain, is_shd_daemonized) -from glustolibs.gluster.rebalance_ops import (rebalance_start, - wait_for_rebalance_to_complete) from glustolibs.gluster.heal_ops import trigger_heal from glustolibs.misc.misc_libs import upload_scripts from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs, @@ -43,12 +39,12 @@ from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs, @runs_on([['replicated', 'distributed-replicated'], - ['glusterfs', 'cifs', 'nfs']]) + ['glusterfs', 'cifs']]) class TestSelfHeal(GlusterBaseClass): """ Description: - Arbiter Test cases related to - healing in default configuration of the volume + AFR Test cases related to healing in + default configuration of the volume """ @classmethod @@ -121,12 +117,15 @@ class TestSelfHeal(GlusterBaseClass): # Calling GlusterBaseClass teardown self.get_super_method(self, 'tearDown')() - def test_data_self_heal_daemon_off(self): + def test_data_self_heal_command(self): """ Test Data-Self-Heal (heal command) Description: - - set the volume option + - get the client side healing volume options and check + if they have already been disabled by default + NOTE: Client side healing has been disabled by default + since GlusterFS 6.0 "metadata-self-heal": "off" "entry-self-heal": "off" "data-self-heal": "off" @@ -135,7 +134,7 @@ class TestSelfHeal(GlusterBaseClass): - set the volume option "self-heal-daemon": "off" - bring down all bricks processes from selected set - - Get areeual after getting bricks offline and compare with + - Get arequal after getting bricks offline and compare with arequal before getting bricks offline - modify the data - bring bricks online @@ -144,8 +143,6 @@ class TestSelfHeal(GlusterBaseClass): - check daemons and start healing - check if heal is completed - check for split-brain - - add bricks - - do rebalance - create 5k files - while creating files - kill bricks and bring bricks online one by one in cycle @@ -153,15 +150,16 @@ class TestSelfHeal(GlusterBaseClass): """ # pylint: 
disable=too-many-statements - # Setting options - g.log.info('Setting options...') - options = {"metadata-self-heal": "off", - "entry-self-heal": "off", - "data-self-heal": "off"} - ret = set_volume_options(self.mnode, self.volname, options) - self.assertTrue(ret, 'Failed to set options %s' % options) - g.log.info("Successfully set %s for volume %s", - options, self.volname) + # Checking if Client side healing options are disabled by default + g.log.info('Checking Client side healing is disabled by default') + options = ('cluster.metadata-self-heal', 'cluster.data-self-heal', + 'cluster.entry-self-heal') + for option in options: + ret = get_volume_options(self.mnode, self.volname, option)[option] + self.assertTrue(bool(ret == 'off' or ret == 'off (DEFAULT)'), + "{} option is not disabled by default" + .format(option)) + g.log.info("Client side healing options are disabled by default") # Creating files on client side for mount_obj in self.mounts: @@ -193,20 +191,10 @@ class TestSelfHeal(GlusterBaseClass): g.log.info('Getting arequal before getting bricks offline ' 'is successful') - # Setting options - g.log.info('Setting options...') - options = {"self-heal-daemon": "off"} - ret = set_volume_options(self.mnode, self.volname, options) - self.assertTrue(ret, 'Failed to set options %s' % options) - g.log.info("Option 'self-heal-daemon' is set to 'off' successfully") - # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -269,13 +257,6 @@ class TestSelfHeal(GlusterBaseClass): g.log.info('Bringing bricks %s online is successful', bricks_to_bring_offline) - # Setting options - g.log.info('Setting options...') - options = {"self-heal-daemon": "on"} - ret = set_volume_options(self.mnode, self.volname, options) - self.assertTrue(ret, 'Failed to set options %s' % options) - g.log.info("Option 'self-heal-daemon' is set to 'on' successfully") - # Wait for volume processes to be online g.log.info("Wait for volume processes to be online") ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) @@ -284,7 +265,7 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("Successful in waiting for volume %s processes to be " "online", self.volname) - # Verify volume's all process are online + # Verify volume's all processes are online g.log.info("Verifying volume's all process are online") ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) self.assertTrue(ret, ("Volume %s : All process are not online" @@ -316,23 +297,6 @@ class TestSelfHeal(GlusterBaseClass): self.assertFalse(ret, 'Volume is in split-brain state') g.log.info('Volume is not in split-brain state') - # Add bricks - g.log.info("Start adding bricks to volume...") - ret = expand_volume(self.mnode, self.volname, self.servers, - self.all_servers_info) - self.assertTrue(ret, ("Failed to expand the volume %s", self.volname)) - g.log.info("Expanding volume is successful on " - "volume %s", self.volname) - - # Do rebalance - ret, _, _ = rebalance_start(self.mnode, self.volname) - self.assertEqual(ret, 0, 'Failed to start rebalance') - g.log.info('Rebalance is started') - - ret = 
wait_for_rebalance_to_complete(self.mnode, self.volname) - self.assertTrue(ret, 'Rebalance is not completed') - g.log.info('Rebalance is completed successfully') - # Create 1k files self.all_mounts_procs = [] for mount_obj in self.mounts: @@ -405,50 +369,26 @@ class TestSelfHeal(GlusterBaseClass): ) self.io_validation_complete = True - def test_self_heal_50k_files_heal_command_by_add_brick(self): + def test_self_heal_50k_files_heal_default(self): """ - Test self-heal of 50k files (heal command + Test self-heal of 50k files by heal default Description: - - set the volume option - "metadata-self-heal": "off" - "entry-self-heal": "off" - "data-self-heal": "off" - "self-heal-daemon": "off" - bring down all bricks processes from selected set - create IO (50k files) - Get arequal before getting bricks online - - bring bricks online - - set the volume option - "self-heal-daemon": "on" - - check for daemons - - start healing + - check for daemons to come online + - heal daemon should pick up entries to heal automatically - check if heal is completed - check for split-brain - get arequal after getting bricks online and compare with arequal before getting bricks online - - add bricks - - do rebalance - - get arequal after adding bricks and compare with - arequal after getting bricks online """ # pylint: disable=too-many-locals,too-many-statements - # Setting options - g.log.info('Setting options...') - options = {"metadata-self-heal": "off", - "entry-self-heal": "off", - "data-self-heal": "off", - "self-heal-daemon": "off"} - ret = set_volume_options(self.mnode, self.volname, options) - self.assertTrue(ret, 'Failed to set options') - g.log.info("Successfully set %s for volume %s", options, self.volname) # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -500,13 +440,6 @@ class TestSelfHeal(GlusterBaseClass): g.log.info('Bringing bricks %s online is successful', bricks_to_bring_offline) - # Setting options - g.log.info('Setting options...') - options = {"self-heal-daemon": "on"} - ret = set_volume_options(self.mnode, self.volname, options) - self.assertTrue(ret, 'Failed to set options %s' % options) - g.log.info("Option 'self-heal-daemon' is set to 'on' successfully") - # Wait for volume processes to be online g.log.info("Wait for volume processes to be online") ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) @@ -528,11 +461,7 @@ class TestSelfHeal(GlusterBaseClass): self.assertTrue(ret, "Either No self heal daemon process found") g.log.info("All self-heal-daemons are online") - # Start healing - ret = trigger_heal(self.mnode, self.volname) - self.assertTrue(ret, 'Heal is not started') - g.log.info('Healing is started') - + # Default Heal testing, wait for shd to pick up healing # Monitor heal completion ret = monitor_heal_completion(self.mnode, self.volname, timeout_period=3600) @@ -557,40 +486,8 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks online # and after bringing bricks online - self.assertItemsEqual(result_before_online, result_after_online, - 'Checksums before and ' - 'after bringing bricks online 
are not equal') + self.assertEqual(result_before_online, result_after_online, + 'Checksums before and after bringing bricks online ' + 'are not equal') g.log.info('Checksums before and after bringing bricks online ' 'are equal') - - # Add bricks - g.log.info("Start adding bricks to volume...") - ret = expand_volume(self.mnode, self.volname, self.servers, - self.all_servers_info) - self.assertTrue(ret, ("Failed to expand the volume when IO in " - "progress on volume %s", self.volname)) - g.log.info("Expanding volume is successful on volume %s", self.volname) - - # Do rebalance - ret, _, _ = rebalance_start(self.mnode, self.volname) - self.assertEqual(ret, 0, 'Failed to start rebalance') - g.log.info('Rebalance is started') - - ret = wait_for_rebalance_to_complete(self.mnode, self.volname) - self.assertTrue(ret, 'Rebalance is not completed') - g.log.info('Rebalance is completed successfully') - - # Get arequal after adding bricks - g.log.info('Getting arequal after adding bricks...') - ret, result_after_adding_bricks = collect_mounts_arequal(self.mounts) - self.assertTrue(ret, 'Failed to get arequal') - g.log.info('Getting arequal after getting bricks ' - 'is successful') - - # Checking arequals after bringing bricks online - # and after adding bricks - self.assertItemsEqual(result_after_online, result_after_adding_bricks, - 'Checksums after bringing bricks online and ' - 'after adding bricks are not equal') - g.log.info('Checksums after bringing bricks online and ' - 'after adding bricks are equal') diff --git a/tests/functional/afr/heal/test_self_heal_daemon_process.py b/tests/functional/afr/heal/test_self_heal_daemon_process.py index 5c88460f6..ea598b1fc 100755 --- a/tests/functional/afr/heal/test_self_heal_daemon_process.py +++ b/tests/functional/afr/heal/test_self_heal_daemon_process.py @@ -449,10 +449,7 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): # select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # bring bricks offline g.log.info("Going to bring down the brick process " @@ -533,10 +530,7 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) diff --git a/tests/functional/afr/heal/test_self_heal_with_link_files.py b/tests/functional/afr/heal/test_self_heal_with_link_files.py new file mode 100644 index 000000000..d029c3d9e --- /dev/null +++ b/tests/functional/afr/heal/test_self_heal_with_link_files.py @@ -0,0 +1,405 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. 
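A side note on the assertion change above: assertItemsEqual exists only on
Python 2, so the patch switches to assertEqual, which compares the two arequal
lists element by element and in order. If brick or mount ordering were not
guaranteed, the Python 3 replacement for assertItemsEqual would be
assertCountEqual. A small self-contained sketch (checksum strings are
illustrative):

    import unittest

    class ArequalComparisonSketch(unittest.TestCase):
        def test_arequal_lists_match(self):
            # Real values would come from collect_mounts_arequal() before and
            # after bringing the bricks back online.
            before = ["0000abcd", "0000abcd"]
            after = ["0000abcd", "0000abcd"]
            # Order-sensitive comparison, as used in the patch:
            self.assertEqual(before, after)
            # Order-insensitive Python 3 equivalent of assertItemsEqual:
            self.assertCountEqual(before, after)

    if __name__ == "__main__":
        unittest.main()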
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from random import choice + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + are_bricks_online, + get_all_bricks) +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_volume_in_split_brain, + is_heal_complete) +from glustolibs.gluster.lib_utils import collect_bricks_arequal +from glustolibs.gluster.volume_libs import (get_subvols, + replace_brick_from_volume) +from glustolibs.io.utils import collect_mounts_arequal + + +@runs_on([['distributed-replicated', 'replicated'], ['glusterfs']]) +class TestHealWithLinkFiles(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to setup and mount volume") + + self.first_client = self.mounts[0].client_system + self.mountpoint = self.mounts[0].mountpoint + + def tearDown(self): + + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _create_files_and_dirs_on_mount_point(self, second_attempt=False): + """A function to create files and dirs on mount point""" + # Create a parent directory test_link_self_heal on mount point + if not second_attempt: + ret = mkdir(self.first_client, + '{}/{}'.format(self.mountpoint, + 'test_link_self_heal')) + self.assertTrue(ret, "Failed to create dir test_link_self_heal") + + # Create dirctories and files inside directory test_link_self_heal + io_cmd = ("for i in `seq 1 5`; do mkdir dir.$i; " + "for j in `seq 1 10`; do dd if=/dev/random " + "of=dir.$i/file.$j bs=1k count=$j; done; done") + if second_attempt: + io_cmd = ("for i in `seq 1 5` ; do for j in `seq 1 10`; " + "do dd if=/dev/random of=sym_link_dir.$i/" + "new_file.$j bs=1k count=$j; done; done ") + cmd = ("cd {}/test_link_self_heal;{}".format(self.mountpoint, io_cmd)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to create dirs and files inside") + + def _create_soft_links_to_directories(self): + """Create soft links to directories""" + cmd = ("cd {}/test_link_self_heal; for i in `seq 1 5`; do ln -s " + "dir.$i sym_link_dir.$i; done".format(self.mountpoint)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to create soft links to dirs") + + def _verify_soft_links_to_dir(self, option=0): + """Verify soft links to dir""" + + cmd_list = [ + ("for i in `seq 1 5`; do stat -c %F sym_link_dir.$i | " + "grep -F 'symbolic link'; if [ $? -ne 0 ]; then exit 1;" + " fi ; done; for i in `seq 1 5` ; do readlink sym_link_dir.$i | " + "grep \"dir.$i\"; if [ $? 
-ne 0 ]; then exit 1; fi; done; "), + ("for i in `seq 1 5`; do for j in `seq 1 10`; do ls " + "dir.$i/new_file.$j; if [ $? -ne 0 ]; then exit 1; fi; done; " + "done")] + + # Generate command to check according to option + if option == 2: + verify_cmd = "".join(cmd_list) + else: + verify_cmd = cmd_list[option] + + cmd = ("cd {}/test_link_self_heal; {}".format(self.mountpoint, + verify_cmd)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Symlinks aren't proper") + + def _create_hard_links_to_files(self, second_attempt=False): + """Create hard links to files""" + io_cmd = ("for i in `seq 1 5`;do for j in `seq 1 10`;do ln " + "dir.$i/file.$j dir.$i/link_file.$j;done; done") + if second_attempt: + io_cmd = ("for i in `seq 1 5`; do mkdir new_dir.$i; for j in " + "`seq 1 10`; do ln dir.$i/file.$j new_dir.$i/new_file." + "$j;done; done;") + + cmd = ("cd {}/test_link_self_heal;{}".format(self.mountpoint, io_cmd)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to create hard links to files") + + def _verify_hard_links_to_files(self, second_set=False): + """Verify if hard links to files""" + file_to_compare = "dir.$i/link_file.$j" + if second_set: + file_to_compare = "new_dir.$i/new_file.$j" + + cmd = ("cd {}/test_link_self_heal;for i in `seq 1 5`; do for j in `seq" + " 1 10`;do if [ `stat -c %i dir.$i/file.$j` -ne `stat -c %i " + "{}` ];then exit 1; fi; done; done" + .format(self.mountpoint, file_to_compare)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to verify hard links to files") + + def _bring_bricks_offline(self): + """Brings bricks offline and confirms if they are offline""" + # Select bricks to bring offline from a replica set + subvols_dict = get_subvols(self.mnode, self.volname) + subvols = subvols_dict['volume_subvols'] + self.bricks_to_bring_offline = [] + for subvol in subvols: + self.bricks_to_bring_offline.append(subvol[0]) + + # Bring bricks offline + ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' % + self.bricks_to_bring_offline) + + ret = are_bricks_offline(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks %s are not offline' + % self.bricks_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + self.bricks_to_bring_offline) + + def _restart_volume_and_bring_all_offline_bricks_online(self): + """Restart volume and bring all offline bricks online""" + ret = bring_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline, + bring_bricks_online_methods=[ + 'volume_start_force']) + self.assertTrue(ret, 'Failed to bring bricks %s online' % + self.bricks_to_bring_offline) + + # Check if bricks are back online or not + ret = are_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks not online %s even after restart' % + self.bricks_to_bring_offline) + + g.log.info('Bringing bricks %s online is successful', + self.bricks_to_bring_offline) + + def _check_arequal_on_bricks_with_a_specific_arequal(self, arequal, + brick_list): + """ + Compare an inital arequal checksum with bricks from a given brick list + """ + init_val = arequal[0].splitlines()[-1].split(':')[-1] + ret, arequals = collect_bricks_arequal(brick_list) + self.assertTrue(ret, 'Failed to get arequal on bricks') + for brick_arequal in arequals: + brick_total = brick_arequal.splitlines()[-1].split(':')[-1] + self.assertEqual(init_val, brick_total, 
'Arequals not matching') + + def _check_arequal_checksum_for_the_volume(self): + """ + Check if arequals of mount point and bricks are + are the same. + """ + if self.volume_type == "replicated": + # Check arequals for "replicated" + brick_list = get_all_bricks(self.mnode, self.volname) + + # Get arequal before getting bricks offline + ret, arequals = collect_mounts_arequal([self.mounts[0]]) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal before getting bricks offline ' + 'is successful') + + # Get arequal on bricks and compare with mount_point_total + self._check_arequal_on_bricks_with_a_specific_arequal( + arequals, brick_list) + + # Check arequals for "distributed-replicated" + if self.volume_type == "distributed-replicated": + # Get the subvolumes + subvols_dict = get_subvols(self.mnode, self.volname) + num_subvols = len(subvols_dict['volume_subvols']) + + # Get arequals and compare + for i in range(0, num_subvols): + # Get arequal for first brick + brick_list = subvols_dict['volume_subvols'][i] + ret, arequals = collect_bricks_arequal([brick_list[0]]) + self.assertTrue(ret, 'Failed to get arequal on first brick') + + # Get arequal for every brick and compare with first brick + self._check_arequal_on_bricks_with_a_specific_arequal( + arequals, brick_list) + + def _check_heal_is_completed_and_not_in_split_brain(self): + """Check if heal is completed and volume not in split brain""" + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') + + # Check if volume is in split brian or not + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + def _check_if_there_are_files_and_dirs_to_be_healed(self): + """Check if there are files and dirs to be healed""" + ret = is_heal_complete(self.mnode, self.volname) + self.assertFalse(ret, 'Heal is completed') + g.log.info('Heal is pending') + + def _wait_for_heal_is_completed(self): + """Check if heal is completed""" + ret = monitor_heal_completion(self.mnode, self.volname, + timeout_period=3600) + self.assertTrue(ret, 'Heal has not yet completed') + + def _replace_one_random_brick(self): + """Replace one random brick from the volume""" + brick = choice(get_all_bricks(self.mnode, self.volname)) + ret = replace_brick_from_volume(self.mnode, self.volname, + self.servers, self.all_servers_info, + src_brick=brick) + self.assertTrue(ret, "Failed to replace brick %s " % brick) + g.log.info("Successfully replaced brick %s", brick) + + def test_self_heal_of_hard_links(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create a directory and create files and directories inside it + on mount point. + 3. Collect and compare arequal-checksum according to the volume type + for bricks. + 4. Bring down brick processes accoding to the volume type. + 5. Create hard links for the files created in step 2. + 6. Check if heal info is showing all the files and dirs to be healed. + 7. Bring brack all brick processes which were killed. + 8. Wait for heal to complete on the volume. + 9. Check if heal is complete and check if volume is in split brain. + 10. Collect and compare arequal-checksum according to the volume type + for bricks. + 11. Verify if hard links are proper or not. + 12. Do a lookup on mount point. + 13. Bring down brick processes accoding to the volume type. + 14. 
Create a second set of hard links to the files. + 15. Check if heal info is showing all the files and dirs to be healed. + 16. Bring brack all brick processes which were killed. + 17. Wait for heal to complete on the volume. + 18. Check if heal is complete and check if volume is in split brain. + 19. Collect and compare arequal-checksum according to the volume type + for bricks. + 20. Verify both set of hard links are proper or not. + 21. Do a lookup on mount point. + 22. Pick a random brick and replace it. + 23. Wait for heal to complete on the volume. + 24. Check if heal is complete and check if volume is in split brain. + 25. Collect and compare arequal-checksum according to the volume type + for bricks. + 26. Verify both set of hard links are proper or not. + 27. Do a lookup on mount point. + """ + # Create a directory and create files and directories inside it + # on mount point + self._create_files_and_dirs_on_mount_point() + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + for attempt in (False, True): + + # Bring down brick processes accoding to the volume type + self._bring_bricks_offline() + + # Create hardlinks for the files created in step 2 + self._create_hard_links_to_files(second_attempt=attempt) + + # Check if heal info is showing all the files and dirs to + # be healed + self._check_if_there_are_files_and_dirs_to_be_healed() + + # Bring back all brick processes which were killed + self._restart_volume_and_bring_all_offline_bricks_online() + + # Wait for heal to complete on the volume + self._wait_for_heal_is_completed() + + # Check if heal is complete and check if volume is in split brain + self._check_heal_is_completed_and_not_in_split_brain() + + # Collect and compare arequal-checksum according to the volume + # type for bricks + self._check_arequal_checksum_for_the_volume() + + # Verify if hard links are proper or not + self._verify_hard_links_to_files() + if attempt: + self._verify_hard_links_to_files(second_set=attempt) + + # Pick a random brick and replace it + self._replace_one_random_brick() + + # Wait for heal to complete on the volume + self._wait_for_heal_is_completed() + + # Check if heal is complete and check if volume is in split brain + self._check_heal_is_completed_and_not_in_split_brain() + + # Collect and compare arequal-checksum according to the volume + # type for bricks + self._check_arequal_checksum_for_the_volume() + + # Verify if hard links are proper or not + self._verify_hard_links_to_files() + self._verify_hard_links_to_files(second_set=True) + + def test_self_heal_of_soft_links(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create a directory and create files and directories inside it + on mount point. + 3. Collect and compare arequal-checksum according to the volume type + for bricks. + 4. Bring down brick processes accoding to the volume type. + 5. Create soft links for the dirs created in step 2. + 6. Verify if soft links are proper or not. + 7. Add files through the soft links. + 8. Verify if the soft links are proper or not. + 9. Check if heal info is showing all the files and dirs to be healed. + 10. Bring brack all brick processes which were killed. + 11. Wait for heal to complete on the volume. + 12. Check if heal is complete and check if volume is in split brain. + 13. Collect and compare arequal-checksum according to the volume type + for bricks. + 14. Verify if soft links are proper or not. + 15. 
diff --git a/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py b/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py
new file mode 100644
index 000000000..37bd2ec52
--- /dev/null
+++ b/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py
@@ -0,0 +1,600 @@
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+                                           bring_bricks_online,
+                                           are_bricks_offline,
+                                           are_bricks_online,
+                                           get_all_bricks)
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+                                          is_volume_in_split_brain,
+                                          is_heal_complete,
+                                          enable_granular_heal,
+                                          disable_granular_heal)
+from glustolibs.gluster.lib_utils import (add_user, del_user, group_del,
+                                          group_add, collect_bricks_arequal)
+from glustolibs.gluster.volume_ops import get_volume_options
+from glustolibs.gluster.volume_libs import get_subvols
+from glustolibs.io.utils import collect_mounts_arequal
+
+
+@runs_on([['distributed-replicated', 'replicated'], ['glusterfs']])
+class TestHealWithLinkFiles(GlusterBaseClass):
+
+    def setUp(self):
+
+        self.get_super_method(self, 'setUp')()
+
+        self.first_client = self.mounts[0].client_system
+        self.mountpoint = self.mounts[0].mountpoint
+        self.user_group_created = False
+
+        # If the test case being run is test_self_heal_meta_data,
+        # create the non-root users and group it needs
+        test_name_splitted = self.id().split('.')
+        test_id = test_name_splitted[len(test_name_splitted) - 1]
+        if test_id == 'test_self_heal_meta_data':
+
+            # Create non-root group
+            if not group_add(self.first_client, 'qa_all'):
+                raise ExecutionError("Failed to create group qa_all")
+
+            # Create non-root users
+            self.users = ('qa_func', 'qa_system', 'qa_perf')
+            for user in self.users:
+                if not add_user(self.first_client, user, group='qa_all'):
+                    raise ExecutionError("Failed to create user {}"
+                                         .format(user))
+
+            self.user_group_created = True
+            g.log.info("Successfully created all users.")
+
+        # Setup Volume
+        if not self.setup_volume_and_mount_volume([self.mounts[0]]):
+            raise ExecutionError("Failed to setup and mount volume")
+
+    def tearDown(self):
+
+        # Delete non-root users and group if created
+        if self.user_group_created:
+
+            # Delete non-root users
+            for user in self.users:
+                del_user(self.first_client, user)
+            g.log.info("Successfully deleted all users")
+
+            # Delete non-root group
+            group_del(self.first_client, 'qa_all')
+
+        if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]):
+            raise ExecutionError("Failed to cleanup Volume")
+
+        # Calling GlusterBaseClass tearDown
+        self.get_super_method(self, 'tearDown')()
+
+    def _set_granular_heal_to_on_or_off(self, enabled=False):
+        """Set granular heal to ON or OFF"""
+        granular = get_volume_options(self.mnode, self.volname,
+                                      'granular-entry-heal')
+        if enabled:
+            if granular['cluster.granular-entry-heal'] != 'on':
+                ret = enable_granular_heal(self.mnode, self.volname)
+                self.assertTrue(ret,
+                                "Unable to set granular-entry-heal to on")
+        else:
+            if granular['cluster.granular-entry-heal'] == 'on':
+                ret = disable_granular_heal(self.mnode, self.volname)
+                self.assertTrue(ret,
+                                "Unable to set granular-entry-heal to off")
+
+    def _run_cmd(self, io_cmd, err_msg):
+        """Run cmd and show error message if it fails"""
+        cmd = ("cd {}/test_self_heal;{}".format(self.mountpoint, io_cmd))
+        ret, _, _ = g.run(self.first_client, cmd)
+        self.assertFalse(ret, err_msg)
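+
+    # The four shell commands in the io_cmd tuple below are selected by
+    # index by the callers in this file: 0 and 3 are used by the entry
+    # heal test (initial and second data set), 1 by the metadata test and
+    # 2 by the dir-with-files-removed test.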
+    def _create_files_and_dirs_on_mount_point(self, index, second_set=False):
+        """A function to create files and dirs on mount point"""
+        # Create a parent directory test_self_heal on mount point
+        if not second_set:
+            ret = mkdir(self.first_client, '{}/{}'.format(
+                self.mountpoint, 'test_self_heal'))
+            self.assertTrue(ret, "Failed to create dir test_self_heal")
+
+        # Create directories and files inside directory test_self_heal
+        io_cmd = ("for i in `seq 1 50`; do mkdir dir.$i; dd if=/dev/random"
+                  " of=file.$i count=1K bs=$i; done",
+
+                  "for i in `seq 1 100`; do mkdir dir.$i; for j in `seq 1 5`;"
+                  " do dd if=/dev/random of=dir.$i/file.$j bs=1K count=$j"
+                  ";done;done",
+
+                  "for i in `seq 1 10`; do mkdir l1_dir.$i; for j in `seq "
+                  "1 5`; do mkdir l1_dir.$i/l2_dir.$j; for k in `seq 1 10`;"
+                  " do dd if=/dev/random of=l1_dir.$i/l2_dir.$j/test.$k"
+                  " bs=1k count=$k; done; done; done;",
+
+                  "for i in `seq 51 100`; do mkdir new_dir.$i; for j in `seq"
+                  " 1 10`; do dd if=/dev/random of=new_dir.$i/new_file.$j "
+                  "bs=1K count=$j; done; dd if=/dev/random of=new_file.$i"
+                  " count=1K bs=$i; done ;")
+        self._run_cmd(
+            io_cmd[index], "Failed to create dirs and files inside")
+
+    def _delete_files_and_dirs(self):
+        """Delete files and dirs from mount point"""
+        io_cmd = ("for i in `seq 1 50`; do rm -rf dir.$i; rm -f file.$i;done")
+        self._run_cmd(io_cmd, "Failed to delete dirs and files")
+
+    def _rename_files_and_dirs(self):
+        """Rename files and dirs from mount point"""
+        io_cmd = ("for i in `seq 51 100`; do mv new_file.$i renamed_file.$i;"
+                  " for j in `seq 1 10`; do mv new_dir.$i/new_file.$j "
+                  "new_dir.$i/renamed_file.$j ; done ; mv new_dir.$i "
+                  "renamed_dir.$i; done;")
+        self._run_cmd(io_cmd, "Failed to rename dirs and files")
+
+    def _change_meta_deta_of_dirs_and_files(self):
+        """Change meta data of dirs and files"""
+        cmds = (
+            # Change permission
+            "for i in `seq 1 100`; do chmod 555 dir.$i; done; "
+            "for i in `seq 1 50`; do for j in `seq 1 5`; do chmod 666 "
+            "dir.$i/file.$j; done; done; for i in `seq 51 100`; do for "
+            "j in `seq 1 5`;do chmod 444 dir.$i/file.$j; done; done;",
+
+            # Change ownership
+            "for i in `seq 1 35`; do chown -R qa_func dir.$i; done; "
+            "for i in `seq 36 70`; do chown -R qa_system dir.$i; done; "
+            "for i in `seq 71 100`; do chown -R qa_perf dir.$i; done;",
+
+            # Change group
+            "for i in `seq 1 100`; do chgrp -R qa_all dir.$i; done;")
+
+        for io_cmd in cmds:
+            self._run_cmd(io_cmd,
+                          "Failed to change meta data on dirs and files")
+        g.log.info("Successfully changed meta data on dirs and files")
+
+    def _verify_meta_data_of_files_and_dirs(self):
+        """Verify meta data of files and dirs"""
+        cmds = (
+            # Verify permissions
+            "for i in `seq 1 50`; do stat -c %a dir.$i | grep -F \"555\";"
+            " if [ $? -ne 0 ]; then exit 1; fi; for j in `seq 1 5` ; do "
+            "stat -c %a dir.$i/file.$j | grep -F \"666\"; if [ $? -ne 0 ]"
+            "; then exit 1; fi; done; done; for i in `seq 51 100`; do "
+            "stat -c %a dir.$i | grep -F \"555\";if [ $? -ne 0 ]; then "
+            "exit 1; fi; for j in `seq 1 5`; do stat -c %a dir.$i/file.$j"
+            " | grep -F \"444\"; if [ $? -ne 0 ]; then exit 1; fi; done;"
+            "done;",
+
+            # Verify ownership
+            "for i in `seq 1 35`; do stat -c %U dir.$i | grep -F "
+            "\"qa_func\"; if [ $? -ne 0 ]; then exit 1; fi; for j in "
+            "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F "
+            "\"qa_func\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;"
+            " for i in `seq 36 70` ; do stat -c %U dir.$i | grep -F "
+            "\"qa_system\"; if [ $? -ne 0 ]; then exit 1; fi; for j in "
+            "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F "
+            "\"qa_system\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;"
+            " for i in `seq 71 100` ; do stat -c %U dir.$i | grep -F "
+            "\"qa_perf\"; if [ $? -ne 0 ]; then exit 1; fi; for j in "
+            "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F "
+            "\"qa_perf\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;",
+
+            # Verify group
+            "for i in `seq 1 100`; do stat -c %G dir.$i | grep -F "
+            "\"qa_all\"; if [ $? -ne 0 ]; then exit 1; fi; for j in "
+            "`seq 1 5`; do stat -c %G dir.$i/file.$j | grep -F "
+            "\"qa_all\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;")
+
+        for io_cmd in cmds:
+            self._run_cmd(io_cmd, "Meta data of dirs and files not proper")
+
+    def _set_and_remove_extended_attributes(self, remove=False):
+        """Set and remove extended attributes"""
+        # Command to set extended attribute on files and dirs
+        io_cmd = ("for i in `seq 1 100`; do setfattr -n trusted.name -v "
+                  "testing_xattr_selfheal_on_dirs dir.$i; for j in `seq 1 "
+                  "5`;do setfattr -n trusted.name -v "
+                  "testing_xattr_selfheal_on_files dir.$i/file.$j; done; "
+                  "done;")
+        err_msg = "Failed to set extended attributes on files and dirs"
+        if remove:
+            # Command to remove extended attribute set on files and dirs
+            io_cmd = ("for i in `seq 1 100`; do setfattr -x trusted.name "
+                      "dir.$i; for j in `seq 1 5`; do setfattr -x "
+                      "trusted.name dir.$i/file.$j ; done ; done ;")
+            err_msg = "Failed to remove extended attributes from files and dirs"
+
+        self._run_cmd(io_cmd, err_msg)
+
+    def _verify_if_extended_attributes_are_proper(self, remove=False):
+        """Verify if extended attributes are set or removed properly"""
+        io_cmd = ("for i in `seq 1 100`; do getfattr -n trusted.name -e text "
+                  "dir.$i | grep -F 'testing_xattr_selfheal_on_dirs'; if [ $? "
+                  "-ne 0 ]; then exit 1 ; fi ; for j in `seq 1 5` ; do "
+                  "getfattr -n trusted.name -e text dir.$i/file.$j | grep -F "
+                  "'testing_xattr_selfheal_on_files'; if [ $? -ne 0 ]; then "
+                  "exit 1; fi; done; done;")
+        err_msg = "Extended attributes on files and dirs are not proper"
+        if remove:
+            io_cmd = ("for i in `seq 1 100`; do getfattr -n trusted.name -e "
+                      "text dir.$i; if [ $? -eq 0 ]; then exit 1; fi; for j in"
+                      " `seq 1 5`; do getfattr -n trusted.name -e text "
+                      "dir.$i/file.$j; if [ $? -eq 0 ]; then exit 1; fi; done; "
+                      "done;")
+            err_msg = "Extended attributes set on files and dirs not removed"
+        self._run_cmd(io_cmd, err_msg)
+
+    def _remove_files_and_create_dirs_with_the_same_name(self):
+        """Remove files and create dirs with the same name"""
+        io_cmd = ("for i in `seq 1 10`; do for j in `seq 1 5`; do for k in "
+                  "`seq 1 10`; do rm -f l1_dir.$i/l2_dir.$j/test.$k; mkdir "
+                  "l1_dir.$i/l2_dir.$j/test.$k; done; done; done;")
+        self._run_cmd(io_cmd,
+                      "Failed to remove files and create dirs with same name")
+
+    def _verify_if_dirs_are_proper_or_not(self):
+        """Verify if dirs are proper or not"""
+        io_cmd = ("for i in `seq 1 10`; do for j in `seq 1 5`; do for k in "
+                  "`seq 1 10`; do stat -c %F l1_dir.$i/l2_dir.$j/test.$k | "
+                  "grep -F 'directory'; if [ $? -ne 0 ]; then exit 1; fi; "
+                  "done; done; done;")
+        self._run_cmd(io_cmd, "Dirs created instead of files aren't proper")
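+
+    # Only the first brick of every replica set is brought down below, so
+    # each subvolume keeps serving I/O from its remaining bricks while the
+    # changes made in the meantime accumulate as pending heals for the
+    # offline bricks.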
+    def _bring_bricks_offline(self):
+        """Bring bricks offline and confirm that they are offline"""
+        # Select bricks to bring offline from a replica set
+        subvols_dict = get_subvols(self.mnode, self.volname)
+        subvols = subvols_dict['volume_subvols']
+        self.bricks_to_bring_offline = []
+        for subvol in subvols:
+            self.bricks_to_bring_offline.append(subvol[0])
+
+        # Bring bricks offline
+        ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline)
+        self.assertTrue(ret, 'Failed to bring bricks %s offline' %
+                        self.bricks_to_bring_offline)
+
+        ret = are_bricks_offline(self.mnode, self.volname,
+                                 self.bricks_to_bring_offline)
+        self.assertTrue(ret, 'Bricks %s are not offline'
+                        % self.bricks_to_bring_offline)
+        g.log.info('Bringing bricks %s offline is successful',
+                   self.bricks_to_bring_offline)
+
+    def _restart_volume_and_bring_all_offline_bricks_online(self):
+        """Restart volume and bring all offline bricks online"""
+        ret = bring_bricks_online(self.mnode, self.volname,
+                                  self.bricks_to_bring_offline,
+                                  bring_bricks_online_methods=[
+                                      'volume_start_force'])
+        self.assertTrue(ret, 'Failed to bring bricks %s online' %
+                        self.bricks_to_bring_offline)
+
+        # Check if bricks are back online or not
+        ret = are_bricks_online(self.mnode, self.volname,
+                                self.bricks_to_bring_offline)
+        self.assertTrue(ret, 'Bricks %s not online even after restart' %
+                        self.bricks_to_bring_offline)
+
+        g.log.info('Bringing bricks %s online is successful',
+                   self.bricks_to_bring_offline)
+
+    def _check_arequal_on_bricks_with_a_specific_arequal(self, arequal,
+                                                         brick_list):
+        """
+        Compare an initial arequal checksum with the bricks in a given
+        brick list
+        """
+        # The last line of arequal-checksum output carries the aggregate
+        # checksum after the final ':'; that value must match on every brick
+        init_val = arequal[0].splitlines()[-1].split(':')[-1]
+        ret, arequals = collect_bricks_arequal(brick_list)
+        self.assertTrue(ret, 'Failed to get arequal on bricks')
+        for brick_arequal in arequals:
+            brick_total = brick_arequal.splitlines()[-1].split(':')[-1]
+            self.assertEqual(init_val, brick_total, 'Arequals not matching')
+
+    @staticmethod
+    def _add_dir_path_to_brick_list(brick_list):
+        """Add test_self_heal at the end of each brick path"""
+        dir_brick_list = []
+        for brick in brick_list:
+            dir_brick_list.append('{}/{}'.format(brick, 'test_self_heal'))
+        return dir_brick_list
+
+    def _check_arequal_checksum_for_the_volume(self):
+        """
+        Check if arequals of mount point and bricks are the same.
+        """
+        if self.volume_type == "replicated":
+            # Check arequals for "replicated"
+            brick_list = get_all_bricks(self.mnode, self.volname)
+            dir_brick_list = self._add_dir_path_to_brick_list(brick_list)
+
+            # Get arequal before getting bricks offline
+            work_dir = '{}/test_self_heal'.format(self.mountpoint)
+            ret, arequals = collect_mounts_arequal([self.mounts[0]],
+                                                   path=work_dir)
+            self.assertTrue(ret, 'Failed to get arequal')
+            g.log.info('Getting arequal before getting bricks offline '
+                       'is successful')
+
+            # Get arequal on bricks and compare with mount_point_total
+            self._check_arequal_on_bricks_with_a_specific_arequal(
+                arequals, dir_brick_list)
+
+        # Check arequals for "distributed-replicated"
+        if self.volume_type == "distributed-replicated":
+            # Get the subvolumes
+            subvols_dict = get_subvols(self.mnode, self.volname)
+            num_subvols = len(subvols_dict['volume_subvols'])
+
+            # Get arequals and compare
+            for i in range(0, num_subvols):
+                # Get arequal for first brick
+                brick_list = subvols_dict['volume_subvols'][i]
+                dir_brick_list = self._add_dir_path_to_brick_list(brick_list)
+                ret, arequals = collect_bricks_arequal([dir_brick_list[0]])
+                self.assertTrue(ret, 'Failed to get arequal on first brick')
+
+                # Get arequal for every brick and compare with first brick
+                self._check_arequal_on_bricks_with_a_specific_arequal(
+                    arequals, dir_brick_list)
+
+    def _check_heal_is_completed_and_not_in_split_brain(self):
+        """Check if heal is completed and volume is not in split brain"""
+        # Check if heal is completed
+        ret = is_heal_complete(self.mnode, self.volname)
+        self.assertTrue(ret, 'Heal is not complete')
+        g.log.info('Heal is completed successfully')
+
+        # Check if volume is in split brain or not
+        ret = is_volume_in_split_brain(self.mnode, self.volname)
+        self.assertFalse(ret, 'Volume is in split-brain state')
+        g.log.info('Volume is not in split-brain state')
+
+    def _check_if_there_are_files_and_dirs_to_be_healed(self):
+        """Check if there are files and dirs to be healed"""
+        ret = is_heal_complete(self.mnode, self.volname)
+        self.assertFalse(ret, 'Heal is completed')
+        g.log.info('Heal is pending')
+
+    def _wait_for_heal_is_completed(self):
+        """Wait until heal is completed"""
+        ret = monitor_heal_completion(self.mnode, self.volname,
+                                      timeout_period=3600)
+        self.assertTrue(ret, 'Heal has not yet completed')
+
+    def _check_heal_status_restart_vol_wait_and_check_data(self):
+        """
+        Perform the repetitive steps below:
+        1 Check if heal info is showing all the files and dirs to be healed
+        2 Bring back all brick processes which were killed
+        3 Wait for heal to complete on the volume
+        4 Check if heal is complete and check if volume is in split brain
+        5 Collect and compare arequal-checksum according to the volume type
+          for bricks
+        """
+        # Check if heal info is showing all the files and dirs to be healed
+        self._check_if_there_are_files_and_dirs_to_be_healed()
+
+        # Bring back all brick processes which were killed
+        self._restart_volume_and_bring_all_offline_bricks_online()
+
+        # Wait for heal to complete on the volume
+        self._wait_for_heal_is_completed()
+
+        # Check if heal is complete and check if volume is in split brain
+        self._check_heal_is_completed_and_not_in_split_brain()
+
+        # Collect and compare arequal-checksum according to the volume type
+        # for bricks
+        self._check_arequal_checksum_for_the_volume()
+
+    def _run_test_self_heal_entry_heal(self):
+        """Run steps of test_self_heal_entry_heal"""
+        # Create a directory and create files and directories inside it on
+        # mount point
+        self._create_files_and_dirs_on_mount_point(0)
+
+        # Collect and compare arequal-checksum according to the volume type
+        # for bricks
+        self._check_arequal_checksum_for_the_volume()
+
+        # Bring down brick processes according to the volume type
+        self._bring_bricks_offline()
+
+        # Create a new set of files and directories on mount point
+        self._create_files_and_dirs_on_mount_point(3, second_set=True)
+
+        self._check_heal_status_restart_vol_wait_and_check_data()
+
+        # Bring down brick processes according to the volume type
+        self._bring_bricks_offline()
+
+        # Delete files and directories from mount point
+        self._delete_files_and_dirs()
+
+        self._check_heal_status_restart_vol_wait_and_check_data()
+
+        # Bring down brick processes according to the volume type
+        self._bring_bricks_offline()
+
+        # Rename the existing files and dirs
+        self._rename_files_and_dirs()
+
+        self._check_heal_status_restart_vol_wait_and_check_data()
+
+    def test_self_heal_entry_heal(self):
+        """
+        Test case:
+        1. Create a volume, start it and mount it.
+        2. Create a directory and create files and directories inside it
+           on mount point.
+        3. Collect and compare arequal-checksum according to the volume type
+           for bricks.
+        4. Bring down brick processes according to the volume type.
+        5. Create a new set of files and directories on mount point.
+        6. Check if heal info is showing all the files and dirs to be healed.
+        7. Bring back all brick processes which were killed.
+        8. Wait for heal to complete on the volume.
+        9. Check if heal is complete and check if volume is in split brain.
+        10. Collect and compare arequal-checksum according to the volume type
+            for bricks.
+        11. Bring down brick processes according to the volume type.
+        12. Delete files and directories from mount point.
+        13. Check if heal info is showing all the files and dirs to be healed.
+        14. Bring back all brick processes which were killed.
+        15. Wait for heal to complete on the volume.
+        16. Check if heal is complete and check if volume is in split brain.
+        17. Collect and compare arequal-checksum according to the volume type
+            for bricks.
+        18. Bring down brick processes according to the volume type.
+        19. Rename the existing files and dirs.
+        20. Check if heal info is showing all the files and dirs to be healed.
+        21. Bring back all brick processes which were killed.
+        22. Wait for heal to complete on the volume.
+        23. Check if heal is complete and check if volume is in split brain.
+        24. Collect and compare arequal-checksum according to the volume type
+            for bricks.
+
+        Note:
+        Run this test with granular-entry-heal both enabled and disabled.
+        """
+        for value in (False, True):
+            if value:
+                # Clean up old data from the mount point
+                ret, _, _ = g.run(self.first_client,
+                                  'rm -rf {}/*'.format(self.mountpoint))
+                self.assertFalse(ret, 'Failed to cleanup mount point')
+                g.log.info("Testing with granular heal set to enabled")
+            self._set_granular_heal_to_on_or_off(enabled=value)
+            self._run_test_self_heal_entry_heal()
+
+    def test_self_heal_meta_data(self):
+        """
+        Test case:
+        1. Create a volume, start it and mount it.
+        2. Create a directory and create files and directories inside it
+           on mount point.
+        3. Collect and compare arequal-checksum according to the volume type
+           for bricks.
+        4. Bring down brick processes according to the volume type.
+        5. Change the meta data of files and dirs.
+        6. Check if heal info is showing all the files and dirs to be healed.
+        7. Bring back all brick processes which were killed.
+        8. Wait for heal to complete on the volume.
+        9. Check if heal is complete and check if volume is in split brain.
+        10. Collect and compare arequal-checksum according to the volume type
+            for bricks.
+        11. Verify the meta data of files and dirs.
+        12. Bring down brick processes according to the volume type.
+        13. Set extended attributes on the files and dirs.
+        14. Verify if the extended attributes are set properly or not.
+        15. Check if heal info is showing all the files and dirs to be healed.
+        16. Bring back all brick processes which were killed.
+        17. Wait for heal to complete on the volume.
+        18. Check if heal is complete and check if volume is in split brain.
+        19. Collect and compare arequal-checksum according to the volume type
+            for bricks.
+        20. Verify if extended attributes are consistent or not.
+        21. Bring down brick processes according to the volume type.
+        22. Remove extended attributes on the files and dirs.
+        23. Verify if extended attributes were removed properly.
+        24. Check if heal info is showing all the files and dirs to be healed.
+        25. Bring back all brick processes which were killed.
+        26. Wait for heal to complete on the volume.
+        27. Check if heal is complete and check if volume is in split brain.
+        28. Collect and compare arequal-checksum according to the volume type
+            for bricks.
+        29. Verify if extended attributes are removed or not.
+        """
+        # Create a directory and create files and directories inside it
+        # on mount point
+        self._create_files_and_dirs_on_mount_point(1)
+
+        # Collect and compare arequal-checksum according to the volume type
+        # for bricks
+        self._check_arequal_checksum_for_the_volume()
+
+        # Bring down brick processes according to the volume type
+        self._bring_bricks_offline()
+
+        # Change the meta data of files and dirs
+        self._change_meta_deta_of_dirs_and_files()
+
+        self._check_heal_status_restart_vol_wait_and_check_data()
+
+        # Verify the meta data of files and dirs
+        self._verify_meta_data_of_files_and_dirs()
+
+        for value in (False, True):
+            # Bring down brick processes according to the volume type
+            self._bring_bricks_offline()
+
+            # Set or remove extended attributes on the files and dirs
+            self._set_and_remove_extended_attributes(remove=value)
+
+            # Verify if the extended attributes are set properly or not
+            self._verify_if_extended_attributes_are_proper(remove=value)
+
+            self._check_heal_status_restart_vol_wait_and_check_data()
+
+            # Verify if extended attributes are consistent or not
+            self._verify_if_extended_attributes_are_proper(remove=value)
+
+    def test_self_heal_of_dir_with_files_removed(self):
+        """
+        Test case:
+        1. Create a volume, start it and mount it.
+        2. Create a directory and create files and directories inside it
+           on mount point.
+        3. Collect and compare arequal-checksum according to the volume type
+           for bricks.
+        4. Bring down brick processes according to the volume type.
+        5. Remove all files and create dirs with the same names as the
+           removed files.
+        6. Check if heal info is showing all the files and dirs to be healed.
+        7. Bring back all brick processes which were killed.
+        8. Wait for heal to complete on the volume.
+        9. Check if heal is complete and check if volume is in split brain.
+        10. Collect and compare arequal-checksum according to the volume type
+            for bricks.
+        11. Verify if dirs are healed properly or not.
+        """
+        # Create a directory and create files and directories inside it
+        # on mount point
+        self._create_files_and_dirs_on_mount_point(2)
+
+        # Collect and compare arequal-checksum according to the volume type
+        # for bricks
+        self._check_arequal_checksum_for_the_volume()
+
+        # Bring down brick processes according to the volume type
+        self._bring_bricks_offline()
+
+        # Remove all files and create dirs with the same names as the files
+        self._remove_files_and_create_dirs_with_the_same_name()
+
+        self._check_heal_status_restart_vol_wait_and_check_data()
+
+        # Verify if dirs are healed properly or not
+        self._verify_if_dirs_are_proper_or_not()
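Both the file above and the patch below gate their behaviour on the cluster.granular-entry-heal volume option, toggled through glustolibs' enable_granular_heal and disable_granular_heal wrappers. As a rough sketch only, assuming those wrappers drive a gluster CLI call of roughly this shape, the same toggle can be issued directly:

    from glusto.core import Glusto as g

    def set_granular_entry_heal(mnode, volname, enable=True):
        """Toggle granular entry heal for a volume via the gluster CLI."""
        action = 'enable' if enable else 'disable'
        ret, _, err = g.run(mnode, "gluster volume heal {} "
                            "granular-entry-heal {}".format(volname, action))
        return ret == 0, err

With the option enabled, entry self-heal roughly crawls only the entries recorded as pending instead of whole parent directories, which is why the entry-heal test above is exercised once with it on and once with it off.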
diff --git a/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py b/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py
index be1a6fc0f..a449e396f 100644
--- a/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py
+++ b/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -26,12 +26,14 @@ from glustolibs.gluster.brick_libs import (
     select_volume_bricks_to_bring_offline, get_online_bricks_list)
 from glustolibs.gluster.heal_libs import (
     get_self_heal_daemon_pid, is_shd_daemonized,
-    monitor_heal_completion, bring_self_heal_daemon_process_offline)
+    monitor_heal_completion, bring_self_heal_daemon_process_offline,
+    disable_granular_heal)
 from glustolibs.gluster.heal_ops import (get_heal_info_summary,
                                          trigger_heal_full)
 from glustolibs.io.utils import validate_io_procs
 from glustolibs.misc.misc_libs import upload_scripts
-from glustolibs.gluster.volume_ops import set_volume_options
+from glustolibs.gluster.volume_ops import (set_volume_options,
+                                           get_volume_options)
 from glustolibs.gluster.mount_ops import mount_volume, umount_volume
@@ -42,49 +44,43 @@ class SelfHealDaemonProcessTestsWithSingleVolume(GlusterBaseClass):
     verifies the self-heal daemon process on a single volume
     """
-    @classmethod
-    def setUpClass(cls):
-        """
-        setup volume, mount volume and initialize necessary variables
-        which is used in tests
-        """
+    def setUp(self):
         # Calling GlusterBaseClass setUpClass
-        cls.get_super_method(cls, 'setUpClass')()
+        self.get_super_method(self, 'setUp')()
+
+        # Upload script
+        self.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
+                                   "file_dir_ops.py")
+        ret = upload_scripts(self.clients, [self.script_upload_path])
+        if not ret:
+            raise ExecutionError("Failed to upload IO scripts to clients")
 
         # Setup Volume and Mount Volume
-        ret = cls.setup_volume_and_mount_volume(mounts=cls.mounts)
+        ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
         if not ret:
             raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
         g.log.info("Successful in Setup Volume and Mount Volume")
 
         # Verify glustershd process releases its parent process
-        ret = is_shd_daemonized(cls.servers)
+        ret = is_shd_daemonized(self.servers)
         if not ret:
             raise ExecutionError("Self Heal Daemon process was still"
                                  " holding parent process.")
         g.log.info("Self Heal Daemon processes are online")
 
-        # Upload script
-        cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
-                                  "file_dir_ops.py")
-        ret = upload_scripts(cls.clients, [cls.script_upload_path])
-        if not ret:
-            raise ExecutionError("Failed to upload IO scripts to clients")
-
-    @classmethod
-    def tearDownClass(cls):
+    def tearDown(self):
         """
         Clean up the volume and umount volume from client
         """
         # Stopping the volume
-        ret = cls.unmount_volume_and_cleanup_volume(mounts=cls.mounts)
+        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
         if not ret:
             raise ExecutionError("Failed to Unmount Volume and Cleanup Volume")
         g.log.info("Successful in Unmount Volume and Cleanup Volume")
 
-        # Calling GlusterBaseClass tearDownClass
-        cls.get_super_method(cls, 'tearDownClass')()
+        # Calling GlusterBaseClass tearDown
+        self.get_super_method(self, 'tearDown')()
 
     def test_server_side_healing_happens_only_when_glustershd_running(self):
         """
@@ -105,6 +101,15 @@ class SelfHealDaemonProcessTestsWithSingleVolume(GlusterBaseClass):
         * heal should complete successfully
         """
         # pylint: disable=too-many-locals,too-many-statements,too-many-lines
+
+        # Disable granular heal if not disabled already
+        granular = get_volume_options(self.mnode, self.volname,
+                                      'granular-entry-heal')
+        if granular['cluster.granular-entry-heal'] == 'on':
+            ret = disable_granular_heal(self.mnode, self.volname)
+            self.assertTrue(ret,
+                            "Unable to set granular-entry-heal to off")
+
         # Setting Volume options
         options = {"metadata-self-heal": "on",
                    "entry-self-heal": "on",
@@ -137,7 +142,7 @@ class SelfHealDaemonProcessTestsWithSingleVolume(GlusterBaseClass):
         all_mounts_procs, num_files_to_write = [], 100
         for mount_obj in self.mounts:
             cmd = ("/usr/bin/env python %s create_files "
-                   "-f %s --base-file-name file %s" % (self.script_upload_path,
+                   "-f %d --base-file-name file %s" % (self.script_upload_path,
                                                        num_files_to_write,
                                                        mount_obj.mountpoint))
             proc = g.run_async(mount_obj.client_system, cmd,
@@ -227,8 +232,8 @@ class SelfHealDaemonProcessTestsWithSingleVolume(GlusterBaseClass):
 
         all_mounts_procs = []
         for mount_obj in self.mounts:
-            cmd = ("/usr/bin/env python %s read %s"
-                   % (self.script_upload_path, mount_obj.mountpoint))
+            cmd = ("cd %s;for i in `seq 1 5`; do ls -l;cat *; stat *; sleep 5;"
+                   " done " % (mount_obj.mountpoint))
             proc = g.run_async(mount_obj.client_system, cmd,
                                user=mount_obj.user)
             all_mounts_procs.append(proc)