Diffstat (limited to 'tests/functional/afr')
64 files changed, 7344 insertions, 883 deletions
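Most of the new heal tests in this series follow the same skeleton: take one brick (or one brick per replica subvol) offline, run client I/O, bring the brick back with "volume start force", wait for self-heal to finish, and finally compare per-brick arequal checksums. The sketch below is only an orientation aid and is not part of the commit: the function name offline_heal_and_verify is made up for illustration, mnode/volname are assumed to come from an already set-up GlusterBaseClass test, and only glustolibs helpers that appear verbatim in the diffs below are used.

from glustolibs.gluster.brick_libs import (bring_bricks_offline,
                                           are_bricks_offline)
from glustolibs.gluster.heal_libs import (is_volume_in_split_brain,
                                          monitor_heal_completion)
from glustolibs.gluster.lib_utils import collect_bricks_arequal
from glustolibs.gluster.volume_libs import get_subvols
from glustolibs.gluster.volume_ops import volume_start


def offline_heal_and_verify(mnode, volname):
    """Illustrative sketch of the offline-brick -> heal -> arequal flow."""
    # Take the first brick of every replica subvol offline and verify it.
    subvols = get_subvols(mnode, volname)['volume_subvols']
    offline = [subvol[0] for subvol in subvols]
    assert bring_bricks_offline(volname, offline), "failed to kill bricks"
    assert are_bricks_offline(mnode, volname, offline), "bricks still online"

    # ... the individual tests run client I/O here while the bricks are down ...

    # 'volume start force' restarts the killed brick processes.
    ret, _, _ = volume_start(mnode, volname, force=True)
    assert ret == 0, "volume start force failed"

    # Wait for the self-heal daemon to drain the backlog and confirm that
    # nothing ended up in split-brain.
    assert monitor_heal_completion(mnode, volname), "heal did not complete"
    assert not is_volume_in_split_brain(mnode, volname), "split-brain found"

    # Every brick of a replica subvol should report the same arequal checksum
    # (the arbiter tests below compare only the first two bricks per subvol).
    for subvol in subvols:
        ret, checksums = collect_bricks_arequal(subvol)
        assert ret and len(set(checksums)) == 1, "brick checksums differ"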
diff --git a/tests/functional/afr/heal/test_afr_self_heal_add_brick_rebalance.py b/tests/functional/afr/heal/test_afr_self_heal_add_brick_rebalance.py new file mode 100644 index 000000000..df05dd86c --- /dev/null +++ b/tests/functional/afr/heal/test_afr_self_heal_add_brick_rebalance.py @@ -0,0 +1,199 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from time import sleep +from random import sample + +from glusto.core import Glusto as g +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + are_bricks_offline) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.heal_libs import (is_volume_in_split_brain, + monitor_heal_completion) +from glustolibs.gluster.lib_utils import collect_bricks_arequal +from glustolibs.gluster.rebalance_ops import (rebalance_start, + wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_ops import volume_start +from glustolibs.gluster.volume_libs import ( + verify_all_process_of_volume_are_online, get_subvols, expand_volume, + wait_for_volume_process_to_be_online) +from glustolibs.io.utils import (validate_io_procs, + list_all_files_and_dirs_mounts, + wait_for_io_to_complete) +from glustolibs.misc.misc_libs import upload_scripts + + +@runs_on([['arbiter', 'distributed-arbiter', 'replicated', + 'distributed-replicated'], ['glusterfs']]) +class TestAfrSelfHealAddBrickRebalance(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" % + cls.clients) + g.log.info("Successfully uploaded IO scripts to clients % s", + cls.clients) + + def setUp(self): + + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + if not self.setup_volume_and_mount_volume(self.mounts): + raise ExecutionError("Unable to setup and mount volume") + + def tearDown(self): + + # Wait if any IOs are pending from the test + if self.all_mounts_procs: + ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts) + if ret: + raise ExecutionError( + "Wait for IO completion failed on some of the clients") + + # Unmount and cleanup the volume + if not self.unmount_volume_and_cleanup_volume(self.mounts): + raise ExecutionError("Unable to unmount and cleanup volume") + + # Calling GlusterBaseClass Teardown + self.get_super_method(self, 'tearDown')() + + def test_afr_self_heal_add_brick_rebalance(self): + """ + Test Steps: + 1. Create a replicated/distributed-replicate volume and mount it + 2. 
Start IO from the clients + 3. Bring down a brick from the subvol and validate it is offline + 4. Bring back the brick online and wait for heal to complete + 5. Once the heal is completed, expand the volume. + 6. Trigger rebalance and wait for rebalance to complete + 7. Validate IO, no errors during the steps performed from step 2 + 8. Check arequal of the subvol and all the brick in the same subvol + should have same checksum + """ + # Start IO from the clients + self.all_mounts_procs = [] + for count, mount_obj in enumerate(self.mounts): + g.log.info("Starting IO on %s:%s", mount_obj.client_system, + mount_obj.mountpoint) + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d --dir-depth 3 --dir-length 5 " + "--max-num-of-dirs 5 --num-of-files 30 %s" % ( + self.script_upload_path, count, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + self.all_mounts_procs.append(proc) + + # List a brick in each subvol and bring them offline + subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + brick_to_bring_offline = [] + for subvol in subvols: + self.assertTrue(subvol, "List is empty") + brick_to_bring_offline.extend(sample(subvol, 1)) + + ret = bring_bricks_offline(self.volname, brick_to_bring_offline) + self.assertTrue(ret, "Unable to bring brick: {} offline".format( + brick_to_bring_offline)) + + # Validate the brick is offline + ret = are_bricks_offline(self.mnode, self.volname, + brick_to_bring_offline) + self.assertTrue(ret, "Brick:{} is still online".format( + brick_to_bring_offline)) + + # Wait for 10 seconds for IO to be generated + sleep(10) + + # Start volume with force to bring all bricks online + ret, _, _ = volume_start(self.mnode, self.volname, force=True) + self.assertEqual(ret, 0, "Volume start with force failed") + g.log.info("Volume: %s started successfully", self.volname) + + # Verify volume's all process are online + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, ("Volume %s : All process are not online", + self.volname)) + + # Monitor heal completion + self.assertTrue(monitor_heal_completion(self.mnode, self.volname, + interval_check=10), + "Heal failed after 20 mins") + + # Check are there any files in split-brain and heal completion + self.assertFalse(is_volume_in_split_brain(self.mnode, self.volname), + "Some files are in split brain for " + "volume: {}".format(self.volname)) + + # Expanding volume by adding bricks to the volume when IO in progress + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, ("Failed to expand the volume when IO in " + "progress on volume %s", self.volname)) + + # Wait for volume processes to be online + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to wait for volume %s processes to " + "be online", self.volname)) + + # Start Rebalance + ret, _, _ = rebalance_start(self.mnode, self.volname) + self.assertEqual(ret, 0, ("Failed to start rebalance on the volume " + "%s", self.volname)) + g.log.info("Successfully started rebalance on the " + "volume %s", self.volname) + + # Without sleep the next step will fail with Glusterd Syncop locking. 
+ sleep(2) + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=1800) + self.assertTrue(ret, ("Rebalance is not yet complete on the volume " + "%s", self.volname)) + g.log.info("Rebalance is successfully complete on " + "the volume %s", self.volname) + + # Validate IO + ret = validate_io_procs(self.all_mounts_procs, self.mounts) + self.io_validation_complete = True + self.assertTrue(ret, "IO failed on some of the clients") + self.all_mounts_procs *= 0 + + # List all files and dirs created + ret = list_all_files_and_dirs_mounts(self.mounts) + self.assertTrue(ret, "Failed to list all files and dirs") + + # Check arequal checksum of all the bricks is same + for subvol in subvols: + ret, arequal_from_the_bricks = collect_bricks_arequal(subvol) + self.assertTrue(ret, "Arequal is collected successfully across " + "the bricks in the subvol {}".format(subvol)) + cmd = len(set(arequal_from_the_bricks)) + if (self.volume_type == "arbiter" or + self.volume_type == "distributed-arbiter"): + cmd = len(set(arequal_from_the_bricks[:2])) + self.assertEqual(cmd, 1, "Arequal" + " is same on all the bricks in the subvol") diff --git a/tests/functional/afr/heal/test_data_split_brain_resolution.py b/tests/functional/afr/heal/test_data_split_brain_resolution.py index e1284cad6..73fd144c1 100644 --- a/tests/functional/afr/heal/test_data_split_brain_resolution.py +++ b/tests/functional/afr/heal/test_data_split_brain_resolution.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -23,6 +23,7 @@ """ from glusto.core import Glusto as g + from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on from glustolibs.misc.misc_libs import upload_scripts @@ -45,7 +46,7 @@ class HealDataSplitBrain(GlusterBaseClass): def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Override Volume if cls.volume_type == "replicated": @@ -57,11 +58,9 @@ class HealDataSplitBrain(GlusterBaseClass): # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on " "mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, script_local_path) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts " "to clients %s" % cls.clients) @@ -75,17 +74,19 @@ class HealDataSplitBrain(GlusterBaseClass): raise ExecutionError("Failed to Setup_Volume and Mount_Volume") g.log.info("Successful in Setup Volume and Mount Volume") - @classmethod - def tearDownClass(cls): - - # Cleanup Volume - g.log.info("Starting to clean up Volume %s", cls.volname) - ret = cls.unmount_volume_and_cleanup_volume(cls.mounts) + def tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + g.log.info("Starting to Unmount Volume and Cleanup Volume") + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: - raise ExecutionError("Failed to create volume") - g.log.info("Successful in cleaning up Volume %s", 
cls.volname) + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") - GlusterBaseClass.tearDownClass.im_func(cls) + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() def verify_brick_arequals(self): g.log.info("Fetching bricks for the volume: %s", self.volname) diff --git a/tests/functional/afr/heal/test_dir_time_stamp_restoration.py b/tests/functional/afr/heal/test_dir_time_stamp_restoration.py new file mode 100644 index 000000000..6a4ef2a19 --- /dev/null +++ b/tests/functional/afr/heal/test_dir_time_stamp_restoration.py @@ -0,0 +1,160 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-131 USA. + +""" +Description: + Check if parent directory timestamps are restored after an entry heal. +""" +from glusto.core import Glusto as g + +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import ( + bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + select_volume_bricks_to_bring_offline, + get_all_bricks) +from glustolibs.gluster.heal_ops import trigger_heal +from glustolibs.gluster.glusterdir import (mkdir, rmdir) +from glustolibs.gluster.glusterfile import (get_fattr, get_file_stat) +from glustolibs.gluster.volume_libs import set_volume_options +from glustolibs.gluster.heal_libs import monitor_heal_completion + + +@runs_on([['replicated'], + ['glusterfs']]) +class TestDirTimeStampRestore(GlusterBaseClass): + + def setUp(self): + self.get_super_method(self, 'setUp')() + + ret = self.setup_volume_and_mount_volume(mounts=self.mounts, + volume_create_force=False) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + self.bricks_list = get_all_bricks(self.mnode, self.volname) + + def tearDown(self): + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + self.get_super_method(self, 'tearDown')() + + def are_mdata_xattrs_equal(self): + """Check if atime/mtime/ctime in glusterfs.mdata xattr are identical""" + timestamps = [] + for brick_path in self.bricks_list: + server, brick = brick_path.split(':') + fattr = get_fattr(server, '%s/%s' % (brick, "dir1"), + 'trusted.glusterfs.mdata') + self.assertIsNotNone(fattr, 'Unable to get mdata xattr') + timestamps.append(fattr) + + g.log.debug("mdata list = %s", ''.join(map(str, timestamps))) + return timestamps.count(timestamps[0]) == len(timestamps) + + def are_stat_timestamps_equal(self): + """Check if atime/mtime/ctime in stat info are identical""" + timestamps = [] + for 
brick_path in self.bricks_list: + server, brick = brick_path.split(':') + stat_data = get_file_stat(server, "%s/dir1" % brick) + ts_string = "{}-{}-{}".format(stat_data['epoch_atime'], + stat_data['epoch_mtime'], + stat_data['epoch_ctime']) + timestamps.append(ts_string) + + g.log.debug("stat list = %s", ''.join(map(str, timestamps))) + return timestamps.count(timestamps[0]) == len(timestamps) + + def perform_test(self, ctime): + """ + Testcase steps: + 1. Enable/disable features,ctime based on function argument. + 2. Create a directory on the mount point. + 3. Kill a brick and create a file inside the directory. + 4. Bring the brick online. + 5. Trigger heal and wait for its completion. + 6. Verify that the atime, mtime and ctime of the directory are same on + all bricks of the replica. + """ + if ctime: + option = {'features.ctime': 'on'} + else: + option = {'features.ctime': 'off'} + ret = set_volume_options(self.mnode, self.volname, option) + self.assertTrue(ret, 'failed to set option %s on %s' + % (option, self.volume)) + + client, m_point = (self.mounts[0].client_system, + self.mounts[0].mountpoint) + + dirpath = '{}/dir1'.format(m_point) + ret = mkdir(client, dirpath) + self.assertTrue(ret, 'Unable to create a directory from mount point') + + bricks_to_bring_offline = select_volume_bricks_to_bring_offline( + self.mnode, self.volname) + self.assertIsNotNone(bricks_to_bring_offline, "List is empty") + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} offline'. + format(bricks_to_bring_offline)) + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks {} are not offline'. + format(bricks_to_bring_offline)) + + cmd = 'touch {}/file1'.format(dirpath) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, 'Unable to create file from mount point') + + ret = bring_bricks_online( + self.mnode, self.volname, + bricks_to_bring_offline, + bring_bricks_online_methods=['volume_start_force']) + self.assertTrue(ret, 'Failed to bring bricks {} online'.format + (bricks_to_bring_offline)) + ret = trigger_heal(self.mnode, self.volname) + self.assertTrue(ret, 'Starting heal failed') + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + if ctime: + ret = self.are_mdata_xattrs_equal() + self.assertTrue(ret, "glusterfs.mdata mismatch for {}" + .format(dirpath)) + else: + ret = self.are_stat_timestamps_equal() + self.assertTrue(ret, "stat mismatch for {}".format(dirpath)) + + ret = rmdir(client, dirpath, force=True) + self.assertTrue(ret, 'Unable to delete directory from mount point') + + def test_dir_time_stamp_restoration(self): + """ + Create pending entry self-heal on a replica volume and verify that + after the heal is complete, the atime, mtime and ctime of the parent + directory are identical on all bricks of the replica. + + The test is run with features.ctime enabled as well as disabled. 
+ """ + self.perform_test(ctime=True) + self.perform_test(ctime=False) diff --git a/tests/functional/afr/heal/test_existing_shd_should_take_care_of_self_healing.py b/tests/functional/afr/heal/test_existing_shd_should_take_care_of_self_healing.py index 64f5254a5..3fe682e59 100755 --- a/tests/functional/afr/heal/test_existing_shd_should_take_care_of_self_healing.py +++ b/tests/functional/afr/heal/test_existing_shd_should_take_care_of_self_healing.py @@ -46,7 +46,7 @@ class SelfHealDaemonProcessTestsWithHealing(GlusterBaseClass): which is used in tests """ # calling GlusterBaseClass setUpClass - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() # Setup Volume and Mount Volume g.log.info("Starting to Setup Volume and Mount Volume") @@ -74,8 +74,8 @@ class SelfHealDaemonProcessTestsWithHealing(GlusterBaseClass): raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") g.log.info("Successful in Unmount Volume and Cleanup Volume") - # calling GlusterBaseClass tearDownClass - GlusterBaseClass.tearDownClass.im_func(self) + # calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() def test_existing_glustershd_should_take_care_of_self_healing(self): """ diff --git a/tests/functional/afr/heal/test_heal_for_conservative_merge_with_two_bricks_blame.py b/tests/functional/afr/heal/test_heal_for_conservative_merge_with_two_bricks_blame.py new file mode 100644 index 000000000..163596bb7 --- /dev/null +++ b/tests/functional/afr/heal/test_heal_for_conservative_merge_with_two_bricks_blame.py @@ -0,0 +1,175 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from time import sleep + +from glusto.core import Glusto as g +from glustolibs.gluster.brick_libs import (get_all_bricks, are_bricks_offline, + bring_bricks_offline, + get_online_bricks_list, + are_bricks_online) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.gluster_init import restart_glusterd +from glustolibs.gluster.glusterfile import set_fattr, get_fattr +from glustolibs.gluster.heal_libs import (is_volume_in_split_brain, + monitor_heal_completion) +from glustolibs.gluster.lib_utils import collect_bricks_arequal + + +@runs_on([['replicated'], ['glusterfs']]) +class TestHealForConservativeMergeWithTwoBricksBlame(GlusterBaseClass): + + def setUp(self): + # calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup volume and mount it. 
+ if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + def tearDown(self): + # Unmount and cleanup the volume + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Unable to unmount and cleanup volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _bring_brick_offline_and_check(self, brick): + """Brings brick offline and checks if it is offline or not""" + ret = bring_bricks_offline(self.volname, [brick]) + self.assertTrue(ret, "Unable to bring brick: {} offline".format(brick)) + + # Validate the brick is offline + ret = are_bricks_offline(self.mnode, self.volname, [brick]) + self.assertTrue(ret, "Brick:{} is still online".format(brick)) + + def _get_fattr_for_the_brick(self, brick): + """Get xattr of trusted.afr.volname-client-0 for the given brick""" + host, fqpath = brick.split(":") + fqpath = fqpath + "/dir1" + fattr = "trusted.afr.{}-client-0".format(self.volname) + return get_fattr(host, fqpath, fattr, encode="hex") + + def _check_peers_status(self): + """Validates whether peers are connected""" + count = 0 + while count < 4: + if self.validate_peers_are_connected(): + return + sleep(5) + count += 1 + self.fail("Peers are not in connected state") + + def test_heal_for_conservative_merge_with_two_bricks_blame(self): + """ + 1) Create 1x3 volume and fuse mount the volume + 2) On the mount point create a dir dir1 + 3) Pkill glusterfsd on node n1 (b2 on node2 and b3 on node3 up) + 4) touch f{1..10} on the mountpoint + 5) b2 and b3 xattrs would be blaming b1 as files are created while + b1 is down + 6) Reset the b3 xattrs to NOT blame b1 by using setfattr + 7) Now pkill glusterfsd of b2 on node2 + 8) Restart glusterd on node1 to bring up b1 + 9) Now brick b1 is online, b2 down, b3 online + 10) touch x{1..10} under dir1 itself + 11) Again reset xattr on node3 of b3 so that it doesn't blame b2, + as done for b1 in step 6 + 12) Restart glusterd on node2 hosting b2 to bring all bricks online + 13) Check for heal info, split-brain and arequal for the bricks + """ + # pylint: disable=too-many-locals + # Create dir `dir1/` on the mountpoint + path = self.mounts[0].mountpoint + "/dir1" + ret = mkdir(self.mounts[0].client_system, path, parents=True) + self.assertTrue(ret, "Directory {} creation failed".format(path)) + + all_bricks = get_all_bricks(self.mnode, self.volname) + self.assertIsNotNone(all_bricks, "Unable to fetch bricks of volume") + brick1, brick2, brick3 = all_bricks + + # Bring first brick offline + self._bring_brick_offline_and_check(brick1) + + # touch f{1..10} files on the mountpoint + cmd = ("cd {mpt}; for i in `seq 1 10`; do touch f$i" + "; done".format(mpt=path)) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Unable to create files on mountpoint") + + # Check b2 and b3 xattrs are blaming b1 and are the same + self.assertEqual(self._get_fattr_for_the_brick(brick2), + self._get_fattr_for_the_brick(brick3), + "Both the bricks xattrs are not blaming " + "brick: {}".format(brick1)) + + # Reset the xattrs of dir1 on b3 for brick b1 + first_xattr_to_reset = "trusted.afr.{}-client-0".format(self.volname) + xattr_value = "0x000000000000000000000000" + host, brick_path = brick3.split(":") + brick_path = brick_path + "/dir1" + ret = set_fattr(host, brick_path, first_xattr_to_reset, xattr_value) + self.assertTrue(ret, "Unable to set xattr for the directory") + + # Kill brick2 on node2 +
self._bring_brick_offline_and_check(brick2) + + # Restart glusterd on node1 to bring the brick1 online + self.assertTrue(restart_glusterd([brick1.split(":")[0]]), "Unable to " + "restart glusterd") + # checking for peer status post glusterd restart + self._check_peers_status() + + # Check if the brick b1 on node1 is online or not + online_bricks = get_online_bricks_list(self.mnode, self.volname) + self.assertIsNotNone(online_bricks, "Unable to fetch online bricks") + self.assertIn(brick1, online_bricks, "Brick:{} is still offline after " + "glusterd restart".format(brick1)) + + # Create 10 files under dir1 naming x{1..10} + cmd = ("cd {mpt}; for i in `seq 1 10`; do touch x$i" + "; done".format(mpt=path)) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Unable to create files on mountpoint") + + # Reset the xattrs from brick3 on to brick2 + second_xattr_to_reset = "trusted.afr.{}-client-1".format(self.volname) + ret = set_fattr(host, brick_path, second_xattr_to_reset, xattr_value) + self.assertTrue(ret, "Unable to set xattr for the directory") + + # Bring brick2 online + self.assertTrue(restart_glusterd([brick2.split(":")[0]]), "Unable to " + "restart glusterd") + self._check_peers_status() + + self.assertTrue(are_bricks_online(self.mnode, self.volname, [brick2])) + + # Check are there any files in split-brain and heal completion + self.assertFalse(is_volume_in_split_brain(self.mnode, self.volname), + "Some files are in split brain for " + "volume: {}".format(self.volname)) + self.assertTrue(monitor_heal_completion(self.mnode, self.volname), + "Conservative merge of files failed") + + # Check arequal checksum of all the bricks is same + ret, arequal_from_the_bricks = collect_bricks_arequal(all_bricks) + self.assertTrue(ret, "Arequal is collected successfully across the" + " bricks in the subvol {}".format(all_bricks)) + self.assertEqual(len(set(arequal_from_the_bricks)), 1, "Arequal is " + "same on all the bricks in the subvol") diff --git a/tests/functional/afr/heal/test_heal_info_no_hang.py b/tests/functional/afr/heal/test_heal_info_no_hang.py new file mode 100644 index 000000000..82f8b0598 --- /dev/null +++ b/tests/functional/afr/heal/test_heal_info_no_hang.py @@ -0,0 +1,162 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-131 USA. + +""" +Description: + heal info completes when there is ongoing I/O and a lot of pending heals. 
+""" +import random +from glusto.core import Glusto as g + +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + get_all_bricks) +from glustolibs.gluster.heal_ops import trigger_heal +from glustolibs.io.utils import run_linux_untar +from glustolibs.gluster.glusterdir import mkdir + + +@runs_on([['distributed-replicated'], + ['glusterfs']]) +class TestHealInfoNoHang(GlusterBaseClass): + + def setUp(self): + self.get_super_method(self, 'setUp')() + + self.is_io_running = False + + ret = self.setup_volume_and_mount_volume(mounts=self.mounts, + volume_create_force=False) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + self.bricks_list = get_all_bricks(self.mnode, self.volname) + self.subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + + def tearDown(self): + if self.is_io_running: + if not self._wait_for_untar_completion(): + g.log.error("I/O failed to stop on clients") + + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + self.get_super_method(self, 'tearDown')() + + def _wait_for_untar_completion(self): + """Wait for the kernel untar to complete""" + has_process_stopped = [] + for proc in self.list_of_io_processes: + try: + ret, _, _ = proc.async_communicate() + if not ret: + has_process_stopped.append(False) + has_process_stopped.append(True) + except ValueError: + has_process_stopped.append(True) + return all(has_process_stopped) + + def _does_heal_info_complete_within_timeout(self): + """Check if heal info CLI completes within a specific timeout""" + # We are just assuming 1 entry takes one second to process, which is + # a very high number but some estimate is better than a random magic + # value for timeout. + timeout = self.num_entries * 1 + + # heal_info_data = get_heal_info(self.mnode, self.volname) + cmd = "timeout %s gluster volume heal %s info" % (timeout, + self.volname) + ret, _, _ = g.run(self.mnode, cmd) + if ret: + return False + return True + + def test_heal_info_no_hang(self): + """ + Testcase steps: + 1. Start kernel untar on the mount + 2. While untar is going on, kill a brick of the replica. + 3. Wait for the untar to be over, resulting in pending heals. + 4. Get the approx. number of pending heals and save it + 5. Bring the brick back online. + 6. Trigger heal + 7. Run more I/Os with dd command + 8. Run heal info command and check that it completes successfully under + a timeout that is based on the no. of heals in step 4. + """ + self.list_of_io_processes = [] + self.linux_untar_dir = "{}/{}".format(self.mounts[0].mountpoint, + "linuxuntar") + ret = mkdir(self.clients[0], self.linux_untar_dir) + self.assertTrue(ret, "Failed to create dir linuxuntar for untar") + + # Start linux untar on dir linuxuntar + ret = run_linux_untar(self.clients[0], self.mounts[0].mountpoint, + dirs=tuple(['linuxuntar'])) + self.list_of_io_processes += ret + self.is_io_running = True + + # Kill brick resulting in heal backlog. 
+ brick_to_bring_offline = random.choice(self.bricks_list) + ret = bring_bricks_offline(self.volname, brick_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' + % brick_to_bring_offline) + ret = are_bricks_offline(self.mnode, self.volname, + [brick_to_bring_offline]) + self.assertTrue(ret, 'Bricks %s are not offline' + % brick_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + brick_to_bring_offline) + + ret = self._wait_for_untar_completion() + self.assertFalse(ret, "IO didn't complete or failed on client") + self.is_io_running = False + + # Get approx. no. of entries to be healed. + cmd = ("gluster volume heal %s statistics heal-count | grep Number " + "| awk '{sum+=$4} END {print sum/2}'" % self.volname) + ret, self.num_entries, _ = g.run(self.mnode, cmd) + self.assertEqual(ret, 0, "Failed to get heal-count statistics") + + # Restart the down bricks + ret = bring_bricks_online(self.mnode, self.volname, + brick_to_bring_offline) + self.assertTrue(ret, 'Failed to bring brick %s online' % + brick_to_bring_offline) + g.log.info('Bringing brick %s online is successful', + brick_to_bring_offline) + # Trigger heal + ret = trigger_heal(self.mnode, self.volname) + self.assertTrue(ret, 'Starting heal failed') + g.log.info('Index heal launched') + + # Run more I/O + cmd = ("for i in `seq 1 10`; do dd if=/dev/urandom of=%s/file_$i " + "bs=1M count=100; done" % self.mounts[0].mountpoint) + ret = g.run_async(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + + # Get heal info + ret = self._does_heal_info_complete_within_timeout() + self.assertTrue(ret, 'Heal info timed out') + g.log.info('Heal info completed succesfully') diff --git a/tests/functional/afr/heal/test_heal_info_while_accessing_file.py b/tests/functional/afr/heal/test_heal_info_while_accessing_file.py index 2fa7b194c..24450702b 100644 --- a/tests/functional/afr/heal/test_heal_info_while_accessing_file.py +++ b/tests/functional/afr/heal/test_heal_info_while_accessing_file.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2016 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -13,8 +13,8 @@ # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- from glusto.core import Glusto as g + from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.volume_libs import get_subvols @@ -41,16 +41,14 @@ class TestSelfHeal(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -73,7 +71,7 @@ class TestSelfHeal(GlusterBaseClass): def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() self.all_mounts_procs = [] self.io_validation_complete = False @@ -118,7 +116,7 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_heal_info_shouldnot_list_files_being_accessed(self): """ @@ -152,8 +150,9 @@ class TestSelfHeal(GlusterBaseClass): mount_obj.client_system, mount_obj.mountpoint) # Creating files - cmd = ("python %s create_files -f 100 %s" - % (self.script_upload_path, mount_obj.mountpoint)) + cmd = "/usr/bin/env python %s create_files -f 100 %s" % ( + self.script_upload_path, + mount_obj.mountpoint) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) @@ -214,9 +213,7 @@ class TestSelfHeal(GlusterBaseClass): # Compare dicts before accessing and while accessing g.log.info('Comparing entries before modifying and while modifying...') - ret = cmp(entries_before_accessing, entries_while_accessing) - self.assertEqual(ret, 0, 'Entries before modifying and while modifying' - 'are not equal') + self.assertDictEqual(entries_before_accessing, entries_while_accessing) g.log.info('Comparison entries before modifying and while modifying' 'finished successfully.') diff --git a/tests/functional/afr/heal/test_impact_of_replace_brick_on_glustershd.py b/tests/functional/afr/heal/test_impact_of_replace_brick_on_glustershd.py new file mode 100644 index 000000000..efd2f8745 --- /dev/null +++ b/tests/functional/afr/heal/test_impact_of_replace_brick_on_glustershd.py @@ -0,0 +1,186 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from random import choice +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass +from glustolibs.gluster.volume_libs import ( + log_volume_info_and_status, wait_for_volume_process_to_be_online, + setup_volume, cleanup_volume) +from glustolibs.gluster.lib_utils import get_servers_bricks_dict +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.brick_ops import replace_brick +from glustolibs.gluster.heal_libs import (get_self_heal_daemon_pid, + do_bricks_exist_in_shd_volfile, + is_shd_daemonized) +from glustolibs.gluster.volume_ops import get_volume_list + + +class SelfHealDaemonProcessTestsWithMultipleVolumes(GlusterBaseClass): + """ + SelfHealDaemonProcessTestsWithMultipleVolumes contains tests which + verifies the self-heal daemon process on multiple volumes running. + """ + def setUp(self): + """ + setup volume and initialize necessary variables + which is used in tests + """ + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup Volume for all the volume types + self.volume_configs = [] + for volume_type in self.default_volume_type_config: + self.volume_configs.append( + {'name': 'testvol_%s' % volume_type, + 'servers': self.servers, + 'voltype': self.default_volume_type_config[volume_type]}) + + for volume_config in self.volume_configs[1:]: + ret = setup_volume(mnode=self.mnode, + all_servers_info=self.all_servers_info, + volume_config=volume_config, + multi_vol=True) + volname = volume_config['name'] + if not ret: + raise ExecutionError("Failed to setup Volume" + " %s" % volname) + g.log.info("Successful in setting volume %s", volname) + + # Verify volume's all process are online for 60 sec + ret = wait_for_volume_process_to_be_online(self.mnode, volname, 60) + if not ret: + raise ExecutionError("Volume %s : All process are not online" + % volname) + g.log.info("Successfully Verified volume %s processes are online", + volname) + + # Verfiy glustershd process releases its parent process + ret = is_shd_daemonized(self.servers) + if not ret: + raise ExecutionError("Self Heal Daemon process was still" + " holding parent process.") + g.log.info("Self Heal Daemon processes are online") + + self.glustershd = "/var/lib/glusterd/glustershd/glustershd-server.vol" + + def tearDown(self): + """ + Clean up the volume and umount volume from client + """ + + # Cleanup volume + volume_list = get_volume_list(self.mnode) + for volume in volume_list: + ret = cleanup_volume(self.mnode, volume) + if not ret: + raise ExecutionError("Failed to cleanup Volume %s" % volume) + g.log.info("Successfully Cleaned up all Volumes") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_impact_of_replace_brick_on_glustershd(self): + """ + Test Script to verify the glustershd server vol file + has only entries for replicate volumes + 1.Create multiple volumes and start all volumes + 2.Check the glustershd processes - Only 1 glustershd should be listed + 3.Do replace brick on the replicate volume + 4.Confirm that the brick is replaced + 5.Check the glustershd processes - Only 1 glustershd should be listed + and pid should be different + 6.glustershd server vol should be updated with new bricks + """ + # Check the self-heal daemon process + ret, glustershd_pids = get_self_heal_daemon_pid(self.servers) + self.assertTrue(ret, ("Either no self heal daemon process found or " + "more than one self heal daemon 
process " + "found : %s" % glustershd_pids)) + g.log.info("Successful in getting single self heal daemon process" + " on all nodes %s", self.servers) + + volume_list = get_volume_list(self.mnode) + for volume in volume_list: + + # Log Volume Info and Status before replacing brick + ret = log_volume_info_and_status(self.mnode, volume) + self.assertTrue(ret, ("Logging volume info and status " + "failed on volume %s", volume)) + g.log.info("Successful in logging volume info and status " + "of volume %s", volume) + + # Selecting a random source brick to replace + src_brick = choice(get_all_bricks(self.mnode, volume)) + src_node, original_brick = src_brick.split(":") + + # Creating a random destination brick in such a way + # that the brick is select from the same node but always + # picks a different from the original brick + list_of_bricks = [ + brick for brick in get_servers_bricks_dict( + src_node, self.all_servers_info)[src_node] + if brick not in original_brick] + dst_brick = ('{}:{}/{}_replaced'.format( + src_node, choice(list_of_bricks), + original_brick.split('/')[::-1][0])) + + # Replace brick for the volume + ret, _, _ = replace_brick(self.mnode, volume, + src_brick, dst_brick) + self.assertFalse(ret, "Failed to replace brick " + "from the volume %s" % volume) + g.log.info("Successfully replaced faulty brick from " + "the volume %s", volume) + + # Verify all volume process are online + ret = wait_for_volume_process_to_be_online(self.mnode, volume) + self.assertTrue(ret, "Volume %s : All process are not online" + % volume) + g.log.info("Volume %s : All process are online", volume) + + # Check the self-heal daemon process after replacing brick + ret, pid_after_replace = get_self_heal_daemon_pid(self.servers) + self.assertTrue(ret, "Either no self heal daemon process " + "found or more than one self heal " + "daemon process found : %s" % pid_after_replace) + g.log.info("Successful in getting Single self heal " + " daemon process on all nodes %s", self.servers) + + # Compare the glustershd pids + self.assertNotEqual(glustershd_pids, pid_after_replace, + "Self heal daemon process should be different " + "after replacing bricks in %s volume" + % volume) + g.log.info("EXPECTED: Self heal daemon process should be different" + " after replacing bricks in replicate volume") + + # Get the bricks for the volume + bricks_list = get_all_bricks(self.mnode, volume) + g.log.info("Brick List : %s", bricks_list) + + # Validate the bricks present in volume info with + # glustershd server volume file + ret = do_bricks_exist_in_shd_volfile(self.mnode, volume, + bricks_list) + self.assertTrue(ret, ("Brick List from volume info is " + "different from glustershd server " + "volume file. Please check log file " + "for details")) + g.log.info("Bricks in volume %s exists in glustershd server " + "volume file", volume) diff --git a/tests/functional/afr/heal/test_metadata_split_brain_resolution.py b/tests/functional/afr/heal/test_metadata_split_brain_resolution.py index 75c513a5f..7782a4de8 100644 --- a/tests/functional/afr/heal/test_metadata_split_brain_resolution.py +++ b/tests/functional/afr/heal/test_metadata_split_brain_resolution.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2020 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -44,7 +44,7 @@ class HealMetadataSplitBrain(GlusterBaseClass): def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Override Volume if cls.volume_type == "replicated": @@ -56,11 +56,9 @@ class HealMetadataSplitBrain(GlusterBaseClass): # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on " "mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, script_local_path) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts " "to clients %s" % cls.clients) @@ -74,17 +72,19 @@ class HealMetadataSplitBrain(GlusterBaseClass): raise ExecutionError("Failed to Setup_Volume and Mount_Volume") g.log.info("Successful in Setup Volume and Mount Volume") - @classmethod - def tearDownClass(cls): - - # Cleanup Volume - g.log.info("Starting to clean up Volume %s", cls.volname) - ret = cls.unmount_volume_and_cleanup_volume(cls.mounts) + def tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + g.log.info("Starting to Unmount Volume and Cleanup Volume") + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: - raise ExecutionError("Failed to create volume") - g.log.info("Successful in cleaning up Volume %s", cls.volname) + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") - GlusterBaseClass.tearDownClass.im_func(cls) + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() def verify_brick_arequals(self): g.log.info("Fetching bricks for the volume: %s", self.volname) diff --git a/tests/functional/afr/heal/test_no_glustershd_with_distribute.py b/tests/functional/afr/heal/test_no_glustershd_with_distribute.py new file mode 100644 index 000000000..bbefe0cff --- /dev/null +++ b/tests/functional/afr/heal/test_no_glustershd_with_distribute.py @@ -0,0 +1,177 @@ +# Copyright (C) 2017-2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" Description: + Test Cases in this module tests the self heal daemon process. 
+""" + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass +from glustolibs.gluster.volume_libs import ( + wait_for_volume_process_to_be_online, setup_volume, cleanup_volume, + get_volume_type_info) +from glustolibs.gluster.heal_libs import (get_self_heal_daemon_pid, + is_shd_daemonized,) +from glustolibs.gluster.volume_ops import (volume_stop, volume_start, + get_volume_list) + + +class SelfHealDaemonProcessTestsWithMultipleVolumes(GlusterBaseClass): + """ + SelfHealDaemonProcessTestsWithMultipleVolumes contains tests which + verifies the self-heal daemon process on multiple volumes running. + """ + @classmethod + def setUpClass(cls): + """ + setup volume and initialize necessary variables + which is used in tests + """ + # calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + list_of_vol = ['distributed-dispersed', 'replicated', + 'dispersed', 'distributed', 'distributed-replicated'] + cls.volume_configs = [] + if cls.default_volume_type_config['distributed']['dist_count'] > 3: + cls.default_volume_type_config['distributed']['dist_count'] = 3 + + for volume_type in list_of_vol: + cls.volume_configs.append( + {'name': 'testvol_%s' % (volume_type), + 'servers': cls.servers, + 'voltype': cls.default_volume_type_config[volume_type]}) + for volume_config in cls.volume_configs: + ret = setup_volume(mnode=cls.mnode, + all_servers_info=cls.all_servers_info, + volume_config=volume_config, multi_vol=True) + volname = volume_config['name'] + if not ret: + raise ExecutionError("Failed to setup Volume" + " %s" % volname) + g.log.info("Successful in setting volume %s", volname) + + # Verify volume's all process are online for 60 sec + g.log.info("Verifying volume's all process are online") + ret = wait_for_volume_process_to_be_online(cls.mnode, volname, 60) + if not ret: + raise ExecutionError("Volume %s : All process are not online" + % volname) + g.log.info("Successfully Verified volume %s processes are online", + volname) + + # Verfiy glustershd process releases its parent process + g.log.info("Verifying Self Heal Daemon process is daemonized") + ret = is_shd_daemonized(cls.servers) + if not ret: + raise ExecutionError("Self Heal Daemon process was still" + " holding parent process.") + g.log.info("Self Heal Daemon processes are online") + + @classmethod + def tearDownClass(cls): + """ + Clean up the volume and umount volume from client + """ + + # stopping the volume + g.log.info("Starting to Cleanup all Volumes") + volume_list = get_volume_list(cls.mnode) + for volume in volume_list: + ret = cleanup_volume(cls.mnode, volume) + if not ret: + raise ExecutionError("Failed to cleanup Volume %s" % volume) + g.log.info("Volume: %s cleanup is done", volume) + g.log.info("Successfully Cleanedup all Volumes") + + # calling GlusterBaseClass tearDownClass + cls.get_super_method(cls, 'tearDownClass')() + + def test_no_glustershd_with_distribute(self): + """ + Test Script to verify the glustershd server vol file + has only entries for replicate volumes + + * Create multiple volumes and start all volumes + * Check the glustershd processes - Only 1 glustershd should be listed + * Stop all volumes + * Check the glustershd processes - No glustershd should be running + * Start the distribute volume only + * Check the glustershd processes - No glustershd should be running + + """ + + nodes = self.servers + + # check the self-heal daemon process + g.log.info("Starting to get 
self-heal daemon process on " + "nodes %s", nodes) + ret, pids = get_self_heal_daemon_pid(nodes) + self.assertTrue(ret, ("Either no self heal daemon process found or " + "more than One self heal daemon process " + "found : %s" % pids)) + g.log.info("Successful in getting single self heal daemon process" + " on all nodes %s", nodes) + + # stop all the volumes + g.log.info("Going to stop all the volumes") + volume_list = get_volume_list(self.mnode) + for volume in volume_list: + g.log.info("Stopping Volume : %s", volume) + ret = volume_stop(self.mnode, volume) + self.assertTrue(ret, ("Failed to stop volume %s" % volume)) + g.log.info("Successfully stopped volume %s", volume) + g.log.info("Successfully stopped all the volumes") + + # check the self-heal daemon process after stopping all volumes + g.log.info("Starting to get self-heal daemon process on " + "nodes %s", nodes) + ret, pids = get_self_heal_daemon_pid(nodes) + self.assertFalse(ret, ("Self heal daemon process is still running " + "after stopping all volumes ")) + for node in pids: + self.assertEqual(pids[node][0], -1, ("Self heal daemon is still " + "running on node %s even " + "after stoppong all " + "volumes" % node)) + g.log.info("EXPECTED: No self heal daemon process is " + "running after stopping all volumes") + + # start the distribute volume only + for volume in volume_list: + volume_type_info = get_volume_type_info(self.mnode, volume) + volume_type = (volume_type_info['volume_type_info']['typeStr']) + if volume_type == 'Distribute': + g.log.info("starting to start distribute volume: %s", volume) + ret = volume_start(self.mnode, volume) + self.assertTrue(ret, ("Failed to start volume %s" % volume)) + g.log.info("Successfully started volume %s", volume) + break + + # check the self-heal daemon process after starting distribute volume + g.log.info("Starting to get self-heal daemon process on " + "nodes %s", nodes) + ret, pids = get_self_heal_daemon_pid(nodes) + self.assertFalse(ret, ("Self heal daemon process is still running " + "after stopping all volumes ")) + for node in pids: + self.assertEqual(pids[node][0], -1, ("Self heal daemon is still " + "running on node %s even " + "after stopping all " + "volumes" % node)) + g.log.info("EXPECTED: No self heal daemon process is running " + "after stopping all volumes") diff --git a/tests/functional/afr/heal/test_self_heal.py b/tests/functional/afr/heal/test_self_heal.py index fe060e4f5..4fb6dea7e 100755 --- a/tests/functional/afr/heal/test_self_heal.py +++ b/tests/functional/afr/heal/test_self_heal.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2016 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,15 +15,13 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
# pylint: disable=too-many-lines - from glusto.core import Glusto as g from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) from glustolibs.gluster.exceptions import ExecutionError -from glustolibs.gluster.volume_ops import set_volume_options +from glustolibs.gluster.volume_ops import get_volume_options from glustolibs.gluster.volume_libs import ( verify_all_process_of_volume_are_online, wait_for_volume_process_to_be_online) -from glustolibs.gluster.volume_libs import expand_volume from glustolibs.gluster.brick_libs import (select_bricks_to_bring_offline, bring_bricks_offline, bring_bricks_online, @@ -34,8 +32,6 @@ from glustolibs.gluster.heal_libs import ( is_heal_complete, is_volume_in_split_brain, is_shd_daemonized) -from glustolibs.gluster.rebalance_ops import (rebalance_start, - wait_for_rebalance_to_complete) from glustolibs.gluster.heal_ops import trigger_heal from glustolibs.misc.misc_libs import upload_scripts from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs, @@ -43,27 +39,25 @@ from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs, @runs_on([['replicated', 'distributed-replicated'], - ['glusterfs', 'cifs', 'nfs']]) + ['glusterfs', 'cifs']]) class TestSelfHeal(GlusterBaseClass): """ Description: - Arbiter Test cases related to - healing in default configuration of the volume + AFR Test cases related to healing in + default configuration of the volume """ @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -86,7 +80,7 @@ class TestSelfHeal(GlusterBaseClass): def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() self.all_mounts_procs = [] self.io_validation_complete = False @@ -121,14 +115,17 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() - def test_data_self_heal_daemon_off(self): + def test_data_self_heal_command(self): """ Test Data-Self-Heal (heal command) Description: - - set the volume option + - get the client side healing volume options and check + if they have already been disabled by default + NOTE: Client side healing has been disabled by default + since GlusterFS 6.0 "metadata-self-heal": "off" "entry-self-heal": "off" "data-self-heal": "off" @@ -137,7 +134,7 @@ class TestSelfHeal(GlusterBaseClass): - set the volume option "self-heal-daemon": "off" - bring down all bricks processes from selected set - - Get areeual after getting bricks offline and compare with + - Get arequal after getting bricks offline and compare with arequal before getting bricks offline - modify the data - bring bricks online @@ -146,8 +143,6 @@ class TestSelfHeal(GlusterBaseClass): - check daemons and start healing - check if heal is completed - check for split-brain - - add bricks 
- - do rebalance - create 5k files - while creating files - kill bricks and bring bricks online one by one in cycle @@ -155,15 +150,16 @@ class TestSelfHeal(GlusterBaseClass): """ # pylint: disable=too-many-statements - # Setting options - g.log.info('Setting options...') - options = {"metadata-self-heal": "off", - "entry-self-heal": "off", - "data-self-heal": "off"} - ret = set_volume_options(self.mnode, self.volname, options) - self.assertTrue(ret, 'Failed to set options %s' % options) - g.log.info("Successfully set %s for volume %s", - options, self.volname) + # Checking if Client side healing options are disabled by default + g.log.info('Checking Client side healing is disabled by default') + options = ('cluster.metadata-self-heal', 'cluster.data-self-heal', + 'cluster.entry-self-heal') + for option in options: + ret = get_volume_options(self.mnode, self.volname, option)[option] + self.assertTrue(bool(ret == 'off' or ret == 'off (DEFAULT)'), + "{} option is not disabled by default" + .format(option)) + g.log.info("Client side healing options are disabled by default") # Creating files on client side for mount_obj in self.mounts: @@ -171,8 +167,10 @@ class TestSelfHeal(GlusterBaseClass): mount_obj.client_system, mount_obj.mountpoint) # Create files g.log.info('Creating files...') - command = ("python %s create_files -f 100 --fixed-file-size 1k %s" - % (self.script_upload_path, mount_obj.mountpoint)) + command = ("/usr/bin/env python %s create_files -f 100 " + "--fixed-file-size 1k %s" % ( + self.script_upload_path, + mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, command, user=mount_obj.user) @@ -193,20 +191,10 @@ class TestSelfHeal(GlusterBaseClass): g.log.info('Getting arequal before getting bricks offline ' 'is successful') - # Setting options - g.log.info('Setting options...') - options = {"self-heal-daemon": "off"} - ret = set_volume_options(self.mnode, self.volname, options) - self.assertTrue(ret, 'Failed to set options %s' % options) - g.log.info("Option 'self-heal-daemon' is set to 'off' successfully") - # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks'])) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -243,8 +231,10 @@ class TestSelfHeal(GlusterBaseClass): mount_obj.mountpoint) # Create files g.log.info('Creating files...') - command = ("python %s create_files -f 100 --fixed-file-size 10k %s" - % (self.script_upload_path, mount_obj.mountpoint)) + command = ("/usr/bin/env python %s create_files -f 100 " + "--fixed-file-size 10k %s" % ( + self.script_upload_path, + mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, command, user=mount_obj.user) @@ -267,13 +257,6 @@ class TestSelfHeal(GlusterBaseClass): g.log.info('Bringing bricks %s online is successful', bricks_to_bring_offline) - # Setting options - g.log.info('Setting options...') - options = {"self-heal-daemon": "on"} - ret = set_volume_options(self.mnode, self.volname, options) - self.assertTrue(ret, 'Failed to set options %s' % options) - g.log.info("Option 'self-heal-daemon' is set to 'on' successfully") - # Wait for volume processes to be online g.log.info("Wait for volume processes to be online") ret = 
wait_for_volume_process_to_be_online(self.mnode, self.volname) @@ -282,7 +265,7 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("Successful in waiting for volume %s processes to be " "online", self.volname) - # Verify volume's all process are online + # Verify volume's all processes are online g.log.info("Verifying volume's all process are online") ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) self.assertTrue(ret, ("Volume %s : All process are not online" @@ -314,23 +297,6 @@ class TestSelfHeal(GlusterBaseClass): self.assertFalse(ret, 'Volume is in split-brain state') g.log.info('Volume is not in split-brain state') - # Add bricks - g.log.info("Start adding bricks to volume...") - ret = expand_volume(self.mnode, self.volname, self.servers, - self.all_servers_info) - self.assertTrue(ret, ("Failed to expand the volume %s", self.volname)) - g.log.info("Expanding volume is successful on " - "volume %s", self.volname) - - # Do rebalance - ret, _, _ = rebalance_start(self.mnode, self.volname) - self.assertEqual(ret, 0, 'Failed to start rebalance') - g.log.info('Rebalance is started') - - ret = wait_for_rebalance_to_complete(self.mnode, self.volname) - self.assertTrue(ret, 'Rebalance is not completed') - g.log.info('Rebalance is completed successfully') - # Create 1k files self.all_mounts_procs = [] for mount_obj in self.mounts: @@ -338,8 +304,9 @@ class TestSelfHeal(GlusterBaseClass): mount_obj.mountpoint) # Create files g.log.info('Creating files...') - command = ("python %s create_files -f 1000 %s" - % (self.script_upload_path, mount_obj.mountpoint)) + command = ("/usr/bin/env python %s create_files -f 1000 %s" % ( + self.script_upload_path, + mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, command, user=mount_obj.user) @@ -402,50 +369,26 @@ class TestSelfHeal(GlusterBaseClass): ) self.io_validation_complete = True - def test_self_heal_50k_files_heal_command_by_add_brick(self): + def test_self_heal_50k_files_heal_default(self): """ - Test self-heal of 50k files (heal command + Test self-heal of 50k files by heal default Description: - - set the volume option - "metadata-self-heal": "off" - "entry-self-heal": "off" - "data-self-heal": "off" - "self-heal-daemon": "off" - bring down all bricks processes from selected set - create IO (50k files) - Get arequal before getting bricks online - - bring bricks online - - set the volume option - "self-heal-daemon": "on" - - check for daemons - - start healing + - check for daemons to come online + - heal daemon should pick up entries to heal automatically - check if heal is completed - check for split-brain - get arequal after getting bricks online and compare with arequal before getting bricks online - - add bricks - - do rebalance - - get arequal after adding bricks and compare with - arequal after getting bricks online """ # pylint: disable=too-many-locals,too-many-statements - # Setting options - g.log.info('Setting options...') - options = {"metadata-self-heal": "off", - "entry-self-heal": "off", - "data-self-heal": "off", - "self-heal-daemon": "off"} - ret = set_volume_options(self.mnode, self.volname, options) - self.assertTrue(ret, 'Failed to set options') - g.log.info("Successfully set %s for volume %s", options, self.volname) # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - 
bricks_to_bring_offline_dict['volume_bricks'])) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -466,8 +409,9 @@ class TestSelfHeal(GlusterBaseClass): mount_obj.client_system, mount_obj.mountpoint) # Create 50k files g.log.info('Creating files...') - command = ("python %s create_files -f 50000 %s" - % (self.script_upload_path, mount_obj.mountpoint)) + command = ("/usr/bin/env python %s create_files -f 50000 %s" % ( + self.script_upload_path, + mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, command, user=mount_obj.user) @@ -496,13 +440,6 @@ class TestSelfHeal(GlusterBaseClass): g.log.info('Bringing bricks %s online is successful', bricks_to_bring_offline) - # Setting options - g.log.info('Setting options...') - options = {"self-heal-daemon": "on"} - ret = set_volume_options(self.mnode, self.volname, options) - self.assertTrue(ret, 'Failed to set options %s' % options) - g.log.info("Option 'self-heal-daemon' is set to 'on' successfully") - # Wait for volume processes to be online g.log.info("Wait for volume processes to be online") ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) @@ -524,11 +461,7 @@ class TestSelfHeal(GlusterBaseClass): self.assertTrue(ret, "Either No self heal daemon process found") g.log.info("All self-heal-daemons are online") - # Start healing - ret = trigger_heal(self.mnode, self.volname) - self.assertTrue(ret, 'Heal is not started') - g.log.info('Healing is started') - + # Default Heal testing, wait for shd to pick up healing # Monitor heal completion ret = monitor_heal_completion(self.mnode, self.volname, timeout_period=3600) @@ -553,40 +486,8 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks online # and after bringing bricks online - self.assertItemsEqual(result_before_online, result_after_online, - 'Checksums before and ' - 'after bringing bricks online are not equal') + self.assertEqual(result_before_online, result_after_online, + 'Checksums before and after bringing bricks online ' + 'are not equal') g.log.info('Checksums before and after bringing bricks online ' 'are equal') - - # Add bricks - g.log.info("Start adding bricks to volume...") - ret = expand_volume(self.mnode, self.volname, self.servers, - self.all_servers_info) - self.assertTrue(ret, ("Failed to expand the volume when IO in " - "progress on volume %s", self.volname)) - g.log.info("Expanding volume is successful on volume %s", self.volname) - - # Do rebalance - ret, _, _ = rebalance_start(self.mnode, self.volname) - self.assertEqual(ret, 0, 'Failed to start rebalance') - g.log.info('Rebalance is started') - - ret = wait_for_rebalance_to_complete(self.mnode, self.volname) - self.assertTrue(ret, 'Rebalance is not completed') - g.log.info('Rebalance is completed successfully') - - # Get arequal after adding bricks - g.log.info('Getting arequal after adding bricks...') - ret, result_after_adding_bricks = collect_mounts_arequal(self.mounts) - self.assertTrue(ret, 'Failed to get arequal') - g.log.info('Getting arequal after getting bricks ' - 'is successful') - - # Checking arequals after bringing bricks online - # and after adding bricks - self.assertItemsEqual(result_after_online, result_after_adding_bricks, - 'Checksums after bringing bricks online and ' - 'after adding bricks are not equal') - g.log.info('Checksums after bringing bricks online and ' - 'after adding bricks are equal') diff --git 
a/tests/functional/afr/heal/test_self_heal_daemon_process.py b/tests/functional/afr/heal/test_self_heal_daemon_process.py index edb4575eb..ea598b1fc 100755 --- a/tests/functional/afr/heal/test_self_heal_daemon_process.py +++ b/tests/functional/afr/heal/test_self_heal_daemon_process.py @@ -1,4 +1,4 @@ -# Copyright (C) 2016-2017 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2016-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -18,9 +18,11 @@ Test Cases in this module tests the self heal daemon process. """ -import time import calendar +import time + from glusto.core import Glusto as g + from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on from glustolibs.gluster.volume_libs import ( @@ -37,7 +39,8 @@ from glustolibs.gluster.heal_libs import (get_self_heal_daemon_pid, is_shd_daemonized, are_all_self_heal_daemons_are_online) from glustolibs.gluster.volume_ops import (volume_stop, volume_start) -from glustolibs.gluster.gluster_init import restart_glusterd +from glustolibs.gluster.gluster_init import ( + restart_glusterd, wait_for_glusterd_to_start) from glustolibs.io.utils import validate_io_procs from glustolibs.misc.misc_libs import upload_scripts @@ -55,16 +58,14 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -78,7 +79,7 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): """ # calling GlusterBaseClass setUpClass - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() self.all_mounts_procs = [] self.io_validation_complete = False @@ -103,7 +104,6 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): """ Clean up the volume and umount volume from client """ - # stopping the volume g.log.info("Starting to Unmount Volume and Cleanup Volume") ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) @@ -112,7 +112,7 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): g.log.info("Successful in Unmount Volume and Cleanup Volume") # calling GlusterBaseClass tearDownClass - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_glustershd_with_add_remove_brick(self): """ @@ -356,6 +356,10 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): g.log.info("Successfully restarted glusterd on all nodes %s", nodes) + self.assertTrue( + wait_for_glusterd_to_start(self.servers), + "Failed to start glusterd on %s" % self.servers) + # check the self heal daemon process after restarting glusterd process g.log.info("Starting to get self-heal daemon process on" " nodes %s", nodes) @@ -445,10 +449,7 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): # select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( 
self.mnode, self.volname)) - bricks_to_bring_offline = filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks'])) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # bring bricks offline g.log.info("Going to bring down the brick process " @@ -529,10 +530,7 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks'])) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -548,11 +546,14 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): bricks_to_bring_offline) # Creating files for all volumes + self.all_mounts_procs = [] for mount_obj in self.mounts: g.log.info("Starting IO on %s:%s", mount_obj.client_system, mount_obj.mountpoint) - cmd = ("python %s create_files -f 100 %s/test_dir" - % (self.script_upload_path, mount_obj.mountpoint)) + cmd = ("/usr/bin/env python %s create_files -f 100 " + "%s/test_dir" % ( + self.script_upload_path, + mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) self.all_mounts_procs.append(proc) diff --git a/tests/functional/afr/heal/test_self_heal_with_link_files.py b/tests/functional/afr/heal/test_self_heal_with_link_files.py new file mode 100644 index 000000000..d029c3d9e --- /dev/null +++ b/tests/functional/afr/heal/test_self_heal_with_link_files.py @@ -0,0 +1,405 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
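The renamed test_data_self_heal_command earlier in this patch no longer sets the client-side heal options; it only verifies that they default to off, the out-of-the-box behaviour from GlusterFS 6.0 onwards. A minimal sketch of that verification, assuming a glusto-tests session where mnode and volname (placeholder names) point at an existing replicate volume:

    from glustolibs.gluster.volume_ops import get_volume_options

    def client_side_heal_disabled(mnode, volname):
        """Return True only if every client-side heal option is off."""
        for option in ('cluster.metadata-self-heal',
                       'cluster.data-self-heal',
                       'cluster.entry-self-heal'):
            value = get_volume_options(mnode, volname, option)[option]
            # An unset option may be reported as 'off (DEFAULT)'
            if value not in ('off', 'off (DEFAULT)'):
                return False
        return True

The test fails as soon as any of the three options reports a value other than off, mirroring the assertTrue loop in that hunk.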
+ +from random import choice + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + are_bricks_online, + get_all_bricks) +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_volume_in_split_brain, + is_heal_complete) +from glustolibs.gluster.lib_utils import collect_bricks_arequal +from glustolibs.gluster.volume_libs import (get_subvols, + replace_brick_from_volume) +from glustolibs.io.utils import collect_mounts_arequal + + +@runs_on([['distributed-replicated', 'replicated'], ['glusterfs']]) +class TestHealWithLinkFiles(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to setup and mount volume") + + self.first_client = self.mounts[0].client_system + self.mountpoint = self.mounts[0].mountpoint + + def tearDown(self): + + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _create_files_and_dirs_on_mount_point(self, second_attempt=False): + """A function to create files and dirs on mount point""" + # Create a parent directory test_link_self_heal on mount point + if not second_attempt: + ret = mkdir(self.first_client, + '{}/{}'.format(self.mountpoint, + 'test_link_self_heal')) + self.assertTrue(ret, "Failed to create dir test_link_self_heal") + + # Create dirctories and files inside directory test_link_self_heal + io_cmd = ("for i in `seq 1 5`; do mkdir dir.$i; " + "for j in `seq 1 10`; do dd if=/dev/random " + "of=dir.$i/file.$j bs=1k count=$j; done; done") + if second_attempt: + io_cmd = ("for i in `seq 1 5` ; do for j in `seq 1 10`; " + "do dd if=/dev/random of=sym_link_dir.$i/" + "new_file.$j bs=1k count=$j; done; done ") + cmd = ("cd {}/test_link_self_heal;{}".format(self.mountpoint, io_cmd)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to create dirs and files inside") + + def _create_soft_links_to_directories(self): + """Create soft links to directories""" + cmd = ("cd {}/test_link_self_heal; for i in `seq 1 5`; do ln -s " + "dir.$i sym_link_dir.$i; done".format(self.mountpoint)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to create soft links to dirs") + + def _verify_soft_links_to_dir(self, option=0): + """Verify soft links to dir""" + + cmd_list = [ + ("for i in `seq 1 5`; do stat -c %F sym_link_dir.$i | " + "grep -F 'symbolic link'; if [ $? -ne 0 ]; then exit 1;" + " fi ; done; for i in `seq 1 5` ; do readlink sym_link_dir.$i | " + "grep \"dir.$i\"; if [ $? -ne 0 ]; then exit 1; fi; done; "), + ("for i in `seq 1 5`; do for j in `seq 1 10`; do ls " + "dir.$i/new_file.$j; if [ $? 
-ne 0 ]; then exit 1; fi; done; " + "done")] + + # Generate command to check according to option + if option == 2: + verify_cmd = "".join(cmd_list) + else: + verify_cmd = cmd_list[option] + + cmd = ("cd {}/test_link_self_heal; {}".format(self.mountpoint, + verify_cmd)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Symlinks aren't proper") + + def _create_hard_links_to_files(self, second_attempt=False): + """Create hard links to files""" + io_cmd = ("for i in `seq 1 5`;do for j in `seq 1 10`;do ln " + "dir.$i/file.$j dir.$i/link_file.$j;done; done") + if second_attempt: + io_cmd = ("for i in `seq 1 5`; do mkdir new_dir.$i; for j in " + "`seq 1 10`; do ln dir.$i/file.$j new_dir.$i/new_file." + "$j;done; done;") + + cmd = ("cd {}/test_link_self_heal;{}".format(self.mountpoint, io_cmd)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to create hard links to files") + + def _verify_hard_links_to_files(self, second_set=False): + """Verify if hard links to files""" + file_to_compare = "dir.$i/link_file.$j" + if second_set: + file_to_compare = "new_dir.$i/new_file.$j" + + cmd = ("cd {}/test_link_self_heal;for i in `seq 1 5`; do for j in `seq" + " 1 10`;do if [ `stat -c %i dir.$i/file.$j` -ne `stat -c %i " + "{}` ];then exit 1; fi; done; done" + .format(self.mountpoint, file_to_compare)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to verify hard links to files") + + def _bring_bricks_offline(self): + """Brings bricks offline and confirms if they are offline""" + # Select bricks to bring offline from a replica set + subvols_dict = get_subvols(self.mnode, self.volname) + subvols = subvols_dict['volume_subvols'] + self.bricks_to_bring_offline = [] + for subvol in subvols: + self.bricks_to_bring_offline.append(subvol[0]) + + # Bring bricks offline + ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' % + self.bricks_to_bring_offline) + + ret = are_bricks_offline(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks %s are not offline' + % self.bricks_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + self.bricks_to_bring_offline) + + def _restart_volume_and_bring_all_offline_bricks_online(self): + """Restart volume and bring all offline bricks online""" + ret = bring_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline, + bring_bricks_online_methods=[ + 'volume_start_force']) + self.assertTrue(ret, 'Failed to bring bricks %s online' % + self.bricks_to_bring_offline) + + # Check if bricks are back online or not + ret = are_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks not online %s even after restart' % + self.bricks_to_bring_offline) + + g.log.info('Bringing bricks %s online is successful', + self.bricks_to_bring_offline) + + def _check_arequal_on_bricks_with_a_specific_arequal(self, arequal, + brick_list): + """ + Compare an inital arequal checksum with bricks from a given brick list + """ + init_val = arequal[0].splitlines()[-1].split(':')[-1] + ret, arequals = collect_bricks_arequal(brick_list) + self.assertTrue(ret, 'Failed to get arequal on bricks') + for brick_arequal in arequals: + brick_total = brick_arequal.splitlines()[-1].split(':')[-1] + self.assertEqual(init_val, brick_total, 'Arequals not matching') + + def _check_arequal_checksum_for_the_volume(self): + """ + Check if arequals of mount point and 
bricks are + are the same. + """ + if self.volume_type == "replicated": + # Check arequals for "replicated" + brick_list = get_all_bricks(self.mnode, self.volname) + + # Get arequal before getting bricks offline + ret, arequals = collect_mounts_arequal([self.mounts[0]]) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal before getting bricks offline ' + 'is successful') + + # Get arequal on bricks and compare with mount_point_total + self._check_arequal_on_bricks_with_a_specific_arequal( + arequals, brick_list) + + # Check arequals for "distributed-replicated" + if self.volume_type == "distributed-replicated": + # Get the subvolumes + subvols_dict = get_subvols(self.mnode, self.volname) + num_subvols = len(subvols_dict['volume_subvols']) + + # Get arequals and compare + for i in range(0, num_subvols): + # Get arequal for first brick + brick_list = subvols_dict['volume_subvols'][i] + ret, arequals = collect_bricks_arequal([brick_list[0]]) + self.assertTrue(ret, 'Failed to get arequal on first brick') + + # Get arequal for every brick and compare with first brick + self._check_arequal_on_bricks_with_a_specific_arequal( + arequals, brick_list) + + def _check_heal_is_completed_and_not_in_split_brain(self): + """Check if heal is completed and volume not in split brain""" + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') + + # Check if volume is in split brian or not + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + def _check_if_there_are_files_and_dirs_to_be_healed(self): + """Check if there are files and dirs to be healed""" + ret = is_heal_complete(self.mnode, self.volname) + self.assertFalse(ret, 'Heal is completed') + g.log.info('Heal is pending') + + def _wait_for_heal_is_completed(self): + """Check if heal is completed""" + ret = monitor_heal_completion(self.mnode, self.volname, + timeout_period=3600) + self.assertTrue(ret, 'Heal has not yet completed') + + def _replace_one_random_brick(self): + """Replace one random brick from the volume""" + brick = choice(get_all_bricks(self.mnode, self.volname)) + ret = replace_brick_from_volume(self.mnode, self.volname, + self.servers, self.all_servers_info, + src_brick=brick) + self.assertTrue(ret, "Failed to replace brick %s " % brick) + g.log.info("Successfully replaced brick %s", brick) + + def test_self_heal_of_hard_links(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create a directory and create files and directories inside it + on mount point. + 3. Collect and compare arequal-checksum according to the volume type + for bricks. + 4. Bring down brick processes accoding to the volume type. + 5. Create hard links for the files created in step 2. + 6. Check if heal info is showing all the files and dirs to be healed. + 7. Bring brack all brick processes which were killed. + 8. Wait for heal to complete on the volume. + 9. Check if heal is complete and check if volume is in split brain. + 10. Collect and compare arequal-checksum according to the volume type + for bricks. + 11. Verify if hard links are proper or not. + 12. Do a lookup on mount point. + 13. Bring down brick processes accoding to the volume type. + 14. Create a second set of hard links to the files. + 15. Check if heal info is showing all the files and dirs to be healed. + 16. 
Bring brack all brick processes which were killed. + 17. Wait for heal to complete on the volume. + 18. Check if heal is complete and check if volume is in split brain. + 19. Collect and compare arequal-checksum according to the volume type + for bricks. + 20. Verify both set of hard links are proper or not. + 21. Do a lookup on mount point. + 22. Pick a random brick and replace it. + 23. Wait for heal to complete on the volume. + 24. Check if heal is complete and check if volume is in split brain. + 25. Collect and compare arequal-checksum according to the volume type + for bricks. + 26. Verify both set of hard links are proper or not. + 27. Do a lookup on mount point. + """ + # Create a directory and create files and directories inside it + # on mount point + self._create_files_and_dirs_on_mount_point() + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + for attempt in (False, True): + + # Bring down brick processes accoding to the volume type + self._bring_bricks_offline() + + # Create hardlinks for the files created in step 2 + self._create_hard_links_to_files(second_attempt=attempt) + + # Check if heal info is showing all the files and dirs to + # be healed + self._check_if_there_are_files_and_dirs_to_be_healed() + + # Bring back all brick processes which were killed + self._restart_volume_and_bring_all_offline_bricks_online() + + # Wait for heal to complete on the volume + self._wait_for_heal_is_completed() + + # Check if heal is complete and check if volume is in split brain + self._check_heal_is_completed_and_not_in_split_brain() + + # Collect and compare arequal-checksum according to the volume + # type for bricks + self._check_arequal_checksum_for_the_volume() + + # Verify if hard links are proper or not + self._verify_hard_links_to_files() + if attempt: + self._verify_hard_links_to_files(second_set=attempt) + + # Pick a random brick and replace it + self._replace_one_random_brick() + + # Wait for heal to complete on the volume + self._wait_for_heal_is_completed() + + # Check if heal is complete and check if volume is in split brain + self._check_heal_is_completed_and_not_in_split_brain() + + # Collect and compare arequal-checksum according to the volume + # type for bricks + self._check_arequal_checksum_for_the_volume() + + # Verify if hard links are proper or not + self._verify_hard_links_to_files() + self._verify_hard_links_to_files(second_set=True) + + def test_self_heal_of_soft_links(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create a directory and create files and directories inside it + on mount point. + 3. Collect and compare arequal-checksum according to the volume type + for bricks. + 4. Bring down brick processes accoding to the volume type. + 5. Create soft links for the dirs created in step 2. + 6. Verify if soft links are proper or not. + 7. Add files through the soft links. + 8. Verify if the soft links are proper or not. + 9. Check if heal info is showing all the files and dirs to be healed. + 10. Bring brack all brick processes which were killed. + 11. Wait for heal to complete on the volume. + 12. Check if heal is complete and check if volume is in split brain. + 13. Collect and compare arequal-checksum according to the volume type + for bricks. + 14. Verify if soft links are proper or not. + 15. Do a lookup on mount point. 
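Both tests in this new file lean on the same two helpers defined above: take the first brick of every replica subvolume offline, then bring everything back with a forced volume start. A condensed sketch of that pair, using the glustolibs calls imported at the top of the file (mnode and volname stand in for the test attributes):

    from glustolibs.gluster.brick_libs import (bring_bricks_offline,
                                               bring_bricks_online,
                                               are_bricks_offline,
                                               are_bricks_online)
    from glustolibs.gluster.volume_libs import get_subvols

    def kill_one_brick_per_subvol(mnode, volname):
        """Bring the first brick of each replica set offline, return the list."""
        subvols = get_subvols(mnode, volname)['volume_subvols']
        bricks = [subvol[0] for subvol in subvols]
        assert bring_bricks_offline(volname, bricks)
        assert are_bricks_offline(mnode, volname, bricks)
        return bricks

    def restore_bricks(mnode, volname, bricks):
        """Force-start the volume so the killed bricks come back online."""
        assert bring_bricks_online(mnode, volname, bricks,
                                   bring_bricks_online_methods=[
                                       'volume_start_force'])
        assert are_bricks_online(mnode, volname, bricks)

Killing only one brick per replica set keeps the volume writable, which is what allows the tests to create the second set of links while heal entries accumulate.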
+ """ + # Create a directory and create files and directories inside it + # on mount point + self._create_files_and_dirs_on_mount_point() + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + + # Bring down brick processes accoding to the volume type + self._bring_bricks_offline() + + # Create soft links for the dirs created in step 2 + self._create_soft_links_to_directories() + + # Verify if soft links are proper or not + self._verify_soft_links_to_dir() + + # Add files through the soft links + self._create_files_and_dirs_on_mount_point(second_attempt=True) + + # Verify if the soft links are proper or not + self._verify_soft_links_to_dir(option=1) + + # Check if heal info is showing all the files and dirs to + # be healed + self._check_if_there_are_files_and_dirs_to_be_healed() + + # Bring back all brick processes which were killed + self._restart_volume_and_bring_all_offline_bricks_online() + + # Wait for heal to complete on the volume + self._wait_for_heal_is_completed() + + # Check if heal is complete and check if volume is in split brain + self._check_heal_is_completed_and_not_in_split_brain() + + # Verify if soft links are proper or not + self._verify_soft_links_to_dir(option=2) diff --git a/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py b/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py new file mode 100644 index 000000000..37bd2ec52 --- /dev/null +++ b/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py @@ -0,0 +1,600 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
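The arequal helpers that both new heal tests build on reduce to a single comparison: the checksum reported on the mount must match the checksum reported on every participating brick. A compact sketch of that comparison, assuming the glusto mount object and brick path lists used elsewhere in these tests:

    from glustolibs.gluster.lib_utils import collect_bricks_arequal
    from glustolibs.io.utils import collect_mounts_arequal

    def checksums_match(mount, brick_list):
        """Compare the arequal total of a mount against each brick."""
        ret, mount_arequal = collect_mounts_arequal([mount])
        assert ret, 'Failed to collect arequal on the mount'
        # The checksum is the last field on the last line of arequal output
        mount_total = mount_arequal[0].splitlines()[-1].split(':')[-1]

        ret, brick_arequals = collect_bricks_arequal(brick_list)
        assert ret, 'Failed to collect arequal on the bricks'
        return all(arequal.splitlines()[-1].split(':')[-1] == mount_total
                   for arequal in brick_arequals)

For distributed-replicate volumes the tests run this per replica subvolume, taking the first brick of each set as the reference instead of the mount total.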
+ +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + are_bricks_online, + get_all_bricks) +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_volume_in_split_brain, + is_heal_complete, + enable_granular_heal, + disable_granular_heal) +from glustolibs.gluster.lib_utils import (add_user, del_user, group_del, + group_add, collect_bricks_arequal) +from glustolibs.gluster.volume_ops import get_volume_options +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.io.utils import collect_mounts_arequal + + +@runs_on([['distributed-replicated', 'replicated'], ['glusterfs']]) +class TestHealWithLinkFiles(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + self.first_client = self.mounts[0].client_system + self.mountpoint = self.mounts[0].mountpoint + self.user_group_created = False + + # If test case running is test_self_heal_meta_data + # create user and group + test_name_splitted = self.id().split('.') + test_id = test_name_splitted[len(test_name_splitted) - 1] + if test_id == 'test_self_heal_meta_data': + + # Create non-root group + if not group_add(self.first_client, 'qa_all'): + raise ExecutionError("Failed to create group qa_all") + + # Create non-root users + self.users = ('qa_func', 'qa_system', 'qa_perf') + for user in self.users: + if not add_user(self.first_client, user, group='qa_all'): + raise ExecutionError("Failed to create user {}" + .format(user)) + + self.user_group_created = True + g.log.info("Successfully created all users.") + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to setup and mount volume") + + def tearDown(self): + + # Delete non-root users and group if created + if self.user_group_created: + + # Delete non-root users + for user in self.users: + del_user(self.first_client, user) + g.log.info("Successfully deleted all users") + + # Delete non-root group + group_del(self.first_client, 'qa_all') + + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _set_granular_heal_to_on_or_off(self, enabled=False): + """Set granular heal to ON or OFF""" + granular = get_volume_options(self.mnode, self.volname, + 'granular-entry-heal') + if enabled: + if granular['cluster.granular-entry-heal'] != 'on': + ret = enable_granular_heal(self.mnode, self.volname) + self.assertTrue(ret, + "Unable to set granular-entry-heal to on") + else: + if granular['cluster.granular-entry-heal'] == 'on': + ret = disable_granular_heal(self.mnode, self.volname) + self.assertTrue(ret, + "Unable to set granular-entry-heal to off") + + def _run_cmd(self, io_cmd, err_msg): + """Run cmd and show error message if it fails""" + cmd = ("cd {}/test_self_heal;{}".format(self.mountpoint, io_cmd)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, err_msg) + + def _create_files_and_dirs_on_mount_point(self, index, second_set=False): + """A function to create files and dirs on mount point""" + # Create a parent directory test_self_heal on mount point + if not second_set: + ret = mkdir(self.first_client, '{}/{}'.format( + 
self.mountpoint, 'test_self_heal')) + self.assertTrue(ret, "Failed to create dir test_self_heal") + + # Create dirctories and files inside directory test_self_heal + io_cmd = ("for i in `seq 1 50`; do mkdir dir.$i; dd if=/dev/random" + " of=file.$i count=1K bs=$i; done", + + "for i in `seq 1 100`; do mkdir dir.$i; for j in `seq 1 5`;" + " do dd if=/dev/random of=dir.$i/file.$j bs=1K count=$j" + ";done;done", + + "for i in `seq 1 10`; do mkdir l1_dir.$i; for j in `seq " + "1 5`; do mkdir l1_dir.$i/l2_dir.$j; for k in `seq 1 10`;" + " do dd if=/dev/random of=l1_dir.$i/l2_dir.$j/test.$k" + " bs=1k count=$k; done; done; done;", + + "for i in `seq 51 100`; do mkdir new_dir.$i; for j in `seq" + " 1 10`; do dd if=/dev/random of=new_dir.$i/new_file.$j " + "bs=1K count=$j; done; dd if=/dev/random of=new_file.$i" + " count=1K bs=$i; done ;") + self._run_cmd( + io_cmd[index], "Failed to create dirs and files inside") + + def _delete_files_and_dirs(self): + """Delete files and dirs from mount point""" + io_cmd = ("for i in `seq 1 50`; do rm -rf dir.$i; rm -f file.$i;done") + self._run_cmd(io_cmd, "Failed to delete dirs and files") + + def _rename_files_and_dirs(self): + """Rename files and dirs from mount point""" + io_cmd = ("for i in `seq 51 100`; do mv new_file.$i renamed_file.$i;" + " for j in `seq 1 10`; do mv new_dir.$i/new_file.$j " + "new_dir.$i/renamed_file.$j ; done ; mv new_dir.$i " + "renamed_dir.$i; done;") + self._run_cmd(io_cmd, "Failed to rename dirs and files") + + def _change_meta_deta_of_dirs_and_files(self): + """Change meta data of dirs and files""" + cmds = ( + # Change permission + "for i in `seq 1 100`; do chmod 555 dir.$i; done; " + "for i in `seq 1 50`; do for j in `seq 1 5`; do chmod 666 " + "dir.$i/file.$j; done; done; for i in `seq 51 100`; do for " + "j in `seq 1 5`;do chmod 444 dir.$i/file.$j; done; done;", + + # Change ownership + "for i in `seq 1 35`; do chown -R qa_func dir.$i; done; " + "for i in `seq 36 70`; do chown -R qa_system dir.$i; done; " + "for i in `seq 71 100`; do chown -R qa_perf dir.$i; done;", + + # Change group + "for i in `seq 1 100`; do chgrp -R qa_all dir.$i; done;") + + for io_cmd in cmds: + self._run_cmd(io_cmd, + "Failed to change meta data on dirs and files") + g.log.info("Successfully changed meta data on dirs and files") + + def _verify_meta_data_of_files_and_dirs(self): + """Verify meta data of files and dirs""" + cmds = ( + # Verify permissions + "for i in `seq 1 50`; do stat -c %a dir.$i | grep -F \"555\";" + " if [ $? -ne 0 ]; then exit 1; fi; for j in `seq 1 5` ; do " + "stat -c %a dir.$i/file.$j | grep -F \"666\"; if [ $? -ne 0 ]" + "; then exit 1; fi; done; done; for i in `seq 51 100`; do " + "stat -c %a dir.$i | grep -F \"555\";if [ $? -ne 0 ]; then " + "exit 1; fi; for j in `seq 1 5`; do stat -c %a dir.$i/file.$j" + " | grep -F \"444\"; if [ $? -ne 0 ]; then exit 1; fi; done;" + "done;", + + # Verify ownership + "for i in `seq 1 35`; do stat -c %U dir.$i | grep -F " + "\"qa_func\"; if [ $? -ne 0 ]; then exit 1; fi; for j in " + "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F " + "\"qa_func\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;" + " for i in `seq 36 70` ; do stat -c %U dir.$i | grep -F " + "\"qa_system\"; if [ $? -ne 0 ]; then exit 1; fi; for j in " + "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F " + "\"qa_system\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;" + " for i in `seq 71 100` ; do stat -c %U dir.$i | grep -F " + "\"qa_perf\"; if [ $? 
-ne 0 ]; then exit 1; fi; for j in " + "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F " + "\"qa_perf\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;", + + # Verify group + "for i in `seq 1 100`; do stat -c %G dir.$i | grep -F " + "\"qa_all\"; if [ $? -ne 0 ]; then exit 1; fi; for j in " + "`seq 1 5`; do stat -c %G dir.$i/file.$j | grep -F " + "\"qa_all\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;") + + for io_cmd in cmds: + self._run_cmd(io_cmd, "Meta data of dirs and files not proper") + + def _set_and_remove_extended_attributes(self, remove=False): + """Set and remove extended attributes""" + # Command to set extended attribute to files and dirs + io_cmd = ("for i in `seq 1 100`; do setfattr -n trusted.name -v " + "testing_xattr_selfheal_on_dirs dir.$i; for j in `seq 1 " + "5`;do setfattr -n trusted.name -v " + "testing_xattr_selfheal_on_files dir.$i/file.$j; done; " + "done;") + err_msg = "Failed to set extended attributes to files and dirs" + if remove: + # Command to remove extended attribute set on files and dirs + io_cmd = ("for i in `seq 1 100`; do setfattr -x trusted.name " + "dir.$i; for j in `seq 1 5`; do setfattr -x " + "trusted.name dir.$i/file.$j ; done ; done ;") + err_msg = "Failed to remove extended attributes to files and dirs" + + self._run_cmd(io_cmd, err_msg) + + def _verify_if_extended_attributes_are_proper(self, remove=False): + """Verify if extended attributes are set or remove properly""" + io_cmd = ("for i in `seq 1 100`; do getfattr -n trusted.name -e text " + "dir.$i | grep -F 'testing_xattr_selfheal_on_dirs'; if [ $? " + "-ne 0 ]; then exit 1 ; fi ; for j in `seq 1 5` ; do " + "getfattr -n trusted.name -e text dir.$i/file.$j | grep -F " + "'testing_xattr_selfheal_on_files'; if [ $? -ne 0 ]; then " + "exit 1; fi; done; done;") + err_msg = "Extended attributes on files and dirs are not proper" + if remove: + io_cmd = ("for i in `seq 1 100`; do getfattr -n trusted.name -e " + "text dir.$i; if [ $? -eq 0 ]; then exit 1; fi; for j in" + " `seq 1 5`; do getfattr -n trusted.name -e text " + "dir.$i/file.$j; if [ $? -eq 0]; then exit 1; fi; done; " + "done;") + err_msg = "Extended attributes set to files and dirs not removed" + self._run_cmd(io_cmd, err_msg) + + def _remove_files_and_create_dirs_with_the_same_name(self): + """Remove files and create dirs with the same name""" + io_cmd = ("for i in `seq 1 10`; do for j in `seq 1 5`; do for k in " + "`seq 1 10`; do rm -f l1_dir.$i/l2_dir.$j/test.$k; mkdir " + "l1_dir.$i/l2_dir.$j/test.$k; done; done; done;") + self._run_cmd(io_cmd, + "Failed to remove files and create dirs with same name") + + def _verify_if_dirs_are_proper_or_not(self): + """Verify if dirs are proper or not""" + io_cmd = ("for i in `seq 1 10`; do for j in `seq 1 5`; do for k in " + "`seq 1 10`; do stat -c %F l1_dir.$i/l2_dir.$j/test.$k | " + "grep -F 'directory'; if [ $? 
-ne 0 ]; then exit 1; fi; " + "done; done; done;") + self._run_cmd(io_cmd, "Dirs created instead of files aren't proper") + + def _bring_bricks_offline(self): + """Brings bricks offline and confirms if they are offline""" + # Select bricks to bring offline from a replica set + subvols_dict = get_subvols(self.mnode, self.volname) + subvols = subvols_dict['volume_subvols'] + self.bricks_to_bring_offline = [] + for subvol in subvols: + self.bricks_to_bring_offline.append(subvol[0]) + + # Bring bricks offline + ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' % + self.bricks_to_bring_offline) + + ret = are_bricks_offline(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks %s are not offline' + % self.bricks_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + self.bricks_to_bring_offline) + + def _restart_volume_and_bring_all_offline_bricks_online(self): + """Restart volume and bring all offline bricks online""" + ret = bring_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline, + bring_bricks_online_methods=[ + 'volume_start_force']) + self.assertTrue(ret, 'Failed to bring bricks %s online' % + self.bricks_to_bring_offline) + + # Check if bricks are back online or not + ret = are_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks not online %s even after restart' % + self.bricks_to_bring_offline) + + g.log.info('Bringing bricks %s online is successful', + self.bricks_to_bring_offline) + + def _check_arequal_on_bricks_with_a_specific_arequal(self, arequal, + brick_list): + """ + Compare an inital arequal checksum with bricks from a given brick list + """ + init_val = arequal[0].splitlines()[-1].split(':')[-1] + ret, arequals = collect_bricks_arequal(brick_list) + self.assertTrue(ret, 'Failed to get arequal on bricks') + for brick_arequal in arequals: + brick_total = brick_arequal.splitlines()[-1].split(':')[-1] + self.assertEqual(init_val, brick_total, 'Arequals not matching') + + @staticmethod + def _add_dir_path_to_brick_list(brick_list): + """Add test_self_heal at the end of brick path""" + dir_brick_list = [] + for brick in brick_list: + dir_brick_list.append('{}/{}'.format(brick, 'test_self_heal')) + return dir_brick_list + + def _check_arequal_checksum_for_the_volume(self): + """ + Check if arequals of mount point and bricks are + are the same. 
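test_self_heal_entry_heal below runs its scenario twice, once with granular entry heal disabled and once enabled, through the _set_granular_heal_to_on_or_off helper defined near the top of this file. The essence of that toggle, under the same heal_libs imports (mnode and volname are placeholders):

    from glustolibs.gluster.heal_libs import (enable_granular_heal,
                                              disable_granular_heal)
    from glustolibs.gluster.volume_ops import get_volume_options

    def set_granular_heal(mnode, volname, enabled):
        """Flip cluster.granular-entry-heal only when it is not already in
        the requested state."""
        current = get_volume_options(
            mnode, volname,
            'granular-entry-heal')['cluster.granular-entry-heal']
        if enabled and current != 'on':
            return enable_granular_heal(mnode, volname)
        if not enabled and current == 'on':
            return disable_granular_heal(mnode, volname)
        return True  # already in the requested state

Reading the current value first keeps the helper idempotent across the two passes of the test.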
+ """ + if self.volume_type == "replicated": + # Check arequals for "replicated" + brick_list = get_all_bricks(self.mnode, self.volname) + dir_brick_list = self._add_dir_path_to_brick_list(brick_list) + + # Get arequal before getting bricks offline + work_dir = '{}/test_self_heal'.format(self.mountpoint) + ret, arequals = collect_mounts_arequal([self.mounts[0]], + path=work_dir) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal before getting bricks offline ' + 'is successful') + + # Get arequal on bricks and compare with mount_point_total + self._check_arequal_on_bricks_with_a_specific_arequal( + arequals, dir_brick_list) + + # Check arequals for "distributed-replicated" + if self.volume_type == "distributed-replicated": + # Get the subvolumes + subvols_dict = get_subvols(self.mnode, self.volname) + num_subvols = len(subvols_dict['volume_subvols']) + + # Get arequals and compare + for i in range(0, num_subvols): + # Get arequal for first brick + brick_list = subvols_dict['volume_subvols'][i] + dir_brick_list = self._add_dir_path_to_brick_list(brick_list) + ret, arequals = collect_bricks_arequal([dir_brick_list[0]]) + self.assertTrue(ret, 'Failed to get arequal on first brick') + + # Get arequal for every brick and compare with first brick + self._check_arequal_on_bricks_with_a_specific_arequal( + arequals, dir_brick_list) + + def _check_heal_is_completed_and_not_in_split_brain(self): + """Check if heal is completed and volume not in split brain""" + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') + + # Check if volume is in split brian or not + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + def _check_if_there_are_files_and_dirs_to_be_healed(self): + """Check if there are files and dirs to be healed""" + ret = is_heal_complete(self.mnode, self.volname) + self.assertFalse(ret, 'Heal is completed') + g.log.info('Heal is pending') + + def _wait_for_heal_is_completed(self): + """Check if heal is completed""" + ret = monitor_heal_completion(self.mnode, self.volname, + timeout_period=3600) + self.assertTrue(ret, 'Heal has not yet completed') + + def _check_heal_status_restart_vol_wait_and_check_data(self): + """ + Perform repatative steps mentioned below: + 1 Check if heal info is showing all the files and dirs to be healed + 2 Bring back all brick processes which were killed + 3 Wait for heal to complete on the volume + 4 Check if heal is complete and check if volume is in split brain + 5 Collect and compare arequal-checksum according to the volume type + for bricks + """ + # Check if heal info is showing all the files and dirs to be healed + self._check_if_there_are_files_and_dirs_to_be_healed() + + # Bring back all brick processes which were killed + self._restart_volume_and_bring_all_offline_bricks_online() + + # Wait for heal to complete on the volume + self._wait_for_heal_is_completed() + + # Check if heal is complete and check if volume is in split brain + self._check_heal_is_completed_and_not_in_split_brain() + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + + def _run_test_self_heal_entry_heal(self): + """Run steps of test_self_heal_entry_heal""" + # Create a directory and create files and directories inside it on + # mount 
point + self._create_files_and_dirs_on_mount_point(0) + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + + # Bring down brick processes accoding to the volume type + self._bring_bricks_offline() + + # Create a new set of files and directories on mount point + self._create_files_and_dirs_on_mount_point(3, second_set=True) + + self._check_heal_status_restart_vol_wait_and_check_data() + + # Bring down brick processes accoding to the volume type + self._bring_bricks_offline() + + # Delete files and directories from mount point + self._delete_files_and_dirs() + + self._check_heal_status_restart_vol_wait_and_check_data() + + # Bring down brick processes accoding to the volume type + self._bring_bricks_offline() + + # Rename the existing files and dirs + self._rename_files_and_dirs() + + self._check_heal_status_restart_vol_wait_and_check_data() + + def test_self_heal_entry_heal(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create a directory and create files and directories inside it + on mount point. + 3. Collect and compare arequal-checksum according to the volume type + for bricks. + 4. Bring down brick processes accoding to the volume type. + 5. Create a new set of files and directories on mount point. + 6. Check if heal info is showing all the files and dirs to be healed. + 7. Bring back all brick processes which were killed. + 8. Wait for heal to complete on the volume. + 9. Check if heal is complete and check if volume is in split brain. + 10. Collect and compare arequal-checksum according to the volume type + for bricks. + 11. Bring down brick processes accoding to the volume type. + 12. Delete files and directories from mount point. + 13. Check if heal info is showing all the files and dirs to be healed. + 14. Bring back all brick processes which were killed. + 15. Wait for heal to complete on the volume. + 16. Check if heal is complete and check if volume is in split brain. + 17. Collect and compare arequal-checksum according to the volume type + for bricks. + 18. Bring down brick processes accoding to the volume type. + 19. Rename the existing files and dirs. + 20. Check if heal info is showing all the files and dirs to be healed. + 21. Bring back all brick processes which were killed. + 22. Wait for heal to complete on the volume. + 23. Check if heal is complete and check if volume is in split brain. + 24. Collect and compare arequal-checksum according to the volume type + for bricks. + + Note: + Do this test with both Granular-entry-heal set enable and disable. + """ + for value in (False, True): + if value: + # Cleanup old data from mount point + ret, _, _ = g.run(self.first_client, + 'rm -rf {}/*'.format(self.mountpoint)) + self.assertFalse(ret, 'Failed to cleanup mount point') + g.log.info("Testing with granular heal set to enabled") + self._set_granular_heal_to_on_or_off(enabled=value) + self._run_test_self_heal_entry_heal() + + def test_self_heal_meta_data(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create a directory and create files and directories inside it + on mount point. + 3. Collect and compare arequal-checksum according to the volume type + for bricks. + 4. Bring down brick processes accoding to the volume type. + 5. Change the meta data of files and dirs. + 6. Check if heal info is showing all the files and dirs to be healed. + 7. Bring back all brick processes which were killed. + 8. 
Wait for heal to complete on the volume. + 9. Check if heal is complete and check if volume is in split brain. + 10. Collect and compare arequal-checksum according to the volume type + for bricks. + 11. Verify if the meta data of files and dirs. + 12. Bring down brick processes accoding to the volume type. + 13. Set extended attributes on the files and dirs. + 14. Verify if the extended attributes are set properly or not. + 15. Check if heal info is showing all the files and dirs to be healed. + 16. Bring back all brick processes which were killed. + 17. Wait for heal to complete on the volume. + 18. Check if heal is complete and check if volume is in split brain. + 19. Collect and compare arequal-checksum according to the volume type + for bricks. + 20. Verify if extended attributes are consitent or not. + 21. Bring down brick processes accoding to the volume type + 22. Remove extended attributes on the files and dirs. + 23. Verify if extended attributes were removed properly. + 24. Check if heal info is showing all the files and dirs to be healed. + 25. Bring back all brick processes which were killed. + 26. Wait for heal to complete on the volume. + 27. Check if heal is complete and check if volume is in split brain. + 28. Collect and compare arequal-checksum according to the volume type + for bricks. + 29. Verify if extended attributes are removed or not. + """ + # Create a directory and create files and directories inside it + # on mount point + self._create_files_and_dirs_on_mount_point(1) + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + + # Bring down brick processes accoding to the volume type + self._bring_bricks_offline() + + # Change the meta data of files and dirs + self._change_meta_deta_of_dirs_and_files() + + self._check_heal_status_restart_vol_wait_and_check_data() + + # Verify if the meta data of files and dirs + self._verify_meta_data_of_files_and_dirs() + + for value in (False, True): + # Bring down brick processes accoding to the volume type + self._bring_bricks_offline() + + # Set or remove extended attributes on the files and dirs + self._set_and_remove_extended_attributes(remove=value) + + # Verify if the extended attributes are set properly or not + self._verify_if_extended_attributes_are_proper(remove=value) + + self._check_heal_status_restart_vol_wait_and_check_data() + + # Verify if extended attributes are consitent or not + self._verify_if_extended_attributes_are_proper(remove=value) + + def test_self_heal_of_dir_with_files_removed(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create a directory and create files and directories inside it + on mount point. + 3. Collect and compare arequal-checksum according to the volume type + for bricks. + 4. Bring down brick processes accoding to the volume type. + 5. Remove all files and create dir which have name of files. + 6. Check if heal info is showing all the files and dirs to be healed. + 7. Bring back all brick processes which were killed. + 8. Wait for heal to complete on the volume. + 9. Check if heal is complete and check if volume is in split brain. + 10. Collect and compare arequal-checksum according to the volume type + for bricks. + 11. Verify if dirs are healed properly or not. 
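Every scenario in this file funnels through _check_heal_status_restart_vol_wait_and_check_data, whose core is the heal verification sequence below. A trimmed sketch using the same heal_libs and brick_libs calls (the final arequal comparison is left out here; offline_bricks is whatever list the offline helper returned):

    from glustolibs.gluster.brick_libs import bring_bricks_online
    from glustolibs.gluster.heal_libs import (is_heal_complete,
                                              is_volume_in_split_brain,
                                              monitor_heal_completion)

    def verify_heal_cycle(mnode, volname, offline_bricks):
        """Entries must be pending, then heal fully once bricks return."""
        # Entries are queued while the bricks are still down
        assert not is_heal_complete(mnode, volname), 'Nothing pending to heal'

        # Bring the killed bricks back so shd can start healing
        assert bring_bricks_online(mnode, volname, offline_bricks,
                                   bring_bricks_online_methods=[
                                       'volume_start_force'])

        # The tests allow up to an hour for the larger data sets
        assert monitor_heal_completion(mnode, volname, timeout_period=3600)
        assert is_heal_complete(mnode, volname), 'Heal is not complete'
        assert not is_volume_in_split_brain(mnode, volname), \
            'Volume is in split-brain state'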
+ """ + # Create a directory and create files and directories inside it + # on mount point + self._create_files_and_dirs_on_mount_point(2) + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + + # Bring down brick processes accoding to the volume type + self._bring_bricks_offline() + + # Remove all files and create dir which have name of files + self._remove_files_and_create_dirs_with_the_same_name() + + self._check_heal_status_restart_vol_wait_and_check_data() + + # Verify if dirs are healed properly or not + self._verify_if_dirs_are_proper_or_not() diff --git a/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py b/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py new file mode 100644 index 000000000..a449e396f --- /dev/null +++ b/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py @@ -0,0 +1,250 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" Description: + Test Cases in this module tests the self heal daemon process. 
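The test added below verifies that no healing is served from a brick whose node has no running glustershd: it kills the daemon on the source nodes and then checks that the pending-entry counts reported by heal info do not shrink. A rough sketch of that assertion, built on the heal_ops/heal_libs helpers imported in the file (source_bricks and min_entries are illustrative parameters):

    from glustolibs.gluster.heal_libs import (
        bring_self_heal_daemon_process_offline)
    from glustolibs.gluster.heal_ops import get_heal_info_summary

    def assert_no_server_side_heal(mnode, volname, source_bricks, min_entries):
        """With glustershd down on the source nodes, entries stay pending."""
        nodes = [brick.split(':')[0] for brick in source_bricks]
        assert bring_self_heal_daemon_process_offline(nodes), \
            'Unable to bring self heal daemon offline on %s' % nodes

        heal_info = get_heal_info_summary(mnode, volname)
        for brick in source_bricks:
            assert int(heal_info[brick]['numberOfEntries']) >= min_entries, \
                'Entries were healed although glustershd is down on the source'

In the test itself, healing is finally driven from the client side: the volume is remounted and simple reads (ls, cat, stat) trigger the heal once the metadata/entry/data-self-heal options have been switched on.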
+""" + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.brick_libs import ( + bring_bricks_offline, bring_bricks_online, + select_volume_bricks_to_bring_offline, get_online_bricks_list) +from glustolibs.gluster.heal_libs import ( + get_self_heal_daemon_pid, is_shd_daemonized, + monitor_heal_completion, bring_self_heal_daemon_process_offline, + disable_granular_heal) +from glustolibs.gluster.heal_ops import (get_heal_info_summary, + trigger_heal_full) +from glustolibs.io.utils import validate_io_procs +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.gluster.volume_ops import (set_volume_options, + get_volume_options) +from glustolibs.gluster.mount_ops import mount_volume, umount_volume + + +@runs_on([['replicated'], ['glusterfs']]) +class SelfHealDaemonProcessTestsWithSingleVolume(GlusterBaseClass): + """ + SelfHealDaemonProcessTestsWithSingleVolume contains tests which + verifies the self-heal daemon process on a single volume + """ + + def setUp(self): + + # Calling GlusterBaseClass setUpClass + self.get_super_method(self, 'setUp')() + + # Upload script + self.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(self.clients, [self.script_upload_path]) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients") + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + # Verify glustershd process releases its parent process + ret = is_shd_daemonized(self.servers) + if not ret: + raise ExecutionError("Self Heal Daemon process was still" + " holding parent process.") + g.log.info("Self Heal Daemon processes are online") + + def tearDown(self): + """ + Clean up the volume and umount volume from client + """ + # Stopping the volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") + g.log.info("Successful in Unmount Volume and Cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_server_side_healing_happens_only_when_glustershd_running(self): + """ + Test Script which verifies that the server side healing must happen + only if the heal daemon is running on the node where source brick + resides. 
+ + * Create and start the Replicate volume + * Check the glustershd processes - Only 1 glustershd should be listed + * Bring down the bricks without affecting the cluster + * Create files on volume + * kill the glustershd on node where bricks is running + * bring the bricks up which was killed in previous steps + * check the heal info - heal info must show pending heal info, heal + shouldn't happen since glustershd is down on source node + * issue heal + * trigger client side heal + * heal should complete successfully + """ + # pylint: disable=too-many-locals,too-many-statements,too-many-lines + + # Disable granular heal if not disabled already + granular = get_volume_options(self.mnode, self.volname, + 'granular-entry-heal') + if granular['cluster.granular-entry-heal'] == 'on': + ret = disable_granular_heal(self.mnode, self.volname) + self.assertTrue(ret, + "Unable to set granular-entry-heal to on") + + # Setting Volume options + options = {"metadata-self-heal": "on", + "entry-self-heal": "on", + "data-self-heal": "on"} + ret = set_volume_options(self.mnode, self.volname, options) + self.assertTrue(ret, 'Failed to set options %s' % options) + g.log.info("Successfully set %s for volume %s", + options, self.volname) + + # Check the self-heal daemon process + ret, pids = get_self_heal_daemon_pid(self.servers) + self.assertTrue(ret, ("Either No self heal daemon process found or " + "more than One self heal daemon process " + "found : %s" % pids)) + g.log.info("Successful in verifying self heal daemon process" + " on all nodes %s", self.servers) + + # Select the bricks to bring offline + bricks_to_bring_offline = (select_volume_bricks_to_bring_offline + (self.mnode, self.volname)) + g.log.info("Brick List to bring offline : %s", bricks_to_bring_offline) + + # Bring down the selected bricks + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, "Failed to bring down the bricks") + g.log.info("Brought down the brick process " + "for %s", bricks_to_bring_offline) + + # Write files on all mounts + all_mounts_procs, num_files_to_write = [], 100 + for mount_obj in self.mounts: + cmd = ("/usr/bin/env python %s create_files " + "-f %d --base-file-name file %s" % (self.script_upload_path, + num_files_to_write, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + all_mounts_procs.append(proc) + + # Validate IO + ret = validate_io_procs(all_mounts_procs, self.mounts) + self.assertTrue(ret, "IO failed on some of the clients") + g.log.info("IO is successful on all mounts") + + # Get online bricks list + online_bricks = get_online_bricks_list(self.mnode, self.volname) + g.log.info("Online Bricks for volume %s : %s", + self.volname, online_bricks) + + # Get the nodes where bricks are running + bring_offline_glustershd_nodes = [] + for brick in online_bricks: + bring_offline_glustershd_nodes.append(brick.split(":")[0]) + g.log.info("self heal deamon on nodes %s to be killed", + bring_offline_glustershd_nodes) + + # Kill the self heal daemon process on nodes + ret = bring_self_heal_daemon_process_offline( + bring_offline_glustershd_nodes) + self.assertTrue(ret, ("Unable to bring self heal daemon process" + " offline for nodes %s" + % bring_offline_glustershd_nodes)) + g.log.info("Sucessfully brought down self heal process for " + "nodes %s", bring_offline_glustershd_nodes) + + # Check the heal info + heal_info = get_heal_info_summary(self.mnode, self.volname) + g.log.info("Successfully got heal info %s for the volume %s", + 
heal_info, self.volname) + + # Bring bricks online + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline, 'glusterd_restart') + self.assertTrue(ret, ("Failed to bring bricks: %s online" + % bricks_to_bring_offline)) + + # Issue heal + ret = trigger_heal_full(self.mnode, self.volname) + self.assertFalse(ret, ("Able to trigger heal on volume %s where " + "self heal daemon is not running" + % self.volname)) + g.log.info("Expected : Unable to trigger heal on volume %s where " + "self heal daemon is not running", self.volname) + + # Wait for 130 sec to heal + ret = monitor_heal_completion(self.mnode, self.volname, 130) + self.assertFalse(ret, ("Heal Completed on volume %s" % self.volname)) + g.log.info("Expected : Heal pending on volume %s", self.volname) + + # Check the heal info + heal_info_after_triggering_heal = get_heal_info_summary(self.mnode, + self.volname) + g.log.info("Successfully got heal info for the volume %s", + self.volname) + + # Compare with heal pending with the files wrote + for node in online_bricks: + self.assertGreaterEqual( + int(heal_info_after_triggering_heal[node]['numberOfEntries']), + num_files_to_write, + ("Some of the files are healed from source bricks %s where " + "self heal daemon is not running" % node)) + g.log.info("EXPECTED: No files are healed from source bricks where " + "self heal daemon is not running") + + # Unmount and Mount volume again as volume options were set + # after mounting the volume + for mount_obj in self.mounts: + ret, _, _ = umount_volume(mount_obj.client_system, + mount_obj.mountpoint) + self.assertEqual(ret, 0, "Failed to unmount %s" + % mount_obj.client_system) + ret, _, _ = mount_volume(self.volname, + mtype='glusterfs', + mpoint=mount_obj.mountpoint, + mserver=self.mnode, + mclient=mount_obj.client_system) + self.assertEqual(ret, 0, "Failed to mount %s" + % mount_obj.client_system) + + all_mounts_procs = [] + for mount_obj in self.mounts: + cmd = ("cd %s;for i in `seq 1 5`; do ls -l;cat *; stat *; sleep 5;" + " done " % (mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + all_mounts_procs.append(proc) + + # Validate IO + ret = validate_io_procs(all_mounts_procs, self.mounts) + self.assertTrue(ret, "Reads failed on some of the clients") + g.log.info("Reads successful on all mounts") + + # Wait for heal to complete + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, "Unable to heal the pending entries") + g.log.info("Successfully healed the pending entries for volume %s", + self.volname) diff --git a/tests/functional/afr/test_add_brick_followed_by_remove_brick.py b/tests/functional/afr/test_add_brick_followed_by_remove_brick.py new file mode 100644 index 000000000..a653b792d --- /dev/null +++ b/tests/functional/afr/test_add_brick_followed_by_remove_brick.py @@ -0,0 +1,170 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.dht_test_utils import is_layout_complete +from glustolibs.gluster.glusterfile import (file_exists, + occurences_of_pattern_in_file) +from glustolibs.gluster.rebalance_ops import (rebalance_start, + wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_libs import expand_volume, shrink_volume +from glustolibs.io.utils import (validate_io_procs, wait_for_io_to_complete) +from glustolibs.misc.misc_libs import upload_scripts + + +@runs_on([['replicated'], ['glusterfs']]) +class TestAddBrickFollowedByRemoveBrick(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + cls.get_super_method(cls, 'setUpClass')() + + cls.first_client = cls.mounts[0].client_system + cls.mountpoint = cls.mounts[0].mountpoint + cls.is_io_running = False + + # Upload IO scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + if not file_exists(cls.first_client, cls.script_upload_path): + if not upload_scripts(cls.first_client, cls.script_upload_path): + raise ExecutionError( + "Failed to upload IO scripts to client %s" + % cls.first_client) + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to setup and mount volume") + + def tearDown(self): + + if self.is_io_running: + if not wait_for_io_to_complete(self.all_mounts_procs, + [self.mounts[0]]): + raise ExecutionError("IO failed on some of the clients") + + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _check_layout_of_bricks(self): + """Check the layout of bricks""" + ret = is_layout_complete(self.mnode, self.volname, "/") + self.assertTrue(ret, ("Volume %s: Layout is not complete", + self.volname)) + g.log.info("Volume %s: Layout is complete", self.volname) + + def _add_brick_and_wait_for_rebalance_to_complete(self): + """Add brick and wait for rebalance to complete""" + + # Add brick to volume + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, "Failed to add brick on volume %s" + % self.volname) + + # Trigger rebalance and wait for it to complete + ret, _, _ = rebalance_start(self.mnode, self.volname, + force=True) + self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s" + % self.volname) + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=1200) + self.assertTrue(ret, "Rebalance is not yet complete on the volume " + "%s" % self.volname) + g.log.info("Rebalance successfully completed") + + self._check_layout_of_bricks() + + def _remove_brick_from_volume(self): + """Remove bricks from volume""" + # Remove bricks from the volume + ret = shrink_volume(self.mnode, self.volname, rebalance_timeout=2000) + self.assertTrue(ret, "Failed to remove-brick from volume") + g.log.info("Remove-brick rebalance successful") + + def 
test_add_brick_followed_by_remove_brick(self): + """ + Test case: + 1. Create a volume, start it and mount it to a client. + 2. Start I/O on volume. + 3. Add brick and trigger rebalance, wait for rebalance to complete. + (The volume which was 1x3 should now be 2x3) + 4. Add brick and trigger rebalance, wait for rebalance to complete. + (The volume which was 2x3 should now be 3x3) + 5. Remove brick from volume such that it becomes a 2x3. + 6. Remove brick from volume such that it becomes a 1x3. + 7. Wait for I/O to complete and check for any input/output errors in + both client and rebalance logs. + """ + # Start I/O on mount point + self.all_mounts_procs = [] + cmd = ("/usr/bin/env python {} create_deep_dirs_with_files " + "--dirname-start-num {} --dir-depth 5 --dir-length 5 " + "--max-num-of-dirs 5 --num-of-files 5 {}" + .format(self.script_upload_path, 10, self.mountpoint)) + proc = g.run_async(self.first_client, cmd) + self.all_mounts_procs.append(proc) + self.is_io_running = True + + # Convert 1x3 to 2x3 and then convert 2x3 to 3x3 + for _ in range(0, 2): + self._add_brick_and_wait_for_rebalance_to_complete() + + # Convert 3x3 to 2x3 and then convert 2x3 to 1x3 + for _ in range(0, 2): + self._remove_brick_from_volume() + + # Validate I/O processes running on the nodes + ret = validate_io_procs(self.all_mounts_procs, [self.mounts[0]]) + self.is_io_running = False + self.assertTrue(ret, "IO failed on some of the clients") + g.log.info("IO on all mounts: Complete") + + # Check for Input/output errors in rebalance logs + particiapting_nodes = [] + for brick in get_all_bricks(self.mnode, self.volname): + node, _ = brick.split(':') + particiapting_nodes.append(node) + + for server in particiapting_nodes: + ret = occurences_of_pattern_in_file( + server, "Input/output error", + "/var/log/glusterfs/{}-rebalance.log".format(self.volname)) + self.assertEqual(ret, 0, + "[Input/output error] present in rebalance log" + " file") + + # Check for Input/output errors in client logs + ret = occurences_of_pattern_in_file( + self.first_client, "Input/output error", + "/var/log/glusterfs/mnt-{}_{}.log".format(self.volname, + self.mount_type)) + self.assertEqual(ret, 0, + "[Input/output error] present in client log file") + g.log.info("Expanding and shrinking volume successful and no I/O " + "errors see in rebalance and client logs") diff --git a/tests/functional/afr/test_afr_cli_gfid_splitbrain.py b/tests/functional/afr/test_afr_cli_gfid_splitbrain.py index ec3f803e9..d99aa5b36 100644 --- a/tests/functional/afr/test_afr_cli_gfid_splitbrain.py +++ b/tests/functional/afr/test_afr_cli_gfid_splitbrain.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,8 +15,8 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
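The I/O-error scan at the end of the add-brick/remove-brick test above relies on occurences_of_pattern_in_file; the same check can be approximated by hand with a plain grep over the rebalance log, which is convenient when triaging a failed run. A minimal sketch, assuming the standard /var/log/glusterfs log layout used in the test and the g.run remote executor from glusto; the helper name is illustrative and not part of any patch here:

from glusto.core import Glusto as g


def count_io_errors_in_rebalance_log(server, volname):
    """Count 'Input/output error' lines in volname's rebalance log on server."""
    log_file = "/var/log/glusterfs/{}-rebalance.log".format(volname)
    # grep -c prints the match count; it exits non-zero when nothing matches
    # (or the log is absent), so fall back to 0 in that case
    cmd = "grep -c 'Input/output error' {}".format(log_file)
    ret, out, _ = g.run(server, cmd)
    return int(out.strip()) if ret == 0 else 0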
# pylint: disable=too-many-statements, too-many-locals - from glusto.core import Glusto as g + from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on from glustolibs.gluster.brick_libs import (get_all_bricks, @@ -38,16 +38,14 @@ class TestSelfHeal(GlusterBaseClass): def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on " "mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, script_local_path) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts " "to clients %s" % cls.clients) @@ -79,7 +77,7 @@ class TestSelfHeal(GlusterBaseClass): raise ExecutionError("Failed to create volume") g.log.info("Successful in cleaning up Volume %s", cls.volname) - GlusterBaseClass.tearDownClass.im_func(cls) + cls.get_super_method(cls, 'tearDownClass')() def test_afr_gfid_heal(self): @@ -111,9 +109,10 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("creating a file from mount point") all_mounts_procs = [] - cmd = ("python %s create_files " - "-f 1 --base-file-name test_file --fixed-file-size 1k %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = ("/usr/bin/env python %s create_files " + "-f 1 --base-file-name test_file --fixed-file-size 1k %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, cmd) all_mounts_procs.append(proc) # Validate I/O @@ -137,9 +136,10 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("creating a new file of same name from mount point") all_mounts_procs = [] - cmd = ("python %s create_files " - "-f 1 --base-file-name test_file --fixed-file-size 1k %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = ("/usr/bin/env python %s create_files " + "-f 1 --base-file-name test_file --fixed-file-size 1k %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, cmd) all_mounts_procs.append(proc) # Validate I/O diff --git a/tests/functional/afr/test_afr_cli_no_splitbrain_resolution.py b/tests/functional/afr/test_afr_cli_no_splitbrain_resolution.py index e9a6681f4..1acd11faa 100644 --- a/tests/functional/afr/test_afr_cli_no_splitbrain_resolution.py +++ b/tests/functional/afr/test_afr_cli_no_splitbrain_resolution.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2021 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,8 +15,8 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
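The hunks above show the modernisation that recurs across these AFR tests: the Python 2-only GlusterBaseClass.setUpClass.im_func(cls) calls become cls.get_super_method(cls, 'setUpClass')(), upload_scripts is given the already-stored cls.script_upload_path, and IO commands are launched via /usr/bin/env python. Distilled into a bare skeleton (the class name and volume details are placeholders, not part of any patch here), the convention looks roughly like this:

from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.misc.misc_libs import upload_scripts


@runs_on([['replicated'], ['glusterfs']])
class ExampleAfrTest(GlusterBaseClass):

    @classmethod
    def setUpClass(cls):
        # Python 2/3 safe replacement for GlusterBaseClass.setUpClass.im_func(cls)
        cls.get_super_method(cls, 'setUpClass')()
        cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
                                  "file_dir_ops.py")
        if not upload_scripts(cls.clients, cls.script_upload_path):
            raise ExecutionError("Failed to upload IO scripts to clients")

    def setUp(self):
        self.get_super_method(self, 'setUp')()
        if not self.setup_volume_and_mount_volume(mounts=self.mounts):
            raise ExecutionError("Failed to setup and mount volume")

    def tearDown(self):
        if not self.unmount_volume_and_cleanup_volume(mounts=self.mounts):
            raise ExecutionError("Failed to unmount and cleanup volume")
        self.get_super_method(self, 'tearDown')()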
# pylint: disable=too-many-statements, too-many-locals, unused-variable - from glusto.core import Glusto as g + from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on from glustolibs.gluster.brick_libs import (get_all_bricks, @@ -41,16 +41,14 @@ class TestSelfHeal(GlusterBaseClass): def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on " "mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, script_local_path) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts " "to clients %s" % cls.clients) @@ -82,7 +80,7 @@ class TestSelfHeal(GlusterBaseClass): raise ExecutionError("Failed to create volume") g.log.info("Successful in cleaning up Volume %s", cls.volname) - GlusterBaseClass.tearDownClass.im_func(cls) + cls.get_super_method(cls, 'tearDownClass')() def test_afr_gfid_heal(self): @@ -114,16 +112,16 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("creating 5 files from mount point") all_mounts_procs = [] - for mount_obj in self.mounts: - cmd = ("python %s create_files " - "-f 5 --base-file-name test_file --fixed-file-size 1k %s" - % (self.script_upload_path, mount_obj.mountpoint)) - proc = g.run_async(mount_obj.client_system, cmd, - user=mount_obj.user) - all_mounts_procs.append(proc) + cmd = ("/usr/bin/env python %s create_files -f 5 " + "--base-file-name test_file --fixed-file-size 1k %s" % ( + self.script_upload_path, self.mounts[0].mountpoint)) + proc = g.run_async(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + all_mounts_procs.append(proc) + # Validate I/O g.log.info("Wait for IO to complete and validate IO.....") - ret = validate_io_procs(all_mounts_procs, self.mounts) + ret = validate_io_procs(all_mounts_procs, [self.mounts[0]]) self.assertTrue(ret, "IO failed on some of the clients") g.log.info("IO is successful on all mounts") g.log.info("Successfully created a file from mount point") @@ -150,16 +148,16 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("creating 5 new files of same name from mount point") all_mounts_procs = [] - for mount_obj in self.mounts: - cmd = ("python %s create_files " - "-f 5 --base-file-name test_file --fixed-file-size 10k %s" - % (self.script_upload_path, mount_obj.mountpoint)) - proc = g.run_async(mount_obj.client_system, cmd, - user=mount_obj.user) - all_mounts_procs.append(proc) + cmd = ("/usr/bin/env python %s create_files -f 5 " + "--base-file-name test_file --fixed-file-size 10k %s" % ( + self.script_upload_path, self.mounts[0].mountpoint)) + proc = g.run_async(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + all_mounts_procs.append(proc) + # Validate I/O g.log.info("Wait for IO to complete and validate IO.....") - ret = validate_io_procs(all_mounts_procs, self.mounts) + ret = validate_io_procs(all_mounts_procs, [self.mounts[0]]) self.assertTrue(ret, "IO failed on some of the clients") g.log.info("IO is successful on all mounts") g.log.info("Successfully created a new file of same name " @@ -225,10 +223,11 @@ class TestSelfHeal(GlusterBaseClass): fpath = (self.mounts[0].mountpoint + '/test_file' + str(fcount) 
+ '.txt') status = get_fattr(self.mounts[0].client_system, - fpath, 'replica.split-brain-status') + fpath, 'replica.split-brain-status', + encode="text") compare_string = ("The file is not under data or metadata " "split-brain") - self.assertEqual(status.rstrip('\x00'), compare_string, + self.assertEqual(status, compare_string, "file test_file%s is under" " split-brain" % str(fcount)) g.log.info("none of the files are under split-brain") diff --git a/tests/functional/afr/test_afr_dir_entry_creation_with_subvol_down.py b/tests/functional/afr/test_afr_dir_entry_creation_with_subvol_down.py new file mode 100644 index 000000000..9cc249e3c --- /dev/null +++ b/tests/functional/afr/test_afr_dir_entry_creation_with_subvol_down.py @@ -0,0 +1,194 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from time import sleep + +from glusto.core import Glusto as g +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + are_bricks_offline) +from glustolibs.gluster.dht_test_utils import (create_brickobjectlist, + find_specific_hashed) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.glusterfile import file_exists +from glustolibs.gluster.rebalance_ops import (rebalance_start, + wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_ops import volume_start +from glustolibs.gluster.volume_libs import ( + verify_all_process_of_volume_are_online, get_subvols) +from glustolibs.gluster.mount_ops import umount_volume, mount_volume + + +@runs_on([['distributed-arbiter', 'distributed-replicated'], ['glusterfs']]) +class TestAfrDirEntryCreationWithSubvolDown(GlusterBaseClass): + + def setUp(self): + # calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Changing the distrubte count to 3 as per the test. + self.volume['voltype']['dist_count'] = 3 + # Setup volume and mount it on three clients. 
+ if not self.setup_volume_and_mount_volume(mounts=[self.mounts[0]]): + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + def tearDown(self): + + # Unmount and cleanup the volume + if not self.unmount_volume_and_cleanup_volume(mounts=[self.mounts[0]]): + raise ExecutionError("Unable to unmount and cleanup volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _check_file_exists(self, subvol, directory, exists=True): + """ Validates given directory present on brick path of each subvol """ + for each_brick in subvol: + node, brick_path = each_brick.split(":") + path = brick_path + directory + ret = file_exists(node, path) + self.assertEqual(exists, ret, "Unexpected behaviour, existence " + "check of directory {} on brick returned" + " {}".format(directory, each_brick)) + + def _create_file(self, location, file_name): + """ Creates a file with file_name on the specified location""" + source_file = "{}/{}".format(location, file_name) + ret, _, err = g.run(self.mounts[0].client_system, + ("touch %s" % source_file)) + self.assertEqual(ret, 0, ("Failed to create {} on {}: err" + " {}".format(source_file, location, err))) + g.log.info("Successfully created %s on: %s", file_name, location) + + def _create_number_of_files_on_the_subvol(self, subvol_object, directory, + number_of_files, mountpath): + """Creates number of files specified on the given subvol""" + name = None + for _ in range(number_of_files): + hashed = find_specific_hashed(self.subvols, directory, + subvol_object, existing_names=name) + self.assertIsNotNone(hashed, "Couldn't find a subvol to " + "create a file.") + self._create_file(mountpath, hashed.newname) + name = hashed.newname + + def test_afr_dir_entry_creation_with_subvol_down(self): + """ + 1. Create a distributed-replicated(3X3)/distributed-arbiter(3X(2+1)) + and mount it on one client + 2. Kill 3 bricks corresponding to the 1st subvol + 3. Unmount and remount the volume on the same client + 4. Create deep dir from mount point + mkdir -p dir1/subdir1/deepdir1 + 5. Create files under dir1/subdir1/deepdir1; touch <filename> + 6. Now bring all sub-vols up by volume start force + 7. Validate backend bricks for dir creation, the subvol which is + offline will have no dirs created, whereas other subvols will have + dirs created from step 4 + 8. Trigger heal from client by "#find . | xargs stat" + 9. Verify that the directory entries are created on all back-end bricks + 10. Create new dir (dir2) on location dir1/subdir1/deepdir1 + 11. Trigger rebalance and wait for the completion + 12. Check backend bricks for all entries of dirs + 13. Check if files are getting created on the subvol which was offline + """ + # Bring down first subvol of bricks offline + self.subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + first_subvol = self.subvols[0] + ret = bring_bricks_offline(self.volname, first_subvol) + self.assertTrue(ret, "Unable to bring {} bricks offline". 
+ format(first_subvol)) + + # Check bricks are offline or not + ret = are_bricks_offline(self.mnode, self.volname, first_subvol) + self.assertTrue(ret, "Bricks {} are still online".format(first_subvol)) + + # Unmount and remount the volume + ret, _, _ = umount_volume( + self.mounts[0].client_system, self.mounts[0].mountpoint) + self.assertFalse(ret, "Failed to unmount volume.") + ret, _, _ = mount_volume(self.volname, self.mount_type, + self.mounts[0].mountpoint, self.mnode, + self.mounts[0].client_system) + self.assertFalse(ret, "Failed to remount volume.") + g.log.info('Successfully umounted and remounted volume.') + + # At this step, sleep is must otherwise file creation will fail + sleep(2) + + # Create dir `dir1/subdir1/deepdir1` on mountpont + directory1 = "dir1/subdir1/deepdir1" + path = self.mounts[0].mountpoint + "/" + directory1 + ret = mkdir(self.mounts[0].client_system, path, parents=True) + self.assertTrue(ret, "Directory {} creation failed".format(path)) + + # Create files on the 2nd and 3rd subvols which are online + brickobject = create_brickobjectlist(self.subvols, directory1) + self.assertIsNotNone(brickobject, "Failed to get brick object list") + self._create_number_of_files_on_the_subvol( + brickobject[1], directory1, 5, mountpath=path) + self._create_number_of_files_on_the_subvol( + brickobject[2], directory1, 5, mountpath=path) + + # Bring bricks online using volume start force + ret, _, err = volume_start(self.mnode, self.volname, force=True) + self.assertEqual(ret, 0, err) + g.log.info("Volume: %s started successfully", self.volname) + + # Check all bricks are online + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, "Few process after volume start are offline for " + "volume: {}".format(self.volname)) + + # Validate Directory is not created on the bricks of the subvol which + # is offline + for subvol in self.subvols: + self._check_file_exists(subvol, "/" + directory1, + exists=(subvol != first_subvol)) + + # Trigger heal from the client + cmd = "cd {}; find . 
| xargs stat".format(self.mounts[0].mountpoint) + ret, _, err = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, err) + + # Validate the directory1 is present on all the bricks + for subvol in self.subvols: + self._check_file_exists(subvol, "/" + directory1, exists=True) + + # Create new dir (dir2) on location dir1/subdir1/deepdir1 + directory2 = "/" + directory1 + '/dir2' + path = self.mounts[0].mountpoint + directory2 + ret = mkdir(self.mounts[0].client_system, path, parents=True) + self.assertTrue(ret, "Directory {} creation failed".format(path)) + + # Trigger rebalance and validate the completion + ret, _, err = rebalance_start(self.mnode, self.volname) + self.assertEqual(ret, 0, err) + g.log.info("Rebalance on volume %s started successfully", self.volname) + ret = wait_for_rebalance_to_complete(self.mnode, self.volname) + self.assertTrue(ret, "Rebalance didn't complete on the volume: " + "{}".format(self.volname)) + + # Validate all dirs are present on all bricks in each subvols + for subvol in self.subvols: + for each_dir in ("/" + directory1, directory2): + self._check_file_exists(subvol, each_dir, exists=True) + + # Validate if files are getting created on the subvol which was + # offline + self._create_number_of_files_on_the_subvol( + brickobject[0], directory1, 5, mountpath=path) diff --git a/tests/functional/afr/test_afr_reset_brick_heal_full.py b/tests/functional/afr/test_afr_reset_brick_heal_full.py new file mode 100644 index 000000000..bdc90ee62 --- /dev/null +++ b/tests/functional/afr/test_afr_reset_brick_heal_full.py @@ -0,0 +1,157 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from random import choice + +from glusto.core import Glusto as g +from glustolibs.gluster.brick_ops import reset_brick +from glustolibs.gluster.brick_libs import (get_all_bricks, are_bricks_offline) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.glusterdir import rmdir +from glustolibs.gluster.glusterfile import remove_file +from glustolibs.gluster.heal_ops import trigger_heal_full +from glustolibs.gluster.heal_libs import monitor_heal_completion +from glustolibs.gluster.lib_utils import collect_bricks_arequal +from glustolibs.gluster.volume_libs import ( + get_subvols, wait_for_volume_process_to_be_online) +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import (validate_io_procs, wait_for_io_to_complete) + + +@runs_on([['replicated', 'distributed-replicated'], + ['glusterfs', 'nfs']]) +class TestAfrResetBrickHeal(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload IO scripts for running IO on mounts + cls.script_upload_path = ( + "/usr/share/glustolibs/io/scripts/file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients {}". + format(cls.clients)) + + def setUp(self): + # calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup volume and mount it. + if not self.setup_volume_and_mount_volume(self.mounts): + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + def tearDown(self): + # Wait if any IOs are pending from the test + if self.all_mounts_procs: + ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts) + if ret: + raise ExecutionError( + "Wait for IO completion failed on some of the clients") + + # Unmount and cleanup the volume + if not self.unmount_volume_and_cleanup_volume(self.mounts): + raise ExecutionError("Unable to unmount and cleanup volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + @classmethod + def tearDownClass(cls): + for each_client in cls.clients: + ret = remove_file(each_client, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to delete file {}". + format(cls.script_upload_path)) + + cls.get_super_method(cls, 'tearDownClass')() + + def test_afr_reset_brick_heal_full(self): + """ + 1. Create files/dirs from mount point + 2. With IO in progress execute reset-brick start + 3. Now format the disk from back-end, using rm -rf <brick path> + 4. Execute reset brick commit and check for the brick is online. + 5. Issue volume heal using "gluster vol heal <volname> full" + 6. 
Check arequal for all bricks to verify all backend bricks + including the resetted brick have same data + """ + self.all_mounts_procs = [] + for count, mount_obj in enumerate(self.mounts): + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d --dir-depth 3 --dir-length 5 " + "--max-num-of-dirs 5 --num-of-files 5 %s" % ( + self.script_upload_path, count, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + self.all_mounts_procs.append(proc) + + all_bricks = get_all_bricks(self.mnode, self.volname) + self.assertIsNotNone(all_bricks, "Unable to fetch bricks of volume") + brick_to_reset = choice(all_bricks) + + # Start reset brick + ret, _, err = reset_brick(self.mnode, self.volname, + src_brick=brick_to_reset, option="start") + self.assertEqual(ret, 0, err) + g.log.info("Reset brick: %s started", brick_to_reset) + + # Validate the brick is offline + ret = are_bricks_offline(self.mnode, self.volname, [brick_to_reset]) + self.assertTrue(ret, "Brick:{} is still online".format(brick_to_reset)) + + # rm -rf of the brick directory + node, brick_path = brick_to_reset.split(":") + ret = rmdir(node, brick_path, force=True) + self.assertTrue(ret, "Unable to delete the brick {} on " + "node {}".format(brick_path, node)) + + # Reset brick commit + ret, _, err = reset_brick(self.mnode, self.volname, + src_brick=brick_to_reset, option="commit") + self.assertEqual(ret, 0, err) + g.log.info("Reset brick committed successfully") + + # Check the brick is online + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, "Few volume processess are offline for the " + "volume: {}".format(self.volname)) + + # Trigger full heal + ret = trigger_heal_full(self.mnode, self.volname) + self.assertTrue(ret, "Unable to trigger the heal full command") + + # Wait for the heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, "Heal didn't complete in 20 mins time") + + # Validate io on the clients + ret = validate_io_procs(self.all_mounts_procs, self.mounts) + self.assertTrue(ret, "IO failed on the mounts") + self.all_mounts_procs *= 0 + + # Check arequal of the back-end bricks after heal completion + all_subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + for subvol in all_subvols: + ret, arequal_from_subvol = collect_bricks_arequal(subvol) + self.assertTrue(ret, "Arequal is collected successfully across the" + " bricks in the subvol {}".format(subvol)) + self.assertEqual(len(set(arequal_from_subvol)), 1, "Arequal is " + "same on all the bricks in the subvol") diff --git a/tests/functional/afr/test_afr_with_snapshot.py b/tests/functional/afr/test_afr_with_snapshot.py new file mode 100644 index 000000000..e6c8daf8c --- /dev/null +++ b/tests/functional/afr/test_afr_with_snapshot.py @@ -0,0 +1,383 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +""" +Description: Test cases related to afr snapshot. +""" +from time import sleep +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.snap_ops import (snap_create, snap_restore_complete) +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.heal_libs import (is_heal_complete) +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import (collect_mounts_arequal, + list_all_files_and_dirs_mounts, + wait_for_io_to_complete) + + +@runs_on([['replicated', 'distributed-replicated'], + ['glusterfs', 'nfs', 'cifs']]) +class TestAFRSnapshot(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" % + cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + self.all_mounts_procs, self.io_validation_complete = [], False + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + self.bricks_list = get_all_bricks(self.mnode, self.volname) + + def tearDown(self): + """ + If test method failed before validating IO, tearDown waits for the + IO's to complete and checks for the IO exit status + + Cleanup and umount volume + """ + if not self.io_validation_complete: + ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts) + if not ret: + raise ExecutionError("IO failed on some of the clients") + g.log.info("IO is successful on all mounts") + + # List all files and dirs created + ret = list_all_files_and_dirs_mounts(self.mounts) + if not ret: + raise ExecutionError("Failed to list all files and dirs") + g.log.info("Listing all files and directories is successful") + + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Checking brick dir and cleaning it. 
+ for brick_path in self.bricks_list: + server, brick = brick_path.split(':') + cmd = "rm -rf " + brick + ret, _, _ = g.run(server, cmd) + if ret: + raise ExecutionError("Failed to delete the brick " + "dirs of deleted volume.") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_entry_transaction_crash_consistency_create(self): + """ + Test entry transaction crash consistency : create + + Description: + - Create IO + - Calculate arequal before creating snapshot + - Create snapshot + - Modify the data + - Stop the volume + - Restore snapshot + - Start the volume + - Get arequal after restoring snapshot + - Compare arequals + """ + + # Creating files on client side + count = 1 + for mount_obj in self.mounts: + cmd = ("/usr/bin/env python %s create_files " + "--base-file-name %d -f 200 %s" + % (self.script_upload_path, + count, mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + self.all_mounts_procs.append(proc) + count = count + 10 + + # Wait for IO to complete + ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts) + self.assertTrue(ret, "IO failed to complete on some of the clients") + self.io_validation_complete = True + g.log.info("IO is successful on all mounts") + + # Get arequal before creating snapshot + ret, result_before_snapshot = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, "Collecting arequal-checksum failed") + + # Create snapshot + snapshot_name = ('entry_transaction_crash_consistency_create-%s-%s' + % (self.volname, self.mount_type)) + ret, _, err = snap_create(self.mnode, self.volname, snapshot_name) + self.assertEqual(ret, 0, err) + g.log.info("Snapshot %s created successfully", snapshot_name) + + # Modify the data + self.all_mounts_procs = [] + for mount_obj in self.mounts: + cmd = ("/usr/bin/env python %s append %s" + % (self.script_upload_path, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + self.all_mounts_procs.append(proc) + + # Wait for IO to complete + ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts) + self.assertTrue(ret, "IO failed to complete on some of the clients") + self.io_validation_complete = True + g.log.info("IO is successful on all mounts") + + # Restore snapshot + ret = snap_restore_complete(self.mnode, self.volname, + snapshot_name) + self.assertTrue(ret, 'Failed to restore snapshot %s' + % snapshot_name) + g.log.info("Snapshot %s restored successfully", snapshot_name) + + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') + + # Wait for volume graph to get loaded. 
+ sleep(10) + + # Get arequal after restoring snapshot + ret, result_after_restoring = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, "Collecting arequal-checksum failed") + + # Checking arequal before creating snapshot + # and after restoring snapshot + self.assertEqual(result_before_snapshot, result_after_restoring, + 'Checksums are not equal') + g.log.info('Checksums are equal') + + def test_entry_transaction_crash_consistency_delete(self): + """ + Test entry transaction crash consistency : delete + + Description: + - Create IO of 50 files + - Delete 20 files + - Calculate arequal before creating snapshot + - Create snapshot + - Delete 20 files more + - Stop the volume + - Restore snapshot + - Start the volume + - Get arequal after restoring snapshot + - Compare arequals + """ + + # Creating files on client side + count = 1 + for mount_obj in self.mounts: + cmd = ("/usr/bin/env python %s create_files " + "--base-file-name %d -f 25 %s" % ( + self.script_upload_path, + count, mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + self.all_mounts_procs.append(proc) + count = count + 10 + + # Wait for IO to complete + ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts) + self.assertTrue(ret, "IO failed to complete on some of the clients") + self.io_validation_complete = True + g.log.info("IO is successful on all mounts") + + # Delete 20 files from the dir + for mount_object in self.mounts: + self.io_validation_complete = False + g.log.info("Deleting files for %s:%s", + mount_object.client_system, mount_object.mountpoint) + command = ("for file in `ls -1 | head -n 20`;do " + "rm -rf %s/$file; done" % mount_object.mountpoint) + + ret, _, err = g.run(mount_object.client_system, command) + self.assertEqual(ret, 0, err) + self.io_validation_complete = True + g.log.info("Deleted files for %s:%s successfully", + mount_object.client_system, mount_object.mountpoint) + + # Get arequal before creating snapshot + ret, result_before_snapshot = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, "Collecting arequal-checksum failed") + + # Create snapshot + snapshot_name = ('entry_transaction_crash_consistency_delete-%s-%s' + % (self.volname, self.mount_type)) + ret, _, err = snap_create(self.mnode, self.volname, snapshot_name) + self.assertEqual(ret, 0, err) + g.log.info("Snapshot %s created successfully", snapshot_name) + + # Delete all the remaining files + for mount_object in self.mounts: + self.io_validation_complete = False + command = ("for file in `ls -1 | head -n 20`;do " + "rm -rf %s/$file; done" % mount_object.mountpoint) + ret, _, err = g.run(mount_object.client_system, command) + self.assertEqual(ret, 0, err) + self.io_validation_complete = True + g.log.info("Deleted files for %s:%s successfully", + mount_object.client_system, mount_object.mountpoint) + + # Restore snapshot + ret = snap_restore_complete(self.mnode, self.volname, + snapshot_name) + self.assertTrue(ret, 'Failed to restore snapshot %s' + % snapshot_name) + g.log.info("Snapshot %s restored successfully", snapshot_name) + + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') + + # Wait for volume graph to get loaded. 
+ sleep(10) + + # Get arequal after restoring snapshot + ret, result_after_restoring = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, "Collecting arequal-checksum failed") + + # Checking arequal before creating snapshot + # and after restoring snapshot + self.assertEqual(result_before_snapshot, result_after_restoring, + 'Checksums are not equal') + g.log.info('Checksums are equal') + + def test_entry_transaction_crash_consistency_rename(self): + """ + Test entry transaction crash consistency : rename + + Description: + - Create IO of 50 files + - Rename 20 files + - Calculate arequal before creating snapshot + - Create snapshot + - Rename 20 files more + - Stop the volume + - Restore snapshot + - Start the volume + - Get arequal after restoring snapshot + - Compare arequals + """ + + # Creating files on client side + count = 1 + for mount_obj in self.mounts: + cmd = ("/usr/bin/env python %s create_files " + "--base-file-name %d -f 25 %s" + % (self.script_upload_path, + count, mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + self.all_mounts_procs.append(proc) + count = count + 10 + + # Wait for IO to complete + ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts) + self.assertTrue(ret, "IO failed to complete on some of the clients") + self.io_validation_complete = True + g.log.info("IO is successful on all mounts") + + # Rename files + self.all_mounts_procs, self.io_validation_complete = [], False + cmd = ("/usr/bin/env python %s mv -s FirstRename %s" + % (self.script_upload_path, + self.mounts[0].mountpoint)) + proc = g.run_async(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + self.all_mounts_procs.append(proc) + + # Wait for IO to complete + ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts[0]) + self.assertTrue(ret, "IO failed to complete on some of the clients") + self.io_validation_complete = True + g.log.info("IO is successful on all mounts") + + # Get arequal before creating snapshot + ret, result_before_snapshot = collect_mounts_arequal(self.mounts[0]) + self.assertTrue(ret, "Collecting arequal-checksum failed") + + # Create snapshot + snapshot_name = ('entry_transaction_crash_consistency_rename-%s-%s' + % (self.volname, self.mount_type)) + ret, _, err = snap_create(self.mnode, self.volname, snapshot_name) + self.assertEqual(ret, 0, err) + g.log.info("Snapshot %s created successfully", snapshot_name) + + # Rename files + self.all_mounts_procs, self.io_validation_complete = [], False + cmd = ("/usr/bin/env python %s mv -s SecondRename %s" + % (self.script_upload_path, + self.mounts[0].mountpoint)) + proc = g.run_async(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + self.all_mounts_procs.append(proc) + + # Wait for IO to complete + ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts[0]) + self.assertTrue(ret, "IO failed to complete on some of the clients") + self.io_validation_complete = True + g.log.info("IO is successful on all mounts") + + # Restore snapshot + ret = snap_restore_complete(self.mnode, self.volname, + snapshot_name) + self.assertTrue(ret, 'Failed to restore snapshot %s' + % snapshot_name) + g.log.info("Snapshot %s restored successfully", snapshot_name) + + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') + + # Wait for volume graph to get loaded. 
+ sleep(10) + + # Get arequal after restoring snapshot + ret, result_after_restoring = collect_mounts_arequal(self.mounts[0]) + self.assertTrue(ret, "Collecting arequal-checksum failed") + + # Checking arequal before creating snapshot + # and after restoring snapshot + self.assertEqual(result_before_snapshot, result_after_restoring, + 'Checksums are not equal') + g.log.info('Checksums are equal') diff --git a/tests/functional/afr/test_arb_to_repl_conversion_with_io.py b/tests/functional/afr/test_arb_to_repl_conversion_with_io.py new file mode 100644 index 000000000..8e54fa6ee --- /dev/null +++ b/tests/functional/afr/test_arb_to_repl_conversion_with_io.py @@ -0,0 +1,221 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from datetime import datetime, timedelta +from time import sleep, time + +from glusto.core import Glusto as g + +from glustolibs.gluster.brick_ops import add_brick, remove_brick +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.heal_libs import monitor_heal_completion +from glustolibs.gluster.heal_ops import trigger_heal +from glustolibs.gluster.lib_utils import form_bricks_list +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.volume_ops import get_volume_info, set_volume_options +from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs, + wait_for_io_to_complete) +from glustolibs.misc.misc_libs import upload_scripts + + +# pylint: disable=too-many-locals,too-many-statements +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestArbiterToReplicatedConversion(GlusterBaseClass): + @classmethod + def setUpClass(cls): + cls.get_super_method(cls, 'setUpClass')() + cls.script_path = '/usr/share/glustolibs/io/scripts/file_dir_ops.py' + ret = upload_scripts(cls.clients, cls.script_path) + if not ret: + raise ExecutionError('Failed to upload IO scripts to clients') + + def setUp(self): + self.get_super_method(self, 'setUp')() + self.all_mounts_procs = [] + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError('Failed to setup and mount ' + '{}'.format(self.volname)) + + def tearDown(self): + if self.all_mounts_procs: + ret = wait_for_io_to_complete(self.all_mounts_procs, + [self.mounts[1]]) + if not ret: + raise ExecutionError('Wait for IO completion failed on client') + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError('Not able to unmount and cleanup ' + '{}'.format(self.volname)) + self.get_super_method(self, 'tearDown')() + + def _get_arbiter_bricks(self): + """ + Returns tuple of arbiter bricks from the volume + """ + + # Get all subvols + subvols = get_subvols(self.mnode, self.volname) + 
self.assertTrue(subvols, + 'Not able to get subvols of {}'.format(self.volname)) + + # Last brick in every subvol will be the arbiter + return tuple(zip(*subvols.get('volume_subvols')))[-1] + + def test_arb_to_repl_conversion_with_io(self): + """ + Description: To perform a volume conversion from Arbiter to Replicated + with background IOs + + Steps: + - Create, start and mount an arbiter volume in two clients + - Create two dir's, fill IO in first dir and take note of arequal + - Start a continuous IO from second directory + - Convert arbiter to x2 replicated volume (remove brick) + - Convert x2 replicated to x3 replicated volume (add brick) + - Wait for ~5 min for vol file to be updated on all clients + - Enable client side heal options and issue volume heal + - Validate heal completes with no errors and arequal of first dir + matches against initial checksum + """ + + client, m_point = (self.mounts[0].client_system, + self.mounts[0].mountpoint) + + # Fill IO in first directory + cmd = ('/usr/bin/env python {} ' + 'create_deep_dirs_with_files --dir-depth 10 ' + '--fixed-file-size 1M --num-of-files 100 ' + '--dirname-start-num 1 {}'.format(self.script_path, m_point)) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, 'Not able to fill directory with IO') + + # Take `arequal` checksum on first directory + ret, exp_arequal = collect_mounts_arequal(self.mounts[0], + m_point + '/user1') + self.assertTrue(ret, 'Failed to get arequal checksum on mount') + + # Start continuous IO from second directory + client = self.mounts[1].client_system + cmd = ('/usr/bin/env python {} ' + 'create_deep_dirs_with_files --dir-depth 10 ' + '--fixed-file-size 1M --num-of-files 250 ' + '--dirname-start-num 2 {}'.format(self.script_path, m_point)) + proc = g.run_async(client, cmd) + self.all_mounts_procs.append(proc) + + # Wait for IO to fill before volume conversion + sleep(30) + + # Remove arbiter bricks ( arbiter to x2 replicated ) + kwargs = {'replica_count': 2} + ret, _, _ = remove_brick(self.mnode, + self.volname, + self._get_arbiter_bricks(), + option='force', + **kwargs) + self.assertEqual(ret, 0, 'Not able convert arbiter to x2 replicated ' + 'volume') + # Wait for IO to fill after volume conversion + sleep(30) + + # Add bricks (x2 replicated to x3 replicated) + kwargs['replica_count'] = 3 + vol_info = get_volume_info(self.mnode, volname=self.volname) + self.assertIsNotNone(vol_info, 'Not able to get volume info') + dist_count = vol_info[self.volname]['distCount'] + bricks_list = form_bricks_list( + self.mnode, + self.volname, + number_of_bricks=int(dist_count) * 1, + servers=self.servers, + servers_info=self.all_servers_info, + ) + self.assertTrue(bricks_list, 'Not able to get unused list of bricks') + ret, _, _ = add_brick(self.mnode, + self.volname, + bricks_list, + force='True', + **kwargs) + self.assertEqual(ret, 0, 'Not able to add-brick to ' + '{}'.format(self.volname)) + # Wait for IO post x3 replicated volume conversion + sleep(30) + + # Validate volume info + vol_info = get_volume_info(self.mnode, volname=self.volname) + self.assertIsNotNone(vol_info, 'Not able to get volume info') + vol_info = vol_info[self.volname] + repl_count, brick_count = (vol_info['replicaCount'], + vol_info['brickCount']) + + # Wait for the volfile to sync up on clients + cmd = ('grep -ir connected {}/.meta/graphs/active/{}-client-*/private ' + '| wc -l') + wait_time = time() + 300 + in_sync = False + while time() <= wait_time: + ret, rout, _ = g.run(client, cmd.format(m_point, self.volname)) + 
self.assertEqual(ret, 0, + 'Not able to grep for volfile sync from client') + if int(rout) == int(brick_count): + in_sync = True + break + sleep(30) + self.assertTrue( + in_sync, 'Volfiles from clients are not synced even ' + 'after polling for ~5 min') + + self.assertEqual( + int(repl_count), kwargs['replica_count'], 'Not able ' + 'to validate x2 to x3 replicated volume conversion') + + # Enable client side heal options, trigger and monitor heal + ret = set_volume_options( + self.mnode, self.volname, { + 'data-self-heal': 'on', + 'entry-self-heal': 'on', + 'metadata-self-heal': 'on' + }) + self.assertTrue(ret, 'Unable to set client side heal options') + ret = trigger_heal(self.mnode, self.volname) + self.assertTrue(ret, 'Unable to trigger heal on volume') + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, + 'Heal is not completed for {}'.format(self.volname)) + + # Validate IO + prev_time = datetime.now().replace(microsecond=0) + ret = validate_io_procs(self.all_mounts_procs, [self.mounts[1]]) + curr_time = datetime.now().replace(microsecond=0) + self.assertTrue(ret, 'Not able to validate completion of IO on mount') + self.all_mounts_procs *= 0 + + # To ascertain IO was happening during brick operations + self.assertGreater( + curr_time - prev_time, timedelta(seconds=10), 'Unable ' + 'to validate IO was happening during brick operations') + + # Take and validate `arequal` checksum on first directory + ret, act_areequal = collect_mounts_arequal(self.mounts[1], + m_point + '/user1') + self.assertTrue(ret, 'Failed to get arequal checksum from mount') + self.assertEqual( + exp_arequal, act_areequal, '`arequal` checksum did ' + 'not match post arbiter to x3 replicated volume conversion') + + g.log.info('PASS: Arbiter to x3 replicated volume conversion complete') diff --git a/tests/functional/afr/test_brick_process_not_started_on_read_only_node_disks.py b/tests/functional/afr/test_brick_process_not_started_on_read_only_node_disks.py index df75b771a..4a695c241 100644 --- a/tests/functional/afr/test_brick_process_not_started_on_read_only_node_disks.py +++ b/tests/functional/afr/test_brick_process_not_started_on_read_only_node_disks.py @@ -1,6 +1,23 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
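The volfile-sync wait in the arbiter-to-replicated conversion test polls the FUSE .meta graph until the number of connected <volname>-client-* xlators matches the new brick count. Factored into a helper under the same assumptions (a FUSE mount exposing .meta, the glusto g.run executor, the test's 300-second polling window), it would look roughly like this:

from time import sleep, time

from glusto.core import Glusto as g


def wait_for_client_volfile_sync(client, mountpoint, volname,
                                 expected_brick_count, timeout=300):
    """Poll the FUSE .meta graph on `client` until the number of connected
    <volname>-client-* xlators equals `expected_brick_count`.
    Returns True on success, False if `timeout` seconds elapse first."""
    cmd = ("grep -ir connected {}/.meta/graphs/active/{}-client-*/private"
           " | wc -l".format(mountpoint, volname))
    end_time = time() + timeout
    while time() <= end_time:
        ret, out, _ = g.run(client, cmd)
        if ret == 0 and int(out) == int(expected_brick_count):
            return True
        sleep(30)
    return False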
import calendar import time + from glusto.core import Glusto as g + from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on from glustolibs.gluster.brick_libs import (bring_bricks_offline, @@ -26,16 +43,14 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -49,7 +64,7 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): """ # calling GlusterBaseClass setUpClass - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() self.all_mounts_procs = [] self.io_validation_complete = False @@ -92,7 +107,7 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): g.log.info("Successful in Unmount Volume and Cleanup Volume") # calling GlusterBaseClass tearDownClass - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_brick_process_not_started_on_read_only_node_disks(self): """ @@ -111,10 +126,7 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks'])) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -133,9 +145,10 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): for mount_obj in self.mounts: g.log.info("Starting IO on %s:%s", mount_obj.client_system, mount_obj.mountpoint) - cmd = ("python %s create_files -f 100 %s/%s/test_dir" - % (self.script_upload_path, mount_obj.mountpoint, - mount_obj.client_system)) + cmd = ("/usr/bin/env python %s create_files -f 100 " + "%s/%s/test_dir" % ( + self.script_upload_path, + mount_obj.mountpoint, mount_obj.client_system)) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) self.all_mounts_procs.append(proc) diff --git a/tests/functional/afr/test_client_side_quorum_with_auto_option.py b/tests/functional/afr/test_client_side_quorum_with_auto_option.py index 79e167f9d..8a319f609 100755 --- a/tests/functional/afr/test_client_side_quorum_with_auto_option.py +++ b/tests/functional/afr/test_client_side_quorum_with_auto_option.py @@ -1,4 +1,4 @@ -# Copyright (C) 2016-2019 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2016-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,8 +17,8 @@ """ Description: Test Cases in this module tests the client side quorum. 
""" - from glusto.core import Glusto as g + from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on from glustolibs.gluster.volume_libs import ( @@ -42,20 +42,20 @@ class ClientSideQuorumTests(GlusterBaseClass): """ # calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on " "mounts", cls.clients) - script_abs_path = "/usr/share/glustolibs/io/scripts/file_dir_ops.py" - cls.script_upload_path = script_abs_path - ret = upload_scripts(cls.clients, script_abs_path) + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients") def setUp(self): # calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() # Setup Volume and Mount Volume g.log.info("Starting to Setup Volume %s", self.volname) @@ -73,7 +73,7 @@ class ClientSideQuorumTests(GlusterBaseClass): g.log.info("Successful in Unmount Volume and Cleanup Volume") # Calling GlusterBaseClass tearDown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_client_side_quorum_with_auto_option(self): """ @@ -98,9 +98,10 @@ class ClientSideQuorumTests(GlusterBaseClass): # write files on all mounts g.log.info("Starting IO on all mounts...") g.log.info("mounts: %s", self.mounts) - cmd = ("python %s create_files " - "-f 10 --base-file-name file %s" % (self.script_upload_path, - self.mounts[0].mountpoint)) + cmd = ("/usr/bin/env python %s create_files " + "-f 10 --base-file-name file %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) ret, _, err = g.run(self.mounts[0].client_system, cmd) self.assertFalse(ret, "IO failed on %s with %s" % (self.mounts[0].client_system, err)) diff --git a/tests/functional/afr/test_client_side_quorum_with_auto_option_overwrite_fixed.py b/tests/functional/afr/test_client_side_quorum_with_auto_option_overwrite_fixed.py index 77884bd4d..1a6a85946 100755 --- a/tests/functional/afr/test_client_side_quorum_with_auto_option_overwrite_fixed.py +++ b/tests/functional/afr/test_client_side_quorum_with_auto_option_overwrite_fixed.py @@ -1,4 +1,4 @@ -# Copyright (C) 2016-2019 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2016-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,8 +17,8 @@ """ Description: Test Cases in this module tests the client side quorum. 
""" - from glusto.core import Glusto as g + from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on from glustolibs.gluster.volume_libs import ( @@ -43,14 +43,14 @@ class ClientSideQuorumTests(GlusterBaseClass): """ # calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on " "mounts", cls.clients) - script_abs_path = "/usr/share/glustolibs/io/scripts/file_dir_ops.py" - cls.script_upload_path = script_abs_path - ret = upload_scripts(cls.clients, [script_abs_path]) + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients") @@ -60,7 +60,7 @@ class ClientSideQuorumTests(GlusterBaseClass): """ # calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() # Setup Volume and Mount Volume g.log.info("Starting to Setup Volume %s", self.volname) @@ -82,7 +82,7 @@ class ClientSideQuorumTests(GlusterBaseClass): g.log.info("Successful in Unmount Volume and Cleanup Volume") # Calling GlusterBaseClass tearDown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_client_side_quorum_with_auto_option_overwrite_fixed(self): """ @@ -150,9 +150,10 @@ class ClientSideQuorumTests(GlusterBaseClass): # create files g.log.info("Starting IO on all mounts...") g.log.info("mounts: %s", self.mounts) - cmd = ("python %s create_files " - "-f 10 --base-file-name file %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = ("/usr/bin/env python %s create_files " + "-f 10 --base-file-name file %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) ret, _, err = g.run(self.mounts[0].client_system, cmd) self.assertFalse(ret, "IO failed on %s with '%s'" % (self.mounts[0].client_system, err)) @@ -181,9 +182,10 @@ class ClientSideQuorumTests(GlusterBaseClass): # create files g.log.info("Starting IO on all mounts...") g.log.info("mounts: %s", self.mounts) - cmd = ("python %s create_files " - "-f 10 --base-file-name second_file %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = ("/usr/bin/env python %s create_files " + "-f 10 --base-file-name second_file %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) ret, _, err = g.run(self.mounts[0].client_system, cmd) self.assertFalse(ret, "IO failed on %s with '%s'" % (self.mounts[0].client_system, err)) diff --git a/tests/functional/afr/test_client_side_quorum_with_cross2.py b/tests/functional/afr/test_client_side_quorum_with_cross2.py index 142fcb886..6df722fb8 100644 --- a/tests/functional/afr/test_client_side_quorum_with_cross2.py +++ b/tests/functional/afr/test_client_side_quorum_with_cross2.py @@ -1,4 +1,4 @@ -# Copyright (C) 2016-2017 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2016-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,8 +17,8 @@ """ Description: Test Cases in this module tests the client side quorum. 
""" - from glusto.core import Glusto as g + from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on from glustolibs.gluster.volume_libs import ( @@ -41,16 +41,14 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -79,7 +77,7 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): """ # calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() # Setup Volume and Mount Volume g.log.info("Starting to Setup Volume %s", self.volname) @@ -101,7 +99,7 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): g.log.info("Successful in Unmount Volume and Cleanup Volume") # Calling GlusterBaseClass tearDown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_client_side_quorum_with_auto_option_cross2(self): """ @@ -126,9 +124,10 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): # Start IO on mounts g.log.info("Starting IO .....") all_mounts_procs = [] - cmd = ("python %s create_files " - "-f 10 --base-file-name file %s" % (self.script_upload_path, - self.mounts[0].mountpoint)) + cmd = ("/usr/bin/env python %s create_files " + "-f 10 --base-file-name file %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -163,9 +162,10 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): # create new file named newfile0.txt g.log.info("Start creating new file on all mounts...") all_mounts_procs = [] - cmd = ("python %s create_files " - "-f 1 --base-file-name newfile %s" % - (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = ("/usr/bin/env python %s create_files " + "-f 1 --base-file-name newfile %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -179,8 +179,9 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): # create directory user1 g.log.info("Start creating directory on all mounts...") all_mounts_procs = [] - cmd = ("python %s create_deep_dir %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = "/usr/bin/env python %s create_deep_dir %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -249,8 +250,9 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): g.log.info("Starting reading files on all mounts") all_mounts_procs = [] for mount_obj in self.mounts: - cmd = ("python %s read %s" - % (self.script_upload_path, mount_obj.mountpoint)) + cmd = "/usr/bin/env python %s read %s" % ( + self.script_upload_path, + mount_obj.mountpoint) proc = g.run_async(mount_obj.client_system, cmd, 
user=mount_obj.user) all_mounts_procs.append(proc) @@ -274,8 +276,9 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): # stat on dir g.log.info("stat on directory on all mounts") for mount_obj in self.mounts: - cmd = ("python %s stat %s" - % (self.script_upload_path, mount_obj.mountpoint)) + cmd = "/usr/bin/env python %s stat %s" % ( + self.script_upload_path, + mount_obj.mountpoint) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, 'Failed to stat directory on %s' % mount_obj.mountpoint) @@ -285,8 +288,9 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): # ls on mount point g.log.info("ls on mount point on all mounts") for mount_obj in self.mounts: - cmd = ("python %s ls %s" - % (self.script_upload_path, mount_obj.mountpoint)) + cmd = "/usr/bin/env python %s ls %s" % ( + self.script_upload_path, + mount_obj.mountpoint) ret, _, _ = g.run(mount_obj.client_system, cmd) self.assertFalse(ret, 'Failed to ls on %s' % mount_obj.mountpoint) @@ -353,9 +357,10 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): g.log.info("Starting IO on all mounts...") g.log.info("mounts: %s", self.mounts) all_mounts_procs = [] - cmd = ("python %s create_files " - "-f 10 --base-file-name file %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = ("/usr/bin/env python %s create_files " + "-f 10 --base-file-name file %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -370,8 +375,9 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): g.log.info("Start reading files on all mounts") all_mounts_procs = [] for mount_obj in self.mounts: - cmd = ("python %s read " - "%s" % (self.script_upload_path, mount_obj.mountpoint)) + cmd = "/usr/bin/env python %s read %s" % ( + self.script_upload_path, + mount_obj.mountpoint) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) all_mounts_procs.append(proc) @@ -409,9 +415,10 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): g.log.info("Starting IO on all mounts...") g.log.info("mounts: %s", self.mounts) all_mounts_procs = [] - cmd = ("python %s create_files " - "-f 10 --base-file-name second_file %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = ("/usr/bin/env python %s create_files " + "-f 10 --base-file-name second_file %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -426,8 +433,9 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): g.log.info("Start reading files on all mounts") all_mounts_procs = [] for mount_obj in self.mounts: - cmd = ("python %s read " - "%s" % (self.script_upload_path, mount_obj.mountpoint)) + cmd = "/usr/bin/env python %s read %s" % ( + self.script_upload_path, + mount_obj.mountpoint) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) all_mounts_procs.append(proc) @@ -450,9 +458,10 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): # start I/0 ( write and read ) - must succeed g.log.info("Starting IO on mount.....") all_mounts_procs = [] - cmd = ("python %s create_files " - "-f 10 --base-file-name third_file %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = ("/usr/bin/env python %s create_files " + "-f 10 --base-file-name third_file %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, cmd, 
user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -467,8 +476,9 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): g.log.info("Start reading files on all mounts") all_mounts_procs = [] for mount_obj in self.mounts: - cmd = ("python %s read " - "%s" % (self.script_upload_path, mount_obj.mountpoint)) + cmd = "/usr/bin/env python %s read %s" % ( + self.script_upload_path, + mount_obj.mountpoint) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) all_mounts_procs.append(proc) @@ -491,9 +501,10 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): # start I/0 ( write and read ) - read must pass, write will fail g.log.info("Starting IO on mount......") all_mounts_procs = [] - cmd = ("python %s create_files " - "-f 10 --base-file-name fourth_file %s" % - (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = ("/usr/bin/env python %s create_files " + "-f 10 --base-file-name fourth_file %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -510,8 +521,9 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): g.log.info("Start reading files on all mounts") all_mounts_procs = [] for mount_obj in self.mounts: - cmd = ("python %s read " - "%s" % (self.script_upload_path, mount_obj.mountpoint)) + cmd = "/usr/bin/env python %s read %s" % ( + self.script_upload_path, + mount_obj.mountpoint) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) all_mounts_procs.append(proc) @@ -535,9 +547,10 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): # start I/0 ( write and read ) - must succeed g.log.info("Starting IO on mount.....") all_mounts_procs = [] - cmd = ("python %s create_files " - "-f 10 --base-file-name fifth_file %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = ("/usr/bin/env python %s create_files " + "-f 10 --base-file-name fifth_file %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -552,8 +565,9 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): g.log.info("Start reading files on all mounts") all_mounts_procs = [] for mount_obj in self.mounts: - cmd = ("python %s read " - "%s" % (self.script_upload_path, mount_obj.mountpoint)) + cmd = "/usr/bin/env python %s read %s" % ( + self.script_upload_path, + mount_obj.mountpoint) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) all_mounts_procs.append(proc) @@ -576,9 +590,10 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): # start I/0 ( write and read ) - read must pass, write will fail g.log.info("Start creating files on mounts.....") all_mounts_procs = [] - cmd = ("python %s create_files " - "-f 10 --base-file-name sixth_file %s" % - (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = ("/usr/bin/env python %s create_files " + "-f 10 --base-file-name sixth_file %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -595,8 +610,9 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): g.log.info("Start reading files on all mounts") all_mounts_procs = [] for mount_obj in self.mounts: - cmd = ("python %s read " - "%s" % (self.script_upload_path, mount_obj.mountpoint)) + cmd = "/usr/bin/env python %s read %s" % ( + self.script_upload_path, + mount_obj.mountpoint) 
proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) all_mounts_procs.append(proc) @@ -619,9 +635,10 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): # start I/0 ( write and read ) - must succeed g.log.info("Starting IO on mount.....") all_mounts_procs = [] - cmd = ("python %s create_files " - "-f 10 --base-file-name seventh_file %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = ("/usr/bin/env python %s create_files " + "-f 10 --base-file-name seventh_file %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -636,8 +653,9 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): g.log.info("Start reading files on all mounts") all_mounts_procs = [] for mount_obj in self.mounts: - cmd = ("python %s read " - "%s" % (self.script_upload_path, mount_obj.mountpoint)) + cmd = "/usr/bin/env python %s read %s" % ( + self.script_upload_path, + mount_obj.mountpoint) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) all_mounts_procs.append(proc) @@ -661,9 +679,10 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): # start I/0 ( write and read ) - must succeed g.log.info("Starting IO on mount.....") all_mounts_procs = [] - cmd = ("python %s create_files " - "-f 10 --base-file-name eigth_file %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = ("/usr/bin/env python %s create_files " + "-f 10 --base-file-name eigth_file %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -678,8 +697,9 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): g.log.info("Start reading files on all mounts") all_mounts_procs = [] for mount_obj in self.mounts: - cmd = ("python %s read " - "%s" % (self.script_upload_path, mount_obj.mountpoint)) + cmd = "/usr/bin/env python %s read %s" % ( + self.script_upload_path, + mount_obj.mountpoint) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) all_mounts_procs.append(proc) @@ -712,9 +732,10 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): # start I/0 ( write and read ) - read must pass, write will fail g.log.info("Start creating files on mounts.....") all_mounts_procs = [] - cmd = ("python %s create_files " - "-f 10 --base-file-name ninth_file %s" % - (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = ("/usr/bin/env python %s create_files " + "-f 10 --base-file-name ninth_file %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -731,8 +752,9 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): g.log.info("Start reading files on all mounts") all_mounts_procs = [] for mount_obj in self.mounts: - cmd = ("python %s read " - "%s" % (self.script_upload_path, mount_obj.mountpoint)) + cmd = "/usr/bin/env python %s read %s" % ( + self.script_upload_path, + mount_obj.mountpoint) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) all_mounts_procs.append(proc) @@ -755,9 +777,10 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): # start I/0 ( write and read ) - must succeed g.log.info("Starting IO on mount.....") all_mounts_procs = [] - cmd = ("python %s create_files " - "-f 10 --base-file-name tenth_file %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = 
("/usr/bin/env python %s create_files " + "-f 10 --base-file-name tenth_file %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -772,8 +795,9 @@ class ClientSideQuorumCross2Tests(GlusterBaseClass): g.log.info("Start reading files on all mounts") all_mounts_procs = [] for mount_obj in self.mounts: - cmd = ("python %s read " - "%s" % (self.script_upload_path, mount_obj.mountpoint)) + cmd = "/usr/bin/env python %s read %s" % ( + self.script_upload_path, + mount_obj.mountpoint) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) all_mounts_procs.append(proc) diff --git a/tests/functional/afr/test_client_side_quorum_with_fixed_for_cross3.py b/tests/functional/afr/test_client_side_quorum_with_fixed_for_cross3.py index e074460e0..0ead8b2fc 100755 --- a/tests/functional/afr/test_client_side_quorum_with_fixed_for_cross3.py +++ b/tests/functional/afr/test_client_side_quorum_with_fixed_for_cross3.py @@ -1,4 +1,4 @@ -# Copyright (C) 2016-2019 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2016-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,15 +17,16 @@ """ Description: Test Cases in this module tests the client side quorum. """ - from time import sleep + from glusto.core import Glusto as g + from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on -from glustolibs.gluster.volume_libs import ( - set_volume_options, get_subvols) +from glustolibs.gluster.volume_libs import get_subvols from glustolibs.misc.misc_libs import upload_scripts -from glustolibs.gluster.volume_ops import reset_volume_option +from glustolibs.gluster.volume_ops import (set_volume_options, + reset_volume_option) from glustolibs.gluster.brick_libs import (bring_bricks_offline, bring_bricks_online) from glustolibs.io.utils import (validate_io_procs, @@ -45,14 +46,14 @@ class ClientSideQuorumTestsWithSingleVolumeCross3(GlusterBaseClass): Upload the necessary scripts to run tests. 
""" # calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on " "mounts", cls.clients) - script_abs_path = "/usr/share/glustolibs/io/scripts/file_dir_ops.py" - cls.script_upload_path = script_abs_path - ret = upload_scripts(cls.clients, [script_abs_path]) + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients") @@ -61,7 +62,7 @@ class ClientSideQuorumTestsWithSingleVolumeCross3(GlusterBaseClass): setUp method for every test """ # calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() # Setup Volume and Mount Volume g.log.info("Starting to Setup Volume %s", self.volname) @@ -96,7 +97,7 @@ class ClientSideQuorumTestsWithSingleVolumeCross3(GlusterBaseClass): g.log.info("Successful in Unmount Volume and Cleanup Volume") # Calling GlusterBaseClass tearDown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_client_side_quorum_with_fixed_for_cross3(self): """ @@ -150,9 +151,10 @@ class ClientSideQuorumTestsWithSingleVolumeCross3(GlusterBaseClass): # start I/O( write ) - must succeed all_mounts_procs = [] g.log.info("Starting IO on mountpoint %s", self.mounts[0].mountpoint) - cmd = ("python %s create_files " - "-f 10 --base-file-name file %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = ("/usr/bin/env python %s create_files " + "-f 10 --base-file-name file %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -165,8 +167,9 @@ class ClientSideQuorumTestsWithSingleVolumeCross3(GlusterBaseClass): # read the file g.log.info("Start reading files on %s", self.mounts[0].mountpoint) all_mounts_procs = [] - cmd = ("python %s read %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = "/usr/bin/env python %s read %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -200,9 +203,10 @@ class ClientSideQuorumTestsWithSingleVolumeCross3(GlusterBaseClass): # start I/0 ( write and read ) - must succeed g.log.info("Starting IO on mountpoint %s", self.mounts[0].mountpoint) all_mounts_procs = [] - cmd = ("python %s create_files " - "-f 10 --base-file-name testfile %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = ("/usr/bin/env python %s create_files " + "-f 10 --base-file-name testfile %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -216,8 +220,9 @@ class ClientSideQuorumTestsWithSingleVolumeCross3(GlusterBaseClass): g.log.info("Start reading files on mountpoint %s", self.mounts[0].mountpoint) all_mounts_procs = [] - cmd = ("python %s read %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = "/usr/bin/env python %s read %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -245,9 +250,10 @@ class 
ClientSideQuorumTestsWithSingleVolumeCross3(GlusterBaseClass): # start I/0 ( write and read ) - must succeed g.log.info("Starting IO on mountpoint %s", self.mounts[0].mountpoint) all_mounts_procs = [] - cmd = ("python %s create_files " - "-f 10 --base-file-name newfile %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = ("/usr/bin/env python %s create_files " + "-f 10 --base-file-name newfile %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -261,8 +267,9 @@ class ClientSideQuorumTestsWithSingleVolumeCross3(GlusterBaseClass): g.log.info("Start reading files on mountpoint %s", self.mounts[0].mountpoint) all_mounts_procs = [] - cmd = ("python %s read %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = "/usr/bin/env python %s read %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -283,9 +290,10 @@ class ClientSideQuorumTestsWithSingleVolumeCross3(GlusterBaseClass): # start I/0 ( write and read ) - must succeed g.log.info("Starting IO on mountpoint %s", self.mounts[0].mountpoint) all_mounts_procs = [] - cmd = ("python %s create_files " - "-f 10 --base-file-name filename %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = ("/usr/bin/env python %s create_files " + "-f 10 --base-file-name filename %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -299,8 +307,9 @@ class ClientSideQuorumTestsWithSingleVolumeCross3(GlusterBaseClass): g.log.info("Start reading files on mountpoint %s", self.mounts[0].mountpoint) all_mounts_procs = [] - cmd = ("python %s read %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = "/usr/bin/env python %s read %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -370,9 +379,10 @@ class ClientSideQuorumTestsWithSingleVolumeCross3(GlusterBaseClass): # start I/0 ( write and read ) - must succeed g.log.info("Starting IO on mountpoint %s", self.mounts[0].mountpoint) all_mounts_procs = [] - cmd = ("python %s create_files " - "-f 10 --base-file-name newfilename %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = ("/usr/bin/env python %s create_files " + "-f 10 --base-file-name newfilename %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -386,8 +396,9 @@ class ClientSideQuorumTestsWithSingleVolumeCross3(GlusterBaseClass): g.log.info("Start reading files on mountpoint %s", self.mounts[0].mountpoint) all_mounts_procs = [] - cmd = ("python %s read %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = "/usr/bin/env python %s read %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -411,9 +422,10 @@ class ClientSideQuorumTestsWithSingleVolumeCross3(GlusterBaseClass): # start I/0 ( write and read ) - must succeed g.log.info("Starting IO on mountpoint %s", self.mounts[0].mountpoint) all_mounts_procs = [] - cmd = ("python %s create_files " 
- "-f 10 --base-file-name textfile %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = ("/usr/bin/env python %s create_files " + "-f 10 --base-file-name textfile %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -427,8 +439,9 @@ class ClientSideQuorumTestsWithSingleVolumeCross3(GlusterBaseClass): g.log.info("Start reading files on mountpoint %s", self.mounts[0].mountpoint) all_mounts_procs = [] - cmd = ("python %s read %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = "/usr/bin/env python %s read %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -449,9 +462,10 @@ class ClientSideQuorumTestsWithSingleVolumeCross3(GlusterBaseClass): # start I/0 ( write and read ) - must succeed g.log.info("Starting IO on mountpoint %s", self.mounts[0].mountpoint) all_mounts_procs = [] - cmd = ("python %s create_files " - "-f 10 --base-file-name newtextfile %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = ("/usr/bin/env python %s create_files " + "-f 10 --base-file-name newtextfile %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -465,8 +479,9 @@ class ClientSideQuorumTestsWithSingleVolumeCross3(GlusterBaseClass): g.log.info("Start reading files on mountpoint %s", self.mounts[0].mountpoint) all_mounts_procs = [] - cmd = ("python %s read %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = "/usr/bin/env python %s read %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -635,9 +650,10 @@ class ClientSideQuorumTestsWithSingleVolumeCross3(GlusterBaseClass): # start I/0 ( write and read ) - must succeed g.log.info("Starting IO on mountpoint %s", self.mounts[0].mountpoint) all_mounts_procs = [] - cmd = ("python %s create_files " - "-f 10 --base-file-name lastfile %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = ("/usr/bin/env python %s create_files " + "-f 10 --base-file-name lastfile %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) @@ -651,8 +667,9 @@ class ClientSideQuorumTestsWithSingleVolumeCross3(GlusterBaseClass): g.log.info("Start reading files on mountpoint %s", self.mounts[0].mountpoint) all_mounts_procs = [] - cmd = ("python %s read %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = "/usr/bin/env python %s read %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint) proc = g.run_async(self.mounts[0].client_system, cmd, user=self.mounts[0].user) all_mounts_procs.append(proc) diff --git a/tests/functional/afr/test_client_side_quorum_with_fixed_validate_max_bricks.py b/tests/functional/afr/test_client_side_quorum_with_fixed_validate_max_bricks.py index 67929cc86..8e8652f6e 100755 --- a/tests/functional/afr/test_client_side_quorum_with_fixed_validate_max_bricks.py +++ b/tests/functional/afr/test_client_side_quorum_with_fixed_validate_max_bricks.py @@ -42,14 +42,14 @@ class ClientSideQuorumTests(GlusterBaseClass): """ # calling GlusterBaseClass setUpClass - 
GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on " "mounts", cls.clients) - script_abs_path = "/usr/share/glustolibs/io/scripts/file_dir_ops.py" - cls.script_upload_path = script_abs_path - ret = upload_scripts(cls.clients, script_abs_path) + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients") @@ -59,7 +59,7 @@ class ClientSideQuorumTests(GlusterBaseClass): """ # calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() # Setup Volume and Mount Volume g.log.info("Starting to Setup Volume %s", self.volname) @@ -92,7 +92,7 @@ class ClientSideQuorumTests(GlusterBaseClass): g.log.info("Successful in Unmount Volume and Cleanup Volume") # Calling GlusterBaseClass tearDown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_client_side_quorum_with_fixed_validate_max_bricks(self): """ diff --git a/tests/functional/afr/test_client_side_quorum_with_multiple_volumes.py b/tests/functional/afr/test_client_side_quorum_with_multiple_volumes.py index e9f38b3f9..e3ed906f2 100644 --- a/tests/functional/afr/test_client_side_quorum_with_multiple_volumes.py +++ b/tests/functional/afr/test_client_side_quorum_with_multiple_volumes.py @@ -1,4 +1,4 @@ -# Copyright (C) 2016-2017 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2016-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,7 +17,6 @@ """ Description: Test Cases in this module tests the client side quorum. 
""" - import tempfile from glusto.core import Glusto as g @@ -44,16 +43,14 @@ class ClientSideQuorumTestsMultipleVols(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -153,7 +150,7 @@ class ClientSideQuorumTestsMultipleVols(GlusterBaseClass): def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() self.all_mounts_procs = [] self.io_validation_complete = False @@ -180,7 +177,7 @@ class ClientSideQuorumTestsMultipleVols(GlusterBaseClass): g.log.info("Listing all files and directories is successful") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() @classmethod def tearDownClass(cls): @@ -208,7 +205,7 @@ class ClientSideQuorumTestsMultipleVols(GlusterBaseClass): cls.client) # calling GlusterBaseClass tearDownClass - GlusterBaseClass.tearDownClass.im_func(cls) + cls.get_super_method(cls, 'tearDownClass')() def test_client_side_quorum_auto_local_to_volume_not_cluster(self): """ @@ -229,9 +226,10 @@ class ClientSideQuorumTestsMultipleVols(GlusterBaseClass): for mount_point in self.mount_points: self.all_mounts_procs = [] g.log.info('Creating files...') - command = ("python %s create_files -f 50 " - "--fixed-file-size 1k %s" - % (self.script_upload_path, mount_point)) + command = ("/usr/bin/env python %s create_files -f 50 " + "--fixed-file-size 1k %s" % ( + self.script_upload_path, + mount_point)) proc = g.run_async(self.mounts[0].client_system, command) self.all_mounts_procs.append(proc) @@ -332,15 +330,15 @@ class ClientSideQuorumTestsMultipleVols(GlusterBaseClass): # merge two dicts (volname: file_to_delete) and (volname: mountpoint) temp_dict = [vols_file_list, self.mount_points_and_volnames] file_to_delete_to_mountpoint_dict = {} - for k in vols_file_list.iterkeys(): + for k in vols_file_list: file_to_delete_to_mountpoint_dict[k] = ( tuple(file_to_delete_to_mountpoint_dict[k] for file_to_delete_to_mountpoint_dict in temp_dict)) # create files on all volumes and check for result - for volname, file_and_mountpoint in \ - file_to_delete_to_mountpoint_dict.iteritems(): + for volname, file_and_mountpoint in ( + file_to_delete_to_mountpoint_dict.items()): filename, mountpoint = file_and_mountpoint # check for ROFS error for read-only file system for diff --git a/tests/functional/afr/test_conservative_merge_of_files_heal_command.py b/tests/functional/afr/test_conservative_merge_of_files_heal_command.py index 06514b972..eab9d870e 100644 --- a/tests/functional/afr/test_conservative_merge_of_files_heal_command.py +++ b/tests/functional/afr/test_conservative_merge_of_files_heal_command.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,6 +15,7 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. from glusto.core import Glusto as g + from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.volume_ops import set_volume_options @@ -45,16 +46,14 @@ class VerifySelfHealTriggersHealCommand(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -71,7 +70,7 @@ class VerifySelfHealTriggersHealCommand(GlusterBaseClass): def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() self.all_mounts_procs = [] self.io_validation_complete = False @@ -106,7 +105,7 @@ class VerifySelfHealTriggersHealCommand(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_conservative_merge_of_files_heal_command(self): """ @@ -160,9 +159,10 @@ class VerifySelfHealTriggersHealCommand(GlusterBaseClass): mount_obj.client_system, mount_obj.mountpoint) # Create files g.log.info('Creating files...') - command = ("python %s create_deep_dirs_with_files " - "-d 0 -l 5 -f 10 --dirname-start-num 1 %s" - % (self.script_upload_path, mount_obj.mountpoint)) + command = ("/usr/bin/env python %s create_deep_dirs_with_files " + "-d 0 -l 5 -f 10 --dirname-start-num 1 %s" % ( + self.script_upload_path, + mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, command, user=mount_obj.user) @@ -204,9 +204,10 @@ class VerifySelfHealTriggersHealCommand(GlusterBaseClass): mount_obj.client_system, mount_obj.mountpoint) # Create files g.log.info('Creating files...') - command = ("python %s create_deep_dirs_with_files " - "-d 0 -l 5 -f 10 --dirname-start-num 6 %s" - % (self.script_upload_path, mount_obj.mountpoint)) + command = ("/usr/bin/env python %s create_deep_dirs_with_files " + "-d 0 -l 5 -f 10 --dirname-start-num 6 %s" % ( + self.script_upload_path, + mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, command, user=mount_obj.user) @@ -320,6 +321,7 @@ class VerifySelfHealTriggersHealCommand(GlusterBaseClass): g.log.info('Arequals for mountpoint and %s are equal', brick) g.log.info('All arequals are equal for replicated') - self.assertNotEqual(cmp(arequals_before_heal, arequals_after_heal), 0, - 'Arequals are equal for bricks ' - 'before and after healing') + self.assertNotEqual( + arequals_before_heal, arequals_after_heal, + 'Arequals are equal for bricks before (%s) and after (%s) ' + 'healing' % (arequals_before_heal, arequals_after_heal)) diff --git 
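The assertNotEqual hunk above drops cmp(), which was removed in Python 3; comparing the two arequal lists directly is equivalent and also embeds both values in the failure message. A self-contained illustration with made-up checksum values:

import unittest

class ArequalComparison(unittest.TestCase):
    def test_checksums_differ(self):
        arequals_before_heal = ['0000aaaa']   # sample values, not real output
        arequals_after_heal = ['0000bbbb']
        # direct comparison replaces assertNotEqual(cmp(a, b), 0, ...)
        self.assertNotEqual(arequals_before_heal, arequals_after_heal,
                            'Arequals are equal for bricks before and '
                            'after healing')

if __name__ == '__main__':
    unittest.main()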
a/tests/functional/afr/test_creating_deleting_files_reflect_available_space.py b/tests/functional/afr/test_creating_deleting_files_reflect_available_space.py new file mode 100644 index 000000000..edf4569bc --- /dev/null +++ b/tests/functional/afr/test_creating_deleting_files_reflect_available_space.py @@ -0,0 +1,160 @@ +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import wait_for_io_to_complete + + +@runs_on([['replicated', 'distributed-replicated'], ['glusterfs']]) +class VerifyAvaliableSpaceBeforeAfterDelete(GlusterBaseClass): + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + g.log.info("Upload io scripts to clients %s for running IO on mounts", + cls.clients) + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + self.all_mounts_procs = [] + self.io_validation_complete = False + + # Setup Volume and Mount Volume + g.log.info("Starting to Setup Volume and Mount Volume") + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + If test method failed before validating IO, tearDown waits for the + IO's to complete and checks for the IO exit status + + Cleanup and umount volume + """ + if not self.io_validation_complete: + g.log.info("Wait for IO to complete as IO validation did not " + "succeed in test method") + ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts) + if not ret: + raise ExecutionError("IO failed on some of the clients") + g.log.info("IO is successful on all mounts") + + # Cleanup and umount volume + g.log.info("Starting to Unmount Volume and Cleanup Volume") + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_creating_deleting_files_reflect_available_space(self): + """ + - note the current 
available space on the mount + - create 1M file on the mount + - note the current available space on the mountpoint and compare + with space before creation + - remove the file + - note the current available space on the mountpoint and compare + with space before creation + """ + + # Create 1M file on client side + g.log.info('Creating file on %s', self.mounts[0].mountpoint) + cmd = ("/usr/bin/env python %s create_files -f 1" + " --fixed-file-size 1M %s" % (self.script_upload_path, + self.mounts[0].mountpoint)) + ret, _, err = g.run(self.mounts[0].client_system, cmd) + self.assertFalse(ret, err) + + # Get the current available space on the mount + g.log.info('Getting the current available space on the mount...') + cmd = ("df --output=avail %s | grep '[0-9]'" + % self.mounts[0].mountpoint) + ret, out, err = g.run(self.mounts[0].client_system, cmd) + self.assertFalse(ret, err) + space_before_file_creation = int(out) + + # Create 1M file on client side + g.log.info('Creating file on %s', self.mounts[0].mountpoint) + cmd = ("/usr/bin/env python %s create_files -f 1 " + "--fixed-file-size 1M --base-file-name newfile %s/newdir" + % (self.script_upload_path, self.mounts[0].mountpoint)) + ret, _, err = g.run(self.mounts[0].client_system, cmd) + self.assertFalse(ret, err) + + # Get the current available space on the mount + g.log.info('Getting the current available space on the mount...') + cmd = ("df --output=avail %s | grep '[0-9]'" + % self.mounts[0].mountpoint) + ret, out, err = g.run(self.mounts[0].client_system, cmd) + self.assertFalse(ret, err) + space_after_file_creation = int(out) + + # Compare available size before creation and after creation file + g.log.info('Comparing available size before creation ' + 'and after creation file...') + space_diff = space_before_file_creation - space_after_file_creation + space_diff = round(space_diff / 1024) + g.log.info('Space difference is %d', space_diff) + self.assertEqual(space_diff, 1.0, + 'Available size before creation and ' + 'after creation file is not valid') + g.log.info('Available size before creation and ' + 'after creation file is valid') + + # Delete file on client side + g.log.info('Deleting file on %s', self.mounts[0].mountpoint) + cmd = ("/usr/bin/env python %s delete %s/newdir" + % (self.script_upload_path, self.mounts[0].mountpoint)) + ret, _, err = g.run(self.mounts[0].client_system, cmd) + self.assertFalse(ret, err) + + # Get the current available space on the mount + cmd = ("df --output=avail %s | grep '[0-9]'" + % self.mounts[0].mountpoint) + ret, out, err = g.run(self.mounts[0].client_system, cmd) + self.assertFalse(ret, err) + space_after_file_deletion = int(out) + + # Compare available size before creation and after deletion file + g.log.info('Comparing available size before creation ' + 'and after deletion file...') + space_diff = space_before_file_creation - space_after_file_deletion + space_diff_comp = space_diff < 200 + self.assertTrue(space_diff_comp, + 'Available size before creation is not proportional ' + 'to the size after deletion file') + g.log.info('Available size before creation is proportional ' + 'to the size after deletion file') diff --git a/tests/functional/afr/test_default_granular_entry_heal.py b/tests/functional/afr/test_default_granular_entry_heal.py new file mode 100644 index 000000000..91ca25907 --- /dev/null +++ b/tests/functional/afr/test_default_granular_entry_heal.py @@ -0,0 +1,235 @@ +# Copyright (C) 2021 Red Hat, Inc. 
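The available-space test above leans on df --output=avail, which prints only the Avail column (in 1 KiB blocks by default), and expects a drop of roughly 1024 KiB after writing a 1 MiB file. A hedged standalone sketch of that check; the mount path is a placeholder and dd stands in for the file_dir_ops.py call.

import subprocess

def available_kib(mountpoint):
    out = subprocess.check_output(['df', '--output=avail', mountpoint],
                                  text=True)
    # first line is the "Avail" header, second line is the value in KiB
    return int(out.splitlines()[1].strip())

mountpoint = '/mnt/glusterfs'            # assumed mount path
before = available_kib(mountpoint)

# write a single 1 MiB file, mirroring create_files --fixed-file-size 1M
subprocess.run(['dd', 'if=/dev/zero', 'of=%s/probe.bin' % mountpoint,
                'bs=1M', 'count=1'], check=True)

after = available_kib(mountpoint)
print(round((before - after) / 1024))    # expected to be about 1 for a 1M file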
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from random import choice + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + are_bricks_online, get_all_bricks) +from glustolibs.gluster.glusterfile import occurences_of_pattern_in_file +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_heal_complete) +from glustolibs.gluster.lib_utils import collect_bricks_arequal +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.volume_ops import get_volume_options +from glustolibs.io.utils import collect_mounts_arequal + + +@runs_on([['distributed-replicated', 'replicated', + 'arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestDefaultGranularEntryHeal(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + self.first_client = self.mounts[0].client_system + self.mountpoint = self.mounts[0].mountpoint + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to setup and mount volume") + + def tearDown(self): + + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _bring_bricks_offline(self): + """Brings bricks offline and confirms if they are offline""" + # Select bricks to bring offline from a replica set + subvols_dict = get_subvols(self.mnode, self.volname) + subvols = subvols_dict['volume_subvols'] + self.bricks_to_bring_offline = [] + for subvol in subvols: + self.bricks_to_bring_offline.append(choice(subvol)) + + # Bring bricks offline + ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' % + self.bricks_to_bring_offline) + + ret = are_bricks_offline(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks %s are not offline' + % self.bricks_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + self.bricks_to_bring_offline) + + def _restart_volume_and_bring_all_offline_bricks_online(self): + """Restart volume and bring all offline bricks online""" + + ret = is_heal_complete(self.mnode, self.volname) + self.assertFalse(ret, 'Heal is completed') + g.log.info('Heal is pending') + + ret = bring_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline, + bring_bricks_online_methods=[ + 'volume_start_force']) + self.assertTrue(ret, 'Failed to bring bricks %s online' % + self.bricks_to_bring_offline) + + # Check if bricks are back online or not + ret = 
are_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks not online %s even after restart' % + self.bricks_to_bring_offline) + + g.log.info('Bringing bricks %s online is successful', + self.bricks_to_bring_offline) + + def _wait_for_heal_to_completed(self): + """Check if heal is completed""" + ret = monitor_heal_completion(self.mnode, self.volname, + timeout_period=3600) + self.assertTrue(ret, 'Heal has not yet completed') + + def _check_arequal_on_bricks_with_a_specific_arequal(self, arequal, + brick_list): + """ + Compare an inital arequal checksum with bricks from a given brick list + """ + init_val = arequal[0].splitlines()[-1].split(':')[-1] + ret, arequals = collect_bricks_arequal(brick_list) + self.assertTrue(ret, 'Failed to get arequal on bricks') + for brick_arequal in arequals: + brick_total = brick_arequal.splitlines()[-1].split(':')[-1] + self.assertEqual(init_val, brick_total, 'Arequals not matching') + + @staticmethod + def _add_dir_path_to_brick_list(brick_list): + """Add test_self_heal at the end of brick path""" + dir_brick_list = [] + for brick in brick_list: + dir_brick_list.append('{}/{}'.format(brick, 'mydir')) + return dir_brick_list + + def _check_arequal_checksum_for_the_volume(self): + """ + Check if arequals of mount point and bricks are + are the same. + """ + if self.volume_type == "replicated": + # Check arequals for "replicated" + brick_list = get_all_bricks(self.mnode, self.volname) + dir_brick_list = self._add_dir_path_to_brick_list(brick_list) + + # Get arequal before getting bricks offline + work_dir = '{}/mydir'.format(self.mountpoint) + ret, arequals = collect_mounts_arequal([self.mounts[0]], + path=work_dir) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal before getting bricks offline ' + 'is successful') + + # Get arequal on bricks and compare with mount_point_total + self._check_arequal_on_bricks_with_a_specific_arequal( + arequals, dir_brick_list) + + # Check arequals for "distributed-replicated" + if self.volume_type == "distributed-replicated": + # Get the subvolumes + subvols_dict = get_subvols(self.mnode, self.volname) + num_subvols = len(subvols_dict['volume_subvols']) + + # Get arequals and compare + for i in range(0, num_subvols): + # Get arequal for first brick + brick_list = subvols_dict['volume_subvols'][i] + dir_brick_list = self._add_dir_path_to_brick_list(brick_list) + ret, arequals = collect_bricks_arequal([dir_brick_list[0]]) + self.assertTrue(ret, 'Failed to get arequal on first brick') + + # Get arequal for every brick and compare with first brick + self._check_arequal_on_bricks_with_a_specific_arequal( + arequals, dir_brick_list) + + def test_default_granular_entry_heal(self): + """ + Test case: + 1. Create a cluster. + 2. Create volume start it and mount it. + 3. Check if cluster.granular-entry-heal is ON by default or not. + 4. Check /var/lib/glusterd/<volname>/info for + cluster.granular-entry-heal=on. + 5. Check if option granular-entry-heal is present in the + volume graph or not. + 6. Kill one or two bricks of the volume depending on volume type. + 7. Create all types of files on the volume like text files, hidden + files, link files, dirs, char device, block device and so on. + 8. Bring back the killed brick by restarting the volume. + 9. Wait for heal to complete. + 10. Check arequal-checksum of all the bricks and see if it's proper or + not. 
+ """ + # Check if cluster.granular-entry-heal is ON by default or not + ret = get_volume_options(self.mnode, self.volname, + 'granular-entry-heal') + self.assertEqual(ret['cluster.granular-entry-heal'], 'on', + "Value of cluster.granular-entry-heal not on " + "by default") + + # Check var/lib/glusterd/<volname>/info for + # cluster.granular-entry-heal=on + ret = occurences_of_pattern_in_file(self.mnode, + 'cluster.granular-entry-heal=on', + '/var/lib/glusterd/vols/{}/info' + .format(self.volname)) + self.assertEqual(ret, 1, "Failed get cluster.granular-entry-heal=on in" + " info file") + + # Check if option granular-entry-heal is present in the + # volume graph or not + ret = occurences_of_pattern_in_file(self.first_client, + 'option granular-entry-heal on', + "/var/log/glusterfs/mnt-{}_{}.log" + .format(self.volname, + self.mount_type)) + self.assertTrue(ret > 0, + "Failed to find granular-entry-heal in volume graph") + g.log.info("granular-entry-heal properly set to ON by default") + + # Kill one or two bricks of the volume depending on volume type + self._bring_bricks_offline() + + # Create all types of files on the volume like text files, hidden + # files, link files, dirs, char device, block device and so on + cmd = ("cd {};mkdir mydir;cd mydir;mkdir dir;mkdir .hiddendir;" + "touch file;touch .hiddenfile;mknod blockfile b 1 5;" + "mknod charfile b 1 5; mkfifo pipefile;touch fileforhardlink;" + "touch fileforsoftlink;ln fileforhardlink hardlinkfile;" + "ln -s fileforsoftlink softlinkfile".format(self.mountpoint)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to create files of all types") + + # Bring back the killed brick by restarting the volume Bricks should + # be online again + self._restart_volume_and_bring_all_offline_bricks_online() + + # Wait for heal to complete + self._wait_for_heal_to_completed() + + # Check arequal-checksum of all the bricks and see if it's proper or + # not + self._check_arequal_checksum_for_the_volume() diff --git a/tests/functional/afr/test_dir_gfid_heal_on_all_subvols.py b/tests/functional/afr/test_dir_gfid_heal_on_all_subvols.py new file mode 100644 index 000000000..dcce8b418 --- /dev/null +++ b/tests/functional/afr/test_dir_gfid_heal_on_all_subvols.py @@ -0,0 +1,151 @@ +# Copyright (C) 2017-2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# pylint: disable=too-many-statements, too-many-locals +""" +Description: + Test cases in this module tests whether directory with null gfid + is getting the gfids assigned on both the subvols of a dist-rep + volume when lookup comes on that directory from the mount point. 
+""" + + +from time import sleep +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.glusterfile import (get_fattr, delete_fattr) +from glustolibs.io.utils import get_mounts_stat + + +@runs_on([['replicated', 'distributed-replicated', 'distributed'], + ['glusterfs']]) +class AssignGfidsOnAllSubvols(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts " + "to clients %s" % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + + # Calling GlusterBaseClass setUpClass + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + + # Cleanup Volume + ret = self.unmount_volume_and_cleanup_volume(self.mounts) + if not ret: + raise ExecutionError("Failed to create volume") + g.log.info("Successful in cleaning up Volume %s", self.volname) + + self.get_super_method(self, 'tearDown')() + + def verify_gfid_and_retun_gfid(self, dirname): + dir_gfids = dict() + bricks_list = get_all_bricks(self.mnode, self.volname) + for brick in bricks_list: + brick_node, brick_path = brick.split(":") + + ret = get_fattr(brick_node, '%s/%s' % (brick_path, dirname), + 'trusted.gfid') + if ret is not None: + self.assertIsNotNone(ret, "trusted.gfid is not present on" + "%s/%s" % (brick, dirname)) + dir_gfids.setdefault(dirname, []).append(ret) + for key in dir_gfids: + self.assertTrue(all(value == dir_gfids[key][0] + for value in dir_gfids[key]), + "gfid mismatch for %s" % dirname) + dir_gfid = dir_gfids.values()[0] + return dir_gfid + + def test_dir_gfid_heal_on_all_subvols(self): + """ + - Create a volume and mount it. + - Create a directory on mount and check whether all the bricks have + the same gfid. + - Now delete gfid attr from all but one backend bricks, + - Do lookup from the mount. + - Check whether all the bricks have the same gfid assigned. 
+ """ + + # Create a directory on the mount + cmd = ("/usr/bin/env python %s create_deep_dir -d 0 -l 0 " + "%s/dir1" % (self.script_upload_path, + self.mounts[0].mountpoint)) + ret, _, _ = g.run(self.clients[0], cmd) + self.assertEqual(ret, 0, "Failed to create directory on mountpoint") + g.log.info("Directory created successfully on mountpoint") + + # Verify gfids are same on all the bricks and get dir1 gfid + bricks_list = get_all_bricks(self.mnode, self.volname)[1:] + dir_gfid = self.verify_gfid_and_retun_gfid("dir1") + + # Delete gfid attr from all but one backend bricks + for brick in bricks_list: + brick_node, brick_path = brick.split(":") + ret = delete_fattr(brick_node, '%s/dir1' % (brick_path), + 'trusted.gfid') + self.assertTrue(ret, 'Failed to delete gfid for brick ' + 'path %s:%s/dir1' % (brick_node, brick_path)) + g.log.info("Successfully deleted gfid xattr for %s:%s/dir1", + brick_node, brick_path) + g.log.info("Successfully deleted gfid xattr for dir1 on the " + "following bricks %s", str(bricks_list[1:])) + + # Trigger heal from mount point + sleep(10) + for mount_obj in self.mounts: + g.log.info("Triggering heal for %s:%s", + mount_obj.client_system, mount_obj.mountpoint) + command = ('cd %s; ls -l' % mount_obj.mountpoint) + ret, _, _ = g.run(mount_obj.client_system, command) + self.assertFalse(ret, 'Failed to run lookup ' + 'on %s ' % mount_obj.client_system) + sleep(10) + + ret = get_mounts_stat(self.mounts) + self.assertTrue(ret, "Failed to stat lookup on clients") + g.log.info('stat lookup on clients succeeded') + + # Verify that all gfids for dir1 are same and get the gfid + dir_gfid_new = self.verify_gfid_and_retun_gfid("dir1") + self.assertTrue(all(gfid in dir_gfid for gfid in dir_gfid_new), + 'Previous gfid and new gfid are not equal, ' + 'which is not expected, previous gfid %s ' + 'and new gfid %s' % (dir_gfid, dir_gfid_new)) + g.log.info('gfid heal was successful from client lookup and all ' + 'backend bricks have same gfid xattr, no gfid mismatch') diff --git a/tests/functional/afr/test_dist_to_repl_automatic_heal_should_be_triggered.py b/tests/functional/afr/test_dist_to_repl_automatic_heal_should_be_triggered.py index b49db5336..074e18d09 100755 --- a/tests/functional/afr/test_dist_to_repl_automatic_heal_should_be_triggered.py +++ b/tests/functional/afr/test_dist_to_repl_automatic_heal_should_be_triggered.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,6 +15,7 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
from glusto.core import Glusto as g + from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.brick_libs import (get_all_bricks, @@ -39,16 +40,14 @@ class TestSelfHeal(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -64,7 +63,7 @@ class TestSelfHeal(GlusterBaseClass): def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() self.all_mounts_procs = [] self.io_validation_complete = False @@ -95,7 +94,7 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_dist_to_repl_automatic_heal_should_be_triggered(self): """ @@ -117,12 +116,13 @@ class TestSelfHeal(GlusterBaseClass): for mount_obj in self.mounts: g.log.info("Starting IO on %s:%s", mount_obj.client_system, mount_obj.mountpoint) - cmd = ("python %s create_deep_dirs_with_files " + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " "--dir-length 1 " "--dir-depth 1 " "--max-num-of-dirs 1 " - "--num-of-files 10 %s" % (self.script_upload_path, - mount_obj.mountpoint)) + "--num-of-files 10 %s" % ( + self.script_upload_path, + mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) self.all_mounts_procs.append(proc) @@ -230,8 +230,10 @@ class TestSelfHeal(GlusterBaseClass): for mount_obj in self.mounts: g.log.info("Starting IO on %s:%s", mount_obj.client_system, mount_obj.mountpoint) - cmd = ("python %s create_files -f 100 --fixed-file-size 1k %s" - % (self.script_upload_path, mount_obj.mountpoint)) + cmd = ("/usr/bin/env python %s create_files -f 100 " + "--fixed-file-size 1k %s" % ( + self.script_upload_path, + mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) self.all_mounts_procs.append(proc) diff --git a/tests/functional/afr/test_file_permission_and_mode.py b/tests/functional/afr/test_file_permission_and_mode.py index aabfeb01f..038847aa7 100755 --- a/tests/functional/afr/test_file_permission_and_mode.py +++ b/tests/functional/afr/test_file_permission_and_mode.py @@ -57,7 +57,7 @@ class FileModeAndPermissionsTest(GlusterBaseClass): g.log.info('User %s successfully deleted on %s', user, host) def setUp(self): - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() # Create user qa for mount_object in self.mounts: @@ -96,7 +96,7 @@ class FileModeAndPermissionsTest(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_file_permissions(self): """ diff --git 
a/tests/functional/afr/test_gfid_assignment_on_dist_rep_vol.py b/tests/functional/afr/test_gfid_assignment_on_dist_rep_vol.py index 751275272..2f2bdae88 100644 --- a/tests/functional/afr/test_gfid_assignment_on_dist_rep_vol.py +++ b/tests/functional/afr/test_gfid_assignment_on_dist_rep_vol.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -21,9 +21,10 @@ is getting the gfids assigned on both the subvols of a dist-rep volume when lookup comes on that directory from the mount point. """ - import time + from glusto.core import Glusto as g + from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on from glustolibs.misc.misc_libs import upload_scripts @@ -40,40 +41,40 @@ class AssignGfidsOnAllSubvols(GlusterBaseClass): def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on " "mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, script_local_path) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts " "to clients %s" % cls.clients) g.log.info("Successfully uploaded IO scripts to clients %s", cls.clients) + def setUp(self): + + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + # Setup Volume and Mount Volume - g.log.info("Starting to Setup Volume and Mount Volume") - ret = cls.setup_volume_and_mount_volume(cls.mounts) + ret = self.setup_volume_and_mount_volume(self.mounts) if not ret: raise ExecutionError("Failed to Setup_Volume and Mount_Volume") g.log.info("Successful in Setup Volume and Mount Volume") - @classmethod - def tearDownClass(cls): + def tearDown(self): # Cleanup Volume - g.log.info("Starting to clean up Volume %s", cls.volname) - ret = cls.unmount_volume_and_cleanup_volume(cls.mounts) + ret = self.unmount_volume_and_cleanup_volume(self.mounts) if not ret: raise ExecutionError("Failed to create volume") - g.log.info("Successful in cleaning up Volume %s", cls.volname) + g.log.info("Successful in cleaning up Volume %s", self.volname) - GlusterBaseClass.tearDownClass.im_func(cls) + self.get_super_method(self, 'tearDown')() def verify_gfid(self, dirname): dir_gfids = dict() @@ -113,8 +114,9 @@ class AssignGfidsOnAllSubvols(GlusterBaseClass): # Create a directory on the mount g.log.info("Creating a directory") - cmd = ("python %s create_deep_dir -d 0 -l 0 %s/dir1 " - % (self.script_upload_path, self.mounts[0].mountpoint)) + cmd = "/usr/bin/env python %s create_deep_dir -d 0 -l 0 %s/dir1 " % ( + self.script_upload_path, + self.mounts[0].mountpoint) ret, _, _ = g.run(self.clients[0], cmd) self.assertEqual(ret, 0, "Failed to create directory on mountpoint") g.log.info("Directory created successfully on mountpoint") diff --git a/tests/functional/afr/test_gfid_assignment_on_lookup.py b/tests/functional/afr/test_gfid_assignment_on_lookup.py index b6aa2745e..edd154fc9 100644 --- 
a/tests/functional/afr/test_gfid_assignment_on_lookup.py +++ b/tests/functional/afr/test_gfid_assignment_on_lookup.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -22,7 +22,7 @@ remaining bricks when named lookup comes on those from the mount point. """ -import time +from time import sleep from glusto.core import Glusto as g from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on @@ -30,6 +30,7 @@ from glustolibs.gluster.heal_libs import is_heal_complete from glustolibs.misc.misc_libs import upload_scripts from glustolibs.gluster.brick_libs import get_all_bricks from glustolibs.gluster.glusterfile import get_fattr +from glustolibs.gluster.volume_ops import set_volume_options @runs_on([['replicated'], @@ -40,7 +41,7 @@ class AssignGfidOnLookup(GlusterBaseClass): def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Override Volumes if cls.volume_type == "replicated": @@ -53,54 +54,74 @@ class AssignGfidOnLookup(GlusterBaseClass): # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on " "mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, script_local_path) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts " "to clients %s" % cls.clients) g.log.info("Successfully uploaded IO scripts to clients %s", cls.clients) + def setUp(self): + + self.get_super_method(self, 'setUp')() # Setup Volume and Mount Volume - g.log.info("Starting to Setup Volume and Mount Volume") - ret = cls.setup_volume_and_mount_volume(cls.mounts) + ret = self.setup_volume_and_mount_volume(self.mounts) if not ret: raise ExecutionError("Failed to Setup_Volume and Mount_Volume") g.log.info("Successful in Setup Volume and Mount Volume") - @classmethod - def tearDownClass(cls): + def tearDown(self): # Cleanup Volume - g.log.info("Starting to clean up Volume %s", cls.volname) - ret = cls.unmount_volume_and_cleanup_volume(cls.mounts) + ret = self.unmount_volume_and_cleanup_volume(self.mounts) if not ret: raise ExecutionError("Failed to create volume") - g.log.info("Successful in cleaning up Volume %s", cls.volname) + g.log.info("Successful in cleaning up Volume %s", self.volname) - GlusterBaseClass.tearDownClass.im_func(cls) + self.get_super_method(self, 'tearDown')() def verify_gfid(self, dirname): dir_gfids = dict() bricks_list = get_all_bricks(self.mnode, self.volname) for brick in bricks_list: brick_node, brick_path = brick.split(":") - ret = get_fattr(brick_node, '%s/%s' % (brick_path, dirname), 'trusted.gfid') self.assertIsNotNone(ret, "trusted.gfid is not present on %s/%s" % (brick_path, dirname)) dir_gfids.setdefault(dirname, []).append(ret) - for key in dir_gfids: self.assertTrue(all(value == dir_gfids[key][0] for value in dir_gfids[key]), "gfid mismatch for %s" % dirname) def test_gfid_assignment_on_lookup(self): + ''' + 1) create replicate volume ( 1 * 3 ) + 2. Test the case with default afr options. + 3. 
Test the case with volume option 'self-heal-daemon' + 4) create dirs on bricks from backend. lets say dir1, dir2 and dir3 + 5) From mount point, + echo "hi" >dir1 ->must fail + touch dir2 --> must pass + mkdir dir3 ->must fail + 6) From mount point, + ls -l and find, must list both dir1 and dir2 and dir3 + 7) check on all backend bricks, dir1, dir2 and dir3 should be created + 8) heal info should show zero, and also gfid and other attributes + must exist + ''' + g.log.info("Enable client side healing options") + options = {"metadata-self-heal": "on", + "entry-self-heal": "on", + "data-self-heal": "on"} + ret = set_volume_options(self.mnode, self.volname, options) + self.assertTrue(ret, 'Failed to set options %s' % options) + g.log.info("Successfully set %s for volume %s", + options, self.volname) + g.log.info("Creating directories on the backend.") bricks_list = get_all_bricks(self.mnode, self.volname) i = 0 @@ -112,7 +133,7 @@ class AssignGfidOnLookup(GlusterBaseClass): g.log.info("Created directories on the backend.") # To circumvent is_fresh_file() check in glusterfs code. - time.sleep(2) + sleep(2) # Do named lookup on directories from mount ret, _, err = g.run(self.clients[0], "echo Hi > %s/dir1" @@ -139,8 +160,22 @@ class AssignGfidOnLookup(GlusterBaseClass): "supposed to.") g.log.info("Creation of directory \"dir3\" failed as expected") + g.log.info("Do a named lookup on dirs") + for number in range(1, 4): + ret, _, _ = g.run(self.clients[0], "ls %s/dir%s" + % (self.mounts[0].mountpoint, number)) + ret, _, _ = g.run(self.clients[0], "find %s/dir%s" + % (self.mounts[0].mountpoint, number)) + g.log.info("Named lookup Successful") + # Check if heal is completed - ret = is_heal_complete(self.mnode, self.volname) + counter = 0 + while True: + ret = is_heal_complete(self.mnode, self.volname) + if ret or counter > 30: + break + counter += 1 + sleep(2) self.assertTrue(ret, 'Heal is not complete') g.log.info('Heal is completed successfully') diff --git a/tests/functional/afr/test_gfid_heal.py b/tests/functional/afr/test_gfid_heal.py index 589a420a0..85147452d 100644 --- a/tests/functional/afr/test_gfid_heal.py +++ b/tests/functional/afr/test_gfid_heal.py @@ -36,8 +36,7 @@ class HealGfidTest(GlusterBaseClass): """ @classmethod def setUpClass(cls): - - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Override Volumes if cls.volume_type == "replicated": @@ -48,7 +47,7 @@ class HealGfidTest(GlusterBaseClass): 'transport': 'tcp'} def setUp(self): - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() g.log.info("Starting to Setup Volume %s", self.volname) self.all_mounts_procs = [] @@ -85,7 +84,7 @@ class HealGfidTest(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def verify_gfid_and_link_count(self, dirname, filename): """ diff --git a/tests/functional/afr/test_gfid_split_brain_resolution.py b/tests/functional/afr/test_gfid_split_brain_resolution.py index a73ee407d..6e74376fc 100644 --- a/tests/functional/afr/test_gfid_split_brain_resolution.py +++ b/tests/functional/afr/test_gfid_split_brain_resolution.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2020 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,260 +14,232 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +from random import choice + from glusto.core import Glusto as g -from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) -from glustolibs.gluster.exceptions import ExecutionError -from glustolibs.gluster.volume_libs import get_subvols + from glustolibs.gluster.brick_libs import (bring_bricks_offline, - bring_bricks_online, - are_bricks_offline, - wait_for_bricks_to_be_online, - get_all_bricks) -from glustolibs.gluster.volume_ops import set_volume_options + bring_bricks_online) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on from glustolibs.gluster.glusterdir import mkdir -from glustolibs.gluster.heal_ops import (enable_self_heal_daemon, - trigger_heal) from glustolibs.gluster.heal_libs import ( - is_volume_in_split_brain, - is_heal_complete, - wait_for_self_heal_daemons_to_be_online, - monitor_heal_completion) -from glustolibs.gluster.glusterfile import GlusterFile + is_volume_in_split_brain, monitor_heal_completion, + wait_for_self_heal_daemons_to_be_online) +from glustolibs.gluster.heal_ops import (enable_self_heal_daemon, trigger_heal, + trigger_heal_full) +from glustolibs.gluster.lib_utils import collect_bricks_arequal, list_files +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.volume_ops import set_volume_options -@runs_on([['replicated', 'distributed-replicated'], - ['glusterfs']]) +# pylint: disable=stop-iteration-return, too-many-locals, too-many-statements +@runs_on([[ + 'replicated', 'distributed-replicated', 'arbiter', 'distributed-arbiter' +], ['glusterfs']]) class TestSelfHeal(GlusterBaseClass): - """ - Description: - Test cases related to - healing in default configuration of the volume - """ - - @classmethod - def setUpClass(cls): - # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) - - # Override replica count to be 3 - if cls.volume_type == "replicated": - cls.volume['voltype'] = { - 'type': 'replicated', - 'replica_count': 3, - 'transport': 'tcp'} - - if cls.volume_type == "distributed-replicated": - cls.volume['voltype'] = { - 'type': 'distributed-replicated', - 'dist_count': 2, - 'replica_count': 3, - 'transport': 'tcp'} - def setUp(self): - # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() - # Setup Volume and Mount Volume - g.log.info("Starting to Setup Volume and Mount Volume") - ret = self.setup_volume_and_mount_volume(mounts=self.mounts, - volume_create_force=False) - if not ret: - raise ExecutionError("Failed to Setup_Volume and Mount_Volume") - g.log.info("Successful in Setup Volume and Mount Volume") + # A single mount is enough for the test + self.mounts = self.mounts[0::-1] - self.bricks_list = get_all_bricks(self.mnode, self.volname) + if not self.setup_volume_and_mount_volume(mounts=self.mounts): + raise ExecutionError('Failed to setup and mount ' + '{}'.format(self.volname)) def tearDown(self): - """ - If test method failed before validating IO, tearDown waits for the - IO's to complete and checks for the IO exit status + if not self.unmount_volume_and_cleanup_volume(mounts=self.mounts): + raise 
ExecutionError('Not able to unmount and cleanup ' + '{}'.format(self.volname)) + self.get_super_method(self, 'tearDown')() - Cleanup and umount volume + @staticmethod + def _get_two_bricks(subvols, arbiter): """ - # Cleanup and umount volume - g.log.info("Starting to Unmount Volume and Cleanup Volume") - ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) - if not ret: - raise ExecutionError("Failed to umount the vol & cleanup Volume") - g.log.info("Successful in umounting the volume and Cleanup") - - # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) - - def toggle_bricks_and_perform_io(self, file_list, brick_list): + Yields two bricks from each subvol for dist/pure X arb/repl volumes + """ + # Get an iterator for py2/3 compatibility + brick_iter = iter(zip(*subvols)) + prev_brick = next(brick_iter) + first_brick = prev_brick + + for index, curr_brick in enumerate(brick_iter, 1): + # `yield` should contain arbiter brick for arbiter type vols + if not (index == 1 and arbiter): + yield prev_brick + curr_brick + prev_brick = curr_brick + # At the end yield first and last brick from a subvol + yield prev_brick + first_brick + + def _get_files_in_brick(self, brick_path, dir_path): """ - Kills bricks, does I/O and brings the brick back up. + Returns files in format of `dir_path/file_name` from the given brick + path """ - # Bring down bricks. - g.log.info("Going to bring down the brick process for %s", brick_list) - ret = bring_bricks_offline(self.volname, brick_list) - self.assertTrue(ret, ("Failed to bring down the bricks. Please " - "check the log file for more details.")) - g.log.info("Brought down the brick process " - "for %s successfully", brick_list) - ret = are_bricks_offline(self.mnode, self.volname, brick_list) - self.assertTrue(ret, 'Bricks %s are not offline' % brick_list) - - # Perform I/O - for filename in file_list: - fpath = self.mounts[0].mountpoint + "/test_gfid_split_brain/" + \ - filename - cmd = ("dd if=/dev/urandom of=%s bs=1024 count=1" % fpath) - ret, _, _ = g.run(self.clients[0], cmd) - self.assertEqual(ret, 0, "Creating %s failed" % fpath) - - # Bring up bricks - ret = bring_bricks_online(self.mnode, self.volname, brick_list) - self.assertTrue(ret, 'Failed to bring brick %s online' % brick_list) - g.log.info('Bringing brick %s online is successful', brick_list) - - # Waiting for bricks to come online - g.log.info("Waiting for brick process to come online") - timeout = 30 - ret = wait_for_bricks_to_be_online(self.mnode, self.volname, timeout) - self.assertTrue(ret, "bricks didn't come online after adding bricks") - g.log.info("Bricks are online") - - def resolve_gfid_split_brain(self, filename, source_brick): + node, path = brick_path.split(':') + files = list_files(node, path, dir_path) + self.assertIsNotNone( + files, 'Unable to get list of files from {}'.format(brick_path)) + + files = [file_name.rsplit('/', 1)[-1] for file_name in files] + return [ + each_file for each_file in files + if each_file in ('file1', 'file2', 'file3') + ] + + def _run_cmd_and_assert(self, cmd): """ - resolves gfid split-brain on files using source-brick option + Run `cmd` on `mnode` and assert for success """ - node, _ = source_brick.split(':') - command = ("gluster volume heal " + self.volname + " split-brain " - "source-brick " + source_brick + " " + filename) - ret, _, _ = g.run(node, command) - self.assertEqual(ret, 0, "command execution not successful") + ret, _, err = g.run(self.mnode, cmd) + self.assertEqual(ret, 0, '`{}` failed with 
{}'.format(cmd, err)) def test_gfid_split_brain_resolution(self): """ - - create gfid split-brain of files and resolves them using source-brick - option of the CLI. + Description: Simulates gfid split brain on multiple files in a dir and + resolve them via `bigger-file`, `mtime` and `source-brick` methods + + Steps: + - Create and mount a replicated volume, create a dir and ~10 data files + - Simulate gfid splits in 9 of the files + - Resolve each 3 set of files using `bigger-file`, `mtime` and + `source-bricks` split-brain resoultion methods + - Trigger and monitor for heal completion + - Validate all the files are healed and arequal matches for bricks in + subvols """ - - # pylint: disable=too-many-statements - # pylint: disable=too-many-locals - - # Disable all self-heals and client-quorum - options = {"self-heal-daemon": "off", - "data-self-heal": "off", - "metadata-self-heal": "off", - "entry-self-heal": "off", - "cluster.quorum-type": "none"} - g.log.info("setting volume options %s", options) - ret = set_volume_options(self.mnode, self.volname, options) - self.assertTrue(ret, ("Unable to set volume option %s for " - "volume %s" % (options, self.volname))) - g.log.info("Successfully set %s for volume %s", options, self.volname) - - # Create dir inside which I/O will be performed. - ret = mkdir(self.mounts[0].client_system, "%s/test_gfid_split_brain" - % self.mounts[0].mountpoint) - self.assertTrue(ret, "mkdir failed") - - # get the subvolumes - g.log.info("Starting to get sub-volumes for volume %s", self.volname) - subvols_dict = get_subvols(self.mnode, self.volname) - num_subvols = len(subvols_dict['volume_subvols']) - g.log.info("Number of subvolumes in volume %s:", num_subvols) - - # Toggle bricks and perform I/O - file_list = ["file1.txt", "file2.txt", "file3.txt", "file4.txt", - "file5.txt", "file6.txt", "file7.txt", "file8.txt", - "file9.txt", "file10.txt"] - brick_index = 0 - offline_bricks = [] - for _ in range(0, 3): - for i in range(0, num_subvols): - subvol_brick_list = subvols_dict['volume_subvols'][i] - offline_bricks.append(subvol_brick_list[brick_index % 3]) - offline_bricks.append(subvol_brick_list[(brick_index+1) % 3]) - self.toggle_bricks_and_perform_io(file_list, offline_bricks) - brick_index += 1 - offline_bricks[:] = [] - - # Enable shd - g.log.info("enabling the self heal daemon") + io_cmd = 'cat /dev/urandom | tr -dc [:space:][:print:] | head -c ' + client, m_point = (self.mounts[0].client_system, + self.mounts[0].mountpoint) + arbiter = self.volume_type.find('arbiter') >= 0 + + # Disable self-heal daemon and set `quorum-type` option to `none` + ret = set_volume_options(self.mnode, self.volname, { + 'self-heal-daemon': 'off', + 'cluster.quorum-type': 'none' + }) + self.assertTrue( + ret, 'Not able to disable `quorum-type` and ' + '`self-heal` daemon volume options') + + # Create required dir and files from the mount + split_dir = 'gfid_split_dir' + file_io = ('cd %s; for i in {1..10}; do ' + io_cmd + + ' 1M > %s/file$i; done;') + ret = mkdir(client, '{}/{}'.format(m_point, split_dir)) + self.assertTrue(ret, 'Unable to create a directory from mount point') + ret, _, _ = g.run(client, file_io % (m_point, split_dir)) + + # `file{4,5,6}` are re-created every time to be used in `bigger-file` + # resolution method + cmd = 'rm -rf {0}/file{1} && {2} {3}M > {0}/file{1}' + split_cmds = { + 1: + ';'.join(cmd.format(split_dir, i, io_cmd, 2) for i in range(1, 7)), + 2: + ';'.join(cmd.format(split_dir, i, io_cmd, 3) for i in range(4, 7)), + 3: ';'.join( + 
cmd.format(split_dir, i, io_cmd, 1) for i in range(4, 10)), + 4: ';'.join( + cmd.format(split_dir, i, io_cmd, 1) for i in range(7, 10)), + } + + # Get subvols and simulate entry split brain + subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + self.assertTrue(subvols, 'Not able to get list of subvols') + msg = ('Unable to bring files under {} dir to entry split brain while ' + '{} are down') + for index, bricks in enumerate(self._get_two_bricks(subvols, arbiter), + 1): + # Bring down two bricks from each subvol + ret = bring_bricks_offline(self.volname, list(bricks)) + self.assertTrue(ret, 'Unable to bring {} offline'.format(bricks)) + + ret, _, _ = g.run(client, + 'cd {}; {}'.format(m_point, split_cmds[index])) + self.assertEqual(ret, 0, msg.format(split_dir, bricks)) + + # Bricks will be brought down only two times in case of arbiter and + # bringing remaining files into split brain for `latest-mtime` heal + if arbiter and index == 2: + ret, _, _ = g.run(client, + 'cd {}; {}'.format(m_point, split_cmds[4])) + self.assertEqual(ret, 0, msg.format(split_dir, bricks)) + + # Bring offline bricks online + ret = bring_bricks_online( + self.mnode, + self.volname, + bricks, + bring_bricks_online_methods='volume_start_force') + self.assertTrue(ret, 'Unable to bring {} online'.format(bricks)) + + # Enable self-heal daemon, trigger heal and assert volume is in split + # brain condition ret = enable_self_heal_daemon(self.mnode, self.volname) - self.assertTrue(ret, "failed to enable self heal daemon") - g.log.info("Successfully enabled the self heal daemon") - - # Wait for self heal processes to come online - g.log.info("Wait for selfheal process to come online") - timeout = 300 - ret = wait_for_self_heal_daemons_to_be_online(self.mnode, self.volname, - timeout) - self.assertTrue(ret, "Self-heal process are not online") - g.log.info("All self heal process are online") - - # Trigger heal + self.assertTrue(ret, 'Failed to enable self heal daemon') + + ret = wait_for_self_heal_daemons_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, 'Not all self heal daemons are online') + ret = trigger_heal(self.mnode, self.volname) - self.assertTrue(ret, 'Starting heal failed') - g.log.info('Index heal launched') + self.assertTrue(ret, 'Unable to trigger index heal on the volume') - # checking if file is in split-brain ret = is_volume_in_split_brain(self.mnode, self.volname) - self.assertTrue(ret, "Files are not in split-brain as expected.") - g.log.info("Files are still in split-brain") - - # First brick of each replica will be used as source-brick - first_brick_list = [] - for i in range(0, num_subvols): - subvol_brick_list = subvols_dict['volume_subvols'][i] - brick = subvol_brick_list[0] - first_brick_list.append(brick) - - # Find which dht subvols the 10 files are present in and trigger heal - for filename in file_list: - fpath = self.mounts[0].mountpoint + "/test_gfid_split_brain/" + \ - filename - gfile = GlusterFile(self.clients[0], fpath) - for brick in first_brick_list: - _, brick_path = brick.split(':') - match = [brick for item in gfile.hashed_bricks if brick_path - in item] - if match: - self.resolve_gfid_split_brain("/test_gfid_split_brain/" + - filename, brick) - - # Trigger heal to complete pending data/metadata heals - ret = trigger_heal(self.mnode, self.volname) - self.assertTrue(ret, 'Starting heal failed') - g.log.info('Index heal launched') + self.assertTrue(ret, 'Volume should be in split brain condition') + + # Select source brick and take note of files in source 
brick + stop = len(subvols[0]) - 1 if arbiter else len(subvols[0]) + source_bricks = [choice(subvol[0:stop]) for subvol in subvols] + files = [ + self._get_files_in_brick(path, split_dir) for path in source_bricks + ] + + # Resolve `file1, file2, file3` gfid split files using `source-brick` + cmd = ('gluster volume heal ' + self.volname + ' split-brain ' + 'source-brick {} /' + split_dir + '/{}') + for index, source_brick in enumerate(source_bricks): + for each_file in files[index]: + run_cmd = cmd.format(source_brick, each_file) + self._run_cmd_and_assert(run_cmd) + + # Resolve `file4, file5, file6` gfid split files using `bigger-file` + cmd = ('gluster volume heal ' + self.volname + + ' split-brain bigger-file /' + split_dir + '/{}') + for each_file in ('file4', 'file5', 'file6'): + run_cmd = cmd.format(each_file) + self._run_cmd_and_assert(run_cmd) + + # Resolve `file7, file8, file9` gfid split files using `latest-mtime` + cmd = ('gluster volume heal ' + self.volname + + ' split-brain latest-mtime /' + split_dir + '/{}') + for each_file in ('file7', 'file8', 'file9'): + run_cmd = cmd.format(each_file) + self._run_cmd_and_assert(run_cmd) + + # Unless `shd` is triggered manually/automatically files will still + # appear in `heal info` + ret = trigger_heal_full(self.mnode, self.volname) + self.assertTrue(ret, 'Unable to trigger full self heal') # Monitor heal completion ret = monitor_heal_completion(self.mnode, self.volname) - self.assertTrue(ret, 'Heal has not yet completed') - - # Check if heal is completed - ret = is_heal_complete(self.mnode, self.volname) - self.assertTrue(ret, 'Heal is not complete') - g.log.info('Heal is completed successfully') - - # Get arequals and compare - for i in range(0, num_subvols): - # Get arequal for first brick - subvol_brick_list = subvols_dict['volume_subvols'][i] - node, brick_path = subvol_brick_list[0].split(':') - command = ('arequal-checksum -p %s ' - '-i .glusterfs -i .landfill -i .trashcan' - % brick_path) - ret, arequal, _ = g.run(node, command) - first_brick_total = arequal.splitlines()[-1].split(':')[-1] - - # Get arequal for every brick and compare with first brick - for brick in subvol_brick_list[1:]: - node, brick_path = brick.split(':') - command = ('arequal-checksum -p %s ' - '-i .glusterfs -i .landfill -i .trashcan' - % brick_path) - ret, brick_arequal, _ = g.run(node, command) - self.assertFalse(ret, - 'Failed to get arequal on brick %s' - % brick) - g.log.info('Getting arequal for %s is successful', brick) - brick_total = brick_arequal.splitlines()[-1].split(':')[-1] - - self.assertEqual(first_brick_total, brick_total, - 'Arequals for subvol and %s are not equal' - % brick) - g.log.info('Arequals for subvol and %s are equal', brick) + self.assertTrue( + ret, 'All files in volume should be healed after healing files via' + ' `source-brick`, `bigger-file`, `latest-mtime` methods manually') + + # Validate normal file `file10` and healed files don't differ in + # subvols via an `arequal` + for subvol in subvols: + # Disregard last brick if volume is of arbiter type + ret, arequal = collect_bricks_arequal(subvol[0:stop]) + self.assertTrue( + ret, 'Unable to get `arequal` checksum on ' + '{}'.format(subvol[0:stop])) + self.assertEqual( + len(set(arequal)), 1, 'Mismatch of `arequal` ' + 'checksum among {} is identified'.format(subvol[0:stop])) + + g.log.info('Pass: Resolution of gfid split-brain via `source-brick`, ' + '`bigger-file` and `latest-mtime` methods is complete') diff --git a/tests/functional/afr/test_git_clone.py 
b/tests/functional/afr/test_git_clone.py new file mode 100644 index 000000000..02871cb8b --- /dev/null +++ b/tests/functional/afr/test_git_clone.py @@ -0,0 +1,80 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.misc.misc_libs import git_clone_and_compile +from glustolibs.gluster.volume_ops import set_volume_options + + +@runs_on([['replicated', 'distributed-replicated', 'dispersed', + 'distributed-dispersed', 'arbiter', 'distributed-arbiter'], + ['glusterfs']]) +class TestGitCloneOnGlusterVolume(GlusterBaseClass): + + def setUp(self): + self.get_super_method(self, 'setUp')() + + # Setup volume and mount it on one client + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + self.get_super_method(self, 'tearDown')() + + # Unmount from the one client and cleanup the volume + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Unable to unmount and cleanup volume") + g.log.info("Unmount and volume cleanup is successful") + + def _run_git_clone(self, options): + """Run git clone on the client""" + + repo = 'https://github.com/gluster/glusterfs.git' + cloned_repo_dir = (self.mounts[0].mountpoint + '/' + + repo.split('/')[-1].rstrip('.git')) + if options: + cloned_repo_dir = (self.mounts[0].mountpoint + '/' + "perf-" + + repo.split('/')[-1].rstrip('.git')) + ret = git_clone_and_compile(self.mounts[0].client_system, + repo, cloned_repo_dir, False) + self.assertTrue(ret, "Unable to clone {} repo on {}". + format(repo, cloned_repo_dir)) + g.log.info("Repo %s cloned successfully ", repo) + + def test_git_clone_on_gluster_volume(self): + """ + Test Steps: + 1. Create a volume and mount it on one client + 2. git clone the glusterfs repo on the glusterfs volume. + 3. Set the performance options to off + 4. Repeat step 2 on a different directory. 
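[Editor's sketch] _run_git_clone above derives the on-volume clone directory from the repository URL with rstrip('.git'). A small sketch of that derivation with a hypothetical mount path; note that str.rstrip removes a set of trailing characters rather than a literal suffix, which happens to be harmless for 'glusterfs.git', so a suffix-aware variant is shown:

    def clone_dir(mountpoint, repo_url, prefix=''):
        # Build '<mountpoint>/<prefix><repo name without .git>'.
        name = repo_url.split('/')[-1]
        if name.endswith('.git'):
            name = name[:-len('.git')]
        return '{}/{}{}'.format(mountpoint, prefix, name)

    assert clone_dir('/mnt/testvol', 'https://github.com/gluster/glusterfs.git',
                     prefix='perf-') == '/mnt/testvol/perf-glusterfs'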
+ """ + self._run_git_clone(False) + + # Disable the performance cache options on the volume + self.options = {'performance.quick-read': 'off', + 'performance.stat-prefetch': 'off', + 'performance.open-behind': 'off', + 'performance.write-behind': 'off', + 'performance.client-io-threads': 'off'} + ret = set_volume_options(self.mnode, self.volname, self.options) + self.assertTrue(ret, "Unable to set the volume options") + g.log.info("Volume options set successfully") + + self._run_git_clone(True) diff --git a/tests/functional/afr/test_glustershd_on_all_volume_types.py b/tests/functional/afr/test_glustershd_on_all_volume_types.py index a7139114a..c2d38d806 100755 --- a/tests/functional/afr/test_glustershd_on_all_volume_types.py +++ b/tests/functional/afr/test_glustershd_on_all_volume_types.py @@ -41,7 +41,7 @@ class SelfHealDaemonProcessTestsWithMultipleVolumes(GlusterBaseClass): which is used in tests """ # calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() cls.default_volume_type_config = { 'replicated': { @@ -118,7 +118,7 @@ class SelfHealDaemonProcessTestsWithMultipleVolumes(GlusterBaseClass): g.log.info("Successfully Cleanedup all Volumes") # calling GlusterBaseClass tearDownClass - GlusterBaseClass.tearDownClass.im_func(cls) + cls.get_super_method(cls, 'tearDownClass')() def test_glustershd_on_all_volume_types(self): """ @@ -219,7 +219,7 @@ class SelfHealDaemonProcessTestsWithMultipleVolumes(GlusterBaseClass): (volume_type_info_for_replicate_after_adding_bricks ['volume_type_info']['typeStr']) - self.assertEquals( + self.assertEqual( volume_type_for_replicate_after_adding_bricks, 'Distributed-Replicate', ("Replicate volume type is not converted to " diff --git a/tests/functional/afr/test_glustershd_on_newly_probed_server.py b/tests/functional/afr/test_glustershd_on_newly_probed_server.py index 1c7fde3f5..68fb19be2 100755 --- a/tests/functional/afr/test_glustershd_on_newly_probed_server.py +++ b/tests/functional/afr/test_glustershd_on_newly_probed_server.py @@ -40,7 +40,7 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): """ def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() self.extra_servers = self.servers[-2:] self.servers = self.servers[:-2] @@ -94,7 +94,7 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): g.log.info("Peer probe success for detached servers %s", self.servers) # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_glustershd_on_newly_probed_server(self): """ @@ -171,10 +171,10 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): self.assertFalse(ret, ("Self Heal Daemon process is running even " "after stopping volume %s" % self.volname)) for node in pids: - self.assertEquals(pids[node][0], -1, ("Self Heal Daemon is still " - "running on node %s even " - "after stopping all " - "volumes" % node)) + self.assertEqual( + pids[node][0], -1, + "Self Heal Daemon is still running on node %s even " + "after stopping all volumes" % node) g.log.info("Expected : No self heal daemon process is running " "after stopping all volumes") diff --git a/tests/functional/afr/test_heal_command_unsuccessful_as_bricks_down.py b/tests/functional/afr/test_heal_command_unsuccessful_as_bricks_down.py index 2e4ddb9a1..366736bf6 100755 --- a/tests/functional/afr/test_heal_command_unsuccessful_as_bricks_down.py +++ 
b/tests/functional/afr/test_heal_command_unsuccessful_as_bricks_down.py @@ -36,16 +36,14 @@ class TestSelfHeal(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -61,7 +59,7 @@ class TestSelfHeal(GlusterBaseClass): def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() self.all_mounts_procs = [] self.io_validation_complete = False @@ -92,7 +90,7 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_heal_command_unsuccessful_as_bricks_down(self): """ diff --git a/tests/functional/afr/test_heal_fail_1x3.py b/tests/functional/afr/test_heal_fail_1x3.py index 596b145a5..e47832936 100644 --- a/tests/functional/afr/test_heal_fail_1x3.py +++ b/tests/functional/afr/test_heal_fail_1x3.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,6 +17,7 @@ # pylint: disable=too-many-statements, too-many-locals from glusto.core import Glusto as g + from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on from glustolibs.gluster.volume_ops import set_volume_options @@ -36,16 +37,14 @@ class TestSelfHeal(GlusterBaseClass): def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on " "mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, script_local_path) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts " "to clients %s" % cls.clients) @@ -67,17 +66,17 @@ class TestSelfHeal(GlusterBaseClass): raise ExecutionError("Failed to Setup_Volume and Mount_Volume") g.log.info("Successful in Setup Volume and Mount Volume") - @classmethod - def tearDownClass(cls): + def tearDown(self): # Cleanup Volume - g.log.info("Starting to clean up Volume %s", cls.volname) - ret = cls.unmount_volume_and_cleanup_volume(cls.mounts) + g.log.info("Starting to clean up Volume %s", self.volname) + ret = self.unmount_volume_and_cleanup_volume(self.mounts) if not ret: raise ExecutionError("Failed to create volume") - g.log.info("Successful in cleaning up Volume %s", cls.volname) + g.log.info("Successful in cleaning up Volume %s", 
self.volname) - GlusterBaseClass.tearDownClass.im_func(cls) + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() def test_heal_gfid_1x3(self): @@ -101,9 +100,10 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("creating a file from mount point") all_mounts_procs = [] for mount_obj in self.mounts: - cmd = ("python %s create_files " - "-f 1 --base-file-name test_file --fixed-file-size 10k %s" - % (self.script_upload_path, mount_obj.mountpoint)) + cmd = ("/usr/bin/env python %s create_files -f 1 " + "--base-file-name test_file --fixed-file-size 10k %s" % ( + self.script_upload_path, + mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) all_mounts_procs.append(proc) @@ -133,9 +133,10 @@ class TestSelfHeal(GlusterBaseClass): "from mount point") all_mounts_procs = [] for mount_obj in self.mounts: - cmd = ("python %s create_files " - "-f 1 --base-file-name test_file --fixed-file-size 1M %s" - % (self.script_upload_path, mount_obj.mountpoint)) + cmd = ("/usr/bin/env python %s create_files -f 1 " + "--base-file-name test_file --fixed-file-size 1M %s" % ( + self.script_upload_path, + mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) all_mounts_procs.append(proc) diff --git a/tests/functional/afr/test_heal_info_should_have_fixed_fields.py b/tests/functional/afr/test_heal_info_should_have_fixed_fields.py index ec9c1d95f..11a39f794 100644 --- a/tests/functional/afr/test_heal_info_should_have_fixed_fields.py +++ b/tests/functional/afr/test_heal_info_should_have_fixed_fields.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -13,8 +13,8 @@ # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- from glusto.core import Glusto as g + from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.brick_libs import (select_bricks_to_bring_offline, @@ -41,16 +41,14 @@ class VerifySelfHealTriggersHealCommand(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -59,7 +57,7 @@ class VerifySelfHealTriggersHealCommand(GlusterBaseClass): def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() self.all_mounts_procs = [] self.io_validation_complete = False @@ -94,7 +92,7 @@ class VerifySelfHealTriggersHealCommand(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_heal_info_should_have_fixed_fields(self): """ @@ -111,9 +109,10 @@ class VerifySelfHealTriggersHealCommand(GlusterBaseClass): mount_obj.client_system, mount_obj.mountpoint) # Create files g.log.info('Creating files...') - command = ("python %s create_deep_dirs_with_files " - "-d 2 -l 2 -f 50 %s" - % (self.script_upload_path, mount_obj.mountpoint)) + command = ("/usr/bin/env python %s create_deep_dirs_with_files " + "-d 2 -l 2 -f 50 %s" % ( + self.script_upload_path, + mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, command, user=mount_obj.user) @@ -123,10 +122,7 @@ class VerifySelfHealTriggersHealCommand(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks'])) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) diff --git a/tests/functional/afr/test_heal_split_brain_command.py b/tests/functional/afr/test_heal_split_brain_command.py new file mode 100644 index 000000000..c924e8910 --- /dev/null +++ b/tests/functional/afr/test_heal_split_brain_command.py @@ -0,0 +1,264 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g + +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, get_all_bricks) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.glusterfile import get_fattr +from glustolibs.gluster.heal_libs import is_volume_in_split_brain +from glustolibs.gluster.heal_ops import heal_info, heal_info_split_brain +from glustolibs.gluster.volume_ops import set_volume_options + + +# pylint: disable=too-many-locals, too-many-statements +@runs_on([['arbiter', 'replicated'], ['glusterfs']]) +class TestSplitBrain(GlusterBaseClass): + def setUp(self): + self.get_super_method(self, 'setUp')() + + # A single mount is enough for all the tests + self.mounts = [self.mounts[0]] + + if not self.setup_volume_and_mount_volume(mounts=self.mounts): + raise ExecutionError('Failed to setup and mount ' + '{}'.format(self.volname)) + + def tearDown(self): + if not self.unmount_volume_and_cleanup_volume(mounts=self.mounts): + raise ExecutionError('Not able to unmount and cleanup ' + '{}'.format(self.volname)) + self.get_super_method(self, 'tearDown')() + + def _run_cmd_and_validate(self, client, cmd, paths): + """ + Run `cmd` from `paths` on `client` + """ + for path in paths: + ret, _, _ = g.run(client, cmd % path) + self.assertEqual( + ret, 0, 'Unable to perform `{}` from `{}` on `{}`'.format( + cmd, path, client)) + + @staticmethod + def _transform_gfids(gfids): + """ + Returns list of `gfids` joined by `-` at required places + + Example of one elemnt: + Input: 0xd4653ea0289548eb81b35c91ffb73eff + Returns: d4653ea0-2895-48eb-81b3-5c91ffb73eff + """ + split_pos = [10, 14, 18, 22] + rout = [] + for gfid in gfids: + rout.append('-'.join( + gfid[start:stop] + for start, stop in zip([2] + split_pos, split_pos + [None]))) + return rout + + def test_split_brain_from_heal_command(self): + """ + Description: Simulate and validate data, metadata and entry split brain + + Steps: + - Create and mount a replicated volume and disable quorum, self-heal + deamon + - Create ~10 files from the mount point and simulate data, metadata + split-brain for 2 files each + - Create a dir with some files and simulate entry/gfid split brain + - Validate volume successfully recognizing split-brain + - Validate a lookup on split-brain files fails with EIO error on mount + - Validate `heal info` and `heal info split-brain` command shows only + the files that are in split-brain + - Validate new files and dir's can be created from the mount + """ + io_cmd = 'cat /dev/urandom | tr -dc [:space:][:print:] | head -c ' + client, m_point = (self.mounts[0].client_system, + self.mounts[0].mountpoint) + arbiter = self.volume_type.find('arbiter') >= 0 + + # Disable self-heal daemon and set `quorum-type` option to `none` + ret = set_volume_options(self.mnode, self.volname, { + 'self-heal-daemon': 'off', + 'cluster.quorum-type': 'none' + }) + self.assertTrue( + ret, 'Not able to disable `quorum-type` and ' + '`self-heal` daemon volume options') + + # Create required dir's from the mount + fqpath = '{}/dir'.format(m_point) + file_io = ('cd %s; for i in {1..6}; do ' + io_cmd + + ' 2M > file$i; done;') + file_cmd = 'cd %s; touch 
file{7..10}' + ret = mkdir(client, fqpath) + self.assertTrue(ret, 'Unable to create a directory from mount point') + + # Create empty files and data files + for cmd in (file_io, file_cmd): + self._run_cmd_and_validate(client, cmd, [m_point, fqpath]) + + all_bricks = get_all_bricks(self.mnode, self.volname) + self.assertIsNotNone( + all_bricks, 'Unable to get list of bricks ' + 'associated with the volume') + + # Data will be appended to the files `file1, file2` resulting in data + # split brain + data_split_cmd = ';'.join(io_cmd + '2M >> ' + each_file + for each_file in ('file1', 'file2')) + + # File permissions will be changed for `file4, file5` to result in + # metadata split brain + meta_split_cmd = ';'.join('chmod 0555 ' + each_file + for each_file in ('file4', 'file5')) + + # Files will be deleted and created with data to result in data, + # metadata split brain on files and entry(gfid) split brain on dir + entry_split_cmd = ';'.join('rm -f ' + each_file + ' && ' + io_cmd + + ' 2M > ' + each_file + for each_file in ('dir/file1', 'dir/file2')) + + # Need to always select arbiter(3rd) brick if volume is arbiter type or + # any two bricks for replicated volume + for bricks in zip(all_bricks, all_bricks[1:] + [all_bricks[0]]): + + # Skip iteration if volume type is arbiter and `bricks` doesn't + # contain arbiter brick + if arbiter and (all_bricks[-1] not in bricks): + continue + + # Bring bricks offline + ret = bring_bricks_offline(self.volname, list(bricks)) + self.assertTrue(ret, 'Unable to bring {} offline'.format(bricks)) + + # Run cmd to bring files into split brain + for cmd, msg in ((data_split_cmd, 'data'), + (meta_split_cmd, 'meta'), (entry_split_cmd, + 'entry')): + ret, _, _ = g.run(client, 'cd {}; {}'.format(m_point, cmd)) + self.assertEqual( + ret, 0, 'Unable to run cmd for bringing files ' + 'into {} split brain'.format(msg)) + + # Bring offline bricks online + ret = bring_bricks_online( + self.mnode, + self.volname, + bricks, + bring_bricks_online_methods='volume_start_force') + self.assertTrue(ret, 'Unable to bring {} online'.format(bricks)) + + # Validate volume is in split-brain + self.assertTrue(is_volume_in_split_brain(self.mnode, self.volname), + 'Volume should be in split-brain') + + # Validate `head` lookup on split brain files fails with EIO + for each_file in ('file1', 'file2', 'file4', 'file5', 'dir/file1', + 'dir/file2'): + ret, _, err = g.run(client, + 'cd {}; head {}'.format(m_point, each_file)) + self.assertNotEqual( + ret, 0, 'Lookup on split-brain file {} should ' + 'fail'.format(each_file)) + self.assertIn( + 'Input/output error', err, + 'File {} should result in EIO error'.format(each_file)) + + # Validate presence of split-brain files and absence of other files in + # `heal info` and `heal info split-brain` commands + ret, info, _ = heal_info(self.mnode, self.volname) + self.assertEqual(ret, 0, 'Unable to query for `heal info`') + ret, info_spb, _ = heal_info_split_brain(self.mnode, self.volname) + self.assertEqual(ret, 0, 'Unable to query for `heal info split-brain`') + + # Collect `gfid's` of files in data and metadata split-brain + common_gfids = [] + host, path = all_bricks[0].split(':') + for each_file in ('file1', 'file2', 'file4', 'file5', 'dir'): + fattr = get_fattr(host, path + '/{}'.format(each_file), + 'trusted.gfid') + self.assertIsNotNone( + fattr, 'Unable to get `gfid` for {}'.format(each_file)) + common_gfids.append(fattr) + + # GFID for files under an entry split brain dir differs from it's peers + uniq_gfids = [] + for brick in 
all_bricks[:-1] if arbiter else all_bricks: + host, path = brick.split(':') + for each_file in ('dir/file1', 'dir/file2'): + fattr = get_fattr(host, path + '/{}'.format(each_file), + 'trusted.gfid') + self.assertIsNotNone( + fattr, 'Unable to get `gfid` for {}'.format(each_file)) + uniq_gfids.append(fattr) + + # Transform GFIDs to match against o/p of `heal info` and `split-brain` + common_gfids[:] = self._transform_gfids(common_gfids) + uniq_gfids[:] = self._transform_gfids(uniq_gfids) + + # Just enough validation by counting occurrences and asserting success + common_files = ['/file1 -', '/file2 -', '/file4', '/file5', '/dir '] + uniq_files = ['/dir/file1', '/dir/file2'] + + # Common files should occur 3 times each in `heal info` and + # `heal info split-brain` or 2 times for arbiter + occur = 2 if arbiter else 3 + for each_file, gfid in zip(common_files, common_gfids): + + # Check against `heal info` cmd + self.assertEqual( + info.count(gfid) + info.count(each_file), occur, + 'File {} with gfid {} should exist in `heal info` ' + 'command'.format(each_file[:6], gfid)) + + # Check against `heal info split-brain` cmd + self.assertEqual( + info_spb.count(gfid) + info_spb.count(each_file[:6].rstrip()), + occur, 'File {} with gfid {} should exist in `heal info ' + 'split-brain` command'.format(each_file[:6], gfid)) + + # Entry split files will be listed only in `heal info` cmd + for index, each_file in enumerate(uniq_files): + + # Collect file and its associated gfids + entries = (uniq_files + uniq_gfids)[index::2] + count = sum(info.count(entry) for entry in entries) + self.assertEqual( + count, occur, 'Not able to find existence of ' + 'entry split brain file {} in `heal info`'.format(each_file)) + + # Assert no other file is counted as in split-brain + for cmd, rout, exp_str in (('heal info', info, 'entries: 7'), + ('heal info split-brain', info_spb, + 'split-brain: 5')): + self.assertEqual( + rout.count(exp_str), occur, 'Each node should ' + 'list only {} entries in {} command'.format(exp_str[-1], cmd)) + + # Validate new files and a dir can be created from mount + fqpath = '{}/temp'.format(m_point) + ret = mkdir(client, fqpath) + self.assertTrue( + ret, 'Unable to create a dir from mount post split-brain of files') + for cmd in (file_io, file_cmd): + self._run_cmd_and_validate(client, cmd, [fqpath]) + + g.log.info('Pass: Validated data, metadata and entry split brain') diff --git a/tests/functional/afr/test_healed_and_heal_failed_command.py b/tests/functional/afr/test_healed_and_heal_failed_command.py new file mode 100644 index 000000000..c02ed6514 --- /dev/null +++ b/tests/functional/afr/test_healed_and_heal_failed_command.py @@ -0,0 +1,104 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
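+ +# Illustrative sketch of the CLI behaviour this test asserts (assumed from +# the assertions below, not commands copied from the patch): +# gluster volume heal <volname> info healed -> errors out with a Usage message +# gluster volume heal <volname> info heal-failed -> errors out with a Usage message +# gluster volume help | grep -i heal -> lists neither 'healed' nor 'heal-failed'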
+ +from random import choice + +from glusto.core import Glusto as g + +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + get_online_bricks_list) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.heal_ops import heal_info_heal_failed, heal_info_healed +from glustolibs.misc.misc_libs import upload_scripts + + +@runs_on([['replicated'], ['glusterfs', 'nfs']]) +class TestHealedAndHealFailedCommand(GlusterBaseClass): + @classmethod + def setUpClass(cls): + cls.get_super_method(cls, 'setUpClass')() + cls.script_path = '/usr/share/glustolibs/io/scripts/file_dir_ops.py' + if not upload_scripts(cls.clients, cls.script_path): + raise ExecutionError('Failed to upload IO scripts to client') + + def setUp(self): + self.get_super_method(self, 'setUp')() + self.mounts = [self.mounts[0]] + if not self.setup_volume_and_mount_volume(mounts=self.mounts): + raise ExecutionError('Failed to setup and mount ' + '{}'.format(self.volname)) + + def tearDown(self): + if not self.unmount_volume_and_cleanup_volume(mounts=self.mounts): + raise ExecutionError('Not able to unmount and cleanup ' + '{}'.format(self.volname)) + self.get_super_method(self, 'tearDown')() + + def test_healed_and_heal_failed_command(self): + """ + Description: Validate absence of `healed` and `heal-failed` options + + Steps: + - Create and mount a replicated volume + - Kill one of the bricks and write IO from mount point + - Verify `gluster volume heal <volname> info healed` and `gluster + volume heal <volname> info heal-failed` command results in error + - Validate `gluster volume help` doesn't list `healed` and + `heal-failed` commands + """ + + client, m_point = (self.mounts[0].client_system, + self.mounts[0].mountpoint) + + # Kill one of the bricks in the volume + brick_list = get_online_bricks_list(self.mnode, self.volname) + self.assertIsNotNone(brick_list, 'Unable to get online bricks list') + ret = bring_bricks_offline(self.volname, choice(brick_list)) + self.assertTrue(ret, 'Unable to kill one of the bricks in the volume') + + # Fill IO in the mount point + cmd = ('/usr/bin/env python {} ' + 'create_deep_dirs_with_files --dir-depth 10 ' + '--fixed-file-size 1M --num-of-files 50 ' + '--dirname-start-num 1 {}'.format(self.script_path, m_point)) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, 'Not able to fill directory with IO') + + # Verify `gluster volume heal <volname> info healed` results in error + cmd = 'gluster volume heal <volname> info' + ret, _, err = heal_info_healed(self.mnode, self.volname) + self.assertNotEqual(ret, 0, '`%s healed` should result in error' % cmd) + self.assertIn('Usage', err, '`%s healed` should list `Usage`' % cmd) + + # Verify `gluster volume heal <volname> info heal-failed` errors out + ret, _, err = heal_info_heal_failed(self.mnode, self.volname) + self.assertNotEqual(ret, 0, + '`%s heal-failed` should result in error' % cmd) + self.assertIn('Usage', err, + '`%s heal-failed` should list `Usage`' % cmd) + + # Verify absence of `healed` nd `heal-failed` commands in `volume help` + cmd = 'gluster volume help | grep -i heal' + ret, rout, _ = g.run(self.mnode, cmd) + self.assertEqual( + ret, 0, 'Unable to query help content from `gluster volume help`') + self.assertNotIn( + 'healed', rout, '`healed` string should not exist ' + 'in `gluster volume help` command') + self.assertNotIn( + 'heal-failed', rout, '`heal-failed` string should ' + 'not exist in `gluster volume help` 
command') diff --git a/tests/functional/afr/test_manual_heal_full_should_trigger_heal.py b/tests/functional/afr/test_manual_heal_full_should_trigger_heal.py new file mode 100644 index 000000000..4cfcbd01f --- /dev/null +++ b/tests/functional/afr/test_manual_heal_full_should_trigger_heal.py @@ -0,0 +1,182 @@ +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.brick_ops import add_brick +from glustolibs.gluster.volume_libs import get_volume_type_info +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_heal_complete, + is_volume_in_split_brain) +from glustolibs.gluster.heal_ops import trigger_heal_full +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.gluster.lib_utils import (form_bricks_list, + collect_bricks_arequal) +from glustolibs.io.utils import (validate_io_procs, + wait_for_io_to_complete, + collect_mounts_arequal) + + +@runs_on([['distributed'], ['glusterfs']]) +class TestSelfHeal(GlusterBaseClass): + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + # Define x1 distributed volume + cls.volume['voltype'] = { + 'type': 'distributed', + 'dist_count': 1, + 'transport': 'tcp'} + + def setUp(self): + + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + self.all_mounts_procs, self.io_validation_complete = [], False + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + + # If test method failed before validating IO, tearDown waits for the + # IO's to complete and checks for the IO exit status + if not self.io_validation_complete: + ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts) + if not ret: + raise ExecutionError("IO failed on some of the clients") + g.log.info("IO is successful on all mounts") + + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the 
volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_manual_heal_full_should_trigger_heal(self): + """ + - Create a single brick volume + - Add some files and directories + - Get arequal from mountpoint + - Add-brick such that this brick makes the volume a replica vol 1x3 + - Start heal full + - Make sure heal is completed + - Get arequals from all bricks and compare with arequal from mountpoint + """ + # pylint: disable=too-many-statements,too-many-locals + # Start IO on mounts + self.all_mounts_procs = [] + for mount_obj in self.mounts: + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dir-length 1 --dir-depth 1 --max-num-of-dirs 1 " + "--num-of-files 10 %s" % (self.script_upload_path, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + self.all_mounts_procs.append(proc) + g.log.info("IO on %s:%s is started successfully", + mount_obj.client_system, mount_obj.mountpoint) + self.io_validation_complete = False + + # Validate IO + ret = validate_io_procs(self.all_mounts_procs, self.mounts) + self.assertTrue(ret, "IO failed on some of the clients") + self.io_validation_complete = True + g.log.info("IO is successful on all mounts") + + # Get arequal for mount before adding bricks + ret, arequals = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal after healing is successful') + mount_point_total = arequals[0].splitlines()[-1].split(':')[-1] + + # Form brick list to add + bricks_to_add = form_bricks_list(self.mnode, self.volname, 2, + self.servers, self.all_servers_info) + g.log.info('Brick list to add: %s', bricks_to_add) + + # Add bricks + ret, _, _ = add_brick(self.mnode, self.volname, bricks_to_add, + replica_count=3) + self.assertFalse(ret, "Failed to add bricks %s" % bricks_to_add) + g.log.info("Adding bricks is successful on volume %s", self.volname) + + # Make sure the newly added bricks are available in the volume + # get the bricks for the volume + bricks_list = get_all_bricks(self.mnode, self.volname) + for brick in bricks_to_add: + self.assertIn(brick, bricks_list, + 'Brick %s is not in brick list' % brick) + g.log.info('New bricks are present in the volume') + + # Make sure volume change from distribute to replicate volume + vol_info_dict = get_volume_type_info(self.mnode, self.volname) + vol_type = vol_info_dict['volume_type_info']['typeStr'] + self.assertEqual('Replicate', vol_type, + 'Volume type is not converted to Replicate ' + 'after adding bricks') + g.log.info('Volume type is successfully converted to Replicate ' + 'after adding bricks') + + # Start healing + ret = trigger_heal_full(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not started') + g.log.info('Healing is started') + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + # Get arequal on bricks and compare with mount_point_total + # It should be the same + ret, arequals = collect_bricks_arequal(bricks_list) + self.assertTrue(ret, 'Failed to get arequal on 
bricks') + for arequal in arequals: + brick_total = arequal.splitlines()[-1].split(':')[-1] + self.assertEqual(mount_point_total, brick_total, + 'Arequals for mountpoint and %s are not equal') + g.log.info('All arequals are equal for replicated') diff --git a/tests/functional/afr/test_manual_heal_should_trigger_heal.py b/tests/functional/afr/test_manual_heal_should_trigger_heal.py index bae9464cd..89ca9fef5 100755 --- a/tests/functional/afr/test_manual_heal_should_trigger_heal.py +++ b/tests/functional/afr/test_manual_heal_should_trigger_heal.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -13,8 +13,8 @@ # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - from glusto.core import Glusto as g + from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.brick_libs import get_all_bricks @@ -37,16 +37,14 @@ class TestSelfHeal(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -62,7 +60,7 @@ class TestSelfHeal(GlusterBaseClass): def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() self.all_mounts_procs = [] self.io_validation_complete = False @@ -93,7 +91,7 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_manual_heal_should_trigger_heal(self): """ @@ -112,12 +110,13 @@ class TestSelfHeal(GlusterBaseClass): for mount_obj in self.mounts: g.log.info("Starting IO on %s:%s", mount_obj.client_system, mount_obj.mountpoint) - cmd = ("python %s create_deep_dirs_with_files " + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " "--dir-length 1 " "--dir-depth 1 " "--max-num-of-dirs 1 " - "--num-of-files 10 %s" % (self.script_upload_path, - mount_obj.mountpoint)) + "--num-of-files 10 %s" % ( + self.script_upload_path, + mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) self.all_mounts_procs.append(proc) diff --git a/tests/functional/afr/test_metadata_self_heal_client_side_heal.py b/tests/functional/afr/test_metadata_self_heal_client_side_heal.py new file mode 100644 index 000000000..166059276 --- /dev/null +++ b/tests/functional/afr/test_metadata_self_heal_client_side_heal.py @@ -0,0 +1,606 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# pylint: disable=too-many-locals,too-many-statements,too-many-branches + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.glusterdir import get_dir_contents +from glustolibs.gluster.glusterfile import get_file_stat +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_ops import set_volume_options +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_heal_complete, + is_volume_in_split_brain) +from glustolibs.gluster.lib_utils import (add_user, del_user, + collect_bricks_arequal) +from glustolibs.gluster.mount_ops import (umount_volume, + mount_volume) +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + get_all_bricks) +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import (validate_io_procs, + list_all_files_and_dirs_mounts, + wait_for_io_to_complete, + collect_mounts_arequal) + + +@runs_on([['distributed-replicated', 'replicated'], + ['glusterfs']]) +class TestAFRMetaDataSelfHealClientSideHeal(GlusterBaseClass): + @classmethod + def setUpClass(cls): + + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + if not upload_scripts(cls.clients, [cls.script_upload_path]): + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + + self.get_super_method(self, 'setUp')() + self.all_mounts_procs, self.io_validation_complete = [], False + + # Create users + self.users = ['qa_func', 'qa_system', 'qa_perf', 'qa_all'] + for mount_object in self.mounts: + for user in self.users: + if not add_user(mount_object.client_system, user): + raise ExecutionError("Failed to create user " + "{}".format(user)) + g.log.info("Successfully created all users.") + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + If test method failed before validating IO, tearDown waits for the + IO's to complete and checks for the IO exit status. 
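+ Users created in setUp are deleted here as well.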
+ Cleanup and umount volume + """ + if not self.io_validation_complete: + ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts) + if not ret: + raise ExecutionError("IO failed on some of the clients") + g.log.info("IO is successful on all mounts") + + # List all files and dirs created + if not list_all_files_and_dirs_mounts(self.mounts): + raise ExecutionError("Failed to list all files and dirs") + g.log.info("Listing all files and directories is successful") + + # Delete users created in setUp + for mount_object in self.mounts: + for user in self.users: + if not del_user(mount_object.client_system, user): + raise ExecutionError("Failed to delete user: {}" + .format(user)) + g.log.info("Successfully deleted all users") + + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + self.get_super_method(self, 'tearDown')() + + def trigger_heal_from_mount_point(self): + """ + Trigger heal from mount point using read. + """ + # Unmounting and remounting volume to update the volume graph + # in client. + ret, _, _ = umount_volume( + self.mounts[0].client_system, self.mounts[0].mountpoint) + self.assertFalse(ret, "Failed to unmount volume.") + + ret, _, _ = mount_volume( + self.volname, 'glusterfs', self.mounts[0].mountpoint, + self.mnode, self.mounts[0].client_system) + self.assertFalse(ret, "Failed to remount volume.") + g.log.info('Successfully umounted and remounted volume.') + + # Trigger heal from client side + cmd = ("/usr/bin/env python {0} read {1}/{2}".format( + self.script_upload_path, self.mounts[0].mountpoint, + self.test_meta_data_self_heal_folder)) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertFalse(ret, 'Failed to trigger heal on %s' + % self.mounts[0].client_system) + g.log.info("Successfully triggered heal from mount point.") + + def validate_io_on_clients(self): + """ + Validate I/O on client mount points. + """ + ret = validate_io_procs(self.all_mounts_procs, self.mounts) + self.assertTrue(ret, "IO failed on some of the clients") + self.io_validation_complete = True + g.log.info("IO is successful on all mounts") + + def check_arequal_from_mount_point_and_bricks(self): + """ + Check if arequals of mount point and bricks + are the same.
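+ For a replicated volume, the mount arequal is compared against every + brick; for a distributed-replicated volume, all bricks of a subvol are + compared against the first brick of that subvol.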
+ """ + # Check arequals for "replicated" + all_bricks = get_all_bricks(self.mnode, self.volname) + if self.volume_type == "replicated": + # Get arequal before getting bricks offline + ret, arequals = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal before getting bricks offline ' + 'is successful') + mount_point_total = arequals[0].splitlines()[-1].split(':')[-1] + + # Get arequal on bricks and compare with mount_point_total + ret, arequals = collect_bricks_arequal(all_bricks) + self.assertTrue(ret, 'Failed to get arequal on bricks') + for arequal in arequals: + brick_total = arequal.splitlines()[-1].split(':')[-1] + self.assertEqual(mount_point_total, brick_total, + 'Arequals for mountpoint and brick ' + 'are not equal') + g.log.info('Arequals for mountpoint and brick are equal') + g.log.info('All arequals are equal for replicated') + + # Check arequals for "distributed-replicated" + if self.volume_type == "distributed-replicated": + # get the subvolumes + subvols_dict = get_subvols(self.mnode, self.volname) + num_subvols = len(subvols_dict['volume_subvols']) + g.log.info("Number of subvolumes in volume %s:", num_subvols) + + # Get arequals and compare + for i in range(0, num_subvols): + # Get arequal for first brick + subvol_brick_list = subvols_dict['volume_subvols'][i] + ret, arequal = collect_bricks_arequal([subvol_brick_list[0]]) + self.assertTrue(ret, 'Failed to get arequal on first') + + # Get arequal for every brick and compare with first brick + first_brick_total = arequal[0].splitlines()[-1].split(':')[-1] + ret, arequals = collect_bricks_arequal(subvol_brick_list) + self.assertTrue(ret, 'Failed to get arequal on bricks') + for arequal in arequals: + brick_total = arequal.splitlines()[-1].split(':')[-1] + self.assertEqual(first_brick_total, brick_total, + 'Arequals for subvol and brick are ' + 'not equal') + g.log.info('Arequals for subvol and brick are equal') + g.log.info('All arequals are equal for distributed-replicated') + + def check_permssions_on_bricks(self, bricks_list): + """ + Check permssions on a given set of bricks. 
+ """ + for brick in bricks_list: + node, brick_path = brick.split(':') + dir_list = get_dir_contents(node, "{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder)) + self.assertIsNotNone(dir_list, "Dir list from " + "brick is empty") + g.log.info("Successfully got dir list from bick") + + # Verify changes for dirs + for folder in dir_list: + ret = get_file_stat(node, "{}/{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, folder)) + + self.assertEqual('555', ret['access'], + "Permissions mismatch on node {}" + .format(node)) + + self.assertEqual('1003', ret['gid'], + "Group mismatch on node {}" + .format(node)) + + # Get list of files for each dir + file_list = get_dir_contents(node, "{}/{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, + folder)) + self.assertIsNotNone(file_list, "File list from " + "brick is empty.") + g.log.info("Successfully got file list from bick.") + + # Verify for group for each file + if file_list: + for file_name in file_list: + ret = get_file_stat(node, "{}/{}/{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, + folder, file_name)) + + self.assertEqual('1003', ret['gid'], + "Group mismatch on node {}" + .format(node)) + + # Verify permissions for files in dirs 1..50 + for i in range(1, 51): + + file_list = get_dir_contents(node, "{}/{}/dir.{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i))) + self.assertIsNotNone(file_list, "File list from " + "brick is empty.") + g.log.info("Successfully got file list from bick.") + + if file_list: + for file_name in file_list: + + ret = get_file_stat(node, "{}/{}/dir.{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i), file_name)) + self.assertEqual('666', ret['access'], + "Permissions mismatch on node {}" + .format(node)) + + # Verify permissions for files in dirs 51..100 + for i in range(51, 101): + + file_list = get_dir_contents(node, "{}/{}/dir.{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i))) + self.assertIsNotNone(file_list, "File list from " + "brick is empty.") + g.log.info("Successfully got file list from bick.") + + if file_list: + for file_name in file_list: + + ret = get_file_stat(node, "{}/{}/dir.{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i), file_name)) + self.assertEqual('444', ret['access'], + "Permissions mismatch on node {}" + .format(node)) + + # Verify ownership for dirs 1..35 + for i in range(1, 36): + + ret = get_file_stat(node, "{}/{}/dir.{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i))) + self.assertEqual('1000', ret['uid'], + "User id mismatch on node {}" + .format(node)) + + # Verify ownership for files in dirs + file_list = get_dir_contents(node, "{}/{}/dir.{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i))) + self.assertIsNotNone(file_list, "File list from " + "brick is empty.") + g.log.info("Successfully got file list from bick.") + + if file_list: + for file_name in file_list: + + ret = get_file_stat(node, "{}/{}/dir.{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i), file_name)) + self.assertEqual('1000', ret['uid'], + "User id mismatch on node {}" + .format(node)) + + # Verify ownership for dirs 36..70 + for i in range(36, 71): + + ret = get_file_stat(node, "{}/{}/dir.{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i))) + self.assertEqual('1001', ret['uid'], + "User id mismatch on node {}" + .format(node)) + + # Verify ownership for files in 
dirs + file_list = get_dir_contents(node, "{}/{}/dir.{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i))) + self.assertIsNotNone(file_list, "File list from " + "brick is empty.") + g.log.info("Successfully got file list from bick.") + + if file_list: + for file_name in file_list: + + ret = get_file_stat(node, "{}/{}/dir.{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i), file_name)) + self.assertEqual('1001', ret['uid'], + "User id mismatch on node {}" + .format(node)) + + # Verify ownership for dirs 71..100 + for i in range(71, 101): + + ret = get_file_stat(node, "{}/{}/dir.{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i))) + self.assertEqual('1002', ret['uid'], + "User id mismatch on node {}" + .format(node)) + + # Verify ownership for files in dirs + file_list = get_dir_contents(node, "{}/{}/dir.{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i))) + self.assertIsNotNone(file_list, "File list from " + "brick is empty.") + g.log.info("Successfully got file list from bick.") + + if file_list: + for file_name in file_list: + + ret = get_file_stat(node, "{}/{}/dir.{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i), file_name)) + self.assertEqual('1002', ret['uid'], + "User id mismatch on node {}" + .format(node)) + + def test_metadata_self_heal_client_side_heal(self): + """ + Testcase steps: + 1.Turn off the options self heal daemon + 2.Create IO + 3.Calculate arequal of the bricks and mount point + 4.Bring down "brick1" process + 5.Change the permissions of the directories and files + 6.Change the ownership of the directories and files + 7.Change the group of the directories and files + 8.Bring back the brick "brick1" process + 9.Execute "find . | xargs stat" from the mount point to trigger heal + 10.Verify the changes in permissions are not self healed on brick1 + 11.Verify the changes in permissions on all bricks but brick1 + 12.Verify the changes in ownership are not self healed on brick1 + 13.Verify the changes in ownership on all the bricks but brick1 + 14.Verify the changes in group are not successfully self-healed + on brick1 + 15.Verify the changes in group on all the bricks but brick1 + 16.Turn on the option metadata-self-heal + 17.Execute "find . 
| xargs md5sum" from the mount point to trgger heal + 18.Wait for heal to complete + 19.Verify the changes in permissions are self-healed on brick1 + 20.Verify the changes in ownership are successfully self-healed + on brick1 + 21.Verify the changes in group are successfully self-healed on brick1 + 22.Calculate arequal check on all the bricks and mount point + """ + # Setting options + ret = set_volume_options(self.mnode, self.volname, + {"self-heal-daemon": "off"}) + self.assertTrue(ret, 'Failed to set options self-heal-daemon ' + 'and metadata-self-heal to OFF') + g.log.info("Options are set successfully") + + # Creating files on client side + self.test_meta_data_self_heal_folder = 'test_meta_data_self_heal' + for mount_object in self.mounts: + command = ("cd {0}/ ; mkdir {1} ; cd {1}/ ;" + "for i in `seq 1 100` ; " + "do mkdir dir.$i ; " + "for j in `seq 1 5` ; " + "do dd if=/dev/urandom of=dir.$i/file.$j " + "bs=1K count=$j ; done ; done ;".format + (mount_object.mountpoint, + self.test_meta_data_self_heal_folder)) + proc = g.run_async(mount_object.client_system, command, + user=mount_object.user) + self.all_mounts_procs.append(proc) + + # Validate IO + self.validate_io_on_clients() + + # Calculate and check arequal of the bricks and mount point + self.check_arequal_from_mount_point_and_bricks() + + # Select bricks to bring offline from a replica set + subvols_dict = get_subvols(self.mnode, self.volname) + subvols = subvols_dict['volume_subvols'] + bricks_to_bring_offline = [] + bricks_to_be_online = [] + for subvol in subvols: + bricks_to_bring_offline.append(subvol[0]) + for brick in subvol[1:]: + bricks_to_be_online.append(brick) + + # Bring bricks offline + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' % + bricks_to_bring_offline) + + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks %s are not offline' + % bricks_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + bricks_to_bring_offline) + + # Change the permissions of the directories and files + self.all_mounts_procs = [] + for mount_obj in self.mounts: + command = ('cd {}/{}; ' + 'for i in `seq 1 100` ; ' + 'do chmod 555 dir.$i ; done ; ' + 'for i in `seq 1 50` ; ' + 'do for j in `seq 1 5` ; ' + 'do chmod 666 dir.$i/file.$j ; done ; done ; ' + 'for i in `seq 51 100` ; ' + 'do for j in `seq 1 5` ; ' + 'do chmod 444 dir.$i/file.$j ; done ; done ;' + .format(mount_obj.mountpoint, + self.test_meta_data_self_heal_folder)) + + proc = g.run_async(mount_obj.client_system, command, + user=mount_obj.user) + self.all_mounts_procs.append(proc) + self.io_validation_complete = False + + # Validate IO + self.validate_io_on_clients() + + # Change the ownership of the directories and files + self.all_mounts_procs = [] + for mount_obj in self.mounts: + command = ('cd {}/{} ; ' + 'for i in `seq 1 35` ; ' + 'do chown -R qa_func dir.$i ; done ; ' + 'for i in `seq 36 70` ; ' + 'do chown -R qa_system dir.$i ; done ; ' + 'for i in `seq 71 100` ; ' + 'do chown -R qa_perf dir.$i ; done ;' + .format(mount_obj.mountpoint, + self.test_meta_data_self_heal_folder)) + proc = g.run_async(mount_obj.client_system, command, + user=mount_obj.user) + self.all_mounts_procs.append(proc) + self.io_validation_complete = False + + # Validate IO + self.validate_io_on_clients() + + # Change the group of the directories and files + self.all_mounts_procs = [] + for mount_obj in self.mounts: + command = ('cd {}/{}; 
' + 'for i in `seq 1 100` ; ' + 'do chgrp -R qa_all dir.$i ; done ;' + .format(mount_obj.mountpoint, + self.test_meta_data_self_heal_folder)) + + proc = g.run_async(mount_obj.client_system, command, + user=mount_obj.user) + self.all_mounts_procs.append(proc) + self.io_validation_complete = False + + # Validate IO + self.validate_io_on_clients() + + # Bring brick online + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s online' % + bricks_to_bring_offline) + g.log.info('Bringing bricks %s online is successful', + bricks_to_bring_offline) + + # Trigger heal from mount point + self.trigger_heal_from_mount_point() + + # Verify the changes are not self healed on brick1 for each subvol + for brick in bricks_to_bring_offline: + node, brick_path = brick.split(':') + + dir_list = get_dir_contents(node, "{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder)) + self.assertIsNotNone(dir_list, "Dir list from " + "brick is empty") + g.log.info("Successfully got dir list from bick") + + # Verify changes for dirs + for folder in dir_list: + + ret = get_file_stat(node, "{}/{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, + folder)) + + self.assertEqual('755', ret['access'], + "Permissions mismatch on node {}" + .format(node)) + + self.assertEqual('root', ret['username'], + "User id mismatch on node {}" + .format(node)) + + self.assertEqual('root', ret['groupname'], + "Group id mismatch on node {}" + .format(node)) + + # Get list of files for each dir + file_list = get_dir_contents(node, "{}/{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, + folder)) + self.assertIsNotNone(file_list, "File list from " + "brick is empty.") + g.log.info("Successfully got file list from bick.") + + if file_list: + for file_name in file_list: + + ret = get_file_stat(node, "{}/{}/{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, + folder, file_name)) + + self.assertEqual('644', ret['access'], + "Permissions mismatch on node" + " {} for file {}".format(node, + file_name)) + + self.assertEqual('root', ret['username'], + "User id mismatch on node" + " {} for file {}".format(node, + file_name)) + + self.assertEqual('root', ret['groupname'], + "Group id mismatch on node" + " {} for file {}".format(node, + file_name)) + + # Verify the changes are self healed on all bricks except brick1 + # for each subvol + self.check_permssions_on_bricks(bricks_to_be_online) + + # Setting options + ret = set_volume_options(self.mnode, self.volname, + {"metadata-self-heal": "on"}) + self.assertTrue(ret, 'Failed to set options to ON.') + g.log.info("Options are set successfully") + + # Trigger heal from mount point + self.trigger_heal_from_mount_point() + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + # Verify the changes are self healed on brick1 for each subvol + self.check_permssions_on_bricks(bricks_to_bring_offline) + + # Calculate and check arequal of the bricks and mount point + self.check_arequal_from_mount_point_and_bricks() diff --git 
a/tests/functional/afr/test_multiple_clients_dd_on_same_file_default.py b/tests/functional/afr/test_multiple_clients_dd_on_same_file_default.py index 0d7d82d18..a3a6cb183 100644 --- a/tests/functional/afr/test_multiple_clients_dd_on_same_file_default.py +++ b/tests/functional/afr/test_multiple_clients_dd_on_same_file_default.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,6 +15,7 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. from glusto.core import Glusto as g + from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.brick_libs import (bring_bricks_offline, @@ -28,9 +29,10 @@ from glustolibs.gluster.heal_ops import trigger_heal from glustolibs.misc.misc_libs import upload_scripts from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs, wait_for_io_to_complete) +from glustolibs.gluster.volume_ops import (get_volume_info) -@runs_on([['replicated'], +@runs_on([['replicated', 'arbiter'], ['glusterfs', 'cifs', 'nfs']]) class VerifySelfHealTriggersHealCommand(GlusterBaseClass): """ @@ -40,33 +42,23 @@ class VerifySelfHealTriggersHealCommand(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) g.log.info("Successfully uploaded IO scripts to clients %s", cls.clients) - # Override Volumes - if cls.volume_type == "replicated": - # Define x2 replicated volume - cls.volume['voltype'] = { - 'type': 'replicated', - 'replica_count': 2, - 'transport': 'tcp'} - def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() self.all_mounts_procs = [] self.io_validation_complete = False @@ -101,7 +93,7 @@ class VerifySelfHealTriggersHealCommand(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_multiple_clients_dd_on_same_file_default(self): """ @@ -145,8 +137,9 @@ class VerifySelfHealTriggersHealCommand(GlusterBaseClass): mount_obj.client_system, mount_obj.mountpoint) # Create files g.log.info('Reading files...') - command = ("python %s read %s" - % (self.script_upload_path, mount_obj.mountpoint)) + command = "/usr/bin/env python %s read %s" % ( + self.script_upload_path, + mount_obj.mountpoint) proc = g.run_async(mount_obj.client_system, command, user=mount_obj.user) @@ -244,6 +237,18 @@ class VerifySelfHealTriggersHealCommand(GlusterBaseClass): # It should be the same g.log.info('Getting arequal on bricks...') arequals_after_heal = {} + + if self.volume_type == "arbiter": + vol_info = 
get_volume_info(self.mnode, self.volname) + self.assertIsNotNone(vol_info, 'Unable to get volume info') + data_brick_list = [] + for brick in bricks_list: + for brick_info in vol_info[self.volname]["bricks"]["brick"]: + if brick_info["name"] == brick: + if brick_info["isArbiter"] == "0": + data_brick_list.append(brick) + bricks_list = data_brick_list + for brick in bricks_list: g.log.info('Getting arequal on bricks %s...', brick) node, brick_path = brick.split(':') diff --git a/tests/functional/afr/test_quota_limit_entry_heal.py b/tests/functional/afr/test_quota_limit_entry_heal.py index 033d326f4..9ae7c2013 100644 --- a/tests/functional/afr/test_quota_limit_entry_heal.py +++ b/tests/functional/afr/test_quota_limit_entry_heal.py @@ -43,8 +43,7 @@ class QuotaEntrySelfHealTest(GlusterBaseClass): """ @classmethod def setUpClass(cls): - - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Override Volumes if cls.volume_type == "replicated": @@ -55,7 +54,7 @@ class QuotaEntrySelfHealTest(GlusterBaseClass): 'transport': 'tcp'} def setUp(self): - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() g.log.info("Starting to Setup Volume %s", self.volname) self.all_mounts_procs = [] @@ -92,7 +91,7 @@ class QuotaEntrySelfHealTest(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_entry_heal_with_quota(self): """ diff --git a/tests/functional/afr/test_readlink.py b/tests/functional/afr/test_readlink.py index 0f2b34b65..6b178d091 100644 --- a/tests/functional/afr/test_readlink.py +++ b/tests/functional/afr/test_readlink.py @@ -32,7 +32,7 @@ class AfrReadlinkTest(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Override replica count to be 3 if cls.volume_type == "replicated": @@ -44,16 +44,15 @@ class AfrReadlinkTest(GlusterBaseClass): # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on " "mounts", cls.clients) - script_abs_path = "/usr/share/glustolibs/io/scripts/file_dir_ops.py" cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, script_abs_path) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients") def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() # Setup Volume and Mount Volume g.log.info("Starting to Setup Volume and Mount Volume") @@ -75,7 +74,7 @@ class AfrReadlinkTest(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_readlink(self): # create file diff --git a/tests/functional/afr/test_repl_heal_with_io.py b/tests/functional/afr/test_repl_heal_with_io.py new file mode 100644 index 000000000..0cdff000c --- /dev/null +++ b/tests/functional/afr/test_repl_heal_with_io.py @@ -0,0 +1,306 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from random import choice +from time import sleep, time + +from glusto.core import Glusto as g + +from glustolibs.gluster.brick_libs import bring_bricks_offline +from glustolibs.gluster.dht_test_utils import find_hashed_subvol +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.heal_libs import monitor_heal_completion +from glustolibs.gluster.heal_ops import heal_info +from glustolibs.gluster.volume_libs import ( + get_subvols, wait_for_volume_process_to_be_online) +from glustolibs.gluster.volume_ops import volume_start +from glustolibs.io.utils import wait_for_io_to_complete + + +@runs_on([[ + 'arbiter', 'distributed-arbiter', 'replicated', 'distributed-replicated' +], ['glusterfs', 'nfs']]) +class TestHealWithIO(GlusterBaseClass): + def setUp(self): + self.get_super_method(self, 'setUp')() + + # A single mount is enough for all the tests + self.mounts = [self.mounts[0]] + + # For `test_heal_info_...` tests 6 replicas are needed + if ('test_heal_info' in self.id().split('.')[-1] + and self.volume_type.find('distributed') >= 0): + self.volume['voltype']['dist_count'] = 6 + + if not self.setup_volume_and_mount_volume(mounts=self.mounts): + raise ExecutionError('Failed to setup and mount ' + '{}'.format(self.volname)) + + self.client, self.m_point = (self.mounts[0].client_system, + self.mounts[0].mountpoint) + self.file_path = self.m_point + '/test_file' + self._io_cmd = ('cat /dev/urandom | tr -dc [:space:][:print:] | ' + 'head -c {} ') + # IO has to run for longer length for covering two scenarios in arbiter + # volume type + self.io_time = 600 if self.volume_type.find('arbiter') >= 0 else 300 + self.proc = '' + + def tearDown(self): + if self.proc: + ret = wait_for_io_to_complete([self.proc], [self.mounts[0]]) + if not ret: + raise ExecutionError('Wait for IO completion failed on client') + + if not self.unmount_volume_and_cleanup_volume(mounts=self.mounts): + raise ExecutionError('Not able to unmount and cleanup ' + '{}'.format(self.volname)) + self.get_super_method(self, 'tearDown')() + + def _validate_heal(self, timeout=8): + """ + Validates `heal info` command returns in less than `timeout` value + """ + start_time = time() + ret, _, _ = heal_info(self.mnode, self.volname) + end_time = time() + self.assertEqual(ret, 0, 'Not able to query heal info status') + self.assertLess( + end_time - start_time, timeout, + 'Query of heal info of volume took more than {} ' + 'seconds'.format(timeout)) + + def _validate_io(self, delay=5): + """ + Validates IO was happening during main test, measures by looking at + time delay between issue and return of `async_communicate` + """ + start_time = time() + ret, _, err = self.proc.async_communicate() + end_time = time() + 
self.assertEqual(ret, 0, 'IO failed to complete with error ' + '{}'.format(err)) + self.assertGreater( + end_time - start_time, delay, + 'Unable to validate IO was happening during main test') + self.proc = '' + + def _bring_brick_offline(self, bricks_list, arb_brick=False): + """ + Bring arbiter brick offline if `arb_brick` is true else one of data + bricks will be offline'd + """ + # Pick up only `data` brick + off_brick, b_type = bricks_list[:-1], 'data' + if arb_brick: + # Pick only `arbiter` brick + off_brick, b_type = [bricks_list[-1]], 'arbiter' + elif not arb_brick and self.volume_type.find('replicated') >= 0: + # Should pick all bricks if voltype is `replicated` + off_brick = bricks_list + + ret = bring_bricks_offline(self.volname, choice(off_brick)) + self.assertTrue(ret, + 'Unable to bring `{}` brick offline'.format(b_type)) + + def _get_hashed_subvol_index(self, subvols): + """ + Return `index` of hashed_volume from list of subvols + """ + index = 0 + if self.volume_type.find('distributed') >= 0: + hashed_subvol, index = find_hashed_subvol( + subvols, '', + self.file_path.rsplit('/', 1)[1]) + self.assertIsNotNone(hashed_subvol, + 'Unable to find hashed subvolume') + return index + + def _validate_brick_down_scenario(self, + validate_heal=False, + monitor_heal=False): + """ + Refactor of common steps across volume type for validating brick down + scenario + """ + if validate_heal: + # Wait for ample amount of IO to be written to file + sleep(180) + + # Validate heal info shows o/p and exit in <8s + self._validate_heal() + + # Force start volume and verify all process are online + ret, _, _ = volume_start(self.mnode, self.volname, force=True) + self.assertEqual(ret, 0, 'Unable to force start volume') + + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue( + ret, 'Not able to confirm all process of volume are online') + + if monitor_heal: + # Wait for IO to be written to file + sleep(30) + + # Monitor heal and validate data was appended successfully to file + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, + 'Self heal is not completed post brick online') + + def _perform_heal_append_scenario(self): + """ + Refactor of common steps in `entry_heal` and `data_heal` tests + """ + # Find hashed subvol of the file with IO + subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + index = self._get_hashed_subvol_index(subvols) + + # Bring down one of the `data` bricks of hashed subvol + self._bring_brick_offline(bricks_list=subvols[index]) + + cmd = ('{} >> {}; '.format(self._io_cmd.format('1G'), self.file_path)) + ret, _, _ = g.run(self.client, cmd) + self.assertEqual( + ret, 0, 'Unable to append 1G of data to existing ' + 'file on mount post offline of a brick') + + # Start volume and verify all process are online + self._validate_brick_down_scenario() + + # Start conitnuous IO and monitor heal completion + cmd = ('count={}; while [ $count -gt 1 ]; do {} >> {}; sleep 1; ' + '((count--)); done;'.format(self.io_time, + self._io_cmd.format('1M'), + self.file_path)) + self.proc = g.run_async(self.client, cmd) + self._validate_brick_down_scenario(monitor_heal=True) + + # Bring down `arbiter` brick and perform validation + if self.volume_type.find('arbiter') >= 0: + self._bring_brick_offline(bricks_list=subvols[index], + arb_brick=True) + self._validate_brick_down_scenario(monitor_heal=True) + + self._validate_io() + + def test_heal_info_with_io(self): + """ + Description: Validate heal info command with IO + + 
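While IO appends 5M chunks to the file every second for ~90 seconds, the + `heal info` query is timed by `_validate_heal` and must return within + its timeout. +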
Steps: + - Create and mount a 6x3 replicated volume + - Create a file and perform IO continuously on this file + - While IOs are happening issue `heal info` command and validate o/p + not taking much time + """ + cmd = ('count=90; while [ $count -gt 1 ]; do {} >> {}; sleep 1; ' + '((count--)); done;'.format(self._io_cmd.format('5M'), + self.file_path)) + self.proc = g.run_async(self.client, cmd) + + # Wait for IO to be written to file + sleep(30) + + # Validate heal info shows o/p and exit in <5s + self._validate_heal() + + # Validate IO was happening + self._validate_io() + + g.log.info('Pass: Test heal info with IO is complete') + + def test_heal_info_with_io_and_brick_down(self): + """ + Description: Validate heal info command with IO and brick down + + Steps: + - Create and mount a 6x3 replicated volume + - Create a file and perform IO continuously on this file + - While IOs are happening, bring down one of the brick where the file + is getting hashed to + - After about a period of ~5 min issue `heal info` command and + validate o/p not taking much time + - Repeat the steps for arbiter on bringing arbiter brick down + """ + cmd = ('count={}; while [ $count -gt 1 ]; do {} >> {}; sleep 1; ' + '((count--)); done;'.format(self.io_time, + self._io_cmd.format('5M'), + self.file_path)) + self.proc = g.run_async(self.client, cmd) + + # Wait for IO to be written to file + sleep(30) + + # Find hashed subvol of the file with IO + subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + index = self._get_hashed_subvol_index(subvols) + + # Bring down one of the `data` bricks of hashed subvol + self._bring_brick_offline(bricks_list=subvols[index]) + + # Validate heal and bring volume online + self._validate_brick_down_scenario(validate_heal=True) + + # Bring down `arbiter` brick and perform validation + if self.volume_type.find('arbiter') >= 0: + self._bring_brick_offline(bricks_list=subvols[index], + arb_brick=True) + + # Validate heal and bring volume online + self._validate_brick_down_scenario(validate_heal=True) + + self._validate_io() + + g.log.info('Pass: Test heal info with IO and brick down is complete') + + def test_data_heal_on_file_append(self): + """ + Description: Validate appends to a self healing file (data heal check) + + Steps: + - Create and mount a 1x2 replicated volume + - Create a file of ~ 1GB from the mount + - Bring down a brick and write more data to the file + - Bring up the offline brick and validate appending data to the file + succeeds while file self heals + - Repeat the steps for arbiter on bringing arbiter brick down + """ + cmd = ('{} >> {}; '.format(self._io_cmd.format('1G'), self.file_path)) + ret, _, _ = g.run(self.client, cmd) + self.assertEqual(ret, 0, 'Unable to create 1G of file on mount') + + # Perform `data_heal` test + self._perform_heal_append_scenario() + + g.log.info('Pass: Test data heal on file append is complete') + + def test_entry_heal_on_file_append(self): + """ + Description: Validate appends to a self healing file (entry heal check) + + Steps: + - Create and mount a 1x2 replicated volume + - Bring down a brick and write data to the file + - Bring up the offline brick and validate appending data to the file + succeeds while file self heals + - Repeat the steps for arbiter on bringing arbiter brick down + """ + + # Perform `entry_heal` test + self._perform_heal_append_scenario() + + g.log.info('Pass: Test entry heal on file append is complete') diff --git a/tests/functional/afr/test_replace_brick_self_heal_io_in_progress.py 
b/tests/functional/afr/test_replace_brick_self_heal_io_in_progress.py new file mode 100644 index 000000000..198351589 --- /dev/null +++ b/tests/functional/afr/test_replace_brick_self_heal_io_in_progress.py @@ -0,0 +1,260 @@ +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.lib_utils import collect_bricks_arequal +from glustolibs.gluster.heal_ops import trigger_heal_full +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_heal_complete, + is_volume_in_split_brain, + is_shd_daemonized) +from glustolibs.gluster.volume_libs import ( + verify_all_process_of_volume_are_online, + wait_for_volume_process_to_be_online, + get_subvols) +from glustolibs.gluster.brick_libs import (select_bricks_to_bring_offline, + bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + get_all_bricks) +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import (list_all_files_and_dirs_mounts, + wait_for_io_to_complete, + validate_io_procs, + collect_mounts_arequal) + + +@runs_on([['distributed-replicated', 'replicated'], + ['glusterfs', 'nfs', 'cifs']]) +class TestAFRSelfHeal(GlusterBaseClass): + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + self.all_mounts_procs, self.io_validation_complete = [], False + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + + # Checking if failure occure before I/O was complete + if not self.io_validation_complete: + ret = wait_for_io_to_complete(self.all_mounts_procs, + self.mounts[0]) + if not ret: + raise ExecutionError("IO failed on some of the clients") + g.log.info("IO is successful on all mounts") + + # List all files and dirs created + ret = list_all_files_and_dirs_mounts(self.mounts) + if not ret: + raise ExecutionError("Failed to list all files and dirs") + g.log.info("Listing all files and directories is successful") + + # Cleanup and umount volume + ret = 
self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_replace_brick_self_heal_io_in_progress(self): + """ + - Create directory on mount point and write files/dirs + - Create another set of files (1K files) + - While creation of files/dirs are in progress Kill one brick + - Remove the contents of the killed brick(simulating disk replacement) + - When the IO's are still in progress, restart glusterd on the nodes + where we simulated disk replacement to bring back bricks online + - Start volume heal + - Wait for IO's to complete + - Verify whether the files are self-healed + - Calculate arequals of the mount point and all the bricks + """ + # pylint: disable=too-many-locals,too-many-statements,too-many-branches + # Create dirs with files + g.log.info('Creating dirs with file...') + command = ("/usr/bin/env python %s create_deep_dirs_with_files " + "-d 2 -l 2 -n 2 -f 10 %s" + % (self.script_upload_path, self.mounts[0].mountpoint)) + ret, _, err = g.run(self.mounts[0].client_system, command, + user=self.mounts[0].user) + self.assertFalse(ret, err) + g.log.info("IO is successful") + + # Creating another set of files (1K files) + self.all_mounts_procs = [] + + # Create dirs with files + g.log.info('Creating 1K files...') + command = ("/usr/bin/env python %s create_files " + "-f 1500 --fixed-file-size 10k %s" + % (self.script_upload_path, self.mounts[0].mountpoint)) + proc = g.run_async(self.mounts[0].client_system, command, + user=self.mounts[0].user) + self.all_mounts_procs.append(proc) + self.io_validation_complete = False + + # Validate IO + ret = validate_io_procs(self.all_mounts_procs, self.mounts[0]) + self.assertTrue(ret, "IO failed on some of the clients") + self.io_validation_complete = True + g.log.info("IO is successful on all mounts") + + # Select bricks to bring offline + bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( + self.mnode, self.volname)) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] + + # Bring brick offline + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' % + bricks_to_bring_offline) + + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks %s are not offline' + % bricks_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + bricks_to_bring_offline) + + # Remove the content of the killed bricks + for brick in bricks_to_bring_offline: + brick_node, brick_path = brick.split(':') + + # Removing files + command = ('cd %s ; rm -rf *' % brick_path) + ret, _, err = g.run(brick_node, command) + self.assertFalse(ret, err) + g.log.info('Files are deleted on brick %s', brick) + + # Bring brick online + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s online' + % bricks_to_bring_offline) + g.log.info('Bringing bricks %s online is successful', + bricks_to_bring_offline) + + # Wait for volume processes to be online + ret = wait_for_volume_process_to_be_online(self.mnode, + self.volname) + self.assertTrue(ret, ("Failed to wait for volume %s processes to " + "be online", self.volname)) + g.log.info("Successful in waiting for volume %s processes to be " + "online", 
self.volname) + + # Verify volume's all process are online + ret = verify_all_process_of_volume_are_online(self.mnode, + self.volname) + self.assertTrue(ret, ("Volume %s : All process are not online" + % self.volname)) + g.log.info("Volume %s : All process are online", self.volname) + + # Wait for self-heal-daemons to be online + ret = is_shd_daemonized(self.all_servers) + self.assertTrue(ret, "Either No self heal daemon process found") + g.log.info("All self-heal daemons are online") + + # Start healing + ret = trigger_heal_full(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not started') + g.log.info('Healing is started') + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + # Check arequals for "replicated" + all_bricks = get_all_bricks(self.mnode, self.volname) + if self.volume_type == "replicated": + + # Get arequal after bricks are online + ret, arequals = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal after successfully bringing' + 'bricks online.') + mount_point_total = arequals[0].splitlines()[-1].split(':')[-1] + + # Get arequal on bricks and compare with mount_point_total + ret, arequals = collect_bricks_arequal(all_bricks) + self.assertTrue(ret, 'Failed to get arequal on bricks') + for arequal in arequals: + brick_total = arequal.splitlines()[-1].split(':')[-1] + self.assertEqual(mount_point_total, brick_total, + 'Arequals for mountpoint and brick ' + 'are not equal') + g.log.info('Arequals for mountpoint and brick are equal') + + # Check arequals for "distributed-replicated" + if self.volume_type == "distributed-replicated": + + # Get the subvolumes + subvols_dict = get_subvols(self.mnode, self.volname) + num_subvols = len(subvols_dict['volume_subvols']) + g.log.info("Number of subvolumes in volume %s:", num_subvols) + + # Get arequals and compare + for i in range(0, num_subvols): + + # Get arequal for first brick + subvol_brick_list = subvols_dict['volume_subvols'][i] + ret, arequal = collect_bricks_arequal(subvol_brick_list[0]) + self.assertTrue(ret, 'Failed to get arequal on first brick') + first_brick_total = arequal[0].splitlines()[-1].split(':')[-1] + + # Get arequal for every brick and compare with first brick + ret, arequals = collect_bricks_arequal(subvol_brick_list) + self.assertTrue(ret, 'Failed to get arequal on bricks') + for arequal in arequals: + brick_total = arequal.splitlines()[-1].split(':')[-1] + self.assertEqual(first_brick_total, brick_total, + 'Arequals for subvol and brick are ' + 'not equal') + g.log.info('Arequals for subvol and brick are equal') diff --git a/tests/functional/afr/test_self_heal_when_dir_quota_exceeded.py b/tests/functional/afr/test_self_heal_when_dir_quota_exceeded.py index 4648c0f68..22ff5e151 100644 --- a/tests/functional/afr/test_self_heal_when_dir_quota_exceeded.py +++ b/tests/functional/afr/test_self_heal_when_dir_quota_exceeded.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2020 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -20,8 +20,8 @@ Test cases in this module tests whether SHD heals the files in a directory when directory quota is exceeded. """ - from glusto.core import Glusto as g + from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on from glustolibs.gluster.brick_libs import (get_all_bricks, @@ -47,7 +47,7 @@ class HealFilesWhenDirQuotaExceeded(GlusterBaseClass): def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Override Volumes if cls.volume_type == "replicated": @@ -60,11 +60,9 @@ class HealFilesWhenDirQuotaExceeded(GlusterBaseClass): # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on " "mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, script_local_path) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts " "to clients %s" % cls.clients) @@ -78,27 +76,29 @@ class HealFilesWhenDirQuotaExceeded(GlusterBaseClass): raise ExecutionError("Failed to Setup_Volume and Mount_Volume") g.log.info("Successful in Setup Volume and Mount Volume") - @classmethod - def tearDownClass(cls): - - # Cleanup Volume - g.log.info("Starting to clean up Volume %s", cls.volname) - ret = cls.unmount_volume_and_cleanup_volume(cls.mounts) + def tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + g.log.info("Starting to Unmount Volume and Cleanup Volume") + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: - raise ExecutionError("Failed to create volume") - g.log.info("Successful in cleaning up Volume %s", cls.volname) + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") - GlusterBaseClass.tearDownClass.im_func(cls) + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() - def test_heal_when_dir_quota_exceeded_(self): + def test_heal_when_dir_quota_exceeded(self): # Create a directory to set the quota_limit_usage path = "/dir" g.log.info("Creating a directory") self.all_mounts_procs = [] for mount_object in self.mounts: - cmd = ("python %s create_deep_dir -d 0 -l 0 %s%s " - % (self.script_upload_path, mount_object.mountpoint, - path)) + cmd = "/usr/bin/env python %s create_deep_dir -d 0 -l 0 %s%s" % ( + self.script_upload_path, + mount_object.mountpoint, path) ret = g.run(mount_object.client_system, cmd) self.assertTrue(ret, "Failed to create directory on mountpoint") g.log.info("Directory created successfully on mountpoint") diff --git a/tests/functional/afr/test_self_heal_with_diff_algorithm.py b/tests/functional/afr/test_self_heal_with_diff_algorithm.py new file mode 100644 index 000000000..68472cc14 --- /dev/null +++ b/tests/functional/afr/test_self_heal_with_diff_algorithm.py @@ -0,0 +1,162 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +Description: + Test self heal when data-self-heal-algorithm option is set to diff. +""" + +from random import sample + +from glusto.core import Glusto as g +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + are_bricks_offline) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.heal_libs import (is_volume_in_split_brain, + monitor_heal_completion) +from glustolibs.gluster.lib_utils import collect_bricks_arequal +from glustolibs.gluster.volume_ops import (volume_start, + set_volume_options) +from glustolibs.gluster.volume_libs import ( + verify_all_process_of_volume_are_online, get_subvols) + + +@runs_on([['arbiter', 'distributed-arbiter', 'replicated', + 'distributed-replicated'], ['glusterfs']]) +class TestSelfHealWithDiffAlgorithm(GlusterBaseClass): + def setUp(self): + + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + if not self.setup_volume_and_mount_volume(self.mounts): + raise ExecutionError("Unable to setup and mount volume") + g.log.info("Volume created and mounted successfully") + + def tearDown(self): + + # Unmount and cleanup the volume + if not self.unmount_volume_and_cleanup_volume(self.mounts): + raise ExecutionError("Unable to unmount and cleanup volume") + g.log.info("Volume unmounted and deleted successfully") + + # Calling GlusterBaseClass Teardown + self.get_super_method(self, 'tearDown')() + + def test_self_heal_with_diff_algorithm(self): + """ + Test Steps: + 1. Create a replicated/distributed-replicate volume and mount it + 2. Set data/metadata/entry-self-heal to off and + data-self-heal-algorithm to diff + 3. Create few files inside a directory with some data + 4. Check arequal of the subvol and all the bricks in the subvol should + have same checksum + 5. Bring down a brick from the subvol and validate it is offline + 6. Modify the data of existing files under the directory + 7. Bring back the brick online and wait for heal to complete + 8. Check arequal of the subvol and all the brick in the same subvol + should have same checksum + """ + + # Setting options + for key, value in (("data-self-heal", "off"), + ("metadata-self-heal", "off"), + ("entry-self-heal", "off"), + ("data-self-heal-algorithm", "diff")): + ret = set_volume_options(self.mnode, self.volname, {key: value}) + self.assertTrue(ret, 'Failed to set %s to %s.' 
% (key, value)) + g.log.info("%s set to %s successfully", key, value) + + # Create few files under a directory with data + mountpoint = self.mounts[0].mountpoint + client = self.mounts[0].client_system + + cmd = ("mkdir %s/test_diff_self_heal ; cd %s/test_diff_self_heal ;" + "for i in `seq 1 100` ; do dd if=/dev/urandom of=file.$i " + " bs=1M count=1; done;" % (mountpoint, mountpoint)) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, "Failed to create file on mountpoint") + g.log.info("Successfully created files on mountpoint") + + # Check arequal checksum of all the bricks is same + subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + for subvol in subvols: + ret, arequal_from_the_bricks = collect_bricks_arequal(subvol) + self.assertTrue(ret, "Arequal is collected successfully across " + "the bricks in the subvol {}".format(subvol)) + cmd = len(set(arequal_from_the_bricks)) + if (self.volume_type == "arbiter" or + self.volume_type == "distributed-arbiter"): + cmd = len(set(arequal_from_the_bricks[:2])) + self.assertEqual(cmd, 1, "Arequal" + " is same on all the bricks in the subvol") + + # List a brick in each subvol and bring them offline + brick_to_bring_offline = [] + for subvol in subvols: + self.assertTrue(subvol, "List is empty") + brick_to_bring_offline.extend(sample(subvol, 1)) + + ret = bring_bricks_offline(self.volname, brick_to_bring_offline) + self.assertTrue(ret, "Unable to bring brick: {} offline".format( + brick_to_bring_offline)) + + # Validate the brick is offline + ret = are_bricks_offline(self.mnode, self.volname, + brick_to_bring_offline) + self.assertTrue(ret, "Brick:{} is still online".format( + brick_to_bring_offline)) + + # Modify files under test_diff_self_heal directory + cmd = ("for i in `seq 1 100` ; do truncate -s 0 file.$i ; " + "truncate -s 2M file.$i ; done;") + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, "Failed to modify the files") + g.log.info("Successfully modified files") + + # Start volume with force to bring all bricks online + ret, _, _ = volume_start(self.mnode, self.volname, force=True) + self.assertEqual(ret, 0, "Volume start with force failed") + g.log.info("Volume: %s started successfully", self.volname) + + # Verify volume's all process are online + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, ("Volume %s : All process are not online", + self.volname)) + + # Monitor heal completion + self.assertTrue(monitor_heal_completion(self.mnode, self.volname, + interval_check=10), + "Heal failed after 20 mins") + + # Check are there any files in split-brain + self.assertFalse(is_volume_in_split_brain(self.mnode, self.volname), + "Some files are in split brain for " + "volume: {}".format(self.volname)) + + # Check arequal checksum of all the bricks is same + for subvol in subvols: + ret, arequal_from_the_bricks = collect_bricks_arequal(subvol) + self.assertTrue(ret, "Arequal is collected successfully across " + "the bricks in the subvol {}".format(subvol)) + cmd = len(set(arequal_from_the_bricks)) + if (self.volume_type == "arbiter" or + self.volume_type == "distributed-arbiter"): + cmd = len(set(arequal_from_the_bricks[:2])) + self.assertEqual(cmd, 1, "Arequal" + " is same on all the bricks in the subvol") diff --git a/tests/functional/afr/test_self_heal_with_expand_volume.py b/tests/functional/afr/test_self_heal_with_expand_volume.py new file mode 100644 index 000000000..d5b6d5d43 --- /dev/null +++ b/tests/functional/afr/test_self_heal_with_expand_volume.py @@ 
-0,0 +1,221 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from random import choice + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + are_bricks_online, get_all_bricks) +from glustolibs.gluster.glusterfile import (set_file_permissions, + occurences_of_pattern_in_file) +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_heal_complete) +from glustolibs.gluster.rebalance_ops import ( + rebalance_start, wait_for_rebalance_to_complete) +from glustolibs.gluster.lib_utils import (add_user, del_user) +from glustolibs.gluster.volume_libs import (get_subvols, expand_volume) + + +@runs_on([['distributed-replicated'], ['glusterfs']]) +class TestHealWithExpandVolume(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + self.first_client = self.mounts[0].client_system + self.mountpoint = self.mounts[0].mountpoint + + # Create non-root users + self.users = ('qa_user', 'qa_admin') + for user in self.users: + if not add_user(self.first_client, user): + raise ExecutionError("Failed to create non-root user {}" + .format(user)) + g.log.info("Successfully created non-root users") + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to setup and mount volume") + + def tearDown(self): + + # Delete non-root users + for user in self.users: + del_user(self.first_client, user) + ret, _, _ = g.run(self.first_client, + "rm -rf /home/{}".format(user)) + if ret: + raise ExecutionError("Failed to remove home dir of " + "non-root user") + g.log.info("Successfully deleted all users") + + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _bring_bricks_offline(self): + """Brings bricks offline and confirms if they are offline""" + # Select bricks to bring offline from a replica set + subvols_dict = get_subvols(self.mnode, self.volname) + subvols = subvols_dict['volume_subvols'] + self.bricks_to_bring_offline = [] + self.bricks_to_bring_offline.append(choice(subvols[0])) + + # Bring bricks offline + ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' % + self.bricks_to_bring_offline) + + ret = are_bricks_offline(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks %s are not offline' + % self.bricks_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + self.bricks_to_bring_offline) + 
+ def _restart_volume_and_bring_all_offline_bricks_online(self): + """Restart volume and bring all offline bricks online""" + ret = bring_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline, + bring_bricks_online_methods=[ + 'volume_start_force']) + self.assertTrue(ret, 'Failed to bring bricks %s online' % + self.bricks_to_bring_offline) + + # Check if bricks are back online or not + ret = are_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks not online %s even after restart' % + self.bricks_to_bring_offline) + + g.log.info('Bringing bricks %s online is successful', + self.bricks_to_bring_offline) + + def _wait_for_heal_to_completed(self): + """Check if heal is completed""" + ret = monitor_heal_completion(self.mnode, self.volname, + timeout_period=3600) + self.assertTrue(ret, 'Heal has not yet completed') + + def _check_if_there_are_files_to_be_healed(self): + """Check if there are files and dirs to be healed""" + ret = is_heal_complete(self.mnode, self.volname) + self.assertFalse(ret, 'Heal is completed') + g.log.info('Heal is pending') + + def _expand_volume_and_wait_for_rebalance_to_complete(self): + """Expand volume and wait for rebalance to complete""" + # Add brick to volume + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, "Failed to add brick on volume %s" + % self.volname) + + # Trigger rebalance and wait for it to complete + ret, _, _ = rebalance_start(self.mnode, self.volname, + force=True) + self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s" + % self.volname) + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=6000) + self.assertTrue(ret, "Rebalance is not yet complete on the volume " + "%s" % self.volname) + g.log.info("Rebalance successfully completed") + + def test_self_heal_and_add_brick_with_data_from_diff_users(self): + """ + Test case: + 1. Created a 2X3 volume. + 2. Mount the volume using FUSE and give 777 permissions to the mount. + 3. Added a new user. + 4. Login as new user and created 100 files from the new user: + for i in {1..100};do dd if=/dev/urandom of=$i bs=1024 count=1;done + 5. Kill a brick which is part of the volume. + 6. On the mount, login as root user and create 1000 files: + for i in {1..1000};do dd if=/dev/urandom of=f$i bs=10M count=1;done + 7. On the mount, login as new user, and copy existing data to + the mount. + 8. Start volume using force. + 9. While heal is in progress, add-brick and start rebalance. + 10. Wait for rebalance and heal to complete, + 11. Check for MSGID: 108008 errors in rebalance logs. 
+ """ + # Change permissions of mount point to 777 + ret = set_file_permissions(self.first_client, self.mountpoint, + '-R 777') + self.assertTrue(ret, "Unable to change mount point permissions") + g.log.info("Mount point permissions set to 777") + + # Create 100 files from non-root user + cmd = ("su -l %s -c 'cd %s; for i in {1..100};do dd if=/dev/urandom " + "of=nonrootfile$i bs=1024 count=1; done'" % (self.users[0], + self.mountpoint)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to create files from non-root user") + + # Kill one brick which is part of the volume + self._bring_bricks_offline() + + # Create 1000 files from root user + cmd = ("cd %s; for i in {1..1000};do dd if=/dev/urandom of=rootfile$i" + " bs=10M count=1;done" % self.mountpoint) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to creare files from root user") + + # On the mount, login as new user, and copy existing data to + # the mount + cmd = ("su -l %s -c 'wget https://cdn.kernel.org/pub/linux/kernel/" + "v5.x/linux-5.4.54.tar.xz; tar -xvf linux-5.4.54.tar.xz;" + "cd %s; cp -r ~/ .;'" % (self.users[1], self.mountpoint)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to copy files from non-root user") + + # Check if there are files to be healed + self._check_if_there_are_files_to_be_healed() + + # Start the vol using force + self._restart_volume_and_bring_all_offline_bricks_online() + + # Add bricks to volume and wait for heal to complete + self._expand_volume_and_wait_for_rebalance_to_complete() + + # Wait for heal to complete + self._wait_for_heal_to_completed() + + # Check for MSGID: 108008 errors in rebalance logs + particiapting_nodes = [] + for brick in get_all_bricks(self.mnode, self.volname): + node, _ = brick.split(':') + particiapting_nodes.append(node) + + for server in particiapting_nodes: + ret = occurences_of_pattern_in_file( + server, "MSGID: 108008", + "/var/log/glusterfs/{}-rebalance.log".format(self.volname)) + self.assertEqual(ret, 0, + "[Input/output error] present in rebalance log" + " file") + g.log.info("Expanding volume successful and no MSGID: 108008 " + "errors see in rebalance logs") diff --git a/tests/functional/afr/test_self_heal_with_quota_object_limit.py b/tests/functional/afr/test_self_heal_with_quota_object_limit.py index ff308c3f6..e340c0a39 100644 --- a/tests/functional/afr/test_self_heal_with_quota_object_limit.py +++ b/tests/functional/afr/test_self_heal_with_quota_object_limit.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -20,8 +20,8 @@ Test cases in this module tests whether SHD heals the files in a directory when quota-object-limit is set. 
""" - from glusto.core import Glusto as g + from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on from glustolibs.gluster.brick_libs import (get_all_bricks, @@ -47,7 +47,7 @@ class HealFilesWhenQuotaObjectLimitExceeded(GlusterBaseClass): def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Override Volumes if cls.volume_type == "replicated": @@ -60,11 +60,9 @@ class HealFilesWhenQuotaObjectLimitExceeded(GlusterBaseClass): # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on " "mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, script_local_path) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts " "to clients %s" % cls.clients) @@ -78,17 +76,19 @@ class HealFilesWhenQuotaObjectLimitExceeded(GlusterBaseClass): raise ExecutionError("Failed to Setup_Volume and Mount_Volume") g.log.info("Successful in Setup Volume and Mount Volume") - @classmethod - def tearDownClass(cls): - - # Cleanup Volume - g.log.info("Starting to clean up Volume %s", cls.volname) - ret = cls.unmount_volume_and_cleanup_volume(cls.mounts) + def tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + g.log.info("Starting to Unmount Volume and Cleanup Volume") + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: - raise ExecutionError("Failed to create volume") - g.log.info("Successful in cleaning up Volume %s", cls.volname) + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") - GlusterBaseClass.tearDownClass.im_func(cls) + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() def test_heal_when_quota_object_limit_exceeded(self): # Create a directory to set the quota_limit_objects @@ -96,9 +96,9 @@ class HealFilesWhenQuotaObjectLimitExceeded(GlusterBaseClass): g.log.info("Creating a directory") self.all_mounts_procs = [] for mount_object in self.mounts: - cmd = ("python %s create_deep_dir -d 0 -l 0 %s%s " - % (self.script_upload_path, mount_object.mountpoint, - path)) + cmd = "/usr/bin/env python %s create_deep_dir -d 0 -l 0 %s%s" % ( + self.script_upload_path, + mount_object.mountpoint, path) ret = g.run(mount_object.client_system, cmd) self.assertTrue(ret, "Failed to create directory on mountpoint") g.log.info("Directory created successfully on mountpoint") @@ -137,8 +137,10 @@ class HealFilesWhenQuotaObjectLimitExceeded(GlusterBaseClass): for mount_object in self.mounts: g.log.info("Creating Files on %s:%s", mount_object.client_system, path) - cmd = ("python %s create_files -f 3 --base-file-name file-0 %s%s" - % (self.script_upload_path, mount_object.mountpoint, path)) + cmd = ("/usr/bin/env python %s create_files -f 3 " + "--base-file-name file-0 %s%s" % ( + self.script_upload_path, + mount_object.mountpoint, path)) ret, _, _ = g.run(mount_object.client_system, cmd) self.assertEqual(ret, 0, ("Failed to create files on %s", path)) g.log.info("Files created successfully on mountpoint") @@ -160,8 +162,9 @@ class HealFilesWhenQuotaObjectLimitExceeded(GlusterBaseClass): # Try creating 5 more files, which 
should fail as the quota limit # exceeds - cmd = ("python %s create_files -f 5 --base-file-name file-1 %s%s" - % (self.script_upload_path, mount_object.mountpoint, path)) + cmd = ("/usr/bin/env python %s create_files -f 5 --base-file-name " + "file-1 %s%s" % (self.script_upload_path, + mount_object.mountpoint, path)) ret, _, _ = g.run(mount_object.client_system, cmd) self.assertNotEqual(ret, 0, ("Creating 5 files succeeded while it was" "not supposed to.")) diff --git a/tests/functional/afr/test_shd_should_not_crash_executed_heal_info.py b/tests/functional/afr/test_shd_should_not_crash_executed_heal_info.py index 189d70af9..f8a9725e3 100755 --- a/tests/functional/afr/test_shd_should_not_crash_executed_heal_info.py +++ b/tests/functional/afr/test_shd_should_not_crash_executed_heal_info.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,6 +15,7 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. from glusto.core import Glusto as g + from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.volume_ops import set_volume_options @@ -37,16 +38,14 @@ class VerifySelfHealTriggersHealCommand(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -63,7 +62,7 @@ class VerifySelfHealTriggersHealCommand(GlusterBaseClass): def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() self.all_mounts_procs = [] self.io_validation_complete = False @@ -98,7 +97,7 @@ class VerifySelfHealTriggersHealCommand(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_shd_should_not_crash_executed_heal_info(self): """ @@ -131,8 +130,10 @@ class VerifySelfHealTriggersHealCommand(GlusterBaseClass): mount_obj.client_system, mount_obj.mountpoint) # Create files g.log.info('Creating files...') - command = ("python %s create_files -f 10 --fixed-file-size 1M %s" - % (self.script_upload_path, mount_obj.mountpoint)) + command = ("/usr/bin/env python %s create_files -f 10 " + "--fixed-file-size 1M %s" % ( + self.script_upload_path, + mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, command, user=mount_obj.user) @@ -167,7 +168,7 @@ class VerifySelfHealTriggersHealCommand(GlusterBaseClass): mount_obj.client_system, mount_obj.mountpoint) # Create files g.log.info('Creating files...') - command = ("python %s create_files " + command = ("/usr/bin/env python %s create_files " "-f %s " "--fixed-file-size 1k " 
"--base-file-name new_file " @@ -237,7 +238,7 @@ class VerifySelfHealTriggersHealCommand(GlusterBaseClass): mount_obj.client_system, mount_obj.mountpoint) # Create files g.log.info('Creating files...') - command = ("python %s create_files " + command = ("/usr/bin/env python %s create_files " "-f %s " "--fixed-file-size 1k " "--base-file-name new_new_file " diff --git a/tests/functional/afr/test_split_brain_with_hard_link_file.py b/tests/functional/afr/test_split_brain_with_hard_link_file.py new file mode 100644 index 000000000..a8248fb72 --- /dev/null +++ b/tests/functional/afr/test_split_brain_with_hard_link_file.py @@ -0,0 +1,175 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +# pylint: disable=too-many-statements, too-many-locals, unused-variable +from glusto.core import Glusto as g + +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.brick_libs import (get_all_bricks, + bring_bricks_offline, + bring_bricks_online, + are_bricks_offline) +from glustolibs.gluster.heal_ops import trigger_heal +from glustolibs.gluster.heal_libs import (is_volume_in_split_brain, + monitor_heal_completion, + is_heal_complete) + +from glustolibs.gluster.volume_ops import set_volume_options +from glustolibs.gluster.glusterfile import create_link_file + + +@runs_on([['distributed-replicated'], ['glusterfs']]) +class TestSelfHeal(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Override Volumes + if cls.volume_type == "distributed-replicated": + # Define x3 distributed-replicated volume + cls.volume['voltype'] = { + 'type': 'distributed-replicated', + 'dist_count': 2, + 'replica_count': 3, + 'transport': 'tcp'} + + # Setup Volume and Mount Volume + ret = cls.setup_volume_and_mount_volume(cls.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + @classmethod + def tearDownClass(cls): + + # Cleanup Volume + ret = cls.unmount_volume_and_cleanup_volume(cls.mounts) + if not ret: + raise ExecutionError("Failed to create volume") + g.log.info("Successful in cleaning up Volume %s", cls.volname) + + cls.get_super_method(cls, 'tearDownClass')() + + def _test_brick_down_with_file_rename(self, pfile, rfile, brick): + # Bring brick offline + g.log.info('Bringing brick %s offline', brick) + ret = bring_bricks_offline(self.volname, brick) + self.assertTrue(ret, 'Failed to bring brick %s offline' + % brick) + + ret = are_bricks_offline(self.mnode, self.volname, + [brick]) + self.assertTrue(ret, 'Brick %s is not offline' + % brick) + g.log.info('Bringing brick %s offline is successful', + brick) + + # Rename file + cmd = 
("mv %s/%s %s/%s" + % (self.mounts[0].mountpoint, pfile, + self.mounts[0].mountpoint, rfile)) + ret, _, _ = g.run(self.clients[0], cmd) + self.assertEqual(ret, 0, "rename of file failed") + + # Bring brick back online + g.log.info('Bringing brick %s online', brick) + ret = bring_bricks_online(self.mnode, self.volname, + brick) + self.assertTrue(ret, 'Failed to bring brick %s online' % + brick) + g.log.info('Bringing brick %s online is successful', brick) + + def test_afr_heal_with_brickdown_hardlink(self): + """ + Steps: + 1. Create 2 * 3 distribute replicate volume and disable all heals + 2. Create a file and 3 hardlinks to it from fuse mount. + 3. Kill brick4, rename HLINK1 to an appropriate name so that + it gets hashed to replicate-1 + 4. Likewise rename HLINK3 and HLINK7 as well, killing brick5 and brick6 + respectively each time. i.e. a different brick of the 2nd + replica is down each time. + 5. Now enable shd and let selfheals complete. + 6. Heal should complete without split-brains. + """ + bricks_list = get_all_bricks(self.mnode, self.volname) + options = {"metadata-self-heal": "off", + "entry-self-heal": "off", + "data-self-heal": "off", + "self-heal-daemon": "off"} + g.log.info("setting options %s", options) + ret = set_volume_options(self.mnode, self.volname, options) + self.assertTrue(ret, ("Unable to set volume option %s for" + "volume %s" % (options, self.volname))) + g.log.info("Successfully set %s for volume %s", options, self.volname) + + cmd = ("touch %s/FILE" % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.clients[0], cmd) + self.assertEqual(ret, 0, "file creation failed") + + # Creating a hardlink for the file created + for i in range(1, 4): + ret = create_link_file(self.clients[0], + '{}/FILE'.format(self.mounts[0].mountpoint), + '{}/HLINK{}'.format + (self.mounts[0].mountpoint, i)) + self.assertTrue(ret, "Unable to create hard link file ") + + # Bring brick3 offline,Rename file HLINK1,and bring back brick3 online + self._test_brick_down_with_file_rename("HLINK1", "NEW-HLINK1", + bricks_list[3]) + + # Bring brick4 offline,Rename file HLINK2,and bring back brick4 online + self._test_brick_down_with_file_rename("HLINK2", "NEW-HLINK2", + bricks_list[4]) + + # Bring brick5 offline,Rename file HLINK3,and bring back brick5 online + self._test_brick_down_with_file_rename("HLINK3", "NEW-HLINK3", + bricks_list[5]) + + # Setting options + options = {"self-heal-daemon": "on"} + ret = set_volume_options(self.mnode, self.volname, options) + self.assertTrue(ret, 'Failed to set options %s' % options) + g.log.info("Option 'self-heal-daemon' is set to 'on' successfully") + + # Start healing + ret = trigger_heal(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not started') + g.log.info('Healing is started') + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + # Check data on mount point + cmd = ("ls %s" % (self.mounts[0].mountpoint)) + ret, _, _ = g.run(self.clients[0], cmd) + self.assertEqual(ret, 0, "failed to fetch data from mount point") diff --git a/tests/functional/afr/test_split_brain_with_node_reboot.py 
b/tests/functional/afr/test_split_brain_with_node_reboot.py new file mode 100644 index 000000000..9b630ba75 --- /dev/null +++ b/tests/functional/afr/test_split_brain_with_node_reboot.py @@ -0,0 +1,149 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +# pylint: disable=too-many-statements, too-many-locals +from unittest import SkipTest +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_heal_complete) +from glustolibs.io.utils import (run_linux_untar, run_crefi, + wait_for_io_to_complete) + + +@runs_on([['replicated', 'distributed-replicated'], ['glusterfs']]) +class TestSelfHeal(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Check for availability of atleast 3 clients + if len(cls.clients) < 3: + raise SkipTest("This test requires atleast 3 clients") + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts " + "to clients %s" % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + # Setup Volume and Mount Volume + ret = cls.setup_volume_and_mount_volume(cls.mounts, True) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + cls.list_of_io_processes = [] + cls.is_io_running = False + + def tearDown(self): + + # If I/O processes are running wait from them to complete + if self.is_io_running: + if not wait_for_io_to_complete(self.list_of_io_processes, + self.mounts): + raise ExecutionError("Failed to wait for I/O to complete") + + # Unmounting and cleaning volume + ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Unable to delete volume %s" % self.volname) + + self.get_super_method(self, 'tearDown')() + + def test_afr_node_reboot_self_heal(self): + """ + Steps: + 1. Create *3 replica volume + 2. Mount the volume on 3 clients + 3. Run following workload from clients + Client 1: Linux Untars + Client 2: Lookups ls + Client 3: Lookups du + 4. Create a directory on mount point + 5. Create deep dirs and file in the directory created at step 4 + 6. Perform node reboot + 7. Check for heal status + 8. Reboot another node + 9. 
Check for heal status + """ + + # Create a dir to start untar + self.linux_untar_dir = "{}/{}".format(self.mounts[0].mountpoint, + "linuxuntar") + ret = mkdir(self.clients[0], self.linux_untar_dir) + self.assertTrue(ret, "Failed to create dir linuxuntar for untar") + + # Start linux untar on dir linuxuntar from client 1 + ret = run_linux_untar(self.clients[0], self.mounts[0].mountpoint, + dirs=tuple(['linuxuntar'])) + self.list_of_io_processes += ret + self.is_io_running = True + + # Run lookup operation ls from client 2 + cmd = ("cd {}; for i in `seq 1 1000000`;do du -sh; done" + .format(self.mounts[1].mountpoint)) + ret = g.run_async(self.mounts[1].client_system, cmd) + self.list_of_io_processes += [ret] + + # Run lookup operation du from client 3 + cmd = ("cd {}; for i in `seq 1 1000000`;do ls -laRt; done" + .format(self.mounts[2].mountpoint)) + ret = g.run_async(self.mounts[2].client_system, cmd) + self.list_of_io_processes += [ret] + + # Create a dir to start crefi tool + self.linux_untar_dir = "{}/{}".format(self.mounts[3].mountpoint, + "crefi") + ret = mkdir(self.clients[3], self.linux_untar_dir) + self.assertTrue(ret, "Failed to create dir for crefi") + + # Create deep dirs and files on mount point from client 4 + list_of_fops = ("create", "rename", "chmod", "chown", "chgrp", + "hardlink", "truncate", "setxattr") + for fops in list_of_fops: + ret = run_crefi(self.clients[3], + self.linux_untar_dir, 10, 3, 3, thread=4, + random_size=True, fop=fops, minfs=0, + maxfs=102400, multi=True, random_filename=True) + self.assertTrue(ret, "crefi failed during {}".format(fops)) + g.log.info("crefi PASSED FOR fop %s", fops) + g.log.info("IOs were successful using crefi") + + for server_num in (1, 2): + # Perform node reboot for servers + g.log.info("Rebooting %s", self.servers[server_num]) + ret = g.run_async(self.servers[server_num], "reboot") + self.assertTrue(ret, 'Failed to reboot node') + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') diff --git a/tests/functional/afr/test_volume_set_options.py b/tests/functional/afr/test_volume_set_options.py index 86067ea7e..27d62ac2d 100755 --- a/tests/functional/afr/test_volume_set_options.py +++ b/tests/functional/afr/test_volume_set_options.py @@ -42,7 +42,7 @@ class VolumeSetDataSelfHealTests(GlusterBaseClass): """ # calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() self.all_mounts_procs = [] self.io_validation_complete = False @@ -84,7 +84,7 @@ class VolumeSetDataSelfHealTests(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_volume_set_option_data_self_heal(self): """ @@ -358,10 +358,10 @@ class VolumeSetDataSelfHealTests(GlusterBaseClass): # Compare dicts with file size g.log.info('Compare arequal size on brick before bringing offline and' ' after bringing online') - self.assertFalse(cmp(arequal_before_brick_offline, - arequal_after_brick_offline), - 'arequal size on brick before bringing offline and ' - 'after bringing online are not equal') + self.assertEqual( + arequal_before_brick_offline, arequal_after_brick_offline, + 'arequal size on brick before bringing 
offline and ' + 'after bringing online are not equal') g.log.info('arequal size on brick before bringing offline and ' 'after bringing online are equal') diff --git a/tests/functional/afr/test_write_io_mount_point_resumed_quorum_restored.py b/tests/functional/afr/test_write_io_mount_point_resumed_quorum_restored.py index 18125933a..aa2cc0742 100755 --- a/tests/functional/afr/test_write_io_mount_point_resumed_quorum_restored.py +++ b/tests/functional/afr/test_write_io_mount_point_resumed_quorum_restored.py @@ -1,4 +1,4 @@ -# Copyright (C) 2016-2017 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2016-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,9 +14,10 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - import time + from glusto.core import Glusto as g + from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on from glustolibs.gluster.volume_libs import ( @@ -41,16 +42,14 @@ class ClientSideQuorumRestored(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -59,7 +58,7 @@ class ClientSideQuorumRestored(GlusterBaseClass): def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() self.all_mounts_procs = [] self.io_validation_complete = False @@ -101,7 +100,7 @@ class ClientSideQuorumRestored(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_write_io_mount_point_resumed_quorum_restored_x3(self): """ @@ -151,8 +150,9 @@ class ClientSideQuorumRestored(GlusterBaseClass): mount_obj.client_system, mount_obj.mountpoint) # Creating files - cmd = ("python %s create_files -f 30 %s" - % (self.script_upload_path, mount_obj.mountpoint)) + cmd = "/usr/bin/env python %s create_files -f 30 %s" % ( + self.script_upload_path, + mount_obj.mountpoint) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) |
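# The tests in this patch reap their asynchronous IO with validate_io_procs()
# (or wait_for_io_to_complete() in tearDown). A minimal standalone sketch of
# that pattern, assuming the file_dir_ops.py script path and mount objects used
# above; the helper name run_and_validate_io is illustrative only and not part
# of the patch.
from glusto.core import Glusto as g
from glustolibs.io.utils import validate_io_procs

def run_and_validate_io(mounts, script_path):
    procs = []
    for mount in mounts:
        cmd = ("/usr/bin/env python %s create_files -f 30 %s"
               % (script_path, mount.mountpoint))
        # run_async returns immediately with a proc handle; IO continues in
        # the background on the client
        procs.append(g.run_async(mount.client_system, cmd, user=mount.user))
    # Blocks until every proc exits and checks that all of them returned 0
    assert validate_io_procs(procs, mounts), "IO failed on some of the mounts"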