Diffstat (limited to 'tests/functional/afr/heal')
15 files changed, 2427 insertions, 213 deletions
diff --git a/tests/functional/afr/heal/test_afr_self_heal_add_brick_rebalance.py b/tests/functional/afr/heal/test_afr_self_heal_add_brick_rebalance.py new file mode 100644 index 000000000..df05dd86c --- /dev/null +++ b/tests/functional/afr/heal/test_afr_self_heal_add_brick_rebalance.py @@ -0,0 +1,199 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from time import sleep +from random import sample + +from glusto.core import Glusto as g +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + are_bricks_offline) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.heal_libs import (is_volume_in_split_brain, + monitor_heal_completion) +from glustolibs.gluster.lib_utils import collect_bricks_arequal +from glustolibs.gluster.rebalance_ops import (rebalance_start, + wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_ops import volume_start +from glustolibs.gluster.volume_libs import ( + verify_all_process_of_volume_are_online, get_subvols, expand_volume, + wait_for_volume_process_to_be_online) +from glustolibs.io.utils import (validate_io_procs, + list_all_files_and_dirs_mounts, + wait_for_io_to_complete) +from glustolibs.misc.misc_libs import upload_scripts + + +@runs_on([['arbiter', 'distributed-arbiter', 'replicated', + 'distributed-replicated'], ['glusterfs']]) +class TestAfrSelfHealAddBrickRebalance(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" % + cls.clients) + g.log.info("Successfully uploaded IO scripts to clients % s", + cls.clients) + + def setUp(self): + + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + if not self.setup_volume_and_mount_volume(self.mounts): + raise ExecutionError("Unable to setup and mount volume") + + def tearDown(self): + + # Wait if any IOs are pending from the test + if self.all_mounts_procs: + ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts) + if ret: + raise ExecutionError( + "Wait for IO completion failed on some of the clients") + + # Unmount and cleanup the volume + if not self.unmount_volume_and_cleanup_volume(self.mounts): + raise ExecutionError("Unable to unmount and cleanup volume") + + # Calling GlusterBaseClass Teardown + self.get_super_method(self, 'tearDown')() + + def test_afr_self_heal_add_brick_rebalance(self): + """ + Test Steps: + 1. Create a replicated/distributed-replicate volume and mount it + 2. 
Start IO from the clients + 3. Bring down a brick from the subvol and validate it is offline + 4. Bring back the brick online and wait for heal to complete + 5. Once the heal is completed, expand the volume. + 6. Trigger rebalance and wait for rebalance to complete + 7. Validate IO, no errors during the steps performed from step 2 + 8. Check arequal of the subvol and all the brick in the same subvol + should have same checksum + """ + # Start IO from the clients + self.all_mounts_procs = [] + for count, mount_obj in enumerate(self.mounts): + g.log.info("Starting IO on %s:%s", mount_obj.client_system, + mount_obj.mountpoint) + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d --dir-depth 3 --dir-length 5 " + "--max-num-of-dirs 5 --num-of-files 30 %s" % ( + self.script_upload_path, count, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + self.all_mounts_procs.append(proc) + + # List a brick in each subvol and bring them offline + subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + brick_to_bring_offline = [] + for subvol in subvols: + self.assertTrue(subvol, "List is empty") + brick_to_bring_offline.extend(sample(subvol, 1)) + + ret = bring_bricks_offline(self.volname, brick_to_bring_offline) + self.assertTrue(ret, "Unable to bring brick: {} offline".format( + brick_to_bring_offline)) + + # Validate the brick is offline + ret = are_bricks_offline(self.mnode, self.volname, + brick_to_bring_offline) + self.assertTrue(ret, "Brick:{} is still online".format( + brick_to_bring_offline)) + + # Wait for 10 seconds for IO to be generated + sleep(10) + + # Start volume with force to bring all bricks online + ret, _, _ = volume_start(self.mnode, self.volname, force=True) + self.assertEqual(ret, 0, "Volume start with force failed") + g.log.info("Volume: %s started successfully", self.volname) + + # Verify volume's all process are online + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, ("Volume %s : All process are not online", + self.volname)) + + # Monitor heal completion + self.assertTrue(monitor_heal_completion(self.mnode, self.volname, + interval_check=10), + "Heal failed after 20 mins") + + # Check are there any files in split-brain and heal completion + self.assertFalse(is_volume_in_split_brain(self.mnode, self.volname), + "Some files are in split brain for " + "volume: {}".format(self.volname)) + + # Expanding volume by adding bricks to the volume when IO in progress + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, ("Failed to expand the volume when IO in " + "progress on volume %s", self.volname)) + + # Wait for volume processes to be online + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to wait for volume %s processes to " + "be online", self.volname)) + + # Start Rebalance + ret, _, _ = rebalance_start(self.mnode, self.volname) + self.assertEqual(ret, 0, ("Failed to start rebalance on the volume " + "%s", self.volname)) + g.log.info("Successfully started rebalance on the " + "volume %s", self.volname) + + # Without sleep the next step will fail with Glusterd Syncop locking. 
+ sleep(2) + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=1800) + self.assertTrue(ret, ("Rebalance is not yet complete on the volume " + "%s", self.volname)) + g.log.info("Rebalance is successfully complete on " + "the volume %s", self.volname) + + # Validate IO + ret = validate_io_procs(self.all_mounts_procs, self.mounts) + self.io_validation_complete = True + self.assertTrue(ret, "IO failed on some of the clients") + self.all_mounts_procs *= 0 + + # List all files and dirs created + ret = list_all_files_and_dirs_mounts(self.mounts) + self.assertTrue(ret, "Failed to list all files and dirs") + + # Check arequal checksum of all the bricks is same + for subvol in subvols: + ret, arequal_from_the_bricks = collect_bricks_arequal(subvol) + self.assertTrue(ret, "Arequal is collected successfully across " + "the bricks in the subvol {}".format(subvol)) + cmd = len(set(arequal_from_the_bricks)) + if (self.volume_type == "arbiter" or + self.volume_type == "distributed-arbiter"): + cmd = len(set(arequal_from_the_bricks[:2])) + self.assertEqual(cmd, 1, "Arequal" + " is same on all the bricks in the subvol") diff --git a/tests/functional/afr/heal/test_data_split_brain_resolution.py b/tests/functional/afr/heal/test_data_split_brain_resolution.py index e1284cad6..73fd144c1 100644 --- a/tests/functional/afr/heal/test_data_split_brain_resolution.py +++ b/tests/functional/afr/heal/test_data_split_brain_resolution.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -23,6 +23,7 @@ """ from glusto.core import Glusto as g + from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on from glustolibs.misc.misc_libs import upload_scripts @@ -45,7 +46,7 @@ class HealDataSplitBrain(GlusterBaseClass): def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Override Volume if cls.volume_type == "replicated": @@ -57,11 +58,9 @@ class HealDataSplitBrain(GlusterBaseClass): # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on " "mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, script_local_path) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts " "to clients %s" % cls.clients) @@ -75,17 +74,19 @@ class HealDataSplitBrain(GlusterBaseClass): raise ExecutionError("Failed to Setup_Volume and Mount_Volume") g.log.info("Successful in Setup Volume and Mount Volume") - @classmethod - def tearDownClass(cls): - - # Cleanup Volume - g.log.info("Starting to clean up Volume %s", cls.volname) - ret = cls.unmount_volume_and_cleanup_volume(cls.mounts) + def tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + g.log.info("Starting to Unmount Volume and Cleanup Volume") + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: - raise ExecutionError("Failed to create volume") - g.log.info("Successful in cleaning up Volume %s", 
cls.volname) + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") - GlusterBaseClass.tearDownClass.im_func(cls) + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() def verify_brick_arequals(self): g.log.info("Fetching bricks for the volume: %s", self.volname) diff --git a/tests/functional/afr/heal/test_dir_time_stamp_restoration.py b/tests/functional/afr/heal/test_dir_time_stamp_restoration.py new file mode 100644 index 000000000..6a4ef2a19 --- /dev/null +++ b/tests/functional/afr/heal/test_dir_time_stamp_restoration.py @@ -0,0 +1,160 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-131 USA. + +""" +Description: + Check if parent directory timestamps are restored after an entry heal. +""" +from glusto.core import Glusto as g + +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import ( + bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + select_volume_bricks_to_bring_offline, + get_all_bricks) +from glustolibs.gluster.heal_ops import trigger_heal +from glustolibs.gluster.glusterdir import (mkdir, rmdir) +from glustolibs.gluster.glusterfile import (get_fattr, get_file_stat) +from glustolibs.gluster.volume_libs import set_volume_options +from glustolibs.gluster.heal_libs import monitor_heal_completion + + +@runs_on([['replicated'], + ['glusterfs']]) +class TestDirTimeStampRestore(GlusterBaseClass): + + def setUp(self): + self.get_super_method(self, 'setUp')() + + ret = self.setup_volume_and_mount_volume(mounts=self.mounts, + volume_create_force=False) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + self.bricks_list = get_all_bricks(self.mnode, self.volname) + + def tearDown(self): + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + self.get_super_method(self, 'tearDown')() + + def are_mdata_xattrs_equal(self): + """Check if atime/mtime/ctime in glusterfs.mdata xattr are identical""" + timestamps = [] + for brick_path in self.bricks_list: + server, brick = brick_path.split(':') + fattr = get_fattr(server, '%s/%s' % (brick, "dir1"), + 'trusted.glusterfs.mdata') + self.assertIsNotNone(fattr, 'Unable to get mdata xattr') + timestamps.append(fattr) + + g.log.debug("mdata list = %s", ''.join(map(str, timestamps))) + return timestamps.count(timestamps[0]) == len(timestamps) + + def are_stat_timestamps_equal(self): + """Check if atime/mtime/ctime in stat info are identical""" + timestamps = [] + for 
brick_path in self.bricks_list: + server, brick = brick_path.split(':') + stat_data = get_file_stat(server, "%s/dir1" % brick) + ts_string = "{}-{}-{}".format(stat_data['epoch_atime'], + stat_data['epoch_mtime'], + stat_data['epoch_ctime']) + timestamps.append(ts_string) + + g.log.debug("stat list = %s", ''.join(map(str, timestamps))) + return timestamps.count(timestamps[0]) == len(timestamps) + + def perform_test(self, ctime): + """ + Testcase steps: + 1. Enable/disable features,ctime based on function argument. + 2. Create a directory on the mount point. + 3. Kill a brick and create a file inside the directory. + 4. Bring the brick online. + 5. Trigger heal and wait for its completion. + 6. Verify that the atime, mtime and ctime of the directory are same on + all bricks of the replica. + """ + if ctime: + option = {'features.ctime': 'on'} + else: + option = {'features.ctime': 'off'} + ret = set_volume_options(self.mnode, self.volname, option) + self.assertTrue(ret, 'failed to set option %s on %s' + % (option, self.volume)) + + client, m_point = (self.mounts[0].client_system, + self.mounts[0].mountpoint) + + dirpath = '{}/dir1'.format(m_point) + ret = mkdir(client, dirpath) + self.assertTrue(ret, 'Unable to create a directory from mount point') + + bricks_to_bring_offline = select_volume_bricks_to_bring_offline( + self.mnode, self.volname) + self.assertIsNotNone(bricks_to_bring_offline, "List is empty") + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} offline'. + format(bricks_to_bring_offline)) + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks {} are not offline'. + format(bricks_to_bring_offline)) + + cmd = 'touch {}/file1'.format(dirpath) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, 'Unable to create file from mount point') + + ret = bring_bricks_online( + self.mnode, self.volname, + bricks_to_bring_offline, + bring_bricks_online_methods=['volume_start_force']) + self.assertTrue(ret, 'Failed to bring bricks {} online'.format + (bricks_to_bring_offline)) + ret = trigger_heal(self.mnode, self.volname) + self.assertTrue(ret, 'Starting heal failed') + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + if ctime: + ret = self.are_mdata_xattrs_equal() + self.assertTrue(ret, "glusterfs.mdata mismatch for {}" + .format(dirpath)) + else: + ret = self.are_stat_timestamps_equal() + self.assertTrue(ret, "stat mismatch for {}".format(dirpath)) + + ret = rmdir(client, dirpath, force=True) + self.assertTrue(ret, 'Unable to delete directory from mount point') + + def test_dir_time_stamp_restoration(self): + """ + Create pending entry self-heal on a replica volume and verify that + after the heal is complete, the atime, mtime and ctime of the parent + directory are identical on all bricks of the replica. + + The test is run with features.ctime enabled as well as disabled. 
+ """ + self.perform_test(ctime=True) + self.perform_test(ctime=False) diff --git a/tests/functional/afr/heal/test_existing_shd_should_take_care_of_self_healing.py b/tests/functional/afr/heal/test_existing_shd_should_take_care_of_self_healing.py index 64f5254a5..3fe682e59 100755 --- a/tests/functional/afr/heal/test_existing_shd_should_take_care_of_self_healing.py +++ b/tests/functional/afr/heal/test_existing_shd_should_take_care_of_self_healing.py @@ -46,7 +46,7 @@ class SelfHealDaemonProcessTestsWithHealing(GlusterBaseClass): which is used in tests """ # calling GlusterBaseClass setUpClass - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() # Setup Volume and Mount Volume g.log.info("Starting to Setup Volume and Mount Volume") @@ -74,8 +74,8 @@ class SelfHealDaemonProcessTestsWithHealing(GlusterBaseClass): raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") g.log.info("Successful in Unmount Volume and Cleanup Volume") - # calling GlusterBaseClass tearDownClass - GlusterBaseClass.tearDownClass.im_func(self) + # calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() def test_existing_glustershd_should_take_care_of_self_healing(self): """ diff --git a/tests/functional/afr/heal/test_heal_for_conservative_merge_with_two_bricks_blame.py b/tests/functional/afr/heal/test_heal_for_conservative_merge_with_two_bricks_blame.py new file mode 100644 index 000000000..163596bb7 --- /dev/null +++ b/tests/functional/afr/heal/test_heal_for_conservative_merge_with_two_bricks_blame.py @@ -0,0 +1,175 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from time import sleep + +from glusto.core import Glusto as g +from glustolibs.gluster.brick_libs import (get_all_bricks, are_bricks_offline, + bring_bricks_offline, + get_online_bricks_list, + are_bricks_online) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.gluster_init import restart_glusterd +from glustolibs.gluster.glusterfile import set_fattr, get_fattr +from glustolibs.gluster.heal_libs import (is_volume_in_split_brain, + monitor_heal_completion) +from glustolibs.gluster.lib_utils import collect_bricks_arequal + + +@runs_on([['replicated'], ['glusterfs']]) +class TestHealForConservativeMergeWithTwoBricksBlame(GlusterBaseClass): + + def setUp(self): + # calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup volume and mount it. 
+ if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + def tearDown(self): + # Unmount and cleanup the volume + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Unable to unmount and cleanup volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _bring_brick_offline_and_check(self, brick): + """Brings brick offline an checks if it is offline or not""" + ret = bring_bricks_offline(self.volname, [brick]) + self.assertTrue(ret, "Unable to bring brick: {} offline".format(brick)) + + # Validate the brick is offline + ret = are_bricks_offline(self.mnode, self.volname, [brick]) + self.assertTrue(ret, "Brick:{} is still online".format(brick)) + + def _get_fattr_for_the_brick(self, brick): + """Get xattr of trusted.afr.volname-client-0 for the given brick""" + host, fqpath = brick.split(":") + fqpath = fqpath + "/dir1" + fattr = "trusted.afr.{}-client-0".format(self.volname) + return get_fattr(host, fqpath, fattr, encode="hex") + + def _check_peers_status(self): + """Validates peers are connected or not""" + count = 0 + while count < 4: + if self.validate_peers_are_connected(): + return + sleep(5) + count += 1 + self.fail("Peers are not in connected state") + + def test_heal_for_conservative_merge_with_two_bricks_blame(self): + """ + 1) Create 1x3 volume and fuse mount the volume + 2) On mount created a dir dir1 + 3) Pkill glusterfsd on node n1 (b2 on node2 and b3 and node3 up) + 4) touch f{1..10} on the mountpoint + 5) b2 and b3 xattrs would be blaming b1 as files are created while + b1 is down + 6) Reset the b3 xattrs to NOT blame b1 by using setattr + 7) Now pkill glusterfsd of b2 on node2 + 8) Restart glusterd on node1 to bring up b1 + 9) Now bricks b1 online , b2 down, b3 online + 10) touch x{1..10} under dir1 itself + 11) Again reset xattr on node3 of b3 so that it doesn't blame b2, + as done for b1 in step 6 + 12) Do restart glusterd on node2 hosting b2 to bring all bricks online + 13) Check for heal info, split-brain and arequal for the bricks + """ + # pylint: disable=too-many-locals + # Create dir `dir1/` on mountpont + path = self.mounts[0].mountpoint + "/dir1" + ret = mkdir(self.mounts[0].client_system, path, parents=True) + self.assertTrue(ret, "Directory {} creation failed".format(path)) + + all_bricks = get_all_bricks(self.mnode, self.volname) + self.assertIsNotNone(all_bricks, "Unable to fetch bricks of volume") + brick1, brick2, brick3 = all_bricks + + # Bring first brick offline + self._bring_brick_offline_and_check(brick1) + + # touch f{1..10} files on the mountpoint + cmd = ("cd {mpt}; for i in `seq 1 10`; do touch f$i" + "; done".format(mpt=path)) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Unable to create files on mountpoint") + + # Check b2 and b3 xattrs are blaming b1 and are same + self.assertEqual(self._get_fattr_for_the_brick(brick2), + self._get_fattr_for_the_brick(brick3), + "Both the bricks xattrs are not blaming " + "brick: {}".format(brick1)) + + # Reset the xattrs of dir1 on b3 for brick b1 + first_xattr_to_reset = "trusted.afr.{}-client-0".format(self.volname) + xattr_value = "0x000000000000000000000000" + host, brick_path = brick3.split(":") + brick_path = brick_path + "/dir1" + ret = set_fattr(host, brick_path, first_xattr_to_reset, xattr_value) + self.assertTrue(ret, "Unable to set xattr for the directory") + + # Kill brick2 on the node2 + 
self._bring_brick_offline_and_check(brick2) + + # Restart glusterd on node1 to bring the brick1 online + self.assertTrue(restart_glusterd([brick1.split(":")[0]]), "Unable to " + "restart glusterd") + # checking for peer status post glusterd restart + self._check_peers_status() + + # Check if the brick b1 on node1 is online or not + online_bricks = get_online_bricks_list(self.mnode, self.volname) + self.assertIsNotNone(online_bricks, "Unable to fetch online bricks") + self.assertIn(brick1, online_bricks, "Brick:{} is still offline after " + "glusterd restart".format(brick1)) + + # Create 10 files under dir1 naming x{1..10} + cmd = ("cd {mpt}; for i in `seq 1 10`; do touch x$i" + "; done".format(mpt=path)) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Unable to create files on mountpoint") + + # Reset the xattrs from brick3 on to brick2 + second_xattr_to_reset = "trusted.afr.{}-client-1".format(self.volname) + ret = set_fattr(host, brick_path, second_xattr_to_reset, xattr_value) + self.assertTrue(ret, "Unable to set xattr for the directory") + + # Bring brick2 online + self.assertTrue(restart_glusterd([brick2.split(":")[0]]), "Unable to " + "restart glusterd") + self._check_peers_status() + + self.assertTrue(are_bricks_online(self.mnode, self.volname, [brick2])) + + # Check are there any files in split-brain and heal completion + self.assertFalse(is_volume_in_split_brain(self.mnode, self.volname), + "Some files are in split brain for " + "volume: {}".format(self.volname)) + self.assertTrue(monitor_heal_completion(self.mnode, self.volname), + "Conservative merge of files failed") + + # Check arequal checksum of all the bricks is same + ret, arequal_from_the_bricks = collect_bricks_arequal(all_bricks) + self.assertTrue(ret, "Arequal is collected successfully across the" + " bricks in the subvol {}".format(all_bricks)) + self.assertEqual(len(set(arequal_from_the_bricks)), 1, "Arequal is " + "same on all the bricks in the subvol") diff --git a/tests/functional/afr/heal/test_heal_info_no_hang.py b/tests/functional/afr/heal/test_heal_info_no_hang.py new file mode 100644 index 000000000..82f8b0598 --- /dev/null +++ b/tests/functional/afr/heal/test_heal_info_no_hang.py @@ -0,0 +1,162 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-131 USA. + +""" +Description: + heal info completes when there is ongoing I/O and a lot of pending heals. 
+""" +import random +from glusto.core import Glusto as g + +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + get_all_bricks) +from glustolibs.gluster.heal_ops import trigger_heal +from glustolibs.io.utils import run_linux_untar +from glustolibs.gluster.glusterdir import mkdir + + +@runs_on([['distributed-replicated'], + ['glusterfs']]) +class TestHealInfoNoHang(GlusterBaseClass): + + def setUp(self): + self.get_super_method(self, 'setUp')() + + self.is_io_running = False + + ret = self.setup_volume_and_mount_volume(mounts=self.mounts, + volume_create_force=False) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + self.bricks_list = get_all_bricks(self.mnode, self.volname) + self.subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + + def tearDown(self): + if self.is_io_running: + if not self._wait_for_untar_completion(): + g.log.error("I/O failed to stop on clients") + + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + self.get_super_method(self, 'tearDown')() + + def _wait_for_untar_completion(self): + """Wait for the kernel untar to complete""" + has_process_stopped = [] + for proc in self.list_of_io_processes: + try: + ret, _, _ = proc.async_communicate() + if not ret: + has_process_stopped.append(False) + has_process_stopped.append(True) + except ValueError: + has_process_stopped.append(True) + return all(has_process_stopped) + + def _does_heal_info_complete_within_timeout(self): + """Check if heal info CLI completes within a specific timeout""" + # We are just assuming 1 entry takes one second to process, which is + # a very high number but some estimate is better than a random magic + # value for timeout. + timeout = self.num_entries * 1 + + # heal_info_data = get_heal_info(self.mnode, self.volname) + cmd = "timeout %s gluster volume heal %s info" % (timeout, + self.volname) + ret, _, _ = g.run(self.mnode, cmd) + if ret: + return False + return True + + def test_heal_info_no_hang(self): + """ + Testcase steps: + 1. Start kernel untar on the mount + 2. While untar is going on, kill a brick of the replica. + 3. Wait for the untar to be over, resulting in pending heals. + 4. Get the approx. number of pending heals and save it + 5. Bring the brick back online. + 6. Trigger heal + 7. Run more I/Os with dd command + 8. Run heal info command and check that it completes successfully under + a timeout that is based on the no. of heals in step 4. + """ + self.list_of_io_processes = [] + self.linux_untar_dir = "{}/{}".format(self.mounts[0].mountpoint, + "linuxuntar") + ret = mkdir(self.clients[0], self.linux_untar_dir) + self.assertTrue(ret, "Failed to create dir linuxuntar for untar") + + # Start linux untar on dir linuxuntar + ret = run_linux_untar(self.clients[0], self.mounts[0].mountpoint, + dirs=tuple(['linuxuntar'])) + self.list_of_io_processes += ret + self.is_io_running = True + + # Kill brick resulting in heal backlog. 
+ brick_to_bring_offline = random.choice(self.bricks_list) + ret = bring_bricks_offline(self.volname, brick_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' + % brick_to_bring_offline) + ret = are_bricks_offline(self.mnode, self.volname, + [brick_to_bring_offline]) + self.assertTrue(ret, 'Bricks %s are not offline' + % brick_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + brick_to_bring_offline) + + ret = self._wait_for_untar_completion() + self.assertFalse(ret, "IO didn't complete or failed on client") + self.is_io_running = False + + # Get approx. no. of entries to be healed. + cmd = ("gluster volume heal %s statistics heal-count | grep Number " + "| awk '{sum+=$4} END {print sum/2}'" % self.volname) + ret, self.num_entries, _ = g.run(self.mnode, cmd) + self.assertEqual(ret, 0, "Failed to get heal-count statistics") + + # Restart the down bricks + ret = bring_bricks_online(self.mnode, self.volname, + brick_to_bring_offline) + self.assertTrue(ret, 'Failed to bring brick %s online' % + brick_to_bring_offline) + g.log.info('Bringing brick %s online is successful', + brick_to_bring_offline) + # Trigger heal + ret = trigger_heal(self.mnode, self.volname) + self.assertTrue(ret, 'Starting heal failed') + g.log.info('Index heal launched') + + # Run more I/O + cmd = ("for i in `seq 1 10`; do dd if=/dev/urandom of=%s/file_$i " + "bs=1M count=100; done" % self.mounts[0].mountpoint) + ret = g.run_async(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + + # Get heal info + ret = self._does_heal_info_complete_within_timeout() + self.assertTrue(ret, 'Heal info timed out') + g.log.info('Heal info completed succesfully') diff --git a/tests/functional/afr/heal/test_heal_info_while_accessing_file.py b/tests/functional/afr/heal/test_heal_info_while_accessing_file.py index 2fa7b194c..24450702b 100644 --- a/tests/functional/afr/heal/test_heal_info_while_accessing_file.py +++ b/tests/functional/afr/heal/test_heal_info_while_accessing_file.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2016 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -13,8 +13,8 @@ # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- from glusto.core import Glusto as g + from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.volume_libs import get_subvols @@ -41,16 +41,14 @@ class TestSelfHeal(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -73,7 +71,7 @@ class TestSelfHeal(GlusterBaseClass): def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() self.all_mounts_procs = [] self.io_validation_complete = False @@ -118,7 +116,7 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_heal_info_shouldnot_list_files_being_accessed(self): """ @@ -152,8 +150,9 @@ class TestSelfHeal(GlusterBaseClass): mount_obj.client_system, mount_obj.mountpoint) # Creating files - cmd = ("python %s create_files -f 100 %s" - % (self.script_upload_path, mount_obj.mountpoint)) + cmd = "/usr/bin/env python %s create_files -f 100 %s" % ( + self.script_upload_path, + mount_obj.mountpoint) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) @@ -214,9 +213,7 @@ class TestSelfHeal(GlusterBaseClass): # Compare dicts before accessing and while accessing g.log.info('Comparing entries before modifying and while modifying...') - ret = cmp(entries_before_accessing, entries_while_accessing) - self.assertEqual(ret, 0, 'Entries before modifying and while modifying' - 'are not equal') + self.assertDictEqual(entries_before_accessing, entries_while_accessing) g.log.info('Comparison entries before modifying and while modifying' 'finished successfully.') diff --git a/tests/functional/afr/heal/test_impact_of_replace_brick_on_glustershd.py b/tests/functional/afr/heal/test_impact_of_replace_brick_on_glustershd.py new file mode 100644 index 000000000..efd2f8745 --- /dev/null +++ b/tests/functional/afr/heal/test_impact_of_replace_brick_on_glustershd.py @@ -0,0 +1,186 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from random import choice +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass +from glustolibs.gluster.volume_libs import ( + log_volume_info_and_status, wait_for_volume_process_to_be_online, + setup_volume, cleanup_volume) +from glustolibs.gluster.lib_utils import get_servers_bricks_dict +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.brick_ops import replace_brick +from glustolibs.gluster.heal_libs import (get_self_heal_daemon_pid, + do_bricks_exist_in_shd_volfile, + is_shd_daemonized) +from glustolibs.gluster.volume_ops import get_volume_list + + +class SelfHealDaemonProcessTestsWithMultipleVolumes(GlusterBaseClass): + """ + SelfHealDaemonProcessTestsWithMultipleVolumes contains tests which + verifies the self-heal daemon process on multiple volumes running. + """ + def setUp(self): + """ + setup volume and initialize necessary variables + which is used in tests + """ + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup Volume for all the volume types + self.volume_configs = [] + for volume_type in self.default_volume_type_config: + self.volume_configs.append( + {'name': 'testvol_%s' % volume_type, + 'servers': self.servers, + 'voltype': self.default_volume_type_config[volume_type]}) + + for volume_config in self.volume_configs[1:]: + ret = setup_volume(mnode=self.mnode, + all_servers_info=self.all_servers_info, + volume_config=volume_config, + multi_vol=True) + volname = volume_config['name'] + if not ret: + raise ExecutionError("Failed to setup Volume" + " %s" % volname) + g.log.info("Successful in setting volume %s", volname) + + # Verify volume's all process are online for 60 sec + ret = wait_for_volume_process_to_be_online(self.mnode, volname, 60) + if not ret: + raise ExecutionError("Volume %s : All process are not online" + % volname) + g.log.info("Successfully Verified volume %s processes are online", + volname) + + # Verfiy glustershd process releases its parent process + ret = is_shd_daemonized(self.servers) + if not ret: + raise ExecutionError("Self Heal Daemon process was still" + " holding parent process.") + g.log.info("Self Heal Daemon processes are online") + + self.glustershd = "/var/lib/glusterd/glustershd/glustershd-server.vol" + + def tearDown(self): + """ + Clean up the volume and umount volume from client + """ + + # Cleanup volume + volume_list = get_volume_list(self.mnode) + for volume in volume_list: + ret = cleanup_volume(self.mnode, volume) + if not ret: + raise ExecutionError("Failed to cleanup Volume %s" % volume) + g.log.info("Successfully Cleaned up all Volumes") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_impact_of_replace_brick_on_glustershd(self): + """ + Test Script to verify the glustershd server vol file + has only entries for replicate volumes + 1.Create multiple volumes and start all volumes + 2.Check the glustershd processes - Only 1 glustershd should be listed + 3.Do replace brick on the replicate volume + 4.Confirm that the brick is replaced + 5.Check the glustershd processes - Only 1 glustershd should be listed + and pid should be different + 6.glustershd server vol should be updated with new bricks + """ + # Check the self-heal daemon process + ret, glustershd_pids = get_self_heal_daemon_pid(self.servers) + self.assertTrue(ret, ("Either no self heal daemon process found or " + "more than one self heal daemon 
process " + "found : %s" % glustershd_pids)) + g.log.info("Successful in getting single self heal daemon process" + " on all nodes %s", self.servers) + + volume_list = get_volume_list(self.mnode) + for volume in volume_list: + + # Log Volume Info and Status before replacing brick + ret = log_volume_info_and_status(self.mnode, volume) + self.assertTrue(ret, ("Logging volume info and status " + "failed on volume %s", volume)) + g.log.info("Successful in logging volume info and status " + "of volume %s", volume) + + # Selecting a random source brick to replace + src_brick = choice(get_all_bricks(self.mnode, volume)) + src_node, original_brick = src_brick.split(":") + + # Creating a random destination brick in such a way + # that the brick is select from the same node but always + # picks a different from the original brick + list_of_bricks = [ + brick for brick in get_servers_bricks_dict( + src_node, self.all_servers_info)[src_node] + if brick not in original_brick] + dst_brick = ('{}:{}/{}_replaced'.format( + src_node, choice(list_of_bricks), + original_brick.split('/')[::-1][0])) + + # Replace brick for the volume + ret, _, _ = replace_brick(self.mnode, volume, + src_brick, dst_brick) + self.assertFalse(ret, "Failed to replace brick " + "from the volume %s" % volume) + g.log.info("Successfully replaced faulty brick from " + "the volume %s", volume) + + # Verify all volume process are online + ret = wait_for_volume_process_to_be_online(self.mnode, volume) + self.assertTrue(ret, "Volume %s : All process are not online" + % volume) + g.log.info("Volume %s : All process are online", volume) + + # Check the self-heal daemon process after replacing brick + ret, pid_after_replace = get_self_heal_daemon_pid(self.servers) + self.assertTrue(ret, "Either no self heal daemon process " + "found or more than one self heal " + "daemon process found : %s" % pid_after_replace) + g.log.info("Successful in getting Single self heal " + " daemon process on all nodes %s", self.servers) + + # Compare the glustershd pids + self.assertNotEqual(glustershd_pids, pid_after_replace, + "Self heal daemon process should be different " + "after replacing bricks in %s volume" + % volume) + g.log.info("EXPECTED: Self heal daemon process should be different" + " after replacing bricks in replicate volume") + + # Get the bricks for the volume + bricks_list = get_all_bricks(self.mnode, volume) + g.log.info("Brick List : %s", bricks_list) + + # Validate the bricks present in volume info with + # glustershd server volume file + ret = do_bricks_exist_in_shd_volfile(self.mnode, volume, + bricks_list) + self.assertTrue(ret, ("Brick List from volume info is " + "different from glustershd server " + "volume file. Please check log file " + "for details")) + g.log.info("Bricks in volume %s exists in glustershd server " + "volume file", volume) diff --git a/tests/functional/afr/heal/test_metadata_split_brain_resolution.py b/tests/functional/afr/heal/test_metadata_split_brain_resolution.py index 75c513a5f..7782a4de8 100644 --- a/tests/functional/afr/heal/test_metadata_split_brain_resolution.py +++ b/tests/functional/afr/heal/test_metadata_split_brain_resolution.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2020 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -44,7 +44,7 @@ class HealMetadataSplitBrain(GlusterBaseClass): def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Override Volume if cls.volume_type == "replicated": @@ -56,11 +56,9 @@ class HealMetadataSplitBrain(GlusterBaseClass): # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on " "mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, script_local_path) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts " "to clients %s" % cls.clients) @@ -74,17 +72,19 @@ class HealMetadataSplitBrain(GlusterBaseClass): raise ExecutionError("Failed to Setup_Volume and Mount_Volume") g.log.info("Successful in Setup Volume and Mount Volume") - @classmethod - def tearDownClass(cls): - - # Cleanup Volume - g.log.info("Starting to clean up Volume %s", cls.volname) - ret = cls.unmount_volume_and_cleanup_volume(cls.mounts) + def tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + g.log.info("Starting to Unmount Volume and Cleanup Volume") + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: - raise ExecutionError("Failed to create volume") - g.log.info("Successful in cleaning up Volume %s", cls.volname) + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") - GlusterBaseClass.tearDownClass.im_func(cls) + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() def verify_brick_arequals(self): g.log.info("Fetching bricks for the volume: %s", self.volname) diff --git a/tests/functional/afr/heal/test_no_glustershd_with_distribute.py b/tests/functional/afr/heal/test_no_glustershd_with_distribute.py new file mode 100644 index 000000000..bbefe0cff --- /dev/null +++ b/tests/functional/afr/heal/test_no_glustershd_with_distribute.py @@ -0,0 +1,177 @@ +# Copyright (C) 2017-2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" Description: + Test Cases in this module tests the self heal daemon process. 
+""" + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass +from glustolibs.gluster.volume_libs import ( + wait_for_volume_process_to_be_online, setup_volume, cleanup_volume, + get_volume_type_info) +from glustolibs.gluster.heal_libs import (get_self_heal_daemon_pid, + is_shd_daemonized,) +from glustolibs.gluster.volume_ops import (volume_stop, volume_start, + get_volume_list) + + +class SelfHealDaemonProcessTestsWithMultipleVolumes(GlusterBaseClass): + """ + SelfHealDaemonProcessTestsWithMultipleVolumes contains tests which + verifies the self-heal daemon process on multiple volumes running. + """ + @classmethod + def setUpClass(cls): + """ + setup volume and initialize necessary variables + which is used in tests + """ + # calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + list_of_vol = ['distributed-dispersed', 'replicated', + 'dispersed', 'distributed', 'distributed-replicated'] + cls.volume_configs = [] + if cls.default_volume_type_config['distributed']['dist_count'] > 3: + cls.default_volume_type_config['distributed']['dist_count'] = 3 + + for volume_type in list_of_vol: + cls.volume_configs.append( + {'name': 'testvol_%s' % (volume_type), + 'servers': cls.servers, + 'voltype': cls.default_volume_type_config[volume_type]}) + for volume_config in cls.volume_configs: + ret = setup_volume(mnode=cls.mnode, + all_servers_info=cls.all_servers_info, + volume_config=volume_config, multi_vol=True) + volname = volume_config['name'] + if not ret: + raise ExecutionError("Failed to setup Volume" + " %s" % volname) + g.log.info("Successful in setting volume %s", volname) + + # Verify volume's all process are online for 60 sec + g.log.info("Verifying volume's all process are online") + ret = wait_for_volume_process_to_be_online(cls.mnode, volname, 60) + if not ret: + raise ExecutionError("Volume %s : All process are not online" + % volname) + g.log.info("Successfully Verified volume %s processes are online", + volname) + + # Verfiy glustershd process releases its parent process + g.log.info("Verifying Self Heal Daemon process is daemonized") + ret = is_shd_daemonized(cls.servers) + if not ret: + raise ExecutionError("Self Heal Daemon process was still" + " holding parent process.") + g.log.info("Self Heal Daemon processes are online") + + @classmethod + def tearDownClass(cls): + """ + Clean up the volume and umount volume from client + """ + + # stopping the volume + g.log.info("Starting to Cleanup all Volumes") + volume_list = get_volume_list(cls.mnode) + for volume in volume_list: + ret = cleanup_volume(cls.mnode, volume) + if not ret: + raise ExecutionError("Failed to cleanup Volume %s" % volume) + g.log.info("Volume: %s cleanup is done", volume) + g.log.info("Successfully Cleanedup all Volumes") + + # calling GlusterBaseClass tearDownClass + cls.get_super_method(cls, 'tearDownClass')() + + def test_no_glustershd_with_distribute(self): + """ + Test Script to verify the glustershd server vol file + has only entries for replicate volumes + + * Create multiple volumes and start all volumes + * Check the glustershd processes - Only 1 glustershd should be listed + * Stop all volumes + * Check the glustershd processes - No glustershd should be running + * Start the distribute volume only + * Check the glustershd processes - No glustershd should be running + + """ + + nodes = self.servers + + # check the self-heal daemon process + g.log.info("Starting to get 
self-heal daemon process on " + "nodes %s", nodes) + ret, pids = get_self_heal_daemon_pid(nodes) + self.assertTrue(ret, ("Either no self heal daemon process found or " + "more than One self heal daemon process " + "found : %s" % pids)) + g.log.info("Successful in getting single self heal daemon process" + " on all nodes %s", nodes) + + # stop all the volumes + g.log.info("Going to stop all the volumes") + volume_list = get_volume_list(self.mnode) + for volume in volume_list: + g.log.info("Stopping Volume : %s", volume) + ret = volume_stop(self.mnode, volume) + self.assertTrue(ret, ("Failed to stop volume %s" % volume)) + g.log.info("Successfully stopped volume %s", volume) + g.log.info("Successfully stopped all the volumes") + + # check the self-heal daemon process after stopping all volumes + g.log.info("Starting to get self-heal daemon process on " + "nodes %s", nodes) + ret, pids = get_self_heal_daemon_pid(nodes) + self.assertFalse(ret, ("Self heal daemon process is still running " + "after stopping all volumes ")) + for node in pids: + self.assertEqual(pids[node][0], -1, ("Self heal daemon is still " + "running on node %s even " + "after stoppong all " + "volumes" % node)) + g.log.info("EXPECTED: No self heal daemon process is " + "running after stopping all volumes") + + # start the distribute volume only + for volume in volume_list: + volume_type_info = get_volume_type_info(self.mnode, volume) + volume_type = (volume_type_info['volume_type_info']['typeStr']) + if volume_type == 'Distribute': + g.log.info("starting to start distribute volume: %s", volume) + ret = volume_start(self.mnode, volume) + self.assertTrue(ret, ("Failed to start volume %s" % volume)) + g.log.info("Successfully started volume %s", volume) + break + + # check the self-heal daemon process after starting distribute volume + g.log.info("Starting to get self-heal daemon process on " + "nodes %s", nodes) + ret, pids = get_self_heal_daemon_pid(nodes) + self.assertFalse(ret, ("Self heal daemon process is still running " + "after stopping all volumes ")) + for node in pids: + self.assertEqual(pids[node][0], -1, ("Self heal daemon is still " + "running on node %s even " + "after stopping all " + "volumes" % node)) + g.log.info("EXPECTED: No self heal daemon process is running " + "after stopping all volumes") diff --git a/tests/functional/afr/heal/test_self_heal.py b/tests/functional/afr/heal/test_self_heal.py index fe060e4f5..4fb6dea7e 100755 --- a/tests/functional/afr/heal/test_self_heal.py +++ b/tests/functional/afr/heal/test_self_heal.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2016 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,15 +15,13 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
# pylint: disable=too-many-lines - from glusto.core import Glusto as g from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) from glustolibs.gluster.exceptions import ExecutionError -from glustolibs.gluster.volume_ops import set_volume_options +from glustolibs.gluster.volume_ops import get_volume_options from glustolibs.gluster.volume_libs import ( verify_all_process_of_volume_are_online, wait_for_volume_process_to_be_online) -from glustolibs.gluster.volume_libs import expand_volume from glustolibs.gluster.brick_libs import (select_bricks_to_bring_offline, bring_bricks_offline, bring_bricks_online, @@ -34,8 +32,6 @@ from glustolibs.gluster.heal_libs import ( is_heal_complete, is_volume_in_split_brain, is_shd_daemonized) -from glustolibs.gluster.rebalance_ops import (rebalance_start, - wait_for_rebalance_to_complete) from glustolibs.gluster.heal_ops import trigger_heal from glustolibs.misc.misc_libs import upload_scripts from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs, @@ -43,27 +39,25 @@ from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs, @runs_on([['replicated', 'distributed-replicated'], - ['glusterfs', 'cifs', 'nfs']]) + ['glusterfs', 'cifs']]) class TestSelfHeal(GlusterBaseClass): """ Description: - Arbiter Test cases related to - healing in default configuration of the volume + AFR Test cases related to healing in + default configuration of the volume """ @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -86,7 +80,7 @@ class TestSelfHeal(GlusterBaseClass): def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() self.all_mounts_procs = [] self.io_validation_complete = False @@ -121,14 +115,17 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() - def test_data_self_heal_daemon_off(self): + def test_data_self_heal_command(self): """ Test Data-Self-Heal (heal command) Description: - - set the volume option + - get the client side healing volume options and check + if they have already been disabled by default + NOTE: Client side healing has been disabled by default + since GlusterFS 6.0 "metadata-self-heal": "off" "entry-self-heal": "off" "data-self-heal": "off" @@ -137,7 +134,7 @@ class TestSelfHeal(GlusterBaseClass): - set the volume option "self-heal-daemon": "off" - bring down all bricks processes from selected set - - Get areeual after getting bricks offline and compare with + - Get arequal after getting bricks offline and compare with arequal before getting bricks offline - modify the data - bring bricks online @@ -146,8 +143,6 @@ class TestSelfHeal(GlusterBaseClass): - check daemons and start healing - check if heal is completed - check for split-brain - - add bricks 
- - do rebalance - create 5k files - while creating files - kill bricks and bring bricks online one by one in cycle @@ -155,15 +150,16 @@ class TestSelfHeal(GlusterBaseClass): """ # pylint: disable=too-many-statements - # Setting options - g.log.info('Setting options...') - options = {"metadata-self-heal": "off", - "entry-self-heal": "off", - "data-self-heal": "off"} - ret = set_volume_options(self.mnode, self.volname, options) - self.assertTrue(ret, 'Failed to set options %s' % options) - g.log.info("Successfully set %s for volume %s", - options, self.volname) + # Checking if Client side healing options are disabled by default + g.log.info('Checking Client side healing is disabled by default') + options = ('cluster.metadata-self-heal', 'cluster.data-self-heal', + 'cluster.entry-self-heal') + for option in options: + ret = get_volume_options(self.mnode, self.volname, option)[option] + self.assertTrue(bool(ret == 'off' or ret == 'off (DEFAULT)'), + "{} option is not disabled by default" + .format(option)) + g.log.info("Client side healing options are disabled by default") # Creating files on client side for mount_obj in self.mounts: @@ -171,8 +167,10 @@ class TestSelfHeal(GlusterBaseClass): mount_obj.client_system, mount_obj.mountpoint) # Create files g.log.info('Creating files...') - command = ("python %s create_files -f 100 --fixed-file-size 1k %s" - % (self.script_upload_path, mount_obj.mountpoint)) + command = ("/usr/bin/env python %s create_files -f 100 " + "--fixed-file-size 1k %s" % ( + self.script_upload_path, + mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, command, user=mount_obj.user) @@ -193,20 +191,10 @@ class TestSelfHeal(GlusterBaseClass): g.log.info('Getting arequal before getting bricks offline ' 'is successful') - # Setting options - g.log.info('Setting options...') - options = {"self-heal-daemon": "off"} - ret = set_volume_options(self.mnode, self.volname, options) - self.assertTrue(ret, 'Failed to set options %s' % options) - g.log.info("Option 'self-heal-daemon' is set to 'off' successfully") - # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks'])) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -243,8 +231,10 @@ class TestSelfHeal(GlusterBaseClass): mount_obj.mountpoint) # Create files g.log.info('Creating files...') - command = ("python %s create_files -f 100 --fixed-file-size 10k %s" - % (self.script_upload_path, mount_obj.mountpoint)) + command = ("/usr/bin/env python %s create_files -f 100 " + "--fixed-file-size 10k %s" % ( + self.script_upload_path, + mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, command, user=mount_obj.user) @@ -267,13 +257,6 @@ class TestSelfHeal(GlusterBaseClass): g.log.info('Bringing bricks %s online is successful', bricks_to_bring_offline) - # Setting options - g.log.info('Setting options...') - options = {"self-heal-daemon": "on"} - ret = set_volume_options(self.mnode, self.volname, options) - self.assertTrue(ret, 'Failed to set options %s' % options) - g.log.info("Option 'self-heal-daemon' is set to 'on' successfully") - # Wait for volume processes to be online g.log.info("Wait for volume processes to be online") ret = 
wait_for_volume_process_to_be_online(self.mnode, self.volname) @@ -282,7 +265,7 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("Successful in waiting for volume %s processes to be " "online", self.volname) - # Verify volume's all process are online + # Verify volume's all processes are online g.log.info("Verifying volume's all process are online") ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) self.assertTrue(ret, ("Volume %s : All process are not online" @@ -314,23 +297,6 @@ class TestSelfHeal(GlusterBaseClass): self.assertFalse(ret, 'Volume is in split-brain state') g.log.info('Volume is not in split-brain state') - # Add bricks - g.log.info("Start adding bricks to volume...") - ret = expand_volume(self.mnode, self.volname, self.servers, - self.all_servers_info) - self.assertTrue(ret, ("Failed to expand the volume %s", self.volname)) - g.log.info("Expanding volume is successful on " - "volume %s", self.volname) - - # Do rebalance - ret, _, _ = rebalance_start(self.mnode, self.volname) - self.assertEqual(ret, 0, 'Failed to start rebalance') - g.log.info('Rebalance is started') - - ret = wait_for_rebalance_to_complete(self.mnode, self.volname) - self.assertTrue(ret, 'Rebalance is not completed') - g.log.info('Rebalance is completed successfully') - # Create 1k files self.all_mounts_procs = [] for mount_obj in self.mounts: @@ -338,8 +304,9 @@ class TestSelfHeal(GlusterBaseClass): mount_obj.mountpoint) # Create files g.log.info('Creating files...') - command = ("python %s create_files -f 1000 %s" - % (self.script_upload_path, mount_obj.mountpoint)) + command = ("/usr/bin/env python %s create_files -f 1000 %s" % ( + self.script_upload_path, + mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, command, user=mount_obj.user) @@ -402,50 +369,26 @@ class TestSelfHeal(GlusterBaseClass): ) self.io_validation_complete = True - def test_self_heal_50k_files_heal_command_by_add_brick(self): + def test_self_heal_50k_files_heal_default(self): """ - Test self-heal of 50k files (heal command + Test self-heal of 50k files by heal default Description: - - set the volume option - "metadata-self-heal": "off" - "entry-self-heal": "off" - "data-self-heal": "off" - "self-heal-daemon": "off" - bring down all bricks processes from selected set - create IO (50k files) - Get arequal before getting bricks online - - bring bricks online - - set the volume option - "self-heal-daemon": "on" - - check for daemons - - start healing + - check for daemons to come online + - heal daemon should pick up entries to heal automatically - check if heal is completed - check for split-brain - get arequal after getting bricks online and compare with arequal before getting bricks online - - add bricks - - do rebalance - - get arequal after adding bricks and compare with - arequal after getting bricks online """ # pylint: disable=too-many-locals,too-many-statements - # Setting options - g.log.info('Setting options...') - options = {"metadata-self-heal": "off", - "entry-self-heal": "off", - "data-self-heal": "off", - "self-heal-daemon": "off"} - ret = set_volume_options(self.mnode, self.volname, options) - self.assertTrue(ret, 'Failed to set options') - g.log.info("Successfully set %s for volume %s", options, self.volname) # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - 
bricks_to_bring_offline_dict['volume_bricks'])) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -466,8 +409,9 @@ class TestSelfHeal(GlusterBaseClass): mount_obj.client_system, mount_obj.mountpoint) # Create 50k files g.log.info('Creating files...') - command = ("python %s create_files -f 50000 %s" - % (self.script_upload_path, mount_obj.mountpoint)) + command = ("/usr/bin/env python %s create_files -f 50000 %s" % ( + self.script_upload_path, + mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, command, user=mount_obj.user) @@ -496,13 +440,6 @@ class TestSelfHeal(GlusterBaseClass): g.log.info('Bringing bricks %s online is successful', bricks_to_bring_offline) - # Setting options - g.log.info('Setting options...') - options = {"self-heal-daemon": "on"} - ret = set_volume_options(self.mnode, self.volname, options) - self.assertTrue(ret, 'Failed to set options %s' % options) - g.log.info("Option 'self-heal-daemon' is set to 'on' successfully") - # Wait for volume processes to be online g.log.info("Wait for volume processes to be online") ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) @@ -524,11 +461,7 @@ class TestSelfHeal(GlusterBaseClass): self.assertTrue(ret, "Either No self heal daemon process found") g.log.info("All self-heal-daemons are online") - # Start healing - ret = trigger_heal(self.mnode, self.volname) - self.assertTrue(ret, 'Heal is not started') - g.log.info('Healing is started') - + # Default Heal testing, wait for shd to pick up healing # Monitor heal completion ret = monitor_heal_completion(self.mnode, self.volname, timeout_period=3600) @@ -553,40 +486,8 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks online # and after bringing bricks online - self.assertItemsEqual(result_before_online, result_after_online, - 'Checksums before and ' - 'after bringing bricks online are not equal') + self.assertEqual(result_before_online, result_after_online, + 'Checksums before and after bringing bricks online ' + 'are not equal') g.log.info('Checksums before and after bringing bricks online ' 'are equal') - - # Add bricks - g.log.info("Start adding bricks to volume...") - ret = expand_volume(self.mnode, self.volname, self.servers, - self.all_servers_info) - self.assertTrue(ret, ("Failed to expand the volume when IO in " - "progress on volume %s", self.volname)) - g.log.info("Expanding volume is successful on volume %s", self.volname) - - # Do rebalance - ret, _, _ = rebalance_start(self.mnode, self.volname) - self.assertEqual(ret, 0, 'Failed to start rebalance') - g.log.info('Rebalance is started') - - ret = wait_for_rebalance_to_complete(self.mnode, self.volname) - self.assertTrue(ret, 'Rebalance is not completed') - g.log.info('Rebalance is completed successfully') - - # Get arequal after adding bricks - g.log.info('Getting arequal after adding bricks...') - ret, result_after_adding_bricks = collect_mounts_arequal(self.mounts) - self.assertTrue(ret, 'Failed to get arequal') - g.log.info('Getting arequal after getting bricks ' - 'is successful') - - # Checking arequals after bringing bricks online - # and after adding bricks - self.assertItemsEqual(result_after_online, result_after_adding_bricks, - 'Checksums after bringing bricks online and ' - 'after adding bricks are not equal') - g.log.info('Checksums after bringing bricks online and ' - 'after adding bricks are equal') diff --git 
a/tests/functional/afr/heal/test_self_heal_daemon_process.py b/tests/functional/afr/heal/test_self_heal_daemon_process.py index edb4575eb..ea598b1fc 100755 --- a/tests/functional/afr/heal/test_self_heal_daemon_process.py +++ b/tests/functional/afr/heal/test_self_heal_daemon_process.py @@ -1,4 +1,4 @@ -# Copyright (C) 2016-2017 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2016-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -18,9 +18,11 @@ Test Cases in this module tests the self heal daemon process. """ -import time import calendar +import time + from glusto.core import Glusto as g + from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on from glustolibs.gluster.volume_libs import ( @@ -37,7 +39,8 @@ from glustolibs.gluster.heal_libs import (get_self_heal_daemon_pid, is_shd_daemonized, are_all_self_heal_daemons_are_online) from glustolibs.gluster.volume_ops import (volume_stop, volume_start) -from glustolibs.gluster.gluster_init import restart_glusterd +from glustolibs.gluster.gluster_init import ( + restart_glusterd, wait_for_glusterd_to_start) from glustolibs.io.utils import validate_io_procs from glustolibs.misc.misc_libs import upload_scripts @@ -55,16 +58,14 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -78,7 +79,7 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): """ # calling GlusterBaseClass setUpClass - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() self.all_mounts_procs = [] self.io_validation_complete = False @@ -103,7 +104,6 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): """ Clean up the volume and umount volume from client """ - # stopping the volume g.log.info("Starting to Unmount Volume and Cleanup Volume") ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) @@ -112,7 +112,7 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): g.log.info("Successful in Unmount Volume and Cleanup Volume") # calling GlusterBaseClass tearDownClass - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_glustershd_with_add_remove_brick(self): """ @@ -356,6 +356,10 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): g.log.info("Successfully restarted glusterd on all nodes %s", nodes) + self.assertTrue( + wait_for_glusterd_to_start(self.servers), + "Failed to start glusterd on %s" % self.servers) + # check the self heal daemon process after restarting glusterd process g.log.info("Starting to get self-heal daemon process on" " nodes %s", nodes) @@ -445,10 +449,7 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): # select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( 
self.mnode, self.volname)) - bricks_to_bring_offline = filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks'])) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # bring bricks offline g.log.info("Going to bring down the brick process " @@ -529,10 +530,7 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks'])) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -548,11 +546,14 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): bricks_to_bring_offline) # Creating files for all volumes + self.all_mounts_procs = [] for mount_obj in self.mounts: g.log.info("Starting IO on %s:%s", mount_obj.client_system, mount_obj.mountpoint) - cmd = ("python %s create_files -f 100 %s/test_dir" - % (self.script_upload_path, mount_obj.mountpoint)) + cmd = ("/usr/bin/env python %s create_files -f 100 " + "%s/test_dir" % ( + self.script_upload_path, + mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) self.all_mounts_procs.append(proc) diff --git a/tests/functional/afr/heal/test_self_heal_with_link_files.py b/tests/functional/afr/heal/test_self_heal_with_link_files.py new file mode 100644 index 000000000..d029c3d9e --- /dev/null +++ b/tests/functional/afr/heal/test_self_heal_with_link_files.py @@ -0,0 +1,405 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
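The change to test_self_heal_daemon_process.py above now waits for glusterd to come back up after the restart before re-checking the self-heal daemon. A minimal sketch of that restart-and-wait pattern, assuming a glustolibs test environment where `servers` is the list of cluster nodes (the helper name is illustrative, not part of the patch):

from glustolibs.gluster.gluster_init import (restart_glusterd,
                                             wait_for_glusterd_to_start)
from glustolibs.gluster.heal_libs import is_shd_daemonized

def restart_glusterd_and_wait(servers):
    """Restart glusterd on all nodes and wait for it and glustershd."""
    if not restart_glusterd(servers):
        return False
    # The restart call returns before the daemon is fully up; wait explicitly.
    if not wait_for_glusterd_to_start(servers):
        return False
    # glustershd is respawned by glusterd; confirm it has daemonized again.
    return is_shd_daemonized(servers)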
+ +from random import choice + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + are_bricks_online, + get_all_bricks) +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_volume_in_split_brain, + is_heal_complete) +from glustolibs.gluster.lib_utils import collect_bricks_arequal +from glustolibs.gluster.volume_libs import (get_subvols, + replace_brick_from_volume) +from glustolibs.io.utils import collect_mounts_arequal + + +@runs_on([['distributed-replicated', 'replicated'], ['glusterfs']]) +class TestHealWithLinkFiles(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to setup and mount volume") + + self.first_client = self.mounts[0].client_system + self.mountpoint = self.mounts[0].mountpoint + + def tearDown(self): + + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _create_files_and_dirs_on_mount_point(self, second_attempt=False): + """A function to create files and dirs on mount point""" + # Create a parent directory test_link_self_heal on mount point + if not second_attempt: + ret = mkdir(self.first_client, + '{}/{}'.format(self.mountpoint, + 'test_link_self_heal')) + self.assertTrue(ret, "Failed to create dir test_link_self_heal") + + # Create dirctories and files inside directory test_link_self_heal + io_cmd = ("for i in `seq 1 5`; do mkdir dir.$i; " + "for j in `seq 1 10`; do dd if=/dev/random " + "of=dir.$i/file.$j bs=1k count=$j; done; done") + if second_attempt: + io_cmd = ("for i in `seq 1 5` ; do for j in `seq 1 10`; " + "do dd if=/dev/random of=sym_link_dir.$i/" + "new_file.$j bs=1k count=$j; done; done ") + cmd = ("cd {}/test_link_self_heal;{}".format(self.mountpoint, io_cmd)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to create dirs and files inside") + + def _create_soft_links_to_directories(self): + """Create soft links to directories""" + cmd = ("cd {}/test_link_self_heal; for i in `seq 1 5`; do ln -s " + "dir.$i sym_link_dir.$i; done".format(self.mountpoint)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to create soft links to dirs") + + def _verify_soft_links_to_dir(self, option=0): + """Verify soft links to dir""" + + cmd_list = [ + ("for i in `seq 1 5`; do stat -c %F sym_link_dir.$i | " + "grep -F 'symbolic link'; if [ $? -ne 0 ]; then exit 1;" + " fi ; done; for i in `seq 1 5` ; do readlink sym_link_dir.$i | " + "grep \"dir.$i\"; if [ $? -ne 0 ]; then exit 1; fi; done; "), + ("for i in `seq 1 5`; do for j in `seq 1 10`; do ls " + "dir.$i/new_file.$j; if [ $? 
-ne 0 ]; then exit 1; fi; done; " + "done")] + + # Generate command to check according to option + if option == 2: + verify_cmd = "".join(cmd_list) + else: + verify_cmd = cmd_list[option] + + cmd = ("cd {}/test_link_self_heal; {}".format(self.mountpoint, + verify_cmd)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Symlinks aren't proper") + + def _create_hard_links_to_files(self, second_attempt=False): + """Create hard links to files""" + io_cmd = ("for i in `seq 1 5`;do for j in `seq 1 10`;do ln " + "dir.$i/file.$j dir.$i/link_file.$j;done; done") + if second_attempt: + io_cmd = ("for i in `seq 1 5`; do mkdir new_dir.$i; for j in " + "`seq 1 10`; do ln dir.$i/file.$j new_dir.$i/new_file." + "$j;done; done;") + + cmd = ("cd {}/test_link_self_heal;{}".format(self.mountpoint, io_cmd)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to create hard links to files") + + def _verify_hard_links_to_files(self, second_set=False): + """Verify if hard links to files""" + file_to_compare = "dir.$i/link_file.$j" + if second_set: + file_to_compare = "new_dir.$i/new_file.$j" + + cmd = ("cd {}/test_link_self_heal;for i in `seq 1 5`; do for j in `seq" + " 1 10`;do if [ `stat -c %i dir.$i/file.$j` -ne `stat -c %i " + "{}` ];then exit 1; fi; done; done" + .format(self.mountpoint, file_to_compare)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to verify hard links to files") + + def _bring_bricks_offline(self): + """Brings bricks offline and confirms if they are offline""" + # Select bricks to bring offline from a replica set + subvols_dict = get_subvols(self.mnode, self.volname) + subvols = subvols_dict['volume_subvols'] + self.bricks_to_bring_offline = [] + for subvol in subvols: + self.bricks_to_bring_offline.append(subvol[0]) + + # Bring bricks offline + ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' % + self.bricks_to_bring_offline) + + ret = are_bricks_offline(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks %s are not offline' + % self.bricks_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + self.bricks_to_bring_offline) + + def _restart_volume_and_bring_all_offline_bricks_online(self): + """Restart volume and bring all offline bricks online""" + ret = bring_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline, + bring_bricks_online_methods=[ + 'volume_start_force']) + self.assertTrue(ret, 'Failed to bring bricks %s online' % + self.bricks_to_bring_offline) + + # Check if bricks are back online or not + ret = are_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks not online %s even after restart' % + self.bricks_to_bring_offline) + + g.log.info('Bringing bricks %s online is successful', + self.bricks_to_bring_offline) + + def _check_arequal_on_bricks_with_a_specific_arequal(self, arequal, + brick_list): + """ + Compare an inital arequal checksum with bricks from a given brick list + """ + init_val = arequal[0].splitlines()[-1].split(':')[-1] + ret, arequals = collect_bricks_arequal(brick_list) + self.assertTrue(ret, 'Failed to get arequal on bricks') + for brick_arequal in arequals: + brick_total = brick_arequal.splitlines()[-1].split(':')[-1] + self.assertEqual(init_val, brick_total, 'Arequals not matching') + + def _check_arequal_checksum_for_the_volume(self): + """ + Check if arequals of mount point and 
bricks are + are the same. + """ + if self.volume_type == "replicated": + # Check arequals for "replicated" + brick_list = get_all_bricks(self.mnode, self.volname) + + # Get arequal before getting bricks offline + ret, arequals = collect_mounts_arequal([self.mounts[0]]) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal before getting bricks offline ' + 'is successful') + + # Get arequal on bricks and compare with mount_point_total + self._check_arequal_on_bricks_with_a_specific_arequal( + arequals, brick_list) + + # Check arequals for "distributed-replicated" + if self.volume_type == "distributed-replicated": + # Get the subvolumes + subvols_dict = get_subvols(self.mnode, self.volname) + num_subvols = len(subvols_dict['volume_subvols']) + + # Get arequals and compare + for i in range(0, num_subvols): + # Get arequal for first brick + brick_list = subvols_dict['volume_subvols'][i] + ret, arequals = collect_bricks_arequal([brick_list[0]]) + self.assertTrue(ret, 'Failed to get arequal on first brick') + + # Get arequal for every brick and compare with first brick + self._check_arequal_on_bricks_with_a_specific_arequal( + arequals, brick_list) + + def _check_heal_is_completed_and_not_in_split_brain(self): + """Check if heal is completed and volume not in split brain""" + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') + + # Check if volume is in split brian or not + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + def _check_if_there_are_files_and_dirs_to_be_healed(self): + """Check if there are files and dirs to be healed""" + ret = is_heal_complete(self.mnode, self.volname) + self.assertFalse(ret, 'Heal is completed') + g.log.info('Heal is pending') + + def _wait_for_heal_is_completed(self): + """Check if heal is completed""" + ret = monitor_heal_completion(self.mnode, self.volname, + timeout_period=3600) + self.assertTrue(ret, 'Heal has not yet completed') + + def _replace_one_random_brick(self): + """Replace one random brick from the volume""" + brick = choice(get_all_bricks(self.mnode, self.volname)) + ret = replace_brick_from_volume(self.mnode, self.volname, + self.servers, self.all_servers_info, + src_brick=brick) + self.assertTrue(ret, "Failed to replace brick %s " % brick) + g.log.info("Successfully replaced brick %s", brick) + + def test_self_heal_of_hard_links(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create a directory and create files and directories inside it + on mount point. + 3. Collect and compare arequal-checksum according to the volume type + for bricks. + 4. Bring down brick processes accoding to the volume type. + 5. Create hard links for the files created in step 2. + 6. Check if heal info is showing all the files and dirs to be healed. + 7. Bring brack all brick processes which were killed. + 8. Wait for heal to complete on the volume. + 9. Check if heal is complete and check if volume is in split brain. + 10. Collect and compare arequal-checksum according to the volume type + for bricks. + 11. Verify if hard links are proper or not. + 12. Do a lookup on mount point. + 13. Bring down brick processes accoding to the volume type. + 14. Create a second set of hard links to the files. + 15. Check if heal info is showing all the files and dirs to be healed. + 16. 
Bring brack all brick processes which were killed. + 17. Wait for heal to complete on the volume. + 18. Check if heal is complete and check if volume is in split brain. + 19. Collect and compare arequal-checksum according to the volume type + for bricks. + 20. Verify both set of hard links are proper or not. + 21. Do a lookup on mount point. + 22. Pick a random brick and replace it. + 23. Wait for heal to complete on the volume. + 24. Check if heal is complete and check if volume is in split brain. + 25. Collect and compare arequal-checksum according to the volume type + for bricks. + 26. Verify both set of hard links are proper or not. + 27. Do a lookup on mount point. + """ + # Create a directory and create files and directories inside it + # on mount point + self._create_files_and_dirs_on_mount_point() + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + for attempt in (False, True): + + # Bring down brick processes accoding to the volume type + self._bring_bricks_offline() + + # Create hardlinks for the files created in step 2 + self._create_hard_links_to_files(second_attempt=attempt) + + # Check if heal info is showing all the files and dirs to + # be healed + self._check_if_there_are_files_and_dirs_to_be_healed() + + # Bring back all brick processes which were killed + self._restart_volume_and_bring_all_offline_bricks_online() + + # Wait for heal to complete on the volume + self._wait_for_heal_is_completed() + + # Check if heal is complete and check if volume is in split brain + self._check_heal_is_completed_and_not_in_split_brain() + + # Collect and compare arequal-checksum according to the volume + # type for bricks + self._check_arequal_checksum_for_the_volume() + + # Verify if hard links are proper or not + self._verify_hard_links_to_files() + if attempt: + self._verify_hard_links_to_files(second_set=attempt) + + # Pick a random brick and replace it + self._replace_one_random_brick() + + # Wait for heal to complete on the volume + self._wait_for_heal_is_completed() + + # Check if heal is complete and check if volume is in split brain + self._check_heal_is_completed_and_not_in_split_brain() + + # Collect and compare arequal-checksum according to the volume + # type for bricks + self._check_arequal_checksum_for_the_volume() + + # Verify if hard links are proper or not + self._verify_hard_links_to_files() + self._verify_hard_links_to_files(second_set=True) + + def test_self_heal_of_soft_links(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create a directory and create files and directories inside it + on mount point. + 3. Collect and compare arequal-checksum according to the volume type + for bricks. + 4. Bring down brick processes accoding to the volume type. + 5. Create soft links for the dirs created in step 2. + 6. Verify if soft links are proper or not. + 7. Add files through the soft links. + 8. Verify if the soft links are proper or not. + 9. Check if heal info is showing all the files and dirs to be healed. + 10. Bring brack all brick processes which were killed. + 11. Wait for heal to complete on the volume. + 12. Check if heal is complete and check if volume is in split brain. + 13. Collect and compare arequal-checksum according to the volume type + for bricks. + 14. Verify if soft links are proper or not. + 15. Do a lookup on mount point. 
+ """ + # Create a directory and create files and directories inside it + # on mount point + self._create_files_and_dirs_on_mount_point() + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + + # Bring down brick processes accoding to the volume type + self._bring_bricks_offline() + + # Create soft links for the dirs created in step 2 + self._create_soft_links_to_directories() + + # Verify if soft links are proper or not + self._verify_soft_links_to_dir() + + # Add files through the soft links + self._create_files_and_dirs_on_mount_point(second_attempt=True) + + # Verify if the soft links are proper or not + self._verify_soft_links_to_dir(option=1) + + # Check if heal info is showing all the files and dirs to + # be healed + self._check_if_there_are_files_and_dirs_to_be_healed() + + # Bring back all brick processes which were killed + self._restart_volume_and_bring_all_offline_bricks_online() + + # Wait for heal to complete on the volume + self._wait_for_heal_is_completed() + + # Check if heal is complete and check if volume is in split brain + self._check_heal_is_completed_and_not_in_split_brain() + + # Verify if soft links are proper or not + self._verify_soft_links_to_dir(option=2) diff --git a/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py b/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py new file mode 100644 index 000000000..37bd2ec52 --- /dev/null +++ b/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py @@ -0,0 +1,600 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + are_bricks_online, + get_all_bricks) +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_volume_in_split_brain, + is_heal_complete, + enable_granular_heal, + disable_granular_heal) +from glustolibs.gluster.lib_utils import (add_user, del_user, group_del, + group_add, collect_bricks_arequal) +from glustolibs.gluster.volume_ops import get_volume_options +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.io.utils import collect_mounts_arequal + + +@runs_on([['distributed-replicated', 'replicated'], ['glusterfs']]) +class TestHealWithLinkFiles(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + self.first_client = self.mounts[0].client_system + self.mountpoint = self.mounts[0].mountpoint + self.user_group_created = False + + # If test case running is test_self_heal_meta_data + # create user and group + test_name_splitted = self.id().split('.') + test_id = test_name_splitted[len(test_name_splitted) - 1] + if test_id == 'test_self_heal_meta_data': + + # Create non-root group + if not group_add(self.first_client, 'qa_all'): + raise ExecutionError("Failed to create group qa_all") + + # Create non-root users + self.users = ('qa_func', 'qa_system', 'qa_perf') + for user in self.users: + if not add_user(self.first_client, user, group='qa_all'): + raise ExecutionError("Failed to create user {}" + .format(user)) + + self.user_group_created = True + g.log.info("Successfully created all users.") + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to setup and mount volume") + + def tearDown(self): + + # Delete non-root users and group if created + if self.user_group_created: + + # Delete non-root users + for user in self.users: + del_user(self.first_client, user) + g.log.info("Successfully deleted all users") + + # Delete non-root group + group_del(self.first_client, 'qa_all') + + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _set_granular_heal_to_on_or_off(self, enabled=False): + """Set granular heal to ON or OFF""" + granular = get_volume_options(self.mnode, self.volname, + 'granular-entry-heal') + if enabled: + if granular['cluster.granular-entry-heal'] != 'on': + ret = enable_granular_heal(self.mnode, self.volname) + self.assertTrue(ret, + "Unable to set granular-entry-heal to on") + else: + if granular['cluster.granular-entry-heal'] == 'on': + ret = disable_granular_heal(self.mnode, self.volname) + self.assertTrue(ret, + "Unable to set granular-entry-heal to off") + + def _run_cmd(self, io_cmd, err_msg): + """Run cmd and show error message if it fails""" + cmd = ("cd {}/test_self_heal;{}".format(self.mountpoint, io_cmd)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, err_msg) + + def _create_files_and_dirs_on_mount_point(self, index, second_set=False): + """A function to create files and dirs on mount point""" + # Create a parent directory test_self_heal on mount point + if not second_set: + ret = mkdir(self.first_client, '{}/{}'.format( + 
self.mountpoint, 'test_self_heal')) + self.assertTrue(ret, "Failed to create dir test_self_heal") + + # Create dirctories and files inside directory test_self_heal + io_cmd = ("for i in `seq 1 50`; do mkdir dir.$i; dd if=/dev/random" + " of=file.$i count=1K bs=$i; done", + + "for i in `seq 1 100`; do mkdir dir.$i; for j in `seq 1 5`;" + " do dd if=/dev/random of=dir.$i/file.$j bs=1K count=$j" + ";done;done", + + "for i in `seq 1 10`; do mkdir l1_dir.$i; for j in `seq " + "1 5`; do mkdir l1_dir.$i/l2_dir.$j; for k in `seq 1 10`;" + " do dd if=/dev/random of=l1_dir.$i/l2_dir.$j/test.$k" + " bs=1k count=$k; done; done; done;", + + "for i in `seq 51 100`; do mkdir new_dir.$i; for j in `seq" + " 1 10`; do dd if=/dev/random of=new_dir.$i/new_file.$j " + "bs=1K count=$j; done; dd if=/dev/random of=new_file.$i" + " count=1K bs=$i; done ;") + self._run_cmd( + io_cmd[index], "Failed to create dirs and files inside") + + def _delete_files_and_dirs(self): + """Delete files and dirs from mount point""" + io_cmd = ("for i in `seq 1 50`; do rm -rf dir.$i; rm -f file.$i;done") + self._run_cmd(io_cmd, "Failed to delete dirs and files") + + def _rename_files_and_dirs(self): + """Rename files and dirs from mount point""" + io_cmd = ("for i in `seq 51 100`; do mv new_file.$i renamed_file.$i;" + " for j in `seq 1 10`; do mv new_dir.$i/new_file.$j " + "new_dir.$i/renamed_file.$j ; done ; mv new_dir.$i " + "renamed_dir.$i; done;") + self._run_cmd(io_cmd, "Failed to rename dirs and files") + + def _change_meta_deta_of_dirs_and_files(self): + """Change meta data of dirs and files""" + cmds = ( + # Change permission + "for i in `seq 1 100`; do chmod 555 dir.$i; done; " + "for i in `seq 1 50`; do for j in `seq 1 5`; do chmod 666 " + "dir.$i/file.$j; done; done; for i in `seq 51 100`; do for " + "j in `seq 1 5`;do chmod 444 dir.$i/file.$j; done; done;", + + # Change ownership + "for i in `seq 1 35`; do chown -R qa_func dir.$i; done; " + "for i in `seq 36 70`; do chown -R qa_system dir.$i; done; " + "for i in `seq 71 100`; do chown -R qa_perf dir.$i; done;", + + # Change group + "for i in `seq 1 100`; do chgrp -R qa_all dir.$i; done;") + + for io_cmd in cmds: + self._run_cmd(io_cmd, + "Failed to change meta data on dirs and files") + g.log.info("Successfully changed meta data on dirs and files") + + def _verify_meta_data_of_files_and_dirs(self): + """Verify meta data of files and dirs""" + cmds = ( + # Verify permissions + "for i in `seq 1 50`; do stat -c %a dir.$i | grep -F \"555\";" + " if [ $? -ne 0 ]; then exit 1; fi; for j in `seq 1 5` ; do " + "stat -c %a dir.$i/file.$j | grep -F \"666\"; if [ $? -ne 0 ]" + "; then exit 1; fi; done; done; for i in `seq 51 100`; do " + "stat -c %a dir.$i | grep -F \"555\";if [ $? -ne 0 ]; then " + "exit 1; fi; for j in `seq 1 5`; do stat -c %a dir.$i/file.$j" + " | grep -F \"444\"; if [ $? -ne 0 ]; then exit 1; fi; done;" + "done;", + + # Verify ownership + "for i in `seq 1 35`; do stat -c %U dir.$i | grep -F " + "\"qa_func\"; if [ $? -ne 0 ]; then exit 1; fi; for j in " + "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F " + "\"qa_func\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;" + " for i in `seq 36 70` ; do stat -c %U dir.$i | grep -F " + "\"qa_system\"; if [ $? -ne 0 ]; then exit 1; fi; for j in " + "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F " + "\"qa_system\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;" + " for i in `seq 71 100` ; do stat -c %U dir.$i | grep -F " + "\"qa_perf\"; if [ $? 
-ne 0 ]; then exit 1; fi; for j in " + "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F " + "\"qa_perf\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;", + + # Verify group + "for i in `seq 1 100`; do stat -c %G dir.$i | grep -F " + "\"qa_all\"; if [ $? -ne 0 ]; then exit 1; fi; for j in " + "`seq 1 5`; do stat -c %G dir.$i/file.$j | grep -F " + "\"qa_all\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;") + + for io_cmd in cmds: + self._run_cmd(io_cmd, "Meta data of dirs and files not proper") + + def _set_and_remove_extended_attributes(self, remove=False): + """Set and remove extended attributes""" + # Command to set extended attribute to files and dirs + io_cmd = ("for i in `seq 1 100`; do setfattr -n trusted.name -v " + "testing_xattr_selfheal_on_dirs dir.$i; for j in `seq 1 " + "5`;do setfattr -n trusted.name -v " + "testing_xattr_selfheal_on_files dir.$i/file.$j; done; " + "done;") + err_msg = "Failed to set extended attributes to files and dirs" + if remove: + # Command to remove extended attribute set on files and dirs + io_cmd = ("for i in `seq 1 100`; do setfattr -x trusted.name " + "dir.$i; for j in `seq 1 5`; do setfattr -x " + "trusted.name dir.$i/file.$j ; done ; done ;") + err_msg = "Failed to remove extended attributes to files and dirs" + + self._run_cmd(io_cmd, err_msg) + + def _verify_if_extended_attributes_are_proper(self, remove=False): + """Verify if extended attributes are set or remove properly""" + io_cmd = ("for i in `seq 1 100`; do getfattr -n trusted.name -e text " + "dir.$i | grep -F 'testing_xattr_selfheal_on_dirs'; if [ $? " + "-ne 0 ]; then exit 1 ; fi ; for j in `seq 1 5` ; do " + "getfattr -n trusted.name -e text dir.$i/file.$j | grep -F " + "'testing_xattr_selfheal_on_files'; if [ $? -ne 0 ]; then " + "exit 1; fi; done; done;") + err_msg = "Extended attributes on files and dirs are not proper" + if remove: + io_cmd = ("for i in `seq 1 100`; do getfattr -n trusted.name -e " + "text dir.$i; if [ $? -eq 0 ]; then exit 1; fi; for j in" + " `seq 1 5`; do getfattr -n trusted.name -e text " + "dir.$i/file.$j; if [ $? -eq 0]; then exit 1; fi; done; " + "done;") + err_msg = "Extended attributes set to files and dirs not removed" + self._run_cmd(io_cmd, err_msg) + + def _remove_files_and_create_dirs_with_the_same_name(self): + """Remove files and create dirs with the same name""" + io_cmd = ("for i in `seq 1 10`; do for j in `seq 1 5`; do for k in " + "`seq 1 10`; do rm -f l1_dir.$i/l2_dir.$j/test.$k; mkdir " + "l1_dir.$i/l2_dir.$j/test.$k; done; done; done;") + self._run_cmd(io_cmd, + "Failed to remove files and create dirs with same name") + + def _verify_if_dirs_are_proper_or_not(self): + """Verify if dirs are proper or not""" + io_cmd = ("for i in `seq 1 10`; do for j in `seq 1 5`; do for k in " + "`seq 1 10`; do stat -c %F l1_dir.$i/l2_dir.$j/test.$k | " + "grep -F 'directory'; if [ $? 
-ne 0 ]; then exit 1; fi; " + "done; done; done;") + self._run_cmd(io_cmd, "Dirs created instead of files aren't proper") + + def _bring_bricks_offline(self): + """Brings bricks offline and confirms if they are offline""" + # Select bricks to bring offline from a replica set + subvols_dict = get_subvols(self.mnode, self.volname) + subvols = subvols_dict['volume_subvols'] + self.bricks_to_bring_offline = [] + for subvol in subvols: + self.bricks_to_bring_offline.append(subvol[0]) + + # Bring bricks offline + ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' % + self.bricks_to_bring_offline) + + ret = are_bricks_offline(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks %s are not offline' + % self.bricks_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + self.bricks_to_bring_offline) + + def _restart_volume_and_bring_all_offline_bricks_online(self): + """Restart volume and bring all offline bricks online""" + ret = bring_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline, + bring_bricks_online_methods=[ + 'volume_start_force']) + self.assertTrue(ret, 'Failed to bring bricks %s online' % + self.bricks_to_bring_offline) + + # Check if bricks are back online or not + ret = are_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks not online %s even after restart' % + self.bricks_to_bring_offline) + + g.log.info('Bringing bricks %s online is successful', + self.bricks_to_bring_offline) + + def _check_arequal_on_bricks_with_a_specific_arequal(self, arequal, + brick_list): + """ + Compare an inital arequal checksum with bricks from a given brick list + """ + init_val = arequal[0].splitlines()[-1].split(':')[-1] + ret, arequals = collect_bricks_arequal(brick_list) + self.assertTrue(ret, 'Failed to get arequal on bricks') + for brick_arequal in arequals: + brick_total = brick_arequal.splitlines()[-1].split(':')[-1] + self.assertEqual(init_val, brick_total, 'Arequals not matching') + + @staticmethod + def _add_dir_path_to_brick_list(brick_list): + """Add test_self_heal at the end of brick path""" + dir_brick_list = [] + for brick in brick_list: + dir_brick_list.append('{}/{}'.format(brick, 'test_self_heal')) + return dir_brick_list + + def _check_arequal_checksum_for_the_volume(self): + """ + Check if arequals of mount point and bricks are + are the same. 
+ """ + if self.volume_type == "replicated": + # Check arequals for "replicated" + brick_list = get_all_bricks(self.mnode, self.volname) + dir_brick_list = self._add_dir_path_to_brick_list(brick_list) + + # Get arequal before getting bricks offline + work_dir = '{}/test_self_heal'.format(self.mountpoint) + ret, arequals = collect_mounts_arequal([self.mounts[0]], + path=work_dir) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal before getting bricks offline ' + 'is successful') + + # Get arequal on bricks and compare with mount_point_total + self._check_arequal_on_bricks_with_a_specific_arequal( + arequals, dir_brick_list) + + # Check arequals for "distributed-replicated" + if self.volume_type == "distributed-replicated": + # Get the subvolumes + subvols_dict = get_subvols(self.mnode, self.volname) + num_subvols = len(subvols_dict['volume_subvols']) + + # Get arequals and compare + for i in range(0, num_subvols): + # Get arequal for first brick + brick_list = subvols_dict['volume_subvols'][i] + dir_brick_list = self._add_dir_path_to_brick_list(brick_list) + ret, arequals = collect_bricks_arequal([dir_brick_list[0]]) + self.assertTrue(ret, 'Failed to get arequal on first brick') + + # Get arequal for every brick and compare with first brick + self._check_arequal_on_bricks_with_a_specific_arequal( + arequals, dir_brick_list) + + def _check_heal_is_completed_and_not_in_split_brain(self): + """Check if heal is completed and volume not in split brain""" + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') + + # Check if volume is in split brian or not + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + def _check_if_there_are_files_and_dirs_to_be_healed(self): + """Check if there are files and dirs to be healed""" + ret = is_heal_complete(self.mnode, self.volname) + self.assertFalse(ret, 'Heal is completed') + g.log.info('Heal is pending') + + def _wait_for_heal_is_completed(self): + """Check if heal is completed""" + ret = monitor_heal_completion(self.mnode, self.volname, + timeout_period=3600) + self.assertTrue(ret, 'Heal has not yet completed') + + def _check_heal_status_restart_vol_wait_and_check_data(self): + """ + Perform repatative steps mentioned below: + 1 Check if heal info is showing all the files and dirs to be healed + 2 Bring back all brick processes which were killed + 3 Wait for heal to complete on the volume + 4 Check if heal is complete and check if volume is in split brain + 5 Collect and compare arequal-checksum according to the volume type + for bricks + """ + # Check if heal info is showing all the files and dirs to be healed + self._check_if_there_are_files_and_dirs_to_be_healed() + + # Bring back all brick processes which were killed + self._restart_volume_and_bring_all_offline_bricks_online() + + # Wait for heal to complete on the volume + self._wait_for_heal_is_completed() + + # Check if heal is complete and check if volume is in split brain + self._check_heal_is_completed_and_not_in_split_brain() + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + + def _run_test_self_heal_entry_heal(self): + """Run steps of test_self_heal_entry_heal""" + # Create a directory and create files and directories inside it on + # mount 
point + self._create_files_and_dirs_on_mount_point(0) + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + + # Bring down brick processes accoding to the volume type + self._bring_bricks_offline() + + # Create a new set of files and directories on mount point + self._create_files_and_dirs_on_mount_point(3, second_set=True) + + self._check_heal_status_restart_vol_wait_and_check_data() + + # Bring down brick processes accoding to the volume type + self._bring_bricks_offline() + + # Delete files and directories from mount point + self._delete_files_and_dirs() + + self._check_heal_status_restart_vol_wait_and_check_data() + + # Bring down brick processes accoding to the volume type + self._bring_bricks_offline() + + # Rename the existing files and dirs + self._rename_files_and_dirs() + + self._check_heal_status_restart_vol_wait_and_check_data() + + def test_self_heal_entry_heal(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create a directory and create files and directories inside it + on mount point. + 3. Collect and compare arequal-checksum according to the volume type + for bricks. + 4. Bring down brick processes accoding to the volume type. + 5. Create a new set of files and directories on mount point. + 6. Check if heal info is showing all the files and dirs to be healed. + 7. Bring back all brick processes which were killed. + 8. Wait for heal to complete on the volume. + 9. Check if heal is complete and check if volume is in split brain. + 10. Collect and compare arequal-checksum according to the volume type + for bricks. + 11. Bring down brick processes accoding to the volume type. + 12. Delete files and directories from mount point. + 13. Check if heal info is showing all the files and dirs to be healed. + 14. Bring back all brick processes which were killed. + 15. Wait for heal to complete on the volume. + 16. Check if heal is complete and check if volume is in split brain. + 17. Collect and compare arequal-checksum according to the volume type + for bricks. + 18. Bring down brick processes accoding to the volume type. + 19. Rename the existing files and dirs. + 20. Check if heal info is showing all the files and dirs to be healed. + 21. Bring back all brick processes which were killed. + 22. Wait for heal to complete on the volume. + 23. Check if heal is complete and check if volume is in split brain. + 24. Collect and compare arequal-checksum according to the volume type + for bricks. + + Note: + Do this test with both Granular-entry-heal set enable and disable. + """ + for value in (False, True): + if value: + # Cleanup old data from mount point + ret, _, _ = g.run(self.first_client, + 'rm -rf {}/*'.format(self.mountpoint)) + self.assertFalse(ret, 'Failed to cleanup mount point') + g.log.info("Testing with granular heal set to enabled") + self._set_granular_heal_to_on_or_off(enabled=value) + self._run_test_self_heal_entry_heal() + + def test_self_heal_meta_data(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create a directory and create files and directories inside it + on mount point. + 3. Collect and compare arequal-checksum according to the volume type + for bricks. + 4. Bring down brick processes accoding to the volume type. + 5. Change the meta data of files and dirs. + 6. Check if heal info is showing all the files and dirs to be healed. + 7. Bring back all brick processes which were killed. + 8. 
Wait for heal to complete on the volume. + 9. Check if heal is complete and check if volume is in split brain. + 10. Collect and compare arequal-checksum according to the volume type + for bricks. + 11. Verify if the meta data of files and dirs. + 12. Bring down brick processes accoding to the volume type. + 13. Set extended attributes on the files and dirs. + 14. Verify if the extended attributes are set properly or not. + 15. Check if heal info is showing all the files and dirs to be healed. + 16. Bring back all brick processes which were killed. + 17. Wait for heal to complete on the volume. + 18. Check if heal is complete and check if volume is in split brain. + 19. Collect and compare arequal-checksum according to the volume type + for bricks. + 20. Verify if extended attributes are consitent or not. + 21. Bring down brick processes accoding to the volume type + 22. Remove extended attributes on the files and dirs. + 23. Verify if extended attributes were removed properly. + 24. Check if heal info is showing all the files and dirs to be healed. + 25. Bring back all brick processes which were killed. + 26. Wait for heal to complete on the volume. + 27. Check if heal is complete and check if volume is in split brain. + 28. Collect and compare arequal-checksum according to the volume type + for bricks. + 29. Verify if extended attributes are removed or not. + """ + # Create a directory and create files and directories inside it + # on mount point + self._create_files_and_dirs_on_mount_point(1) + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + + # Bring down brick processes accoding to the volume type + self._bring_bricks_offline() + + # Change the meta data of files and dirs + self._change_meta_deta_of_dirs_and_files() + + self._check_heal_status_restart_vol_wait_and_check_data() + + # Verify if the meta data of files and dirs + self._verify_meta_data_of_files_and_dirs() + + for value in (False, True): + # Bring down brick processes accoding to the volume type + self._bring_bricks_offline() + + # Set or remove extended attributes on the files and dirs + self._set_and_remove_extended_attributes(remove=value) + + # Verify if the extended attributes are set properly or not + self._verify_if_extended_attributes_are_proper(remove=value) + + self._check_heal_status_restart_vol_wait_and_check_data() + + # Verify if extended attributes are consitent or not + self._verify_if_extended_attributes_are_proper(remove=value) + + def test_self_heal_of_dir_with_files_removed(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create a directory and create files and directories inside it + on mount point. + 3. Collect and compare arequal-checksum according to the volume type + for bricks. + 4. Bring down brick processes accoding to the volume type. + 5. Remove all files and create dir which have name of files. + 6. Check if heal info is showing all the files and dirs to be healed. + 7. Bring back all brick processes which were killed. + 8. Wait for heal to complete on the volume. + 9. Check if heal is complete and check if volume is in split brain. + 10. Collect and compare arequal-checksum according to the volume type + for bricks. + 11. Verify if dirs are healed properly or not. 
+ """ + # Create a directory and create files and directories inside it + # on mount point + self._create_files_and_dirs_on_mount_point(2) + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + + # Bring down brick processes accoding to the volume type + self._bring_bricks_offline() + + # Remove all files and create dir which have name of files + self._remove_files_and_create_dirs_with_the_same_name() + + self._check_heal_status_restart_vol_wait_and_check_data() + + # Verify if dirs are healed properly or not + self._verify_if_dirs_are_proper_or_not() diff --git a/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py b/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py new file mode 100644 index 000000000..a449e396f --- /dev/null +++ b/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py @@ -0,0 +1,250 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" Description: + Test Cases in this module tests the self heal daemon process. 
+""" + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.brick_libs import ( + bring_bricks_offline, bring_bricks_online, + select_volume_bricks_to_bring_offline, get_online_bricks_list) +from glustolibs.gluster.heal_libs import ( + get_self_heal_daemon_pid, is_shd_daemonized, + monitor_heal_completion, bring_self_heal_daemon_process_offline, + disable_granular_heal) +from glustolibs.gluster.heal_ops import (get_heal_info_summary, + trigger_heal_full) +from glustolibs.io.utils import validate_io_procs +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.gluster.volume_ops import (set_volume_options, + get_volume_options) +from glustolibs.gluster.mount_ops import mount_volume, umount_volume + + +@runs_on([['replicated'], ['glusterfs']]) +class SelfHealDaemonProcessTestsWithSingleVolume(GlusterBaseClass): + """ + SelfHealDaemonProcessTestsWithSingleVolume contains tests which + verifies the self-heal daemon process on a single volume + """ + + def setUp(self): + + # Calling GlusterBaseClass setUpClass + self.get_super_method(self, 'setUp')() + + # Upload script + self.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(self.clients, [self.script_upload_path]) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients") + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + # Verify glustershd process releases its parent process + ret = is_shd_daemonized(self.servers) + if not ret: + raise ExecutionError("Self Heal Daemon process was still" + " holding parent process.") + g.log.info("Self Heal Daemon processes are online") + + def tearDown(self): + """ + Clean up the volume and umount volume from client + """ + # Stopping the volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") + g.log.info("Successful in Unmount Volume and Cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_server_side_healing_happens_only_when_glustershd_running(self): + """ + Test Script which verifies that the server side healing must happen + only if the heal daemon is running on the node where source brick + resides. 
+ + * Create and start the Replicate volume + * Check the glustershd processes - Only 1 glustershd should be listed + * Bring down the bricks without affecting the cluster + * Create files on volume + * kill the glustershd on node where bricks is running + * bring the bricks up which was killed in previous steps + * check the heal info - heal info must show pending heal info, heal + shouldn't happen since glustershd is down on source node + * issue heal + * trigger client side heal + * heal should complete successfully + """ + # pylint: disable=too-many-locals,too-many-statements,too-many-lines + + # Disable granular heal if not disabled already + granular = get_volume_options(self.mnode, self.volname, + 'granular-entry-heal') + if granular['cluster.granular-entry-heal'] == 'on': + ret = disable_granular_heal(self.mnode, self.volname) + self.assertTrue(ret, + "Unable to set granular-entry-heal to on") + + # Setting Volume options + options = {"metadata-self-heal": "on", + "entry-self-heal": "on", + "data-self-heal": "on"} + ret = set_volume_options(self.mnode, self.volname, options) + self.assertTrue(ret, 'Failed to set options %s' % options) + g.log.info("Successfully set %s for volume %s", + options, self.volname) + + # Check the self-heal daemon process + ret, pids = get_self_heal_daemon_pid(self.servers) + self.assertTrue(ret, ("Either No self heal daemon process found or " + "more than One self heal daemon process " + "found : %s" % pids)) + g.log.info("Successful in verifying self heal daemon process" + " on all nodes %s", self.servers) + + # Select the bricks to bring offline + bricks_to_bring_offline = (select_volume_bricks_to_bring_offline + (self.mnode, self.volname)) + g.log.info("Brick List to bring offline : %s", bricks_to_bring_offline) + + # Bring down the selected bricks + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, "Failed to bring down the bricks") + g.log.info("Brought down the brick process " + "for %s", bricks_to_bring_offline) + + # Write files on all mounts + all_mounts_procs, num_files_to_write = [], 100 + for mount_obj in self.mounts: + cmd = ("/usr/bin/env python %s create_files " + "-f %d --base-file-name file %s" % (self.script_upload_path, + num_files_to_write, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + all_mounts_procs.append(proc) + + # Validate IO + ret = validate_io_procs(all_mounts_procs, self.mounts) + self.assertTrue(ret, "IO failed on some of the clients") + g.log.info("IO is successful on all mounts") + + # Get online bricks list + online_bricks = get_online_bricks_list(self.mnode, self.volname) + g.log.info("Online Bricks for volume %s : %s", + self.volname, online_bricks) + + # Get the nodes where bricks are running + bring_offline_glustershd_nodes = [] + for brick in online_bricks: + bring_offline_glustershd_nodes.append(brick.split(":")[0]) + g.log.info("self heal deamon on nodes %s to be killed", + bring_offline_glustershd_nodes) + + # Kill the self heal daemon process on nodes + ret = bring_self_heal_daemon_process_offline( + bring_offline_glustershd_nodes) + self.assertTrue(ret, ("Unable to bring self heal daemon process" + " offline for nodes %s" + % bring_offline_glustershd_nodes)) + g.log.info("Sucessfully brought down self heal process for " + "nodes %s", bring_offline_glustershd_nodes) + + # Check the heal info + heal_info = get_heal_info_summary(self.mnode, self.volname) + g.log.info("Successfully got heal info %s for the volume %s", + 
heal_info, self.volname) + + # Bring bricks online + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline, 'glusterd_restart') + self.assertTrue(ret, ("Failed to bring bricks: %s online" + % bricks_to_bring_offline)) + + # Issue heal + ret = trigger_heal_full(self.mnode, self.volname) + self.assertFalse(ret, ("Able to trigger heal on volume %s where " + "self heal daemon is not running" + % self.volname)) + g.log.info("Expected : Unable to trigger heal on volume %s where " + "self heal daemon is not running", self.volname) + + # Wait for 130 sec to heal + ret = monitor_heal_completion(self.mnode, self.volname, 130) + self.assertFalse(ret, ("Heal Completed on volume %s" % self.volname)) + g.log.info("Expected : Heal pending on volume %s", self.volname) + + # Check the heal info + heal_info_after_triggering_heal = get_heal_info_summary(self.mnode, + self.volname) + g.log.info("Successfully got heal info for the volume %s", + self.volname) + + # Compare with heal pending with the files wrote + for node in online_bricks: + self.assertGreaterEqual( + int(heal_info_after_triggering_heal[node]['numberOfEntries']), + num_files_to_write, + ("Some of the files are healed from source bricks %s where " + "self heal daemon is not running" % node)) + g.log.info("EXPECTED: No files are healed from source bricks where " + "self heal daemon is not running") + + # Unmount and Mount volume again as volume options were set + # after mounting the volume + for mount_obj in self.mounts: + ret, _, _ = umount_volume(mount_obj.client_system, + mount_obj.mountpoint) + self.assertEqual(ret, 0, "Failed to unmount %s" + % mount_obj.client_system) + ret, _, _ = mount_volume(self.volname, + mtype='glusterfs', + mpoint=mount_obj.mountpoint, + mserver=self.mnode, + mclient=mount_obj.client_system) + self.assertEqual(ret, 0, "Failed to mount %s" + % mount_obj.client_system) + + all_mounts_procs = [] + for mount_obj in self.mounts: + cmd = ("cd %s;for i in `seq 1 5`; do ls -l;cat *; stat *; sleep 5;" + " done " % (mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + all_mounts_procs.append(proc) + + # Validate IO + ret = validate_io_procs(all_mounts_procs, self.mounts) + self.assertTrue(ret, "Reads failed on some of the clients") + g.log.info("Reads successful on all mounts") + + # Wait for heal to complete + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, "Unable to heal the pending entries") + g.log.info("Successfully healed the pending entries for volume %s", + self.volname) |
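The pending-entries check in the server-side healing test above reads the heal-info summary per online brick and compares it with the number of files written while glustershd was down. A minimal sketch of that check, assuming the same glustolibs environment and the heal-info summary format used by the test (the helper name is illustrative):

from glustolibs.gluster.heal_ops import get_heal_info_summary

def pending_entries_per_brick(mnode, volname, bricks):
    """Return a dict mapping each brick to its pending heal entry count."""
    heal_info = get_heal_info_summary(mnode, volname)
    # Each brick key (host:brick-path) holds a 'numberOfEntries' field.
    return {brick: int(heal_info[brick]['numberOfEntries'])
            for brick in bricks}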