summaryrefslogtreecommitdiffstats
path: root/tests/functional/afr
diff options
context:
space:
mode:
Diffstat (limited to 'tests/functional/afr')
-rw-r--r--tests/functional/afr/heal/test_no_glustershd_with_distribute.py4
-rw-r--r--tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py600
-rw-r--r--tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py23
-rw-r--r--tests/functional/afr/test_add_brick_followed_by_remove_brick.py170
-rw-r--r--tests/functional/afr/test_afr_cli_no_splitbrain_resolution.py41
-rw-r--r--tests/functional/afr/test_default_granular_entry_heal.py235
-rw-r--r--tests/functional/afr/test_self_heal_with_expand_volume.py221
-rw-r--r--tests/functional/afr/test_split_brain_with_hard_link_file.py175
-rw-r--r--tests/functional/afr/test_split_brain_with_node_reboot.py149
9 files changed, 1589 insertions, 29 deletions
diff --git a/tests/functional/afr/heal/test_no_glustershd_with_distribute.py b/tests/functional/afr/heal/test_no_glustershd_with_distribute.py
index d2b43bfe3..bbefe0cff 100644
--- a/tests/functional/afr/heal/test_no_glustershd_with_distribute.py
+++ b/tests/functional/afr/heal/test_no_glustershd_with_distribute.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com>
+# Copyright (C) 2017-2021 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -58,7 +58,7 @@ class SelfHealDaemonProcessTestsWithMultipleVolumes(GlusterBaseClass):
for volume_config in cls.volume_configs:
ret = setup_volume(mnode=cls.mnode,
all_servers_info=cls.all_servers_info,
- volume_config=volume_config)
+ volume_config=volume_config, multi_vol=True)
volname = volume_config['name']
if not ret:
raise ExecutionError("Failed to setup Volume"
diff --git a/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py b/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py
new file mode 100644
index 000000000..37bd2ec52
--- /dev/null
+++ b/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py
@@ -0,0 +1,600 @@
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along`
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+ bring_bricks_online,
+ are_bricks_offline,
+ are_bricks_online,
+ get_all_bricks)
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+ is_volume_in_split_brain,
+ is_heal_complete,
+ enable_granular_heal,
+ disable_granular_heal)
+from glustolibs.gluster.lib_utils import (add_user, del_user, group_del,
+ group_add, collect_bricks_arequal)
+from glustolibs.gluster.volume_ops import get_volume_options
+from glustolibs.gluster.volume_libs import get_subvols
+from glustolibs.io.utils import collect_mounts_arequal
+
+
+@runs_on([['distributed-replicated', 'replicated'], ['glusterfs']])
+class TestHealWithLinkFiles(GlusterBaseClass):
+
+ def setUp(self):
+
+ self.get_super_method(self, 'setUp')()
+
+ self.first_client = self.mounts[0].client_system
+ self.mountpoint = self.mounts[0].mountpoint
+ self.user_group_created = False
+
+ # If test case running is test_self_heal_meta_data
+ # create user and group
+ test_name_splitted = self.id().split('.')
+ test_id = test_name_splitted[len(test_name_splitted) - 1]
+ if test_id == 'test_self_heal_meta_data':
+
+ # Create non-root group
+ if not group_add(self.first_client, 'qa_all'):
+ raise ExecutionError("Failed to create group qa_all")
+
+ # Create non-root users
+ self.users = ('qa_func', 'qa_system', 'qa_perf')
+ for user in self.users:
+ if not add_user(self.first_client, user, group='qa_all'):
+ raise ExecutionError("Failed to create user {}"
+ .format(user))
+
+ self.user_group_created = True
+ g.log.info("Successfully created all users.")
+
+ # Setup Volume
+ if not self.setup_volume_and_mount_volume([self.mounts[0]]):
+ raise ExecutionError("Failed to setup and mount volume")
+
+ def tearDown(self):
+
+ # Delete non-root users and group if created
+ if self.user_group_created:
+
+ # Delete non-root users
+ for user in self.users:
+ del_user(self.first_client, user)
+ g.log.info("Successfully deleted all users")
+
+ # Delete non-root group
+ group_del(self.first_client, 'qa_all')
+
+ if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]):
+ raise ExecutionError("Failed to cleanup Volume")
+
+ # Calling GlusterBaseClass tearDown
+ self.get_super_method(self, 'tearDown')()
+
+ def _set_granular_heal_to_on_or_off(self, enabled=False):
+ """Set granular heal to ON or OFF"""
+ granular = get_volume_options(self.mnode, self.volname,
+ 'granular-entry-heal')
+ if enabled:
+ if granular['cluster.granular-entry-heal'] != 'on':
+ ret = enable_granular_heal(self.mnode, self.volname)
+ self.assertTrue(ret,
+ "Unable to set granular-entry-heal to on")
+ else:
+ if granular['cluster.granular-entry-heal'] == 'on':
+ ret = disable_granular_heal(self.mnode, self.volname)
+ self.assertTrue(ret,
+ "Unable to set granular-entry-heal to off")
+
+ def _run_cmd(self, io_cmd, err_msg):
+ """Run cmd and show error message if it fails"""
+ cmd = ("cd {}/test_self_heal;{}".format(self.mountpoint, io_cmd))
+ ret, _, _ = g.run(self.first_client, cmd)
+ self.assertFalse(ret, err_msg)
+
+ def _create_files_and_dirs_on_mount_point(self, index, second_set=False):
+ """A function to create files and dirs on mount point"""
+ # Create a parent directory test_self_heal on mount point
+ if not second_set:
+ ret = mkdir(self.first_client, '{}/{}'.format(
+ self.mountpoint, 'test_self_heal'))
+ self.assertTrue(ret, "Failed to create dir test_self_heal")
+
+ # Create dirctories and files inside directory test_self_heal
+ io_cmd = ("for i in `seq 1 50`; do mkdir dir.$i; dd if=/dev/random"
+ " of=file.$i count=1K bs=$i; done",
+
+ "for i in `seq 1 100`; do mkdir dir.$i; for j in `seq 1 5`;"
+ " do dd if=/dev/random of=dir.$i/file.$j bs=1K count=$j"
+ ";done;done",
+
+ "for i in `seq 1 10`; do mkdir l1_dir.$i; for j in `seq "
+ "1 5`; do mkdir l1_dir.$i/l2_dir.$j; for k in `seq 1 10`;"
+ " do dd if=/dev/random of=l1_dir.$i/l2_dir.$j/test.$k"
+ " bs=1k count=$k; done; done; done;",
+
+ "for i in `seq 51 100`; do mkdir new_dir.$i; for j in `seq"
+ " 1 10`; do dd if=/dev/random of=new_dir.$i/new_file.$j "
+ "bs=1K count=$j; done; dd if=/dev/random of=new_file.$i"
+ " count=1K bs=$i; done ;")
+ self._run_cmd(
+ io_cmd[index], "Failed to create dirs and files inside")
+
+ def _delete_files_and_dirs(self):
+ """Delete files and dirs from mount point"""
+ io_cmd = ("for i in `seq 1 50`; do rm -rf dir.$i; rm -f file.$i;done")
+ self._run_cmd(io_cmd, "Failed to delete dirs and files")
+
+ def _rename_files_and_dirs(self):
+ """Rename files and dirs from mount point"""
+ io_cmd = ("for i in `seq 51 100`; do mv new_file.$i renamed_file.$i;"
+ " for j in `seq 1 10`; do mv new_dir.$i/new_file.$j "
+ "new_dir.$i/renamed_file.$j ; done ; mv new_dir.$i "
+ "renamed_dir.$i; done;")
+ self._run_cmd(io_cmd, "Failed to rename dirs and files")
+
+ def _change_meta_deta_of_dirs_and_files(self):
+ """Change meta data of dirs and files"""
+ cmds = (
+ # Change permission
+ "for i in `seq 1 100`; do chmod 555 dir.$i; done; "
+ "for i in `seq 1 50`; do for j in `seq 1 5`; do chmod 666 "
+ "dir.$i/file.$j; done; done; for i in `seq 51 100`; do for "
+ "j in `seq 1 5`;do chmod 444 dir.$i/file.$j; done; done;",
+
+ # Change ownership
+ "for i in `seq 1 35`; do chown -R qa_func dir.$i; done; "
+ "for i in `seq 36 70`; do chown -R qa_system dir.$i; done; "
+ "for i in `seq 71 100`; do chown -R qa_perf dir.$i; done;",
+
+ # Change group
+ "for i in `seq 1 100`; do chgrp -R qa_all dir.$i; done;")
+
+ for io_cmd in cmds:
+ self._run_cmd(io_cmd,
+ "Failed to change meta data on dirs and files")
+ g.log.info("Successfully changed meta data on dirs and files")
+
+ def _verify_meta_data_of_files_and_dirs(self):
+ """Verify meta data of files and dirs"""
+ cmds = (
+ # Verify permissions
+ "for i in `seq 1 50`; do stat -c %a dir.$i | grep -F \"555\";"
+ " if [ $? -ne 0 ]; then exit 1; fi; for j in `seq 1 5` ; do "
+ "stat -c %a dir.$i/file.$j | grep -F \"666\"; if [ $? -ne 0 ]"
+ "; then exit 1; fi; done; done; for i in `seq 51 100`; do "
+ "stat -c %a dir.$i | grep -F \"555\";if [ $? -ne 0 ]; then "
+ "exit 1; fi; for j in `seq 1 5`; do stat -c %a dir.$i/file.$j"
+ " | grep -F \"444\"; if [ $? -ne 0 ]; then exit 1; fi; done;"
+ "done;",
+
+ # Verify ownership
+ "for i in `seq 1 35`; do stat -c %U dir.$i | grep -F "
+ "\"qa_func\"; if [ $? -ne 0 ]; then exit 1; fi; for j in "
+ "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F "
+ "\"qa_func\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;"
+ " for i in `seq 36 70` ; do stat -c %U dir.$i | grep -F "
+ "\"qa_system\"; if [ $? -ne 0 ]; then exit 1; fi; for j in "
+ "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F "
+ "\"qa_system\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;"
+ " for i in `seq 71 100` ; do stat -c %U dir.$i | grep -F "
+ "\"qa_perf\"; if [ $? -ne 0 ]; then exit 1; fi; for j in "
+ "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F "
+ "\"qa_perf\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;",
+
+ # Verify group
+ "for i in `seq 1 100`; do stat -c %G dir.$i | grep -F "
+ "\"qa_all\"; if [ $? -ne 0 ]; then exit 1; fi; for j in "
+ "`seq 1 5`; do stat -c %G dir.$i/file.$j | grep -F "
+ "\"qa_all\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;")
+
+ for io_cmd in cmds:
+ self._run_cmd(io_cmd, "Meta data of dirs and files not proper")
+
+ def _set_and_remove_extended_attributes(self, remove=False):
+ """Set and remove extended attributes"""
+ # Command to set extended attribute to files and dirs
+ io_cmd = ("for i in `seq 1 100`; do setfattr -n trusted.name -v "
+ "testing_xattr_selfheal_on_dirs dir.$i; for j in `seq 1 "
+ "5`;do setfattr -n trusted.name -v "
+ "testing_xattr_selfheal_on_files dir.$i/file.$j; done; "
+ "done;")
+ err_msg = "Failed to set extended attributes to files and dirs"
+ if remove:
+ # Command to remove extended attribute set on files and dirs
+ io_cmd = ("for i in `seq 1 100`; do setfattr -x trusted.name "
+ "dir.$i; for j in `seq 1 5`; do setfattr -x "
+ "trusted.name dir.$i/file.$j ; done ; done ;")
+ err_msg = "Failed to remove extended attributes to files and dirs"
+
+ self._run_cmd(io_cmd, err_msg)
+
+ def _verify_if_extended_attributes_are_proper(self, remove=False):
+ """Verify if extended attributes are set or remove properly"""
+ io_cmd = ("for i in `seq 1 100`; do getfattr -n trusted.name -e text "
+ "dir.$i | grep -F 'testing_xattr_selfheal_on_dirs'; if [ $? "
+ "-ne 0 ]; then exit 1 ; fi ; for j in `seq 1 5` ; do "
+ "getfattr -n trusted.name -e text dir.$i/file.$j | grep -F "
+ "'testing_xattr_selfheal_on_files'; if [ $? -ne 0 ]; then "
+ "exit 1; fi; done; done;")
+ err_msg = "Extended attributes on files and dirs are not proper"
+ if remove:
+ io_cmd = ("for i in `seq 1 100`; do getfattr -n trusted.name -e "
+ "text dir.$i; if [ $? -eq 0 ]; then exit 1; fi; for j in"
+ " `seq 1 5`; do getfattr -n trusted.name -e text "
+ "dir.$i/file.$j; if [ $? -eq 0]; then exit 1; fi; done; "
+ "done;")
+ err_msg = "Extended attributes set to files and dirs not removed"
+ self._run_cmd(io_cmd, err_msg)
+
+ def _remove_files_and_create_dirs_with_the_same_name(self):
+ """Remove files and create dirs with the same name"""
+ io_cmd = ("for i in `seq 1 10`; do for j in `seq 1 5`; do for k in "
+ "`seq 1 10`; do rm -f l1_dir.$i/l2_dir.$j/test.$k; mkdir "
+ "l1_dir.$i/l2_dir.$j/test.$k; done; done; done;")
+ self._run_cmd(io_cmd,
+ "Failed to remove files and create dirs with same name")
+
+ def _verify_if_dirs_are_proper_or_not(self):
+ """Verify if dirs are proper or not"""
+ io_cmd = ("for i in `seq 1 10`; do for j in `seq 1 5`; do for k in "
+ "`seq 1 10`; do stat -c %F l1_dir.$i/l2_dir.$j/test.$k | "
+ "grep -F 'directory'; if [ $? -ne 0 ]; then exit 1; fi; "
+ "done; done; done;")
+ self._run_cmd(io_cmd, "Dirs created instead of files aren't proper")
+
+ def _bring_bricks_offline(self):
+ """Brings bricks offline and confirms if they are offline"""
+ # Select bricks to bring offline from a replica set
+ subvols_dict = get_subvols(self.mnode, self.volname)
+ subvols = subvols_dict['volume_subvols']
+ self.bricks_to_bring_offline = []
+ for subvol in subvols:
+ self.bricks_to_bring_offline.append(subvol[0])
+
+ # Bring bricks offline
+ ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline)
+ self.assertTrue(ret, 'Failed to bring bricks %s offline' %
+ self.bricks_to_bring_offline)
+
+ ret = are_bricks_offline(self.mnode, self.volname,
+ self.bricks_to_bring_offline)
+ self.assertTrue(ret, 'Bricks %s are not offline'
+ % self.bricks_to_bring_offline)
+ g.log.info('Bringing bricks %s offline is successful',
+ self.bricks_to_bring_offline)
+
+ def _restart_volume_and_bring_all_offline_bricks_online(self):
+ """Restart volume and bring all offline bricks online"""
+ ret = bring_bricks_online(self.mnode, self.volname,
+ self.bricks_to_bring_offline,
+ bring_bricks_online_methods=[
+ 'volume_start_force'])
+ self.assertTrue(ret, 'Failed to bring bricks %s online' %
+ self.bricks_to_bring_offline)
+
+ # Check if bricks are back online or not
+ ret = are_bricks_online(self.mnode, self.volname,
+ self.bricks_to_bring_offline)
+ self.assertTrue(ret, 'Bricks not online %s even after restart' %
+ self.bricks_to_bring_offline)
+
+ g.log.info('Bringing bricks %s online is successful',
+ self.bricks_to_bring_offline)
+
+ def _check_arequal_on_bricks_with_a_specific_arequal(self, arequal,
+ brick_list):
+ """
+ Compare an inital arequal checksum with bricks from a given brick list
+ """
+ init_val = arequal[0].splitlines()[-1].split(':')[-1]
+ ret, arequals = collect_bricks_arequal(brick_list)
+ self.assertTrue(ret, 'Failed to get arequal on bricks')
+ for brick_arequal in arequals:
+ brick_total = brick_arequal.splitlines()[-1].split(':')[-1]
+ self.assertEqual(init_val, brick_total, 'Arequals not matching')
+
+ @staticmethod
+ def _add_dir_path_to_brick_list(brick_list):
+ """Add test_self_heal at the end of brick path"""
+ dir_brick_list = []
+ for brick in brick_list:
+ dir_brick_list.append('{}/{}'.format(brick, 'test_self_heal'))
+ return dir_brick_list
+
+ def _check_arequal_checksum_for_the_volume(self):
+ """
+ Check if arequals of mount point and bricks are
+ are the same.
+ """
+ if self.volume_type == "replicated":
+ # Check arequals for "replicated"
+ brick_list = get_all_bricks(self.mnode, self.volname)
+ dir_brick_list = self._add_dir_path_to_brick_list(brick_list)
+
+ # Get arequal before getting bricks offline
+ work_dir = '{}/test_self_heal'.format(self.mountpoint)
+ ret, arequals = collect_mounts_arequal([self.mounts[0]],
+ path=work_dir)
+ self.assertTrue(ret, 'Failed to get arequal')
+ g.log.info('Getting arequal before getting bricks offline '
+ 'is successful')
+
+ # Get arequal on bricks and compare with mount_point_total
+ self._check_arequal_on_bricks_with_a_specific_arequal(
+ arequals, dir_brick_list)
+
+ # Check arequals for "distributed-replicated"
+ if self.volume_type == "distributed-replicated":
+ # Get the subvolumes
+ subvols_dict = get_subvols(self.mnode, self.volname)
+ num_subvols = len(subvols_dict['volume_subvols'])
+
+ # Get arequals and compare
+ for i in range(0, num_subvols):
+ # Get arequal for first brick
+ brick_list = subvols_dict['volume_subvols'][i]
+ dir_brick_list = self._add_dir_path_to_brick_list(brick_list)
+ ret, arequals = collect_bricks_arequal([dir_brick_list[0]])
+ self.assertTrue(ret, 'Failed to get arequal on first brick')
+
+ # Get arequal for every brick and compare with first brick
+ self._check_arequal_on_bricks_with_a_specific_arequal(
+ arequals, dir_brick_list)
+
+ def _check_heal_is_completed_and_not_in_split_brain(self):
+ """Check if heal is completed and volume not in split brain"""
+ # Check if heal is completed
+ ret = is_heal_complete(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal is not complete')
+ g.log.info('Heal is completed successfully')
+
+ # Check if volume is in split brian or not
+ ret = is_volume_in_split_brain(self.mnode, self.volname)
+ self.assertFalse(ret, 'Volume is in split-brain state')
+ g.log.info('Volume is not in split-brain state')
+
+ def _check_if_there_are_files_and_dirs_to_be_healed(self):
+ """Check if there are files and dirs to be healed"""
+ ret = is_heal_complete(self.mnode, self.volname)
+ self.assertFalse(ret, 'Heal is completed')
+ g.log.info('Heal is pending')
+
+ def _wait_for_heal_is_completed(self):
+ """Check if heal is completed"""
+ ret = monitor_heal_completion(self.mnode, self.volname,
+ timeout_period=3600)
+ self.assertTrue(ret, 'Heal has not yet completed')
+
+ def _check_heal_status_restart_vol_wait_and_check_data(self):
+ """
+ Perform repatative steps mentioned below:
+ 1 Check if heal info is showing all the files and dirs to be healed
+ 2 Bring back all brick processes which were killed
+ 3 Wait for heal to complete on the volume
+ 4 Check if heal is complete and check if volume is in split brain
+ 5 Collect and compare arequal-checksum according to the volume type
+ for bricks
+ """
+ # Check if heal info is showing all the files and dirs to be healed
+ self._check_if_there_are_files_and_dirs_to_be_healed()
+
+ # Bring back all brick processes which were killed
+ self._restart_volume_and_bring_all_offline_bricks_online()
+
+ # Wait for heal to complete on the volume
+ self._wait_for_heal_is_completed()
+
+ # Check if heal is complete and check if volume is in split brain
+ self._check_heal_is_completed_and_not_in_split_brain()
+
+ # Collect and compare arequal-checksum according to the volume type
+ # for bricks
+ self._check_arequal_checksum_for_the_volume()
+
+ def _run_test_self_heal_entry_heal(self):
+ """Run steps of test_self_heal_entry_heal"""
+ # Create a directory and create files and directories inside it on
+ # mount point
+ self._create_files_and_dirs_on_mount_point(0)
+
+ # Collect and compare arequal-checksum according to the volume type
+ # for bricks
+ self._check_arequal_checksum_for_the_volume()
+
+ # Bring down brick processes accoding to the volume type
+ self._bring_bricks_offline()
+
+ # Create a new set of files and directories on mount point
+ self._create_files_and_dirs_on_mount_point(3, second_set=True)
+
+ self._check_heal_status_restart_vol_wait_and_check_data()
+
+ # Bring down brick processes accoding to the volume type
+ self._bring_bricks_offline()
+
+ # Delete files and directories from mount point
+ self._delete_files_and_dirs()
+
+ self._check_heal_status_restart_vol_wait_and_check_data()
+
+ # Bring down brick processes accoding to the volume type
+ self._bring_bricks_offline()
+
+ # Rename the existing files and dirs
+ self._rename_files_and_dirs()
+
+ self._check_heal_status_restart_vol_wait_and_check_data()
+
+ def test_self_heal_entry_heal(self):
+ """
+ Test case:
+ 1. Create a volume, start it and mount it.
+ 2. Create a directory and create files and directories inside it
+ on mount point.
+ 3. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 4. Bring down brick processes accoding to the volume type.
+ 5. Create a new set of files and directories on mount point.
+ 6. Check if heal info is showing all the files and dirs to be healed.
+ 7. Bring back all brick processes which were killed.
+ 8. Wait for heal to complete on the volume.
+ 9. Check if heal is complete and check if volume is in split brain.
+ 10. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 11. Bring down brick processes accoding to the volume type.
+ 12. Delete files and directories from mount point.
+ 13. Check if heal info is showing all the files and dirs to be healed.
+ 14. Bring back all brick processes which were killed.
+ 15. Wait for heal to complete on the volume.
+ 16. Check if heal is complete and check if volume is in split brain.
+ 17. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 18. Bring down brick processes accoding to the volume type.
+ 19. Rename the existing files and dirs.
+ 20. Check if heal info is showing all the files and dirs to be healed.
+ 21. Bring back all brick processes which were killed.
+ 22. Wait for heal to complete on the volume.
+ 23. Check if heal is complete and check if volume is in split brain.
+ 24. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+
+ Note:
+ Do this test with both Granular-entry-heal set enable and disable.
+ """
+ for value in (False, True):
+ if value:
+ # Cleanup old data from mount point
+ ret, _, _ = g.run(self.first_client,
+ 'rm -rf {}/*'.format(self.mountpoint))
+ self.assertFalse(ret, 'Failed to cleanup mount point')
+ g.log.info("Testing with granular heal set to enabled")
+ self._set_granular_heal_to_on_or_off(enabled=value)
+ self._run_test_self_heal_entry_heal()
+
+ def test_self_heal_meta_data(self):
+ """
+ Test case:
+ 1. Create a volume, start it and mount it.
+ 2. Create a directory and create files and directories inside it
+ on mount point.
+ 3. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 4. Bring down brick processes accoding to the volume type.
+ 5. Change the meta data of files and dirs.
+ 6. Check if heal info is showing all the files and dirs to be healed.
+ 7. Bring back all brick processes which were killed.
+ 8. Wait for heal to complete on the volume.
+ 9. Check if heal is complete and check if volume is in split brain.
+ 10. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 11. Verify if the meta data of files and dirs.
+ 12. Bring down brick processes accoding to the volume type.
+ 13. Set extended attributes on the files and dirs.
+ 14. Verify if the extended attributes are set properly or not.
+ 15. Check if heal info is showing all the files and dirs to be healed.
+ 16. Bring back all brick processes which were killed.
+ 17. Wait for heal to complete on the volume.
+ 18. Check if heal is complete and check if volume is in split brain.
+ 19. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 20. Verify if extended attributes are consitent or not.
+ 21. Bring down brick processes accoding to the volume type
+ 22. Remove extended attributes on the files and dirs.
+ 23. Verify if extended attributes were removed properly.
+ 24. Check if heal info is showing all the files and dirs to be healed.
+ 25. Bring back all brick processes which were killed.
+ 26. Wait for heal to complete on the volume.
+ 27. Check if heal is complete and check if volume is in split brain.
+ 28. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 29. Verify if extended attributes are removed or not.
+ """
+ # Create a directory and create files and directories inside it
+ # on mount point
+ self._create_files_and_dirs_on_mount_point(1)
+
+ # Collect and compare arequal-checksum according to the volume type
+ # for bricks
+ self._check_arequal_checksum_for_the_volume()
+
+ # Bring down brick processes accoding to the volume type
+ self._bring_bricks_offline()
+
+ # Change the meta data of files and dirs
+ self._change_meta_deta_of_dirs_and_files()
+
+ self._check_heal_status_restart_vol_wait_and_check_data()
+
+ # Verify if the meta data of files and dirs
+ self._verify_meta_data_of_files_and_dirs()
+
+ for value in (False, True):
+ # Bring down brick processes accoding to the volume type
+ self._bring_bricks_offline()
+
+ # Set or remove extended attributes on the files and dirs
+ self._set_and_remove_extended_attributes(remove=value)
+
+ # Verify if the extended attributes are set properly or not
+ self._verify_if_extended_attributes_are_proper(remove=value)
+
+ self._check_heal_status_restart_vol_wait_and_check_data()
+
+ # Verify if extended attributes are consitent or not
+ self._verify_if_extended_attributes_are_proper(remove=value)
+
+ def test_self_heal_of_dir_with_files_removed(self):
+ """
+ Test case:
+ 1. Create a volume, start it and mount it.
+ 2. Create a directory and create files and directories inside it
+ on mount point.
+ 3. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 4. Bring down brick processes accoding to the volume type.
+ 5. Remove all files and create dir which have name of files.
+ 6. Check if heal info is showing all the files and dirs to be healed.
+ 7. Bring back all brick processes which were killed.
+ 8. Wait for heal to complete on the volume.
+ 9. Check if heal is complete and check if volume is in split brain.
+ 10. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 11. Verify if dirs are healed properly or not.
+ """
+ # Create a directory and create files and directories inside it
+ # on mount point
+ self._create_files_and_dirs_on_mount_point(2)
+
+ # Collect and compare arequal-checksum according to the volume type
+ # for bricks
+ self._check_arequal_checksum_for_the_volume()
+
+ # Bring down brick processes accoding to the volume type
+ self._bring_bricks_offline()
+
+ # Remove all files and create dir which have name of files
+ self._remove_files_and_create_dirs_with_the_same_name()
+
+ self._check_heal_status_restart_vol_wait_and_check_data()
+
+ # Verify if dirs are healed properly or not
+ self._verify_if_dirs_are_proper_or_not()
diff --git a/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py b/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py
index 43b4f4edf..a449e396f 100644
--- a/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py
+++ b/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -26,12 +26,14 @@ from glustolibs.gluster.brick_libs import (
select_volume_bricks_to_bring_offline, get_online_bricks_list)
from glustolibs.gluster.heal_libs import (
get_self_heal_daemon_pid, is_shd_daemonized,
- monitor_heal_completion, bring_self_heal_daemon_process_offline)
+ monitor_heal_completion, bring_self_heal_daemon_process_offline,
+ disable_granular_heal)
from glustolibs.gluster.heal_ops import (get_heal_info_summary,
trigger_heal_full)
from glustolibs.io.utils import validate_io_procs
from glustolibs.misc.misc_libs import upload_scripts
-from glustolibs.gluster.volume_ops import set_volume_options
+from glustolibs.gluster.volume_ops import (set_volume_options,
+ get_volume_options)
from glustolibs.gluster.mount_ops import mount_volume, umount_volume
@@ -99,6 +101,15 @@ class SelfHealDaemonProcessTestsWithSingleVolume(GlusterBaseClass):
* heal should complete successfully
"""
# pylint: disable=too-many-locals,too-many-statements,too-many-lines
+
+ # Disable granular heal if not disabled already
+ granular = get_volume_options(self.mnode, self.volname,
+ 'granular-entry-heal')
+ if granular['cluster.granular-entry-heal'] == 'on':
+ ret = disable_granular_heal(self.mnode, self.volname)
+ self.assertTrue(ret,
+ "Unable to set granular-entry-heal to on")
+
# Setting Volume options
options = {"metadata-self-heal": "on",
"entry-self-heal": "on",
@@ -131,7 +142,7 @@ class SelfHealDaemonProcessTestsWithSingleVolume(GlusterBaseClass):
all_mounts_procs, num_files_to_write = [], 100
for mount_obj in self.mounts:
cmd = ("/usr/bin/env python %s create_files "
- "-f %s --base-file-name file %s" % (self.script_upload_path,
+ "-f %d --base-file-name file %s" % (self.script_upload_path,
num_files_to_write,
mount_obj.mountpoint))
proc = g.run_async(mount_obj.client_system, cmd,
@@ -221,8 +232,8 @@ class SelfHealDaemonProcessTestsWithSingleVolume(GlusterBaseClass):
all_mounts_procs = []
for mount_obj in self.mounts:
- cmd = ("/usr/bin/env python %s read %s"
- % (self.script_upload_path, mount_obj.mountpoint))
+ cmd = ("cd %s;for i in `seq 1 5`; do ls -l;cat *; stat *; sleep 5;"
+ " done " % (mount_obj.mountpoint))
proc = g.run_async(mount_obj.client_system, cmd,
user=mount_obj.user)
all_mounts_procs.append(proc)
diff --git a/tests/functional/afr/test_add_brick_followed_by_remove_brick.py b/tests/functional/afr/test_add_brick_followed_by_remove_brick.py
new file mode 100644
index 000000000..a653b792d
--- /dev/null
+++ b/tests/functional/afr/test_add_brick_followed_by_remove_brick.py
@@ -0,0 +1,170 @@
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along`
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.brick_libs import get_all_bricks
+from glustolibs.gluster.dht_test_utils import is_layout_complete
+from glustolibs.gluster.glusterfile import (file_exists,
+ occurences_of_pattern_in_file)
+from glustolibs.gluster.rebalance_ops import (rebalance_start,
+ wait_for_rebalance_to_complete)
+from glustolibs.gluster.volume_libs import expand_volume, shrink_volume
+from glustolibs.io.utils import (validate_io_procs, wait_for_io_to_complete)
+from glustolibs.misc.misc_libs import upload_scripts
+
+
+@runs_on([['replicated'], ['glusterfs']])
+class TestAddBrickFollowedByRemoveBrick(GlusterBaseClass):
+
+ @classmethod
+ def setUpClass(cls):
+ cls.get_super_method(cls, 'setUpClass')()
+
+ cls.first_client = cls.mounts[0].client_system
+ cls.mountpoint = cls.mounts[0].mountpoint
+ cls.is_io_running = False
+
+ # Upload IO scripts for running IO on mounts
+ cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
+ "file_dir_ops.py")
+ if not file_exists(cls.first_client, cls.script_upload_path):
+ if not upload_scripts(cls.first_client, cls.script_upload_path):
+ raise ExecutionError(
+ "Failed to upload IO scripts to client %s"
+ % cls.first_client)
+
+ def setUp(self):
+
+ self.get_super_method(self, 'setUp')()
+
+ # Setup Volume
+ if not self.setup_volume_and_mount_volume([self.mounts[0]]):
+ raise ExecutionError("Failed to setup and mount volume")
+
+ def tearDown(self):
+
+ if self.is_io_running:
+ if not wait_for_io_to_complete(self.all_mounts_procs,
+ [self.mounts[0]]):
+ raise ExecutionError("IO failed on some of the clients")
+
+ if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]):
+ raise ExecutionError("Failed to cleanup Volume")
+
+ # Calling GlusterBaseClass tearDown
+ self.get_super_method(self, 'tearDown')()
+
+ def _check_layout_of_bricks(self):
+ """Check the layout of bricks"""
+ ret = is_layout_complete(self.mnode, self.volname, "/")
+ self.assertTrue(ret, ("Volume %s: Layout is not complete",
+ self.volname))
+ g.log.info("Volume %s: Layout is complete", self.volname)
+
+ def _add_brick_and_wait_for_rebalance_to_complete(self):
+ """Add brick and wait for rebalance to complete"""
+
+ # Add brick to volume
+ ret = expand_volume(self.mnode, self.volname, self.servers,
+ self.all_servers_info)
+ self.assertTrue(ret, "Failed to add brick on volume %s"
+ % self.volname)
+
+ # Trigger rebalance and wait for it to complete
+ ret, _, _ = rebalance_start(self.mnode, self.volname,
+ force=True)
+ self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s"
+ % self.volname)
+
+ # Wait for rebalance to complete
+ ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
+ timeout=1200)
+ self.assertTrue(ret, "Rebalance is not yet complete on the volume "
+ "%s" % self.volname)
+ g.log.info("Rebalance successfully completed")
+
+ self._check_layout_of_bricks()
+
+ def _remove_brick_from_volume(self):
+ """Remove bricks from volume"""
+ # Remove bricks from the volume
+ ret = shrink_volume(self.mnode, self.volname, rebalance_timeout=2000)
+ self.assertTrue(ret, "Failed to remove-brick from volume")
+ g.log.info("Remove-brick rebalance successful")
+
+ def test_add_brick_followed_by_remove_brick(self):
+ """
+ Test case:
+ 1. Create a volume, start it and mount it to a client.
+ 2. Start I/O on volume.
+ 3. Add brick and trigger rebalance, wait for rebalance to complete.
+ (The volume which was 1x3 should now be 2x3)
+ 4. Add brick and trigger rebalance, wait for rebalance to complete.
+ (The volume which was 2x3 should now be 3x3)
+ 5. Remove brick from volume such that it becomes a 2x3.
+ 6. Remove brick from volume such that it becomes a 1x3.
+ 7. Wait for I/O to complete and check for any input/output errors in
+ both client and rebalance logs.
+ """
+ # Start I/O on mount point
+ self.all_mounts_procs = []
+ cmd = ("/usr/bin/env python {} create_deep_dirs_with_files "
+ "--dirname-start-num {} --dir-depth 5 --dir-length 5 "
+ "--max-num-of-dirs 5 --num-of-files 5 {}"
+ .format(self.script_upload_path, 10, self.mountpoint))
+ proc = g.run_async(self.first_client, cmd)
+ self.all_mounts_procs.append(proc)
+ self.is_io_running = True
+
+ # Convert 1x3 to 2x3 and then convert 2x3 to 3x3
+ for _ in range(0, 2):
+ self._add_brick_and_wait_for_rebalance_to_complete()
+
+ # Convert 3x3 to 2x3 and then convert 2x3 to 1x3
+ for _ in range(0, 2):
+ self._remove_brick_from_volume()
+
+ # Validate I/O processes running on the nodes
+ ret = validate_io_procs(self.all_mounts_procs, [self.mounts[0]])
+ self.is_io_running = False
+ self.assertTrue(ret, "IO failed on some of the clients")
+ g.log.info("IO on all mounts: Complete")
+
+ # Check for Input/output errors in rebalance logs
+ particiapting_nodes = []
+ for brick in get_all_bricks(self.mnode, self.volname):
+ node, _ = brick.split(':')
+ particiapting_nodes.append(node)
+
+ for server in particiapting_nodes:
+ ret = occurences_of_pattern_in_file(
+ server, "Input/output error",
+ "/var/log/glusterfs/{}-rebalance.log".format(self.volname))
+ self.assertEqual(ret, 0,
+ "[Input/output error] present in rebalance log"
+ " file")
+
+ # Check for Input/output errors in client logs
+ ret = occurences_of_pattern_in_file(
+ self.first_client, "Input/output error",
+ "/var/log/glusterfs/mnt-{}_{}.log".format(self.volname,
+ self.mount_type))
+ self.assertEqual(ret, 0,
+ "[Input/output error] present in client log file")
+ g.log.info("Expanding and shrinking volume successful and no I/O "
+ "errors see in rebalance and client logs")
diff --git a/tests/functional/afr/test_afr_cli_no_splitbrain_resolution.py b/tests/functional/afr/test_afr_cli_no_splitbrain_resolution.py
index ad6f336a5..1acd11faa 100644
--- a/tests/functional/afr/test_afr_cli_no_splitbrain_resolution.py
+++ b/tests/functional/afr/test_afr_cli_no_splitbrain_resolution.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2020 Red Hat, Inc. <http://www.redhat.com>
+# Copyright (C) 2017-2021 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -112,17 +112,16 @@ class TestSelfHeal(GlusterBaseClass):
g.log.info("creating 5 files from mount point")
all_mounts_procs = []
- for mount_obj in self.mounts:
- cmd = ("/usr/bin/env python %s create_files -f 5 "
- "--base-file-name test_file --fixed-file-size 1k %s" % (
- self.script_upload_path,
- mount_obj.mountpoint))
- proc = g.run_async(mount_obj.client_system, cmd,
- user=mount_obj.user)
- all_mounts_procs.append(proc)
+ cmd = ("/usr/bin/env python %s create_files -f 5 "
+ "--base-file-name test_file --fixed-file-size 1k %s" % (
+ self.script_upload_path, self.mounts[0].mountpoint))
+ proc = g.run_async(self.mounts[0].client_system, cmd,
+ user=self.mounts[0].user)
+ all_mounts_procs.append(proc)
+
# Validate I/O
g.log.info("Wait for IO to complete and validate IO.....")
- ret = validate_io_procs(all_mounts_procs, self.mounts)
+ ret = validate_io_procs(all_mounts_procs, [self.mounts[0]])
self.assertTrue(ret, "IO failed on some of the clients")
g.log.info("IO is successful on all mounts")
g.log.info("Successfully created a file from mount point")
@@ -149,17 +148,16 @@ class TestSelfHeal(GlusterBaseClass):
g.log.info("creating 5 new files of same name from mount point")
all_mounts_procs = []
- for mount_obj in self.mounts:
- cmd = ("/usr/bin/env python %s create_files -f 5 "
- "--base-file-name test_file --fixed-file-size 10k %s" % (
- self.script_upload_path,
- mount_obj.mountpoint))
- proc = g.run_async(mount_obj.client_system, cmd,
- user=mount_obj.user)
- all_mounts_procs.append(proc)
+ cmd = ("/usr/bin/env python %s create_files -f 5 "
+ "--base-file-name test_file --fixed-file-size 10k %s" % (
+ self.script_upload_path, self.mounts[0].mountpoint))
+ proc = g.run_async(self.mounts[0].client_system, cmd,
+ user=self.mounts[0].user)
+ all_mounts_procs.append(proc)
+
# Validate I/O
g.log.info("Wait for IO to complete and validate IO.....")
- ret = validate_io_procs(all_mounts_procs, self.mounts)
+ ret = validate_io_procs(all_mounts_procs, [self.mounts[0]])
self.assertTrue(ret, "IO failed on some of the clients")
g.log.info("IO is successful on all mounts")
g.log.info("Successfully created a new file of same name "
@@ -225,10 +223,11 @@ class TestSelfHeal(GlusterBaseClass):
fpath = (self.mounts[0].mountpoint + '/test_file' +
str(fcount) + '.txt')
status = get_fattr(self.mounts[0].client_system,
- fpath, 'replica.split-brain-status')
+ fpath, 'replica.split-brain-status',
+ encode="text")
compare_string = ("The file is not under data or metadata "
"split-brain")
- self.assertEqual(status.rstrip('\x00'), compare_string,
+ self.assertEqual(status, compare_string,
"file test_file%s is under"
" split-brain" % str(fcount))
g.log.info("none of the files are under split-brain")
diff --git a/tests/functional/afr/test_default_granular_entry_heal.py b/tests/functional/afr/test_default_granular_entry_heal.py
new file mode 100644
index 000000000..91ca25907
--- /dev/null
+++ b/tests/functional/afr/test_default_granular_entry_heal.py
@@ -0,0 +1,235 @@
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along`
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from random import choice
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+ bring_bricks_online,
+ are_bricks_offline,
+ are_bricks_online, get_all_bricks)
+from glustolibs.gluster.glusterfile import occurences_of_pattern_in_file
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+ is_heal_complete)
+from glustolibs.gluster.lib_utils import collect_bricks_arequal
+from glustolibs.gluster.volume_libs import get_subvols
+from glustolibs.gluster.volume_ops import get_volume_options
+from glustolibs.io.utils import collect_mounts_arequal
+
+
+@runs_on([['distributed-replicated', 'replicated',
+ 'arbiter', 'distributed-arbiter'], ['glusterfs']])
+class TestDefaultGranularEntryHeal(GlusterBaseClass):
+
+ def setUp(self):
+
+ self.get_super_method(self, 'setUp')()
+
+ self.first_client = self.mounts[0].client_system
+ self.mountpoint = self.mounts[0].mountpoint
+
+ # Setup Volume
+ if not self.setup_volume_and_mount_volume([self.mounts[0]]):
+ raise ExecutionError("Failed to setup and mount volume")
+
+ def tearDown(self):
+
+ if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]):
+ raise ExecutionError("Failed to cleanup Volume")
+
+ # Calling GlusterBaseClass tearDown
+ self.get_super_method(self, 'tearDown')()
+
+ def _bring_bricks_offline(self):
+ """Brings bricks offline and confirms if they are offline"""
+ # Select bricks to bring offline from a replica set
+ subvols_dict = get_subvols(self.mnode, self.volname)
+ subvols = subvols_dict['volume_subvols']
+ self.bricks_to_bring_offline = []
+ for subvol in subvols:
+ self.bricks_to_bring_offline.append(choice(subvol))
+
+ # Bring bricks offline
+ ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline)
+ self.assertTrue(ret, 'Failed to bring bricks %s offline' %
+ self.bricks_to_bring_offline)
+
+ ret = are_bricks_offline(self.mnode, self.volname,
+ self.bricks_to_bring_offline)
+ self.assertTrue(ret, 'Bricks %s are not offline'
+ % self.bricks_to_bring_offline)
+ g.log.info('Bringing bricks %s offline is successful',
+ self.bricks_to_bring_offline)
+
+ def _restart_volume_and_bring_all_offline_bricks_online(self):
+ """Restart volume and bring all offline bricks online"""
+
+ ret = is_heal_complete(self.mnode, self.volname)
+ self.assertFalse(ret, 'Heal is completed')
+ g.log.info('Heal is pending')
+
+ ret = bring_bricks_online(self.mnode, self.volname,
+ self.bricks_to_bring_offline,
+ bring_bricks_online_methods=[
+ 'volume_start_force'])
+ self.assertTrue(ret, 'Failed to bring bricks %s online' %
+ self.bricks_to_bring_offline)
+
+ # Check if bricks are back online or not
+ ret = are_bricks_online(self.mnode, self.volname,
+ self.bricks_to_bring_offline)
+ self.assertTrue(ret, 'Bricks not online %s even after restart' %
+ self.bricks_to_bring_offline)
+
+ g.log.info('Bringing bricks %s online is successful',
+ self.bricks_to_bring_offline)
+
+ def _wait_for_heal_to_completed(self):
+ """Check if heal is completed"""
+ ret = monitor_heal_completion(self.mnode, self.volname,
+ timeout_period=3600)
+ self.assertTrue(ret, 'Heal has not yet completed')
+
+ def _check_arequal_on_bricks_with_a_specific_arequal(self, arequal,
+ brick_list):
+ """
+ Compare an inital arequal checksum with bricks from a given brick list
+ """
+ init_val = arequal[0].splitlines()[-1].split(':')[-1]
+ ret, arequals = collect_bricks_arequal(brick_list)
+ self.assertTrue(ret, 'Failed to get arequal on bricks')
+ for brick_arequal in arequals:
+ brick_total = brick_arequal.splitlines()[-1].split(':')[-1]
+ self.assertEqual(init_val, brick_total, 'Arequals not matching')
+
+ @staticmethod
+ def _add_dir_path_to_brick_list(brick_list):
+ """Add test_self_heal at the end of brick path"""
+ dir_brick_list = []
+ for brick in brick_list:
+ dir_brick_list.append('{}/{}'.format(brick, 'mydir'))
+ return dir_brick_list
+
+ def _check_arequal_checksum_for_the_volume(self):
+ """
+ Check if arequals of mount point and bricks are
+ are the same.
+ """
+ if self.volume_type == "replicated":
+ # Check arequals for "replicated"
+ brick_list = get_all_bricks(self.mnode, self.volname)
+ dir_brick_list = self._add_dir_path_to_brick_list(brick_list)
+
+ # Get arequal before getting bricks offline
+ work_dir = '{}/mydir'.format(self.mountpoint)
+ ret, arequals = collect_mounts_arequal([self.mounts[0]],
+ path=work_dir)
+ self.assertTrue(ret, 'Failed to get arequal')
+ g.log.info('Getting arequal before getting bricks offline '
+ 'is successful')
+
+ # Get arequal on bricks and compare with mount_point_total
+ self._check_arequal_on_bricks_with_a_specific_arequal(
+ arequals, dir_brick_list)
+
+ # Check arequals for "distributed-replicated"
+ if self.volume_type == "distributed-replicated":
+ # Get the subvolumes
+ subvols_dict = get_subvols(self.mnode, self.volname)
+ num_subvols = len(subvols_dict['volume_subvols'])
+
+ # Get arequals and compare
+ for i in range(0, num_subvols):
+ # Get arequal for first brick
+ brick_list = subvols_dict['volume_subvols'][i]
+ dir_brick_list = self._add_dir_path_to_brick_list(brick_list)
+ ret, arequals = collect_bricks_arequal([dir_brick_list[0]])
+ self.assertTrue(ret, 'Failed to get arequal on first brick')
+
+ # Get arequal for every brick and compare with first brick
+ self._check_arequal_on_bricks_with_a_specific_arequal(
+ arequals, dir_brick_list)
+
+ def test_default_granular_entry_heal(self):
+ """
+ Test case:
+ 1. Create a cluster.
+ 2. Create volume start it and mount it.
+ 3. Check if cluster.granular-entry-heal is ON by default or not.
+ 4. Check /var/lib/glusterd/<volname>/info for
+ cluster.granular-entry-heal=on.
+ 5. Check if option granular-entry-heal is present in the
+ volume graph or not.
+ 6. Kill one or two bricks of the volume depending on volume type.
+ 7. Create all types of files on the volume like text files, hidden
+ files, link files, dirs, char device, block device and so on.
+ 8. Bring back the killed brick by restarting the volume.
+ 9. Wait for heal to complete.
+ 10. Check arequal-checksum of all the bricks and see if it's proper or
+ not.
+ """
+ # Check if cluster.granular-entry-heal is ON by default or not
+ ret = get_volume_options(self.mnode, self.volname,
+ 'granular-entry-heal')
+ self.assertEqual(ret['cluster.granular-entry-heal'], 'on',
+ "Value of cluster.granular-entry-heal not on "
+ "by default")
+
+ # Check var/lib/glusterd/<volname>/info for
+ # cluster.granular-entry-heal=on
+ ret = occurences_of_pattern_in_file(self.mnode,
+ 'cluster.granular-entry-heal=on',
+ '/var/lib/glusterd/vols/{}/info'
+ .format(self.volname))
+ self.assertEqual(ret, 1, "Failed get cluster.granular-entry-heal=on in"
+ " info file")
+
+ # Check if option granular-entry-heal is present in the
+ # volume graph or not
+ ret = occurences_of_pattern_in_file(self.first_client,
+ 'option granular-entry-heal on',
+ "/var/log/glusterfs/mnt-{}_{}.log"
+ .format(self.volname,
+ self.mount_type))
+ self.assertTrue(ret > 0,
+ "Failed to find granular-entry-heal in volume graph")
+ g.log.info("granular-entry-heal properly set to ON by default")
+
+ # Kill one or two bricks of the volume depending on volume type
+ self._bring_bricks_offline()
+
+ # Create all types of files on the volume like text files, hidden
+ # files, link files, dirs, char device, block device and so on
+ cmd = ("cd {};mkdir mydir;cd mydir;mkdir dir;mkdir .hiddendir;"
+ "touch file;touch .hiddenfile;mknod blockfile b 1 5;"
+ "mknod charfile b 1 5; mkfifo pipefile;touch fileforhardlink;"
+ "touch fileforsoftlink;ln fileforhardlink hardlinkfile;"
+ "ln -s fileforsoftlink softlinkfile".format(self.mountpoint))
+ ret, _, _ = g.run(self.first_client, cmd)
+ self.assertFalse(ret, "Failed to create files of all types")
+
+ # Bring back the killed brick by restarting the volume Bricks should
+ # be online again
+ self._restart_volume_and_bring_all_offline_bricks_online()
+
+ # Wait for heal to complete
+ self._wait_for_heal_to_completed()
+
+ # Check arequal-checksum of all the bricks and see if it's proper or
+ # not
+ self._check_arequal_checksum_for_the_volume()
diff --git a/tests/functional/afr/test_self_heal_with_expand_volume.py b/tests/functional/afr/test_self_heal_with_expand_volume.py
new file mode 100644
index 000000000..d5b6d5d43
--- /dev/null
+++ b/tests/functional/afr/test_self_heal_with_expand_volume.py
@@ -0,0 +1,221 @@
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along`
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from random import choice
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+ bring_bricks_online,
+ are_bricks_offline,
+ are_bricks_online, get_all_bricks)
+from glustolibs.gluster.glusterfile import (set_file_permissions,
+ occurences_of_pattern_in_file)
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+ is_heal_complete)
+from glustolibs.gluster.rebalance_ops import (
+ rebalance_start, wait_for_rebalance_to_complete)
+from glustolibs.gluster.lib_utils import (add_user, del_user)
+from glustolibs.gluster.volume_libs import (get_subvols, expand_volume)
+
+
+@runs_on([['distributed-replicated'], ['glusterfs']])
+class TestHealWithExpandVolume(GlusterBaseClass):
+
+ def setUp(self):
+
+ self.get_super_method(self, 'setUp')()
+
+ self.first_client = self.mounts[0].client_system
+ self.mountpoint = self.mounts[0].mountpoint
+
+ # Create non-root users
+ self.users = ('qa_user', 'qa_admin')
+ for user in self.users:
+ if not add_user(self.first_client, user):
+ raise ExecutionError("Failed to create non-root user {}"
+ .format(user))
+ g.log.info("Successfully created non-root users")
+
+ # Setup Volume
+ if not self.setup_volume_and_mount_volume([self.mounts[0]]):
+ raise ExecutionError("Failed to setup and mount volume")
+
+ def tearDown(self):
+
+ # Delete non-root users
+ for user in self.users:
+ del_user(self.first_client, user)
+ ret, _, _ = g.run(self.first_client,
+ "rm -rf /home/{}".format(user))
+ if ret:
+ raise ExecutionError("Failed to remove home dir of "
+ "non-root user")
+ g.log.info("Successfully deleted all users")
+
+ if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]):
+ raise ExecutionError("Failed to cleanup Volume")
+
+ # Calling GlusterBaseClass tearDown
+ self.get_super_method(self, 'tearDown')()
+
+ def _bring_bricks_offline(self):
+ """Brings bricks offline and confirms if they are offline"""
+ # Select bricks to bring offline from a replica set
+ subvols_dict = get_subvols(self.mnode, self.volname)
+ subvols = subvols_dict['volume_subvols']
+ self.bricks_to_bring_offline = []
+ self.bricks_to_bring_offline.append(choice(subvols[0]))
+
+ # Bring bricks offline
+ ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline)
+ self.assertTrue(ret, 'Failed to bring bricks %s offline' %
+ self.bricks_to_bring_offline)
+
+ ret = are_bricks_offline(self.mnode, self.volname,
+ self.bricks_to_bring_offline)
+ self.assertTrue(ret, 'Bricks %s are not offline'
+ % self.bricks_to_bring_offline)
+ g.log.info('Bringing bricks %s offline is successful',
+ self.bricks_to_bring_offline)
+
+ def _restart_volume_and_bring_all_offline_bricks_online(self):
+ """Restart volume and bring all offline bricks online"""
+ ret = bring_bricks_online(self.mnode, self.volname,
+ self.bricks_to_bring_offline,
+ bring_bricks_online_methods=[
+ 'volume_start_force'])
+ self.assertTrue(ret, 'Failed to bring bricks %s online' %
+ self.bricks_to_bring_offline)
+
+ # Check if bricks are back online or not
+ ret = are_bricks_online(self.mnode, self.volname,
+ self.bricks_to_bring_offline)
+ self.assertTrue(ret, 'Bricks not online %s even after restart' %
+ self.bricks_to_bring_offline)
+
+ g.log.info('Bringing bricks %s online is successful',
+ self.bricks_to_bring_offline)
+
+ def _wait_for_heal_to_completed(self):
+ """Check if heal is completed"""
+ ret = monitor_heal_completion(self.mnode, self.volname,
+ timeout_period=3600)
+ self.assertTrue(ret, 'Heal has not yet completed')
+
+ def _check_if_there_are_files_to_be_healed(self):
+ """Check if there are files and dirs to be healed"""
+ ret = is_heal_complete(self.mnode, self.volname)
+ self.assertFalse(ret, 'Heal is completed')
+ g.log.info('Heal is pending')
+
+ def _expand_volume_and_wait_for_rebalance_to_complete(self):
+ """Expand volume and wait for rebalance to complete"""
+ # Add brick to volume
+ ret = expand_volume(self.mnode, self.volname, self.servers,
+ self.all_servers_info)
+ self.assertTrue(ret, "Failed to add brick on volume %s"
+ % self.volname)
+
+ # Trigger rebalance and wait for it to complete
+ ret, _, _ = rebalance_start(self.mnode, self.volname,
+ force=True)
+ self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s"
+ % self.volname)
+
+ # Wait for rebalance to complete
+ ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
+ timeout=6000)
+ self.assertTrue(ret, "Rebalance is not yet complete on the volume "
+ "%s" % self.volname)
+ g.log.info("Rebalance successfully completed")
+
+ def test_self_heal_and_add_brick_with_data_from_diff_users(self):
+ """
+ Test case:
+ 1. Created a 2X3 volume.
+ 2. Mount the volume using FUSE and give 777 permissions to the mount.
+ 3. Added a new user.
+ 4. Login as new user and created 100 files from the new user:
+ for i in {1..100};do dd if=/dev/urandom of=$i bs=1024 count=1;done
+ 5. Kill a brick which is part of the volume.
+ 6. On the mount, login as root user and create 1000 files:
+ for i in {1..1000};do dd if=/dev/urandom of=f$i bs=10M count=1;done
+ 7. On the mount, login as new user, and copy existing data to
+ the mount.
+ 8. Start volume using force.
+ 9. While heal is in progress, add-brick and start rebalance.
+ 10. Wait for rebalance and heal to complete,
+ 11. Check for MSGID: 108008 errors in rebalance logs.
+ """
+ # Change permissions of mount point to 777
+ ret = set_file_permissions(self.first_client, self.mountpoint,
+ '-R 777')
+ self.assertTrue(ret, "Unable to change mount point permissions")
+ g.log.info("Mount point permissions set to 777")
+
+ # Create 100 files from non-root user
+ cmd = ("su -l %s -c 'cd %s; for i in {1..100};do dd if=/dev/urandom "
+ "of=nonrootfile$i bs=1024 count=1; done'" % (self.users[0],
+ self.mountpoint))
+ ret, _, _ = g.run(self.first_client, cmd)
+ self.assertFalse(ret, "Failed to create files from non-root user")
+
+ # Kill one brick which is part of the volume
+ self._bring_bricks_offline()
+
+ # Create 1000 files from root user
+ cmd = ("cd %s; for i in {1..1000};do dd if=/dev/urandom of=rootfile$i"
+ " bs=10M count=1;done" % self.mountpoint)
+ ret, _, _ = g.run(self.first_client, cmd)
+ self.assertFalse(ret, "Failed to creare files from root user")
+
+ # On the mount, login as new user, and copy existing data to
+ # the mount
+ cmd = ("su -l %s -c 'wget https://cdn.kernel.org/pub/linux/kernel/"
+ "v5.x/linux-5.4.54.tar.xz; tar -xvf linux-5.4.54.tar.xz;"
+ "cd %s; cp -r ~/ .;'" % (self.users[1], self.mountpoint))
+ ret, _, _ = g.run(self.first_client, cmd)
+ self.assertFalse(ret, "Failed to copy files from non-root user")
+
+ # Check if there are files to be healed
+ self._check_if_there_are_files_to_be_healed()
+
+ # Start the vol using force
+ self._restart_volume_and_bring_all_offline_bricks_online()
+
+ # Add bricks to volume and wait for heal to complete
+ self._expand_volume_and_wait_for_rebalance_to_complete()
+
+ # Wait for heal to complete
+ self._wait_for_heal_to_completed()
+
+ # Check for MSGID: 108008 errors in rebalance logs
+ particiapting_nodes = []
+ for brick in get_all_bricks(self.mnode, self.volname):
+ node, _ = brick.split(':')
+ particiapting_nodes.append(node)
+
+ for server in particiapting_nodes:
+ ret = occurences_of_pattern_in_file(
+ server, "MSGID: 108008",
+ "/var/log/glusterfs/{}-rebalance.log".format(self.volname))
+ self.assertEqual(ret, 0,
+ "[Input/output error] present in rebalance log"
+ " file")
+ g.log.info("Expanding volume successful and no MSGID: 108008 "
+ "errors see in rebalance logs")
diff --git a/tests/functional/afr/test_split_brain_with_hard_link_file.py b/tests/functional/afr/test_split_brain_with_hard_link_file.py
new file mode 100644
index 000000000..a8248fb72
--- /dev/null
+++ b/tests/functional/afr/test_split_brain_with_hard_link_file.py
@@ -0,0 +1,175 @@
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+# pylint: disable=too-many-statements, too-many-locals, unused-variable
+from glusto.core import Glusto as g
+
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.brick_libs import (get_all_bricks,
+ bring_bricks_offline,
+ bring_bricks_online,
+ are_bricks_offline)
+from glustolibs.gluster.heal_ops import trigger_heal
+from glustolibs.gluster.heal_libs import (is_volume_in_split_brain,
+ monitor_heal_completion,
+ is_heal_complete)
+
+from glustolibs.gluster.volume_ops import set_volume_options
+from glustolibs.gluster.glusterfile import create_link_file
+
+
+@runs_on([['distributed-replicated'], ['glusterfs']])
+class TestSelfHeal(GlusterBaseClass):
+
+ @classmethod
+ def setUpClass(cls):
+
+ # Calling GlusterBaseClass setUpClass
+ cls.get_super_method(cls, 'setUpClass')()
+
+ # Override Volumes
+ if cls.volume_type == "distributed-replicated":
+ # Define x3 distributed-replicated volume
+ cls.volume['voltype'] = {
+ 'type': 'distributed-replicated',
+ 'dist_count': 2,
+ 'replica_count': 3,
+ 'transport': 'tcp'}
+
+ # Setup Volume and Mount Volume
+ ret = cls.setup_volume_and_mount_volume(cls.mounts)
+ if not ret:
+ raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+ g.log.info("Successful in Setup Volume and Mount Volume")
+
+ @classmethod
+ def tearDownClass(cls):
+
+ # Cleanup Volume
+ ret = cls.unmount_volume_and_cleanup_volume(cls.mounts)
+ if not ret:
+ raise ExecutionError("Failed to create volume")
+ g.log.info("Successful in cleaning up Volume %s", cls.volname)
+
+ cls.get_super_method(cls, 'tearDownClass')()
+
+ def _test_brick_down_with_file_rename(self, pfile, rfile, brick):
+ # Bring brick offline
+ g.log.info('Bringing brick %s offline', brick)
+ ret = bring_bricks_offline(self.volname, brick)
+ self.assertTrue(ret, 'Failed to bring brick %s offline'
+ % brick)
+
+ ret = are_bricks_offline(self.mnode, self.volname,
+ [brick])
+ self.assertTrue(ret, 'Brick %s is not offline'
+ % brick)
+ g.log.info('Bringing brick %s offline is successful',
+ brick)
+
+ # Rename file
+ cmd = ("mv %s/%s %s/%s"
+ % (self.mounts[0].mountpoint, pfile,
+ self.mounts[0].mountpoint, rfile))
+ ret, _, _ = g.run(self.clients[0], cmd)
+ self.assertEqual(ret, 0, "rename of file failed")
+
+ # Bring brick back online
+ g.log.info('Bringing brick %s online', brick)
+ ret = bring_bricks_online(self.mnode, self.volname,
+ brick)
+ self.assertTrue(ret, 'Failed to bring brick %s online' %
+ brick)
+ g.log.info('Bringing brick %s online is successful', brick)
+
+ def test_afr_heal_with_brickdown_hardlink(self):
+ """
+ Steps:
+ 1. Create 2 * 3 distribute replicate volume and disable all heals
+ 2. Create a file and 3 hardlinks to it from fuse mount.
+ 3. Kill brick4, rename HLINK1 to an appropriate name so that
+ it gets hashed to replicate-1
+ 4. Likewise rename HLINK3 and HLINK7 as well, killing brick5 and brick6
+ respectively each time. i.e. a different brick of the 2nd
+ replica is down each time.
+ 5. Now enable shd and let selfheals complete.
+ 6. Heal should complete without split-brains.
+ """
+ bricks_list = get_all_bricks(self.mnode, self.volname)
+ options = {"metadata-self-heal": "off",
+ "entry-self-heal": "off",
+ "data-self-heal": "off",
+ "self-heal-daemon": "off"}
+ g.log.info("setting options %s", options)
+ ret = set_volume_options(self.mnode, self.volname, options)
+ self.assertTrue(ret, ("Unable to set volume option %s for"
+ "volume %s" % (options, self.volname)))
+ g.log.info("Successfully set %s for volume %s", options, self.volname)
+
+ cmd = ("touch %s/FILE" % self.mounts[0].mountpoint)
+ ret, _, _ = g.run(self.clients[0], cmd)
+ self.assertEqual(ret, 0, "file creation failed")
+
+ # Creating a hardlink for the file created
+ for i in range(1, 4):
+ ret = create_link_file(self.clients[0],
+ '{}/FILE'.format(self.mounts[0].mountpoint),
+ '{}/HLINK{}'.format
+ (self.mounts[0].mountpoint, i))
+ self.assertTrue(ret, "Unable to create hard link file ")
+
+ # Bring brick3 offline,Rename file HLINK1,and bring back brick3 online
+ self._test_brick_down_with_file_rename("HLINK1", "NEW-HLINK1",
+ bricks_list[3])
+
+ # Bring brick4 offline,Rename file HLINK2,and bring back brick4 online
+ self._test_brick_down_with_file_rename("HLINK2", "NEW-HLINK2",
+ bricks_list[4])
+
+ # Bring brick5 offline,Rename file HLINK3,and bring back brick5 online
+ self._test_brick_down_with_file_rename("HLINK3", "NEW-HLINK3",
+ bricks_list[5])
+
+ # Setting options
+ options = {"self-heal-daemon": "on"}
+ ret = set_volume_options(self.mnode, self.volname, options)
+ self.assertTrue(ret, 'Failed to set options %s' % options)
+ g.log.info("Option 'self-heal-daemon' is set to 'on' successfully")
+
+ # Start healing
+ ret = trigger_heal(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal is not started')
+ g.log.info('Healing is started')
+
+ # Monitor heal completion
+ ret = monitor_heal_completion(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal has not yet completed')
+
+ # Check if heal is completed
+ ret = is_heal_complete(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal is not complete')
+ g.log.info('Heal is completed successfully')
+
+ # Check for split-brain
+ ret = is_volume_in_split_brain(self.mnode, self.volname)
+ self.assertFalse(ret, 'Volume is in split-brain state')
+ g.log.info('Volume is not in split-brain state')
+
+ # Check data on mount point
+ cmd = ("ls %s" % (self.mounts[0].mountpoint))
+ ret, _, _ = g.run(self.clients[0], cmd)
+ self.assertEqual(ret, 0, "failed to fetch data from mount point")
diff --git a/tests/functional/afr/test_split_brain_with_node_reboot.py b/tests/functional/afr/test_split_brain_with_node_reboot.py
new file mode 100644
index 000000000..9b630ba75
--- /dev/null
+++ b/tests/functional/afr/test_split_brain_with_node_reboot.py
@@ -0,0 +1,149 @@
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+# pylint: disable=too-many-statements, too-many-locals
+from unittest import SkipTest
+from glusto.core import Glusto as g
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.misc.misc_libs import upload_scripts
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+ is_heal_complete)
+from glustolibs.io.utils import (run_linux_untar, run_crefi,
+ wait_for_io_to_complete)
+
+
+@runs_on([['replicated', 'distributed-replicated'], ['glusterfs']])
+class TestSelfHeal(GlusterBaseClass):
+
+ @classmethod
+ def setUpClass(cls):
+
+ # Calling GlusterBaseClass setUpClass
+ cls.get_super_method(cls, 'setUpClass')()
+
+ # Check for availability of atleast 3 clients
+ if len(cls.clients) < 3:
+ raise SkipTest("This test requires atleast 3 clients")
+
+ # Upload io scripts for running IO on mounts
+ cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
+ "file_dir_ops.py")
+ ret = upload_scripts(cls.clients, cls.script_upload_path)
+ if not ret:
+ raise ExecutionError("Failed to upload IO scripts "
+ "to clients %s" % cls.clients)
+ g.log.info("Successfully uploaded IO scripts to clients %s",
+ cls.clients)
+
+ # Setup Volume and Mount Volume
+ ret = cls.setup_volume_and_mount_volume(cls.mounts, True)
+ if not ret:
+ raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+ g.log.info("Successful in Setup Volume and Mount Volume")
+
+ cls.list_of_io_processes = []
+ cls.is_io_running = False
+
+ def tearDown(self):
+
+ # If I/O processes are running wait from them to complete
+ if self.is_io_running:
+ if not wait_for_io_to_complete(self.list_of_io_processes,
+ self.mounts):
+ raise ExecutionError("Failed to wait for I/O to complete")
+
+ # Unmounting and cleaning volume
+ ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]])
+ if not ret:
+ raise ExecutionError("Unable to delete volume %s" % self.volname)
+
+ self.get_super_method(self, 'tearDown')()
+
+ def test_afr_node_reboot_self_heal(self):
+ """
+ Steps:
+ 1. Create *3 replica volume
+ 2. Mount the volume on 3 clients
+ 3. Run following workload from clients
+ Client 1: Linux Untars
+ Client 2: Lookups ls
+ Client 3: Lookups du
+ 4. Create a directory on mount point
+ 5. Create deep dirs and file in the directory created at step 4
+ 6. Perform node reboot
+ 7. Check for heal status
+ 8. Reboot another node
+ 9. Check for heal status
+ """
+
+ # Create a dir to start untar
+ self.linux_untar_dir = "{}/{}".format(self.mounts[0].mountpoint,
+ "linuxuntar")
+ ret = mkdir(self.clients[0], self.linux_untar_dir)
+ self.assertTrue(ret, "Failed to create dir linuxuntar for untar")
+
+ # Start linux untar on dir linuxuntar from client 1
+ ret = run_linux_untar(self.clients[0], self.mounts[0].mountpoint,
+ dirs=tuple(['linuxuntar']))
+ self.list_of_io_processes += ret
+ self.is_io_running = True
+
+ # Run lookup operation ls from client 2
+ cmd = ("cd {}; for i in `seq 1 1000000`;do du -sh; done"
+ .format(self.mounts[1].mountpoint))
+ ret = g.run_async(self.mounts[1].client_system, cmd)
+ self.list_of_io_processes += [ret]
+
+ # Run lookup operation du from client 3
+ cmd = ("cd {}; for i in `seq 1 1000000`;do ls -laRt; done"
+ .format(self.mounts[2].mountpoint))
+ ret = g.run_async(self.mounts[2].client_system, cmd)
+ self.list_of_io_processes += [ret]
+
+ # Create a dir to start crefi tool
+ self.linux_untar_dir = "{}/{}".format(self.mounts[3].mountpoint,
+ "crefi")
+ ret = mkdir(self.clients[3], self.linux_untar_dir)
+ self.assertTrue(ret, "Failed to create dir for crefi")
+
+ # Create deep dirs and files on mount point from client 4
+ list_of_fops = ("create", "rename", "chmod", "chown", "chgrp",
+ "hardlink", "truncate", "setxattr")
+ for fops in list_of_fops:
+ ret = run_crefi(self.clients[3],
+ self.linux_untar_dir, 10, 3, 3, thread=4,
+ random_size=True, fop=fops, minfs=0,
+ maxfs=102400, multi=True, random_filename=True)
+ self.assertTrue(ret, "crefi failed during {}".format(fops))
+ g.log.info("crefi PASSED FOR fop %s", fops)
+ g.log.info("IOs were successful using crefi")
+
+ for server_num in (1, 2):
+ # Perform node reboot for servers
+ g.log.info("Rebooting %s", self.servers[server_num])
+ ret = g.run_async(self.servers[server_num], "reboot")
+ self.assertTrue(ret, 'Failed to reboot node')
+
+ # Monitor heal completion
+ ret = monitor_heal_completion(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal has not yet completed')
+
+ # Check if heal is completed
+ ret = is_heal_complete(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal is not complete')
+ g.log.info('Heal is completed successfully')