summaryrefslogtreecommitdiffstats
path: root/tests/functional
diff options
context:
space:
mode:
Diffstat (limited to 'tests/functional')
-rw-r--r--tests/functional/afr/heal/test_no_glustershd_with_distribute.py4
-rw-r--r--tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py600
-rw-r--r--tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py23
-rw-r--r--tests/functional/afr/test_add_brick_followed_by_remove_brick.py170
-rw-r--r--tests/functional/afr/test_afr_cli_no_splitbrain_resolution.py41
-rw-r--r--tests/functional/afr/test_default_granular_entry_heal.py235
-rw-r--r--tests/functional/afr/test_self_heal_with_expand_volume.py221
-rw-r--r--tests/functional/afr/test_split_brain_with_hard_link_file.py175
-rw-r--r--tests/functional/afr/test_split_brain_with_node_reboot.py149
-rw-r--r--tests/functional/arbiter/test_verify_metadata_and_data_heal.py297
-rw-r--r--tests/functional/bvt/test_cvt.py34
-rw-r--r--tests/functional/dht/test_rebalance_multiple_expansions.py100
-rw-r--r--tests/functional/dht/test_verify_permissions_on_root_dir_when_brick_down.py134
-rw-r--r--tests/functional/glusterd/test_glusterd_memory_consumption_increase.py207
-rw-r--r--tests/functional/glusterd/test_probe_glusterd_down.py66
-rw-r--r--tests/functional/glusterd/test_verify_df_output.py171
-rw-r--r--tests/functional/glusterfind/test_glusterfind_when_brick_down.py219
-rw-r--r--tests/functional/resource_leak/test_memory_leaks_with_files_delete.py113
-rw-r--r--tests/functional/resource_leak/test_verify_gluster_memleak_with_management_encryption.py231
19 files changed, 3085 insertions, 105 deletions
diff --git a/tests/functional/afr/heal/test_no_glustershd_with_distribute.py b/tests/functional/afr/heal/test_no_glustershd_with_distribute.py
index d2b43bf..bbefe0c 100644
--- a/tests/functional/afr/heal/test_no_glustershd_with_distribute.py
+++ b/tests/functional/afr/heal/test_no_glustershd_with_distribute.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com>
+# Copyright (C) 2017-2021 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -58,7 +58,7 @@ class SelfHealDaemonProcessTestsWithMultipleVolumes(GlusterBaseClass):
for volume_config in cls.volume_configs:
ret = setup_volume(mnode=cls.mnode,
all_servers_info=cls.all_servers_info,
- volume_config=volume_config)
+ volume_config=volume_config, multi_vol=True)
volname = volume_config['name']
if not ret:
raise ExecutionError("Failed to setup Volume"
diff --git a/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py b/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py
new file mode 100644
index 0000000..37bd2ec
--- /dev/null
+++ b/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py
@@ -0,0 +1,600 @@
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+ bring_bricks_online,
+ are_bricks_offline,
+ are_bricks_online,
+ get_all_bricks)
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+ is_volume_in_split_brain,
+ is_heal_complete,
+ enable_granular_heal,
+ disable_granular_heal)
+from glustolibs.gluster.lib_utils import (add_user, del_user, group_del,
+ group_add, collect_bricks_arequal)
+from glustolibs.gluster.volume_ops import get_volume_options
+from glustolibs.gluster.volume_libs import get_subvols
+from glustolibs.io.utils import collect_mounts_arequal
+
+
+@runs_on([['distributed-replicated', 'replicated'], ['glusterfs']])
+class TestHealWithLinkFiles(GlusterBaseClass):
+
+    def setUp(self):
+        """Prepare client details, optional test users, and the volume.
+
+        The qa_all group and the non-root users are created only when the
+        running test is test_self_heal_meta_data. Afterwards the volume is
+        set up and mounted on the first mount object.
+
+        Raises:
+            ExecutionError: If group creation, user creation, or volume
+                setup/mount fails.
+        """
+        self.get_super_method(self, 'setUp')()
+
+        first_mount = self.mounts[0]
+        self.first_client = first_mount.client_system
+        self.mountpoint = first_mount.mountpoint
+        self.user_group_created = False
+
+        # The last dotted component of the test id is the test method name
+        if self.id().split('.')[-1] == 'test_self_heal_meta_data':
+
+            # Create non-root group
+            group_created = group_add(self.first_client, 'qa_all')
+            if not group_created:
+                raise ExecutionError("Failed to create group qa_all")
+
+            # Create non-root users, each one a member of qa_all
+            self.users = ('qa_func', 'qa_system', 'qa_perf')
+            for user in self.users:
+                user_created = add_user(self.first_client, user,
+                                        group='qa_all')
+                if not user_created:
+                    raise ExecutionError("Failed to create user {}"
+                                         .format(user))
+
+            # tearDown uses this flag to decide whether to delete them
+            self.user_group_created = True
+            g.log.info("Successfully created all users.")
+
+        # Setup Volume
+        if not self.setup_volume_and_mount_volume([first_mount]):
+            raise ExecutionError("Failed to setup and mount volume")
+
+ def tearDown(self):
+
+ # Delete non-root users and group if created
+ if self.user_group_created:
+
+ # Delete non-root users
+ for user in self.users:
+ del_user(self.first_client, user)
+ g.log.info("Successfully deleted all users")
+
+ # Delete non-root group
+ group_del(self.first_client, 'qa_all')
+
+ if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]):
+ raise ExecutionError("Failed to cleanup Volume")
+
+ # Calling GlusterBaseClass tearDown
+ self.get_super_method(self, 'tearDown')()
+
+    def _set_granular_heal_to_on_or_off(self, enabled=False):
+        """Bring cluster.granular-entry-heal to the requested state.
+
+        The option is enabled only when it is not already 'on', and
+        disabled only when it currently is 'on'.
+
+        Args:
+            enabled (bool): True to turn granular entry heal on, False
+                (default) to turn it off.
+        """
+        options = get_volume_options(self.mnode, self.volname,
+                                     'granular-entry-heal')
+        is_on = options['cluster.granular-entry-heal'] == 'on'
+
+        if enabled and not is_on:
+            ret = enable_granular_heal(self.mnode, self.volname)
+            self.assertTrue(ret,
+                            "Unable to set granular-entry-heal to on")
+        elif not enabled and is_on:
+            ret = disable_granular_heal(self.mnode, self.volname)
+            self.assertTrue(ret,
+                            "Unable to set granular-entry-heal to off")
+
+ def _run_cmd(self, io_cmd, err_msg):
+ """Run cmd and show error message if it fails"""
+ cmd = ("cd {}/test_self_heal;{}".format(self.mountpoint, io_cmd))
+ ret, _, _ = g.run(self.first_client, cmd)
+ self.assertFalse(ret, err_msg)
+
+ def _create_files_and_dirs_on_mount_point(self, index, second_set=False):
+ """A function to create files and dirs on mount point"""
+ # Create a parent directory test_self_heal on mount point
+ if not second_set:
+ ret = mkdir(self.first_client, '{}/{}'.format(
+ self.mountpoint, 'test_self_heal'))
+ self.assertTrue(ret, "Failed to create dir test_self_heal")
+
+ # Create directories and files inside directory test_self_heal
+ io_cmd = ("for i in `seq 1 50`; do mkdir dir.$i; dd if=/dev/random"
+ " of=file.$i count=1K bs=$i; done",
+
+ "for i in `seq 1 100`; do mkdir dir.$i; for j in `seq 1 5`;"
+ " do dd if=/dev/random of=dir.$i/file.$j bs=1K count=$j"
+ ";done;done",
+
+ "for i in `seq 1 10`; do mkdir l1_dir.$i; for j in `seq "
+ "1 5`; do mkdir l1_dir.$i/l2_dir.$j; for k in `seq 1 10`;"
+ " do dd if=/dev/random of=l1_dir.$i/l2_dir.$j/test.$k"
+ " bs=1k count=$k; done; done; done;",
+
+ "for i in `seq 51 100`; do mkdir new_dir.$i; for j in `seq"
+ " 1 10`; do dd if=/dev/random of=new_dir.$i/new_file.$j "
+ "bs=1K count=$j; done; dd if=/dev/random of=new_file.$i"
+ " count=1K bs=$i; done ;")
+ self._run_cmd(
+ io_cmd[index], "Failed to create dirs and files inside")
+
+ def _delete_files_and_dirs(self):
+ """Delete files and dirs from mount point"""
+ io_cmd = ("for i in `seq 1 50`; do rm -rf dir.$i; rm -f file.$i;done")
+ self._run_cmd(io_cmd, "Failed to delete dirs and files")
+
+ def _rename_files_and_dirs(self):
+ """Rename files and dirs from mount point"""
+ io_cmd = ("for i in `seq 51 100`; do mv new_file.$i renamed_file.$i;"
+ " for j in `seq 1 10`; do mv new_dir.$i/new_file.$j "
+ "new_dir.$i/renamed_file.$j ; done ; mv new_dir.$i "
+ "renamed_dir.$i; done;")
+ self._run_cmd(io_cmd, "Failed to rename dirs and files")
+
+ def _change_meta_deta_of_dirs_and_files(self):
+ """Change meta data of dirs and files"""
+ cmds = (
+ # Change permission
+ "for i in `seq 1 100`; do chmod 555 dir.$i; done; "
+ "for i in `seq 1 50`; do for j in `seq 1 5`; do chmod 666 "
+ "dir.$i/file.$j; done; done; for i in `seq 51 100`; do for "
+ "j in `seq 1 5`;do chmod 444 dir.$i/file.$j; done; done;",
+
+ # Change ownership
+ "for i in `seq 1 35`; do chown -R qa_func dir.$i; done; "
+ "for i in `seq 36 70`; do chown -R qa_system dir.$i; done; "
+ "for i in `seq 71 100`; do chown -R qa_perf dir.$i; done;",
+
+ # Change group
+ "for i in `seq 1 100`; do chgrp -R qa_all dir.$i; done;")
+
+ for io_cmd in cmds:
+ self._run_cmd(io_cmd,
+ "Failed to change meta data on dirs and files")
+ g.log.info("Successfully changed meta data on dirs and files")
+
+ def _verify_meta_data_of_files_and_dirs(self):
+ """Verify meta data of files and dirs"""
+ cmds = (
+ # Verify permissions
+ "for i in `seq 1 50`; do stat -c %a dir.$i | grep -F \"555\";"
+ " if [ $? -ne 0 ]; then exit 1; fi; for j in `seq 1 5` ; do "
+ "stat -c %a dir.$i/file.$j | grep -F \"666\"; if [ $? -ne 0 ]"
+ "; then exit 1; fi; done; done; for i in `seq 51 100`; do "
+ "stat -c %a dir.$i | grep -F \"555\";if [ $? -ne 0 ]; then "
+ "exit 1; fi; for j in `seq 1 5`; do stat -c %a dir.$i/file.$j"
+ " | grep -F \"444\"; if [ $? -ne 0 ]; then exit 1; fi; done;"
+ "done;",
+
+ # Verify ownership
+ "for i in `seq 1 35`; do stat -c %U dir.$i | grep -F "
+ "\"qa_func\"; if [ $? -ne 0 ]; then exit 1; fi; for j in "
+ "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F "
+ "\"qa_func\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;"
+ " for i in `seq 36 70` ; do stat -c %U dir.$i | grep -F "
+ "\"qa_system\"; if [ $? -ne 0 ]; then exit 1; fi; for j in "
+ "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F "
+ "\"qa_system\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;"
+ " for i in `seq 71 100` ; do stat -c %U dir.$i | grep -F "
+ "\"qa_perf\"; if [ $? -ne 0 ]; then exit 1; fi; for j in "
+ "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F "
+ "\"qa_perf\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;",
+
+ # Verify group
+ "for i in `seq 1 100`; do stat -c %G dir.$i | grep -F "
+ "\"qa_all\"; if [ $? -ne 0 ]; then exit 1; fi; for j in "
+ "`seq 1 5`; do stat -c %G dir.$i/file.$j | grep -F "
+ "\"qa_all\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;")
+
+ for io_cmd in cmds:
+ self._run_cmd(io_cmd, "Meta data of dirs and files not proper")
+
+ def _set_and_remove_extended_attributes(self, remove=False):
+ """Set and remove extended attributes"""
+ # Command to set extended attribute to files and dirs
+ io_cmd = ("for i in `seq 1 100`; do setfattr -n trusted.name -v "
+ "testing_xattr_selfheal_on_dirs dir.$i; for j in `seq 1 "
+ "5`;do setfattr -n trusted.name -v "
+ "testing_xattr_selfheal_on_files dir.$i/file.$j; done; "
+ "done;")
+ err_msg = "Failed to set extended attributes to files and dirs"
+ if remove:
+ # Command to remove extended attribute set on files and dirs
+ io_cmd = ("for i in `seq 1 100`; do setfattr -x trusted.name "
+ "dir.$i; for j in `seq 1 5`; do setfattr -x "
+ "trusted.name dir.$i/file.$j ; done ; done ;")
+ err_msg = "Failed to remove extended attributes to files and dirs"
+
+ self._run_cmd(io_cmd, err_msg)
+
+    def _verify_if_extended_attributes_are_proper(self, remove=False):
+        """Verify that trusted.name xattrs were set, or removed, properly.
+
+        Args:
+            remove (bool): If False (default), check that getfattr reports
+                the expected trusted.name value on dir.1..100 and on
+                file.1..5 inside each of them. If True, check instead that
+                getfattr fails for trusted.name on all of those entries.
+
+        The shell loop exits with status 1 at the first entry that does
+        not match, which makes _run_cmd fail with the matching message.
+        """
+        io_cmd = ("for i in `seq 1 100`; do getfattr -n trusted.name -e text "
+                  "dir.$i | grep -F 'testing_xattr_selfheal_on_dirs'; if [ $? "
+                  "-ne 0 ]; then exit 1 ; fi ; for j in `seq 1 5` ; do "
+                  "getfattr -n trusted.name -e text dir.$i/file.$j | grep -F "
+                  "'testing_xattr_selfheal_on_files'; if [ $? -ne 0 ]; then "
+                  "exit 1; fi; done; done;")
+        err_msg = "Extended attributes on files and dirs are not proper"
+        if remove:
+            # `[` requires `]` as a separate final argument. Without the
+            # space before it the test itself errors out, so the `exit 1`
+            # for a file that still carries the xattr could never trigger.
+            io_cmd = ("for i in `seq 1 100`; do getfattr -n trusted.name -e "
+                      "text dir.$i; if [ $? -eq 0 ]; then exit 1; fi; for j in"
+                      " `seq 1 5`; do getfattr -n trusted.name -e text "
+                      "dir.$i/file.$j; if [ $? -eq 0 ]; then exit 1; fi; "
+                      "done; done;")
+            err_msg = "Extended attributes set to files and dirs not removed"
+        self._run_cmd(io_cmd, err_msg)
+
+ def _remove_files_and_create_dirs_with_the_same_name(self):
+ """Remove files and create dirs with the same name"""
+ io_cmd = ("for i in `seq 1 10`; do for j in `seq 1 5`; do for k in "
+ "`seq 1 10`; do rm -f l1_dir.$i/l2_dir.$j/test.$k; mkdir "
+ "l1_dir.$i/l2_dir.$j/test.$k; done; done; done;")
+ self._run_cmd(io_cmd,
+ "Failed to remove files and create dirs with same name")
+
+ def _verify_if_dirs_are_proper_or_not(self):
+ """Verify if dirs are proper or not"""
+ io_cmd = ("for i in `seq 1 10`; do for j in `seq 1 5`; do for k in "
+ "`seq 1 10`; do stat -c %F l1_dir.$i/l2_dir.$j/test.$k | "
+ "grep -F 'directory'; if [ $? -ne 0 ]; then exit 1; fi; "
+ "done; done; done;")
+ self._run_cmd(io_cmd, "Dirs created instead of files aren't proper")
+
+    def _bring_bricks_offline(self):
+        """Take the first brick of every subvolume offline and confirm it.
+
+        The selected bricks are stored in self.bricks_to_bring_offline so
+        that they can be brought back online later.
+        """
+        # Pick the first brick of each subvolume
+        subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
+        self.bricks_to_bring_offline = [subvol[0] for subvol in subvols]
+
+        # Bring bricks offline
+        went_offline = bring_bricks_offline(self.volname,
+                                            self.bricks_to_bring_offline)
+        self.assertTrue(went_offline, 'Failed to bring bricks %s offline' %
+                        self.bricks_to_bring_offline)
+
+        # Confirm that the selected bricks are really offline
+        confirmed_offline = are_bricks_offline(self.mnode, self.volname,
+                                               self.bricks_to_bring_offline)
+        self.assertTrue(confirmed_offline, 'Bricks %s are not offline'
+                        % self.bricks_to_bring_offline)
+        g.log.info('Bringing bricks %s offline is successful',
+                   self.bricks_to_bring_offline)
+
+ def _restart_volume_and_bring_all_offline_bricks_online(self):
+ """Restart volume and bring all offline bricks online"""
+ ret = bring_bricks_online(self.mnode, self.volname,
+ self.bricks_to_bring_offline,
+ bring_bricks_online_methods=[
+ 'volume_start_force'])
+ self.assertTrue(ret, 'Failed to bring bricks %s online' %
+ self.bricks_to_bring_offline)
+
+ # Check if bricks are back online or not
+ ret = are_bricks_online(self.mnode, self.volname,
+ self.bricks_to_bring_offline)
+ self.assertTrue(ret, 'Bricks not online %s even after restart' %
+ self.bricks_to_bring_offline)
+
+ g.log.info('Bringing bricks %s online is successful',
+ self.bricks_to_bring_offline)
+
+ def _check_arequal_on_bricks_with_a_specific_arequal(self, arequal,
+ brick_list):
+ """
+ Compare an initial arequal checksum with bricks from a given brick list
+ """
+ init_val = arequal[0].splitlines()[-1].split(':')[-1]
+ ret, arequals = collect_bricks_arequal(brick_list)
+ self.assertTrue(ret, 'Failed to get arequal on bricks')
+ for brick_arequal in arequals:
+ brick_total = brick_arequal.splitlines()[-1].split(':')[-1]
+ self.assertEqual(init_val, brick_total, 'Arequals not matching')
+
+    @staticmethod
+    def _add_dir_path_to_brick_list(brick_list):
+        """Return a new list with '/test_self_heal' appended to each brick.
+
+        Args:
+            brick_list (list): Brick paths to extend.
+
+        Returns:
+            list: One '<brick>/test_self_heal' entry per input brick, in
+                the same order.
+        """
+        return ['{}/{}'.format(brick, 'test_self_heal')
+                for brick in brick_list]
+
+ def _check_arequal_checksum_for_the_volume(self):
+ """
+ Check if arequals of mount point and bricks are
+ the same.
+ """
+ if self.volume_type == "replicated":
+ # Check arequals for "replicated"
+ brick_list = get_all_bricks(self.mnode, self.volname)
+ dir_brick_list = self._add_dir_path_to_brick_list(brick_list)
+
+ # Get arequal before getting bricks offline
+ work_dir = '{}/test_self_heal'.format(self.mountpoint)
+ ret, arequals = collect_mounts_arequal([self.mounts[0]],
+ path=work_dir)
+ self.assertTrue(ret, 'Failed to get arequal')
+ g.log.info('Getting arequal before getting bricks offline '
+ 'is successful')
+
+ # Get arequal on bricks and compare with mount_point_total
+ self._check_arequal_on_bricks_with_a_specific_arequal(
+ arequals, dir_brick_list)
+
+ # Check arequals for "distributed-replicated"
+ if self.volume_type == "distributed-replicated":
+ # Get the subvolumes
+ subvols_dict = get_subvols(self.mnode, self.volname)
+ num_subvols = len(subvols_dict['volume_subvols'])
+
+ # Get arequals and compare
+ for i in range(0, num_subvols):
+ # Get arequal for first brick
+ brick_list = subvols_dict['volume_subvols'][i]
+ dir_brick_list = self._add_dir_path_to_brick_list(brick_list)
+ ret, arequals = collect_bricks_arequal([dir_brick_list[0]])
+ self.assertTrue(ret, 'Failed to get arequal on first brick')
+
+ # Get arequal for every brick and compare with first brick
+ self._check_arequal_on_bricks_with_a_specific_arequal(
+ arequals, dir_brick_list)
+
+ def _check_heal_is_completed_and_not_in_split_brain(self):
+ """Check if heal is completed and volume not in split brain"""
+ # Check if heal is completed
+ ret = is_heal_complete(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal is not complete')
+ g.log.info('Heal is completed successfully')
+
+ # Check if volume is in split brain or not
+ ret = is_volume_in_split_brain(self.mnode, self.volname)
+ self.assertFalse(ret, 'Volume is in split-brain state')
+ g.log.info('Volume is not in split-brain state')
+
+ def _check_if_there_are_files_and_dirs_to_be_healed(self):
+ """Check if there are files and dirs to be healed"""
+ ret = is_heal_complete(self.mnode, self.volname)
+ self.assertFalse(ret, 'Heal is completed')
+ g.log.info('Heal is pending')
+
+ def _wait_for_heal_is_completed(self):
+ """Check if heal is completed"""
+ ret = monitor_heal_completion(self.mnode, self.volname,
+ timeout_period=3600)
+ self.assertTrue(ret, 'Heal has not yet completed')
+
+ def _check_heal_status_restart_vol_wait_and_check_data(self):
+ """
+ Perform repetitive steps mentioned below:
+ 1 Check if heal info is showing all the files and dirs to be healed
+ 2 Bring back all brick processes which were killed
+ 3 Wait for heal to complete on the volume
+ 4 Check if heal is complete and check if volume is in split brain
+ 5 Collect and compare arequal-checksum according to the volume type
+ for bricks
+ """
+ # Check if heal info is showing all the files and dirs to be healed
+ self._check_if_there_are_files_and_dirs_to_be_healed()
+
+ # Bring back all brick processes which were killed
+ self._restart_volume_and_bring_all_offline_bricks_online()
+
+ # Wait for heal to complete on the volume
+ self._wait_for_heal_is_completed()
+
+ # Check if heal is complete and check if volume is in split brain
+ self._check_heal_is_completed_and_not_in_split_brain()
+
+ # Collect and compare arequal-checksum according to the volume type
+ # for bricks
+ self._check_arequal_checksum_for_the_volume()
+
+ def _run_test_self_heal_entry_heal(self):
+ """Run steps of test_self_heal_entry_heal"""
+ # Create a directory and create files and directories inside it on
+ # mount point
+ self._create_files_and_dirs_on_mount_point(0)
+
+ # Collect and compare arequal-checksum according to the volume type
+ # for bricks
+ self._check_arequal_checksum_for_the_volume()
+
+ # Bring down brick processes according to the volume type
+ self._bring_bricks_offline()
+
+ # Create a new set of files and directories on mount point
+ self._create_files_and_dirs_on_mount_point(3, second_set=True)
+
+ self._check_heal_status_restart_vol_wait_and_check_data()
+
+ # Bring down brick processes according to the volume type
+ self._bring_bricks_offline()
+
+ # Delete files and directories from mount point
+ self._delete_files_and_dirs()
+
+ self._check_heal_status_restart_vol_wait_and_check_data()
+
+ # Bring down brick processes according to the volume type
+ self._bring_bricks_offline()
+
+ # Rename the existing files and dirs
+ self._rename_files_and_dirs()
+
+ self._check_heal_status_restart_vol_wait_and_check_data()
+
+ def test_self_heal_entry_heal(self):
+ """
+ Test case:
+ 1. Create a volume, start it and mount it.
+ 2. Create a directory and create files and directories inside it
+ on mount point.
+ 3. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 4. Bring down brick processes according to the volume type.
+ 5. Create a new set of files and directories on mount point.
+ 6. Check if heal info is showing all the files and dirs to be healed.
+ 7. Bring back all brick processes which were killed.
+ 8. Wait for heal to complete on the volume.
+ 9. Check if heal is complete and check if volume is in split brain.
+ 10. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 11. Bring down brick processes according to the volume type.
+ 12. Delete files and directories from mount point.
+ 13. Check if heal info is showing all the files and dirs to be healed.
+ 14. Bring back all brick processes which were killed.
+ 15. Wait for heal to complete on the volume.
+ 16. Check if heal is complete and check if volume is in split brain.
+ 17. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 18. Bring down brick processes according to the volume type.
+ 19. Rename the existing files and dirs.
+ 20. Check if heal info is showing all the files and dirs to be healed.
+ 21. Bring back all brick processes which were killed.
+ 22. Wait for heal to complete on the volume.
+ 23. Check if heal is complete and check if volume is in split brain.
+ 24. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+
+ Note:
+ Do this test with both Granular-entry-heal set enable and disable.
+ """
+ for value in (False, True):
+ if value:
+ # Cleanup old data from mount point
+ ret, _, _ = g.run(self.first_client,
+ 'rm -rf {}/*'.format(self.mountpoint))
+ self.assertFalse(ret, 'Failed to cleanup mount point')
+ g.log.info("Testing with granular heal set to enabled")
+ self._set_granular_heal_to_on_or_off(enabled=value)
+ self._run_test_self_heal_entry_heal()
+
+ def test_self_heal_meta_data(self):
+ """
+ Test case:
+ 1. Create a volume, start it and mount it.
+ 2. Create a directory and create files and directories inside it
+ on mount point.
+ 3. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 4. Bring down brick processes according to the volume type.
+ 5. Change the meta data of files and dirs.
+ 6. Check if heal info is showing all the files and dirs to be healed.
+ 7. Bring back all brick processes which were killed.
+ 8. Wait for heal to complete on the volume.
+ 9. Check if heal is complete and check if volume is in split brain.
+ 10. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 11. Verify the meta data of files and dirs.
+ 12. Bring down brick processes according to the volume type.
+ 13. Set extended attributes on the files and dirs.
+ 14. Verify if the extended attributes are set properly or not.
+ 15. Check if heal info is showing all the files and dirs to be healed.
+ 16. Bring back all brick processes which were killed.
+ 17. Wait for heal to complete on the volume.
+ 18. Check if heal is complete and check if volume is in split brain.
+ 19. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 20. Verify if extended attributes are consistent or not.
+ 21. Bring down brick processes according to the volume type.
+ 22. Remove extended attributes on the files and dirs.
+ 23. Verify if extended attributes were removed properly.
+ 24. Check if heal info is showing all the files and dirs to be healed.
+ 25. Bring back all brick processes which were killed.
+ 26. Wait for heal to complete on the volume.
+ 27. Check if heal is complete and check if volume is in split brain.
+ 28. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 29. Verify if extended attributes are removed or not.
+ """
+ # Create a directory and create files and directories inside it
+ # on mount point
+ self._create_files_and_dirs_on_mount_point(1)
+
+ # Collect and compare arequal-checksum according to the volume type
+ # for bricks
+ self._check_arequal_checksum_for_the_volume()
+
+ # Bring down brick processes according to the volume type
+ self._bring_bricks_offline()
+
+ # Change the meta data of files and dirs
+ self._change_meta_deta_of_dirs_and_files()
+
+ self._check_heal_status_restart_vol_wait_and_check_data()
+
+ # Verify the meta data of files and dirs
+ self._verify_meta_data_of_files_and_dirs()
+
+ for value in (False, True):
+ # Bring down brick processes according to the volume type
+ self._bring_bricks_offline()
+
+ # Set or remove extended attributes on the files and dirs
+ self._set_and_remove_extended_attributes(remove=value)
+
+ # Verify if the extended attributes are set properly or not
+ self._verify_if_extended_attributes_are_proper(remove=value)
+
+ self._check_heal_status_restart_vol_wait_and_check_data()
+
+ # Verify if extended attributes are consistent or not
+ self._verify_if_extended_attributes_are_proper(remove=value)
+
+ def test_self_heal_of_dir_with_files_removed(self):
+ """
+ Test case:
+ 1. Create a volume, start it and mount it.
+ 2. Create a directory and create files and directories inside it
+ on mount point.
+ 3. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 4. Bring down brick processes according to the volume type.
+ 5. Remove all files and create dir which have name of files.
+ 6. Check if heal info is showing all the files and dirs to be healed.
+ 7. Bring back all brick processes which were killed.
+ 8. Wait for heal to complete on the volume.
+ 9. Check if heal is complete and check if volume is in split brain.
+ 10. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 11. Verify if dirs are healed properly or not.
+ """
+ # Create a directory and create files and directories inside it
+ # on mount point
+ self._create_files_and_dirs_on_mount_point(2)
+
+ # Collect and compare arequal-checksum according to the volume type
+ # for bricks
+ self._check_arequal_checksum_for_the_volume()
+
+ # Bring down brick processes according to the volume type
+ self._bring_bricks_offline()
+
+ # Remove all files and create dir which have name of files
+ self._remove_files_and_create_dirs_with_the_same_name()
+
+ self._check_heal_status_restart_vol_wait_and_check_data()
+
+ # Verify if dirs are healed properly or not
+ self._verify_if_dirs_are_proper_or_not()
diff --git a/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py b/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py
index 43b4f4e..a449e39 100644
--- a/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py
+++ b/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -26,12 +26,14 @@ from glustolibs.gluster.brick_libs import (
select_volume_bricks_to_bring_offline, get_online_bricks_list)
from glustolibs.gluster.heal_libs import (
get_self_heal_daemon_pid, is_shd_daemonized,
- monitor_heal_completion, bring_self_heal_daemon_process_offline)
+ monitor_heal_completion, bring_self_heal_daemon_process_offline,
+ disable_granular_heal)
from glustolibs.gluster.heal_ops import (get_heal_info_summary,
trigger_heal_full)
from glustolibs.io.utils import validate_io_procs
from glustolibs.misc.misc_libs import upload_scripts
-from glustolibs.gluster.volume_ops import set_volume_options
+from glustolibs.gluster.volume_ops import (set_volume_options,
+ get_volume_options)
from glustolibs.gluster.mount_ops import mount_volume, umount_volume
@@ -99,6 +101,15 @@ class SelfHealDaemonProcessTestsWithSingleVolume(GlusterBaseClass):
* heal should complete successfully
"""
# pylint: disable=too-many-locals,too-many-statements,too-many-lines
+
+        # Disable granular heal if not disabled already
+        granular = get_volume_options(self.mnode, self.volname,
+                                      'granular-entry-heal')
+        if granular['cluster.granular-entry-heal'] == 'on':
+            ret = disable_granular_heal(self.mnode, self.volname)
+            # The failure message names the state being requested (off)
+            self.assertTrue(ret,
+                            "Unable to set granular-entry-heal to off")
+
# Setting Volume options
options = {"metadata-self-heal": "on",
"entry-self-heal": "on",
@@ -131,7 +142,7 @@ class SelfHealDaemonProcessTestsWithSingleVolume(GlusterBaseClass):
all_mounts_procs, num_files_to_write = [], 100
for mount_obj in self.mounts:
cmd = ("/usr/bin/env python %s create_files "
- "-f %s --base-file-name file %s" % (self.script_upload_path,
+ "-f %d --base-file-name file %s" % (self.script_upload_path,
num_files_to_write,
mount_obj.mountpoint))
proc = g.run_async(mount_obj.client_system, cmd,
@@ -221,8 +232,8 @@ class SelfHealDaemonProcessTestsWithSingleVolume(GlusterBaseClass):
all_mounts_procs = []
for mount_obj in self.mounts:
- cmd = ("/usr/bin/env python %s read %s"
- % (self.script_upload_path, mount_obj.mountpoint))
+ cmd = ("cd %s;for i in `seq 1 5`; do ls -l;cat *; stat *; sleep 5;"
+ " done " % (mount_obj.mountpoint))
proc = g.run_async(mount_obj.client_system, cmd,
user=mount_obj.user)
all_mounts_procs.append(proc)
diff --git a/tests/functional/afr/test_add_brick_followed_by_remove_brick.py b/tests/functional/afr/test_add_brick_followed_by_remove_brick.py
new file mode 100644
index 0000000..a653b79
--- /dev/null
+++ b/tests/functional/afr/test_add_brick_followed_by_remove_brick.py
@@ -0,0 +1,170 @@
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.brick_libs import get_all_bricks
+from glustolibs.gluster.dht_test_utils import is_layout_complete
+from glustolibs.gluster.glusterfile import (file_exists,
+ occurences_of_pattern_in_file)
+from glustolibs.gluster.rebalance_ops import (rebalance_start,
+ wait_for_rebalance_to_complete)
+from glustolibs.gluster.volume_libs import expand_volume, shrink_volume
+from glustolibs.io.utils import (validate_io_procs, wait_for_io_to_complete)
+from glustolibs.misc.misc_libs import upload_scripts
+
+
+@runs_on([['replicated'], ['glusterfs']])
+class TestAddBrickFollowedByRemoveBrick(GlusterBaseClass):
+
+ @classmethod
+ def setUpClass(cls):
+ cls.get_super_method(cls, 'setUpClass')()
+
+ cls.first_client = cls.mounts[0].client_system
+ cls.mountpoint = cls.mounts[0].mountpoint
+ cls.is_io_running = False
+
+ # Upload IO scripts for running IO on mounts
+ cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
+ "file_dir_ops.py")
+ if not file_exists(cls.first_client, cls.script_upload_path):
+ if not upload_scripts(cls.first_client, cls.script_upload_path):
+ raise ExecutionError(
+ "Failed to upload IO scripts to client %s"
+ % cls.first_client)
+
+ def setUp(self):
+
+ self.get_super_method(self, 'setUp')()
+
+ # Setup Volume
+ if not self.setup_volume_and_mount_volume([self.mounts[0]]):
+ raise ExecutionError("Failed to setup and mount volume")
+
+ def tearDown(self):
+
+ if self.is_io_running:
+ if not wait_for_io_to_complete(self.all_mounts_procs,
+ [self.mounts[0]]):
+ raise ExecutionError("IO failed on some of the clients")
+
+ if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]):
+ raise ExecutionError("Failed to cleanup Volume")
+
+ # Calling GlusterBaseClass tearDown
+ self.get_super_method(self, 'tearDown')()
+
+ def _check_layout_of_bricks(self):
+ """Check the layout of bricks"""
+ ret = is_layout_complete(self.mnode, self.volname, "/")
+ self.assertTrue(ret, ("Volume %s: Layout is not complete",
+ self.volname))
+ g.log.info("Volume %s: Layout is complete", self.volname)
+
+ def _add_brick_and_wait_for_rebalance_to_complete(self):
+ """Add brick and wait for rebalance to complete"""
+
+ # Add brick to volume
+ ret = expand_volume(self.mnode, self.volname, self.servers,
+ self.all_servers_info)
+ self.assertTrue(ret, "Failed to add brick on volume %s"
+ % self.volname)
+
+ # Trigger rebalance and wait for it to complete
+ ret, _, _ = rebalance_start(self.mnode, self.volname,
+ force=True)
+ self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s"
+ % self.volname)
+
+ # Wait for rebalance to complete
+ ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
+ timeout=1200)
+ self.assertTrue(ret, "Rebalance is not yet complete on the volume "
+ "%s" % self.volname)
+ g.log.info("Rebalance successfully completed")
+
+ self._check_layout_of_bricks()
+
+ def _remove_brick_from_volume(self):
+ """Remove bricks from volume"""
+ # Remove bricks from the volume
+ ret = shrink_volume(self.mnode, self.volname, rebalance_timeout=2000)
+ self.assertTrue(ret, "Failed to remove-brick from volume")
+ g.log.info("Remove-brick rebalance successful")
+
+ def test_add_brick_followed_by_remove_brick(self):
+ """
+ Test case:
+ 1. Create a volume, start it and mount it to a client.
+ 2. Start I/O on volume.
+ 3. Add brick and trigger rebalance, wait for rebalance to complete.
+ (The volume which was 1x3 should now be 2x3)
+ 4. Add brick and trigger rebalance, wait for rebalance to complete.
+ (The volume which was 2x3 should now be 3x3)
+ 5. Remove brick from volume such that it becomes a 2x3.
+ 6. Remove brick from volume such that it becomes a 1x3.
+ 7. Wait for I/O to complete and check for any input/output errors in
+ both client and rebalance logs.
+ """
+ # Start I/O on mount point
+ self.all_mounts_procs = []
+ cmd = ("/usr/bin/env python {} create_deep_dirs_with_files "
+ "--dirname-start-num {} --dir-depth 5 --dir-length 5 "
+ "--max-num-of-dirs 5 --num-of-files 5 {}"
+ .format(self.script_upload_path, 10, self.mountpoint))
+ proc = g.run_async(self.first_client, cmd)
+ self.all_mounts_procs.append(proc)
+ self.is_io_running = True
+
+ # Convert 1x3 to 2x3 and then convert 2x3 to 3x3
+ for _ in range(0, 2):
+ self._add_brick_and_wait_for_rebalance_to_complete()
+
+ # Convert 3x3 to 2x3 and then convert 2x3 to 1x3
+ for _ in range(0, 2):
+ self._remove_brick_from_volume()
+
+ # Validate I/O processes running on the nodes
+ ret = validate_io_procs(self.all_mounts_procs, [self.mounts[0]])
+ self.is_io_running = False
+ self.assertTrue(ret, "IO failed on some of the clients")
+ g.log.info("IO on all mounts: Complete")
+
+ # Check for Input/output errors in rebalance logs
+ particiapting_nodes = []
+ for brick in get_all_bricks(self.mnode, self.volname):
+ node, _ = brick.split(':')
+ particiapting_nodes.append(node)
+
+ for server in particiapting_nodes:
+ ret = occurences_of_pattern_in_file(
+ server, "Input/output error",
+ "/var/log/glusterfs/{}-rebalance.log".format(self.volname))
+ self.assertEqual(ret, 0,
+ "[Input/output error] present in rebalance log"
+ " file")
+
+ # Check for Input/output errors in client logs
+ ret = occurences_of_pattern_in_file(
+ self.first_client, "Input/output error",
+ "/var/log/glusterfs/mnt-{}_{}.log".format(self.volname,
+ self.mount_type))
+ self.assertEqual(ret, 0,
+ "[Input/output error] present in client log file")
+ g.log.info("Expanding and shrinking volume successful and no I/O "
+ "errors see in rebalance and client logs")
diff --git a/tests/functional/afr/test_afr_cli_no_splitbrain_resolution.py b/tests/functional/afr/test_afr_cli_no_splitbrain_resolution.py
index ad6f336..1acd11f 100644
--- a/tests/functional/afr/test_afr_cli_no_splitbrain_resolution.py
+++ b/tests/functional/afr/test_afr_cli_no_splitbrain_resolution.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2020 Red Hat, Inc. <http://www.redhat.com>
+# Copyright (C) 2017-2021 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -112,17 +112,16 @@ class TestSelfHeal(GlusterBaseClass):
g.log.info("creating 5 files from mount point")
all_mounts_procs = []
- for mount_obj in self.mounts:
- cmd = ("/usr/bin/env python %s create_files -f 5 "
- "--base-file-name test_file --fixed-file-size 1k %s" % (
- self.script_upload_path,
- mount_obj.mountpoint))
- proc = g.run_async(mount_obj.client_system, cmd,
- user=mount_obj.user)
- all_mounts_procs.append(proc)
+ cmd = ("/usr/bin/env python %s create_files -f 5 "
+ "--base-file-name test_file --fixed-file-size 1k %s" % (
+ self.script_upload_path, self.mounts[0].mountpoint))
+ proc = g.run_async(self.mounts[0].client_system, cmd,
+ user=self.mounts[0].user)
+ all_mounts_procs.append(proc)
+
# Validate I/O
g.log.info("Wait for IO to complete and validate IO.....")
- ret = validate_io_procs(all_mounts_procs, self.mounts)
+ ret = validate_io_procs(all_mounts_procs, [self.mounts[0]])
self.assertTrue(ret, "IO failed on some of the clients")
g.log.info("IO is successful on all mounts")
g.log.info("Successfully created a file from mount point")
@@ -149,17 +148,16 @@ class TestSelfHeal(GlusterBaseClass):
g.log.info("creating 5 new files of same name from mount point")
all_mounts_procs = []
- for mount_obj in self.mounts:
- cmd = ("/usr/bin/env python %s create_files -f 5 "
- "--base-file-name test_file --fixed-file-size 10k %s" % (
- self.script_upload_path,
- mount_obj.mountpoint))
- proc = g.run_async(mount_obj.client_system, cmd,
- user=mount_obj.user)
- all_mounts_procs.append(proc)
+ cmd = ("/usr/bin/env python %s create_files -f 5 "
+ "--base-file-name test_file --fixed-file-size 10k %s" % (
+ self.script_upload_path, self.mounts[0].mountpoint))
+ proc = g.run_async(self.mounts[0].client_system, cmd,
+ user=self.mounts[0].user)
+ all_mounts_procs.append(proc)
+
# Validate I/O
g.log.info("Wait for IO to complete and validate IO.....")
- ret = validate_io_procs(all_mounts_procs, self.mounts)
+ ret = validate_io_procs(all_mounts_procs, [self.mounts[0]])
self.assertTrue(ret, "IO failed on some of the clients")
g.log.info("IO is successful on all mounts")
g.log.info("Successfully created a new file of same name "
@@ -225,10 +223,11 @@ class TestSelfHeal(GlusterBaseClass):
fpath = (self.mounts[0].mountpoint + '/test_file' +
str(fcount) + '.txt')
status = get_fattr(self.mounts[0].client_system,
- fpath, 'replica.split-brain-status')
+ fpath, 'replica.split-brain-status',
+ encode="text")
compare_string = ("The file is not under data or metadata "
"split-brain")
- self.assertEqual(status.rstrip('\x00'), compare_string,
+ self.assertEqual(status, compare_string,
"file test_file%s is under"
" split-brain" % str(fcount))
g.log.info("none of the files are under split-brain")
diff --git a/tests/functional/afr/test_default_granular_entry_heal.py b/tests/functional/afr/test_default_granular_entry_heal.py
new file mode 100644
index 0000000..91ca259
--- /dev/null
+++ b/tests/functional/afr/test_default_granular_entry_heal.py
@@ -0,0 +1,235 @@
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from random import choice
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+ bring_bricks_online,
+ are_bricks_offline,
+ are_bricks_online, get_all_bricks)
+from glustolibs.gluster.glusterfile import occurences_of_pattern_in_file
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+ is_heal_complete)
+from glustolibs.gluster.lib_utils import collect_bricks_arequal
+from glustolibs.gluster.volume_libs import get_subvols
+from glustolibs.gluster.volume_ops import get_volume_options
+from glustolibs.io.utils import collect_mounts_arequal
+
+
+@runs_on([['distributed-replicated', 'replicated',
+ 'arbiter', 'distributed-arbiter'], ['glusterfs']])
+class TestDefaultGranularEntryHeal(GlusterBaseClass):
+
+ def setUp(self):
+
+ self.get_super_method(self, 'setUp')()
+
+ self.first_client = self.mounts[0].client_system
+ self.mountpoint = self.mounts[0].mountpoint
+
+ # Setup Volume
+ if not self.setup_volume_and_mount_volume([self.mounts[0]]):
+ raise ExecutionError("Failed to setup and mount volume")
+
+ def tearDown(self):
+
+ if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]):
+ raise ExecutionError("Failed to cleanup Volume")
+
+ # Calling GlusterBaseClass tearDown
+ self.get_super_method(self, 'tearDown')()
+
+ def _bring_bricks_offline(self):
+ """Brings bricks offline and confirms if they are offline"""
+ # Select bricks to bring offline from a replica set
+ subvols_dict = get_subvols(self.mnode, self.volname)
+ subvols = subvols_dict['volume_subvols']
+ self.bricks_to_bring_offline = []
+ for subvol in subvols:
+ self.bricks_to_bring_offline.append(choice(subvol))
+
+ # Bring bricks offline
+ ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline)
+ self.assertTrue(ret, 'Failed to bring bricks %s offline' %
+ self.bricks_to_bring_offline)
+
+ ret = are_bricks_offline(self.mnode, self.volname,
+ self.bricks_to_bring_offline)
+ self.assertTrue(ret, 'Bricks %s are not offline'
+ % self.bricks_to_bring_offline)
+ g.log.info('Bringing bricks %s offline is successful',
+ self.bricks_to_bring_offline)
+
+ def _restart_volume_and_bring_all_offline_bricks_online(self):
+ """Restart volume and bring all offline bricks online"""
+
+ ret = is_heal_complete(self.mnode, self.volname)
+ self.assertFalse(ret, 'Heal is completed')
+ g.log.info('Heal is pending')
+
+ ret = bring_bricks_online(self.mnode, self.volname,
+ self.bricks_to_bring_offline,
+ bring_bricks_online_methods=[
+ 'volume_start_force'])
+ self.assertTrue(ret, 'Failed to bring bricks %s online' %
+ self.bricks_to_bring_offline)
+
+ # Check if bricks are back online or not
+ ret = are_bricks_online(self.mnode, self.volname,
+ self.bricks_to_bring_offline)
+ self.assertTrue(ret, 'Bricks not online %s even after restart' %
+ self.bricks_to_bring_offline)
+
+ g.log.info('Bringing bricks %s online is successful',
+ self.bricks_to_bring_offline)
+
+ def _wait_for_heal_to_completed(self):
+ """Check if heal is completed"""
+ ret = monitor_heal_completion(self.mnode, self.volname,
+ timeout_period=3600)
+ self.assertTrue(ret, 'Heal has not yet completed')
+
+ def _check_arequal_on_bricks_with_a_specific_arequal(self, arequal,
+ brick_list):
+ """
+ Compare an initial arequal checksum with bricks from a given brick list
+ """
+ init_val = arequal[0].splitlines()[-1].split(':')[-1]
+ ret, arequals = collect_bricks_arequal(brick_list)
+ self.assertTrue(ret, 'Failed to get arequal on bricks')
+ for brick_arequal in arequals:
+ brick_total = brick_arequal.splitlines()[-1].split(':')[-1]
+ self.assertEqual(init_val, brick_total, 'Arequals not matching')
+
+ @staticmethod
+ def _add_dir_path_to_brick_list(brick_list):
+ """Add mydir at the end of each brick path"""
+ dir_brick_list = []
+ for brick in brick_list:
+ dir_brick_list.append('{}/{}'.format(brick, 'mydir'))
+ return dir_brick_list
+
+ def _check_arequal_checksum_for_the_volume(self):
+ """
+ Check if arequals of mount point and bricks are
+ the same.
+ """
+ if self.volume_type == "replicated":
+ # Check arequals for "replicated"
+ brick_list = get_all_bricks(self.mnode, self.volname)
+ dir_brick_list = self._add_dir_path_to_brick_list(brick_list)
+
+ # Get arequal of mydir from the mount point
+ work_dir = '{}/mydir'.format(self.mountpoint)
+ ret, arequals = collect_mounts_arequal([self.mounts[0]],
+ path=work_dir)
+ self.assertTrue(ret, 'Failed to get arequal')
+ g.log.info('Getting arequal before getting bricks offline '
+ 'is successful')
+
+ # Get arequal on bricks and compare with mount_point_total
+ self._check_arequal_on_bricks_with_a_specific_arequal(
+ arequals, dir_brick_list)
+
+ # Check arequals for "distributed-replicated"
+ if self.volume_type == "distributed-replicated":
+ # Get the subvolumes
+ subvols_dict = get_subvols(self.mnode, self.volname)
+ num_subvols = len(subvols_dict['volume_subvols'])
+
+ # Get arequals and compare
+ for i in range(0, num_subvols):
+ # Get arequal for first brick
+ brick_list = subvols_dict['volume_subvols'][i]
+ dir_brick_list = self._add_dir_path_to_brick_list(brick_list)
+ ret, arequals = collect_bricks_arequal([dir_brick_list[0]])
+ self.assertTrue(ret, 'Failed to get arequal on first brick')
+
+ # Get arequal for every brick and compare with first brick
+ self._check_arequal_on_bricks_with_a_specific_arequal(
+ arequals, dir_brick_list)
+
+ def test_default_granular_entry_heal(self):
+ """
+ Test case:
+ 1. Create a cluster.
+ 2. Create volume start it and mount it.
+ 3. Check if cluster.granular-entry-heal is ON by default or not.
+ 4. Check /var/lib/glusterd/vols/<volname>/info for
+ cluster.granular-entry-heal=on.
+ 5. Check if option granular-entry-heal is present in the
+ volume graph or not.
+ 6. Kill one or two bricks of the volume depending on volume type.
+ 7. Create all types of files on the volume like text files, hidden
+ files, link files, dirs, char device, block device and so on.
+ 8. Bring back the killed brick by restarting the volume.
+ 9. Wait for heal to complete.
+ 10. Check arequal-checksum of all the bricks and see if it's proper or
+ not.
+ """
+ # Check if cluster.granular-entry-heal is ON by default or not
+ ret = get_volume_options(self.mnode, self.volname,
+ 'granular-entry-heal')
+ self.assertEqual(ret['cluster.granular-entry-heal'], 'on',
+ "Value of cluster.granular-entry-heal not on "
+ "by default")
+
+ # Check /var/lib/glusterd/vols/<volname>/info for
+ # cluster.granular-entry-heal=on
+ ret = occurences_of_pattern_in_file(self.mnode,
+ 'cluster.granular-entry-heal=on',
+ '/var/lib/glusterd/vols/{}/info'
+ .format(self.volname))
+ self.assertEqual(ret, 1, "Failed get cluster.granular-entry-heal=on in"
+ " info file")
+
+ # Check if option granular-entry-heal is present in the
+ # volume graph or not
+ ret = occurences_of_pattern_in_file(self.first_client,
+ 'option granular-entry-heal on',
+ "/var/log/glusterfs/mnt-{}_{}.log"
+ .format(self.volname,
+ self.mount_type))
+ self.assertTrue(ret > 0,
+ "Failed to find granular-entry-heal in volume graph")
+ g.log.info("granular-entry-heal properly set to ON by default")
+
+ # Kill one or two bricks of the volume depending on volume type
+ self._bring_bricks_offline()
+
+ # Create all types of files on the volume like text files, hidden
+ # files, link files, dirs, char device, block device and so on
+ cmd = ("cd {};mkdir mydir;cd mydir;mkdir dir;mkdir .hiddendir;"
+ "touch file;touch .hiddenfile;mknod blockfile b 1 5;"
+ "mknod charfile b 1 5; mkfifo pipefile;touch fileforhardlink;"
+ "touch fileforsoftlink;ln fileforhardlink hardlinkfile;"
+ "ln -s fileforsoftlink softlinkfile".format(self.mountpoint))
+ ret, _, _ = g.run(self.first_client, cmd)
+ self.assertFalse(ret, "Failed to create files of all types")
+
+ # Bring back the killed brick by restarting the volume. Bricks should
+ # be online again
+ self._restart_volume_and_bring_all_offline_bricks_online()
+
+ # Wait for heal to complete
+ self._wait_for_heal_to_completed()
+
+ # Check arequal-checksum of all the bricks and see if it's proper or
+ # not
+ self._check_arequal_checksum_for_the_volume()
diff --git a/tests/functional/afr/test_self_heal_with_expand_volume.py b/tests/functional/afr/test_self_heal_with_expand_volume.py
new file mode 100644
index 0000000..d5b6d5d
--- /dev/null
+++ b/tests/functional/afr/test_self_heal_with_expand_volume.py
@@ -0,0 +1,221 @@
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from random import choice
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+ bring_bricks_online,
+ are_bricks_offline,
+ are_bricks_online, get_all_bricks)
+from glustolibs.gluster.glusterfile import (set_file_permissions,
+ occurences_of_pattern_in_file)
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+ is_heal_complete)
+from glustolibs.gluster.rebalance_ops import (
+ rebalance_start, wait_for_rebalance_to_complete)
+from glustolibs.gluster.lib_utils import (add_user, del_user)
+from glustolibs.gluster.volume_libs import (get_subvols, expand_volume)
+
+
+@runs_on([['distributed-replicated'], ['glusterfs']])
+class TestHealWithExpandVolume(GlusterBaseClass):
+
+ def setUp(self):
+
+ self.get_super_method(self, 'setUp')()
+
+ self.first_client = self.mounts[0].client_system
+ self.mountpoint = self.mounts[0].mountpoint
+
+ # Create non-root users
+ self.users = ('qa_user', 'qa_admin')
+ for user in self.users:
+ if not add_user(self.first_client, user):
+ raise ExecutionError("Failed to create non-root user {}"
+ .format(user))
+ g.log.info("Successfully created non-root users")
+
+ # Setup Volume
+ if not self.setup_volume_and_mount_volume([self.mounts[0]]):
+ raise ExecutionError("Failed to setup and mount volume")
+
+ def tearDown(self):
+
+ # Delete non-root users
+ for user in self.users:
+ del_user(self.first_client, user)
+ ret, _, _ = g.run(self.first_client,
+ "rm -rf /home/{}".format(user))
+ if ret:
+ raise ExecutionError("Failed to remove home dir of "
+ "non-root user")
+ g.log.info("Successfully deleted all users")
+
+ if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]):
+ raise ExecutionError("Failed to cleanup Volume")
+
+ # Calling GlusterBaseClass tearDown
+ self.get_super_method(self, 'tearDown')()
+
+ def _bring_bricks_offline(self):
+ """Brings bricks offline and confirms if they are offline"""
+ # Select bricks to bring offline from a replica set
+ subvols_dict = get_subvols(self.mnode, self.volname)
+ subvols = subvols_dict['volume_subvols']
+ self.bricks_to_bring_offline = []
+ self.bricks_to_bring_offline.append(choice(subvols[0]))
+
+ # Bring bricks offline
+ ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline)
+ self.assertTrue(ret, 'Failed to bring bricks %s offline' %
+ self.bricks_to_bring_offline)
+
+ ret = are_bricks_offline(self.mnode, self.volname,
+ self.bricks_to_bring_offline)
+ self.assertTrue(ret, 'Bricks %s are not offline'
+ % self.bricks_to_bring_offline)
+ g.log.info('Bringing bricks %s offline is successful',
+ self.bricks_to_bring_offline)
+
+ def _restart_volume_and_bring_all_offline_bricks_online(self):
+ """Restart volume and bring all offline bricks online"""
+ ret = bring_bricks_online(self.mnode, self.volname,
+ self.bricks_to_bring_offline,
+ bring_bricks_online_methods=[
+ 'volume_start_force'])
+ self.assertTrue(ret, 'Failed to bring bricks %s online' %
+ self.bricks_to_bring_offline)
+
+ # Check if bricks are back online or not
+ ret = are_bricks_online(self.mnode, self.volname,
+ self.bricks_to_bring_offline)
+ self.assertTrue(ret, 'Bricks not online %s even after restart' %
+ self.bricks_to_bring_offline)
+
+ g.log.info('Bringing bricks %s online is successful',
+ self.bricks_to_bring_offline)
+
+ def _wait_for_heal_to_completed(self):
+ """Check if heal is completed"""
+ ret = monitor_heal_completion(self.mnode, self.volname,
+ timeout_period=3600)
+ self.assertTrue(ret, 'Heal has not yet completed')
+
+ def _check_if_there_are_files_to_be_healed(self):
+ """Check if there are files and dirs to be healed"""
+ ret = is_heal_complete(self.mnode, self.volname)
+ self.assertFalse(ret, 'Heal is completed')
+ g.log.info('Heal is pending')
+
+ def _expand_volume_and_wait_for_rebalance_to_complete(self):
+ """Expand volume and wait for rebalance to complete"""
+ # Add brick to volume
+ ret = expand_volume(self.mnode, self.volname, self.servers,
+ self.all_servers_info)
+ self.assertTrue(ret, "Failed to add brick on volume %s"
+ % self.volname)
+
+ # Trigger rebalance and wait for it to complete
+ ret, _, _ = rebalance_start(self.mnode, self.volname,
+ force=True)
+ self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s"
+ % self.volname)
+
+ # Wait for rebalance to complete
+ ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
+ timeout=6000)
+ self.assertTrue(ret, "Rebalance is not yet complete on the volume "
+ "%s" % self.volname)
+ g.log.info("Rebalance successfully completed")
+
+ def test_self_heal_and_add_brick_with_data_from_diff_users(self):
+ """
+ Test case:
+ 1. Created a 2X3 volume.
+ 2. Mount the volume using FUSE and give 777 permissions to the mount.
+ 3. Added a new user.
+ 4. Login as new user and create 100 files from the new user:
+ for i in {1..100};do dd if=/dev/urandom of=$i bs=1024 count=1;done
+ 5. Kill a brick which is part of the volume.
+ 6. On the mount, login as root user and create 1000 files:
+ for i in {1..1000};do dd if=/dev/urandom of=f$i bs=10M count=1;done
+ 7. On the mount, login as new user, and copy existing data to
+ the mount.
+ 8. Start volume using force.
+ 9. While heal is in progress, add-brick and start rebalance.
+ 10. Wait for rebalance and heal to complete.
+ 11. Check for MSGID: 108008 errors in rebalance logs.
+ """
+ # Change permissions of mount point to 777
+ ret = set_file_permissions(self.first_client, self.mountpoint,
+ '-R 777')
+ self.assertTrue(ret, "Unable to change mount point permissions")
+ g.log.info("Mount point permissions set to 777")
+
+ # Create 100 files from non-root user
+ cmd = ("su -l %s -c 'cd %s; for i in {1..100};do dd if=/dev/urandom "
+ "of=nonrootfile$i bs=1024 count=1; done'" % (self.users[0],
+ self.mountpoint))
+ ret, _, _ = g.run(self.first_client, cmd)
+ self.assertFalse(ret, "Failed to create files from non-root user")
+
+ # Kill one brick which is part of the volume
+ self._bring_bricks_offline()
+
+ # Create 1000 files from root user
+ cmd = ("cd %s; for i in {1..1000};do dd if=/dev/urandom of=rootfile$i"
+ " bs=10M count=1;done" % self.mountpoint)
+ ret, _, _ = g.run(self.first_client, cmd)
+ self.assertFalse(ret, "Failed to creare files from root user")
+
+ # On the mount, login as new user, and copy existing data to
+ # the mount
+ cmd = ("su -l %s -c 'wget https://cdn.kernel.org/pub/linux/kernel/"
+ "v5.x/linux-5.4.54.tar.xz; tar -xvf linux-5.4.54.tar.xz;"
+ "cd %s; cp -r ~/ .;'" % (self.users[1], self.mountpoint))
+ ret, _, _ = g.run(self.first_client, cmd)
+ self.assertFalse(ret, "Failed to copy files from non-root user")
+
+ # Check if there are files to be healed
+ self._check_if_there_are_files_to_be_healed()
+
+ # Start the vol using force
+ self._restart_volume_and_bring_all_offline_bricks_online()
+
+ # Add bricks to volume and wait for rebalance to complete
+ self._expand_volume_and_wait_for_rebalance_to_complete()
+
+ # Wait for heal to complete
+ self._wait_for_heal_to_completed()
+
+ # Check for MSGID: 108008 errors in rebalance logs
+ particiapting_nodes = []
+ for brick in get_all_bricks(self.mnode, self.volname):
+ node, _ = brick.split(':')
+ particiapting_nodes.append(node)
+
+ for server in particiapting_nodes:
+ ret = occurences_of_pattern_in_file(
+ server, "MSGID: 108008",
+ "/var/log/glusterfs/{}-rebalance.log".format(self.volname))
+ self.assertEqual(ret, 0,
+ "[Input/output error] present in rebalance log"
+ " file")
+ g.log.info("Expanding volume successful and no MSGID: 108008 "
+ "errors see in rebalance logs")
diff --git a/tests/functional/afr/test_split_brain_with_hard_link_file.py b/tests/functional/afr/test_split_brain_with_hard_link_file.py
new file mode 100644
index 0000000..a8248fb
--- /dev/null
+++ b/tests/functional/afr/test_split_brain_with_hard_link_file.py
@@ -0,0 +1,175 @@
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+# pylint: disable=too-many-statements, too-many-locals, unused-variable
+from glusto.core import Glusto as g
+
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.brick_libs import (get_all_bricks,
+ bring_bricks_offline,
+ bring_bricks_online,
+ are_bricks_offline)
+from glustolibs.gluster.heal_ops import trigger_heal
+from glustolibs.gluster.heal_libs import (is_volume_in_split_brain,
+ monitor_heal_completion,
+ is_heal_complete)
+
+from glustolibs.gluster.volume_ops import set_volume_options
+from glustolibs.gluster.glusterfile import create_link_file
+
+
+@runs_on([['distributed-replicated'], ['glusterfs']])
+class TestSelfHeal(GlusterBaseClass):
+
+ @classmethod
+ def setUpClass(cls):
+
+ # Calling GlusterBaseClass setUpClass
+ cls.get_super_method(cls, 'setUpClass')()
+
+ # Override Volumes
+ if cls.volume_type == "distributed-replicated":
+ # Define x3 distributed-replicated volume
+ cls.volume['voltype'] = {
+ 'type': 'distributed-replicated',
+ 'dist_count': 2,
+ 'replica_count': 3,
+ 'transport': 'tcp'}
+
+ # Setup Volume and Mount Volume
+ ret = cls.setup_volume_and_mount_volume(cls.mounts)
+ if not ret:
+ raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+ g.log.info("Successful in Setup Volume and Mount Volume")
+
+ @classmethod
+ def tearDownClass(cls):
+
+ # Cleanup Volume
+ ret = cls.unmount_volume_and_cleanup_volume(cls.mounts)
+ if not ret:
+ raise ExecutionError("Failed to create volume")
+ g.log.info("Successful in cleaning up Volume %s", cls.volname)
+
+ cls.get_super_method(cls, 'tearDownClass')()
+
+ def _test_brick_down_with_file_rename(self, pfile, rfile, brick):
+ # Bring brick offline
+ g.log.info('Bringing brick %s offline', brick)
+ ret = bring_bricks_offline(self.volname, brick)
+ self.assertTrue(ret, 'Failed to bring brick %s offline'
+ % brick)
+
+ ret = are_bricks_offline(self.mnode, self.volname,
+ [brick])
+ self.assertTrue(ret, 'Brick %s is not offline'
+ % brick)
+ g.log.info('Bringing brick %s offline is successful',
+ brick)
+
+ # Rename file
+ cmd = ("mv %s/%s %s/%s"
+ % (self.mounts[0].mountpoint, pfile,
+ self.mounts[0].mountpoint, rfile))
+ ret, _, _ = g.run(self.clients[0], cmd)
+ self.assertEqual(ret, 0, "rename of file failed")
+
+ # Bring brick back online
+ g.log.info('Bringing brick %s online', brick)
+ ret = bring_bricks_online(self.mnode, self.volname,
+ brick)
+ self.assertTrue(ret, 'Failed to bring brick %s online' %
+ brick)
+ g.log.info('Bringing brick %s online is successful', brick)
+
+ def test_afr_heal_with_brickdown_hardlink(self):
+ """
+ Steps:
+ 1. Create 2 * 3 distribute replicate volume and disable all heals
+ 2. Create a file and 3 hardlinks to it from fuse mount.
+ 3. Kill brick4, rename HLINK1 to an appropriate name so that
+ it gets hashed to replicate-1
+ 4. Likewise rename HLINK2 and HLINK3 as well, killing brick5 and brick6
+ respectively each time. i.e. a different brick of the 2nd
+ replica is down each time.
+ 5. Now enable shd and let selfheals complete.
+ 6. Heal should complete without split-brains.
+ """
+ bricks_list = get_all_bricks(self.mnode, self.volname)
+ options = {"metadata-self-heal": "off",
+ "entry-self-heal": "off",
+ "data-self-heal": "off",
+ "self-heal-daemon": "off"}
+ g.log.info("setting options %s", options)
+ ret = set_volume_options(self.mnode, self.volname, options)
+ self.assertTrue(ret, ("Unable to set volume option %s for"
+ "volume %s" % (options, self.volname)))
+ g.log.info("Successfully set %s for volume %s", options, self.volname)
+
+ cmd = ("touch %s/FILE" % self.mounts[0].mountpoint)
+ ret, _, _ = g.run(self.clients[0], cmd)
+ self.assertEqual(ret, 0, "file creation failed")
+
+ # Creating a hardlink for the file created
+ for i in range(1, 4):
+ ret = create_link_file(self.clients[0],
+ '{}/FILE'.format(self.mounts[0].mountpoint),
+ '{}/HLINK{}'.format
+ (self.mounts[0].mountpoint, i))
+ self.assertTrue(ret, "Unable to create hard link file ")
+
+ # Bring brick3 offline,Rename file HLINK1,and bring back brick3 online
+ self._test_brick_down_with_file_rename("HLINK1", "NEW-HLINK1",
+ bricks_list[3])
+
+ # Bring brick4 offline,Rename file HLINK2,and bring back brick4 online
+ self._test_brick_down_with_file_rename("HLINK2", "NEW-HLINK2",
+ bricks_list[4])
+
+ # Bring brick5 offline,Rename file HLINK3,and bring back brick5 online
+ self._test_brick_down_with_file_rename("HLINK3", "NEW-HLINK3",
+ bricks_list[5])
+
+ # Setting options
+ options = {"self-heal-daemon": "on"}
+ ret = set_volume_options(self.mnode, self.volname, options)
+ self.assertTrue(ret, 'Failed to set options %s' % options)
+ g.log.info("Option 'self-heal-daemon' is set to 'on' successfully")
+
+ # Start healing
+ ret = trigger_heal(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal is not started')
+ g.log.info('Healing is started')
+
+ # Monitor heal completion
+ ret = monitor_heal_completion(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal has not yet completed')
+
+ # Check if heal is completed
+ ret = is_heal_complete(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal is not complete')
+ g.log.info('Heal is completed successfully')
+
+ # Check for split-brain
+ ret = is_volume_in_split_brain(self.mnode, self.volname)
+ self.assertFalse(ret, 'Volume is in split-brain state')
+ g.log.info('Volume is not in split-brain state')
+
+ # Check data on mount point
+ cmd = ("ls %s" % (self.mounts[0].mountpoint))
+ ret, _, _ = g.run(self.clients[0], cmd)
+ self.assertEqual(ret, 0, "failed to fetch data from mount point")
diff --git a/tests/functional/afr/test_split_brain_with_node_reboot.py b/tests/functional/afr/test_split_brain_with_node_reboot.py
new file mode 100644
index 0000000..9b630ba
--- /dev/null
+++ b/tests/functional/afr/test_split_brain_with_node_reboot.py
@@ -0,0 +1,149 @@
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+# pylint: disable=too-many-statements, too-many-locals
+from unittest import SkipTest
+from glusto.core import Glusto as g
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.misc.misc_libs import upload_scripts
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+ is_heal_complete)
+from glustolibs.io.utils import (run_linux_untar, run_crefi,
+ wait_for_io_to_complete)
+
+
+@runs_on([['replicated', 'distributed-replicated'], ['glusterfs']])
+class TestSelfHeal(GlusterBaseClass):
+    """Check that heal completes after server reboots while IO is running.
+
+    The volume is set up and mounted once in setUpClass; tearDown waits for
+    the background IO and then unmounts and cleans up the volume.
+    """
+
+    @classmethod
+    def setUpClass(cls):
+        """Upload the IO script, then set up and mount the volume.
+
+        Raises:
+            SkipTest: when fewer than 4 clients/mounts are available.
+            ExecutionError: when the script upload or the volume
+                setup/mount fails.
+        """
+
+        # Calling GlusterBaseClass setUpClass
+        cls.get_super_method(cls, 'setUpClass')()
+
+        # The test case indexes clients[3] and mounts[3], so 4 clients
+        # (and 4 mounts) are required, not 3
+        if len(cls.clients) < 4 or len(cls.mounts) < 4:
+            raise SkipTest("This test requires atleast 4 clients")
+
+        # Upload io scripts for running IO on mounts
+        cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
+                                  "file_dir_ops.py")
+        ret = upload_scripts(cls.clients, cls.script_upload_path)
+        if not ret:
+            raise ExecutionError("Failed to upload IO scripts "
+                                 "to clients %s" % cls.clients)
+        g.log.info("Successfully uploaded IO scripts to clients %s",
+                   cls.clients)
+
+        # Setup Volume and Mount Volume
+        ret = cls.setup_volume_and_mount_volume(cls.mounts, True)
+        if not ret:
+            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+        g.log.info("Successful in Setup Volume and Mount Volume")
+
+        # Book-keeping for the asynchronous IO started by the test case
+        cls.list_of_io_processes = []
+        cls.is_io_running = False
+
+    def tearDown(self):
+        """Wait for pending IO, then unmount all mounts and clean up.
+
+        Raises:
+            ExecutionError: when waiting for IO or the cleanup fails.
+        """
+
+        # If I/O processes are running wait for them to complete
+        if self.is_io_running:
+            if not wait_for_io_to_complete(self.list_of_io_processes,
+                                           self.mounts):
+                raise ExecutionError("Failed to wait for I/O to complete")
+
+        # setUpClass mounted the volume on every mount, so unmount all of
+        # them (not only the first one) before cleaning up the volume
+        ret = self.unmount_volume_and_cleanup_volume(self.mounts)
+        if not ret:
+            raise ExecutionError("Unable to delete volume %s" % self.volname)
+
+        self.get_super_method(self, 'tearDown')()
+
+    def test_afr_node_reboot_self_heal(self):
+        """
+        Steps:
+        1. Create *3 replica volume
+        2. Mount the volume on 4 clients
+        3. Run following workload from clients
+        Client 1: Linux Untars
+        Client 2: Lookups du
+        Client 3: Lookups ls
+        4. Create a directory on mount point from client 4
+        5. Create deep dirs and file in the directory created at step 4
+        6. Perform node reboot
+        7. Check for heal status
+        8. Reboot another node
+        9. Check for heal status
+        """
+
+        # Create a dir to start untar
+        self.linux_untar_dir = "{}/{}".format(self.mounts[0].mountpoint,
+                                              "linuxuntar")
+        ret = mkdir(self.clients[0], self.linux_untar_dir)
+        self.assertTrue(ret, "Failed to create dir linuxuntar for untar")
+
+        # Start linux untar on dir linuxuntar from client 1
+        ret = run_linux_untar(self.clients[0], self.mounts[0].mountpoint,
+                              dirs=tuple(['linuxuntar']))
+        self.list_of_io_processes += ret
+        self.is_io_running = True
+
+        # Run lookup operation du from client 2
+        cmd = ("cd {}; for i in `seq 1 1000000`;do du -sh; done"
+               .format(self.mounts[1].mountpoint))
+        ret = g.run_async(self.mounts[1].client_system, cmd)
+        self.list_of_io_processes += [ret]
+
+        # Run lookup operation ls from client 3
+        cmd = ("cd {}; for i in `seq 1 1000000`;do ls -laRt; done"
+               .format(self.mounts[2].mountpoint))
+        ret = g.run_async(self.mounts[2].client_system, cmd)
+        self.list_of_io_processes += [ret]
+
+        # Create a dir to start crefi tool from client 4; kept in its own
+        # variable so the untar directory attribute is not overwritten
+        crefi_dir = "{}/{}".format(self.mounts[3].mountpoint, "crefi")
+        ret = mkdir(self.clients[3], crefi_dir)
+        self.assertTrue(ret, "Failed to create dir for crefi")
+
+        # Create deep dirs and files on mount point from client 4
+        list_of_fops = ("create", "rename", "chmod", "chown", "chgrp",
+                        "hardlink", "truncate", "setxattr")
+        for fops in list_of_fops:
+            ret = run_crefi(self.clients[3],
+                            crefi_dir, 10, 3, 3, thread=4,
+                            random_size=True, fop=fops, minfs=0,
+                            maxfs=102400, multi=True, random_filename=True)
+            self.assertTrue(ret, "crefi failed during {}".format(fops))
+            g.log.info("crefi PASSED FOR fop %s", fops)
+        g.log.info("IOs were successful using crefi")
+
+        for server_num in (1, 2):
+            # Perform node reboot for servers
+            g.log.info("Rebooting %s", self.servers[server_num])
+            # NOTE(review): run_async presumably returns a process handle,
+            # so this assert only shows the command was launched - confirm
+            ret = g.run_async(self.servers[server_num], "reboot")
+            self.assertTrue(ret, 'Failed to reboot node')
+
+            # Monitor heal completion
+            ret = monitor_heal_completion(self.mnode, self.volname)
+            self.assertTrue(ret, 'Heal has not yet completed')
+
+            # Check if heal is completed
+            ret = is_heal_complete(self.mnode, self.volname)
+            self.assertTrue(ret, 'Heal is not complete')
+            g.log.info('Heal is completed successfully')
diff --git a/tests/functional/arbiter/test_verify_metadata_and_data_heal.py b/tests/functional/arbiter/test_verify_metadata_and_data_heal.py
new file mode 100644
index 0000000..d48e36e
--- /dev/null
+++ b/tests/functional/arbiter/test_verify_metadata_and_data_heal.py
@@ -0,0 +1,297 @@
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from glusto.core import Glusto as g
+
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+ bring_bricks_online,
+ get_online_bricks_list)
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.gluster.heal_libs import (
+ is_heal_complete, is_volume_in_split_brain, monitor_heal_completion,
+ wait_for_self_heal_daemons_to_be_online)
+from glustolibs.gluster.heal_ops import (disable_self_heal_daemon,
+ enable_self_heal_daemon, trigger_heal)
+from glustolibs.gluster.lib_utils import (add_user, collect_bricks_arequal,
+ del_user, group_add, group_del)
+from glustolibs.gluster.volume_libs import get_subvols
+from glustolibs.io.utils import list_all_files_and_dirs_mounts
+
+
+@runs_on([['arbiter', 'replicated'], ['glusterfs']])
+class TestMetadataAndDataHeal(GlusterBaseClass):
+    '''Description: Verify shd heals files after performing metadata and data
+    operations while a brick was down'''
+    def _dac_helper(self, host, option):
+        '''Helper for creating, deleting users and groups
+
+        Args:
+            host (str): Node on which the users/groups are managed
+            option (str): Either `create` or `delete`; any other value is
+                          a no-op
+
+        Raises:
+            ExecutionError: When a group or user operation fails
+        '''
+
+        # Permission/Ownership changes required only for `test_metadata..`
+        # tests, using random group and usernames
+        if 'metadata' not in self.test_dir:
+            return
+
+        if option == 'create':
+            # Groups
+            for group in ('qa_func', 'qa_system'):
+                if not group_add(host, group):
+                    raise ExecutionError('Unable to {} group {} on '
+                                         '{}'.format(option, group, host))
+
+            # User
+            if not add_user(host, 'qa_all', group='qa_func'):
+                raise ExecutionError('Unable to {} user {} under {} on '
+                                     '{}'.format(option, 'qa_all', 'qa_func',
+                                                 host))
+        elif option == 'delete':
+            # Groups
+            for group in ('qa_func', 'qa_system'):
+                if not group_del(host, group):
+                    raise ExecutionError('Unable to {} group {} on '
+                                         '{}'.format(option, group, host))
+
+            # User
+            if not del_user(host, 'qa_all'):
+                raise ExecutionError('Unable to {} user on {}'.format(
+                    option, host))
+
+    def setUp(self):
+        '''Setup and mount the volume on a single client and create the
+        users/groups needed by the metadata tests'''
+        self.get_super_method(self, 'setUp')()
+
+        # A single mount is enough for all the tests
+        self.mounts = self.mounts[0:1]
+        self.client = self.mounts[0].client_system
+
+        # Use testcase name as test directory
+        self.test_dir = self.id().split('.')[-1]
+        self.fqpath = self.mounts[0].mountpoint + '/' + self.test_dir
+
+        if not self.setup_volume_and_mount_volume(mounts=self.mounts):
+            raise ExecutionError('Failed to setup and mount '
+                                 '{}'.format(self.volname))
+
+        # Create group and user names required for the test
+        self._dac_helper(host=self.client, option='create')
+
+    def tearDown(self):
+        '''Remove the users/groups created in setUp, then unmount and
+        cleanup the volume'''
+        # Delete group and user names created as part of setup
+        self._dac_helper(host=self.client, option='delete')
+
+        if not self.unmount_volume_and_cleanup_volume(mounts=self.mounts):
+            raise ExecutionError('Not able to unmount and cleanup '
+                                 '{}'.format(self.volname))
+
+        self.get_super_method(self, 'tearDown')()
+
+    def _perform_io_and_disable_self_heal(self):
+        '''Refactor of steps common to all tests: Perform IO, disable heal'''
+        ret = mkdir(self.client, self.fqpath)
+        self.assertTrue(ret,
+                        'Directory creation failed on {}'.format(self.client))
+        # Prefix of the data generator; callers append the byte count
+        self.io_cmd = 'cat /dev/urandom | tr -dc [:space:][:print:] | head -c '
+        # Create 6 dir's, 6 files and 6 files in each subdir with 10K data
+        file_io = ('''cd {0}; for i in `seq 1 6`;
+                    do mkdir dir.$i; {1} 10K > file.$i;
+                    for j in `seq 1 6`;
+                    do {1} 10K > dir.$i/file.$j; done;
+                    done;'''.format(self.fqpath, self.io_cmd))
+        ret, _, err = g.run(self.client, file_io)
+        self.assertEqual(ret, 0, 'Unable to create directories and data files')
+        self.assertFalse(err, '{0} failed with {1}'.format(file_io, err))
+
+        # Disable self heal daemon
+        self.assertTrue(disable_self_heal_daemon(self.mnode, self.volname),
+                        'Disabling self-heal-daemon falied')
+
+    def _perform_brick_ops_and_enable_self_heal(self, op_type):
+        '''Refactor of steps common to all tests: Brick down and perform
+        metadata/data operations
+
+        Args:
+            op_type (str): `metadata` or `data`, selects the set of commands
+                           run while each brick is offline
+        '''
+        # First brick in the subvol will always be online and used for self
+        # heal, so make keys match brick index
+        self.op_cmd = {
+            # Metadata Operations (owner and permission changes)
+            'metadata': {
+                2:
+                '''cd {0}; for i in `seq 1 3`; do chown -R qa_all:qa_func \
+                dir.$i file.$i; chmod -R 555 dir.$i file.$i; done;''',
+                3:
+                '''cd {0}; for i in `seq 1 3`; do chown -R :qa_system \
+                dir.$i file.$i; chmod -R 777 dir.$i file.$i; done;''',
+                # 4 - Will be used for final data consistency check
+                4:
+                '''cd {0}; for i in `seq 1 6`; do chown -R qa_all:qa_system \
+                dir.$i file.$i; chmod -R 777 dir.$i file.$i; done;''',
+            },
+            # Data Operations (append data to the files)
+            'data': {
+                2:
+                '''cd {0}; for i in `seq 1 3`;
+                    do {1} 2K >> file.$i;
+                    for j in `seq 1 3`;
+                    do {1} 2K >> dir.$i/file.$j; done;
+                    done;''',
+                3:
+                '''cd {0}; for i in `seq 1 3`;
+                    do {1} 3K >> file.$i;
+                    for j in `seq 1 3`;
+                    do {1} 3K >> dir.$i/file.$j; done;
+                    done;''',
+                # 4 - Will be used for final data consistency check
+                4:
+                '''cd {0}; for i in `seq 1 6`;
+                    do {1} 4K >> file.$i;
+                    for j in `seq 1 6`;
+                    do {1} 4K >> dir.$i/file.$j; done;
+                    done;''',
+            },
+        }
+        bricks = get_online_bricks_list(self.mnode, self.volname)
+        self.assertIsNotNone(bricks,
+                             'Not able to get list of bricks in the volume')
+
+        # Make first brick always online and start operations from second brick
+        for index, brick in enumerate(bricks[1:], start=2):
+
+            # Bring brick offline; report the single brick being acted on,
+            # not the whole brick list
+            ret = bring_bricks_offline(self.volname, brick)
+            self.assertTrue(ret, 'Unable to bring {} offline'.format(brick))
+
+            # Perform metadata/data operation
+            cmd = self.op_cmd[op_type][index].format(self.fqpath, self.io_cmd)
+            ret, _, err = g.run(self.client, cmd)
+            self.assertEqual(ret, 0, '{0} failed with {1}'.format(cmd, err))
+            self.assertFalse(err, '{0} failed with {1}'.format(cmd, err))
+
+            # Bring brick online and make sure it really came back, else the
+            # next iteration would run with two bricks down
+            ret = bring_bricks_online(
+                self.mnode,
+                self.volname,
+                brick,
+                bring_bricks_online_methods='volume_start_force')
+            self.assertTrue(ret, 'Unable to bring {} online'.format(brick))
+
+        # Assert metadata/data operations resulted in pending heals
+        self.assertFalse(is_heal_complete(self.mnode, self.volname),
+                         'No pending heals found after brick operations')
+
+        # Enable and wait self heal daemon to be online
+        self.assertTrue(enable_self_heal_daemon(self.mnode, self.volname),
+                        'Enabling self heal daemon failed')
+        self.assertTrue(
+            wait_for_self_heal_daemons_to_be_online(self.mnode, self.volname),
+            'Not all self heal daemons are online')
+
+    def _validate_heal_completion_and_arequal(self, op_type):
+        '''Refactor of steps common to all tests: Validate heal from heal
+        commands, verify arequal, perform IO and verify arequal after IO
+
+        Args:
+            op_type (str): `metadata` or `data`, selects the additional
+                           operation run after heal completion
+        '''
+
+        # Validate heal completion
+        self.assertTrue(monitor_heal_completion(self.mnode, self.volname),
+                        'Self heal is not completed within timeout')
+        self.assertFalse(
+            is_volume_in_split_brain(self.mnode, self.volname),
+            'Volume is in split brain even after heal completion')
+
+        subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
+        self.assertTrue(subvols, 'Not able to get list of subvols')
+        # For arbiter volumes the last brick of a subvol is left out of the
+        # checksum comparison
+        arbiter = 'arbiter' in self.volume_type
+        stop = len(subvols[0]) - 1 if arbiter else len(subvols[0])
+
+        # Validate arequal
+        self._validate_arequal_and_perform_lookup(subvols, stop)
+
+        # Perform some additional metadata/data operations
+        cmd = self.op_cmd[op_type][4].format(self.fqpath, self.io_cmd)
+        ret, _, err = g.run(self.client, cmd)
+        self.assertEqual(ret, 0, '{0} failed with {1}'.format(cmd, err))
+        self.assertFalse(err, '{0} failed with {1}'.format(cmd, err))
+
+        # Validate arequal after additional operations
+        self._validate_arequal_and_perform_lookup(subvols, stop)
+
+    def _validate_arequal_and_perform_lookup(self, subvols, stop):
+        '''Refactor of steps common to all tests: Validate arequal from bricks
+        backend and perform a lookup of all files from mount
+
+        Args:
+            subvols (list): List of subvols, each a list of bricks
+            stop (int): Number of leading bricks of every subvol to compare
+        '''
+        for subvol in subvols:
+            ret, arequal = collect_bricks_arequal(subvol[0:stop])
+            self.assertTrue(
+                ret, 'Unable to get `arequal` checksum on '
+                '{}'.format(subvol[0:stop]))
+            self.assertEqual(
+                len(set(arequal)), 1, 'Mismatch of `arequal` '
+                'checksum among {} is identified'.format(subvol[0:stop]))
+
+        # Perform a lookup of all files and directories on mounts
+        self.assertTrue(list_all_files_and_dirs_mounts(self.mounts),
+                        'Failed to list all files and dirs from mount')
+
+    def test_metadata_heal_from_shd(self):
+        '''Description: Verify files heal after switching on `self-heal-daemon`
+        when metadata operations are performed while a brick was down
+
+        Steps:
+        1. Create, mount and run IO on volume
+        2. Set `self-heal-daemon` to `off`, cyclic brick down and perform
+           metadata operations
+        3. Set `self-heal-daemon` to `on` and wait for heal completion
+        4. Validate arequal checksum on backend bricks
+        '''
+        op_type = 'metadata'
+        self._perform_io_and_disable_self_heal()
+        self._perform_brick_ops_and_enable_self_heal(op_type=op_type)
+        self._validate_heal_completion_and_arequal(op_type=op_type)
+        g.log.info('Pass: Verification of metadata heal after switching on '
+                   '`self heal daemon` is complete')
+
+    def test_metadata_heal_from_heal_cmd(self):
+        '''Description: Verify files heal after triggering heal command when
+        metadata operations are performed while a brick was down
+
+        Steps:
+        1. Create, mount and run IO on volume
+        2. Set `self-heal-daemon` to `off`, cyclic brick down and perform
+           metadata operations
+        3. Set `self-heal-daemon` to `on`, invoke `gluster vol <vol> heal`
+        4. Validate arequal checksum on backend bricks
+        '''
+        op_type = 'metadata'
+        self._perform_io_and_disable_self_heal()
+        self._perform_brick_ops_and_enable_self_heal(op_type=op_type)
+
+        # Invoke `glfsheal`
+        self.assertTrue(trigger_heal(self.mnode, self.volname),
+                        'Unable to trigger index heal on the volume')
+
+        self._validate_heal_completion_and_arequal(op_type=op_type)
+        g.log.info(
+            'Pass: Verification of metadata heal via `glfsheal` is complete')
+
+    def test_data_heal_from_shd(self):
+        '''Description: Verify files heal after switching on `self-heal-daemon`
+        when data operations are performed while a brick was down
+
+        Steps:
+        1. Create, mount and run IO on volume
+        2. Set `self-heal-daemon` to `off`, cyclic brick down and perform data
+           operations
+        3. Set `self-heal-daemon` to `on` and wait for heal completion
+        4. Validate arequal checksum on backend bricks
+        '''
+        op_type = 'data'
+        self._perform_io_and_disable_self_heal()
+        self._perform_brick_ops_and_enable_self_heal(op_type=op_type)
+        self._validate_heal_completion_and_arequal(op_type=op_type)
+        g.log.info('Pass: Verification of data heal after switching on '
+                   '`self heal daemon` is complete')
diff --git a/tests/functional/bvt/test_cvt.py b/tests/functional/bvt/test_cvt.py
index dea2512..f8cb4f2 100644
--- a/tests/functional/bvt/test_cvt.py
+++ b/tests/functional/bvt/test_cvt.py
@@ -41,15 +41,13 @@ from glustolibs.gluster.volume_libs import (
from glustolibs.gluster.volume_libs import (
log_volume_info_and_status, expand_volume, shrink_volume,
replace_brick_from_volume, wait_for_volume_process_to_be_online)
-from glustolibs.gluster.glusterfile import get_fattr_list
from glustolibs.gluster.rebalance_ops import (rebalance_start,
wait_for_rebalance_to_complete,
rebalance_status)
from glustolibs.gluster.brick_libs import (select_bricks_to_bring_offline,
bring_bricks_offline,
bring_bricks_online,
- are_bricks_offline,
- get_all_bricks)
+ are_bricks_offline)
from glustolibs.gluster.heal_libs import monitor_heal_completion
from glustolibs.gluster.quota_ops import (quota_enable, quota_disable,
quota_limit_usage,
@@ -286,39 +284,9 @@ class TestGlusterShrinkVolumeSanity(GlusterBasicFeaturesSanityBaseClass):
g.log.info("Successful in logging volume info and status of volume %s",
self.volname)
- # Temporary code:
- # Additional checks to gather infomartion from all
- # servers for Bug 1810901 and setting log level to debug.
- if self.volume_type == 'distributed-dispersed':
- for brick_path in get_all_bricks(self.mnode, self.volname):
- node, path = brick_path.split(':')
- ret, out, _ = g.run(node, 'find {}/'.format(path))
- g.log.info(out)
- for filedir in out.split('\n'):
- ret, out, _ = g.run(node, 'ls -l {}'.format(filedir))
- g.log.info("Return value for ls -l command: %s", ret)
- g.log.info(out)
- ret = get_fattr_list(node, filedir, encode_hex=True)
- g.log.info(ret)
-
# Shrinking volume by removing bricks from volume when IO in progress
ret = shrink_volume(self.mnode, self.volname)
- # Temporary code:
- # Additional checks to gather infomartion from all
- # servers for Bug 1810901.
- if not ret and self.volume_type == 'distributed-dispersed':
- for brick_path in get_all_bricks(self.mnode, self.volname):
- node, path = brick_path.split(':')
- ret, out, _ = g.run(node, 'find {}/'.format(path))
- g.log.info(out)
- for filedir in out.split('\n'):
- ret, out, _ = g.run(node, 'ls -l {}'.format(filedir))
- g.log.info("Return value for ls -l command: %s", ret)
- g.log.info(out)
- ret = get_fattr_list(node, filedir, encode_hex=True)
- g.log.info(ret)
-
self.assertTrue(ret, ("Failed to shrink the volume when IO in "
"progress on volume %s", self.volname))
g.log.info("Shrinking volume when IO in progress is successful on "
diff --git a/tests/functional/dht/test_rebalance_multiple_expansions.py b/tests/functional/dht/test_rebalance_multiple_expansions.py
new file mode 100644
index 0000000..e96d88d
--- /dev/null
+++ b/tests/functional/dht/test_rebalance_multiple_expansions.py
@@ -0,0 +1,100 @@
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.rebalance_ops import (
+ rebalance_start, wait_for_rebalance_to_complete)
+from glustolibs.gluster.volume_libs import expand_volume
+from glustolibs.io.utils import collect_mounts_arequal
+
+
+@runs_on([['distributed', 'distributed-replicated'],
+          ['glusterfs']])
+class TestRebalanceMultipleExpansions(GlusterBaseClass):
+    """Verify data stays intact across repeated expand + rebalance cycles."""
+
+    def setUp(self):
+        """Set up the volume and mount it on the first client."""
+
+        self.get_super_method(self, 'setUp')()
+
+        # Setup Volume
+        if not self.setup_volume_and_mount_volume([self.mounts[0]]):
+            raise ExecutionError("Failed to Setup and mount volume")
+
+        self.first_client = self.mounts[0].client_system
+
+    def tearDown(self):
+        """Unmount the first mount and clean up the volume."""
+
+        # Unmount and clean volume
+        if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]):
+            raise ExecutionError("Failed to Cleanup Volume")
+
+        # Calling GlusterBaseClass tearDown
+        self.get_super_method(self, 'tearDown')()
+
+    def test_rebalance_multiple_expansions(self):
+        """
+        Test case:
+        1. Create a volume, start it and mount it
+        2. Create some file on mountpoint
+        3. Collect arequal checksum on mount point pre-rebalance
+        4. Do the following 3 times:
+           a. Expand the volume
+           b. Start rebalance and wait for it to finish
+           c. Collect arequal checksum on mount point post-rebalance
+              and compare with value from step 3
+        """
+
+        # Create some file on mountpoint
+        cmd = ("cd %s; for i in {1..500} ; do "
+               "dd if=/dev/urandom of=file$i bs=10M count=1; done"
+               % self.mounts[0].mountpoint)
+        ret, _, _ = g.run(self.first_client, cmd)
+        self.assertEqual(ret, 0, "IO failed on volume %s"
+                         % self.volname)
+
+        # Collect arequal checksum before rebalance; check the status flag
+        # so that two failed collections cannot compare as equal later
+        ret, arequal_checksum_before = collect_mounts_arequal(
+            self.mounts[0])
+        self.assertTrue(ret, "Failed to collect arequal checksum before "
+                        "rebalance on volume %s" % self.volname)
+
+        for _ in range(3):
+            # Add brick to volume
+            ret = expand_volume(self.mnode, self.volname, self.servers,
+                                self.all_servers_info)
+            self.assertTrue(ret, "Failed to add brick on volume %s"
+                            % self.volname)
+
+            # Trigger rebalance and wait for it to complete
+            ret, _, _ = rebalance_start(self.mnode, self.volname,
+                                        force=True)
+            self.assertEqual(ret, 0, "Failed to start rebalance on "
+                             "volume %s" % self.volname)
+
+            # Wait for rebalance to complete
+            ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
+                                                 timeout=1200)
+            self.assertTrue(ret, "Rebalance is not yet complete on the volume "
+                            "%s" % self.volname)
+            g.log.info("Rebalance successfully completed")
+
+            # Collect arequal checksum after rebalance
+            ret, arequal_checksum_after = collect_mounts_arequal(
+                self.mounts[0])
+            self.assertTrue(ret, "Failed to collect arequal checksum after "
+                            "rebalance on volume %s" % self.volname)
+
+            # Check for data loss by comparing arequal before and after
+            # rebalance
+            self.assertEqual(arequal_checksum_before, arequal_checksum_after,
+                             "arequal checksum is NOT MATCHNG")
+            g.log.info("arequal checksum is SAME")
diff --git a/tests/functional/dht/test_verify_permissions_on_root_dir_when_brick_down.py b/tests/functional/dht/test_verify_permissions_on_root_dir_when_brick_down.py
new file mode 100644
index 0000000..f6228c1
--- /dev/null
+++ b/tests/functional/dht/test_verify_permissions_on_root_dir_when_brick_down.py
@@ -0,0 +1,134 @@
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from glusto.core import Glusto as g
+
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.glusterfile import set_file_permissions
+from glustolibs.gluster.brick_libs import (get_all_bricks,
+ bring_bricks_offline,
+ bring_bricks_online)
+
+
+@runs_on([['distributed', 'distributed-replicated', 'distributed-dispersed',
+           'distributed-arbiter'],
+          ['glusterfs']])
+class TestVerifyPermissionChanges(GlusterBaseClass):
+    """Verify root dir permission changes made while a brick is down."""
+
+    def setUp(self):
+        """
+        Setup and mount volume
+        """
+        self.get_super_method(self, 'setUp')()
+
+        # Setup Volume
+        if not self.setup_volume_and_mount_volume(mounts=[self.mounts[0]]):
+            raise ExecutionError("Failed to Setup and Mount Volume")
+
+    def _set_root_dir_permission(self, permission):
+        """ Sets the root dir permission to the given value"""
+        m_point = self.mounts[0].mountpoint
+        ret = set_file_permissions(self.clients[0], m_point, permission)
+        self.assertTrue(ret, "Failed to set root dir permissions")
+
+    def _get_dir_permissions(self, host, directory):
+        """ Returns the octal permission string of directory on host"""
+        cmd = 'stat -c "%a" {}'.format(directory)
+        ret, out, _ = g.run(host, cmd)
+        self.assertEqual(ret, 0, "Failed to get permission on {}".format(host))
+        return out.strip()
+
+    def _get_root_dir_permission(self, expected=None):
+        """ Returns the root dir permission
+
+        When `expected` is given the permission is asserted against it and
+        True is returned instead of the permission string.
+        """
+        # Already stripped by _get_dir_permissions
+        permission = self._get_dir_permissions(self.mounts[0].client_system,
+                                               self.mounts[0].mountpoint)
+        if not expected:
+            return permission
+        self.assertEqual(permission, expected, "The permissions doesn't match")
+        return True
+
+    def _bring_a_brick_offline(self):
+        """ Brings down the last brick of the volume and returns it"""
+        brick_to_kill = get_all_bricks(self.mnode, self.volname)[-1]
+        ret = bring_bricks_offline(self.volname, brick_to_kill)
+        self.assertTrue(ret, "Failed to bring brick offline")
+        return brick_to_kill
+
+    def _bring_back_brick_online(self, brick):
+        """ Brings the given down brick of the volume back online"""
+        ret = bring_bricks_online(self.mnode, self.volname, brick)
+        self.assertTrue(ret, "Failed to bring brick online")
+
+    def _verify_mount_dir_and_brick_dir_permissions(self, expected,
+                                                    down_brick=None):
+        """ Verifies the mount directory and brick dir permissions are same
+
+        Args:
+            expected (str): Expected octal permission, e.g. "755"
+            down_brick (str): Optional `host:path` of an offline brick which
+                              is excluded from the verification
+        """
+        # Get root dir permission and verify
+        self._get_root_dir_permission(expected)
+
+        # Verify brick dir permission
+        brick_list = get_all_bricks(self.mnode, self.volname)
+        for brick in brick_list:
+            # Skip only the brick that is down. Comparing the complete
+            # `host:path` keeps bricks with the same path on other nodes
+            # in the check, and with no down brick every brick is verified.
+            if brick == down_brick:
+                continue
+            brick_node, brick_path = brick.split(":")
+            actual_perm = self._get_dir_permissions(brick_node, brick_path)
+            self.assertEqual(actual_perm, expected,
+                             "The permissions are not same")
+
+    def test_verify_root_dir_permission_changes(self):
+        """
+        1. create pure dist volume
+        2. mount on client
+        3. Checked default permission (should be 755)
+        4. Change the permission to 444 and verify
+        5. Kill a brick
+        6. Change root permission to 755
+        7. Verify permission changes on all bricks, except down brick
+        8. Bring back the brick and verify the changes are reflected
+        """
+
+        # Verify the default permission on root dir is 755
+        self._verify_mount_dir_and_brick_dir_permissions("755")
+
+        # Change root permission to 444
+        self._set_root_dir_permission("444")
+
+        # Verify the changes were successful
+        self._verify_mount_dir_and_brick_dir_permissions("444")
+
+        # Kill a brick
+        offline_brick = self._bring_a_brick_offline()
+
+        # Change root permission to 755
+        self._set_root_dir_permission("755")
+
+        # Verify the permission changed to 755 on mount and brick dirs
+        self._verify_mount_dir_and_brick_dir_permissions("755", offline_brick)
+
+        # Bring brick online
+        self._bring_back_brick_online(offline_brick)
+
+        # Verify the permission changed to 755 on mount and brick dirs
+        self._verify_mount_dir_and_brick_dir_permissions("755")
+
+    def tearDown(self):
+        """Unmount the first mount and clean up the volume."""
+        # Unmount and cleanup original volume
+        if not self.unmount_volume_and_cleanup_volume(mounts=[self.mounts[0]]):
+            raise ExecutionError("Failed to umount the vol & cleanup Volume")
+        g.log.info("Successful in umounting the volume and Cleanup")
+
+        # Calling GlusterBaseClass tearDown
+        self.get_super_method(self, 'tearDown')()
diff --git a/tests/functional/glusterd/test_glusterd_memory_consumption_increase.py b/tests/functional/glusterd/test_glusterd_memory_consumption_increase.py
new file mode 100644
index 0000000..92c48da
--- /dev/null
+++ b/tests/functional/glusterd/test_glusterd_memory_consumption_increase.py
@@ -0,0 +1,207 @@
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+""" Description:
+ Increase in glusterd memory consumption on repetitive operations
+ for 100 volumes
+"""
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass
+from glustolibs.gluster.volume_ops import (volume_stop, volume_delete,
+ get_volume_list,
+ volume_start)
+from glustolibs.gluster.gluster_init import (restart_glusterd,
+ wait_for_glusterd_to_start)
+from glustolibs.gluster.volume_libs import (bulk_volume_creation,
+ cleanup_volume)
+from glustolibs.gluster.volume_ops import set_volume_options
+
+
+class TestGlusterMemoryConsumptionIncrease(GlusterBaseClass):
+    """Check glusterd memory growth across repeated volume operations."""
+
+    def tearDown(self):
+        """Delete leftover volumes and disable brick-multiplex.
+
+        Raises:
+            ExecutionError: if the volume list cannot be fetched, a volume
+                cannot be cleaned up or brick-multiplex cannot be disabled.
+        """
+        # volume_present is only assigned by _volume_operations_in_loop, so
+        # it is missing when the test fails before that helper runs.
+        if getattr(self, 'volume_present', False):
+            vol_list = get_volume_list(self.mnode)
+            if vol_list is None:
+                raise ExecutionError("Failed to get the volume list")
+
+            for volume in vol_list:
+                ret = cleanup_volume(self.mnode, volume)
+                if not ret:
+                    raise ExecutionError("Unable to delete volume %s" % volume)
+                g.log.info("Volume deleted successfully : %s", volume)
+
+        # Disable multiplex; cleanup failures are raised, not asserted
+        ret = set_volume_options(self.mnode, 'all',
+                                 {'cluster.brick-multiplex': 'disable'})
+        if not ret:
+            raise ExecutionError("Failed to disable brick-multiplex"
+                                 " for the cluster")
+
+        # Calling baseclass tearDown method
+        self.get_super_method(self, 'tearDown')()
+
+    def _volume_operations_in_loop(self, num_volumes=100):
+        """Create, start, stop and delete volumes in a loop.
+
+        Args:
+            num_volumes (int): number of volumes to cycle through.
+                Defaults to 100.
+        """
+        self.volume_config = {
+            'name': 'volume-',
+            'servers': self.servers,
+            'voltype': {'type': 'distributed-replicated',
+                        'dist_count': 2,
+                        'replica_count': 3},
+        }
+
+        # Flag the volumes before creating them so that tearDown also
+        # cleans up whatever a partially failed bulk creation left behind.
+        self.volume_present = True
+
+        ret = bulk_volume_creation(self.mnode, num_volumes,
+                                   self.all_servers_info,
+                                   self.volume_config, "", False, True)
+        self.assertTrue(ret, "Failed to create volumes")
+
+        g.log.info("Successfully created all the volumes")
+
+        # Start the volumes in loop
+        for i in range(num_volumes):
+            self.volname = "volume-%d" % i
+            ret, _, _ = volume_start(self.mnode, self.volname)
+            self.assertEqual(ret, 0, "Failed to start volume: %s"
+                             % self.volname)
+
+        g.log.info("Successfully started all the volumes")
+
+        # Stop the volumes in loop
+        for i in range(num_volumes):
+            self.volname = "volume-%d" % i
+            ret, _, _ = volume_stop(self.mnode, self.volname)
+            self.assertEqual(ret, 0, "Failed to stop volume: %s"
+                             % self.volname)
+
+        g.log.info("Successfully stopped all the volumes")
+
+        # Delete the volumes in loop
+        for i in range(num_volumes):
+            self.volname = "volume-%d" % i
+            ret = volume_delete(self.mnode, self.volname)
+            self.assertTrue(ret, "Failed to delete volume: %s"
+                            % self.volname)
+
+        # Everything was deleted, nothing is left for tearDown
+        self.volume_present = False
+
+        g.log.info("Successfully deleted all the volumes")
+
+    def _memory_consumption_for_all_nodes(self, pid_list):
+        """Fetch the memory consumption of glusterd on all the nodes.
+
+        Args:
+            pid_list (list): glusterd PIDs, in the same order as
+                self.servers.
+
+        Returns:
+            list: one integer per server, the value printed by top
+                floor-divided by 1024.
+        """
+        memory_consumed_list = []
+        for server, pid in zip(self.servers, pid_list):
+            # Field 6 of line 8 of the batch output of top for this PID;
+            # presumably RES in KiB, so the division yields MB - confirm
+            # against the top configuration on the servers.
+            cmd = "top -b -n 1 -p %d | awk 'FNR==8 {print $6}'" % pid
+            ret, mem, _ = g.run(server, cmd)
+            self.assertEqual(ret, 0, "Failed to get the memory usage of"
+                             " glusterd process")
+            memory_consumed_list.append(int(mem)//1024)
+
+        return memory_consumed_list
+
+    def _assert_memory_increase_within(self, before, after, limit):
+        """Assert that no node grew by more than `limit` between samples.
+
+        Args:
+            before (list): per-node memory values of the earlier sample.
+            after (list): per-node memory values of the later sample.
+            limit (int): largest accepted increase, in the unit of the
+                samples.
+        """
+        for mem_before, mem_after in zip(before, after):
+            self.assertLessEqual(mem_after - mem_before, limit,
+                                 "Unexpected: Memory consumption"
+                                 " glusterd increased more than the expected"
+                                 " of value")
+
+    def test_glusterd_memory_consumption_increase(self):
+        """
+        Test Case:
+        1) Enable brick-multiplex and set max-bricks-per-process to 3 in
+           the cluster
+        2) Get the glusterd memory consumption
+        3) Perform create,start,stop,delete operation for 100 volumes
+        4) Check glusterd memory consumption, it should not increase by
+           more than 50MB
+        5) Repeat step 3 two more times
+        6) After each repetition the glusterd memory consumption should
+           not increase by more than 10MB
+        """
+        # Restarting glusterd to refresh its memory consumption
+        ret = restart_glusterd(self.servers)
+        self.assertTrue(ret, "Restarting glusterd failed")
+
+        # Check if glusterd is running after the restart
+        ret = wait_for_glusterd_to_start(self.servers)
+        self.assertTrue(ret, "Glusterd service is not running post reboot")
+
+        # Enable brick-multiplex, set max-bricks-per-process to 3 in cluster
+        for key, value in (('cluster.brick-multiplex', 'enable'),
+                           ('cluster.max-bricks-per-process', '3')):
+            ret = set_volume_options(self.mnode, 'all', {key: value})
+            self.assertTrue(ret, "Failed to set {} to {} "
+                            " for the cluster".format(key, value))
+
+        # Get the pid of the glusterd process on every node
+        pid_list = []
+        for server in self.servers:
+            ret, pid, _ = g.run(server, "pidof glusterd")
+            self.assertEqual(ret, 0, "Failed to get the pid of glusterd")
+            pid_list.append(int(pid))
+
+        # Baseline memory consumption on all the nodes
+        previous = self._memory_consumption_for_all_nodes(pid_list)
+
+        # Three rounds of volume operations: the first one may grow the
+        # memory by up to 50, the following two by up to 10 each.
+        for limit in (50, 10, 10):
+            self._volume_operations_in_loop()
+            current = self._memory_consumption_for_all_nodes(pid_list)
+            self._assert_memory_increase_within(previous, current, limit)
+            previous = current
diff --git a/tests/functional/glusterd/test_probe_glusterd_down.py b/tests/functional/glusterd/test_probe_glusterd_down.py
index 3705904..c851bf1 100644
--- a/tests/functional/glusterd/test_probe_glusterd_down.py
+++ b/tests/functional/glusterd/test_probe_glusterd_down.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+# Copyright (C) 2020-2021 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -14,17 +14,14 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-from time import sleep
-
from glusto.core import Glusto as g
from glustolibs.gluster.gluster_base_class import GlusterBaseClass
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.peer_ops import peer_probe
from glustolibs.gluster.lib_utils import is_core_file_created
from glustolibs.gluster.peer_ops import peer_detach, is_peer_connected
-from glustolibs.gluster.gluster_init import (stop_glusterd, start_glusterd,
- wait_for_glusterd_to_start)
-from glustolibs.misc.misc_libs import are_nodes_online
+from glustolibs.gluster.gluster_init import stop_glusterd, start_glusterd
+from glustolibs.misc.misc_libs import bring_down_network_interface
class PeerProbeWhenGlusterdDown(GlusterBaseClass):
@@ -57,7 +54,7 @@ class PeerProbeWhenGlusterdDown(GlusterBaseClass):
ret, test_timestamp, _ = g.run_local('date +%s')
test_timestamp = test_timestamp.strip()
- # detach one of the nodes which is part of the cluster
+ # Detach one of the nodes which is part of the cluster
g.log.info("detaching server %s ", self.servers[1])
ret, _, err = peer_detach(self.mnode, self.servers[1])
msg = 'peer detach: failed: %s is not part of cluster\n' \
@@ -66,12 +63,12 @@ class PeerProbeWhenGlusterdDown(GlusterBaseClass):
self.assertEqual(err, msg, "Failed to detach %s "
% (self.servers[1]))
- # bring down glusterd of the server which has been detached
+ # Bring down glusterd of the server which has been detached
g.log.info("Stopping glusterd on %s ", self.servers[1])
ret = stop_glusterd(self.servers[1])
self.assertTrue(ret, "Fail to stop glusterd on %s " % self.servers[1])
- # trying to peer probe the node whose glusterd was stopped using its IP
+ # Trying to peer probe the node whose glusterd was stopped using IP
g.log.info("Peer probing %s when glusterd down ", self.servers[1])
ret, _, err = peer_probe(self.mnode, self.servers[1])
self.assertNotEqual(ret, 0, "Peer probe should not pass when "
@@ -79,7 +76,7 @@ class PeerProbeWhenGlusterdDown(GlusterBaseClass):
self.assertEqual(err, "peer probe: failed: Probe returned with "
"Transport endpoint is not connected\n")
- # trying to peer probe the same node with hostname
+ # Trying to peer probe the same node with hostname
g.log.info("Peer probing node %s using hostname with glusterd down ",
self.servers[1])
hostname = g.run(self.servers[1], "hostname")
@@ -89,27 +86,24 @@ class PeerProbeWhenGlusterdDown(GlusterBaseClass):
self.assertEqual(err, "peer probe: failed: Probe returned with"
" Transport endpoint is not connected\n")
- # start glusterd again for the next set of test steps
+ # Start glusterd again for the next set of test steps
g.log.info("starting glusterd on %s ", self.servers[1])
ret = start_glusterd(self.servers[1])
self.assertTrue(ret, "glusterd couldn't start successfully on %s"
% self.servers[1])
- # reboot a server and then trying to peer probe at the time of reboot
- g.log.info("Rebooting %s and checking peer probe", self.servers[1])
- reboot = g.run_async(self.servers[1], "reboot")
-
- # Mandatory sleep for 3 seconds to make sure node is in halted state
- sleep(3)
+ # Bring down the network for some time
+ network_status = bring_down_network_interface(self.servers[1], 150)
# Peer probing the node using IP when it is still not online
- g.log.info("Peer probing node %s which has been issued a reboot ",
+ g.log.info("Peer probing node %s when network is down",
self.servers[1])
ret, _, err = peer_probe(self.mnode, self.servers[1])
self.assertNotEqual(ret, 0, "Peer probe passed when it was expected to"
" fail")
- self.assertEqual(err, "peer probe: failed: Probe returned with "
- "Transport endpoint is not connected\n")
+ self.assertEqual(err.split("\n")[0], "peer probe: failed: Probe "
+ "returned with Transport endpoint"
+ " is not connected")
# Peer probing the node using hostname when it is still not online
g.log.info("Peer probing node %s using hostname which is still "
@@ -118,35 +112,21 @@ class PeerProbeWhenGlusterdDown(GlusterBaseClass):
ret, _, err = peer_probe(self.mnode, hostname[1].strip())
self.assertNotEqual(ret, 0, "Peer probe should not pass when node "
"has not come online")
- self.assertEqual(err, "peer probe: failed: Probe returned with "
- "Transport endpoint is not connected\n")
+ self.assertEqual(err.split("\n")[0], "peer probe: failed: Probe "
+ "returned with Transport endpoint"
+ " is not connected")
+
+ ret, _, _ = network_status.async_communicate()
+ if ret != 0:
+ g.log.error("Failed to perform network interface ops")
- ret, _, _ = reboot.async_communicate()
- self.assertEqual(ret, 255, "reboot failed")
-
- # Validate if rebooted node is online or not
- count = 0
- while count < 40:
- sleep(15)
- ret, _ = are_nodes_online(self.servers[1])
- if ret:
- g.log.info("Node %s is online", self.servers[1])
- break
- count += 1
- self.assertTrue(ret, "Node in test not yet online")
-
- # check if glusterd is running post reboot
- ret = wait_for_glusterd_to_start(self.servers[1],
- glusterd_start_wait_timeout=120)
- self.assertTrue(ret, "Glusterd service is not running post reboot")
-
- # peer probe the node must pass
+ # Peer probe the node must pass
g.log.info("peer probing node %s", self.servers[1])
ret, _, err = peer_probe(self.mnode, self.servers[1])
self.assertEqual(ret, 0, "Peer probe has failed unexpectedly with "
"%s " % err)
- # checking if core file created in "/", "/tmp" and "/var/log/core"
+ # Checking if core file created in "/", "/tmp" and "/var/log/core"
ret = is_core_file_created(self.servers, test_timestamp)
self.assertTrue(ret, "core file found")
diff --git a/tests/functional/glusterd/test_verify_df_output.py b/tests/functional/glusterd/test_verify_df_output.py
new file mode 100644
index 0000000..4eac919
--- /dev/null
+++ b/tests/functional/glusterd/test_verify_df_output.py
@@ -0,0 +1,171 @@
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import (GlusterBaseClass,
+ runs_on)
+from glustolibs.gluster.heal_libs import monitor_heal_completion
+from glustolibs.io.utils import validate_io_procs
+from glustolibs.misc.misc_libs import upload_scripts
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.volume_libs import (replace_brick_from_volume,
+ shrink_volume, expand_volume)
+from glustolibs.gluster.brick_libs import get_all_bricks
+
+
+@runs_on([['distributed-dispersed', 'distributed-replicated',
+           'distributed-arbiter', 'dispersed', 'replicated',
+           'arbiter'],
+          ['glusterfs']])
+class VerifyDFWithReplaceBrick(GlusterBaseClass):
+    """Verify the df -h size across brick replace, add and remove."""
+
+    @classmethod
+    def setUpClass(cls):
+        """Upload the IO script to the clients.
+
+        Raises:
+            ExecutionError: if the script upload fails.
+        """
+        # Calling GlusterBaseClass setUpClass
+        cls.get_super_method(cls, 'setUpClass')()
+
+        # Upload io scripts for running IO on mounts
+        cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
+                                  "file_dir_ops.py")
+        if not upload_scripts(cls.clients, [cls.script_upload_path]):
+            raise ExecutionError("Failed to upload IO scripts to clients %s"
+                                 % cls.clients)
+        g.log.info("Successfully uploaded IO scripts to clients %s",
+                   cls.clients)
+
+    def setUp(self):
+        """Set up and mount the volume.
+
+        Raises:
+            ExecutionError: if the volume setup or the mount fails.
+        """
+        # Calling GlusterBaseClass setUp
+        self.get_super_method(self, 'setUp')()
+
+        # Setup Volume and Mount Volume
+        if not self.setup_volume_and_mount_volume(mounts=self.mounts):
+            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+        g.log.info("Successful in Setup Volume and Mount Volume")
+
+    def _perform_io_and_validate(self):
+        """Perform IO on the mount points and validate it."""
+        all_mounts_procs, count = [], 1
+        for mount_obj in self.mounts:
+            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
+                   "--dirname-start-num %d --dir-depth 2 "
+                   "--dir-length 3 --max-num-of-dirs 3 "
+                   "--num-of-files 2 %s" % (
+                       self.script_upload_path, count,
+                       mount_obj.mountpoint))
+            proc = g.run_async(mount_obj.client_system, cmd,
+                               user=mount_obj.user)
+            all_mounts_procs.append(proc)
+            # The next mount starts at a different directory number
+            count = count + 10
+
+        # Validating IO's on mount point and waiting to complete
+        ret = validate_io_procs(all_mounts_procs, self.mounts)
+        self.assertTrue(ret, "IO failed on some of the clients")
+        g.log.info("Successfully validated IO's")
+
+    def _replace_bricks_and_wait_for_heal_completion(self):
+        """Replace every brick, waiting for heal after each replace."""
+        existing_bricks = get_all_bricks(self.mnode, self.volname)
+        for brick_to_replace in existing_bricks:
+            ret = replace_brick_from_volume(self.mnode, self.volname,
+                                            self.servers,
+                                            self.all_servers_info,
+                                            src_brick=brick_to_replace)
+            self.assertTrue(ret,
+                            "Replace of %s failed" % brick_to_replace)
+            g.log.info("Replace of brick %s successful for volume %s",
+                       brick_to_replace, self.volname)
+
+            # Monitor heal completion
+            ret = monitor_heal_completion(self.mnode, self.volname)
+            self.assertTrue(ret, 'Heal has not yet completed')
+            g.log.info('Heal has completed successfully')
+
+    def _get_mount_size_from_df_h_output(self):
+        """Extract the mount size from the df -h output.
+
+        Returns:
+            float: second column of the first df -h line matching the
+                volume name, with its last character removed.
+        """
+        # awk picks the second column, sed drops the trailing character.
+        # NOTE(review): that character is presumably the unit suffix, so
+        # values are only comparable while df keeps the same unit - confirm.
+        split_cmd = " | awk '{split($0,a,\" \");print a[2]}' | sed 's/.$//'"
+        cmd = ("cd {};df -h | grep {} {}".format(self.mounts[0].mountpoint,
+                                                 self.volname, split_cmd))
+        ret, mount_size, _ = g.run(self.clients[0], cmd)
+        self.assertEqual(ret, 0, "Failed to extract mount size")
+        return float(mount_size.split("\n")[0])
+
+    def test_verify_df_output_when_brick_replaced(self):
+        """
+        - Take the output of df -h.
+        - Replace any one brick for the volumes.
+        - Wait till the heal is completed
+        - Repeat steps 2 and 3 for all bricks of the volume.
+        - Check if there are any inconsistencies in the output of df -h
+        - Add bricks to volume and check output of df -h
+        - Remove bricks from volume and check output of df -h
+        """
+
+        # Perform some IO on the mount point
+        self._perform_io_and_validate()
+
+        # Get the mount size from df -h output
+        initial_mount_size = self._get_mount_size_from_df_h_output()
+
+        # Replace all the bricks and wait till the heal completes
+        self._replace_bricks_and_wait_for_heal_completion()
+
+        # Get df -h output after brick replace
+        mount_size_after_replace = self._get_mount_size_from_df_h_output()
+
+        # Verify the mount point size remains the same after brick replace
+        self.assertEqual(initial_mount_size, mount_size_after_replace,
+                         "The mount sizes before and after replace bricks "
+                         "are not same")
+
+        # Add bricks
+        ret = expand_volume(self.mnode, self.volname, self.servers,
+                            self.all_servers_info, force=True)
+        self.assertTrue(ret, "Failed to add-brick to volume")
+
+        # Get df -h output after volume expand
+        mount_size_after_expand = self._get_mount_size_from_df_h_output()
+
+        # Verify df -h output returns greater value
+        self.assertGreater(mount_size_after_expand, initial_mount_size,
+                           "The mount size has not increased after expanding")
+
+        # Remove bricks; the message is formatted here, a tuple would be
+        # shown verbatim instead of naming the volume
+        ret = shrink_volume(self.mnode, self.volname, force=True)
+        self.assertTrue(ret, "Remove brick operation failed on "
+                        "%s" % self.volname)
+        g.log.info("Remove brick operation is successful on "
+                   "volume %s", self.volname)
+
+        # Get df -h output after volume shrink
+        mount_size_after_shrink = self._get_mount_size_from_df_h_output()
+
+        # Verify the df -h output returns smaller value
+        self.assertGreater(mount_size_after_expand, mount_size_after_shrink,
+                           "The mount size has not reduced after shrinking")
+
+    def tearDown(self):
+        """
+        Cleanup and umount volume
+
+        Raises:
+            ExecutionError: if the unmount or the volume cleanup fails.
+        """
+        # Cleanup and umount volume
+        if not self.unmount_volume_and_cleanup_volume(mounts=self.mounts):
+            raise ExecutionError("Failed to umount the vol & cleanup Volume")
+        g.log.info("Successful in umounting the volume and Cleanup")
+
+        # Calling GlusterBaseClass teardown
+        self.get_super_method(self, 'tearDown')()
diff --git a/tests/functional/glusterfind/test_glusterfind_when_brick_down.py b/tests/functional/glusterfind/test_glusterfind_when_brick_down.py
new file mode 100644
index 0000000..de1ebaf
--- /dev/null
+++ b/tests/functional/glusterfind/test_glusterfind_when_brick_down.py
@@ -0,0 +1,219 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+"""
+Description:
+ Test Glusterfind when brick is down
+"""
+
+from random import choice
+from time import sleep
+from glusto.core import Glusto as g
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.volume_ops import set_volume_options
+from glustolibs.gluster.peer_ops import wait_for_peers_to_connect
+from glustolibs.gluster.lib_utils import list_files
+from glustolibs.gluster.volume_libs import volume_start
+from glustolibs.gluster.glusterfile import (
+ file_exists,
+ remove_file,
+ check_if_pattern_in_file)
+from glustolibs.gluster.glusterfind_ops import (
+ gfind_create,
+ gfind_list,
+ gfind_pre,
+ gfind_post,
+ gfind_delete)
+from glustolibs.gluster.brick_libs import (
+ get_all_bricks,
+ bring_bricks_offline)
+
+
+@runs_on([["replicated", "distributed-replicated", "dispersed",
+           "distributed", "distributed-dispersed"],
+          ["glusterfs"]])
+class TestGlusterFindBrickDown(GlusterBaseClass):
+    """
+    Test glusterfind operation when a brick is down.
+    """
+
+    def setUp(self):
+        """
+        setup volume and mount volume
+        Initiate necessary variables
+
+        Raises:
+            ExecutionError: if the volume setup or setting the changelog
+                option fails.
+        """
+
+        # calling GlusterBaseClass setUp
+        self.get_super_method(self, 'setUp')()
+
+        # Highest file number created so far by the IO helper
+        self.file_limit = 0
+
+        # Setup Volume and Mount Volume
+        ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
+        if not ret:
+            raise ExecutionError("Failed to Setup_Volume %s" % self.volname)
+        g.log.info("Successful in Setup Volume %s", self.volname)
+        self.session = "test-session-%s" % self.volname
+        self.outfiles = [("/tmp/test-outfile-%s-%s.txt"
+                          % (self.volname, i)) for i in range(2)]
+
+        # Set the changelog rollover-time to 1 second
+        # This needs to be done in order for glusterfind to keep checking
+        # for changes in the mount point
+        option = {'changelog.rollover-time': '1'}
+        ret = set_volume_options(self.mnode, self.volname, option)
+        if not ret:
+            raise ExecutionError("Failed to set the volume option %s for %s"
+                                 % (option, self.volname))
+        g.log.info("Successfully set the volume option for the volume %s",
+                   self.volname)
+
+    def _perform_io_and_validate_presence_of_files(self):
+        """
+        Function to perform the IO and validate the presence of files.
+        """
+        self.file_limit += 10
+        # Create the next batch of ten files: file<limit-9> .. file<limit>.
+        # Starting at limit-10 would also create file0 on the first call
+        # and re-touch the last file of the previous batch on later calls.
+        cmd = ("cd %s ; touch file{%d..%d}" % (self.mounts[0].mountpoint,
+                                               self.file_limit - 9,
+                                               self.file_limit))
+
+        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
+        self.assertEqual(ret, 0, "Failed to create files on mountpoint")
+        g.log.info("Files created successfully on mountpoint")
+
+        # Gather the list of files from the mount point
+        files = list_files(self.mounts[0].client_system,
+                           self.mounts[0].mountpoint)
+        self.assertIsNotNone(files, "Failed to get the list of files")
+        g.log.info("Successfully gathered the list of files from mount point")
+
+        # Check if the files exist
+        for filename in files:
+            ret = file_exists(self.mounts[0].client_system, filename)
+            self.assertTrue(ret, ("Unexpected: File '%s' does not exist"
+                                  % filename))
+            g.log.info("Successfully validated existence of '%s'", filename)
+
+    def _perform_glusterfind_pre_and_validate_outfile(self):
+        """
+        Function to perform glusterfind pre and validate outfile
+        """
+        # Perform glusterfind pre for the session
+        ret, _, _ = gfind_pre(self.mnode, self.volname, self.session,
+                              self.outfiles[0], full=True, noencode=True,
+                              debug=True)
+        self.assertEqual(ret, 0, ("Failed to perform glusterfind pre"))
+        g.log.info("Successfully performed glusterfind pre")
+
+        # Check if the outfile exists
+        ret = file_exists(self.mnode, self.outfiles[0])
+        self.assertTrue(ret, ("Unexpected: File '%s' does not exist"
+                              % self.outfiles[0]))
+        g.log.info("Successfully validated existence of '%s'",
+                   self.outfiles[0])
+
+        # Check if all the files are listed in the outfile
+        for i in range(1, self.file_limit+1):
+            ret = check_if_pattern_in_file(self.mnode, "file%s" % i,
+                                           self.outfiles[0])
+            self.assertEqual(ret, 0, ("File 'file%s' not listed in %s"
+                                      % (i, self.outfiles[0])))
+            g.log.info("File 'file%s' listed in %s", i, self.outfiles[0])
+
+    def test_gfind_when_brick_down(self):
+        """
+        Verifying the glusterfind functionality when a brick is down.
+
+        1. Create a volume
+        2. Create a session on the volume
+        3. Create various files from mount point
+        4. Bring down brick process on one of the node
+        5. Perform glusterfind pre
+        6. Perform glusterfind post
+        7. Check the contents of outfile
+        """
+
+        # pylint: disable=too-many-statements
+        # Create a session for the volume
+        ret, _, _ = gfind_create(self.mnode, self.volname, self.session)
+        self.assertEqual(ret, 0, ("Unexpected: Creation of a session for the "
+                                  "volume %s failed" % self.volname))
+        g.log.info("Successfully created a session for the volume %s",
+                   self.volname)
+
+        # Perform glusterfind list to check if session exists.
+        # The output is stripped so that a trailing newline cannot hide
+        # the "No sessions found." message.
+        _, out, _ = gfind_list(self.mnode, volname=self.volname,
+                               sessname=self.session)
+        self.assertNotEqual(out.strip(), "No sessions found.",
+                            "Failed to list the glusterfind session")
+        g.log.info("Successfully listed the glusterfind session")
+
+        self._perform_io_and_validate_presence_of_files()
+
+        # Wait for changelog to get updated
+        sleep(2)
+
+        # Bring one of the brick down.
+        brick_list = get_all_bricks(self.mnode, self.volname)
+        ret = bring_bricks_offline(self.volname, choice(brick_list))
+        self.assertTrue(ret, "Failed to bring down the brick.")
+        g.log.info("Succesfully brought down one brick.")
+
+        self._perform_glusterfind_pre_and_validate_outfile()
+
+        # Perform glusterfind post for the session
+        ret, _, _ = gfind_post(self.mnode, self.volname, self.session)
+        self.assertEqual(ret, 0, ("Failed to perform glusterfind post"))
+        g.log.info("Successfully performed glusterfind post")
+
+        # Bring the brick process up.
+        # NOTE(review): volume_start is unpacked as (ret, out, err), as it
+        # is elsewhere in this change; asserting on the whole tuple would
+        # always pass - confirm the volume_libs import is that function.
+        ret, _, _ = volume_start(self.mnode, self.volname, force=True)
+        self.assertEqual(ret, 0, "Failed to start the volume.")
+        g.log.info("Successfully started the volume.")
+
+    def tearDown(self):
+        """
+        tearDown for every test
+        Clean up and unmount the volume
+
+        Raises:
+            ExecutionError: if deleting the session, removing an outfile,
+                waiting for the peers or cleaning up the volume fails.
+        """
+        # Delete the glusterfind sessions
+        ret, _, _ = gfind_delete(self.mnode, self.volname, self.session)
+        if ret:
+            raise ExecutionError("Failed to delete session %s" % self.session)
+        g.log.info("Successfully deleted session %s", self.session)
+
+        # Remove the outfiles created during 'glusterfind pre'
+        for out in self.outfiles:
+            ret = remove_file(self.mnode, out, force=True)
+            if not ret:
+                raise ExecutionError("Failed to remove the outfile %s" % out)
+        g.log.info("Successfully removed the outfiles")
+
+        # Wait for the peers to be connected.
+        ret = wait_for_peers_to_connect(self.mnode, self.servers, 100)
+        if not ret:
+            raise ExecutionError("Peers are not in connected state.")
+
+        # Cleanup the volume
+        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+        if not ret:
+            raise ExecutionError("Failed to Cleanup Volume")
+        g.log.info("Successful in Cleanup Volume")
+
+        # calling GlusterBaseClass tearDown once the test's own cleanup
+        # is done
+        self.get_super_method(self, 'tearDown')()
diff --git a/tests/functional/resource_leak/test_memory_leaks_with_files_delete.py b/tests/functional/resource_leak/test_memory_leaks_with_files_delete.py
new file mode 100644
index 0000000..ab29fdb
--- /dev/null
+++ b/tests/functional/resource_leak/test_memory_leaks_with_files_delete.py
@@ -0,0 +1,113 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.glusterdir import get_dir_contents
+from glustolibs.io.memory_and_cpu_utils import (
+ wait_for_logging_processes_to_stop)
+from glustolibs.gluster.brick_libs import get_all_bricks
+
+
+@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']])
+class TestMemoryLeakWithRm(GlusterBaseClass):
+    """Look for memory leaks and OOM kills after bulk create and delete."""
+
+    def setUp(self):
+        """Record the test id, then create and mount the volume."""
+        self.get_super_method(self, 'setUp')()
+
+        # Identifier handed to the resource usage helpers
+        self.test_id = self.id()
+
+        # No I/O has been started yet
+        self.is_io_running = False
+
+        # Create the volume and mount it
+        if not self.setup_volume_and_mount_volume(self.mounts):
+            raise ExecutionError("Volume creation or mount failed: %s"
+                                 % self.volname)
+
+    def tearDown(self):
+        """Unmount and clean up the volume, then run the base tearDown."""
+        if not self.unmount_volume_and_cleanup_volume(self.mounts):
+            raise ExecutionError("Unable to delete volume %s" % self.volname)
+
+        self.get_super_method(self, 'tearDown')()
+
+    def test_memory_leak_with_rm(self):
+        """
+        Test case:
+        1. Create a volume, start it and mount it.
+        2. Create 10,000 files each of size 200K
+        3. Delete the files created at step 2
+        4. Check if the files are deleted from backend
+        5. Check if there are any memory leaks and OOM killers.
+        """
+        mountpoint = self.mounts[0].mountpoint
+
+        # Start logging resource usage on servers and clients
+        monitor_proc_dict = self.start_memory_and_cpu_usage_logging(
+            self.test_id, count=30)
+        self.assertIsNotNone(monitor_proc_dict,
+                             "Failed to start monitoring on servers and "
+                             "clients")
+
+        # One shell command creates the files and removes them again
+        cmd = ('cd %s;for i in {1..10000};'
+               'do dd if=/dev/urandom bs=200K count=1 of=file$i;done;'
+               'rm -rf %s/file*' % (mountpoint, mountpoint))
+        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
+        self.assertEqual(ret, 0, "Failed to create and delete files on"
+                         " mountpoint")
+        g.log.info("Successfully created and removed files on mountpoint")
+
+        # Wipe the mount point, then make sure nothing is left on the
+        # mount point or on any backend brick
+        ret, _, _ = g.run(self.clients[0], "rm -rf {}/*".format(mountpoint))
+        self.assertFalse(ret, "rm -rf * failed on mount point")
+
+        mount_contents = get_dir_contents(self.clients[0],
+                                          "{}/".format(mountpoint))
+        self.assertEqual(mount_contents, [],
+                         "Unexpected: Files and directories still "
+                         "seen from mount point")
+
+        for brick in get_all_bricks(self.mnode, self.volname):
+            node, brick_path = brick.split(":")
+            brick_contents = get_dir_contents(node, "{}/".format(brick_path))
+            self.assertEqual(brick_contents, [],
+                             "Unexpected: Files and dirs still seen "
+                             "on brick %s on node %s" % (brick_path, node))
+        g.log.info("rm -rf * on mount point successful")
+
+        # Let the monitoring processes finish
+        stopped = wait_for_logging_processes_to_stop(monitor_proc_dict,
+                                                     cluster=True)
+        self.assertTrue(stopped,
+                        "ERROR: Failed to stop monitoring processes")
+
+        # Both checks are expected to return a false value
+        leaks_on_servers = (
+            self.check_for_memory_leaks_and_oom_kills_on_servers(
+                self.test_id))
+        self.assertFalse(leaks_on_servers,
+                         "Memory leak and OOM kills check failed on servers")
+
+        leaks_on_clients = (
+            self.check_for_memory_leaks_and_oom_kills_on_clients(
+                self.test_id))
+        self.assertFalse(leaks_on_clients,
+                         "Memory leak and OOM kills check failed on clients")
+        g.log.info("No memory leaks or OOM kills found on serves and clients")
diff --git a/tests/functional/resource_leak/test_verify_gluster_memleak_with_management_encryption.py b/tests/functional/resource_leak/test_verify_gluster_memleak_with_management_encryption.py
new file mode 100644
index 0000000..25f8325
--- /dev/null
+++ b/tests/functional/resource_leak/test_verify_gluster_memleak_with_management_encryption.py
@@ -0,0 +1,231 @@
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+from datetime import datetime, timedelta
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.lib_utils import get_usable_size_per_disk
+from glustolibs.gluster.volume_libs import (get_subvols, bulk_volume_creation,
+ volume_stop, volume_start,
+ set_volume_options)
+from glustolibs.io.memory_and_cpu_utils import (
+ wait_for_logging_processes_to_stop)
+from glustolibs.gluster.brick_libs import get_all_bricks
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.io.utils import validate_io_procs
+from glustolibs.gluster.brickmux_ops import (enable_brick_mux,
+ disable_brick_mux,
+ is_brick_mux_enabled)
+from glustolibs.gluster.mount_ops import mount_volume, umount_volume
+
+
+@runs_on([['distributed-replicated'], ['glusterfs']])
+class TestMemLeakAfterMgmntEncrypEnabled(GlusterBaseClass):
+    """Memory-leak checks for gluster processes on a 2x3 volume.
+
+    NOTE(review): nothing in this class enables management encryption;
+    the tests presumably expect the cluster to be deployed with it
+    already enabled -- confirm against the test environment.
+    """
+
+    def setUp(self):
+        """Set up and mount a 2x3 volume, then turn I/O encryption off.
+
+        Raises:
+            ExecutionError: if the volume cannot be set up and mounted.
+        """
+        self.get_super_method(self, 'setUp')()
+        self.test_id = self.id()
+
+        # Setup Volume
+        self.volume['dist_count'] = 2
+        self.volume['replica_count'] = 3
+
+        ret = self.setup_volume_and_mount_volume([self.mounts[0]])
+        if not ret:
+            raise ExecutionError("Failed to Setup and Mount Volume")
+
+        # Disable I/O encryption
+        self._disable_io_encryption()
+
+    def tearDown(self):
+        """Disable brick multiplex if enabled, then unmount and clean up.
+
+        Raises:
+            ExecutionError: if unmounting or volume cleanup fails.
+        """
+        # NOTE(review): volumes made by bulk_volume_creation() in the brick
+        # multiplex test are not removed here -- confirm whether they should.
+        try:
+            # Disable brick_mux
+            if is_brick_mux_enabled(self.mnode):
+                ret = disable_brick_mux(self.mnode)
+                self.assertTrue(ret, "Failed to disable brick multiplex")
+                g.log.info("Disable brick multiplex")
+        finally:
+            # Clean up even when the step above fails, so that a mounted
+            # volume is not left behind for the tests that run next.
+            ret = self.unmount_volume_and_cleanup_volume(
+                mounts=[self.mounts[0]])
+            if not ret:
+                raise ExecutionError("Failed to umount the vol & cleanup "
+                                     "Volume")
+            g.log.info("Successful in umounting the volume and Cleanup")
+
+            # Calling GlusterBaseClass tearDown
+            self.get_super_method(self, 'tearDown')()
+
+    def _run_io(self):
+        """Start async fallocate jobs sized at 88% of the usable brick size.
+
+        One file is created per subvolume of the volume. The started
+        processes are kept in self.procs so that the calling test can
+        validate them later.
+        """
+        bricks = get_all_bricks(self.mnode, self.volname)
+        self.assertTrue(bricks, "Failed to get the brick list")
+
+        # NOTE(review): get_usable_size_per_disk() presumably returns None
+        # on error; fail with a clear message instead of a TypeError.
+        disk_size = get_usable_size_per_disk(bricks[0])
+        self.assertIsNotNone(disk_size, "Failed to get usable disk size")
+        usable_size = int(disk_size * 0.88)
+
+        self.procs = []
+        subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
+        for counter, _ in enumerate(subvols, start=1):
+            filename = "{}/test_file_{}".format(self.mounts[0].mountpoint,
+                                                counter)
+            cmd = "fallocate -l {}G {}".format(usable_size, filename)
+            proc = g.run_async(self.mounts[0].client_system, cmd)
+            self.procs.append(proc)
+
+    def _perform_gluster_v_heal_for_12_hrs(self):
+        """Run 'gluster volume heal <volname> info' back to back for 12 hrs.
+
+        Every invocation must exit with status 0, otherwise the test fails.
+        """
+        cmd = "gluster volume heal %s info" % self.volname
+        end_time = datetime.now() + timedelta(hours=12)
+        while datetime.now() <= end_time:
+            ret, _, _ = g.run(self.mnode, cmd)
+            self.assertEqual(ret, 0, "Failed to execute heal info cmd")
+        g.log.info("Successfully ran for 12 hours. Checking for "
+                   "memory leaks")
+
+    def _verify_memory_leak(self):
+        """Fail the test if the server or client checks report a problem."""
+        ret = self.check_for_memory_leaks_and_oom_kills_on_servers(
+            self.test_id)
+        self.assertFalse(ret,
+                         "Memory leak and OOM kills check failed on servers")
+
+        ret = self.check_for_memory_leaks_and_oom_kills_on_clients(
+            self.test_id)
+        self.assertFalse(ret,
+                         "Memory leak and OOM kills check failed on clients")
+
+    def _disable_io_encryption(self):
+        """Turn server.ssl and client.ssl off on the volume.
+
+        The volume is unmounted and stopped before the options are set,
+        and started and mounted again afterwards.
+        """
+        # UnMount Volume
+        g.log.info("Starting to Unmount Volume %s", self.volname)
+        ret, _, _ = umount_volume(self.mounts[0].client_system,
+                                  self.mounts[0].mountpoint,
+                                  mtype=self.mount_type)
+        self.assertEqual(ret, 0, "Failed to Unmount volume")
+
+        # Stop Volume
+        ret, _, _ = volume_stop(self.mnode, self.volname)
+        self.assertEqual(ret, 0, "Failed to Stop volume")
+
+        # Disable server and client SSL usage
+        options = {"server.ssl": "off",
+                   "client.ssl": "off"}
+        ret = set_volume_options(self.mnode, self.volname, options)
+        self.assertTrue(ret, "Failed to set volume options")
+
+        # Start Volume
+        ret, _, _ = volume_start(self.mnode, self.volname)
+        self.assertEqual(ret, 0, "Failed to Start volume")
+
+        # Mount Volume
+        ret, _, _ = mount_volume(self.volname, mtype=self.mount_type,
+                                 mpoint=self.mounts[0].mountpoint,
+                                 mserver=self.mnode,
+                                 mclient=self.mounts[0].client_system)
+        self.assertEqual(ret, 0, "Failed to mount the volume back")
+
+    def test_mem_leak_on_gluster_procs_with_management_encrpytion(self):
+        """
+        Steps:
+        1) Enable management encryption on the cluster.
+        2) Create a 2X3 volume.
+        3) Mount the volume using FUSE on a client node.
+        4) Start doing IO on the mount (ran IO till the volume is ~88% full)
+        5) Simultaneously start collecting the memory usage for
+           'glusterfsd' process.
+        6) Issue the command "# gluster v heal <volname> info" continuously
+           in a loop.
+        """
+        # Run IO
+        self._run_io()
+
+        # Start monitoring resource usage on servers and clients.
+        # count=780 samples at the (presumed) default interval of 60 sec
+        # is 13 hrs: the 12 hr heal info loop (60 * 12 = 720) plus margin.
+        monitor_proc_dict = self.start_memory_and_cpu_usage_logging(
+            self.test_id, count=780)
+        self.assertIsNotNone(monitor_proc_dict,
+                             "Failed to start monitoring on servers and "
+                             "clients")
+
+        ret = validate_io_procs(self.procs, self.mounts)
+        self.assertTrue(ret, "IO Failed")
+
+        self._perform_gluster_v_heal_for_12_hrs()
+
+        # Wait for monitoring processes to complete
+        ret = wait_for_logging_processes_to_stop(monitor_proc_dict,
+                                                 cluster=True)
+        self.assertTrue(ret, "ERROR: Failed to stop monitoring processes")
+
+        # Check if there are any memory leaks and OOM killers
+        self._verify_memory_leak()
+        g.log.info("No memory leaks/OOM kills found on serves and clients")
+
+    def test_mem_leak_on_gluster_procs_with_brick_multiplex(self):
+        """
+        Steps:
+        1) Enable cluster.brick-multiplex
+        2) Enable SSL on management layer
+        3) Start creating volumes
+        4) Mount a volume and starting I/O
+        5) Monitor the memory consumption by glusterd process
+        """
+        # Enable cluster.brick-multiplex
+        ret = enable_brick_mux(self.mnode)
+        self.assertTrue(ret, "Failed to enable brick-multiplex")
+
+        # Verify the operation
+        ret = is_brick_mux_enabled(self.mnode)
+        self.assertTrue(ret, "Brick mux enble op not successful")
+
+        # Create few volumes
+        self.volume['replica_count'] = 3
+        ret = bulk_volume_creation(self.mnode, 20, self.all_servers_info,
+                                   self.volume, is_force=True)
+        self.assertTrue(ret, "Failed to create bulk volume")
+
+        # Run IO
+        self._run_io()
+
+        # Start memory usage logging
+        monitor_proc_dict = self.start_memory_and_cpu_usage_logging(
+            self.test_id, count=60)
+        self.assertIsNotNone(monitor_proc_dict,
+                             "Failed to start monitoring on servers and "
+                             "clients")
+
+        ret = validate_io_procs(self.procs, self.mounts)
+        self.assertTrue(ret, "IO Failed")
+
+        # Wait for monitoring processes to complete
+        ret = wait_for_logging_processes_to_stop(monitor_proc_dict,
+                                                 cluster=True)
+        self.assertTrue(ret, "ERROR: Failed to stop monitoring processes")
+
+        # Check if there are any memory leaks and OOM killers
+        self._verify_memory_leak()
+        g.log.info("No memory leaks/OOM kills found on serves and clients")
+
+        # Disable Brick multiplex
+        ret = disable_brick_mux(self.mnode)
+        self.assertTrue(ret, "Failed to disable brick multiplex")