summaryrefslogtreecommitdiffstats
path: root/tests/functional/afr/heal
diff options
context:
space:
mode:
Diffstat (limited to 'tests/functional/afr/heal')
-rw-r--r--tests/functional/afr/heal/test_dir_time_stamp_restoration.py160
-rw-r--r--tests/functional/afr/heal/test_heal_info_no_hang.py162
-rw-r--r--tests/functional/afr/heal/test_no_glustershd_with_distribute.py4
-rw-r--r--tests/functional/afr/heal/test_self_heal_with_link_files.py405
-rw-r--r--tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py600
-rw-r--r--tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py23
6 files changed, 1346 insertions, 8 deletions
diff --git a/tests/functional/afr/heal/test_dir_time_stamp_restoration.py b/tests/functional/afr/heal/test_dir_time_stamp_restoration.py
new file mode 100644
index 000000000..6a4ef2a19
--- /dev/null
+++ b/tests/functional/afr/heal/test_dir_time_stamp_restoration.py
@@ -0,0 +1,160 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+"""
+Description:
+ Check if parent directory timestamps are restored after an entry heal.
+"""
+from glusto.core import Glusto as g
+
+from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on)
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.brick_libs import (
+ bring_bricks_offline,
+ bring_bricks_online,
+ are_bricks_offline,
+ select_volume_bricks_to_bring_offline,
+ get_all_bricks)
+from glustolibs.gluster.heal_ops import trigger_heal
+from glustolibs.gluster.glusterdir import (mkdir, rmdir)
+from glustolibs.gluster.glusterfile import (get_fattr, get_file_stat)
+from glustolibs.gluster.volume_libs import set_volume_options
+from glustolibs.gluster.heal_libs import monitor_heal_completion
+
+
+@runs_on([['replicated'],
+ ['glusterfs']])
+class TestDirTimeStampRestore(GlusterBaseClass):
+
+ def setUp(self):
+ self.get_super_method(self, 'setUp')()
+
+ ret = self.setup_volume_and_mount_volume(mounts=self.mounts,
+ volume_create_force=False)
+ if not ret:
+ raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+ g.log.info("Successful in Setup Volume and Mount Volume")
+
+ self.bricks_list = get_all_bricks(self.mnode, self.volname)
+
+ def tearDown(self):
+ ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+ if not ret:
+ raise ExecutionError("Failed to umount the vol & cleanup Volume")
+ g.log.info("Successful in umounting the volume and Cleanup")
+
+ self.get_super_method(self, 'tearDown')()
+
+    def are_mdata_xattrs_equal(self):
+        """Check that dir1's glusterfs.mdata xattr is identical on all bricks"""
+        timestamps = []
+        for brick_path in self.bricks_list:
+            server, brick = brick_path.split(':')
+            fattr = get_fattr(server, '%s/%s' % (brick, "dir1"),
+                              'trusted.glusterfs.mdata')
+            self.assertIsNotNone(fattr, 'Unable to get mdata xattr')
+            timestamps.append(fattr)
+        # Separate the values so each brick's xattr is readable in the log.
+        g.log.debug("mdata list = %s", ', '.join(map(str, timestamps)))
+        return bool(timestamps) and len(set(timestamps)) == 1
+
+    def are_stat_timestamps_equal(self):
+        """Check that dir1's stat atime/mtime/ctime match on all bricks"""
+        timestamps = []
+        for brick_path in self.bricks_list:
+            server, brick = brick_path.split(':')
+            stat_data = get_file_stat(server, "%s/dir1" % brick)
+            # Presumably None on failure (verify); avoid a bare TypeError.
+            self.assertIsNotNone(stat_data, 'Unable to stat dir1 on brick')
+            timestamps.append("{}-{}-{}".format(
+                stat_data['epoch_atime'], stat_data['epoch_mtime'],
+                stat_data['epoch_ctime']))
+        g.log.debug("stat list = %s", ', '.join(timestamps))
+        return bool(timestamps) and len(set(timestamps)) == 1
+
+    def perform_test(self, ctime):
+        """
+        Testcase steps:
+        1. Enable/disable features.ctime based on function argument.
+        2. Create a directory on the mount point.
+        3. Kill a brick and create a file inside the directory.
+        4. Bring the brick online.
+        5. Trigger heal and wait for its completion.
+        6. Verify that the atime, mtime and ctime of the directory are same on
+           all bricks of the replica.
+        """
+        option = {'features.ctime': 'on' if ctime else 'off'}
+        ret = set_volume_options(self.mnode, self.volname, option)
+        self.assertTrue(ret, 'failed to set option %s on %s'
+                        % (option, self.volname))
+
+        client, m_point = (self.mounts[0].client_system,
+                           self.mounts[0].mountpoint)
+
+        dirpath = '{}/dir1'.format(m_point)
+        ret = mkdir(client, dirpath)
+        self.assertTrue(ret, 'Unable to create a directory from mount point')
+
+        # Creating file1 while some bricks are down is what leaves dir1 with
+        # a pending entry heal once those bricks come back.
+        bricks_to_bring_offline = select_volume_bricks_to_bring_offline(
+            self.mnode, self.volname)
+        # assertTrue rejects both None and an empty selection.
+        self.assertTrue(bricks_to_bring_offline, "List is empty")
+        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
+        self.assertTrue(ret, 'Failed to bring bricks {} offline'
+                        .format(bricks_to_bring_offline))
+        ret = are_bricks_offline(self.mnode, self.volname,
+                                 bricks_to_bring_offline)
+        self.assertTrue(ret, 'Bricks {} are not offline'
+                        .format(bricks_to_bring_offline))
+
+        cmd = 'touch {}/file1'.format(dirpath)
+        ret, _, _ = g.run(client, cmd)
+        self.assertEqual(ret, 0, 'Unable to create file from mount point')
+
+        ret = bring_bricks_online(
+            self.mnode, self.volname,
+            bricks_to_bring_offline,
+            bring_bricks_online_methods=['volume_start_force'])
+        self.assertTrue(ret, 'Failed to bring bricks {} online'
+                        .format(bricks_to_bring_offline))
+        ret = trigger_heal(self.mnode, self.volname)
+        self.assertTrue(ret, 'Starting heal failed')
+        ret = monitor_heal_completion(self.mnode, self.volname)
+        self.assertTrue(ret, 'Heal has not yet completed')
+
+        if ctime:
+            ret = self.are_mdata_xattrs_equal()
+            self.assertTrue(ret, "glusterfs.mdata mismatch for {}"
+                            .format(dirpath))
+        else:
+            ret = self.are_stat_timestamps_equal()
+            self.assertTrue(ret, "stat mismatch for {}".format(dirpath))
+
+        ret = rmdir(client, dirpath, force=True)
+        self.assertTrue(ret, 'Unable to delete directory from mount point')
+
+ def test_dir_time_stamp_restoration(self):
+ """
+ Create pending entry self-heal on a replica volume and verify that
+ after the heal is complete, the atime, mtime and ctime of the parent
+ directory are identical on all bricks of the replica.
+
+ The test is run with features.ctime enabled as well as disabled.
+ """
+ self.perform_test(ctime=True)
+ self.perform_test(ctime=False)
diff --git a/tests/functional/afr/heal/test_heal_info_no_hang.py b/tests/functional/afr/heal/test_heal_info_no_hang.py
new file mode 100644
index 000000000..82f8b0598
--- /dev/null
+++ b/tests/functional/afr/heal/test_heal_info_no_hang.py
@@ -0,0 +1,162 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+"""
+Description:
+ heal info completes when there is ongoing I/O and a lot of pending heals.
+"""
+import random
+from glusto.core import Glusto as g
+
+from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on)
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.volume_libs import get_subvols
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+ bring_bricks_online,
+ are_bricks_offline,
+ get_all_bricks)
+from glustolibs.gluster.heal_ops import trigger_heal
+from glustolibs.io.utils import run_linux_untar
+from glustolibs.gluster.glusterdir import mkdir
+
+
+@runs_on([['distributed-replicated'],
+ ['glusterfs']])
+class TestHealInfoNoHang(GlusterBaseClass):
+
+ def setUp(self):
+ self.get_super_method(self, 'setUp')()
+
+ self.is_io_running = False
+
+ ret = self.setup_volume_and_mount_volume(mounts=self.mounts,
+ volume_create_force=False)
+ if not ret:
+ raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+ g.log.info("Successful in Setup Volume and Mount Volume")
+
+ self.bricks_list = get_all_bricks(self.mnode, self.volname)
+ self.subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
+
+ def tearDown(self):
+ if self.is_io_running:
+ if not self._wait_for_untar_completion():
+ g.log.error("I/O failed to stop on clients")
+
+ ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+ if not ret:
+ raise ExecutionError("Failed to umount the vol & cleanup Volume")
+ g.log.info("Successful in umounting the volume and Cleanup")
+
+ self.get_super_method(self, 'tearDown')()
+
+    def _wait_for_untar_completion(self):
+        """Wait for the I/O in self.list_of_io_processes to finish.
+        Returns True only if every process exited with status 0; a ValueError
+        from async_communicate() is treated as already finished."""
+        has_process_stopped = []
+        for proc in self.list_of_io_processes:
+            try:
+                ret, _, _ = proc.async_communicate()
+                has_process_stopped.append(ret == 0)
+            except ValueError:
+                has_process_stopped.append(True)
+        return all(has_process_stopped)
+
+    def _does_heal_info_complete_within_timeout(self):
+        """Check if heal info CLI completes within a specific timeout"""
+        # Assume one second per pending entry: generous, but better than a
+        # magic value. Never pass 0, which timeout(1) treats as "no limit".
+        timeout = max(self.num_entries, 1)
+        cmd = "timeout %s gluster volume heal %s info" % (timeout,
+                                                          self.volname)
+        ret, _, _ = g.run(self.mnode, cmd)
+        return ret == 0
+
+    def test_heal_info_no_hang(self):
+        """
+        Testcase steps:
+        1. Start kernel untar on the mount
+        2. While untar is going on, kill a brick of the replica.
+        3. Wait for the untar to be over, resulting in pending heals.
+        4. Get the approx. number of pending heals and save it
+        5. Bring the brick back online.
+        6. Trigger heal
+        7. Run more I/Os with dd command
+        8. Run heal info command and check that it completes successfully under
+           a timeout that is based on the no. of heals in step 4.
+        """
+        self.list_of_io_processes = []
+        self.linux_untar_dir = "{}/{}".format(self.mounts[0].mountpoint,
+                                              "linuxuntar")
+        ret = mkdir(self.clients[0], self.linux_untar_dir)
+        self.assertTrue(ret, "Failed to create dir linuxuntar for untar")
+
+        # Start linux untar on dir linuxuntar
+        ret = run_linux_untar(self.clients[0], self.mounts[0].mountpoint,
+                              dirs=tuple(['linuxuntar']))
+        self.list_of_io_processes += ret
+        self.is_io_running = True
+
+        # Kill brick resulting in heal backlog.
+        brick_to_bring_offline = random.choice(self.bricks_list)
+        ret = bring_bricks_offline(self.volname, brick_to_bring_offline)
+        self.assertTrue(ret, 'Failed to bring bricks %s offline'
+                        % brick_to_bring_offline)
+        ret = are_bricks_offline(self.mnode, self.volname,
+                                 [brick_to_bring_offline])
+        self.assertTrue(ret, 'Bricks %s are not offline'
+                        % brick_to_bring_offline)
+        g.log.info('Bringing bricks %s offline is successful',
+                   brick_to_bring_offline)
+
+        ret = self._wait_for_untar_completion()
+        self.assertTrue(ret, "IO didn't complete or failed on client")
+        self.is_io_running = False
+
+        # Get approx. no. of entries to be healed.
+        cmd = ("gluster volume heal %s statistics heal-count | grep Number "
+               "| awk '{sum+=$4} END {print sum/2}'" % self.volname)
+        ret, out, _ = g.run(self.mnode, cmd)
+        self.assertEqual(ret, 0, "Failed to get heal-count statistics")
+        # stdout is text with a trailing newline; awk may print a fraction.
+        self.num_entries = int(float(out.strip() or 0))
+
+        # Restart the down bricks
+        ret = bring_bricks_online(self.mnode, self.volname,
+                                  brick_to_bring_offline)
+        self.assertTrue(ret, 'Failed to bring brick %s online' %
+                        brick_to_bring_offline)
+        g.log.info('Bringing brick %s online is successful',
+                   brick_to_bring_offline)
+        # Trigger heal
+        ret = trigger_heal(self.mnode, self.volname)
+        self.assertTrue(ret, 'Starting heal failed')
+        g.log.info('Index heal launched')
+
+        # Run more I/O
+        cmd = ("for i in `seq 1 10`; do dd if=/dev/urandom of=%s/file_$i "
+               "bs=1M count=100; done" % self.mounts[0].mountpoint)
+        proc = g.run_async(self.mounts[0].client_system, cmd,
+                           user=self.mounts[0].user)
+        # Track the dd so tearDown waits for it before unmounting.
+        self.list_of_io_processes = [proc]
+        self.is_io_running = True
+
+        # Get heal info
+        ret = self._does_heal_info_complete_within_timeout()
+        self.assertTrue(ret, 'Heal info timed out')
+        g.log.info('Heal info completed succesfully')
diff --git a/tests/functional/afr/heal/test_no_glustershd_with_distribute.py b/tests/functional/afr/heal/test_no_glustershd_with_distribute.py
index d2b43bfe3..bbefe0cff 100644
--- a/tests/functional/afr/heal/test_no_glustershd_with_distribute.py
+++ b/tests/functional/afr/heal/test_no_glustershd_with_distribute.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com>
+# Copyright (C) 2017-2021 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -58,7 +58,7 @@ class SelfHealDaemonProcessTestsWithMultipleVolumes(GlusterBaseClass):
for volume_config in cls.volume_configs:
ret = setup_volume(mnode=cls.mnode,
all_servers_info=cls.all_servers_info,
- volume_config=volume_config)
+ volume_config=volume_config, multi_vol=True)
volname = volume_config['name']
if not ret:
raise ExecutionError("Failed to setup Volume"
diff --git a/tests/functional/afr/heal/test_self_heal_with_link_files.py b/tests/functional/afr/heal/test_self_heal_with_link_files.py
new file mode 100644
index 000000000..d029c3d9e
--- /dev/null
+++ b/tests/functional/afr/heal/test_self_heal_with_link_files.py
@@ -0,0 +1,405 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from random import choice
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+ bring_bricks_online,
+ are_bricks_offline,
+ are_bricks_online,
+ get_all_bricks)
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+ is_volume_in_split_brain,
+ is_heal_complete)
+from glustolibs.gluster.lib_utils import collect_bricks_arequal
+from glustolibs.gluster.volume_libs import (get_subvols,
+ replace_brick_from_volume)
+from glustolibs.io.utils import collect_mounts_arequal
+
+
+@runs_on([['distributed-replicated', 'replicated'], ['glusterfs']])
+class TestHealWithLinkFiles(GlusterBaseClass):
+
+ def setUp(self):
+
+ self.get_super_method(self, 'setUp')()
+
+ # Setup Volume
+ if not self.setup_volume_and_mount_volume([self.mounts[0]]):
+ raise ExecutionError("Failed to setup and mount volume")
+
+ self.first_client = self.mounts[0].client_system
+ self.mountpoint = self.mounts[0].mountpoint
+
+ def tearDown(self):
+
+ if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]):
+ raise ExecutionError("Failed to cleanup Volume")
+
+ # Calling GlusterBaseClass tearDown
+ self.get_super_method(self, 'tearDown')()
+
+    def _create_files_and_dirs_on_mount_point(self, second_attempt=False):
+        """Create dir.1-5/file.1-10 (first attempt, incl. the parent dir) or
+        sym_link_dir.1-5/new_file.1-10 (second attempt) on the mount."""
+        if not second_attempt:
+            ret = mkdir(self.first_client,
+                        '{}/{}'.format(self.mountpoint,
+                                       'test_link_self_heal'))
+            self.assertTrue(ret, "Failed to create dir test_link_self_heal")
+
+        # /dev/urandom never blocks; /dev/random can stall on low entropy.
+        io_cmd = ("for i in `seq 1 5`; do mkdir dir.$i; "
+                  "for j in `seq 1 10`; do dd if=/dev/urandom "
+                  "of=dir.$i/file.$j bs=1k count=$j; done; done")
+        if second_attempt:
+            io_cmd = ("for i in `seq 1 5` ; do for j in `seq 1 10`; "
+                      "do dd if=/dev/urandom of=sym_link_dir.$i/"
+                      "new_file.$j bs=1k count=$j; done; done ")
+        cmd = ("cd {}/test_link_self_heal;{}".format(self.mountpoint, io_cmd))
+        ret, _, _ = g.run(self.first_client, cmd)
+        self.assertFalse(ret, "Failed to create dirs and files inside")
+
+ def _create_soft_links_to_directories(self):
+ """Create soft links to directories"""
+ cmd = ("cd {}/test_link_self_heal; for i in `seq 1 5`; do ln -s "
+ "dir.$i sym_link_dir.$i; done".format(self.mountpoint))
+ ret, _, _ = g.run(self.first_client, cmd)
+ self.assertFalse(ret, "Failed to create soft links to dirs")
+
+ def _verify_soft_links_to_dir(self, option=0):
+ """Verify soft links to dir"""
+
+ cmd_list = [
+ ("for i in `seq 1 5`; do stat -c %F sym_link_dir.$i | "
+ "grep -F 'symbolic link'; if [ $? -ne 0 ]; then exit 1;"
+ " fi ; done; for i in `seq 1 5` ; do readlink sym_link_dir.$i | "
+ "grep \"dir.$i\"; if [ $? -ne 0 ]; then exit 1; fi; done; "),
+ ("for i in `seq 1 5`; do for j in `seq 1 10`; do ls "
+ "dir.$i/new_file.$j; if [ $? -ne 0 ]; then exit 1; fi; done; "
+ "done")]
+
+ # Generate command to check according to option
+ if option == 2:
+ verify_cmd = "".join(cmd_list)
+ else:
+ verify_cmd = cmd_list[option]
+
+ cmd = ("cd {}/test_link_self_heal; {}".format(self.mountpoint,
+ verify_cmd))
+ ret, _, _ = g.run(self.first_client, cmd)
+ self.assertFalse(ret, "Symlinks aren't proper")
+
+ def _create_hard_links_to_files(self, second_attempt=False):
+ """Create hard links to files"""
+ io_cmd = ("for i in `seq 1 5`;do for j in `seq 1 10`;do ln "
+ "dir.$i/file.$j dir.$i/link_file.$j;done; done")
+ if second_attempt:
+ io_cmd = ("for i in `seq 1 5`; do mkdir new_dir.$i; for j in "
+ "`seq 1 10`; do ln dir.$i/file.$j new_dir.$i/new_file."
+ "$j;done; done;")
+
+ cmd = ("cd {}/test_link_self_heal;{}".format(self.mountpoint, io_cmd))
+ ret, _, _ = g.run(self.first_client, cmd)
+ self.assertFalse(ret, "Failed to create hard links to files")
+
+    def _verify_hard_links_to_files(self, second_set=False):
+        """Verify each hard link shares its inode with dir.$i/file.$j"""
+        file_to_compare = "dir.$i/link_file.$j"
+        if second_set:
+            file_to_compare = "new_dir.$i/new_file.$j"
+        # A failed stat must fail the check; `[ a -ne ]` would not exit 1.
+        cmd = ("cd {}/test_link_self_heal || exit 1; for i in `seq 1 5`; do "
+               "for j in `seq 1 10`; do a=`stat -c %i dir.$i/file.$j` && "
+               "b=`stat -c %i {}` && [ \"$a\" -eq \"$b\" ] || exit 1; "
+               "done; done".format(self.mountpoint, file_to_compare))
+        ret, _, _ = g.run(self.first_client, cmd)
+        self.assertFalse(ret, "Failed to verify hard links to files")
+
+ def _bring_bricks_offline(self):
+ """Brings bricks offline and confirms if they are offline"""
+ # Select bricks to bring offline from a replica set
+ subvols_dict = get_subvols(self.mnode, self.volname)
+ subvols = subvols_dict['volume_subvols']
+ self.bricks_to_bring_offline = []
+ for subvol in subvols:
+ self.bricks_to_bring_offline.append(subvol[0])
+
+ # Bring bricks offline
+ ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline)
+ self.assertTrue(ret, 'Failed to bring bricks %s offline' %
+ self.bricks_to_bring_offline)
+
+ ret = are_bricks_offline(self.mnode, self.volname,
+ self.bricks_to_bring_offline)
+ self.assertTrue(ret, 'Bricks %s are not offline'
+ % self.bricks_to_bring_offline)
+ g.log.info('Bringing bricks %s offline is successful',
+ self.bricks_to_bring_offline)
+
+ def _restart_volume_and_bring_all_offline_bricks_online(self):
+ """Restart volume and bring all offline bricks online"""
+ ret = bring_bricks_online(self.mnode, self.volname,
+ self.bricks_to_bring_offline,
+ bring_bricks_online_methods=[
+ 'volume_start_force'])
+ self.assertTrue(ret, 'Failed to bring bricks %s online' %
+ self.bricks_to_bring_offline)
+
+ # Check if bricks are back online or not
+ ret = are_bricks_online(self.mnode, self.volname,
+ self.bricks_to_bring_offline)
+ self.assertTrue(ret, 'Bricks not online %s even after restart' %
+ self.bricks_to_bring_offline)
+
+ g.log.info('Bringing bricks %s online is successful',
+ self.bricks_to_bring_offline)
+
+ def _check_arequal_on_bricks_with_a_specific_arequal(self, arequal,
+ brick_list):
+ """Compare the last ':'-separated field of the final line of an initial
+ arequal checksum with the same field for each brick in brick_list.
+ """
+ init_val = arequal[0].splitlines()[-1].split(':')[-1]
+ ret, arequals = collect_bricks_arequal(brick_list)
+ self.assertTrue(ret, 'Failed to get arequal on bricks')
+ for brick_arequal in arequals:
+ brick_total = brick_arequal.splitlines()[-1].split(':')[-1]
+ self.assertEqual(init_val, brick_total, 'Arequals not matching')
+
+ def _check_arequal_checksum_for_the_volume(self):
+ """
+ Replicated: compare the mount point arequal with every brick.
+ Distributed-replicated: compare bricks within each subvolume.
+ """
+ if self.volume_type == "replicated":
+ # Check arequals for "replicated"
+ brick_list = get_all_bricks(self.mnode, self.volname)
+
+ # Get arequal from the first mount point
+ ret, arequals = collect_mounts_arequal([self.mounts[0]])
+ self.assertTrue(ret, 'Failed to get arequal')
+ g.log.info('Getting arequal before getting bricks offline '
+ 'is successful')
+
+ # Get arequal on bricks and compare with the mount point total
+ self._check_arequal_on_bricks_with_a_specific_arequal(
+ arequals, brick_list)
+
+ # Check arequals for "distributed-replicated"
+ if self.volume_type == "distributed-replicated":
+ # Get the subvolumes
+ subvols_dict = get_subvols(self.mnode, self.volname)
+ num_subvols = len(subvols_dict['volume_subvols'])
+
+ # Get arequals and compare
+ for i in range(0, num_subvols):
+ # Get arequal for first brick
+ brick_list = subvols_dict['volume_subvols'][i]
+ ret, arequals = collect_bricks_arequal([brick_list[0]])
+ self.assertTrue(ret, 'Failed to get arequal on first brick')
+
+ # Get arequal for every brick and compare with first brick
+ self._check_arequal_on_bricks_with_a_specific_arequal(
+ arequals, brick_list)
+
+ def _check_heal_is_completed_and_not_in_split_brain(self):
+ """Check if heal is completed and volume not in split brain"""
+ # Check if heal is completed
+ ret = is_heal_complete(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal is not complete')
+ g.log.info('Heal is completed successfully')
+
+ # Check if volume is in split brian or not
+ ret = is_volume_in_split_brain(self.mnode, self.volname)
+ self.assertFalse(ret, 'Volume is in split-brain state')
+ g.log.info('Volume is not in split-brain state')
+
+ def _check_if_there_are_files_and_dirs_to_be_healed(self):
+ """Check if there are files and dirs to be healed"""
+ ret = is_heal_complete(self.mnode, self.volname)
+ self.assertFalse(ret, 'Heal is completed')
+ g.log.info('Heal is pending')
+
+ def _wait_for_heal_is_completed(self):
+ """Check if heal is completed"""
+ ret = monitor_heal_completion(self.mnode, self.volname,
+ timeout_period=3600)
+ self.assertTrue(ret, 'Heal has not yet completed')
+
+ def _replace_one_random_brick(self):
+ """Replace one random brick from the volume"""
+ brick = choice(get_all_bricks(self.mnode, self.volname))
+ ret = replace_brick_from_volume(self.mnode, self.volname,
+ self.servers, self.all_servers_info,
+ src_brick=brick)
+ self.assertTrue(ret, "Failed to replace brick %s " % brick)
+ g.log.info("Successfully replaced brick %s", brick)
+
+ def test_self_heal_of_hard_links(self):
+ """
+ Test case:
+ 1. Create a volume, start it and mount it.
+ 2. Create a directory and create files and directories inside it
+ on mount point.
+ 3. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 4. Bring down brick processes accoding to the volume type.
+ 5. Create hard links for the files created in step 2.
+ 6. Check if heal info is showing all the files and dirs to be healed.
+ 7. Bring brack all brick processes which were killed.
+ 8. Wait for heal to complete on the volume.
+ 9. Check if heal is complete and check if volume is in split brain.
+ 10. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 11. Verify if hard links are proper or not.
+ 12. Do a lookup on mount point.
+ 13. Bring down brick processes accoding to the volume type.
+ 14. Create a second set of hard links to the files.
+ 15. Check if heal info is showing all the files and dirs to be healed.
+ 16. Bring brack all brick processes which were killed.
+ 17. Wait for heal to complete on the volume.
+ 18. Check if heal is complete and check if volume is in split brain.
+ 19. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 20. Verify both set of hard links are proper or not.
+ 21. Do a lookup on mount point.
+ 22. Pick a random brick and replace it.
+ 23. Wait for heal to complete on the volume.
+ 24. Check if heal is complete and check if volume is in split brain.
+ 25. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 26. Verify both set of hard links are proper or not.
+ 27. Do a lookup on mount point.
+ """
+ # Create a directory and create files and directories inside it
+ # on mount point
+ self._create_files_and_dirs_on_mount_point()
+
+ # Collect and compare arequal-checksum according to the volume type
+ # for bricks
+ self._check_arequal_checksum_for_the_volume()
+ for attempt in (False, True):
+
+ # Bring down brick processes accoding to the volume type
+ self._bring_bricks_offline()
+
+ # Create hardlinks for the files created in step 2
+ self._create_hard_links_to_files(second_attempt=attempt)
+
+ # Check if heal info is showing all the files and dirs to
+ # be healed
+ self._check_if_there_are_files_and_dirs_to_be_healed()
+
+ # Bring back all brick processes which were killed
+ self._restart_volume_and_bring_all_offline_bricks_online()
+
+ # Wait for heal to complete on the volume
+ self._wait_for_heal_is_completed()
+
+ # Check if heal is complete and check if volume is in split brain
+ self._check_heal_is_completed_and_not_in_split_brain()
+
+ # Collect and compare arequal-checksum according to the volume
+ # type for bricks
+ self._check_arequal_checksum_for_the_volume()
+
+ # Verify if hard links are proper or not
+ self._verify_hard_links_to_files()
+ if attempt:
+ self._verify_hard_links_to_files(second_set=attempt)
+
+ # Pick a random brick and replace it
+ self._replace_one_random_brick()
+
+ # Wait for heal to complete on the volume
+ self._wait_for_heal_is_completed()
+
+ # Check if heal is complete and check if volume is in split brain
+ self._check_heal_is_completed_and_not_in_split_brain()
+
+ # Collect and compare arequal-checksum according to the volume
+ # type for bricks
+ self._check_arequal_checksum_for_the_volume()
+
+ # Verify if hard links are proper or not
+ self._verify_hard_links_to_files()
+ self._verify_hard_links_to_files(second_set=True)
+
+ def test_self_heal_of_soft_links(self):
+ """
+ Test case:
+ 1. Create a volume, start it and mount it.
+ 2. Create a directory and create files and directories inside it
+ on mount point.
+ 3. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 4. Bring down brick processes accoding to the volume type.
+ 5. Create soft links for the dirs created in step 2.
+ 6. Verify if soft links are proper or not.
+ 7. Add files through the soft links.
+ 8. Verify if the soft links are proper or not.
+ 9. Check if heal info is showing all the files and dirs to be healed.
+ 10. Bring brack all brick processes which were killed.
+ 11. Wait for heal to complete on the volume.
+ 12. Check if heal is complete and check if volume is in split brain.
+ 13. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 14. Verify if soft links are proper or not.
+ 15. Do a lookup on mount point.
+ """
+ # Create a directory and create files and directories inside it
+ # on mount point
+ self._create_files_and_dirs_on_mount_point()
+
+ # Collect and compare arequal-checksum according to the volume type
+ # for bricks
+ self._check_arequal_checksum_for_the_volume()
+
+ # Bring down brick processes accoding to the volume type
+ self._bring_bricks_offline()
+
+ # Create soft links for the dirs created in step 2
+ self._create_soft_links_to_directories()
+
+ # Verify if soft links are proper or not
+ self._verify_soft_links_to_dir()
+
+ # Add files through the soft links
+ self._create_files_and_dirs_on_mount_point(second_attempt=True)
+
+ # Verify if the soft links are proper or not
+ self._verify_soft_links_to_dir(option=1)
+
+ # Check if heal info is showing all the files and dirs to
+ # be healed
+ self._check_if_there_are_files_and_dirs_to_be_healed()
+
+ # Bring back all brick processes which were killed
+ self._restart_volume_and_bring_all_offline_bricks_online()
+
+ # Wait for heal to complete on the volume
+ self._wait_for_heal_is_completed()
+
+ # Check if heal is complete and check if volume is in split brain
+ self._check_heal_is_completed_and_not_in_split_brain()
+
+ # Verify if soft links are proper or not
+ self._verify_soft_links_to_dir(option=2)
diff --git a/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py b/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py
new file mode 100644
index 000000000..37bd2ec52
--- /dev/null
+++ b/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py
@@ -0,0 +1,600 @@
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along`
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+ bring_bricks_online,
+ are_bricks_offline,
+ are_bricks_online,
+ get_all_bricks)
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+ is_volume_in_split_brain,
+ is_heal_complete,
+ enable_granular_heal,
+ disable_granular_heal)
+from glustolibs.gluster.lib_utils import (add_user, del_user, group_del,
+ group_add, collect_bricks_arequal)
+from glustolibs.gluster.volume_ops import get_volume_options
+from glustolibs.gluster.volume_libs import get_subvols
+from glustolibs.io.utils import collect_mounts_arequal
+
+
+@runs_on([['distributed-replicated', 'replicated'], ['glusterfs']])
+class TestHealWithLinkFiles(GlusterBaseClass):
+
+ def setUp(self):
+     """Record client details, create test users if needed, mount volume.
+
+     The non-root group and users are created only when the running test
+     is test_self_heal_meta_data.
+     """
+     # Calling GlusterBaseClass setUp
+     self.get_super_method(self, 'setUp')()
+
+     first_mount = self.mounts[0]
+     self.first_client = first_mount.client_system
+     self.mountpoint = first_mount.mountpoint
+     self.user_group_created = False
+
+     # The last dot-separated component of the test id is compared
+     # against the name of the only test that needs users and a group
+     if self.id().split('.')[-1] == 'test_self_heal_meta_data':
+
+         # Create non-root group
+         group_created = group_add(self.first_client, 'qa_all')
+         if not group_created:
+             raise ExecutionError("Failed to create group qa_all")
+
+         # Create non-root users as members of that group
+         self.users = ('qa_func', 'qa_system', 'qa_perf')
+         for user in self.users:
+             if not add_user(self.first_client, user, group='qa_all'):
+                 raise ExecutionError("Failed to create user {}"
+                                      .format(user))
+
+         self.user_group_created = True
+         g.log.info("Successfully created all users.")
+
+     # Setup Volume
+     if not self.setup_volume_and_mount_volume([first_mount]):
+         raise ExecutionError("Failed to setup and mount volume")
+
+ def tearDown(self):
+     """Delete the test users and group (if created) and clean the volume."""
+     # Users and group exist only when setUp flagged them as created
+     if self.user_group_created:
+
+         # Delete non-root users
+         for username in self.users:
+             del_user(self.first_client, username)
+         g.log.info("Successfully deleted all users")
+
+         # Delete non-root group
+         group_del(self.first_client, 'qa_all')
+
+     cleaned = self.unmount_volume_and_cleanup_volume([self.mounts[0]])
+     if not cleaned:
+         raise ExecutionError("Failed to cleanup Volume")
+
+     # Calling GlusterBaseClass tearDown
+     self.get_super_method(self, 'tearDown')()
+
+ def _set_granular_heal_to_on_or_off(self, enabled=False):
+     """Bring granular-entry-heal to the requested state.
+
+     The option is read first and only changed when its current value
+     differs from what `enabled` asks for.
+     """
+     options = get_volume_options(self.mnode, self.volname,
+                                  'granular-entry-heal')
+     currently_on = options['cluster.granular-entry-heal'] == 'on'
+
+     if enabled and not currently_on:
+         ret = enable_granular_heal(self.mnode, self.volname)
+         self.assertTrue(ret,
+                         "Unable to set granular-entry-heal to on")
+     elif currently_on and not enabled:
+         ret = disable_granular_heal(self.mnode, self.volname)
+         self.assertTrue(ret,
+                         "Unable to set granular-entry-heal to off")
+
+ def _run_cmd(self, io_cmd, err_msg):
+     """Run a shell snippet inside test_self_heal on the first client.
+
+     Args:
+         io_cmd (str): Shell snippet executed from within the
+             test_self_heal directory on the mount point.
+         err_msg (str): Assertion message used when the command exits
+             with a non-zero status.
+     """
+     # Abort when the cd fails: otherwise the snippet (which uses relative
+     # paths for rm, mv, chmod, ...) would run in whatever directory the
+     # remote shell started in and could report a misleading result.
+     cmd = ("cd {}/test_self_heal || exit 1; {}".format(self.mountpoint,
+                                                        io_cmd))
+     ret, _, _ = g.run(self.first_client, cmd)
+     self.assertFalse(ret, err_msg)
+
+ def _create_files_and_dirs_on_mount_point(self, index, second_set=False):
+     """Create one of the predefined data sets under test_self_heal.
+
+     Args:
+         index (int): Position (0-3) in `io_cmd` of the shell snippet
+             to run.
+         second_set (bool): When False the parent directory
+             test_self_heal is created first; when True that step is
+             skipped.
+     """
+     # Create a parent directory test_self_heal on mount point
+     if not second_set:
+         ret = mkdir(self.first_client, '{}/{}'.format(
+             self.mountpoint, 'test_self_heal'))
+         self.assertTrue(ret, "Failed to create dir test_self_heal")
+
+     # Create directories and files inside directory test_self_heal.
+     # Data is read from /dev/urandom: /dev/random may block or return
+     # short reads while the kernel entropy pool is low, which would
+     # stall the dd loops without adding anything to the test.
+     io_cmd = ("for i in `seq 1 50`; do mkdir dir.$i; dd if=/dev/urandom"
+               " of=file.$i count=1K bs=$i; done",
+
+               "for i in `seq 1 100`; do mkdir dir.$i; for j in `seq 1 5`;"
+               " do dd if=/dev/urandom of=dir.$i/file.$j bs=1K count=$j"
+               ";done;done",
+
+               "for i in `seq 1 10`; do mkdir l1_dir.$i; for j in `seq "
+               "1 5`; do mkdir l1_dir.$i/l2_dir.$j; for k in `seq 1 10`;"
+               " do dd if=/dev/urandom of=l1_dir.$i/l2_dir.$j/test.$k"
+               " bs=1k count=$k; done; done; done;",
+
+               "for i in `seq 51 100`; do mkdir new_dir.$i; for j in `seq"
+               " 1 10`; do dd if=/dev/urandom of=new_dir.$i/new_file.$j "
+               "bs=1K count=$j; done; dd if=/dev/urandom of=new_file.$i"
+               " count=1K bs=$i; done ;")
+     self._run_cmd(
+         io_cmd[index], "Failed to create dirs and files inside")
+
+ def _delete_files_and_dirs(self):
+     """Remove dir.1..dir.50 and file.1..file.50 from test_self_heal."""
+     cleanup_cmd = ("for i in `seq 1 50`; "
+                    "do rm -rf dir.$i; rm -f file.$i;done")
+     self._run_cmd(cleanup_cmd, "Failed to delete dirs and files")
+
+ def _rename_files_and_dirs(self):
+     """Rename new_file.* and new_dir.* (51..100) to renamed_* names."""
+     # One fragment per shell step; the joined text is a single command
+     rename_cmd = ("for i in `seq 51 100`; do "
+                   "mv new_file.$i renamed_file.$i; "
+                   "for j in `seq 1 10`; do "
+                   "mv new_dir.$i/new_file.$j new_dir.$i/renamed_file.$j ; "
+                   "done ; "
+                   "mv new_dir.$i renamed_dir.$i; "
+                   "done;")
+     self._run_cmd(rename_cmd, "Failed to rename dirs and files")
+
+ def _change_meta_deta_of_dirs_and_files(self):
+     """Change permissions, owner and group of dir.1..100 and their files."""
+     # Permissions: 555 on every dir, 666 on files of dir.1..50 and
+     # 444 on files of dir.51..100
+     chmod_cmd = (
+         "for i in `seq 1 100`; do chmod 555 dir.$i; done; "
+         "for i in `seq 1 50`; do for j in `seq 1 5`; do chmod 666 "
+         "dir.$i/file.$j; done; done; for i in `seq 51 100`; do for "
+         "j in `seq 1 5`;do chmod 444 dir.$i/file.$j; done; done;")
+
+     # Ownership: three ranges of dirs, one per test user
+     chown_cmd = (
+         "for i in `seq 1 35`; do chown -R qa_func dir.$i; done; "
+         "for i in `seq 36 70`; do chown -R qa_system dir.$i; done; "
+         "for i in `seq 71 100`; do chown -R qa_perf dir.$i; done;")
+
+     # Group: qa_all everywhere
+     chgrp_cmd = "for i in `seq 1 100`; do chgrp -R qa_all dir.$i; done;"
+
+     for step_cmd in (chmod_cmd, chown_cmd, chgrp_cmd):
+         self._run_cmd(step_cmd,
+                       "Failed to change meta data on dirs and files")
+     g.log.info("Successfully changed meta data on dirs and files")
+
+ def _verify_meta_data_of_files_and_dirs(self):
+     """Verify permissions, owner and group of dir.1..100 and their files.
+
+     Each shell snippet exits with status 1 as soon as one stat output
+     does not contain the expected value, which makes the assertion in
+     _run_cmd fail.
+     """
+     cmds = (
+         # Verify permissions: dirs 555; files 666 (dir.1..50) or
+         # 444 (dir.51..100)
+         "for i in `seq 1 50`; do stat -c %a dir.$i | grep -F \"555\";"
+         " if [ $? -ne 0 ]; then exit 1; fi; for j in `seq 1 5` ; do "
+         "stat -c %a dir.$i/file.$j | grep -F \"666\"; if [ $? -ne 0 ]"
+         "; then exit 1; fi; done; done; for i in `seq 51 100`; do "
+         "stat -c %a dir.$i | grep -F \"555\";if [ $? -ne 0 ]; then "
+         "exit 1; fi; for j in `seq 1 5`; do stat -c %a dir.$i/file.$j"
+         " | grep -F \"444\"; if [ $? -ne 0 ]; then exit 1; fi; done;"
+         "done;",
+
+         # Verify ownership: qa_func (1..35), qa_system (36..70),
+         # qa_perf (71..100)
+         "for i in `seq 1 35`; do stat -c %U dir.$i | grep -F "
+         "\"qa_func\"; if [ $? -ne 0 ]; then exit 1; fi; for j in "
+         "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F "
+         "\"qa_func\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;"
+         " for i in `seq 36 70` ; do stat -c %U dir.$i | grep -F "
+         "\"qa_system\"; if [ $? -ne 0 ]; then exit 1; fi; for j in "
+         "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F "
+         "\"qa_system\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;"
+         " for i in `seq 71 100` ; do stat -c %U dir.$i | grep -F "
+         "\"qa_perf\"; if [ $? -ne 0 ]; then exit 1; fi; for j in "
+         "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F "
+         "\"qa_perf\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;",
+
+         # Verify group: qa_all on every dir and file
+         "for i in `seq 1 100`; do stat -c %G dir.$i | grep -F "
+         "\"qa_all\"; if [ $? -ne 0 ]; then exit 1; fi; for j in "
+         "`seq 1 5`; do stat -c %G dir.$i/file.$j | grep -F "
+         "\"qa_all\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;")
+
+     # Run the three checks in order; the first failing one aborts the test
+     for io_cmd in cmds:
+         self._run_cmd(io_cmd, "Meta data of dirs and files not proper")
+
+ def _set_and_remove_extended_attributes(self, remove=False):
+     """Set trusted.name on dir.1..100 and their files, or remove it.
+
+     With remove=False the attribute is set; with remove=True it is
+     deleted again.
+     """
+     if remove:
+         # Command to remove extended attribute set on files and dirs
+         xattr_cmd = ("for i in `seq 1 100`; do setfattr -x trusted.name "
+                      "dir.$i; for j in `seq 1 5`; do setfattr -x "
+                      "trusted.name dir.$i/file.$j ; done ; done ;")
+         failure = "Failed to remove extended attributes to files and dirs"
+     else:
+         # Command to set extended attribute to files and dirs
+         xattr_cmd = ("for i in `seq 1 100`; do setfattr -n trusted.name -v "
+                      "testing_xattr_selfheal_on_dirs dir.$i; for j in `seq 1 "
+                      "5`;do setfattr -n trusted.name -v "
+                      "testing_xattr_selfheal_on_files dir.$i/file.$j; done; "
+                      "done;")
+         failure = "Failed to set extended attributes to files and dirs"
+
+     self._run_cmd(xattr_cmd, failure)
+
+ def _verify_if_extended_attributes_are_proper(self, remove=False):
+     """Verify that trusted.name is set, or removed, on dirs and files.
+
+     Args:
+         remove (bool): When False, check that every dir.1..100 and its
+             files carry the expected trusted.name value. When True,
+             check that reading trusted.name fails on all of them.
+     """
+     io_cmd = ("for i in `seq 1 100`; do getfattr -n trusted.name -e text "
+               "dir.$i | grep -F 'testing_xattr_selfheal_on_dirs'; if [ $? "
+               "-ne 0 ]; then exit 1 ; fi ; for j in `seq 1 5` ; do "
+               "getfattr -n trusted.name -e text dir.$i/file.$j | grep -F "
+               "'testing_xattr_selfheal_on_files'; if [ $? -ne 0 ]; then "
+               "exit 1; fi; done; done;")
+     err_msg = "Extended attributes on files and dirs are not proper"
+     if remove:
+         # getfattr succeeding (status 0) means the attribute still exists.
+         # The closing ']' must be a separate word: written as '0]' the
+         # test command errors out and a leftover attribute on a file
+         # would go unnoticed.
+         io_cmd = ("for i in `seq 1 100`; do getfattr -n trusted.name -e "
+                   "text dir.$i; if [ $? -eq 0 ]; then exit 1; fi; for j in"
+                   " `seq 1 5`; do getfattr -n trusted.name -e text "
+                   "dir.$i/file.$j; if [ $? -eq 0 ]; then exit 1; fi; done; "
+                   "done;")
+         err_msg = "Extended attributes set to files and dirs not removed"
+     self._run_cmd(io_cmd, err_msg)
+
+ def _remove_files_and_create_dirs_with_the_same_name(self):
+     """Replace every l1_dir.*/l2_dir.*/test.* file by a same-named dir."""
+     swap_cmd = ("for i in `seq 1 10`; do for j in `seq 1 5`; do for k in "
+                 "`seq 1 10`; do rm -f l1_dir.$i/l2_dir.$j/test.$k; mkdir "
+                 "l1_dir.$i/l2_dir.$j/test.$k; done; done; done;")
+     failure = "Failed to remove files and create dirs with same name"
+     self._run_cmd(swap_cmd, failure)
+
+ def _verify_if_dirs_are_proper_or_not(self):
+     """Check that every l1_dir.*/l2_dir.*/test.* entry is a directory."""
+     # The snippet exits 1 on the first entry stat does not report as
+     # a directory
+     stat_cmd = ("for i in `seq 1 10`; do for j in `seq 1 5`; do for k in "
+                 "`seq 1 10`; do stat -c %F l1_dir.$i/l2_dir.$j/test.$k | "
+                 "grep -F 'directory'; if [ $? -ne 0 ]; then exit 1; fi; "
+                 "done; done; done;")
+     self._run_cmd(stat_cmd, "Dirs created instead of files aren't proper")
+
+ def _bring_bricks_offline(self):
+     """Take the first brick of every subvolume offline and verify it."""
+     # Pick the first brick of each replica set
+     subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
+     self.bricks_to_bring_offline = [subvol[0] for subvol in subvols]
+     offline_bricks = self.bricks_to_bring_offline
+
+     # Bring bricks offline
+     ret = bring_bricks_offline(self.volname, offline_bricks)
+     self.assertTrue(ret, 'Failed to bring bricks %s offline' %
+                     offline_bricks)
+
+     # Confirm the selected bricks are really down
+     ret = are_bricks_offline(self.mnode, self.volname, offline_bricks)
+     self.assertTrue(ret, 'Bricks %s are not offline'
+                     % offline_bricks)
+     g.log.info('Bringing bricks %s offline is successful',
+                offline_bricks)
+
+ def _restart_volume_and_bring_all_offline_bricks_online(self):
+     """Force-start the volume to revive the offline bricks and verify."""
+     bricks = self.bricks_to_bring_offline
+
+     # Bring the bricks back using the 'volume_start_force' method
+     ret = bring_bricks_online(
+         self.mnode, self.volname, bricks,
+         bring_bricks_online_methods=['volume_start_force'])
+     self.assertTrue(ret, 'Failed to bring bricks %s online' % bricks)
+
+     # Check if bricks are back online or not
+     ret = are_bricks_online(self.mnode, self.volname, bricks)
+     self.assertTrue(ret, 'Bricks not online %s even after restart' %
+                     bricks)
+
+     g.log.info('Bringing bricks %s online is successful', bricks)
+
+ def _check_arequal_on_bricks_with_a_specific_arequal(self, arequal,
+                                                      brick_list):
+     """
+     Compare a reference arequal checksum with the one of every brick
+     in the given brick list
+     """
+     def _total(output):
+         # The value compared is the text after the last ':' on the
+         # last line of the arequal output
+         return output.splitlines()[-1].split(':')[-1]
+
+     expected = _total(arequal[0])
+     ret, brick_outputs = collect_bricks_arequal(brick_list)
+     self.assertTrue(ret, 'Failed to get arequal on bricks')
+     for brick_output in brick_outputs:
+         self.assertEqual(expected, _total(brick_output),
+                          'Arequals not matching')
+
+ @staticmethod
+ def _add_dir_path_to_brick_list(brick_list):
+     """Return the brick paths with '/test_self_heal' appended to each"""
+     return ['{}/{}'.format(brick, 'test_self_heal')
+             for brick in brick_list]
+
+ def _check_arequal_checksum_for_the_volume(self):
+     """
+     Check that arequal checksums agree for the volume type in use:
+     mount point versus all bricks for "replicated", and the bricks of
+     each subvolume against each other for "distributed-replicated".
+     """
+     if self.volume_type == "replicated":
+         # Check arequals for "replicated"
+         all_bricks = get_all_bricks(self.mnode, self.volname)
+         dir_brick_list = self._add_dir_path_to_brick_list(all_bricks)
+
+         # Get arequal of the test directory from the mount point
+         work_dir = '{}/test_self_heal'.format(self.mountpoint)
+         ret, mount_arequals = collect_mounts_arequal([self.mounts[0]],
+                                                      path=work_dir)
+         self.assertTrue(ret, 'Failed to get arequal')
+         g.log.info('Getting arequal before getting bricks offline '
+                    'is successful')
+
+         # Get arequal on bricks and compare with the mount point value
+         self._check_arequal_on_bricks_with_a_specific_arequal(
+             mount_arequals, dir_brick_list)
+
+     # Check arequals for "distributed-replicated"
+     if self.volume_type == "distributed-replicated":
+         # Get the subvolumes
+         subvols_dict = get_subvols(self.mnode, self.volname)
+
+         # Within each subvolume, the first brick is the reference
+         for subvol_bricks in subvols_dict['volume_subvols']:
+             dir_brick_list = self._add_dir_path_to_brick_list(
+                 subvol_bricks)
+             ret, first_arequal = collect_bricks_arequal(
+                 [dir_brick_list[0]])
+             self.assertTrue(ret, 'Failed to get arequal on first brick')
+
+             # Get arequal for every brick and compare with first brick
+             self._check_arequal_on_bricks_with_a_specific_arequal(
+                 first_arequal, dir_brick_list)
+
+ def _check_heal_is_completed_and_not_in_split_brain(self):
+     """Assert that heal has finished and the volume has no split-brain"""
+     # Heal must be reported as complete
+     healed = is_heal_complete(self.mnode, self.volname)
+     self.assertTrue(healed, 'Heal is not complete')
+     g.log.info('Heal is completed successfully')
+
+     # Volume must not be in split-brain
+     split_brain = is_volume_in_split_brain(self.mnode, self.volname)
+     self.assertFalse(split_brain, 'Volume is in split-brain state')
+     g.log.info('Volume is not in split-brain state')
+
+ def _check_if_there_are_files_and_dirs_to_be_healed(self):
+     """Assert that heal is still pending on the volume"""
+     heal_done = is_heal_complete(self.mnode, self.volname)
+     self.assertFalse(heal_done, 'Heal is completed')
+     g.log.info('Heal is pending')
+
+ def _wait_for_heal_is_completed(self):
+     """Monitor heal completion, with a timeout_period of 3600"""
+     finished = monitor_heal_completion(self.mnode, self.volname,
+                                        timeout_period=3600)
+     self.assertTrue(finished, 'Heal has not yet completed')
+
+ def _check_heal_status_restart_vol_wait_and_check_data(self):
+     """
+     Run the sequence that follows every change made while bricks
+     are offline:
+     1 Confirm heal info still reports entries to be healed
+     2 Bring the offline bricks back online
+     3 Wait for heal to complete on the volume
+     4 Confirm heal is complete and the volume is not in split-brain
+     5 Compare arequal checksums according to the volume type
+     """
+     # 1. Heal must be pending at this point
+     self._check_if_there_are_files_and_dirs_to_be_healed()
+
+     # 2. Revive the bricks that were brought down
+     self._restart_volume_and_bring_all_offline_bricks_online()
+
+     # 3. Let self-heal finish
+     self._wait_for_heal_is_completed()
+
+     # 4. Heal complete and no split-brain
+     self._check_heal_is_completed_and_not_in_split_brain()
+
+     # 5. Checksums must agree again
+     self._check_arequal_checksum_for_the_volume()
+
+ def _run_test_self_heal_entry_heal(self):
+     """Run one full pass of the entry-heal scenario.
+
+     Initial data is created and checksummed, then three changes
+     (create, delete, rename) are each made with bricks offline and
+     followed by the heal-and-verify sequence.
+     """
+     # Create a directory and create files and directories inside it on
+     # mount point
+     self._create_files_and_dirs_on_mount_point(0)
+
+     # Collect and compare arequal-checksum according to the volume type
+     # for bricks
+     self._check_arequal_checksum_for_the_volume()
+
+     # Changes made while bricks are down, in the order they must run
+     offline_changes = (
+         # Create a new set of files and directories on mount point
+         lambda: self._create_files_and_dirs_on_mount_point(
+             3, second_set=True),
+         # Delete files and directories from mount point
+         self._delete_files_and_dirs,
+         # Rename the existing files and dirs
+         self._rename_files_and_dirs,
+     )
+
+     for apply_change in offline_changes:
+         # Bring down brick processes according to the volume type
+         self._bring_bricks_offline()
+
+         apply_change()
+
+         self._check_heal_status_restart_vol_wait_and_check_data()
+
+ def test_self_heal_entry_heal(self):
+     """
+     Test case:
+     1. Create a volume, start it and mount it.
+     2. Create a directory and create files and directories inside it
+        on mount point.
+     3. Collect and compare arequal-checksum according to the volume type
+        for bricks.
+     4. Bring down brick processes according to the volume type.
+     5. Create a new set of files and directories on mount point.
+     6. Check if heal info is showing all the files and dirs to be healed.
+     7. Bring back all brick processes which were killed.
+     8. Wait for heal to complete on the volume.
+     9. Check if heal is complete and check if volume is in split brain.
+     10. Collect and compare arequal-checksum according to the volume type
+         for bricks.
+     11. Bring down brick processes according to the volume type.
+     12. Delete files and directories from mount point.
+     13. Check if heal info is showing all the files and dirs to be healed.
+     14. Bring back all brick processes which were killed.
+     15. Wait for heal to complete on the volume.
+     16. Check if heal is complete and check if volume is in split brain.
+     17. Collect and compare arequal-checksum according to the volume type
+         for bricks.
+     18. Bring down brick processes according to the volume type.
+     19. Rename the existing files and dirs.
+     20. Check if heal info is showing all the files and dirs to be healed.
+     21. Bring back all brick processes which were killed.
+     22. Wait for heal to complete on the volume.
+     23. Check if heal is complete and check if volume is in split brain.
+     24. Collect and compare arequal-checksum according to the volume type
+         for bricks.
+
+     Note:
+     The steps are run twice: first with granular-entry-heal disabled,
+     then with it enabled.
+     """
+     for granular_enabled in (False, True):
+         if granular_enabled:
+             # Second pass: wipe the data left behind by the first pass
+             wipe_cmd = 'rm -rf {}/*'.format(self.mountpoint)
+             ret, _, _ = g.run(self.first_client, wipe_cmd)
+             self.assertFalse(ret, 'Failed to cleanup mount point')
+             g.log.info("Testing with granular heal set to enabled")
+         self._set_granular_heal_to_on_or_off(enabled=granular_enabled)
+         self._run_test_self_heal_entry_heal()
+
+ def test_self_heal_meta_data(self):
+     """
+     Test case:
+     1. Create a volume, start it and mount it.
+     2. Create a directory and create files and directories inside it
+        on mount point.
+     3. Collect and compare arequal-checksum according to the volume type
+        for bricks.
+     4. Bring down brick processes according to the volume type.
+     5. Change the meta data of files and dirs.
+     6. Check if heal info is showing all the files and dirs to be healed.
+     7. Bring back all brick processes which were killed.
+     8. Wait for heal to complete on the volume.
+     9. Check if heal is complete and check if volume is in split brain.
+     10. Collect and compare arequal-checksum according to the volume type
+         for bricks.
+     11. Verify the meta data of files and dirs.
+     12. Bring down brick processes according to the volume type.
+     13. Set extended attributes on the files and dirs.
+     14. Verify if the extended attributes are set properly or not.
+     15. Check if heal info is showing all the files and dirs to be healed.
+     16. Bring back all brick processes which were killed.
+     17. Wait for heal to complete on the volume.
+     18. Check if heal is complete and check if volume is in split brain.
+     19. Collect and compare arequal-checksum according to the volume type
+         for bricks.
+     20. Verify if extended attributes are consistent or not.
+     21. Bring down brick processes according to the volume type.
+     22. Remove extended attributes on the files and dirs.
+     23. Verify if extended attributes were removed properly.
+     24. Check if heal info is showing all the files and dirs to be healed.
+     25. Bring back all brick processes which were killed.
+     26. Wait for heal to complete on the volume.
+     27. Check if heal is complete and check if volume is in split brain.
+     28. Collect and compare arequal-checksum according to the volume type
+         for bricks.
+     29. Verify if extended attributes are removed or not.
+     """
+     # Steps 2-3: create the data set and compare checksums
+     self._create_files_and_dirs_on_mount_point(1)
+     self._check_arequal_checksum_for_the_volume()
+
+     # Steps 4-5: change the meta data while bricks are offline
+     self._bring_bricks_offline()
+     self._change_meta_deta_of_dirs_and_files()
+
+     # Steps 6-10: heal and compare checksums
+     self._check_heal_status_restart_vol_wait_and_check_data()
+
+     # Step 11: verify the meta data of files and dirs
+     self._verify_meta_data_of_files_and_dirs()
+
+     # Steps 12-20 set the extended attributes (remove_xattr=False),
+     # steps 21-29 remove them again (remove_xattr=True)
+     for remove_xattr in (False, True):
+         # Bring down brick processes according to the volume type
+         self._bring_bricks_offline()
+
+         # Set or remove extended attributes on the files and dirs
+         self._set_and_remove_extended_attributes(remove=remove_xattr)
+
+         # Verify the change as seen from the mount point
+         self._verify_if_extended_attributes_are_proper(
+             remove=remove_xattr)
+
+         self._check_heal_status_restart_vol_wait_and_check_data()
+
+         # Verify the extended attributes are still as expected after heal
+         self._verify_if_extended_attributes_are_proper(
+             remove=remove_xattr)
+
+ def test_self_heal_of_dir_with_files_removed(self):
+     """
+     Test case:
+     1. Create a volume, start it and mount it.
+     2. Create a directory and create files and directories inside it
+        on mount point.
+     3. Collect and compare arequal-checksum according to the volume type
+        for bricks.
+     4. Bring down brick processes according to the volume type.
+     5. Remove all files and create dirs which have the names of the
+        files.
+     6. Check if heal info is showing all the files and dirs to be healed.
+     7. Bring back all brick processes which were killed.
+     8. Wait for heal to complete on the volume.
+     9. Check if heal is complete and check if volume is in split brain.
+     10. Collect and compare arequal-checksum according to the volume type
+         for bricks.
+     11. Verify if dirs are healed properly or not.
+     """
+     # Steps 2-3: create the nested data set and compare checksums
+     self._create_files_and_dirs_on_mount_point(2)
+     self._check_arequal_checksum_for_the_volume()
+
+     # Steps 4-5: with bricks offline, replace every file by a directory
+     # of the same name
+     self._bring_bricks_offline()
+     self._remove_files_and_create_dirs_with_the_same_name()
+
+     # Steps 6-10: heal and compare checksums
+     self._check_heal_status_restart_vol_wait_and_check_data()
+
+     # Step 11: the replaced entries must be directories
+     self._verify_if_dirs_are_proper_or_not()
diff --git a/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py b/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py
index 43b4f4edf..a449e396f 100644
--- a/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py
+++ b/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -26,12 +26,14 @@ from glustolibs.gluster.brick_libs import (
select_volume_bricks_to_bring_offline, get_online_bricks_list)
from glustolibs.gluster.heal_libs import (
get_self_heal_daemon_pid, is_shd_daemonized,
- monitor_heal_completion, bring_self_heal_daemon_process_offline)
+ monitor_heal_completion, bring_self_heal_daemon_process_offline,
+ disable_granular_heal)
from glustolibs.gluster.heal_ops import (get_heal_info_summary,
trigger_heal_full)
from glustolibs.io.utils import validate_io_procs
from glustolibs.misc.misc_libs import upload_scripts
-from glustolibs.gluster.volume_ops import set_volume_options
+from glustolibs.gluster.volume_ops import (set_volume_options,
+ get_volume_options)
from glustolibs.gluster.mount_ops import mount_volume, umount_volume
@@ -99,6 +101,15 @@ class SelfHealDaemonProcessTestsWithSingleVolume(GlusterBaseClass):
* heal should complete successfully
"""
# pylint: disable=too-many-locals,too-many-statements,too-many-lines
+
+ # Disable granular heal if not disabled already
+ granular = get_volume_options(self.mnode, self.volname,
+ 'granular-entry-heal')
+ if granular['cluster.granular-entry-heal'] == 'on':
+ ret = disable_granular_heal(self.mnode, self.volname)
+ self.assertTrue(ret,
+ "Unable to set granular-entry-heal to on")
+
# Setting Volume options
options = {"metadata-self-heal": "on",
"entry-self-heal": "on",
@@ -131,7 +142,7 @@ class SelfHealDaemonProcessTestsWithSingleVolume(GlusterBaseClass):
all_mounts_procs, num_files_to_write = [], 100
for mount_obj in self.mounts:
cmd = ("/usr/bin/env python %s create_files "
- "-f %s --base-file-name file %s" % (self.script_upload_path,
+ "-f %d --base-file-name file %s" % (self.script_upload_path,
num_files_to_write,
mount_obj.mountpoint))
proc = g.run_async(mount_obj.client_system, cmd,
@@ -221,8 +232,8 @@ class SelfHealDaemonProcessTestsWithSingleVolume(GlusterBaseClass):
all_mounts_procs = []
for mount_obj in self.mounts:
- cmd = ("/usr/bin/env python %s read %s"
- % (self.script_upload_path, mount_obj.mountpoint))
+ cmd = ("cd %s;for i in `seq 1 5`; do ls -l;cat *; stat *; sleep 5;"
+ " done " % (mount_obj.mountpoint))
proc = g.run_async(mount_obj.client_system, cmd,
user=mount_obj.user)
all_mounts_procs.append(proc)