Diffstat (limited to 'tests/functional/afr/heal')
-rw-r--r--  tests/functional/afr/heal/test_afr_self_heal_add_brick_rebalance.py  199
-rw-r--r--  tests/functional/afr/heal/test_data_split_brain_resolution.py  29
-rw-r--r--  tests/functional/afr/heal/test_dir_time_stamp_restoration.py  160
-rwxr-xr-x  tests/functional/afr/heal/test_existing_shd_should_take_care_of_self_healing.py  6
-rw-r--r--  tests/functional/afr/heal/test_heal_for_conservative_merge_with_two_bricks_blame.py  175
-rw-r--r--  tests/functional/afr/heal/test_heal_info_no_hang.py  162
-rw-r--r--  tests/functional/afr/heal/test_heal_info_while_accessing_file.py  23
-rw-r--r--  tests/functional/afr/heal/test_impact_of_replace_brick_on_glustershd.py  186
-rw-r--r--  tests/functional/afr/heal/test_metadata_split_brain_resolution.py  28
-rw-r--r--  tests/functional/afr/heal/test_no_glustershd_with_distribute.py  177
-rwxr-xr-x  tests/functional/afr/heal/test_self_heal.py  199
-rwxr-xr-x  tests/functional/afr/heal/test_self_heal_daemon_process.py  41
-rw-r--r--  tests/functional/afr/heal/test_self_heal_with_link_files.py  405
-rw-r--r--  tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py  600
-rw-r--r--  tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py  250
15 files changed, 2427 insertions, 213 deletions
diff --git a/tests/functional/afr/heal/test_afr_self_heal_add_brick_rebalance.py b/tests/functional/afr/heal/test_afr_self_heal_add_brick_rebalance.py
new file mode 100644
index 000000000..df05dd86c
--- /dev/null
+++ b/tests/functional/afr/heal/test_afr_self_heal_add_brick_rebalance.py
@@ -0,0 +1,199 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from time import sleep
+from random import sample
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+ are_bricks_offline)
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.heal_libs import (is_volume_in_split_brain,
+ monitor_heal_completion)
+from glustolibs.gluster.lib_utils import collect_bricks_arequal
+from glustolibs.gluster.rebalance_ops import (rebalance_start,
+ wait_for_rebalance_to_complete)
+from glustolibs.gluster.volume_ops import volume_start
+from glustolibs.gluster.volume_libs import (
+ verify_all_process_of_volume_are_online, get_subvols, expand_volume,
+ wait_for_volume_process_to_be_online)
+from glustolibs.io.utils import (validate_io_procs,
+ list_all_files_and_dirs_mounts,
+ wait_for_io_to_complete)
+from glustolibs.misc.misc_libs import upload_scripts
+
+
+@runs_on([['arbiter', 'distributed-arbiter', 'replicated',
+ 'distributed-replicated'], ['glusterfs']])
+class TestAfrSelfHealAddBrickRebalance(GlusterBaseClass):
+
+ @classmethod
+ def setUpClass(cls):
+ # Calling GlusterBaseClass setUpClass
+ cls.get_super_method(cls, 'setUpClass')()
+
+ # Upload io scripts for running IO on mounts
+ cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
+ "file_dir_ops.py")
+ ret = upload_scripts(cls.clients, cls.script_upload_path)
+ if not ret:
+ raise ExecutionError("Failed to upload IO scripts to clients %s" %
+ cls.clients)
+ g.log.info("Successfully uploaded IO scripts to clients % s",
+ cls.clients)
+
+ def setUp(self):
+
+ # Calling GlusterBaseClass setUp
+ self.get_super_method(self, 'setUp')()
+
+ if not self.setup_volume_and_mount_volume(self.mounts):
+ raise ExecutionError("Unable to setup and mount volume")
+
+ def tearDown(self):
+
+ # Wait if any IOs are pending from the test
+ if self.all_mounts_procs:
+ ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts)
+            if not ret:
+ raise ExecutionError(
+ "Wait for IO completion failed on some of the clients")
+
+ # Unmount and cleanup the volume
+ if not self.unmount_volume_and_cleanup_volume(self.mounts):
+ raise ExecutionError("Unable to unmount and cleanup volume")
+
+ # Calling GlusterBaseClass Teardown
+ self.get_super_method(self, 'tearDown')()
+
+ def test_afr_self_heal_add_brick_rebalance(self):
+ """
+ Test Steps:
+ 1. Create a replicated/distributed-replicate volume and mount it
+ 2. Start IO from the clients
+ 3. Bring down a brick from the subvol and validate it is offline
+ 4. Bring back the brick online and wait for heal to complete
+ 5. Once the heal is completed, expand the volume.
+ 6. Trigger rebalance and wait for rebalance to complete
+ 7. Validate IO, no errors during the steps performed from step 2
+ 8. Check arequal of the subvol and all the brick in the same subvol
+ should have same checksum
+ """
+ # Start IO from the clients
+ self.all_mounts_procs = []
+ for count, mount_obj in enumerate(self.mounts):
+ g.log.info("Starting IO on %s:%s", mount_obj.client_system,
+ mount_obj.mountpoint)
+ cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
+ "--dirname-start-num %d --dir-depth 3 --dir-length 5 "
+ "--max-num-of-dirs 5 --num-of-files 30 %s" % (
+ self.script_upload_path, count,
+ mount_obj.mountpoint))
+ proc = g.run_async(mount_obj.client_system, cmd,
+ user=mount_obj.user)
+ self.all_mounts_procs.append(proc)
+
+ # List a brick in each subvol and bring them offline
+ subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
+ brick_to_bring_offline = []
+ for subvol in subvols:
+ self.assertTrue(subvol, "List is empty")
+ brick_to_bring_offline.extend(sample(subvol, 1))
+
+ ret = bring_bricks_offline(self.volname, brick_to_bring_offline)
+ self.assertTrue(ret, "Unable to bring brick: {} offline".format(
+ brick_to_bring_offline))
+
+ # Validate the brick is offline
+ ret = are_bricks_offline(self.mnode, self.volname,
+ brick_to_bring_offline)
+ self.assertTrue(ret, "Brick:{} is still online".format(
+ brick_to_bring_offline))
+
+ # Wait for 10 seconds for IO to be generated
+ sleep(10)
+
+ # Start volume with force to bring all bricks online
+ ret, _, _ = volume_start(self.mnode, self.volname, force=True)
+ self.assertEqual(ret, 0, "Volume start with force failed")
+ g.log.info("Volume: %s started successfully", self.volname)
+
+ # Verify volume's all process are online
+ ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
+ self.assertTrue(ret, ("Volume %s : All process are not online",
+ self.volname))
+
+ # Monitor heal completion
+ self.assertTrue(monitor_heal_completion(self.mnode, self.volname,
+ interval_check=10),
+ "Heal failed after 20 mins")
+
+        # Check if there are any files in split-brain
+ self.assertFalse(is_volume_in_split_brain(self.mnode, self.volname),
+ "Some files are in split brain for "
+ "volume: {}".format(self.volname))
+
+ # Expanding volume by adding bricks to the volume when IO in progress
+ ret = expand_volume(self.mnode, self.volname, self.servers,
+ self.all_servers_info)
+ self.assertTrue(ret, ("Failed to expand the volume when IO in "
+ "progress on volume %s", self.volname))
+
+ # Wait for volume processes to be online
+ ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
+ self.assertTrue(ret, ("Failed to wait for volume %s processes to "
+ "be online", self.volname))
+
+ # Start Rebalance
+ ret, _, _ = rebalance_start(self.mnode, self.volname)
+ self.assertEqual(ret, 0, ("Failed to start rebalance on the volume "
+ "%s", self.volname))
+ g.log.info("Successfully started rebalance on the "
+ "volume %s", self.volname)
+
+        # Without this sleep, the next step fails with glusterd syncop locking.
+ sleep(2)
+
+ # Wait for rebalance to complete
+ ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
+ timeout=1800)
+ self.assertTrue(ret, ("Rebalance is not yet complete on the volume "
+ "%s", self.volname))
+ g.log.info("Rebalance is successfully complete on "
+ "the volume %s", self.volname)
+
+ # Validate IO
+ ret = validate_io_procs(self.all_mounts_procs, self.mounts)
+ self.io_validation_complete = True
+ self.assertTrue(ret, "IO failed on some of the clients")
+        self.all_mounts_procs = []
+
+ # List all files and dirs created
+ ret = list_all_files_and_dirs_mounts(self.mounts)
+ self.assertTrue(ret, "Failed to list all files and dirs")
+
+ # Check arequal checksum of all the bricks is same
+ for subvol in subvols:
+            ret, arequal_from_the_bricks = collect_bricks_arequal(subvol)
+            self.assertTrue(ret, "Failed to collect arequal across the "
+                            "bricks in the subvol {}".format(subvol))
+            unique_checksums = set(arequal_from_the_bricks)
+            if self.volume_type in ("arbiter", "distributed-arbiter"):
+                unique_checksums = set(arequal_from_the_bricks[:2])
+            self.assertEqual(len(unique_checksums), 1,
+                             "Arequal is not same on all the bricks "
+                             "in the subvol {}".format(subvol))
diff --git a/tests/functional/afr/heal/test_data_split_brain_resolution.py b/tests/functional/afr/heal/test_data_split_brain_resolution.py
index e1284cad6..73fd144c1 100644
--- a/tests/functional/afr/heal/test_data_split_brain_resolution.py
+++ b/tests/functional/afr/heal/test_data_split_brain_resolution.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com>
+# Copyright (C) 2017-2020 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -23,6 +23,7 @@
"""
from glusto.core import Glusto as g
+
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.misc.misc_libs import upload_scripts
@@ -45,7 +46,7 @@ class HealDataSplitBrain(GlusterBaseClass):
def setUpClass(cls):
# Calling GlusterBaseClass setUpClass
- GlusterBaseClass.setUpClass.im_func(cls)
+ cls.get_super_method(cls, 'setUpClass')()
# Override Volume
if cls.volume_type == "replicated":
@@ -57,11 +58,9 @@ class HealDataSplitBrain(GlusterBaseClass):
# Upload io scripts for running IO on mounts
g.log.info("Upload io scripts to clients %s for running IO on "
"mounts", cls.clients)
- script_local_path = ("/usr/share/glustolibs/io/scripts/"
- "file_dir_ops.py")
cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
"file_dir_ops.py")
- ret = upload_scripts(cls.clients, script_local_path)
+ ret = upload_scripts(cls.clients, cls.script_upload_path)
if not ret:
raise ExecutionError("Failed to upload IO scripts "
"to clients %s" % cls.clients)
@@ -75,17 +74,19 @@ class HealDataSplitBrain(GlusterBaseClass):
raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
g.log.info("Successful in Setup Volume and Mount Volume")
- @classmethod
- def tearDownClass(cls):
-
- # Cleanup Volume
- g.log.info("Starting to clean up Volume %s", cls.volname)
- ret = cls.unmount_volume_and_cleanup_volume(cls.mounts)
+ def tearDown(self):
+ """
+ Cleanup and umount volume
+ """
+ # Cleanup and umount volume
+ g.log.info("Starting to Unmount Volume and Cleanup Volume")
+ ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
if not ret:
- raise ExecutionError("Failed to create volume")
- g.log.info("Successful in cleaning up Volume %s", cls.volname)
+ raise ExecutionError("Failed to umount the vol & cleanup Volume")
+ g.log.info("Successful in umounting the volume and Cleanup")
- GlusterBaseClass.tearDownClass.im_func(cls)
+ # Calling GlusterBaseClass teardown
+ self.get_super_method(self, 'tearDown')()
def verify_brick_arequals(self):
g.log.info("Fetching bricks for the volume: %s", self.volname)
diff --git a/tests/functional/afr/heal/test_dir_time_stamp_restoration.py b/tests/functional/afr/heal/test_dir_time_stamp_restoration.py
new file mode 100644
index 000000000..6a4ef2a19
--- /dev/null
+++ b/tests/functional/afr/heal/test_dir_time_stamp_restoration.py
@@ -0,0 +1,160 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+"""
+Description:
+ Check if parent directory timestamps are restored after an entry heal.
+"""
+from glusto.core import Glusto as g
+
+from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on)
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.brick_libs import (
+ bring_bricks_offline,
+ bring_bricks_online,
+ are_bricks_offline,
+ select_volume_bricks_to_bring_offline,
+ get_all_bricks)
+from glustolibs.gluster.heal_ops import trigger_heal
+from glustolibs.gluster.glusterdir import (mkdir, rmdir)
+from glustolibs.gluster.glusterfile import (get_fattr, get_file_stat)
+from glustolibs.gluster.volume_libs import set_volume_options
+from glustolibs.gluster.heal_libs import monitor_heal_completion
+
+
+@runs_on([['replicated'],
+ ['glusterfs']])
+class TestDirTimeStampRestore(GlusterBaseClass):
+
+ def setUp(self):
+ self.get_super_method(self, 'setUp')()
+
+ ret = self.setup_volume_and_mount_volume(mounts=self.mounts,
+ volume_create_force=False)
+ if not ret:
+ raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+ g.log.info("Successful in Setup Volume and Mount Volume")
+
+ self.bricks_list = get_all_bricks(self.mnode, self.volname)
+
+ def tearDown(self):
+ ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+ if not ret:
+ raise ExecutionError("Failed to umount the vol & cleanup Volume")
+ g.log.info("Successful in umounting the volume and Cleanup")
+
+ self.get_super_method(self, 'tearDown')()
+
+ def are_mdata_xattrs_equal(self):
+ """Check if atime/mtime/ctime in glusterfs.mdata xattr are identical"""
+ timestamps = []
+ for brick_path in self.bricks_list:
+ server, brick = brick_path.split(':')
+ fattr = get_fattr(server, '%s/%s' % (brick, "dir1"),
+ 'trusted.glusterfs.mdata')
+ self.assertIsNotNone(fattr, 'Unable to get mdata xattr')
+ timestamps.append(fattr)
+
+ g.log.debug("mdata list = %s", ''.join(map(str, timestamps)))
+ return timestamps.count(timestamps[0]) == len(timestamps)
+
+ def are_stat_timestamps_equal(self):
+ """Check if atime/mtime/ctime in stat info are identical"""
+ timestamps = []
+ for brick_path in self.bricks_list:
+ server, brick = brick_path.split(':')
+ stat_data = get_file_stat(server, "%s/dir1" % brick)
+ ts_string = "{}-{}-{}".format(stat_data['epoch_atime'],
+ stat_data['epoch_mtime'],
+ stat_data['epoch_ctime'])
+ timestamps.append(ts_string)
+
+ g.log.debug("stat list = %s", ''.join(map(str, timestamps)))
+ return timestamps.count(timestamps[0]) == len(timestamps)
+
+ def perform_test(self, ctime):
+ """
+ Testcase steps:
+ 1. Enable/disable features,ctime based on function argument.
+ 2. Create a directory on the mount point.
+ 3. Kill a brick and create a file inside the directory.
+ 4. Bring the brick online.
+ 5. Trigger heal and wait for its completion.
+ 6. Verify that the atime, mtime and ctime of the directory are same on
+ all bricks of the replica.
+ """
+ if ctime:
+ option = {'features.ctime': 'on'}
+ else:
+ option = {'features.ctime': 'off'}
+ ret = set_volume_options(self.mnode, self.volname, option)
+        self.assertTrue(ret, 'failed to set option %s on %s'
+                        % (option, self.volname))
+
+ client, m_point = (self.mounts[0].client_system,
+ self.mounts[0].mountpoint)
+
+ dirpath = '{}/dir1'.format(m_point)
+ ret = mkdir(client, dirpath)
+ self.assertTrue(ret, 'Unable to create a directory from mount point')
+
+ bricks_to_bring_offline = select_volume_bricks_to_bring_offline(
+ self.mnode, self.volname)
+ self.assertIsNotNone(bricks_to_bring_offline, "List is empty")
+ ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
+ self.assertTrue(ret, 'Failed to bring bricks {} offline'.
+ format(bricks_to_bring_offline))
+ ret = are_bricks_offline(self.mnode, self.volname,
+ bricks_to_bring_offline)
+ self.assertTrue(ret, 'Bricks {} are not offline'.
+ format(bricks_to_bring_offline))
+
+ cmd = 'touch {}/file1'.format(dirpath)
+ ret, _, _ = g.run(client, cmd)
+ self.assertEqual(ret, 0, 'Unable to create file from mount point')
+
+ ret = bring_bricks_online(
+ self.mnode, self.volname,
+ bricks_to_bring_offline,
+ bring_bricks_online_methods=['volume_start_force'])
+ self.assertTrue(ret, 'Failed to bring bricks {} online'.format
+ (bricks_to_bring_offline))
+ ret = trigger_heal(self.mnode, self.volname)
+ self.assertTrue(ret, 'Starting heal failed')
+ ret = monitor_heal_completion(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal has not yet completed')
+
+ if ctime:
+ ret = self.are_mdata_xattrs_equal()
+ self.assertTrue(ret, "glusterfs.mdata mismatch for {}"
+ .format(dirpath))
+ else:
+ ret = self.are_stat_timestamps_equal()
+ self.assertTrue(ret, "stat mismatch for {}".format(dirpath))
+
+ ret = rmdir(client, dirpath, force=True)
+ self.assertTrue(ret, 'Unable to delete directory from mount point')
+
+ def test_dir_time_stamp_restoration(self):
+ """
+ Create pending entry self-heal on a replica volume and verify that
+ after the heal is complete, the atime, mtime and ctime of the parent
+ directory are identical on all bricks of the replica.
+
+ The test is run with features.ctime enabled as well as disabled.
+ """
+ self.perform_test(ctime=True)
+ self.perform_test(ctime=False)
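The two helpers in this test compare timestamps from different sources
depending on the features.ctime setting: with ctime on, the consistent times
live in the trusted.glusterfs.mdata xattr on each brick; with it off, only
the backend stat epochs are available. A condensed sketch of that choice,
reusing the glustolibs helpers the test already imports (`server` and
`brick_dir` are hypothetical placeholders):

    from glustolibs.gluster.glusterfile import get_fattr, get_file_stat

    def brick_dir_timestamp_token(server, brick_dir, ctime_enabled):
        """Return a comparable timestamp token for a dir on one brick."""
        if ctime_enabled:
            # the ctime feature stores atime/mtime/ctime in this xattr
            return get_fattr(server, brick_dir, 'trusted.glusterfs.mdata')
        stat_data = get_file_stat(server, brick_dir)
        return "{}-{}-{}".format(stat_data['epoch_atime'],
                                 stat_data['epoch_mtime'],
                                 stat_data['epoch_ctime'])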
diff --git a/tests/functional/afr/heal/test_existing_shd_should_take_care_of_self_healing.py b/tests/functional/afr/heal/test_existing_shd_should_take_care_of_self_healing.py
index 64f5254a5..3fe682e59 100755
--- a/tests/functional/afr/heal/test_existing_shd_should_take_care_of_self_healing.py
+++ b/tests/functional/afr/heal/test_existing_shd_should_take_care_of_self_healing.py
@@ -46,7 +46,7 @@ class SelfHealDaemonProcessTestsWithHealing(GlusterBaseClass):
which is used in tests
"""
# calling GlusterBaseClass setUpClass
- GlusterBaseClass.setUp.im_func(self)
+ self.get_super_method(self, 'setUp')()
# Setup Volume and Mount Volume
g.log.info("Starting to Setup Volume and Mount Volume")
@@ -74,8 +74,8 @@ class SelfHealDaemonProcessTestsWithHealing(GlusterBaseClass):
raise ExecutionError("Failed to Unmount Volume and Cleanup Volume")
g.log.info("Successful in Unmount Volume and Cleanup Volume")
- # calling GlusterBaseClass tearDownClass
- GlusterBaseClass.tearDownClass.im_func(self)
+ # calling GlusterBaseClass tearDown
+ self.get_super_method(self, 'tearDown')()
def test_existing_glustershd_should_take_care_of_self_healing(self):
"""
diff --git a/tests/functional/afr/heal/test_heal_for_conservative_merge_with_two_bricks_blame.py b/tests/functional/afr/heal/test_heal_for_conservative_merge_with_two_bricks_blame.py
new file mode 100644
index 000000000..163596bb7
--- /dev/null
+++ b/tests/functional/afr/heal/test_heal_for_conservative_merge_with_two_bricks_blame.py
@@ -0,0 +1,175 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from time import sleep
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.brick_libs import (get_all_bricks, are_bricks_offline,
+ bring_bricks_offline,
+ get_online_bricks_list,
+ are_bricks_online)
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.gluster.gluster_init import restart_glusterd
+from glustolibs.gluster.glusterfile import set_fattr, get_fattr
+from glustolibs.gluster.heal_libs import (is_volume_in_split_brain,
+ monitor_heal_completion)
+from glustolibs.gluster.lib_utils import collect_bricks_arequal
+
+
+@runs_on([['replicated'], ['glusterfs']])
+class TestHealForConservativeMergeWithTwoBricksBlame(GlusterBaseClass):
+
+ def setUp(self):
+ # calling GlusterBaseClass setUp
+ self.get_super_method(self, 'setUp')()
+
+ # Setup volume and mount it.
+ if not self.setup_volume_and_mount_volume([self.mounts[0]]):
+ raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+
+ def tearDown(self):
+ # Unmount and cleanup the volume
+ if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]):
+ raise ExecutionError("Unable to unmount and cleanup volume")
+
+ # Calling GlusterBaseClass tearDown
+ self.get_super_method(self, 'tearDown')()
+
+ def _bring_brick_offline_and_check(self, brick):
+ """Brings brick offline an checks if it is offline or not"""
+ ret = bring_bricks_offline(self.volname, [brick])
+ self.assertTrue(ret, "Unable to bring brick: {} offline".format(brick))
+
+ # Validate the brick is offline
+ ret = are_bricks_offline(self.mnode, self.volname, [brick])
+ self.assertTrue(ret, "Brick:{} is still online".format(brick))
+
+ def _get_fattr_for_the_brick(self, brick):
+ """Get xattr of trusted.afr.volname-client-0 for the given brick"""
+ host, fqpath = brick.split(":")
+ fqpath = fqpath + "/dir1"
+ fattr = "trusted.afr.{}-client-0".format(self.volname)
+ return get_fattr(host, fqpath, fattr, encode="hex")
+
+ def _check_peers_status(self):
+ """Validates peers are connected or not"""
+ count = 0
+ while count < 4:
+ if self.validate_peers_are_connected():
+ return
+ sleep(5)
+ count += 1
+ self.fail("Peers are not in connected state")
+
+ def test_heal_for_conservative_merge_with_two_bricks_blame(self):
+ """
+ 1) Create 1x3 volume and fuse mount the volume
+ 2) On mount created a dir dir1
+        3) Pkill glusterfsd on node n1 (b2 on node2 and b3 on node3 up)
+        4) touch f{1..10} on the mountpoint
+        5) b2 and b3 xattrs would be blaming b1 as files are created while
+           b1 is down
+        6) Reset the b3 xattrs to NOT blame b1 by using setfattr
+        7) Now pkill glusterfsd of b2 on node2
+        8) Restart glusterd on node1 to bring up b1
+        9) Now brick b1 is online, b2 down, b3 online
+ 10) touch x{1..10} under dir1 itself
+ 11) Again reset xattr on node3 of b3 so that it doesn't blame b2,
+ as done for b1 in step 6
+ 12) Do restart glusterd on node2 hosting b2 to bring all bricks online
+ 13) Check for heal info, split-brain and arequal for the bricks
+ """
+ # pylint: disable=too-many-locals
+        # Create dir 'dir1' on the mountpoint
+ path = self.mounts[0].mountpoint + "/dir1"
+ ret = mkdir(self.mounts[0].client_system, path, parents=True)
+ self.assertTrue(ret, "Directory {} creation failed".format(path))
+
+ all_bricks = get_all_bricks(self.mnode, self.volname)
+ self.assertIsNotNone(all_bricks, "Unable to fetch bricks of volume")
+ brick1, brick2, brick3 = all_bricks
+
+ # Bring first brick offline
+ self._bring_brick_offline_and_check(brick1)
+
+ # touch f{1..10} files on the mountpoint
+ cmd = ("cd {mpt}; for i in `seq 1 10`; do touch f$i"
+ "; done".format(mpt=path))
+ ret, _, _ = g.run(self.mounts[0].client_system, cmd)
+ self.assertEqual(ret, 0, "Unable to create files on mountpoint")
+
+ # Check b2 and b3 xattrs are blaming b1 and are same
+ self.assertEqual(self._get_fattr_for_the_brick(brick2),
+ self._get_fattr_for_the_brick(brick3),
+ "Both the bricks xattrs are not blaming "
+ "brick: {}".format(brick1))
+
+ # Reset the xattrs of dir1 on b3 for brick b1
+ first_xattr_to_reset = "trusted.afr.{}-client-0".format(self.volname)
+ xattr_value = "0x000000000000000000000000"
+ host, brick_path = brick3.split(":")
+ brick_path = brick_path + "/dir1"
+ ret = set_fattr(host, brick_path, first_xattr_to_reset, xattr_value)
+ self.assertTrue(ret, "Unable to set xattr for the directory")
+
+ # Kill brick2 on the node2
+ self._bring_brick_offline_and_check(brick2)
+
+ # Restart glusterd on node1 to bring the brick1 online
+ self.assertTrue(restart_glusterd([brick1.split(":")[0]]), "Unable to "
+ "restart glusterd")
+ # checking for peer status post glusterd restart
+ self._check_peers_status()
+
+ # Check if the brick b1 on node1 is online or not
+ online_bricks = get_online_bricks_list(self.mnode, self.volname)
+ self.assertIsNotNone(online_bricks, "Unable to fetch online bricks")
+ self.assertIn(brick1, online_bricks, "Brick:{} is still offline after "
+ "glusterd restart".format(brick1))
+
+ # Create 10 files under dir1 naming x{1..10}
+ cmd = ("cd {mpt}; for i in `seq 1 10`; do touch x$i"
+ "; done".format(mpt=path))
+ ret, _, _ = g.run(self.mounts[0].client_system, cmd)
+ self.assertEqual(ret, 0, "Unable to create files on mountpoint")
+
+ # Reset the xattrs from brick3 on to brick2
+ second_xattr_to_reset = "trusted.afr.{}-client-1".format(self.volname)
+ ret = set_fattr(host, brick_path, second_xattr_to_reset, xattr_value)
+ self.assertTrue(ret, "Unable to set xattr for the directory")
+
+ # Bring brick2 online
+ self.assertTrue(restart_glusterd([brick2.split(":")[0]]), "Unable to "
+ "restart glusterd")
+ self._check_peers_status()
+
+        self.assertTrue(are_bricks_online(self.mnode, self.volname,
+                                          [brick2]),
+                        "Brick {} is not online".format(brick2))
+
+        # Check that no files are in split-brain and that heal completed
+ self.assertFalse(is_volume_in_split_brain(self.mnode, self.volname),
+ "Some files are in split brain for "
+ "volume: {}".format(self.volname))
+ self.assertTrue(monitor_heal_completion(self.mnode, self.volname),
+ "Conservative merge of files failed")
+
+ # Check arequal checksum of all the bricks is same
+ ret, arequal_from_the_bricks = collect_bricks_arequal(all_bricks)
+ self.assertTrue(ret, "Arequal is collected successfully across the"
+ " bricks in the subvol {}".format(all_bricks))
+ self.assertEqual(len(set(arequal_from_the_bricks)), 1, "Arequal is "
+ "same on all the bricks in the subvol")
diff --git a/tests/functional/afr/heal/test_heal_info_no_hang.py b/tests/functional/afr/heal/test_heal_info_no_hang.py
new file mode 100644
index 000000000..82f8b0598
--- /dev/null
+++ b/tests/functional/afr/heal/test_heal_info_no_hang.py
@@ -0,0 +1,162 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+"""
+Description:
+ heal info completes when there is ongoing I/O and a lot of pending heals.
+"""
+import random
+from glusto.core import Glusto as g
+
+from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on)
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.volume_libs import get_subvols
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+ bring_bricks_online,
+ are_bricks_offline,
+ get_all_bricks)
+from glustolibs.gluster.heal_ops import trigger_heal
+from glustolibs.io.utils import run_linux_untar
+from glustolibs.gluster.glusterdir import mkdir
+
+
+@runs_on([['distributed-replicated'],
+ ['glusterfs']])
+class TestHealInfoNoHang(GlusterBaseClass):
+
+ def setUp(self):
+ self.get_super_method(self, 'setUp')()
+
+ self.is_io_running = False
+
+ ret = self.setup_volume_and_mount_volume(mounts=self.mounts,
+ volume_create_force=False)
+ if not ret:
+ raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+ g.log.info("Successful in Setup Volume and Mount Volume")
+
+ self.bricks_list = get_all_bricks(self.mnode, self.volname)
+ self.subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
+
+ def tearDown(self):
+ if self.is_io_running:
+ if not self._wait_for_untar_completion():
+ g.log.error("I/O failed to stop on clients")
+
+ ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+ if not ret:
+ raise ExecutionError("Failed to umount the vol & cleanup Volume")
+ g.log.info("Successful in umounting the volume and Cleanup")
+
+ self.get_super_method(self, 'tearDown')()
+
+ def _wait_for_untar_completion(self):
+ """Wait for the kernel untar to complete"""
+ has_process_stopped = []
+ for proc in self.list_of_io_processes:
+ try:
+ ret, _, _ = proc.async_communicate()
+                if not ret:
+                    has_process_stopped.append(False)
+                else:
+                    has_process_stopped.append(True)
+ except ValueError:
+ has_process_stopped.append(True)
+ return all(has_process_stopped)
+
+ def _does_heal_info_complete_within_timeout(self):
+ """Check if heal info CLI completes within a specific timeout"""
+ # We are just assuming 1 entry takes one second to process, which is
+ # a very high number but some estimate is better than a random magic
+ # value for timeout.
+ timeout = self.num_entries * 1
+
+ cmd = "timeout %s gluster volume heal %s info" % (timeout,
+ self.volname)
+ ret, _, _ = g.run(self.mnode, cmd)
+        return ret == 0
+
+ def test_heal_info_no_hang(self):
+ """
+ Testcase steps:
+ 1. Start kernel untar on the mount
+ 2. While untar is going on, kill a brick of the replica.
+ 3. Wait for the untar to be over, resulting in pending heals.
+ 4. Get the approx. number of pending heals and save it
+ 5. Bring the brick back online.
+ 6. Trigger heal
+ 7. Run more I/Os with dd command
+ 8. Run heal info command and check that it completes successfully under
+ a timeout that is based on the no. of heals in step 4.
+ """
+ self.list_of_io_processes = []
+ self.linux_untar_dir = "{}/{}".format(self.mounts[0].mountpoint,
+ "linuxuntar")
+ ret = mkdir(self.clients[0], self.linux_untar_dir)
+ self.assertTrue(ret, "Failed to create dir linuxuntar for untar")
+
+ # Start linux untar on dir linuxuntar
+ ret = run_linux_untar(self.clients[0], self.mounts[0].mountpoint,
+ dirs=tuple(['linuxuntar']))
+ self.list_of_io_processes += ret
+ self.is_io_running = True
+
+ # Kill brick resulting in heal backlog.
+ brick_to_bring_offline = random.choice(self.bricks_list)
+ ret = bring_bricks_offline(self.volname, brick_to_bring_offline)
+ self.assertTrue(ret, 'Failed to bring bricks %s offline'
+ % brick_to_bring_offline)
+ ret = are_bricks_offline(self.mnode, self.volname,
+ [brick_to_bring_offline])
+ self.assertTrue(ret, 'Bricks %s are not offline'
+ % brick_to_bring_offline)
+ g.log.info('Bringing bricks %s offline is successful',
+ brick_to_bring_offline)
+
+ ret = self._wait_for_untar_completion()
+ self.assertFalse(ret, "IO didn't complete or failed on client")
+ self.is_io_running = False
+
+ # Get approx. no. of entries to be healed.
+ cmd = ("gluster volume heal %s statistics heal-count | grep Number "
+ "| awk '{sum+=$4} END {print sum/2}'" % self.volname)
+        ret, num_entries, _ = g.run(self.mnode, cmd)
+        self.assertEqual(ret, 0, "Failed to get heal-count statistics")
+        self.num_entries = int(float(num_entries.strip()))
+
+ # Restart the down bricks
+ ret = bring_bricks_online(self.mnode, self.volname,
+ brick_to_bring_offline)
+ self.assertTrue(ret, 'Failed to bring brick %s online' %
+ brick_to_bring_offline)
+ g.log.info('Bringing brick %s online is successful',
+ brick_to_bring_offline)
+ # Trigger heal
+ ret = trigger_heal(self.mnode, self.volname)
+ self.assertTrue(ret, 'Starting heal failed')
+ g.log.info('Index heal launched')
+
+ # Run more I/O
+ cmd = ("for i in `seq 1 10`; do dd if=/dev/urandom of=%s/file_$i "
+ "bs=1M count=100; done" % self.mounts[0].mountpoint)
+ ret = g.run_async(self.mounts[0].client_system, cmd,
+ user=self.mounts[0].user)
+
+ # Get heal info
+ ret = self._does_heal_info_complete_within_timeout()
+ self.assertTrue(ret, 'Heal info timed out')
+        g.log.info('Heal info completed successfully')
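The heal-info bound in this test leans on the coreutils `timeout` wrapper
rather than a Python-side timer; `timeout` kills the command and returns a
non-zero status (124) when the budget is exceeded. A condensed sketch of the
same check, assuming `num_entries` has already been parsed into an int:

    from glusto.core import Glusto as g

    def heal_info_completes(mnode, volname, num_entries):
        """Budget roughly one second per pending heal entry."""
        cmd = "timeout {} gluster volume heal {} info".format(
            num_entries, volname)
        ret, _, _ = g.run(mnode, cmd)
        return ret == 0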
diff --git a/tests/functional/afr/heal/test_heal_info_while_accessing_file.py b/tests/functional/afr/heal/test_heal_info_while_accessing_file.py
index 2fa7b194c..24450702b 100644
--- a/tests/functional/afr/heal/test_heal_info_while_accessing_file.py
+++ b/tests/functional/afr/heal/test_heal_info_while_accessing_file.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2015-2016 Red Hat, Inc. <http://www.redhat.com>
+# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -13,8 +13,8 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
from glusto.core import Glusto as g
+
from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on)
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.volume_libs import get_subvols
@@ -41,16 +41,14 @@ class TestSelfHeal(GlusterBaseClass):
@classmethod
def setUpClass(cls):
# Calling GlusterBaseClass setUpClass
- GlusterBaseClass.setUpClass.im_func(cls)
+ cls.get_super_method(cls, 'setUpClass')()
# Upload io scripts for running IO on mounts
g.log.info("Upload io scripts to clients %s for running IO on mounts",
cls.clients)
- script_local_path = ("/usr/share/glustolibs/io/scripts/"
- "file_dir_ops.py")
cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
"file_dir_ops.py")
- ret = upload_scripts(cls.clients, [script_local_path])
+ ret = upload_scripts(cls.clients, cls.script_upload_path)
if not ret:
raise ExecutionError("Failed to upload IO scripts to clients %s"
% cls.clients)
@@ -73,7 +71,7 @@ class TestSelfHeal(GlusterBaseClass):
def setUp(self):
# Calling GlusterBaseClass setUp
- GlusterBaseClass.setUp.im_func(self)
+ self.get_super_method(self, 'setUp')()
self.all_mounts_procs = []
self.io_validation_complete = False
@@ -118,7 +116,7 @@ class TestSelfHeal(GlusterBaseClass):
g.log.info("Successful in umounting the volume and Cleanup")
# Calling GlusterBaseClass teardown
- GlusterBaseClass.tearDown.im_func(self)
+ self.get_super_method(self, 'tearDown')()
def test_heal_info_shouldnot_list_files_being_accessed(self):
"""
@@ -152,8 +150,9 @@ class TestSelfHeal(GlusterBaseClass):
mount_obj.client_system, mount_obj.mountpoint)
# Creating files
- cmd = ("python %s create_files -f 100 %s"
- % (self.script_upload_path, mount_obj.mountpoint))
+ cmd = "/usr/bin/env python %s create_files -f 100 %s" % (
+ self.script_upload_path,
+ mount_obj.mountpoint)
proc = g.run_async(mount_obj.client_system, cmd,
user=mount_obj.user)
@@ -214,9 +213,7 @@ class TestSelfHeal(GlusterBaseClass):
# Compare dicts before accessing and while accessing
g.log.info('Comparing entries before modifying and while modifying...')
- ret = cmp(entries_before_accessing, entries_while_accessing)
- self.assertEqual(ret, 0, 'Entries before modifying and while modifying'
- 'are not equal')
+ self.assertDictEqual(entries_before_accessing, entries_while_accessing)
g.log.info('Comparison entries before modifying and while modifying'
'finished successfully.')
diff --git a/tests/functional/afr/heal/test_impact_of_replace_brick_on_glustershd.py b/tests/functional/afr/heal/test_impact_of_replace_brick_on_glustershd.py
new file mode 100644
index 000000000..efd2f8745
--- /dev/null
+++ b/tests/functional/afr/heal/test_impact_of_replace_brick_on_glustershd.py
@@ -0,0 +1,186 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from random import choice
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass
+from glustolibs.gluster.volume_libs import (
+ log_volume_info_and_status, wait_for_volume_process_to_be_online,
+ setup_volume, cleanup_volume)
+from glustolibs.gluster.lib_utils import get_servers_bricks_dict
+from glustolibs.gluster.brick_libs import get_all_bricks
+from glustolibs.gluster.brick_ops import replace_brick
+from glustolibs.gluster.heal_libs import (get_self_heal_daemon_pid,
+ do_bricks_exist_in_shd_volfile,
+ is_shd_daemonized)
+from glustolibs.gluster.volume_ops import get_volume_list
+
+
+class SelfHealDaemonProcessTestsWithMultipleVolumes(GlusterBaseClass):
+ """
+ SelfHealDaemonProcessTestsWithMultipleVolumes contains tests which
+ verifies the self-heal daemon process on multiple volumes running.
+ """
+ def setUp(self):
+ """
+ setup volume and initialize necessary variables
+ which is used in tests
+ """
+ # Calling GlusterBaseClass setUp
+ self.get_super_method(self, 'setUp')()
+
+ # Setup Volume for all the volume types
+ self.volume_configs = []
+ for volume_type in self.default_volume_type_config:
+ self.volume_configs.append(
+ {'name': 'testvol_%s' % volume_type,
+ 'servers': self.servers,
+ 'voltype': self.default_volume_type_config[volume_type]})
+
+ for volume_config in self.volume_configs[1:]:
+ ret = setup_volume(mnode=self.mnode,
+ all_servers_info=self.all_servers_info,
+ volume_config=volume_config,
+ multi_vol=True)
+ volname = volume_config['name']
+ if not ret:
+ raise ExecutionError("Failed to setup Volume"
+ " %s" % volname)
+ g.log.info("Successful in setting volume %s", volname)
+
+ # Verify volume's all process are online for 60 sec
+ ret = wait_for_volume_process_to_be_online(self.mnode, volname, 60)
+ if not ret:
+ raise ExecutionError("Volume %s : All process are not online"
+ % volname)
+ g.log.info("Successfully Verified volume %s processes are online",
+ volname)
+
+        # Verify glustershd process releases its parent process
+ ret = is_shd_daemonized(self.servers)
+ if not ret:
+ raise ExecutionError("Self Heal Daemon process was still"
+ " holding parent process.")
+ g.log.info("Self Heal Daemon processes are online")
+
+ self.glustershd = "/var/lib/glusterd/glustershd/glustershd-server.vol"
+
+ def tearDown(self):
+ """
+ Clean up the volume and umount volume from client
+ """
+
+ # Cleanup volume
+ volume_list = get_volume_list(self.mnode)
+ for volume in volume_list:
+ ret = cleanup_volume(self.mnode, volume)
+ if not ret:
+ raise ExecutionError("Failed to cleanup Volume %s" % volume)
+ g.log.info("Successfully Cleaned up all Volumes")
+
+ # Calling GlusterBaseClass teardown
+ self.get_super_method(self, 'tearDown')()
+
+ def test_impact_of_replace_brick_on_glustershd(self):
+ """
+ Test Script to verify the glustershd server vol file
+ has only entries for replicate volumes
+ 1.Create multiple volumes and start all volumes
+ 2.Check the glustershd processes - Only 1 glustershd should be listed
+ 3.Do replace brick on the replicate volume
+ 4.Confirm that the brick is replaced
+ 5.Check the glustershd processes - Only 1 glustershd should be listed
+ and pid should be different
+ 6.glustershd server vol should be updated with new bricks
+ """
+ # Check the self-heal daemon process
+ ret, glustershd_pids = get_self_heal_daemon_pid(self.servers)
+ self.assertTrue(ret, ("Either no self heal daemon process found or "
+ "more than one self heal daemon process "
+ "found : %s" % glustershd_pids))
+ g.log.info("Successful in getting single self heal daemon process"
+ " on all nodes %s", self.servers)
+
+ volume_list = get_volume_list(self.mnode)
+ for volume in volume_list:
+
+ # Log Volume Info and Status before replacing brick
+ ret = log_volume_info_and_status(self.mnode, volume)
+ self.assertTrue(ret, ("Logging volume info and status "
+ "failed on volume %s", volume))
+ g.log.info("Successful in logging volume info and status "
+ "of volume %s", volume)
+
+ # Selecting a random source brick to replace
+ src_brick = choice(get_all_bricks(self.mnode, volume))
+ src_node, original_brick = src_brick.split(":")
+
+            # Pick a random destination brick directory on the same node
+            # that is always different from the directory hosting the
+            # original brick
+ list_of_bricks = [
+ brick for brick in get_servers_bricks_dict(
+ src_node, self.all_servers_info)[src_node]
+ if brick not in original_brick]
+            dst_brick = ('{}:{}/{}_replaced'.format(
+                src_node, choice(list_of_bricks),
+                original_brick.split('/')[-1]))
+
+ # Replace brick for the volume
+ ret, _, _ = replace_brick(self.mnode, volume,
+ src_brick, dst_brick)
+ self.assertFalse(ret, "Failed to replace brick "
+ "from the volume %s" % volume)
+ g.log.info("Successfully replaced faulty brick from "
+ "the volume %s", volume)
+
+ # Verify all volume process are online
+ ret = wait_for_volume_process_to_be_online(self.mnode, volume)
+ self.assertTrue(ret, "Volume %s : All process are not online"
+ % volume)
+ g.log.info("Volume %s : All process are online", volume)
+
+ # Check the self-heal daemon process after replacing brick
+ ret, pid_after_replace = get_self_heal_daemon_pid(self.servers)
+ self.assertTrue(ret, "Either no self heal daemon process "
+ "found or more than one self heal "
+ "daemon process found : %s" % pid_after_replace)
+ g.log.info("Successful in getting Single self heal "
+ " daemon process on all nodes %s", self.servers)
+
+ # Compare the glustershd pids
+ self.assertNotEqual(glustershd_pids, pid_after_replace,
+ "Self heal daemon process should be different "
+ "after replacing bricks in %s volume"
+ % volume)
+ g.log.info("EXPECTED: Self heal daemon process should be different"
+ " after replacing bricks in replicate volume")
+
+ # Get the bricks for the volume
+ bricks_list = get_all_bricks(self.mnode, volume)
+ g.log.info("Brick List : %s", bricks_list)
+
+ # Validate the bricks present in volume info with
+ # glustershd server volume file
+ ret = do_bricks_exist_in_shd_volfile(self.mnode, volume,
+ bricks_list)
+ self.assertTrue(ret, ("Brick List from volume info is "
+ "different from glustershd server "
+ "volume file. Please check log file "
+ "for details"))
+ g.log.info("Bricks in volume %s exists in glustershd server "
+ "volume file", volume)
diff --git a/tests/functional/afr/heal/test_metadata_split_brain_resolution.py b/tests/functional/afr/heal/test_metadata_split_brain_resolution.py
index 75c513a5f..7782a4de8 100644
--- a/tests/functional/afr/heal/test_metadata_split_brain_resolution.py
+++ b/tests/functional/afr/heal/test_metadata_split_brain_resolution.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com>
+# Copyright (C) 2017-2020 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -44,7 +44,7 @@ class HealMetadataSplitBrain(GlusterBaseClass):
def setUpClass(cls):
# Calling GlusterBaseClass setUpClass
- GlusterBaseClass.setUpClass.im_func(cls)
+ cls.get_super_method(cls, 'setUpClass')()
# Override Volume
if cls.volume_type == "replicated":
@@ -56,11 +56,9 @@ class HealMetadataSplitBrain(GlusterBaseClass):
# Upload io scripts for running IO on mounts
g.log.info("Upload io scripts to clients %s for running IO on "
"mounts", cls.clients)
- script_local_path = ("/usr/share/glustolibs/io/scripts/"
- "file_dir_ops.py")
cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
"file_dir_ops.py")
- ret = upload_scripts(cls.clients, script_local_path)
+ ret = upload_scripts(cls.clients, cls.script_upload_path)
if not ret:
raise ExecutionError("Failed to upload IO scripts "
"to clients %s" % cls.clients)
@@ -74,17 +72,19 @@ class HealMetadataSplitBrain(GlusterBaseClass):
raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
g.log.info("Successful in Setup Volume and Mount Volume")
- @classmethod
- def tearDownClass(cls):
-
- # Cleanup Volume
- g.log.info("Starting to clean up Volume %s", cls.volname)
- ret = cls.unmount_volume_and_cleanup_volume(cls.mounts)
+ def tearDown(self):
+ """
+ Cleanup and umount volume
+ """
+ # Cleanup and umount volume
+ g.log.info("Starting to Unmount Volume and Cleanup Volume")
+ ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
if not ret:
- raise ExecutionError("Failed to create volume")
- g.log.info("Successful in cleaning up Volume %s", cls.volname)
+ raise ExecutionError("Failed to umount the vol & cleanup Volume")
+ g.log.info("Successful in umounting the volume and Cleanup")
- GlusterBaseClass.tearDownClass.im_func(cls)
+ # Calling GlusterBaseClass teardown
+ self.get_super_method(self, 'tearDown')()
def verify_brick_arequals(self):
g.log.info("Fetching bricks for the volume: %s", self.volname)
diff --git a/tests/functional/afr/heal/test_no_glustershd_with_distribute.py b/tests/functional/afr/heal/test_no_glustershd_with_distribute.py
new file mode 100644
index 000000000..bbefe0cff
--- /dev/null
+++ b/tests/functional/afr/heal/test_no_glustershd_with_distribute.py
@@ -0,0 +1,177 @@
+# Copyright (C) 2017-2021 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+""" Description:
+ Test Cases in this module tests the self heal daemon process.
+"""
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass
+from glustolibs.gluster.volume_libs import (
+ wait_for_volume_process_to_be_online, setup_volume, cleanup_volume,
+ get_volume_type_info)
+from glustolibs.gluster.heal_libs import (get_self_heal_daemon_pid,
+ is_shd_daemonized,)
+from glustolibs.gluster.volume_ops import (volume_stop, volume_start,
+ get_volume_list)
+
+
+class SelfHealDaemonProcessTestsWithMultipleVolumes(GlusterBaseClass):
+ """
+ SelfHealDaemonProcessTestsWithMultipleVolumes contains tests which
+ verifies the self-heal daemon process on multiple volumes running.
+ """
+ @classmethod
+ def setUpClass(cls):
+ """
+ setup volume and initialize necessary variables
+ which is used in tests
+ """
+ # calling GlusterBaseClass setUpClass
+ cls.get_super_method(cls, 'setUpClass')()
+
+ list_of_vol = ['distributed-dispersed', 'replicated',
+ 'dispersed', 'distributed', 'distributed-replicated']
+ cls.volume_configs = []
+ if cls.default_volume_type_config['distributed']['dist_count'] > 3:
+ cls.default_volume_type_config['distributed']['dist_count'] = 3
+
+ for volume_type in list_of_vol:
+ cls.volume_configs.append(
+ {'name': 'testvol_%s' % (volume_type),
+ 'servers': cls.servers,
+ 'voltype': cls.default_volume_type_config[volume_type]})
+ for volume_config in cls.volume_configs:
+ ret = setup_volume(mnode=cls.mnode,
+ all_servers_info=cls.all_servers_info,
+ volume_config=volume_config, multi_vol=True)
+ volname = volume_config['name']
+ if not ret:
+ raise ExecutionError("Failed to setup Volume"
+ " %s" % volname)
+ g.log.info("Successful in setting volume %s", volname)
+
+ # Verify volume's all process are online for 60 sec
+ g.log.info("Verifying volume's all process are online")
+ ret = wait_for_volume_process_to_be_online(cls.mnode, volname, 60)
+ if not ret:
+ raise ExecutionError("Volume %s : All process are not online"
+ % volname)
+ g.log.info("Successfully Verified volume %s processes are online",
+ volname)
+
+            # Verify glustershd process releases its parent process
+ g.log.info("Verifying Self Heal Daemon process is daemonized")
+ ret = is_shd_daemonized(cls.servers)
+ if not ret:
+ raise ExecutionError("Self Heal Daemon process was still"
+ " holding parent process.")
+ g.log.info("Self Heal Daemon processes are online")
+
+ @classmethod
+ def tearDownClass(cls):
+ """
+ Clean up the volume and umount volume from client
+ """
+
+ # stopping the volume
+ g.log.info("Starting to Cleanup all Volumes")
+ volume_list = get_volume_list(cls.mnode)
+ for volume in volume_list:
+ ret = cleanup_volume(cls.mnode, volume)
+ if not ret:
+ raise ExecutionError("Failed to cleanup Volume %s" % volume)
+ g.log.info("Volume: %s cleanup is done", volume)
+ g.log.info("Successfully Cleanedup all Volumes")
+
+ # calling GlusterBaseClass tearDownClass
+ cls.get_super_method(cls, 'tearDownClass')()
+
+ def test_no_glustershd_with_distribute(self):
+ """
+ Test Script to verify the glustershd server vol file
+ has only entries for replicate volumes
+
+ * Create multiple volumes and start all volumes
+ * Check the glustershd processes - Only 1 glustershd should be listed
+ * Stop all volumes
+ * Check the glustershd processes - No glustershd should be running
+ * Start the distribute volume only
+ * Check the glustershd processes - No glustershd should be running
+
+ """
+
+ nodes = self.servers
+
+ # check the self-heal daemon process
+ g.log.info("Starting to get self-heal daemon process on "
+ "nodes %s", nodes)
+ ret, pids = get_self_heal_daemon_pid(nodes)
+ self.assertTrue(ret, ("Either no self heal daemon process found or "
+ "more than One self heal daemon process "
+ "found : %s" % pids))
+ g.log.info("Successful in getting single self heal daemon process"
+ " on all nodes %s", nodes)
+
+ # stop all the volumes
+ g.log.info("Going to stop all the volumes")
+ volume_list = get_volume_list(self.mnode)
+ for volume in volume_list:
+ g.log.info("Stopping Volume : %s", volume)
+ ret = volume_stop(self.mnode, volume)
+ self.assertTrue(ret, ("Failed to stop volume %s" % volume))
+ g.log.info("Successfully stopped volume %s", volume)
+ g.log.info("Successfully stopped all the volumes")
+
+ # check the self-heal daemon process after stopping all volumes
+ g.log.info("Starting to get self-heal daemon process on "
+ "nodes %s", nodes)
+ ret, pids = get_self_heal_daemon_pid(nodes)
+ self.assertFalse(ret, ("Self heal daemon process is still running "
+ "after stopping all volumes "))
+ for node in pids:
+ self.assertEqual(pids[node][0], -1, ("Self heal daemon is still "
+ "running on node %s even "
+ "after stoppong all "
+ "volumes" % node))
+ g.log.info("EXPECTED: No self heal daemon process is "
+ "running after stopping all volumes")
+
+ # start the distribute volume only
+ for volume in volume_list:
+ volume_type_info = get_volume_type_info(self.mnode, volume)
+ volume_type = (volume_type_info['volume_type_info']['typeStr'])
+ if volume_type == 'Distribute':
+ g.log.info("starting to start distribute volume: %s", volume)
+ ret = volume_start(self.mnode, volume)
+ self.assertTrue(ret, ("Failed to start volume %s" % volume))
+ g.log.info("Successfully started volume %s", volume)
+ break
+
+ # check the self-heal daemon process after starting distribute volume
+ g.log.info("Starting to get self-heal daemon process on "
+ "nodes %s", nodes)
+ ret, pids = get_self_heal_daemon_pid(nodes)
+ self.assertFalse(ret, ("Self heal daemon process is still running "
+ "after stopping all volumes "))
+ for node in pids:
+ self.assertEqual(pids[node][0], -1, ("Self heal daemon is still "
+ "running on node %s even "
+ "after stopping all "
+ "volumes" % node))
+ g.log.info("EXPECTED: No self heal daemon process is running "
+ "after stopping all volumes")
diff --git a/tests/functional/afr/heal/test_self_heal.py b/tests/functional/afr/heal/test_self_heal.py
index fe060e4f5..4fb6dea7e 100755
--- a/tests/functional/afr/heal/test_self_heal.py
+++ b/tests/functional/afr/heal/test_self_heal.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2015-2016 Red Hat, Inc. <http://www.redhat.com>
+# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -15,15 +15,13 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
# pylint: disable=too-many-lines
-
from glusto.core import Glusto as g
from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on)
from glustolibs.gluster.exceptions import ExecutionError
-from glustolibs.gluster.volume_ops import set_volume_options
+from glustolibs.gluster.volume_ops import get_volume_options
from glustolibs.gluster.volume_libs import (
verify_all_process_of_volume_are_online,
wait_for_volume_process_to_be_online)
-from glustolibs.gluster.volume_libs import expand_volume
from glustolibs.gluster.brick_libs import (select_bricks_to_bring_offline,
bring_bricks_offline,
bring_bricks_online,
@@ -34,8 +32,6 @@ from glustolibs.gluster.heal_libs import (
is_heal_complete,
is_volume_in_split_brain,
is_shd_daemonized)
-from glustolibs.gluster.rebalance_ops import (rebalance_start,
- wait_for_rebalance_to_complete)
from glustolibs.gluster.heal_ops import trigger_heal
from glustolibs.misc.misc_libs import upload_scripts
from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs,
@@ -43,27 +39,25 @@ from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs,
@runs_on([['replicated', 'distributed-replicated'],
- ['glusterfs', 'cifs', 'nfs']])
+ ['glusterfs', 'cifs']])
class TestSelfHeal(GlusterBaseClass):
"""
Description:
- Arbiter Test cases related to
- healing in default configuration of the volume
+ AFR Test cases related to healing in
+ default configuration of the volume
"""
@classmethod
def setUpClass(cls):
# Calling GlusterBaseClass setUpClass
- GlusterBaseClass.setUpClass.im_func(cls)
+ cls.get_super_method(cls, 'setUpClass')()
# Upload io scripts for running IO on mounts
g.log.info("Upload io scripts to clients %s for running IO on mounts",
cls.clients)
- script_local_path = ("/usr/share/glustolibs/io/scripts/"
- "file_dir_ops.py")
cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
"file_dir_ops.py")
- ret = upload_scripts(cls.clients, [script_local_path])
+ ret = upload_scripts(cls.clients, cls.script_upload_path)
if not ret:
raise ExecutionError("Failed to upload IO scripts to clients %s"
% cls.clients)
@@ -86,7 +80,7 @@ class TestSelfHeal(GlusterBaseClass):
def setUp(self):
# Calling GlusterBaseClass setUp
- GlusterBaseClass.setUp.im_func(self)
+ self.get_super_method(self, 'setUp')()
self.all_mounts_procs = []
self.io_validation_complete = False
@@ -121,14 +115,17 @@ class TestSelfHeal(GlusterBaseClass):
g.log.info("Successful in umounting the volume and Cleanup")
# Calling GlusterBaseClass teardown
- GlusterBaseClass.tearDown.im_func(self)
+ self.get_super_method(self, 'tearDown')()
- def test_data_self_heal_daemon_off(self):
+ def test_data_self_heal_command(self):
"""
Test Data-Self-Heal (heal command)
Description:
- - set the volume option
+ - get the client side healing volume options and check
+ if they have already been disabled by default
+ NOTE: Client side healing has been disabled by default
+ since GlusterFS 6.0
"metadata-self-heal": "off"
"entry-self-heal": "off"
"data-self-heal": "off"
@@ -137,7 +134,7 @@ class TestSelfHeal(GlusterBaseClass):
- set the volume option
"self-heal-daemon": "off"
- bring down all bricks processes from selected set
- - Get areeual after getting bricks offline and compare with
+ - Get arequal after getting bricks offline and compare with
arequal before getting bricks offline
- modify the data
- bring bricks online
@@ -146,8 +143,6 @@ class TestSelfHeal(GlusterBaseClass):
- check daemons and start healing
- check if heal is completed
- check for split-brain
- - add bricks
- - do rebalance
- create 5k files
- while creating files - kill bricks and bring bricks online one by one
in cycle
@@ -155,15 +150,16 @@ class TestSelfHeal(GlusterBaseClass):
"""
# pylint: disable=too-many-statements
- # Setting options
- g.log.info('Setting options...')
- options = {"metadata-self-heal": "off",
- "entry-self-heal": "off",
- "data-self-heal": "off"}
- ret = set_volume_options(self.mnode, self.volname, options)
- self.assertTrue(ret, 'Failed to set options %s' % options)
- g.log.info("Successfully set %s for volume %s",
- options, self.volname)
+ # Checking if Client side healing options are disabled by default
+ g.log.info('Checking Client side healing is disabled by default')
+ options = ('cluster.metadata-self-heal', 'cluster.data-self-heal',
+ 'cluster.entry-self-heal')
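+        # The option may be reported as 'off' or as 'off (DEFAULT)' when
+        # left at its default value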
+ for option in options:
+ ret = get_volume_options(self.mnode, self.volname, option)[option]
+            self.assertTrue(ret in ('off', 'off (DEFAULT)'),
+                            "{} option is not disabled by default"
+                            .format(option))
+ g.log.info("Client side healing options are disabled by default")
# Creating files on client side
for mount_obj in self.mounts:
@@ -171,8 +167,10 @@ class TestSelfHeal(GlusterBaseClass):
mount_obj.client_system, mount_obj.mountpoint)
# Create files
g.log.info('Creating files...')
- command = ("python %s create_files -f 100 --fixed-file-size 1k %s"
- % (self.script_upload_path, mount_obj.mountpoint))
+ command = ("/usr/bin/env python %s create_files -f 100 "
+ "--fixed-file-size 1k %s" % (
+ self.script_upload_path,
+ mount_obj.mountpoint))
proc = g.run_async(mount_obj.client_system, command,
user=mount_obj.user)
@@ -193,20 +191,10 @@ class TestSelfHeal(GlusterBaseClass):
g.log.info('Getting arequal before getting bricks offline '
'is successful')
- # Setting options
- g.log.info('Setting options...')
- options = {"self-heal-daemon": "off"}
- ret = set_volume_options(self.mnode, self.volname, options)
- self.assertTrue(ret, 'Failed to set options %s' % options)
- g.log.info("Option 'self-heal-daemon' is set to 'off' successfully")
-
# Select bricks to bring offline
bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
self.mnode, self.volname))
- bricks_to_bring_offline = filter(None, (
- bricks_to_bring_offline_dict['hot_tier_bricks'] +
- bricks_to_bring_offline_dict['cold_tier_bricks'] +
- bricks_to_bring_offline_dict['volume_bricks']))
+ bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks']
# Bring brick offline
g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
@@ -243,8 +231,10 @@ class TestSelfHeal(GlusterBaseClass):
mount_obj.mountpoint)
# Create files
g.log.info('Creating files...')
- command = ("python %s create_files -f 100 --fixed-file-size 10k %s"
- % (self.script_upload_path, mount_obj.mountpoint))
+ command = ("/usr/bin/env python %s create_files -f 100 "
+ "--fixed-file-size 10k %s" % (
+ self.script_upload_path,
+ mount_obj.mountpoint))
proc = g.run_async(mount_obj.client_system, command,
user=mount_obj.user)
@@ -267,13 +257,6 @@ class TestSelfHeal(GlusterBaseClass):
g.log.info('Bringing bricks %s online is successful',
bricks_to_bring_offline)
- # Setting options
- g.log.info('Setting options...')
- options = {"self-heal-daemon": "on"}
- ret = set_volume_options(self.mnode, self.volname, options)
- self.assertTrue(ret, 'Failed to set options %s' % options)
- g.log.info("Option 'self-heal-daemon' is set to 'on' successfully")
-
# Wait for volume processes to be online
g.log.info("Wait for volume processes to be online")
ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
@@ -282,7 +265,7 @@ class TestSelfHeal(GlusterBaseClass):
g.log.info("Successful in waiting for volume %s processes to be "
"online", self.volname)
- # Verify volume's all process are online
+ # Verify volume's all processes are online
g.log.info("Verifying volume's all process are online")
ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
self.assertTrue(ret, ("Volume %s : All process are not online"
@@ -314,23 +297,6 @@ class TestSelfHeal(GlusterBaseClass):
self.assertFalse(ret, 'Volume is in split-brain state')
g.log.info('Volume is not in split-brain state')
- # Add bricks
- g.log.info("Start adding bricks to volume...")
- ret = expand_volume(self.mnode, self.volname, self.servers,
- self.all_servers_info)
- self.assertTrue(ret, ("Failed to expand the volume %s", self.volname))
- g.log.info("Expanding volume is successful on "
- "volume %s", self.volname)
-
- # Do rebalance
- ret, _, _ = rebalance_start(self.mnode, self.volname)
- self.assertEqual(ret, 0, 'Failed to start rebalance')
- g.log.info('Rebalance is started')
-
- ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
- self.assertTrue(ret, 'Rebalance is not completed')
- g.log.info('Rebalance is completed successfully')
-
# Create 1k files
self.all_mounts_procs = []
for mount_obj in self.mounts:
@@ -338,8 +304,9 @@ class TestSelfHeal(GlusterBaseClass):
mount_obj.mountpoint)
# Create files
g.log.info('Creating files...')
- command = ("python %s create_files -f 1000 %s"
- % (self.script_upload_path, mount_obj.mountpoint))
+ command = ("/usr/bin/env python %s create_files -f 1000 %s" % (
+ self.script_upload_path,
+ mount_obj.mountpoint))
proc = g.run_async(mount_obj.client_system, command,
user=mount_obj.user)
@@ -402,50 +369,26 @@ class TestSelfHeal(GlusterBaseClass):
)
self.io_validation_complete = True
- def test_self_heal_50k_files_heal_command_by_add_brick(self):
+ def test_self_heal_50k_files_heal_default(self):
"""
- Test self-heal of 50k files (heal command
+        Test self-heal of 50k files by the default self-heal daemon
Description:
- - set the volume option
- "metadata-self-heal": "off"
- "entry-self-heal": "off"
- "data-self-heal": "off"
- "self-heal-daemon": "off"
- bring down all bricks processes from selected set
- create IO (50k files)
- Get arequal before getting bricks online
- - bring bricks online
- - set the volume option
- "self-heal-daemon": "on"
- - check for daemons
- - start healing
+ - check for daemons to come online
+ - heal daemon should pick up entries to heal automatically
- check if heal is completed
- check for split-brain
- get arequal after getting bricks online and compare with
arequal before getting bricks online
- - add bricks
- - do rebalance
- - get arequal after adding bricks and compare with
- arequal after getting bricks online
"""
# pylint: disable=too-many-locals,too-many-statements
- # Setting options
- g.log.info('Setting options...')
- options = {"metadata-self-heal": "off",
- "entry-self-heal": "off",
- "data-self-heal": "off",
- "self-heal-daemon": "off"}
- ret = set_volume_options(self.mnode, self.volname, options)
- self.assertTrue(ret, 'Failed to set options')
- g.log.info("Successfully set %s for volume %s", options, self.volname)
# Select bricks to bring offline
bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
self.mnode, self.volname))
- bricks_to_bring_offline = filter(None, (
- bricks_to_bring_offline_dict['hot_tier_bricks'] +
- bricks_to_bring_offline_dict['cold_tier_bricks'] +
- bricks_to_bring_offline_dict['volume_bricks']))
+ bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks']
# Bring brick offline
g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
@@ -466,8 +409,9 @@ class TestSelfHeal(GlusterBaseClass):
mount_obj.client_system, mount_obj.mountpoint)
# Create 50k files
g.log.info('Creating files...')
- command = ("python %s create_files -f 50000 %s"
- % (self.script_upload_path, mount_obj.mountpoint))
+ command = ("/usr/bin/env python %s create_files -f 50000 %s" % (
+ self.script_upload_path,
+ mount_obj.mountpoint))
proc = g.run_async(mount_obj.client_system, command,
user=mount_obj.user)
@@ -496,13 +440,6 @@ class TestSelfHeal(GlusterBaseClass):
g.log.info('Bringing bricks %s online is successful',
bricks_to_bring_offline)
- # Setting options
- g.log.info('Setting options...')
- options = {"self-heal-daemon": "on"}
- ret = set_volume_options(self.mnode, self.volname, options)
- self.assertTrue(ret, 'Failed to set options %s' % options)
- g.log.info("Option 'self-heal-daemon' is set to 'on' successfully")
-
# Wait for volume processes to be online
g.log.info("Wait for volume processes to be online")
ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
@@ -524,11 +461,7 @@ class TestSelfHeal(GlusterBaseClass):
self.assertTrue(ret, "Either No self heal daemon process found")
g.log.info("All self-heal-daemons are online")
- # Start healing
- ret = trigger_heal(self.mnode, self.volname)
- self.assertTrue(ret, 'Heal is not started')
- g.log.info('Healing is started')
-
+        # Default heal testing: wait for shd to pick up the heal on its own
# Monitor heal completion
ret = monitor_heal_completion(self.mnode, self.volname,
timeout_period=3600)
@@ -553,40 +486,8 @@ class TestSelfHeal(GlusterBaseClass):
# Checking arequals before bringing bricks online
# and after bringing bricks online
- self.assertItemsEqual(result_before_online, result_after_online,
- 'Checksums before and '
- 'after bringing bricks online are not equal')
+ self.assertEqual(result_before_online, result_after_online,
+ 'Checksums before and after bringing bricks online '
+ 'are not equal')
g.log.info('Checksums before and after bringing bricks online '
'are equal')
-
- # Add bricks
- g.log.info("Start adding bricks to volume...")
- ret = expand_volume(self.mnode, self.volname, self.servers,
- self.all_servers_info)
- self.assertTrue(ret, ("Failed to expand the volume when IO in "
- "progress on volume %s", self.volname))
- g.log.info("Expanding volume is successful on volume %s", self.volname)
-
- # Do rebalance
- ret, _, _ = rebalance_start(self.mnode, self.volname)
- self.assertEqual(ret, 0, 'Failed to start rebalance')
- g.log.info('Rebalance is started')
-
- ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
- self.assertTrue(ret, 'Rebalance is not completed')
- g.log.info('Rebalance is completed successfully')
-
- # Get arequal after adding bricks
- g.log.info('Getting arequal after adding bricks...')
- ret, result_after_adding_bricks = collect_mounts_arequal(self.mounts)
- self.assertTrue(ret, 'Failed to get arequal')
- g.log.info('Getting arequal after getting bricks '
- 'is successful')
-
- # Checking arequals after bringing bricks online
- # and after adding bricks
- self.assertItemsEqual(result_after_online, result_after_adding_bricks,
- 'Checksums after bringing bricks online and '
- 'after adding bricks are not equal')
- g.log.info('Checksums after bringing bricks online and '
- 'after adding bricks are equal')
diff --git a/tests/functional/afr/heal/test_self_heal_daemon_process.py b/tests/functional/afr/heal/test_self_heal_daemon_process.py
index edb4575eb..ea598b1fc 100755
--- a/tests/functional/afr/heal/test_self_heal_daemon_process.py
+++ b/tests/functional/afr/heal/test_self_heal_daemon_process.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2017 Red Hat, Inc. <http://www.redhat.com>
+# Copyright (C) 2016-2020 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -18,9 +18,11 @@
     Test cases in this module test the self heal daemon process.
"""
-import time
import calendar
+import time
+
from glusto.core import Glusto as g
+
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.volume_libs import (
@@ -37,7 +39,8 @@ from glustolibs.gluster.heal_libs import (get_self_heal_daemon_pid,
is_shd_daemonized,
are_all_self_heal_daemons_are_online)
from glustolibs.gluster.volume_ops import (volume_stop, volume_start)
-from glustolibs.gluster.gluster_init import restart_glusterd
+from glustolibs.gluster.gluster_init import (
+ restart_glusterd, wait_for_glusterd_to_start)
from glustolibs.io.utils import validate_io_procs
from glustolibs.misc.misc_libs import upload_scripts
@@ -55,16 +58,14 @@ class SelfHealDaemonProcessTests(GlusterBaseClass):
@classmethod
def setUpClass(cls):
# Calling GlusterBaseClass setUpClass
- GlusterBaseClass.setUpClass.im_func(cls)
+ cls.get_super_method(cls, 'setUpClass')()
# Upload io scripts for running IO on mounts
g.log.info("Upload io scripts to clients %s for running IO on mounts",
cls.clients)
- script_local_path = ("/usr/share/glustolibs/io/scripts/"
- "file_dir_ops.py")
cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
"file_dir_ops.py")
- ret = upload_scripts(cls.clients, [script_local_path])
+ ret = upload_scripts(cls.clients, cls.script_upload_path)
if not ret:
raise ExecutionError("Failed to upload IO scripts to clients %s"
% cls.clients)
@@ -78,7 +79,7 @@ class SelfHealDaemonProcessTests(GlusterBaseClass):
"""
         # calling GlusterBaseClass setUp
- GlusterBaseClass.setUp.im_func(self)
+ self.get_super_method(self, 'setUp')()
self.all_mounts_procs = []
self.io_validation_complete = False
@@ -103,7 +104,6 @@ class SelfHealDaemonProcessTests(GlusterBaseClass):
"""
Clean up the volume and umount volume from client
"""
-
# stopping the volume
g.log.info("Starting to Unmount Volume and Cleanup Volume")
ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
@@ -112,7 +112,7 @@ class SelfHealDaemonProcessTests(GlusterBaseClass):
g.log.info("Successful in Unmount Volume and Cleanup Volume")
         # calling GlusterBaseClass tearDown
- GlusterBaseClass.tearDown.im_func(self)
+ self.get_super_method(self, 'tearDown')()
def test_glustershd_with_add_remove_brick(self):
"""
@@ -356,6 +356,10 @@ class SelfHealDaemonProcessTests(GlusterBaseClass):
g.log.info("Successfully restarted glusterd on all nodes %s",
nodes)
+ self.assertTrue(
+ wait_for_glusterd_to_start(self.servers),
+ "Failed to start glusterd on %s" % self.servers)
+
# check the self heal daemon process after restarting glusterd process
g.log.info("Starting to get self-heal daemon process on"
" nodes %s", nodes)
@@ -445,10 +449,7 @@ class SelfHealDaemonProcessTests(GlusterBaseClass):
# select bricks to bring offline
bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
self.mnode, self.volname))
- bricks_to_bring_offline = filter(None, (
- bricks_to_bring_offline_dict['hot_tier_bricks'] +
- bricks_to_bring_offline_dict['cold_tier_bricks'] +
- bricks_to_bring_offline_dict['volume_bricks']))
+ bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks']
# bring bricks offline
g.log.info("Going to bring down the brick process "
@@ -529,10 +530,7 @@ class SelfHealDaemonProcessTests(GlusterBaseClass):
# Select bricks to bring offline
bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
self.mnode, self.volname))
- bricks_to_bring_offline = filter(None, (
- bricks_to_bring_offline_dict['hot_tier_bricks'] +
- bricks_to_bring_offline_dict['cold_tier_bricks'] +
- bricks_to_bring_offline_dict['volume_bricks']))
+ bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks']
# Bring brick offline
g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
@@ -548,11 +546,14 @@ class SelfHealDaemonProcessTests(GlusterBaseClass):
bricks_to_bring_offline)
# Creating files for all volumes
+ self.all_mounts_procs = []
for mount_obj in self.mounts:
g.log.info("Starting IO on %s:%s",
mount_obj.client_system, mount_obj.mountpoint)
- cmd = ("python %s create_files -f 100 %s/test_dir"
- % (self.script_upload_path, mount_obj.mountpoint))
+ cmd = ("/usr/bin/env python %s create_files -f 100 "
+ "%s/test_dir" % (
+ self.script_upload_path,
+ mount_obj.mountpoint))
proc = g.run_async(mount_obj.client_system, cmd,
user=mount_obj.user)
self.all_mounts_procs.append(proc)
diff --git a/tests/functional/afr/heal/test_self_heal_with_link_files.py b/tests/functional/afr/heal/test_self_heal_with_link_files.py
new file mode 100644
index 000000000..d029c3d9e
--- /dev/null
+++ b/tests/functional/afr/heal/test_self_heal_with_link_files.py
@@ -0,0 +1,405 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from random import choice
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+ bring_bricks_online,
+ are_bricks_offline,
+ are_bricks_online,
+ get_all_bricks)
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+ is_volume_in_split_brain,
+ is_heal_complete)
+from glustolibs.gluster.lib_utils import collect_bricks_arequal
+from glustolibs.gluster.volume_libs import (get_subvols,
+ replace_brick_from_volume)
+from glustolibs.io.utils import collect_mounts_arequal
+
+
+@runs_on([['distributed-replicated', 'replicated'], ['glusterfs']])
+class TestHealWithLinkFiles(GlusterBaseClass):
+
+ def setUp(self):
+
+ self.get_super_method(self, 'setUp')()
+
+ # Setup Volume
+ if not self.setup_volume_and_mount_volume([self.mounts[0]]):
+ raise ExecutionError("Failed to setup and mount volume")
+
+ self.first_client = self.mounts[0].client_system
+ self.mountpoint = self.mounts[0].mountpoint
+
+ def tearDown(self):
+
+ if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]):
+ raise ExecutionError("Failed to cleanup Volume")
+
+ # Calling GlusterBaseClass tearDown
+ self.get_super_method(self, 'tearDown')()
+
+ def _create_files_and_dirs_on_mount_point(self, second_attempt=False):
+ """A function to create files and dirs on mount point"""
+ # Create a parent directory test_link_self_heal on mount point
+ if not second_attempt:
+ ret = mkdir(self.first_client,
+ '{}/{}'.format(self.mountpoint,
+ 'test_link_self_heal'))
+ self.assertTrue(ret, "Failed to create dir test_link_self_heal")
+
+        # Create directories and files inside directory test_link_self_heal
+ io_cmd = ("for i in `seq 1 5`; do mkdir dir.$i; "
+ "for j in `seq 1 10`; do dd if=/dev/random "
+ "of=dir.$i/file.$j bs=1k count=$j; done; done")
+ if second_attempt:
+ io_cmd = ("for i in `seq 1 5` ; do for j in `seq 1 10`; "
+ "do dd if=/dev/random of=sym_link_dir.$i/"
+ "new_file.$j bs=1k count=$j; done; done ")
+ cmd = ("cd {}/test_link_self_heal;{}".format(self.mountpoint, io_cmd))
+ ret, _, _ = g.run(self.first_client, cmd)
+ self.assertFalse(ret, "Failed to create dirs and files inside")
+
+ def _create_soft_links_to_directories(self):
+ """Create soft links to directories"""
+ cmd = ("cd {}/test_link_self_heal; for i in `seq 1 5`; do ln -s "
+ "dir.$i sym_link_dir.$i; done".format(self.mountpoint))
+ ret, _, _ = g.run(self.first_client, cmd)
+ self.assertFalse(ret, "Failed to create soft links to dirs")
+
+ def _verify_soft_links_to_dir(self, option=0):
+ """Verify soft links to dir"""
+
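+        # option 0 verifies the symlinks themselves, option 1 verifies files
+        # created through them, and option 2 verifies both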
+ cmd_list = [
+ ("for i in `seq 1 5`; do stat -c %F sym_link_dir.$i | "
+ "grep -F 'symbolic link'; if [ $? -ne 0 ]; then exit 1;"
+ " fi ; done; for i in `seq 1 5` ; do readlink sym_link_dir.$i | "
+ "grep \"dir.$i\"; if [ $? -ne 0 ]; then exit 1; fi; done; "),
+ ("for i in `seq 1 5`; do for j in `seq 1 10`; do ls "
+ "dir.$i/new_file.$j; if [ $? -ne 0 ]; then exit 1; fi; done; "
+ "done")]
+
+ # Generate command to check according to option
+ if option == 2:
+ verify_cmd = "".join(cmd_list)
+ else:
+ verify_cmd = cmd_list[option]
+
+ cmd = ("cd {}/test_link_self_heal; {}".format(self.mountpoint,
+ verify_cmd))
+ ret, _, _ = g.run(self.first_client, cmd)
+ self.assertFalse(ret, "Symlinks aren't proper")
+
+ def _create_hard_links_to_files(self, second_attempt=False):
+ """Create hard links to files"""
+ io_cmd = ("for i in `seq 1 5`;do for j in `seq 1 10`;do ln "
+ "dir.$i/file.$j dir.$i/link_file.$j;done; done")
+ if second_attempt:
+ io_cmd = ("for i in `seq 1 5`; do mkdir new_dir.$i; for j in "
+ "`seq 1 10`; do ln dir.$i/file.$j new_dir.$i/new_file."
+ "$j;done; done;")
+
+ cmd = ("cd {}/test_link_self_heal;{}".format(self.mountpoint, io_cmd))
+ ret, _, _ = g.run(self.first_client, cmd)
+ self.assertFalse(ret, "Failed to create hard links to files")
+
+ def _verify_hard_links_to_files(self, second_set=False):
+ """Verify if hard links to files"""
+ file_to_compare = "dir.$i/link_file.$j"
+ if second_set:
+ file_to_compare = "new_dir.$i/new_file.$j"
+
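+        # Hard links share an inode, so compare 'stat -c %i' of the file and its link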
+ cmd = ("cd {}/test_link_self_heal;for i in `seq 1 5`; do for j in `seq"
+ " 1 10`;do if [ `stat -c %i dir.$i/file.$j` -ne `stat -c %i "
+ "{}` ];then exit 1; fi; done; done"
+ .format(self.mountpoint, file_to_compare))
+ ret, _, _ = g.run(self.first_client, cmd)
+ self.assertFalse(ret, "Failed to verify hard links to files")
+
+ def _bring_bricks_offline(self):
+ """Brings bricks offline and confirms if they are offline"""
+ # Select bricks to bring offline from a replica set
+ subvols_dict = get_subvols(self.mnode, self.volname)
+ subvols = subvols_dict['volume_subvols']
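+        # Take the first brick of every replica set so each subvolume keeps a live copy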
+ self.bricks_to_bring_offline = []
+ for subvol in subvols:
+ self.bricks_to_bring_offline.append(subvol[0])
+
+ # Bring bricks offline
+ ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline)
+ self.assertTrue(ret, 'Failed to bring bricks %s offline' %
+ self.bricks_to_bring_offline)
+
+ ret = are_bricks_offline(self.mnode, self.volname,
+ self.bricks_to_bring_offline)
+ self.assertTrue(ret, 'Bricks %s are not offline'
+ % self.bricks_to_bring_offline)
+ g.log.info('Bringing bricks %s offline is successful',
+ self.bricks_to_bring_offline)
+
+ def _restart_volume_and_bring_all_offline_bricks_online(self):
+ """Restart volume and bring all offline bricks online"""
+ ret = bring_bricks_online(self.mnode, self.volname,
+ self.bricks_to_bring_offline,
+ bring_bricks_online_methods=[
+ 'volume_start_force'])
+ self.assertTrue(ret, 'Failed to bring bricks %s online' %
+ self.bricks_to_bring_offline)
+
+ # Check if bricks are back online or not
+ ret = are_bricks_online(self.mnode, self.volname,
+ self.bricks_to_bring_offline)
+ self.assertTrue(ret, 'Bricks not online %s even after restart' %
+ self.bricks_to_bring_offline)
+
+ g.log.info('Bringing bricks %s online is successful',
+ self.bricks_to_bring_offline)
+
+ def _check_arequal_on_bricks_with_a_specific_arequal(self, arequal,
+ brick_list):
+ """
+        Compare an initial arequal checksum with bricks from a given brick list
+ """
+ init_val = arequal[0].splitlines()[-1].split(':')[-1]
+ ret, arequals = collect_bricks_arequal(brick_list)
+ self.assertTrue(ret, 'Failed to get arequal on bricks')
+ for brick_arequal in arequals:
+ brick_total = brick_arequal.splitlines()[-1].split(':')[-1]
+ self.assertEqual(init_val, brick_total, 'Arequals not matching')
+
+ def _check_arequal_checksum_for_the_volume(self):
+ """
+        Check if arequals of mount point and bricks are the same.
+ """
+ if self.volume_type == "replicated":
+ # Check arequals for "replicated"
+ brick_list = get_all_bricks(self.mnode, self.volname)
+
+ # Get arequal before getting bricks offline
+ ret, arequals = collect_mounts_arequal([self.mounts[0]])
+ self.assertTrue(ret, 'Failed to get arequal')
+ g.log.info('Getting arequal before getting bricks offline '
+ 'is successful')
+
+ # Get arequal on bricks and compare with mount_point_total
+ self._check_arequal_on_bricks_with_a_specific_arequal(
+ arequals, brick_list)
+
+ # Check arequals for "distributed-replicated"
+ if self.volume_type == "distributed-replicated":
+ # Get the subvolumes
+ subvols_dict = get_subvols(self.mnode, self.volname)
+ num_subvols = len(subvols_dict['volume_subvols'])
+
+ # Get arequals and compare
+ for i in range(0, num_subvols):
+ # Get arequal for first brick
+ brick_list = subvols_dict['volume_subvols'][i]
+ ret, arequals = collect_bricks_arequal([brick_list[0]])
+ self.assertTrue(ret, 'Failed to get arequal on first brick')
+
+ # Get arequal for every brick and compare with first brick
+ self._check_arequal_on_bricks_with_a_specific_arequal(
+ arequals, brick_list)
+
+ def _check_heal_is_completed_and_not_in_split_brain(self):
+ """Check if heal is completed and volume not in split brain"""
+ # Check if heal is completed
+ ret = is_heal_complete(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal is not complete')
+ g.log.info('Heal is completed successfully')
+
+        # Check if volume is in split brain or not
+ ret = is_volume_in_split_brain(self.mnode, self.volname)
+ self.assertFalse(ret, 'Volume is in split-brain state')
+ g.log.info('Volume is not in split-brain state')
+
+ def _check_if_there_are_files_and_dirs_to_be_healed(self):
+ """Check if there are files and dirs to be healed"""
+ ret = is_heal_complete(self.mnode, self.volname)
+ self.assertFalse(ret, 'Heal is completed')
+ g.log.info('Heal is pending')
+
+ def _wait_for_heal_is_completed(self):
+ """Check if heal is completed"""
+ ret = monitor_heal_completion(self.mnode, self.volname,
+ timeout_period=3600)
+ self.assertTrue(ret, 'Heal has not yet completed')
+
+ def _replace_one_random_brick(self):
+ """Replace one random brick from the volume"""
+ brick = choice(get_all_bricks(self.mnode, self.volname))
+ ret = replace_brick_from_volume(self.mnode, self.volname,
+ self.servers, self.all_servers_info,
+ src_brick=brick)
+ self.assertTrue(ret, "Failed to replace brick %s " % brick)
+ g.log.info("Successfully replaced brick %s", brick)
+
+ def test_self_heal_of_hard_links(self):
+ """
+ Test case:
+ 1. Create a volume, start it and mount it.
+ 2. Create a directory and create files and directories inside it
+ on mount point.
+ 3. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+        4. Bring down brick processes according to the volume type.
+ 5. Create hard links for the files created in step 2.
+ 6. Check if heal info is showing all the files and dirs to be healed.
+        7. Bring back all brick processes which were killed.
+ 8. Wait for heal to complete on the volume.
+ 9. Check if heal is complete and check if volume is in split brain.
+ 10. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 11. Verify if hard links are proper or not.
+ 12. Do a lookup on mount point.
+        13. Bring down brick processes according to the volume type.
+ 14. Create a second set of hard links to the files.
+ 15. Check if heal info is showing all the files and dirs to be healed.
+        16. Bring back all brick processes which were killed.
+ 17. Wait for heal to complete on the volume.
+ 18. Check if heal is complete and check if volume is in split brain.
+ 19. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 20. Verify both set of hard links are proper or not.
+ 21. Do a lookup on mount point.
+ 22. Pick a random brick and replace it.
+ 23. Wait for heal to complete on the volume.
+ 24. Check if heal is complete and check if volume is in split brain.
+ 25. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 26. Verify both set of hard links are proper or not.
+ 27. Do a lookup on mount point.
+ """
+ # Create a directory and create files and directories inside it
+ # on mount point
+ self._create_files_and_dirs_on_mount_point()
+
+ # Collect and compare arequal-checksum according to the volume type
+ # for bricks
+ self._check_arequal_checksum_for_the_volume()
+ for attempt in (False, True):
+
+            # Bring down brick processes according to the volume type
+ self._bring_bricks_offline()
+
+ # Create hardlinks for the files created in step 2
+ self._create_hard_links_to_files(second_attempt=attempt)
+
+ # Check if heal info is showing all the files and dirs to
+ # be healed
+ self._check_if_there_are_files_and_dirs_to_be_healed()
+
+ # Bring back all brick processes which were killed
+ self._restart_volume_and_bring_all_offline_bricks_online()
+
+ # Wait for heal to complete on the volume
+ self._wait_for_heal_is_completed()
+
+ # Check if heal is complete and check if volume is in split brain
+ self._check_heal_is_completed_and_not_in_split_brain()
+
+ # Collect and compare arequal-checksum according to the volume
+ # type for bricks
+ self._check_arequal_checksum_for_the_volume()
+
+ # Verify if hard links are proper or not
+ self._verify_hard_links_to_files()
+ if attempt:
+ self._verify_hard_links_to_files(second_set=attempt)
+
+ # Pick a random brick and replace it
+ self._replace_one_random_brick()
+
+ # Wait for heal to complete on the volume
+ self._wait_for_heal_is_completed()
+
+ # Check if heal is complete and check if volume is in split brain
+ self._check_heal_is_completed_and_not_in_split_brain()
+
+ # Collect and compare arequal-checksum according to the volume
+ # type for bricks
+ self._check_arequal_checksum_for_the_volume()
+
+ # Verify if hard links are proper or not
+ self._verify_hard_links_to_files()
+ self._verify_hard_links_to_files(second_set=True)
+
+ def test_self_heal_of_soft_links(self):
+ """
+ Test case:
+ 1. Create a volume, start it and mount it.
+ 2. Create a directory and create files and directories inside it
+ on mount point.
+ 3. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+        4. Bring down brick processes according to the volume type.
+ 5. Create soft links for the dirs created in step 2.
+ 6. Verify if soft links are proper or not.
+ 7. Add files through the soft links.
+ 8. Verify if the soft links are proper or not.
+ 9. Check if heal info is showing all the files and dirs to be healed.
+        10. Bring back all brick processes which were killed.
+ 11. Wait for heal to complete on the volume.
+ 12. Check if heal is complete and check if volume is in split brain.
+ 13. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 14. Verify if soft links are proper or not.
+ 15. Do a lookup on mount point.
+ """
+ # Create a directory and create files and directories inside it
+ # on mount point
+ self._create_files_and_dirs_on_mount_point()
+
+ # Collect and compare arequal-checksum according to the volume type
+ # for bricks
+ self._check_arequal_checksum_for_the_volume()
+
+        # Bring down brick processes according to the volume type
+ self._bring_bricks_offline()
+
+ # Create soft links for the dirs created in step 2
+ self._create_soft_links_to_directories()
+
+ # Verify if soft links are proper or not
+ self._verify_soft_links_to_dir()
+
+ # Add files through the soft links
+ self._create_files_and_dirs_on_mount_point(second_attempt=True)
+
+ # Verify if the soft links are proper or not
+ self._verify_soft_links_to_dir(option=1)
+
+ # Check if heal info is showing all the files and dirs to
+ # be healed
+ self._check_if_there_are_files_and_dirs_to_be_healed()
+
+ # Bring back all brick processes which were killed
+ self._restart_volume_and_bring_all_offline_bricks_online()
+
+ # Wait for heal to complete on the volume
+ self._wait_for_heal_is_completed()
+
+ # Check if heal is complete and check if volume is in split brain
+ self._check_heal_is_completed_and_not_in_split_brain()
+
+ # Verify if soft links are proper or not
+ self._verify_soft_links_to_dir(option=2)
diff --git a/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py b/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py
new file mode 100644
index 000000000..37bd2ec52
--- /dev/null
+++ b/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py
@@ -0,0 +1,600 @@
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+ bring_bricks_online,
+ are_bricks_offline,
+ are_bricks_online,
+ get_all_bricks)
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+ is_volume_in_split_brain,
+ is_heal_complete,
+ enable_granular_heal,
+ disable_granular_heal)
+from glustolibs.gluster.lib_utils import (add_user, del_user, group_del,
+ group_add, collect_bricks_arequal)
+from glustolibs.gluster.volume_ops import get_volume_options
+from glustolibs.gluster.volume_libs import get_subvols
+from glustolibs.io.utils import collect_mounts_arequal
+
+
+@runs_on([['distributed-replicated', 'replicated'], ['glusterfs']])
+class TestSelfHealWithMetaDataEntryAndFilesRemoved(GlusterBaseClass):
+
+ def setUp(self):
+
+ self.get_super_method(self, 'setUp')()
+
+ self.first_client = self.mounts[0].client_system
+ self.mountpoint = self.mounts[0].mountpoint
+ self.user_group_created = False
+
+        # If the test case being run is test_self_heal_meta_data,
+        # create the user and group it needs
+        test_id = self.id().split('.')[-1]
+ if test_id == 'test_self_heal_meta_data':
+
+ # Create non-root group
+ if not group_add(self.first_client, 'qa_all'):
+ raise ExecutionError("Failed to create group qa_all")
+
+ # Create non-root users
+ self.users = ('qa_func', 'qa_system', 'qa_perf')
+ for user in self.users:
+ if not add_user(self.first_client, user, group='qa_all'):
+ raise ExecutionError("Failed to create user {}"
+ .format(user))
+
+ self.user_group_created = True
+ g.log.info("Successfully created all users.")
+
+ # Setup Volume
+ if not self.setup_volume_and_mount_volume([self.mounts[0]]):
+ raise ExecutionError("Failed to setup and mount volume")
+
+ def tearDown(self):
+
+ # Delete non-root users and group if created
+ if self.user_group_created:
+
+ # Delete non-root users
+ for user in self.users:
+ del_user(self.first_client, user)
+ g.log.info("Successfully deleted all users")
+
+ # Delete non-root group
+ group_del(self.first_client, 'qa_all')
+
+ if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]):
+ raise ExecutionError("Failed to cleanup Volume")
+
+ # Calling GlusterBaseClass tearDown
+ self.get_super_method(self, 'tearDown')()
+
+ def _set_granular_heal_to_on_or_off(self, enabled=False):
+ """Set granular heal to ON or OFF"""
+ granular = get_volume_options(self.mnode, self.volname,
+ 'granular-entry-heal')
+ if enabled:
+ if granular['cluster.granular-entry-heal'] != 'on':
+ ret = enable_granular_heal(self.mnode, self.volname)
+ self.assertTrue(ret,
+ "Unable to set granular-entry-heal to on")
+ else:
+ if granular['cluster.granular-entry-heal'] == 'on':
+ ret = disable_granular_heal(self.mnode, self.volname)
+ self.assertTrue(ret,
+ "Unable to set granular-entry-heal to off")
+
+ def _run_cmd(self, io_cmd, err_msg):
+ """Run cmd and show error message if it fails"""
+ cmd = ("cd {}/test_self_heal;{}".format(self.mountpoint, io_cmd))
+ ret, _, _ = g.run(self.first_client, cmd)
+ self.assertFalse(ret, err_msg)
+
+ def _create_files_and_dirs_on_mount_point(self, index, second_set=False):
+ """A function to create files and dirs on mount point"""
+ # Create a parent directory test_self_heal on mount point
+ if not second_set:
+ ret = mkdir(self.first_client, '{}/{}'.format(
+ self.mountpoint, 'test_self_heal'))
+ self.assertTrue(ret, "Failed to create dir test_self_heal")
+
+        # Create directories and files inside directory test_self_heal
+ io_cmd = ("for i in `seq 1 50`; do mkdir dir.$i; dd if=/dev/random"
+ " of=file.$i count=1K bs=$i; done",
+
+ "for i in `seq 1 100`; do mkdir dir.$i; for j in `seq 1 5`;"
+ " do dd if=/dev/random of=dir.$i/file.$j bs=1K count=$j"
+ ";done;done",
+
+ "for i in `seq 1 10`; do mkdir l1_dir.$i; for j in `seq "
+ "1 5`; do mkdir l1_dir.$i/l2_dir.$j; for k in `seq 1 10`;"
+ " do dd if=/dev/random of=l1_dir.$i/l2_dir.$j/test.$k"
+ " bs=1k count=$k; done; done; done;",
+
+ "for i in `seq 51 100`; do mkdir new_dir.$i; for j in `seq"
+ " 1 10`; do dd if=/dev/random of=new_dir.$i/new_file.$j "
+ "bs=1K count=$j; done; dd if=/dev/random of=new_file.$i"
+ " count=1K bs=$i; done ;")
+        self._run_cmd(
+            io_cmd[index], "Failed to create dirs and files inside test_self_heal")
+
+ def _delete_files_and_dirs(self):
+ """Delete files and dirs from mount point"""
+ io_cmd = ("for i in `seq 1 50`; do rm -rf dir.$i; rm -f file.$i;done")
+ self._run_cmd(io_cmd, "Failed to delete dirs and files")
+
+ def _rename_files_and_dirs(self):
+ """Rename files and dirs from mount point"""
+ io_cmd = ("for i in `seq 51 100`; do mv new_file.$i renamed_file.$i;"
+ " for j in `seq 1 10`; do mv new_dir.$i/new_file.$j "
+ "new_dir.$i/renamed_file.$j ; done ; mv new_dir.$i "
+ "renamed_dir.$i; done;")
+ self._run_cmd(io_cmd, "Failed to rename dirs and files")
+
+    def _change_meta_data_of_dirs_and_files(self):
+ """Change meta data of dirs and files"""
+ cmds = (
+ # Change permission
+ "for i in `seq 1 100`; do chmod 555 dir.$i; done; "
+ "for i in `seq 1 50`; do for j in `seq 1 5`; do chmod 666 "
+ "dir.$i/file.$j; done; done; for i in `seq 51 100`; do for "
+ "j in `seq 1 5`;do chmod 444 dir.$i/file.$j; done; done;",
+
+ # Change ownership
+ "for i in `seq 1 35`; do chown -R qa_func dir.$i; done; "
+ "for i in `seq 36 70`; do chown -R qa_system dir.$i; done; "
+ "for i in `seq 71 100`; do chown -R qa_perf dir.$i; done;",
+
+ # Change group
+ "for i in `seq 1 100`; do chgrp -R qa_all dir.$i; done;")
+
+ for io_cmd in cmds:
+ self._run_cmd(io_cmd,
+ "Failed to change meta data on dirs and files")
+ g.log.info("Successfully changed meta data on dirs and files")
+
+ def _verify_meta_data_of_files_and_dirs(self):
+ """Verify meta data of files and dirs"""
+ cmds = (
+ # Verify permissions
+ "for i in `seq 1 50`; do stat -c %a dir.$i | grep -F \"555\";"
+ " if [ $? -ne 0 ]; then exit 1; fi; for j in `seq 1 5` ; do "
+ "stat -c %a dir.$i/file.$j | grep -F \"666\"; if [ $? -ne 0 ]"
+ "; then exit 1; fi; done; done; for i in `seq 51 100`; do "
+ "stat -c %a dir.$i | grep -F \"555\";if [ $? -ne 0 ]; then "
+ "exit 1; fi; for j in `seq 1 5`; do stat -c %a dir.$i/file.$j"
+ " | grep -F \"444\"; if [ $? -ne 0 ]; then exit 1; fi; done;"
+ "done;",
+
+ # Verify ownership
+ "for i in `seq 1 35`; do stat -c %U dir.$i | grep -F "
+ "\"qa_func\"; if [ $? -ne 0 ]; then exit 1; fi; for j in "
+ "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F "
+ "\"qa_func\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;"
+ " for i in `seq 36 70` ; do stat -c %U dir.$i | grep -F "
+ "\"qa_system\"; if [ $? -ne 0 ]; then exit 1; fi; for j in "
+ "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F "
+ "\"qa_system\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;"
+ " for i in `seq 71 100` ; do stat -c %U dir.$i | grep -F "
+ "\"qa_perf\"; if [ $? -ne 0 ]; then exit 1; fi; for j in "
+ "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F "
+ "\"qa_perf\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;",
+
+ # Verify group
+ "for i in `seq 1 100`; do stat -c %G dir.$i | grep -F "
+ "\"qa_all\"; if [ $? -ne 0 ]; then exit 1; fi; for j in "
+ "`seq 1 5`; do stat -c %G dir.$i/file.$j | grep -F "
+ "\"qa_all\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;")
+
+ for io_cmd in cmds:
+ self._run_cmd(io_cmd, "Meta data of dirs and files not proper")
+
+ def _set_and_remove_extended_attributes(self, remove=False):
+ """Set and remove extended attributes"""
+ # Command to set extended attribute to files and dirs
+ io_cmd = ("for i in `seq 1 100`; do setfattr -n trusted.name -v "
+ "testing_xattr_selfheal_on_dirs dir.$i; for j in `seq 1 "
+ "5`;do setfattr -n trusted.name -v "
+ "testing_xattr_selfheal_on_files dir.$i/file.$j; done; "
+ "done;")
+ err_msg = "Failed to set extended attributes to files and dirs"
+ if remove:
+ # Command to remove extended attribute set on files and dirs
+ io_cmd = ("for i in `seq 1 100`; do setfattr -x trusted.name "
+ "dir.$i; for j in `seq 1 5`; do setfattr -x "
+ "trusted.name dir.$i/file.$j ; done ; done ;")
+ err_msg = "Failed to remove extended attributes to files and dirs"
+
+ self._run_cmd(io_cmd, err_msg)
+
+ def _verify_if_extended_attributes_are_proper(self, remove=False):
+ """Verify if extended attributes are set or remove properly"""
+ io_cmd = ("for i in `seq 1 100`; do getfattr -n trusted.name -e text "
+ "dir.$i | grep -F 'testing_xattr_selfheal_on_dirs'; if [ $? "
+ "-ne 0 ]; then exit 1 ; fi ; for j in `seq 1 5` ; do "
+ "getfattr -n trusted.name -e text dir.$i/file.$j | grep -F "
+ "'testing_xattr_selfheal_on_files'; if [ $? -ne 0 ]; then "
+ "exit 1; fi; done; done;")
+ err_msg = "Extended attributes on files and dirs are not proper"
+ if remove:
+ io_cmd = ("for i in `seq 1 100`; do getfattr -n trusted.name -e "
+ "text dir.$i; if [ $? -eq 0 ]; then exit 1; fi; for j in"
+ " `seq 1 5`; do getfattr -n trusted.name -e text "
+ "dir.$i/file.$j; if [ $? -eq 0]; then exit 1; fi; done; "
+ "done;")
+ err_msg = "Extended attributes set to files and dirs not removed"
+ self._run_cmd(io_cmd, err_msg)
+
+ def _remove_files_and_create_dirs_with_the_same_name(self):
+ """Remove files and create dirs with the same name"""
+ io_cmd = ("for i in `seq 1 10`; do for j in `seq 1 5`; do for k in "
+ "`seq 1 10`; do rm -f l1_dir.$i/l2_dir.$j/test.$k; mkdir "
+ "l1_dir.$i/l2_dir.$j/test.$k; done; done; done;")
+ self._run_cmd(io_cmd,
+ "Failed to remove files and create dirs with same name")
+
+ def _verify_if_dirs_are_proper_or_not(self):
+ """Verify if dirs are proper or not"""
+ io_cmd = ("for i in `seq 1 10`; do for j in `seq 1 5`; do for k in "
+ "`seq 1 10`; do stat -c %F l1_dir.$i/l2_dir.$j/test.$k | "
+ "grep -F 'directory'; if [ $? -ne 0 ]; then exit 1; fi; "
+ "done; done; done;")
+ self._run_cmd(io_cmd, "Dirs created instead of files aren't proper")
+
+ def _bring_bricks_offline(self):
+ """Brings bricks offline and confirms if they are offline"""
+ # Select bricks to bring offline from a replica set
+ subvols_dict = get_subvols(self.mnode, self.volname)
+ subvols = subvols_dict['volume_subvols']
+ self.bricks_to_bring_offline = []
+ for subvol in subvols:
+ self.bricks_to_bring_offline.append(subvol[0])
+
+ # Bring bricks offline
+ ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline)
+ self.assertTrue(ret, 'Failed to bring bricks %s offline' %
+ self.bricks_to_bring_offline)
+
+ ret = are_bricks_offline(self.mnode, self.volname,
+ self.bricks_to_bring_offline)
+ self.assertTrue(ret, 'Bricks %s are not offline'
+ % self.bricks_to_bring_offline)
+ g.log.info('Bringing bricks %s offline is successful',
+ self.bricks_to_bring_offline)
+
+ def _restart_volume_and_bring_all_offline_bricks_online(self):
+ """Restart volume and bring all offline bricks online"""
+ ret = bring_bricks_online(self.mnode, self.volname,
+ self.bricks_to_bring_offline,
+ bring_bricks_online_methods=[
+ 'volume_start_force'])
+ self.assertTrue(ret, 'Failed to bring bricks %s online' %
+ self.bricks_to_bring_offline)
+
+ # Check if bricks are back online or not
+ ret = are_bricks_online(self.mnode, self.volname,
+ self.bricks_to_bring_offline)
+ self.assertTrue(ret, 'Bricks not online %s even after restart' %
+ self.bricks_to_bring_offline)
+
+ g.log.info('Bringing bricks %s online is successful',
+ self.bricks_to_bring_offline)
+
+ def _check_arequal_on_bricks_with_a_specific_arequal(self, arequal,
+ brick_list):
+ """
+        Compare an initial arequal checksum with bricks from a given brick list
+ """
+ init_val = arequal[0].splitlines()[-1].split(':')[-1]
+ ret, arequals = collect_bricks_arequal(brick_list)
+ self.assertTrue(ret, 'Failed to get arequal on bricks')
+ for brick_arequal in arequals:
+ brick_total = brick_arequal.splitlines()[-1].split(':')[-1]
+ self.assertEqual(init_val, brick_total, 'Arequals not matching')
+
+ @staticmethod
+ def _add_dir_path_to_brick_list(brick_list):
+ """Add test_self_heal at the end of brick path"""
+ dir_brick_list = []
+ for brick in brick_list:
+ dir_brick_list.append('{}/{}'.format(brick, 'test_self_heal'))
+ return dir_brick_list
+
+ def _check_arequal_checksum_for_the_volume(self):
+ """
+        Check if arequals of mount point and bricks are the same.
+ """
+ if self.volume_type == "replicated":
+ # Check arequals for "replicated"
+ brick_list = get_all_bricks(self.mnode, self.volname)
+ dir_brick_list = self._add_dir_path_to_brick_list(brick_list)
+
+ # Get arequal before getting bricks offline
+ work_dir = '{}/test_self_heal'.format(self.mountpoint)
+ ret, arequals = collect_mounts_arequal([self.mounts[0]],
+ path=work_dir)
+ self.assertTrue(ret, 'Failed to get arequal')
+ g.log.info('Getting arequal before getting bricks offline '
+ 'is successful')
+
+ # Get arequal on bricks and compare with mount_point_total
+ self._check_arequal_on_bricks_with_a_specific_arequal(
+ arequals, dir_brick_list)
+
+ # Check arequals for "distributed-replicated"
+ if self.volume_type == "distributed-replicated":
+ # Get the subvolumes
+ subvols_dict = get_subvols(self.mnode, self.volname)
+ num_subvols = len(subvols_dict['volume_subvols'])
+
+ # Get arequals and compare
+ for i in range(0, num_subvols):
+ # Get arequal for first brick
+ brick_list = subvols_dict['volume_subvols'][i]
+ dir_brick_list = self._add_dir_path_to_brick_list(brick_list)
+ ret, arequals = collect_bricks_arequal([dir_brick_list[0]])
+ self.assertTrue(ret, 'Failed to get arequal on first brick')
+
+ # Get arequal for every brick and compare with first brick
+ self._check_arequal_on_bricks_with_a_specific_arequal(
+ arequals, dir_brick_list)
+
+ def _check_heal_is_completed_and_not_in_split_brain(self):
+ """Check if heal is completed and volume not in split brain"""
+ # Check if heal is completed
+ ret = is_heal_complete(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal is not complete')
+ g.log.info('Heal is completed successfully')
+
+        # Check if volume is in split brain or not
+ ret = is_volume_in_split_brain(self.mnode, self.volname)
+ self.assertFalse(ret, 'Volume is in split-brain state')
+ g.log.info('Volume is not in split-brain state')
+
+ def _check_if_there_are_files_and_dirs_to_be_healed(self):
+ """Check if there are files and dirs to be healed"""
+ ret = is_heal_complete(self.mnode, self.volname)
+ self.assertFalse(ret, 'Heal is completed')
+ g.log.info('Heal is pending')
+
+ def _wait_for_heal_is_completed(self):
+ """Check if heal is completed"""
+ ret = monitor_heal_completion(self.mnode, self.volname,
+ timeout_period=3600)
+ self.assertTrue(ret, 'Heal has not yet completed')
+
+ def _check_heal_status_restart_vol_wait_and_check_data(self):
+ """
+        Perform the repetitive steps listed below:
+        1. Check if heal info is showing all the files and dirs to be healed
+        2. Bring back all brick processes which were killed
+        3. Wait for heal to complete on the volume
+        4. Check if heal is complete and check if volume is in split brain
+        5. Collect and compare arequal-checksum according to the volume type
+           for bricks
+ """
+ # Check if heal info is showing all the files and dirs to be healed
+ self._check_if_there_are_files_and_dirs_to_be_healed()
+
+ # Bring back all brick processes which were killed
+ self._restart_volume_and_bring_all_offline_bricks_online()
+
+ # Wait for heal to complete on the volume
+ self._wait_for_heal_is_completed()
+
+ # Check if heal is complete and check if volume is in split brain
+ self._check_heal_is_completed_and_not_in_split_brain()
+
+ # Collect and compare arequal-checksum according to the volume type
+ # for bricks
+ self._check_arequal_checksum_for_the_volume()
+
+ def _run_test_self_heal_entry_heal(self):
+ """Run steps of test_self_heal_entry_heal"""
+ # Create a directory and create files and directories inside it on
+ # mount point
+ self._create_files_and_dirs_on_mount_point(0)
+
+ # Collect and compare arequal-checksum according to the volume type
+ # for bricks
+ self._check_arequal_checksum_for_the_volume()
+
+        # Bring down brick processes according to the volume type
+ self._bring_bricks_offline()
+
+ # Create a new set of files and directories on mount point
+ self._create_files_and_dirs_on_mount_point(3, second_set=True)
+
+ self._check_heal_status_restart_vol_wait_and_check_data()
+
+        # Bring down brick processes according to the volume type
+ self._bring_bricks_offline()
+
+ # Delete files and directories from mount point
+ self._delete_files_and_dirs()
+
+ self._check_heal_status_restart_vol_wait_and_check_data()
+
+        # Bring down brick processes according to the volume type
+ self._bring_bricks_offline()
+
+ # Rename the existing files and dirs
+ self._rename_files_and_dirs()
+
+ self._check_heal_status_restart_vol_wait_and_check_data()
+
+ def test_self_heal_entry_heal(self):
+ """
+ Test case:
+ 1. Create a volume, start it and mount it.
+ 2. Create a directory and create files and directories inside it
+ on mount point.
+ 3. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+        4. Bring down brick processes according to the volume type.
+ 5. Create a new set of files and directories on mount point.
+ 6. Check if heal info is showing all the files and dirs to be healed.
+ 7. Bring back all brick processes which were killed.
+ 8. Wait for heal to complete on the volume.
+ 9. Check if heal is complete and check if volume is in split brain.
+ 10. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+        11. Bring down brick processes according to the volume type.
+ 12. Delete files and directories from mount point.
+ 13. Check if heal info is showing all the files and dirs to be healed.
+ 14. Bring back all brick processes which were killed.
+ 15. Wait for heal to complete on the volume.
+ 16. Check if heal is complete and check if volume is in split brain.
+ 17. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+        18. Bring down brick processes according to the volume type.
+ 19. Rename the existing files and dirs.
+ 20. Check if heal info is showing all the files and dirs to be healed.
+ 21. Bring back all brick processes which were killed.
+ 22. Wait for heal to complete on the volume.
+ 23. Check if heal is complete and check if volume is in split brain.
+ 24. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+
+ Note:
+ Do this test with both Granular-entry-heal set enable and disable.
+ """
+ for value in (False, True):
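+            # First iteration runs with granular-entry-heal off, the second with it on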
+ if value:
+ # Cleanup old data from mount point
+ ret, _, _ = g.run(self.first_client,
+ 'rm -rf {}/*'.format(self.mountpoint))
+ self.assertFalse(ret, 'Failed to cleanup mount point')
+ g.log.info("Testing with granular heal set to enabled")
+ self._set_granular_heal_to_on_or_off(enabled=value)
+ self._run_test_self_heal_entry_heal()
+
+ def test_self_heal_meta_data(self):
+ """
+ Test case:
+ 1. Create a volume, start it and mount it.
+ 2. Create a directory and create files and directories inside it
+ on mount point.
+ 3. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+        4. Bring down brick processes according to the volume type.
+ 5. Change the meta data of files and dirs.
+ 6. Check if heal info is showing all the files and dirs to be healed.
+ 7. Bring back all brick processes which were killed.
+ 8. Wait for heal to complete on the volume.
+ 9. Check if heal is complete and check if volume is in split brain.
+ 10. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+        11. Verify the meta data of files and dirs.
+        12. Bring down brick processes according to the volume type.
+ 13. Set extended attributes on the files and dirs.
+ 14. Verify if the extended attributes are set properly or not.
+ 15. Check if heal info is showing all the files and dirs to be healed.
+ 16. Bring back all brick processes which were killed.
+ 17. Wait for heal to complete on the volume.
+ 18. Check if heal is complete and check if volume is in split brain.
+ 19. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+        20. Verify if extended attributes are consistent or not.
+        21. Bring down brick processes according to the volume type.
+ 22. Remove extended attributes on the files and dirs.
+ 23. Verify if extended attributes were removed properly.
+ 24. Check if heal info is showing all the files and dirs to be healed.
+ 25. Bring back all brick processes which were killed.
+ 26. Wait for heal to complete on the volume.
+ 27. Check if heal is complete and check if volume is in split brain.
+ 28. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 29. Verify if extended attributes are removed or not.
+ """
+ # Create a directory and create files and directories inside it
+ # on mount point
+ self._create_files_and_dirs_on_mount_point(1)
+
+ # Collect and compare arequal-checksum according to the volume type
+ # for bricks
+ self._check_arequal_checksum_for_the_volume()
+
+        # Bring down brick processes according to the volume type
+ self._bring_bricks_offline()
+
+ # Change the meta data of files and dirs
+        self._change_meta_data_of_dirs_and_files()
+
+ self._check_heal_status_restart_vol_wait_and_check_data()
+
+        # Verify the meta data of files and dirs
+ self._verify_meta_data_of_files_and_dirs()
+
+ for value in (False, True):
+            # Bring down brick processes according to the volume type
+ self._bring_bricks_offline()
+
+ # Set or remove extended attributes on the files and dirs
+ self._set_and_remove_extended_attributes(remove=value)
+
+ # Verify if the extended attributes are set properly or not
+ self._verify_if_extended_attributes_are_proper(remove=value)
+
+ self._check_heal_status_restart_vol_wait_and_check_data()
+
+            # Verify if extended attributes are consistent or not
+ self._verify_if_extended_attributes_are_proper(remove=value)
+
+ def test_self_heal_of_dir_with_files_removed(self):
+ """
+ Test case:
+ 1. Create a volume, start it and mount it.
+ 2. Create a directory and create files and directories inside it
+ on mount point.
+ 3. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+        4. Bring down brick processes according to the volume type.
+ 5. Remove all files and create dir which have name of files.
+ 6. Check if heal info is showing all the files and dirs to be healed.
+ 7. Bring back all brick processes which were killed.
+ 8. Wait for heal to complete on the volume.
+ 9. Check if heal is complete and check if volume is in split brain.
+ 10. Collect and compare arequal-checksum according to the volume type
+ for bricks.
+ 11. Verify if dirs are healed properly or not.
+ """
+ # Create a directory and create files and directories inside it
+ # on mount point
+ self._create_files_and_dirs_on_mount_point(2)
+
+ # Collect and compare arequal-checksum according to the volume type
+ # for bricks
+ self._check_arequal_checksum_for_the_volume()
+
+        # Bring down brick processes according to the volume type
+ self._bring_bricks_offline()
+
+ # Remove all files and create dir which have name of files
+ self._remove_files_and_create_dirs_with_the_same_name()
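+ # While the bricks are down each file is replaced by a directory of
+ # the same name, e.g. (illustrative): rm -f dir0/file1; mkdir dir0/file1
+ # On heal this forces an entry heal with a file/dir type mismatch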
+
+ self._check_heal_status_restart_vol_wait_and_check_data()
+
+ # Verify if dirs are healed properly or not
+ self._verify_if_dirs_are_proper_or_not()
diff --git a/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py b/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py
new file mode 100644
index 000000000..a449e396f
--- /dev/null
+++ b/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py
@@ -0,0 +1,250 @@
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+""" Description:
+ Test cases in this module test the self heal daemon process.
+"""
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.brick_libs import (
+ bring_bricks_offline, bring_bricks_online,
+ select_volume_bricks_to_bring_offline, get_online_bricks_list)
+from glustolibs.gluster.heal_libs import (
+ get_self_heal_daemon_pid, is_shd_daemonized,
+ monitor_heal_completion, bring_self_heal_daemon_process_offline,
+ disable_granular_heal)
+from glustolibs.gluster.heal_ops import (get_heal_info_summary,
+ trigger_heal_full)
+from glustolibs.io.utils import validate_io_procs
+from glustolibs.misc.misc_libs import upload_scripts
+from glustolibs.gluster.volume_ops import (set_volume_options,
+ get_volume_options)
+from glustolibs.gluster.mount_ops import mount_volume, umount_volume
+
+
+@runs_on([['replicated'], ['glusterfs']])
+class SelfHealDaemonProcessTestsWithSingleVolume(GlusterBaseClass):
+ """
+ SelfHealDaemonProcessTestsWithSingleVolume contains tests which
+ verifies the self-heal daemon process on a single volume
+ """
+
+ def setUp(self):
+
+ # Calling GlusterBaseClass setUp
+ self.get_super_method(self, 'setUp')()
+
+ # Upload script
+ self.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
+ "file_dir_ops.py")
+ ret = upload_scripts(self.clients, [self.script_upload_path])
+ if not ret:
+ raise ExecutionError("Failed to upload IO scripts to clients")
+
+ # Setup Volume and Mount Volume
+ ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
+ if not ret:
+ raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+ g.log.info("Successful in Setup Volume and Mount Volume")
+
+ # Verify that the glustershd process has detached from its parent process
+ ret = is_shd_daemonized(self.servers)
+ if not ret:
+ raise ExecutionError("Self Heal Daemon process was still"
+ " holding parent process.")
+ g.log.info("Self Heal Daemon processes are online")
+
+ def tearDown(self):
+ """
+ Clean up the volume and umount volume from client
+ """
+ # Unmount and cleanup the volume
+ ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+ if not ret:
+ raise ExecutionError("Failed to Unmount Volume and Cleanup Volume")
+ g.log.info("Successful in Unmount Volume and Cleanup Volume")
+
+ # Calling GlusterBaseClass tearDown
+ self.get_super_method(self, 'tearDown')()
+
+ def test_server_side_healing_happens_only_when_glustershd_running(self):
+ """
+ Test script which verifies that server side healing happens only
+ if the self heal daemon is running on the node where the source
+ brick resides.
+
+ * Create and start the Replicate volume
+ * Check the glustershd processes - only one glustershd should be listed
+ * Bring down the bricks without affecting the cluster
+ * Create files on the volume
+ * Kill glustershd on the nodes where the source bricks are running
+ * Bring up the bricks which were brought down in the previous steps
+ * Check the heal info - it must show pending heals; healing
+ shouldn't happen since glustershd is down on the source nodes
+ * Issue a full heal - it should fail since glustershd is down
+ * Trigger client side heal by accessing files from the mount
+ * Heal should complete successfully
+ """
+ # pylint: disable=too-many-locals,too-many-statements,too-many-lines
+
+ # Disable granular heal if not disabled already
+ granular = get_volume_options(self.mnode, self.volname,
+ 'granular-entry-heal')
+ if granular['cluster.granular-entry-heal'] == 'on':
+ ret = disable_granular_heal(self.mnode, self.volname)
+ self.assertTrue(ret,
+ "Unable to set granular-entry-heal to on")
+
+ # Setting Volume options
+ options = {"metadata-self-heal": "on",
+ "entry-self-heal": "on",
+ "data-self-heal": "on"}
+ ret = set_volume_options(self.mnode, self.volname, options)
+ self.assertTrue(ret, 'Failed to set options %s' % options)
+ g.log.info("Successfully set %s for volume %s",
+ options, self.volname)
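+ # These correspond to the cluster.*-self-heal client-side heal
+ # options, e.g. the equivalent CLI would be (illustrative):
+ # gluster volume set <volname> cluster.data-self-heal on
+ # The test remounts the volume later so the mounted clients are
+ # sure to pick them up before client side heal is exercised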
+
+ # Check the self-heal daemon process
+ ret, pids = get_self_heal_daemon_pid(self.servers)
+ self.assertTrue(ret, ("Either No self heal daemon process found or "
+ "more than One self heal daemon process "
+ "found : %s" % pids))
+ g.log.info("Successful in verifying self heal daemon process"
+ " on all nodes %s", self.servers)
+
+ # Select the bricks to bring offline
+ bricks_to_bring_offline = select_volume_bricks_to_bring_offline(
+ self.mnode, self.volname)
+ g.log.info("Brick List to bring offline : %s", bricks_to_bring_offline)
+
+ # Bring down the selected bricks
+ ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
+ self.assertTrue(ret, "Failed to bring down the bricks")
+ g.log.info("Brought down the brick process "
+ "for %s", bricks_to_bring_offline)
+
+ # Write files on all mounts
+ all_mounts_procs, num_files_to_write = [], 100
+ for mount_obj in self.mounts:
+ cmd = ("/usr/bin/env python %s create_files "
+ "-f %d --base-file-name file %s" % (self.script_upload_path,
+ num_files_to_write,
+ mount_obj.mountpoint))
+ proc = g.run_async(mount_obj.client_system, cmd,
+ user=mount_obj.user)
+ all_mounts_procs.append(proc)
+
+ # Validate IO
+ ret = validate_io_procs(all_mounts_procs, self.mounts)
+ self.assertTrue(ret, "IO failed on some of the clients")
+ g.log.info("IO is successful on all mounts")
+
+ # Get online bricks list
+ online_bricks = get_online_bricks_list(self.mnode, self.volname)
+ g.log.info("Online Bricks for volume %s : %s",
+ self.volname, online_bricks)
+
+ # Get the nodes where bricks are running
+ bring_offline_glustershd_nodes = []
+ for brick in online_bricks:
+ bring_offline_glustershd_nodes.append(brick.split(":")[0])
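+ # Each brick is of the form '<host>:<brick-path>', so the split
+ # above extracts the host, e.g. 'server1:/bricks/b1' -> 'server1'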
+ g.log.info("self heal deamon on nodes %s to be killed",
+ bring_offline_glustershd_nodes)
+
+ # Kill the self heal daemon process on nodes
+ ret = bring_self_heal_daemon_process_offline(
+ bring_offline_glustershd_nodes)
+ self.assertTrue(ret, ("Unable to bring self heal daemon process"
+ " offline for nodes %s"
+ % bring_offline_glustershd_nodes))
+ g.log.info("Sucessfully brought down self heal process for "
+ "nodes %s", bring_offline_glustershd_nodes)
+
+ # Check the heal info
+ heal_info = get_heal_info_summary(self.mnode, self.volname)
+ g.log.info("Successfully got heal info %s for the volume %s",
+ heal_info, self.volname)
+
+ # Bring bricks online
+ ret = bring_bricks_online(self.mnode, self.volname,
+ bricks_to_bring_offline, 'glusterd_restart')
+ self.assertTrue(ret, ("Failed to bring bricks: %s online"
+ % bricks_to_bring_offline))
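+ # Note: the glusterd restart may respawn glustershd on the restarted
+ # (sink) nodes, but glustershd on the source brick nodes stays down,
+ # which is the condition under test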
+
+ # Issue a full heal; it should fail since glustershd is down on the
+ # source nodes
+ ret = trigger_heal_full(self.mnode, self.volname)
+ self.assertFalse(ret, ("Able to trigger heal on volume %s where "
+ "self heal daemon is not running"
+ % self.volname))
+ g.log.info("Expected : Unable to trigger heal on volume %s where "
+ "self heal daemon is not running", self.volname)
+
+ # Monitor heal for 130 sec; heal should remain pending
+ ret = monitor_heal_completion(self.mnode, self.volname, 130)
+ self.assertFalse(ret, ("Heal Completed on volume %s" % self.volname))
+ g.log.info("Expected : Heal pending on volume %s", self.volname)
+
+ # Check the heal info
+ heal_info_after_triggering_heal = get_heal_info_summary(self.mnode,
+ self.volname)
+ g.log.info("Successfully got heal info for the volume %s",
+ self.volname)
+
+ # Compare the pending heal entries with the number of files written
+ for node in online_bricks:
+ self.assertGreaterEqual(
+ int(heal_info_after_triggering_heal[node]['numberOfEntries']),
+ num_files_to_write,
+ ("Some of the files are healed from source bricks %s where "
+ "self heal daemon is not running" % node))
+ g.log.info("EXPECTED: No files are healed from source bricks where "
+ "self heal daemon is not running")
+
+ # Unmount and mount the volume again as the volume options were set
+ # after mounting the volume
+ for mount_obj in self.mounts:
+ ret, _, _ = umount_volume(mount_obj.client_system,
+ mount_obj.mountpoint)
+ self.assertEqual(ret, 0, "Failed to unmount %s"
+ % mount_obj.client_system)
+ ret, _, _ = mount_volume(self.volname,
+ mtype='glusterfs',
+ mpoint=mount_obj.mountpoint,
+ mserver=self.mnode,
+ mclient=mount_obj.client_system)
+ self.assertEqual(ret, 0, "Failed to mount %s"
+ % mount_obj.client_system)
+
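+ # Trigger client side heal: with glustershd down, reading the files
+ # (ls/cat/stat) from the mount should heal the pending entries since
+ # the *-self-heal options are enabled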
+ all_mounts_procs = []
+ for mount_obj in self.mounts:
+ cmd = ("cd %s;for i in `seq 1 5`; do ls -l;cat *; stat *; sleep 5;"
+ " done " % (mount_obj.mountpoint))
+ proc = g.run_async(mount_obj.client_system, cmd,
+ user=mount_obj.user)
+ all_mounts_procs.append(proc)
+
+ # Validate IO
+ ret = validate_io_procs(all_mounts_procs, self.mounts)
+ self.assertTrue(ret, "Reads failed on some of the clients")
+ g.log.info("Reads successful on all mounts")
+
+ # Wait for heal to complete
+ ret = monitor_heal_completion(self.mnode, self.volname)
+ self.assertTrue(ret, "Unable to heal the pending entries")
+ g.log.info("Successfully healed the pending entries for volume %s",
+ self.volname)