Diffstat (limited to 'tests/functional/arbiter')
-rwxr-xr-x  tests/functional/arbiter/brick_cases/test_replica3_to_arbiter.py | 118
-rwxr-xr-x  tests/functional/arbiter/brick_cases/test_rmvrf_files.py | 5
-rw-r--r--  tests/functional/arbiter/test_afr_read_write.py | 192
-rw-r--r--  tests/functional/arbiter/test_brick_down_cyclic.py | 140
-rw-r--r--  tests/functional/arbiter/test_data_delete.py | 110
-rw-r--r--  tests/functional/arbiter/test_data_self_heal_algorithm_diff_default.py | 10
-rwxr-xr-x  tests/functional/arbiter/test_data_self_heal_algorithm_diff_heal_command.py | 10
-rwxr-xr-x  tests/functional/arbiter/test_data_self_heal_algorithm_full_default.py | 10
-rw-r--r--  tests/functional/arbiter/test_data_self_heal_daemon_off.py | 5
-rw-r--r--  tests/functional/arbiter/test_entry_self_heal_heal_command.py | 6
-rw-r--r--  tests/functional/arbiter/test_gfid_self_heal.py | 206
-rw-r--r--  tests/functional/arbiter/test_gluster_clone_heal.py | 209
-rwxr-xr-x  tests/functional/arbiter/test_heal_full_after_deleting_the_files_from_bricks.py | 202
-rwxr-xr-x  tests/functional/arbiter/test_metadata_self_heal.py | 41
-rw-r--r--  tests/functional/arbiter/test_metadata_self_heal_on_open_fd.py | 244
-rwxr-xr-x  tests/functional/arbiter/test_mount_point_while_deleting_files.py | 67
-rw-r--r--  tests/functional/arbiter/test_self_heal_50k_files.py | 140
-rw-r--r--  tests/functional/arbiter/test_self_heal_50k_files_heal_command_by_add_brick.py | 19
-rw-r--r--  tests/functional/arbiter/test_self_heal_daemon.py | 256
-rwxr-xr-x  tests/functional/arbiter/test_self_heal_differing_in_file_type.py | 19
-rw-r--r--  tests/functional/arbiter/test_self_heal_symbolic_links.py | 19
-rw-r--r--  tests/functional/arbiter/test_split_brain.py | 165
-rw-r--r--  tests/functional/arbiter/test_verify_metadata_and_data_heal.py | 297
23 files changed, 2357 insertions, 133 deletions
diff --git a/tests/functional/arbiter/brick_cases/test_replica3_to_arbiter.py b/tests/functional/arbiter/brick_cases/test_replica3_to_arbiter.py
index 947c6eef7..24c014502 100755
--- a/tests/functional/arbiter/brick_cases/test_replica3_to_arbiter.py
+++ b/tests/functional/arbiter/brick_cases/test_replica3_to_arbiter.py
@@ -22,6 +22,12 @@ from glustolibs.gluster.volume_libs import (
expand_volume, wait_for_volume_process_to_be_online,
verify_all_process_of_volume_are_online, shrink_volume, get_subvols)
from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.io.utils import run_linux_untar
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+ is_heal_complete,
+ is_volume_in_split_brain)
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.gluster.heal_ops import trigger_heal
@runs_on([['replicated', 'distributed-replicated'],
@@ -37,22 +43,29 @@ class GlusterArbiterVolumeTypeClass(GlusterBaseClass):
# Calling GlusterBaseClass setUp
self.get_super_method(self, 'setUp')()
+ # Set I/O flag to false
+ self.is_io_running = False
+
# Setup Volume
- g.log.info("Starting to Setup Volume")
- ret = self.setup_volume()
+ g.log.info("Starting to Setup and Mount Volume")
+ # Creating Volume and mounting the volume
+ ret = self.setup_volume_and_mount_volume([self.mounts[0]])
if not ret:
- raise ExecutionError("Failed to Setup_Volume")
- g.log.info("Successful in Setup Volume")
+ raise ExecutionError("Volume creation or mount failed: %s"
+ % self.volname)
self.subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
def tearDown(self):
- # Cleanup Volume
- g.log.info("Starting to Unmount Volume and Cleanup Volume")
- ret = self.cleanup_volume()
+ # Wait for I/O if not completed
+ if self.is_io_running:
+ if not self._wait_for_untar_completion():
+ g.log.error("I/O failed to stop on clients")
+
+ # Unmounting and cleaning volume
+ ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]])
if not ret:
- raise ExecutionError("Failed to Cleanup Volume")
- g.log.info("Successful Cleanup Volume")
+            raise ExecutionError("Unable to delete volume %s" % self.volname)
# Calling GlusterBaseClass tearDown
self.get_super_method(self, 'tearDown')()
@@ -67,10 +80,22 @@ class GlusterArbiterVolumeTypeClass(GlusterBaseClass):
g.log.info('Clearing brick %s is successful', brick)
g.log.info('Clearing for all brick is successful')
- def test_replicated_to_arbiter_volume(self):
+ def _wait_for_untar_completion(self):
+ """Wait for untar to complete"""
+ has_process_stopped = []
+ for proc in self.io_process:
+ try:
+ ret, _, _ = proc.async_communicate()
+ if not ret:
+ has_process_stopped.append(False)
+ has_process_stopped.append(True)
+ except ValueError:
+ has_process_stopped.append(True)
+ return all(has_process_stopped)
+
+ def _convert_replicated_to_arbiter_volume(self):
"""
- Description:-
- Reduce the replica count from replica 3 to arbiter
+        Helper method to convert a replicated volume to an arbiter volume.
"""
# pylint: disable=too-many-statements
# Remove brick to reduce the replica count from replica 3
@@ -99,7 +124,7 @@ class GlusterArbiterVolumeTypeClass(GlusterBaseClass):
g.log.info("Adding bricks to convert to Arbiter Volume")
replica_arbiter = {'replica_count': 1, 'arbiter_count': 1}
ret = expand_volume(self.mnode, self.volname, self.servers,
- self.all_servers_info, add_to_hot_tier=False,
+ self.all_servers_info, force=True,
**replica_arbiter)
self.assertTrue(ret, "Failed to expand the volume %s" % self.volname)
g.log.info("Changing volume to arbiter volume is successful %s",
@@ -119,3 +144,70 @@ class GlusterArbiterVolumeTypeClass(GlusterBaseClass):
self.assertTrue(ret, "Volume %s : All process are not online"
% self.volname)
g.log.info("Volume %s : All process are online", self.volname)
+
+ def test_replicated_to_arbiter_volume(self):
+ """
+ Description:-
+ Reduce the replica count from replica 3 to arbiter
+ """
+ # pylint: disable=too-many-statements
+ self._convert_replicated_to_arbiter_volume()
+
+ def test_replica_to_arbiter_volume_with_io(self):
+ """
+ Description: Replica 3 to arbiter conversion with ongoing IO's
+
+ Steps :
+ 1) Create a replica 3 volume and start volume.
+ 2) Set client side self heal off.
+ 3) Fuse mount the volume.
+ 4) Create directory dir1 and write data.
+ Example: untar linux tar from the client into the dir1
+        5) While IO is running, execute remove-brick command,
+ and convert replica 3 to replica 2 volume
+ 6) Execute add-brick command and convert to arbiter volume,
+ provide the path of new arbiter brick.
+ 7) Issue gluster volume heal.
+ 8) Heal should be completed with no files in split-brain.
+ """
+
+ # pylint: disable=too-many-statements
+ # Create a dir to start untar
+ self.linux_untar_dir = "{}/{}".format(self.mounts[0].mountpoint,
+ "linuxuntar")
+ ret = mkdir(self.clients[0], self.linux_untar_dir)
+ self.assertTrue(ret, "Failed to create dir linuxuntar for untar")
+
+ # Start linux untar on dir linuxuntar
+ self.io_process = run_linux_untar(self.clients[0],
+ self.mounts[0].mountpoint,
+ dirs=tuple(['linuxuntar']))
+ self.is_io_running = True
+
+        # Convert replicated to arbiter volume
+ self._convert_replicated_to_arbiter_volume()
+
+ # Wait for IO to complete.
+ ret = self._wait_for_untar_completion()
+ self.assertFalse(ret, "IO didn't complete or failed on client")
+ self.is_io_running = False
+
+ # Start healing
+ ret = trigger_heal(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal is not started')
+ g.log.info('Healing is started')
+
+ # Monitor heal completion
+ ret = monitor_heal_completion(self.mnode, self.volname,
+ timeout_period=3600)
+ self.assertTrue(ret, 'Heal has not yet completed')
+
+ # Check if heal is completed
+ ret = is_heal_complete(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal is not complete')
+ g.log.info('Heal is completed successfully')
+
+ # Check for split-brain
+ ret = is_volume_in_split_brain(self.mnode, self.volname)
+ self.assertFalse(ret, 'Volume is in split-brain state')
+ g.log.info('Volume is not in split-brain state')
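
For reference, a minimal standalone sketch of the start-and-wait pattern the new test relies on, re-expressed with plain subprocess rather than glusto's async process objects; the helper names here are illustrative only and are not part of this patch:

import subprocess

def start_untar(tarball, dest_dir):
    """Kick off an untar in the background and return the process handle."""
    return subprocess.Popen(
        ["tar", "-xf", tarball, "-C", dest_dir],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)

def wait_for_untar_completion(procs):
    """Return True only if every background untar exited cleanly."""
    finished_ok = []
    for proc in procs:
        proc.communicate()               # blocks until the process exits
        finished_ok.append(proc.returncode == 0)
    return all(finished_ok)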
diff --git a/tests/functional/arbiter/brick_cases/test_rmvrf_files.py b/tests/functional/arbiter/brick_cases/test_rmvrf_files.py
index 9dbaa74fc..8d7304b0b 100755
--- a/tests/functional/arbiter/brick_cases/test_rmvrf_files.py
+++ b/tests/functional/arbiter/brick_cases/test_rmvrf_files.py
@@ -145,10 +145,7 @@ class TestRmrfMount(GlusterBaseClass):
# Select bricks to bring offline
bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
self.mnode, self.volname))
- bricks_to_bring_offline = list(filter(None, (
- bricks_to_bring_offline_dict['hot_tier_bricks'] +
- bricks_to_bring_offline_dict['cold_tier_bricks'] +
- bricks_to_bring_offline_dict['volume_bricks'])))
+ bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks']
# Killing one brick from the volume set
g.log.info("Bringing bricks: %s offline", bricks_to_bring_offline)
diff --git a/tests/functional/arbiter/test_afr_read_write.py b/tests/functional/arbiter/test_afr_read_write.py
new file mode 100644
index 000000000..09e6a3a2a
--- /dev/null
+++ b/tests/functional/arbiter/test_afr_read_write.py
@@ -0,0 +1,192 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from random import sample
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.volume_libs import (
+ verify_all_process_of_volume_are_online,
+ wait_for_volume_process_to_be_online)
+from glustolibs.gluster.brick_libs import (
+ get_all_bricks,
+ bring_bricks_offline,
+ bring_bricks_online,
+ are_bricks_offline)
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+ is_volume_in_split_brain)
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.io.utils import validate_io_procs
+
+
+@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']])
+class TestAfrReadWrite(GlusterBaseClass):
+
+ """
+ Description:
+ Arbiter test writes and reads from a file
+ """
+ def setUp(self):
+ # Calling GlusterBaseClass
+ self.get_super_method(self, 'setUp')()
+
+ # Setup Volume and Mount Volume
+ ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
+ if not ret:
+ raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+
+ def tearDown(self):
+ """
+ Cleanup and umount volume
+ """
+ # Cleanup and umount volume
+ ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+ if not ret:
+ raise ExecutionError("Failed to umount the vol & cleanup Volume")
+
+ # Calling GlusterBaseClass teardown
+ self.get_super_method(self, 'tearDown')()
+
+ def _bring_bricks_online_heal(self, mnode, volname, bricks_list):
+ """
+ Bring bricks online and monitor heal completion
+ """
+ # Bring bricks online
+ ret = bring_bricks_online(
+ mnode, volname, bricks_list,
+ bring_bricks_online_methods=['volume_start_force'])
+ self.assertTrue(ret, 'Failed to bring bricks online')
+
+ # Wait for volume processes to be online
+ ret = wait_for_volume_process_to_be_online(mnode, volname)
+ self.assertTrue(ret, ("Failed to wait for volume {} processes to "
+ "be online".format(volname)))
+
+ # Verify volume's all process are online
+ ret = verify_all_process_of_volume_are_online(mnode, volname)
+ self.assertTrue(ret, ("Volume {} : All process are not online".format
+ (volname)))
+ g.log.info("Volume %s : All process are online", volname)
+
+ # Monitor heal completion
+ ret = monitor_heal_completion(mnode, volname)
+ self.assertTrue(ret, 'Heal has not yet completed')
+
+ # Check for split-brain
+ ret = is_volume_in_split_brain(mnode, volname)
+ self.assertFalse(ret, 'Volume is in split-brain state')
+
+ def test_afr_read_write(self):
+ """
+ Test read and write of file
+ Description:
+ - Get the bricks from the volume
+ - Creating directory test_write_and_read_file
+ - Write from 1st client
+ - Read from 2nd client
+ - Select brick to bring offline
+ - Bring brick offline
+ - Validating IO's on client1
+ - Validating IO's on client2
+ - Bring bricks online
+ - Wait for volume processes to be online
+ - Verify volume's all process are online
+ - Monitor heal completion
+ - Check for split-brain
+ - Bring 2nd brick offline
+ - Check if brick is offline
+ - Write from 1st client
+ - Read from 2nd client
+ - Bring bricks online
+ - Wait for volume processes to be online
+ - Verify volume's all process are online
+ - Monitor heal completion
+ - Check for split-brain
+
+ - Get arequal after getting bricks online
+ """
+ # pylint: disable=too-many-branches,too-many-statements,too-many-locals
+ # Get the bricks from the volume
+ bricks_list = get_all_bricks(self.mnode, self.volname)
+ g.log.info("Brick List : %s", bricks_list)
+
+ # Creating directory test_write_and_read_file
+ ret = mkdir(self.mounts[0].client_system,
+ "{}/test_write_and_read_file"
+ .format(self.mounts[0].mountpoint))
+ self.assertTrue(ret, "Failed to create directory")
+ g.log.info("Directory 'test_write_and_read_file' on %s created "
+ "successfully", self.mounts[0])
+
+ # Write from 1st client
+ cmd_to_write = (
+ 'cd %s/test_write_and_read_file ; for i in `seq 1 5000` ;'
+ 'do echo -e "Date:`date`\n" >> test_file ;echo -e "'
+ '`cal`\n" >> test_file ; done ; cd ..'
+ % self.mounts[0].mountpoint)
+ proc1 = g.run_async(self.mounts[0].client_system,
+ cmd_to_write)
+
+ # Read from 2nd client
+ cmd = ('cd %s/ ;for i in {1..30};'
+ 'do cat test_write_and_read_file/test_file;done'
+ % self.mounts[1].mountpoint)
+ proc2 = g.run_async(self.mounts[1].client_system, cmd)
+
+ # Bring brick offline
+ bricks_to_bring_offline = sample(bricks_list, 2)
+ ret = bring_bricks_offline(self.volname, bricks_to_bring_offline[0])
+ self.assertTrue(ret, 'Failed to bring bricks {} offline'.
+ format(bricks_to_bring_offline))
+
+ # Check brick is offline
+ ret = are_bricks_offline(self.mnode, self.volname,
+ [bricks_to_bring_offline[0]])
+ self.assertTrue(ret, 'Bricks {} are not offline'.
+ format(bricks_to_bring_offline[0]))
+
+ # Validating IO's
+ for proc, mount in zip([proc1, proc2], self.mounts):
+ ret = validate_io_procs([proc], mount)
+ self.assertTrue(ret, "IO failed on client")
+ g.log.info("Successfully validated all IO's")
+
+ self._bring_bricks_online_heal(self.mnode, self.volname, bricks_list)
+
+ # Bring down second brick
+ ret = bring_bricks_offline(self.volname, bricks_to_bring_offline[1])
+ self.assertTrue(ret, 'Failed to bring bricks {} offline'.
+ format(bricks_to_bring_offline[1]))
+
+ # Check if brick is offline
+ ret = are_bricks_offline(self.mnode, self.volname,
+ [bricks_to_bring_offline[1]])
+ self.assertTrue(ret, 'Bricks {} are not offline'.
+ format(bricks_to_bring_offline[1]))
+
+ # Write from 1st client
+ ret, _, _ = g.run(self.mounts[0].client_system, cmd_to_write)
+ self.assertEqual(ret, 0, "Failed to write to file")
+ g.log.info("Successfully written to file")
+
+ # Read from 2nd client
+        cmd = ('cd %s/ ;cat test_write_and_read_file/test_file'
+               % self.mounts[1].mountpoint)
+        ret, _, _ = g.run(self.mounts[1].client_system, cmd)
+ self.assertEqual(ret, 0, "Failed to read file on mountpoint")
+ g.log.info("Successfully read file on mountpoint")
+
+ self._bring_bricks_online_heal(self.mnode, self.volname, bricks_list)
diff --git a/tests/functional/arbiter/test_brick_down_cyclic.py b/tests/functional/arbiter/test_brick_down_cyclic.py
new file mode 100644
index 000000000..8639a4dc5
--- /dev/null
+++ b/tests/functional/arbiter/test_brick_down_cyclic.py
@@ -0,0 +1,140 @@
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+# pylint: disable=too-many-statements, too-many-locals
+import time
+from glusto.core import Glusto as g
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.heal_ops import trigger_heal
+from glustolibs.gluster.heal_libs import (is_volume_in_split_brain,
+ is_heal_complete)
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+ bring_bricks_online,
+ are_bricks_offline,
+ get_all_bricks,
+ are_bricks_online)
+from glustolibs.gluster.heal_libs import (
+ monitor_heal_completion, are_all_self_heal_daemons_are_online)
+
+
+@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']])
+class TestBrickDownHeal(GlusterBaseClass):
+
+ @classmethod
+ def setUpClass(cls):
+
+ # Calling GlusterBaseClass setUpClass
+ cls.get_super_method(cls, 'setUpClass')()
+
+ # Setup Volume and Mount Volume
+ ret = cls.setup_volume_and_mount_volume(cls.mounts, True)
+ if not ret:
+ raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+
+ @classmethod
+ def tearDownClass(cls):
+ """
+ Cleanup Volume
+ """
+ ret = cls.unmount_volume_and_cleanup_volume(cls.mounts)
+ if not ret:
+            raise ExecutionError("Failed to unmount and cleanup volume")
+
+ cls.get_super_method(cls, 'tearDownClass')()
+
+ def test_brick_down_heal(self):
+ """
+ - Run IO's from client on a single file
+ - Now bring down bricks in cyclic order
+ - kill brick 1, sleep for 5 seconds, bring brick 1 up, wait for 10s
+ - Now repeat step3 for brick2 and brick 3
+ - Repeat the cycle a few times
+ - Trigger heal, check for split brain using command
+ """
+ # Write IO's
+ self.all_mounts_procs = []
+ cmd = ("for i in `seq 1 10`;"
+ "do dd if=/dev/urandom of=%s/file$i bs=1K count=1;"
+ "done" % self.mounts[0].mountpoint)
+ proc = g.run_async(self.mounts[0].client_system, cmd)
+ self.all_mounts_procs.append(proc)
+
+ # Killing bricks in cyclic order
+ bricks_list = get_all_bricks(self.mnode, self.volname)
+
+ # Total number of cyclic brick-down cycles to be executed
+ number_of_cycles = 0
+ while number_of_cycles < 3:
+ number_of_cycles += 1
+ for brick in bricks_list:
+ # Bring brick offline
+ g.log.info('Bringing bricks %s offline', brick)
+ ret = bring_bricks_offline(self.volname, [brick])
+ self.assertTrue(ret, ("Failed to bring bricks %s offline"
+ % brick))
+
+ ret = are_bricks_offline(self.mnode, self.volname, [brick])
+ self.assertTrue(ret, 'Bricks %s are not offline' % brick)
+ g.log.info('Bringing bricks %s offline is successful', brick)
+
+ # Introducing 5 second sleep when brick is down
+ g.log.info("Waiting for 5 seconds, with ongoing IO while "
+ "brick %s is offline", brick)
+                time.sleep(5)
+
+ # Bring brick online
+ g.log.info('Bringing bricks %s online', brick)
+ ret = bring_bricks_online(self.mnode, self.volname, [brick])
+ self.assertTrue(ret, ("Failed to bring bricks %s online "
+ % brick))
+ g.log.info('Bricks %s are online', brick)
+
+ # Introducing 10 second sleep when brick is up
+ g.log.info("Waiting for 10 seconds,when "
+ "brick %s is online", brick)
+                time.sleep(10)
+
+ # Check if bricks are online
+ ret = are_bricks_online(self.mnode, self.volname, bricks_list)
+ self.assertTrue(ret, 'Bricks %s are not online' % bricks_list)
+ g.log.info('Bricks %s are online', bricks_list)
+
+ # Check daemons
+ g.log.info('Checking daemons...')
+ ret = are_all_self_heal_daemons_are_online(self.mnode,
+ self.volname)
+ self.assertTrue(ret, ("Some of the self-heal Daemons are "
+ "offline"))
+ g.log.info('All self-heal Daemons are online')
+
+ # Trigger self heal
+ ret = trigger_heal(self.mnode, self.volname)
+ self.assertTrue(ret, 'Unable to trigger heal on volume')
+
+ # Monitor heal completion
+ ret = monitor_heal_completion(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal has not yet completed')
+
+ # Check if heal is completed
+ ret = is_heal_complete(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal is not complete')
+ g.log.info('Heal is completed successfully')
+
+ # Check for split-brain
+ ret = is_volume_in_split_brain(self.mnode, self.volname)
+ self.assertFalse(ret, 'Volume is in split-brain state')
+ g.log.info('Volume is not in split-brain state')
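
The while-loop with a manual cycle counter above could equally be written as a for loop over a fixed cycle count; a minimal sketch of the cyclic bring-down pattern with the brick handling passed in as callables (the names are illustrative, not glustolibs APIs):

import time

CYCLES = 3
BRICK_DOWN_WAIT = 5     # seconds to keep each brick offline
BRICK_UP_WAIT = 10      # seconds to let the brick settle after restart

def cycle_bricks(bricks, take_offline, bring_online):
    """Bring each brick down and back up, CYCLES times over."""
    for _ in range(CYCLES):
        for brick in bricks:
            take_offline(brick)
            time.sleep(BRICK_DOWN_WAIT)
            bring_online(brick)
            time.sleep(BRICK_UP_WAIT)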
diff --git a/tests/functional/arbiter/test_data_delete.py b/tests/functional/arbiter/test_data_delete.py
new file mode 100644
index 000000000..4753efcbc
--- /dev/null
+++ b/tests/functional/arbiter/test_data_delete.py
@@ -0,0 +1,110 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on)
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.brick_libs import get_all_bricks
+
+
+@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']])
+class TestDataDelete(GlusterBaseClass):
+ """
+ Description:
+ Test data delete/rename on arbiter volume
+ """
+ def setUp(self):
+ # Calling GlusterBaseClass
+ self.get_super_method(self, 'setUp')()
+
+ # Setup Volume and Mount Volume
+ ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
+ if not ret:
+ raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+ g.log.info("Successful in Setup Volume and Mount Volume")
+
+ def tearDown(self):
+ """
+ Cleanup and umount volume
+ """
+
+ # Cleanup and umount volume
+ ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+ if not ret:
+ raise ExecutionError("Failed to umount the vol & cleanup Volume")
+ g.log.info("Successful in umounting the volume and Cleanup")
+
+ # Calling GlusterBaseClass teardown
+ self.get_super_method(self, 'tearDown')()
+
+ def test_data_delete(self):
+ """
+ Test steps:
+ - Get brick list
+ - Create files and rename
+ - Check if brick path contains old files
+ - Delete files from mountpoint
+ - Check .glusterfs/indices/xattrop is empty
+ - Check if brickpath is empty
+ """
+
+ # pylint: disable=too-many-branches,too-many-statements,too-many-locals
+ # Get the bricks from the volume
+ bricks_list = get_all_bricks(self.mnode, self.volname)
+ g.log.info("Brick List : %s", bricks_list)
+
+ # Create files and rename
+ cmd = ('cd %s ;for i in `seq 1 100` ;do mkdir -pv directory$i;'
+ 'cd directory$i;dd if=/dev/urandom of=file$i bs=1M count=5;'
+ 'mv file$i renamed$i;done;' % (self.mounts[0].mountpoint))
+ ret, _, _ = g.run(self.clients[0], cmd)
+ self.assertEqual(ret, 0, "Fail: Not able to create files on "
+ "{}".format(self.mounts[0].mountpoint))
+ g.log.info("Files created successfully and renamed")
+
+ # Check if brickpath contains old files
+ for brick in bricks_list:
+ brick_node, brick_path = brick.split(":")
+ cmd = ("ls -1 %s |grep file |wc -l " % brick_path)
+ ret, out, _ = g.run(brick_node, cmd)
+ self.assertEqual(0, int(out.strip()), "Brick path {} contains old "
+ "file in node {}".format(brick_path, brick_node))
+ g.log.info("Brick path contains renamed files")
+
+ # Delete files from mountpoint
+ cmd = ('rm -rf -v %s/*' % self.mounts[0].mountpoint)
+ ret, _, _ = g.run(self.mounts[0].client_system, cmd)
+ self.assertEqual(ret, 0, "Failed to delete files")
+ g.log.info("Files deleted successfully for %s", self.mounts[0])
+
+ # Check .glusterfs/indices/xattrop is empty
+ for brick in bricks_list:
+ brick_node, brick_path = brick.split(":")
+ cmd = ("ls -1 %s/.glusterfs/indices/xattrop/ | "
+ "grep -ve \"xattrop-\" | wc -l" % brick_path)
+ ret, out, _ = g.run(brick_node, cmd)
+ self.assertEqual(0, int(out.strip()), ".glusterfs/indices/"
+ "xattrop is not empty")
+ g.log.info("No pending heals on bricks")
+
+ # Check if brickpath is empty
+ for brick in bricks_list:
+ brick_node, brick_path = brick.split(":")
+ cmd = ("ls -1 %s |wc -l " % brick_path)
+ ret, out, _ = g.run(brick_node, cmd)
+ self.assertEqual(0, int(out.strip()), "Brick path {} is not empty "
+ "in node {}".format(brick_path, brick_node))
+ g.log.info("Brick path is empty on all nodes")
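
The xattrop check above works because AFR keeps one index entry per file with pending heals, plus a base "xattrop-<gfid>" hard link, under .glusterfs/indices/xattrop on each brick. A small helper expressing the same check as a reusable function is sketched below (hypothetical helper, assuming the same g.run remote runner used in these tests):

# Sketch: count pending-heal entries in a brick's xattrop index.
def pending_heal_count(g, brick_node, brick_path):
    cmd = ("ls -1 {}/.glusterfs/indices/xattrop/ | "
           "grep -v '^xattrop-' | wc -l".format(brick_path))
    ret, out, _ = g.run(brick_node, cmd)
    return int(out.strip()) if ret == 0 else -1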
diff --git a/tests/functional/arbiter/test_data_self_heal_algorithm_diff_default.py b/tests/functional/arbiter/test_data_self_heal_algorithm_diff_default.py
index 17c2ba4d5..bbb30f271 100644
--- a/tests/functional/arbiter/test_data_self_heal_algorithm_diff_default.py
+++ b/tests/functional/arbiter/test_data_self_heal_algorithm_diff_default.py
@@ -133,10 +133,7 @@ class TestSelfHeal(GlusterBaseClass):
# Select bricks to bring offline
bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
self.mnode, self.volname))
- bricks_to_bring_offline = list(filter(None, (
- bricks_to_bring_offline_dict['hot_tier_bricks'] +
- bricks_to_bring_offline_dict['cold_tier_bricks'] +
- bricks_to_bring_offline_dict['volume_bricks'])))
+ bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks']
# Bring brick offline
g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
@@ -230,7 +227,8 @@ class TestSelfHeal(GlusterBaseClass):
# Checking arequals before bringing bricks online
# and after bringing bricks online
- self.assertItemsEqual(result_before_online, result_after_online,
- 'Checksums are not equal')
+ self.assertEqual(sorted(result_before_online),
+ sorted(result_after_online),
+ 'Checksums are not equal')
g.log.info('Checksums before bringing bricks online '
'and after bringing bricks online are equal')
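
assertItemsEqual exists only in Python 2's unittest (Python 3 renamed it assertCountEqual), so this patch switches to comparing sorted lists, which is equivalent for orderable items. A self-contained illustration, assuming Python 3:

import unittest

class ChecksumCompare(unittest.TestCase):
    def test_equal_ignoring_order(self):
        before_online = ['sum-b', 'sum-a', 'sum-c']
        after_online = ['sum-a', 'sum-c', 'sum-b']
        # What the patch now does: order-insensitive compare via sorting.
        self.assertEqual(sorted(before_online), sorted(after_online))
        # Python 3 equivalent of the removed assertItemsEqual.
        self.assertCountEqual(before_online, after_online)

if __name__ == '__main__':
    unittest.main()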
diff --git a/tests/functional/arbiter/test_data_self_heal_algorithm_diff_heal_command.py b/tests/functional/arbiter/test_data_self_heal_algorithm_diff_heal_command.py
index 132b9df8a..0aa440af1 100755
--- a/tests/functional/arbiter/test_data_self_heal_algorithm_diff_heal_command.py
+++ b/tests/functional/arbiter/test_data_self_heal_algorithm_diff_heal_command.py
@@ -161,10 +161,7 @@ class TestSelfHeal(GlusterBaseClass):
# Select bricks to bring offline
bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
self.mnode, self.volname))
- bricks_to_bring_offline = list(filter(None, (
- bricks_to_bring_offline_dict['hot_tier_bricks'] +
- bricks_to_bring_offline_dict['cold_tier_bricks'] +
- bricks_to_bring_offline_dict['volume_bricks'])))
+ bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks']
# Bring brick offline
g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
@@ -272,6 +269,7 @@ class TestSelfHeal(GlusterBaseClass):
# Checking arequals before bringing bricks offline
# and after bringing bricks online
- self.assertItemsEqual(result_before_online, result_after_online,
- 'Checksums are not equal')
+ self.assertEqual(sorted(result_before_online),
+ sorted(result_after_online),
+ 'Checksums are not equal')
g.log.info('Checksums are equal')
diff --git a/tests/functional/arbiter/test_data_self_heal_algorithm_full_default.py b/tests/functional/arbiter/test_data_self_heal_algorithm_full_default.py
index 82538d42a..f4f13931a 100755
--- a/tests/functional/arbiter/test_data_self_heal_algorithm_full_default.py
+++ b/tests/functional/arbiter/test_data_self_heal_algorithm_full_default.py
@@ -132,10 +132,7 @@ class TestSelfHeal(GlusterBaseClass):
# Select bricks to bring offline
bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
self.mnode, self.volname))
- bricks_to_bring_offline = list(filter(None, (
- bricks_to_bring_offline_dict['hot_tier_bricks'] +
- bricks_to_bring_offline_dict['cold_tier_bricks'] +
- bricks_to_bring_offline_dict['volume_bricks'])))
+ bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks']
# Bring brick offline
g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
@@ -229,7 +226,8 @@ class TestSelfHeal(GlusterBaseClass):
# Checking arequals before bringing bricks online
# and after bringing bricks online
- self.assertItemsEqual(result_before_online, result_after_online,
- 'Checksums are not equal')
+ self.assertEqual(sorted(result_before_online),
+ sorted(result_after_online),
+ 'Checksums are not equal')
g.log.info('Checksums before bringing bricks online '
'and after bringing bricks online are equal')
diff --git a/tests/functional/arbiter/test_data_self_heal_daemon_off.py b/tests/functional/arbiter/test_data_self_heal_daemon_off.py
index df2e58aa6..9faae85ca 100644
--- a/tests/functional/arbiter/test_data_self_heal_daemon_off.py
+++ b/tests/functional/arbiter/test_data_self_heal_daemon_off.py
@@ -164,10 +164,7 @@ class TestSelfHeal(GlusterBaseClass):
# Select bricks to bring offline
bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
self.mnode, self.volname))
- bricks_to_bring_offline = list(filter(None, (
- bricks_to_bring_offline_dict['hot_tier_bricks'] +
- bricks_to_bring_offline_dict['cold_tier_bricks'] +
- bricks_to_bring_offline_dict['volume_bricks'])))
+ bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks']
# Bring brick offline
g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
diff --git a/tests/functional/arbiter/test_entry_self_heal_heal_command.py b/tests/functional/arbiter/test_entry_self_heal_heal_command.py
index ced2bc19c..64c6c2339 100644
--- a/tests/functional/arbiter/test_entry_self_heal_heal_command.py
+++ b/tests/functional/arbiter/test_entry_self_heal_heal_command.py
@@ -177,10 +177,8 @@ class TestSelfHeal(GlusterBaseClass):
# Select bricks to bring offline
bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
self.mnode, self.volname))
- bricks_to_bring_offline = list(filter(None, (
- bricks_to_bring_offline_dict['hot_tier_bricks'] +
- bricks_to_bring_offline_dict['cold_tier_bricks'] +
- bricks_to_bring_offline_dict['volume_bricks'])))
+ bricks_to_bring_offline = (
+ bricks_to_bring_offline_dict['volume_bricks'])
# Bring brick offline
g.log.info('Bringing bricks %s offline...',
diff --git a/tests/functional/arbiter/test_gfid_self_heal.py b/tests/functional/arbiter/test_gfid_self_heal.py
new file mode 100644
index 000000000..9ed4a8767
--- /dev/null
+++ b/tests/functional/arbiter/test_gfid_self_heal.py
@@ -0,0 +1,206 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on)
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.volume_libs import (
+ verify_all_process_of_volume_are_online,
+ wait_for_volume_process_to_be_online)
+from glustolibs.gluster.brick_libs import (
+ select_volume_bricks_to_bring_offline,
+ bring_bricks_offline,
+ bring_bricks_online,
+ are_bricks_offline)
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+ is_volume_in_split_brain)
+from glustolibs.io.utils import (collect_mounts_arequal)
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.misc.misc_libs import upload_scripts
+
+
+@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']])
+class TestGFIDSelfHeal(GlusterBaseClass):
+
+ """
+ Description:
+ Arbiter Test cases related to GFID self heal
+ """
+ @classmethod
+ def setUpClass(cls):
+ # Calling GlusterBaseClass setUpClass
+ cls.get_super_method(cls, 'setUpClass')()
+
+ # Upload io scripts for running IO on mounts
+ cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
+ "file_dir_ops.py")
+ ret = upload_scripts(cls.clients, cls.script_upload_path)
+ if not ret:
+ raise ExecutionError("Failed to upload IO scripts to clients %s"
+ % cls.clients)
+ g.log.info("Successfully uploaded IO scripts to clients %s",
+ cls.clients)
+
+ def setUp(self):
+ # Calling GlusterBaseClass
+ self.get_super_method(self, 'setUp')()
+
+ # Setup Volume and Mount Volume
+ ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
+ if not ret:
+ raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+ g.log.info("Successful in Setup Volume and Mount Volume")
+
+ def tearDown(self):
+ """
+ Cleanup and umount volume
+ """
+ # Cleanup and umount volume
+ ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+ if not ret:
+ raise ExecutionError("Failed to umount the vol & cleanup Volume")
+ g.log.info("Successful in umounting the volume and Cleanup")
+
+ # Calling GlusterBaseClass teardown
+ self.get_super_method(self, 'tearDown')()
+
+ def test_gfid_self_heal(self):
+ """
+ Test GFID self heal
+ Description:
+        - Creating directory test_gfid_self_heal
+ - Write Deep directories and files
+ - Get arequal before getting bricks offline
+ - Select bricks to bring offline
+ - Bring brick offline
+        - Delete directory on mountpoint where data is written
+ - Create the same directory and write same data
+ - Bring bricks online
+ - Wait for volume processes to be online
+ - Verify volume's all process are online
+ - Monitor heal completion
+ - Check for split-brain
+ - Get arequal after getting bricks online
+ """
+ # pylint: disable=too-many-branches,too-many-statements,too-many-locals
+        # Creating directory test_gfid_self_heal
+ ret = mkdir(self.mounts[0].client_system, "{}/test_gfid_self_heal"
+ .format(self.mounts[0].mountpoint))
+ self.assertTrue(ret, "Failed to create directory")
+ g.log.info("Directory 'test_gfid_self_heal' on %s created "
+ "successfully", self.mounts[0])
+
+ # Write Deep directories and files
+ count = 1
+ for mount_obj in self.mounts:
+ cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
+ "--dirname-start-num %d --dir-depth 2 "
+ "--dir-length 10 --max-num-of-dirs 5 "
+ "--num-of-files 5 %s/dir1" % (
+ self.script_upload_path, count,
+ mount_obj.mountpoint))
+ ret, _, _ = g.run(self.mounts[0].client_system, cmd)
+ self.assertEqual(ret, 0, "Failed to create files on mountpoint")
+ g.log.info("Successfully created files on mountpoint")
+ count += 10
+
+ # Get arequal before getting bricks offline
+ ret, result_before_offline = collect_mounts_arequal(self.mounts)
+ self.assertTrue(ret, 'Failed to get arequal')
+        g.log.info('Arequal before bringing bricks offline '
+                   'is %s', result_before_offline)
+
+ # Select bricks to bring offline
+ bricks_to_bring_offline = select_volume_bricks_to_bring_offline(
+ self.mnode, self.volname)
+ self.assertIsNotNone(bricks_to_bring_offline, "List is empty")
+
+ # Bring brick offline
+ ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
+ self.assertTrue(ret, 'Failed to bring bricks {} offline'.
+ format(bricks_to_bring_offline))
+
+ ret = are_bricks_offline(self.mnode, self.volname,
+ bricks_to_bring_offline)
+ self.assertTrue(ret, 'Bricks {} are not offline'.
+ format(bricks_to_bring_offline))
+ g.log.info('Bringing bricks %s offline is successful',
+ bricks_to_bring_offline)
+
+ # Delete directory on mountpoint where data is written
+ cmd = ('rm -rf -v %s/test_gfid_self_heal' % self.mounts[0].mountpoint)
+ ret, _, _ = g.run(self.mounts[0].client_system, cmd)
+ self.assertEqual(ret, 0, "Failed to delete directory")
+ g.log.info("Directory deleted successfully for %s", self.mounts[0])
+
+ # Create the same directory and write same data
+ ret = mkdir(self.mounts[0].client_system, "{}/test_gfid_self_heal"
+ .format(self.mounts[0].mountpoint))
+ self.assertTrue(ret, "Failed to create directory")
+ g.log.info("Directory 'test_gfid_self_heal' on %s created "
+ "successfully", self.mounts[0])
+
+ # Write the same files again
+ count = 1
+ for mount_obj in self.mounts:
+ cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
+ "--dirname-start-num %d --dir-depth 2 "
+ "--dir-length 10 --max-num-of-dirs 5 "
+ "--num-of-files 5 %s/dir1" % (
+ self.script_upload_path, count,
+ mount_obj.mountpoint))
+ ret, _, _ = g.run(self.mounts[0].client_system, cmd)
+ self.assertEqual(ret, 0, "Failed to create files on mountpoint")
+ g.log.info("Successfully created files on mountpoint")
+ count += 10
+
+ # Bring bricks online
+ ret = bring_bricks_online(
+ self.mnode, self.volname,
+ bricks_to_bring_offline,
+ bring_bricks_online_methods=['volume_start_force'])
+ self.assertTrue(ret, 'Failed to bring bricks {} online'.format
+ (bricks_to_bring_offline))
+ g.log.info('Bringing bricks %s online is successful',
+ bricks_to_bring_offline)
+
+ # Wait for volume processes to be online
+ ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
+ self.assertTrue(ret, ("Failed to wait for volume {} processes to "
+ "be online".format(self.volname)))
+ g.log.info("Successful in waiting for volume %s processes to be "
+ "online", self.volname)
+
+ # Verify volume's all process are online
+ ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
+ self.assertTrue(ret, ("Volume {} : All process are not online".format
+ (self.volname)))
+ g.log.info("Volume %s : All process are online", self.volname)
+
+ # Monitor heal completion
+ ret = monitor_heal_completion(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal has not yet completed')
+
+ # Check for split-brain
+ ret = is_volume_in_split_brain(self.mnode, self.volname)
+ self.assertFalse(ret, 'Volume is in split-brain state')
+ g.log.info('Volume is not in split-brain state')
+
+ # Get arequal after getting bricks online
+ ret, result_after_online = collect_mounts_arequal(self.mounts)
+ self.assertTrue(ret, 'Failed to get arequal')
+ g.log.info('Arequal after getting bricks online '
+ 'is %s', result_after_online)
diff --git a/tests/functional/arbiter/test_gluster_clone_heal.py b/tests/functional/arbiter/test_gluster_clone_heal.py
new file mode 100644
index 000000000..94603c701
--- /dev/null
+++ b/tests/functional/arbiter/test_gluster_clone_heal.py
@@ -0,0 +1,209 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on)
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.volume_libs import (
+ verify_all_process_of_volume_are_online,
+ wait_for_volume_process_to_be_online)
+from glustolibs.gluster.brick_libs import (
+ select_volume_bricks_to_bring_offline,
+ bring_bricks_offline,
+ bring_bricks_online,
+ are_bricks_offline)
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+ is_volume_in_split_brain)
+from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs)
+from glustolibs.gluster.glusterdir import mkdir
+
+
+@runs_on([['arbiter', 'distributed-arbiter',
+ 'replicated', 'distributed-replicated'], ['glusterfs']])
+class TestGlusterCloneHeal(GlusterBaseClass):
+ """
+ Description:
+ Arbiter Test cases related to self heal
+ of data and hardlink
+ """
+ def setUp(self):
+ # Calling GlusterBaseClass
+ self.get_super_method(self, 'setUp')()
+
+ # Setup Volume and Mount Volume
+ ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
+ if not ret:
+ raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+ g.log.info("Successful in Setup Volume and Mount Volume")
+
+ def tearDown(self):
+ """
+ Cleanup and umount volume
+ """
+ # Cleanup and umount volume
+ ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+ if not ret:
+ raise ExecutionError("Failed to umount the vol & cleanup Volume")
+ g.log.info("Successful in umounting the volume and Cleanup")
+
+ # Calling GlusterBaseClass teardown
+ self.get_super_method(self, 'tearDown')()
+
+ def test_gluster_clone_heal(self):
+ """
+        Test gluster compilation on mount point (Heal command)
+ - Creating directory test_compilation
+ - Compile gluster on mountpoint
+ - Select bricks to bring offline
+ - Bring brick offline
+ - Validate IO
+ - Bring bricks online
+ - Wait for volume processes to be online
+ - Verify volume's all process are online
+ - Monitor heal completion
+ - Check for split-brain
+ - Get arequal after getting bricks online
+ - Compile gluster on mountpoint again
+ - Select bricks to bring offline
+ - Bring brick offline
+ - Validate IO
+ - Bring bricks online
+ - Wait for volume processes to be online
+ - Verify volume's all process are online
+ - Monitor heal completion
+ - Check for split-brain
+ - Get arequal after getting bricks online
+ """
+ # pylint: disable=too-many-branches,too-many-statements,too-many-locals
+ # Creating directory test_compilation
+ ret = mkdir(self.mounts[0].client_system, "{}/test_compilation"
+ .format(self.mounts[0].mountpoint))
+ self.assertTrue(ret, "Failed to create directory")
+ g.log.info("Directory 'test_compilation' on %s created "
+ "successfully", self.mounts[0])
+
+ # Compile gluster on mountpoint
+ cmd = ("cd %s/test_compilation ; rm -rf glusterfs; git clone"
+ " git://github.com/gluster/glusterfs.git ; cd glusterfs ;"
+ " ./autogen.sh ;./configure CFLAGS='-g3 -O0 -DDEBUG'; make ;"
+ " cd ../..;" % self.mounts[0].mountpoint)
+ proc = g.run_async(self.mounts[0].client_system, cmd)
+
+ # Select bricks to bring offline
+ bricks_to_bring_offline = select_volume_bricks_to_bring_offline(
+ self.mnode, self.volname)
+ self.assertIsNotNone(bricks_to_bring_offline, "List is empty")
+
+ # Bring brick offline
+ ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
+ self.assertTrue(ret, 'Failed to bring bricks {} offline'.
+ format(bricks_to_bring_offline))
+
+ ret = are_bricks_offline(self.mnode, self.volname,
+ bricks_to_bring_offline)
+ self.assertTrue(ret, 'Bricks {} are not offline'.
+ format(bricks_to_bring_offline))
+ g.log.info('Bringing bricks %s offline is successful',
+ bricks_to_bring_offline)
+
+ # Validate IO
+ self.assertTrue(
+ validate_io_procs([proc], self.mounts[0]),
+ "IO failed on some of the clients"
+ )
+
+ # Bring bricks online
+ ret = bring_bricks_online(self.mnode, self.volname,
+ bricks_to_bring_offline)
+ self.assertTrue(ret, 'Failed to bring bricks {} online'.format
+ (bricks_to_bring_offline))
+
+ # Wait for volume processes to be online
+ ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
+ self.assertTrue(ret, ("Failed to wait for volume {} processes to "
+ "be online".format(self.volname)))
+
+ # Verify volume's all process are online
+ ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
+ self.assertTrue(ret, ("Volume {} : All process are not online".format
+ (self.volname)))
+
+ # Monitor heal completion
+ ret = monitor_heal_completion(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal has not yet completed')
+
+ # Check for split-brain
+ ret = is_volume_in_split_brain(self.mnode, self.volname)
+ self.assertFalse(ret, 'Volume is in split-brain state')
+ g.log.info('Volume is not in split-brain state')
+
+ # Get arequal after getting bricks online
+ ret, result_after_online = collect_mounts_arequal(self.mounts)
+ self.assertTrue(ret, 'Failed to get arequal')
+ g.log.info("Arequal of mountpoint %s", result_after_online)
+
+ # Compile gluster on mountpoint again
+ proc1 = g.run_async(self.mounts[0].client_system, cmd)
+
+ # Select bricks to bring offline
+ bricks_to_bring_offline = select_volume_bricks_to_bring_offline(
+ self.mnode, self.volname)
+ self.assertIsNotNone(bricks_to_bring_offline, "List is empty")
+
+ # Bring brick offline
+ ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
+ self.assertTrue(ret, 'Failed to bring bricks {} offline'.
+ format(bricks_to_bring_offline))
+
+ ret = are_bricks_offline(self.mnode, self.volname,
+ bricks_to_bring_offline)
+ self.assertTrue(ret, 'Bricks {} are not offline'.
+ format(bricks_to_bring_offline))
+
+ # Validate IO
+ self.assertTrue(
+ validate_io_procs([proc1], self.mounts[0]),
+ "IO failed on some of the clients"
+ )
+
+ # Bring bricks online
+ ret = bring_bricks_online(self.mnode, self.volname,
+ bricks_to_bring_offline)
+ self.assertTrue(ret, 'Failed to bring bricks {} online'.format
+ (bricks_to_bring_offline))
+
+ # Wait for volume processes to be online
+ ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
+ self.assertTrue(ret, ("Failed to wait for volume {} processes to "
+ "be online".format(self.volname)))
+
+ # Verify volume's all process are online
+ ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
+ self.assertTrue(ret, ("Volume {} : All process are not online".format
+ (self.volname)))
+
+ # Monitor heal completion
+ ret = monitor_heal_completion(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal has not yet completed')
+
+ # Check for split-brain
+ ret = is_volume_in_split_brain(self.mnode, self.volname)
+ self.assertFalse(ret, 'Volume is in split-brain state')
+
+ # Get arequal after getting bricks online
+ ret, result_after_online = collect_mounts_arequal(self.mounts)
+ self.assertTrue(ret, 'Failed to get arequal')
+ g.log.info("Arequal of mountpoint %s", result_after_online)
diff --git a/tests/functional/arbiter/test_heal_full_after_deleting_the_files_from_bricks.py b/tests/functional/arbiter/test_heal_full_after_deleting_the_files_from_bricks.py
new file mode 100755
index 000000000..8e11af6e4
--- /dev/null
+++ b/tests/functional/arbiter/test_heal_full_after_deleting_the_files_from_bricks.py
@@ -0,0 +1,202 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on)
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.volume_libs import get_subvols
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+ is_heal_complete,
+ is_volume_in_split_brain,
+ is_shd_daemon_running)
+from glustolibs.misc.misc_libs import upload_scripts
+from glustolibs.io.utils import (collect_mounts_arequal,
+ validate_io_procs,
+ list_all_files_and_dirs_mounts,
+ wait_for_io_to_complete)
+from glustolibs.gluster.gluster_init import (start_glusterd,
+ stop_glusterd)
+from glustolibs.misc.misc_libs import kill_process
+
+
+@runs_on([['arbiter', 'distributed-arbiter'],
+ ['glusterfs', 'nfs']])
+class TestArbiterSelfHeal(GlusterBaseClass):
+ """
+ Description:
+ Arbiter Test cases related to
+ healing in default configuration of the volume
+ """
+
+ @classmethod
+ def setUpClass(cls):
+ # Calling GlusterBaseClass setUpClass
+ cls.get_super_method(cls, 'setUpClass')()
+
+ # Upload io scripts for running IO on mounts
+ cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
+ "file_dir_ops.py")
+ ret = upload_scripts(cls.clients, [cls.script_upload_path])
+ if not ret:
+ raise ExecutionError("Failed to upload IO scripts to clients %s"
+ % cls.clients)
+ g.log.info("Successfully uploaded IO scripts to clients %s",
+ cls.clients)
+
+ def setUp(self):
+ # Calling GlusterBaseClass setUp
+ self.get_super_method(self, 'setUp')()
+
+ self.all_mounts_procs = []
+ self.io_validation_complete = False
+
+ # Setup Volume and Mount Volume
+ ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
+ if not ret:
+ raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+ g.log.info("Successful in Setup Volume and Mount Volume")
+
+ def tearDown(self):
+ """
+ If test method failed before validating IO, tearDown waits for the
+ IO's to complete and checks for the IO exit status
+
+ Cleanup and umount volume
+ """
+ if not self.io_validation_complete:
+ ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts)
+ if not ret:
+ raise ExecutionError("IO failed on some of the clients")
+ g.log.info("IO is successful on all mounts")
+
+ # List all files and dirs created
+ ret = list_all_files_and_dirs_mounts(self.mounts)
+ if not ret:
+ raise ExecutionError("Failed to list all files and dirs")
+
+ # Cleanup and umount volume
+ ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+ if not ret:
+ raise ExecutionError("Failed to umount the vol & cleanup Volume")
+ g.log.info("Successful in umounting the volume and Cleanup")
+
+ # Calling GlusterBaseClass teardown
+ self.get_super_method(self, 'tearDown')()
+
+ def test_heal_full_after_deleting_files(self):
+ """
+ - Create IO
+ - Calculate arequal from mount
+ - kill glusterd process and glustershd process on arbiter nodes
+ - Delete data from backend from the arbiter nodes
+ - Start glusterd process and force start the volume
+ to bring the processes online
+ - Check if heal is completed
+ - Check for split-brain
+ - Calculate arequal checksum and compare it
+ """
+ # pylint: disable=too-many-locals,too-many-statements
+ # Creating files on client side
+ for mount_obj in self.mounts:
+ g.log.info("Generating data for %s:%s",
+ mount_obj.client_system, mount_obj.mountpoint)
+ # Create dirs with file
+ command = ("/usr/bin/env python %s create_deep_dirs_with_files "
+ "-d 2 -l 2 -n 2 -f 20 %s"
+ % (self.script_upload_path, mount_obj.mountpoint))
+
+ proc = g.run_async(mount_obj.client_system, command,
+ user=mount_obj.user)
+ self.all_mounts_procs.append(proc)
+ self.io_validation_complete = False
+
+ # Validate IO
+ ret = validate_io_procs(self.all_mounts_procs, self.mounts)
+ self.assertTrue(ret, "IO failed on some of the clients")
+ self.io_validation_complete = True
+ g.log.info("IO is successful on all mounts")
+
+ # Get arequal before killing gluster processes on arbiter node
+ ret, result_before_killing_procs = collect_mounts_arequal(self.mounts)
+ self.assertTrue(ret, 'Failed to get arequal')
+        g.log.info('Getting arequal before killing gluster processes '
+ 'is successful')
+
+ # Kill glusterd process and glustershd process on arbiter node
+ subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
+ for subvol in subvols:
+ arbiter = subvol[-1]
+ node, brick_path = arbiter.split(':')
+ # Stop glusterd
+ ret = stop_glusterd(node)
+ self.assertTrue(ret, "Failed to stop the glusterd on arbiter node")
+ # Stop glustershd
+ ret = kill_process(node, "glustershd")
+ if not ret:
+ # Validate glustershd process is not running
+ self.assertFalse(
+ is_shd_daemon_running(self.mnode, node, self.volname),
+ "The glustershd process is still running.")
+ g.log.info('Killed glusterd and glustershd for all arbiter '
+                    'bricks successfully')
+
+ # Delete data from backend from the arbiter node
+ for subvol in subvols:
+ arbiter = subvol[-1]
+ # Clearing the arbiter bricks
+ node, brick_path = arbiter.split(':')
+ ret, _, err = g.run(node, 'rm -rf %s/*' % brick_path)
+ self.assertFalse(
+ ret, err)
+ g.log.info('Clearing for all arbiter brick is successful')
+
+ # Start glusterd process on each arbiter
+ for subvol in subvols:
+ arbiter = subvol[-1]
+ node, brick_path = arbiter.split(':')
+ ret = start_glusterd(node)
+ self.assertTrue(
+ ret, "Failed to start glusterd on the arbiter node")
+
+ # Monitor heal completion
+ ret = monitor_heal_completion(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal has not yet completed')
+
+ # Check if heal is completed
+ ret = is_heal_complete(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal is not complete')
+ g.log.info('Heal is completed successfully')
+
+ # Check for split-brain
+ ret = is_volume_in_split_brain(self.mnode, self.volname)
+ self.assertFalse(ret, 'Volume is in split-brain state')
+ g.log.info('Volume is not in split-brain state')
+
+ # Get arequal after healing
+ ret, result_after_healing = collect_mounts_arequal(self.mounts)
+ self.assertTrue(ret, 'Failed to get arequal')
+        g.log.info('Getting arequal after healing '
+ 'is successful')
+
+        # Comparing arequals before killing arbiter processes
+ # and after healing
+ self.assertEqual(
+ result_before_killing_procs, result_after_healing,
+            'Arequals before killing arbiter '
+ 'processes and after healing are not equal')
+
+ g.log.info('Arequals before killing arbiter '
+ 'processes and after healing are equal')
diff --git a/tests/functional/arbiter/test_metadata_self_heal.py b/tests/functional/arbiter/test_metadata_self_heal.py
index 81a098fff..0b2708438 100755
--- a/tests/functional/arbiter/test_metadata_self_heal.py
+++ b/tests/functional/arbiter/test_metadata_self_heal.py
@@ -207,10 +207,7 @@ class TestMetadataSelfHeal(GlusterBaseClass):
# Select bricks to bring offline
bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
self.mnode, self.volname))
- bricks_to_bring_offline = list(filter(None, (
- bricks_to_bring_offline_dict['hot_tier_bricks'] +
- bricks_to_bring_offline_dict['cold_tier_bricks'] +
- bricks_to_bring_offline_dict['volume_bricks'])))
+ bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks']
# Bring brick offline
g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
@@ -332,8 +329,9 @@ class TestMetadataSelfHeal(GlusterBaseClass):
# Checking arequals before bringing bricks online
# and after bringing bricks online
- self.assertItemsEqual(result_before_online, result_after_online,
- 'Checksums are not equal')
+ self.assertEqual(sorted(result_before_online),
+ sorted(result_after_online),
+ 'Checksums are not equal')
g.log.info('Checksums before bringing bricks online '
'and after bringing bricks online are equal')
@@ -356,11 +354,6 @@ class TestMetadataSelfHeal(GlusterBaseClass):
ret, out, err = g.run(node, command)
file_list = out.split()
- g.log.info('Checking for user and group on %s...', node)
- conn = g.rpyc_get_connection(node)
- if conn is None:
- raise Exception("Unable to get connection on node %s" % node)
-
for file_name in file_list:
file_to_check = '%s/%s/%s' % (nodes_to_check[node],
test_meta_data_self_heal_folder,
@@ -368,26 +361,30 @@ class TestMetadataSelfHeal(GlusterBaseClass):
g.log.info('Checking for permissions, user and group for %s',
file_name)
+
# Check for permissions
- permissions = oct(
- conn.modules.os.stat(file_to_check).st_mode)[-3:]
- self.assertEqual(permissions, '444',
+ cmd = ("stat -c '%a %n' {} | awk '{{print $1}}'"
+ .format(file_to_check))
+ ret, permissions, _ = g.run(node, cmd)
+ self.assertEqual(permissions.split('\n')[0], '444',
'Permissions %s is not equal to 444'
% permissions)
g.log.info("Permissions are '444' for %s", file_name)
# Check for user
- uid = conn.modules.os.stat(file_to_check).st_uid
- username = conn.modules.pwd.getpwuid(uid).pw_name
- self.assertEqual(username, 'qa', 'User %s is not equal qa'
+ cmd = ("ls -ld {} | awk '{{print $3}}'"
+ .format(file_to_check))
+ ret, username, _ = g.run(node, cmd)
+ self.assertEqual(username.split('\n')[0],
+ 'qa', 'User %s is not equal to qa'
% username)
g.log.info("User is 'qa' for %s", file_name)
# Check for group
- gid = conn.modules.os.stat(file_to_check).st_gid
- groupname = conn.modules.grp.getgrgid(gid).gr_name
- self.assertEqual(groupname, 'qa', 'Group %s is not equal qa'
+ cmd = ("ls -ld {} | awk '{{print $4}}'"
+ .format(file_to_check))
+ ret, groupname, _ = g.run(node, cmd)
+ self.assertEqual(groupname.split('\n')[0],
+ 'qa', 'Group %s is not equal to qa'
% groupname)
g.log.info("Group is 'qa' for %s", file_name)
-
- g.rpyc_close_connection(host=node)
diff --git a/tests/functional/arbiter/test_metadata_self_heal_on_open_fd.py b/tests/functional/arbiter/test_metadata_self_heal_on_open_fd.py
new file mode 100644
index 000000000..8e4df5e9f
--- /dev/null
+++ b/tests/functional/arbiter/test_metadata_self_heal_on_open_fd.py
@@ -0,0 +1,244 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+import os
+import copy
+from socket import gethostbyname
+from glusto.core import Glusto as g
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+ bring_bricks_online,
+ are_bricks_offline,
+ get_all_bricks)
+from glustolibs.gluster.heal_libs import is_volume_in_split_brain
+from glustolibs.gluster.heal_ops import get_heal_info_summary
+from glustolibs.gluster.glusterfile import get_file_stat
+from glustolibs.gluster.volume_ops import get_volume_info
+from glustolibs.gluster.volume_libs import get_subvols
+from glustolibs.gluster.glusterfile import get_pathinfo
+from glustolibs.gluster.lib_utils import (collect_bricks_arequal,
+ add_user, del_user)
+from glustolibs.io.utils import collect_mounts_arequal
+
+
+@runs_on([['replicated', 'distributed-replicated', 'arbiter',
+ 'distributed-arbiter'],
+ ['glusterfs']])
+class TestMetadataSelfHealOpenfd(GlusterBaseClass):
+ def setUp(self):
+ # Calling GlusterBaseClass setUp
+ self.get_super_method(self, 'setUp')()
+ self.user = "qa"
+ self.nodes = []
+ self.nodes = copy.deepcopy(self.servers)
+ self.nodes.append(self.clients[0])
+
+ # Create user for changing ownership
+ for node in self.nodes:
+ ret = add_user(node, self.user)
+ self.assertTrue(ret, "Failed to create user")
+
+ # Setup Volume and Mount Volume
+ ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
+ if not ret:
+ raise ExecutionError("Failed to Setup and Mount_Volume")
+
+ def tearDown(self):
+ """
+ Cleanup and umount volume
+ """
+ # Calling GlusterBaseClass teardown
+ self.get_super_method(self, 'tearDown')()
+
+ for node in self.nodes:
+ del_user(node, self.user)
+
+ # Cleanup and umount volume
+ ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+ if not ret:
+ raise ExecutionError("Failed to umount the vol & cleanup Volume")
+ g.log.info("Successful in umounting the volume and Cleanup")
+
+ def _verify_stat_info(self, nodes_to_check, test_file):
+ """
+ Helper method to verify stat on all bricks and client.
+ """
+ for node in nodes_to_check:
+ filepath = nodes_to_check[node] + "/" + test_file
+ stat_dict = get_file_stat(node, filepath)
+ self.assertIsNotNone(stat_dict, "stat on {} failed"
+ .format(test_file))
+ self.assertEqual(stat_dict['username'], self.user,
+ "Expected user qa but found {}"
+ .format(stat_dict['username']))
+ self.assertEqual(stat_dict['groupname'], self.user,
+ "Expected group qa but found {}"
+ .format(stat_dict['groupname']))
+ self.assertEqual(stat_dict['access'], '777',
+ "Expected permission 777 but found {}"
+ .format(stat_dict['access']))
+
+ def test_metadata_self_heal_on_open_fd(self):
+ """
+ Description: Pro-active metadata self heal on open fd
+
+ Steps :
+ 1) Create a volume.
+ 2) Mount the volume using FUSE.
+ 3) Create test executable on volume mount.
+ 4) While test execution is in progress, bring down brick1.
+ 5) From mount point, change ownership, permission, group id of
+ the test file.
+ 6) While test execution is in progress, bring back brick1 online.
+ 7) Do stat on the test file to check ownership, permission,
+ group id on mount point and on bricks
+ 8) Stop test execution.
+ 9) Do stat on the test file to check ownership, permission,
+ group id on mount point and on bricks.
+ 10) There should be no pending heals in the heal info command.
+ 11) There should be no split-brain.
+ 12) Calculate arequal of the bricks and mount point and it
+ should be same.
+ """
+ # pylint: disable=too-many-statements,too-many-locals
+ # pylint: disable=too-many-branches
+ bricks_list = get_all_bricks(self.mnode, self.volname)
+ self.assertIsNotNone(bricks_list, 'Brick list is None')
+ client = self.clients[0]
+
+ # Create test executable file on mount point
+ m_point = self.mounts[0].mountpoint
+ test_file = "testfile.sh"
+ cmd = ("echo 'while true; do echo 'Press CTRL+C to stop execution';"
+ " done' >> {}/{}".format(m_point, test_file))
+ ret, _, _ = g.run(client, cmd)
+ self.assertEqual(ret, 0, "Failed to create test file")
+
+ # Execute the test file
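+ # Run the script asynchronously so the file descriptor stays open
+ # on the client while the brick is brought down and back up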
+ cmd = "cd {}; sh {}".format(m_point, test_file)
+ g.run_async(client, cmd)
+
+ # Get pid of the test file
+ _cmd = "ps -aux | grep -v grep | grep testfile.sh | awk '{print $2}'"
+ ret, out, _ = g.run(client, _cmd)
+ self.assertEqual(ret, 0, "Failed to get pid of test file execution")
+
+ # Bring brick1 offline
+ ret = bring_bricks_offline(self.volname, [bricks_list[1]])
+ self.assertTrue(ret, 'Failed to bring bricks {} '
+ 'offline'.format(bricks_list[1]))
+
+ ret = are_bricks_offline(self.mnode, self.volname,
+ [bricks_list[1]])
+ self.assertTrue(ret, 'Bricks {} are not '
+ 'offline'.format(bricks_list[1]))
+
+ # change uid, gid and permission from client
+ cmd = "chown {} {}/{}".format(self.user, m_point, test_file)
+ ret, _, _ = g.run(client, cmd)
+ self.assertEqual(ret, 0, "chown failed")
+
+ cmd = "chgrp {} {}/{}".format(self.user, m_point, test_file)
+ ret, _, _ = g.run(client, cmd)
+ self.assertEqual(ret, 0, "chgrp failed")
+
+ cmd = "chmod 777 {}/{}".format(m_point, test_file)
+ ret, _, _ = g.run(client, cmd)
+ self.assertEqual(ret, 0, "chown failed")
+
+ # Bring brick1 online
+ ret = bring_bricks_online(self.mnode, self.volname,
+ [bricks_list[1]])
+ self.assertTrue(ret, 'Failed to bring bricks {} online'
+ .format(bricks_list[1]))
+
+ ret = get_pathinfo(client, "{}/{}"
+ .format(m_point, test_file))
+ self.assertIsNotNone(ret, "Unable to get "
+ "trusted.glusterfs.pathinfo of file")
+ nodes_to_check = {}
+ bricks_list = []
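+ # pathinfo entries are '<host>:<brick path>'; resolve hostnames to
+ # IP addresses so the keys match the nodes used for the stat checks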
+ for brick in ret['brickdir_paths']:
+ node, brick_path = brick.split(':')
+ if node[0:2].isdigit():
+ nodes_to_check[node] = os.path.dirname(brick_path)
+ path = node + ":" + os.path.dirname(brick_path)
+ else:
+ nodes_to_check[gethostbyname(node)] = (os.path.dirname(
+ brick_path))
+ path = gethostbyname(node) + ":" + os.path.dirname(brick_path)
+ bricks_list.append(path)
+ nodes_to_check[client] = m_point
+
+ # Verify that the changes are successful on bricks and client
+ self._verify_stat_info(nodes_to_check, test_file)
+
+ # Kill the test executable file
+ for pid in out.split('\n')[:-1]:
+ cmd = "kill -s 9 {}".format(pid)
+ ret, _, _ = g.run(client, cmd)
+ self.assertEqual(ret, 0, "Failed to kill test file execution")
+
+ # Verify that the changes are successful on bricks and client
+ self._verify_stat_info(nodes_to_check, test_file)
+
+ # Verify there are no pending heals
+ heal_info = get_heal_info_summary(self.mnode, self.volname)
+ self.assertIsNotNone(heal_info, 'Unable to get heal info')
+ for brick in bricks_list:
+ self.assertEqual(int(heal_info[brick]['numberOfEntries']),
+ 0, ("Pending heal on brick {} ".format(brick)))
+
+ # Check for split-brain
+ ret = is_volume_in_split_brain(self.mnode, self.volname)
+ self.assertFalse(ret, 'Volume is in split-brain state')
+ g.log.info('Volume is not in split-brain state')
+
+ # Get arequal for mount
+ ret, arequals = collect_mounts_arequal(self.mounts)
+ self.assertTrue(ret, 'Failed to get arequal')
+ mount_point_total = arequals[0].splitlines()[-1].split(':')[-1]
+
+ # Collecting data bricks
+ vol_info = get_volume_info(self.mnode, self.volname)
+ self.assertIsNotNone(vol_info, 'Unable to get volume info')
+ data_brick_list = []
+ for brick in bricks_list:
+ for brick_info in vol_info[self.volname]["bricks"]["brick"]:
+ if brick_info["name"] == brick:
+ if brick_info["isArbiter"] == "0":
+ data_brick_list.append(brick)
+ bricks_list = data_brick_list
+
+ # Get arequal on bricks and compare with mount_point_total
+ # It should be the same
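+ # Arbiter bricks hold only metadata, so 'stop' excludes the last
+ # brick of each subvol from the comparison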
+ arbiter = self.volume_type.find('arbiter') >= 0
+ subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
+ stop = len(subvols[0]) - 1 if arbiter else len(subvols[0])
+ for subvol in subvols:
+ subvol = [i for i in subvol if i in bricks_list]
+ if subvol:
+ ret, arequal = collect_bricks_arequal(subvol[0:stop])
+ self.assertTrue(ret, 'Unable to get arequal checksum '
+ 'on {}'.format(subvol[0:stop]))
+ self.assertEqual(len(set(arequal)), 1, 'Mismatch of arequal '
+ 'checksum among {} is '
+ 'identified'.format(subvol[0:stop]))
+ brick_total = arequal[-1].splitlines()[-1].split(':')[-1]
+ self.assertEqual(brick_total, mount_point_total,
+ "Arequals for mountpoint and {} "
+ "are not equal".format(subvol[0:stop]))
diff --git a/tests/functional/arbiter/test_mount_point_while_deleting_files.py b/tests/functional/arbiter/test_mount_point_while_deleting_files.py
index 6acb8e0c8..68f880663 100755
--- a/tests/functional/arbiter/test_mount_point_while_deleting_files.py
+++ b/tests/functional/arbiter/test_mount_point_while_deleting_files.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2020 Red Hat, Inc. <http://www.redhat.com>
+# Copyright (C) 2016-2020 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -34,8 +34,7 @@ from glustolibs.gluster.mount_ops import (mount_volume,
from glustolibs.misc.misc_libs import upload_scripts
-@runs_on([['arbiter'],
- ['glusterfs']])
+@runs_on([['arbiter'], ['glusterfs']])
class VolumeSetDataSelfHealTests(GlusterBaseClass):
@classmethod
def setUpClass(cls):
@@ -57,6 +56,7 @@ class VolumeSetDataSelfHealTests(GlusterBaseClass):
# Setup Volumes
cls.volume_configs = []
cls.mounts_dict_list = []
+ cls.client = cls.clients[0]
# Define two replicated volumes
for i in range(1, 3):
@@ -67,24 +67,22 @@ class VolumeSetDataSelfHealTests(GlusterBaseClass):
cls.volume_configs.append(volume_config)
# Redefine mounts
- for client in cls.all_clients_info.keys():
- mount = {
- 'protocol': cls.mount_type,
- 'server': cls.mnode,
- 'volname': volume_config['name'],
- 'client': cls.all_clients_info[client],
- 'mountpoint': (os.path.join(
- "/mnt", '_'.join([volume_config['name'],
- cls.mount_type]))),
- 'options': ''
- }
- cls.mounts_dict_list.append(mount)
-
- cls.mounts = create_mount_objs(cls.mounts_dict_list)
+ mount = {
+ 'protocol': cls.mount_type,
+ 'server': cls.mnode,
+ 'volname': volume_config['name'],
+ 'client': cls.all_clients_info[cls.client],
+ 'mountpoint': (os.path.join(
+ "/mnt", '_'.join([volume_config['name'],
+ cls.mount_type]))),
+ 'options': ''
+ }
+ cls.mounts_dict_list.append(mount)
+
+ cls.mounts = create_mount_objs(cls.mounts_dict_list)
# Create and mount volumes
cls.mount_points = []
- cls.client = cls.clients[0]
for volume_config in cls.volume_configs:
# Setup volume
@@ -146,39 +144,33 @@ class VolumeSetDataSelfHealTests(GlusterBaseClass):
raise ExecutionError("Failed to list all files and dirs")
g.log.info("Listing all files and directories is successful")
- @classmethod
- def tearDownClass(cls):
- """
- Clean up the volume and umount volume from client
- """
# umount all volumes
- for mount_obj in cls.mounts:
+ for mount_point in self.mount_points:
ret, _, _ = umount_volume(
- mount_obj.client_system, mount_obj.mountpoint)
+ self.client, mount_point)
if ret:
raise ExecutionError(
"Failed to umount on volume %s "
- % cls.volname)
+ % self.volname)
g.log.info("Successfully umounted %s on client %s",
- cls.volname, mount_obj.client_system)
- ret = rmdir(mount_obj.client_system, mount_obj.mountpoint)
+ self.volname, self.client)
+ ret = rmdir(self.client, mount_point)
if not ret:
raise ExecutionError(
- ret, "Failed to remove directory mount directory.")
+ "Failed to remove directory mount directory.")
g.log.info("Mount directory is removed successfully")
# stopping all volumes
- g.log.info("Starting to Cleanup all Volumes")
- volume_list = get_volume_list(cls.mnode)
+ volume_list = get_volume_list(self.mnode)
for volume in volume_list:
- ret = cleanup_volume(cls.mnode, volume)
+ ret = cleanup_volume(self.mnode, volume)
if not ret:
raise ExecutionError("Failed to cleanup Volume %s" % volume)
g.log.info("Volume: %s cleanup is done", volume)
g.log.info("Successfully Cleanedup all Volumes")
- # calling GlusterBaseClass tearDownClass
- cls.get_super_method(cls, 'tearDownClass')()
+ # calling GlusterBaseClass tearDown
+ self.get_super_method(self, 'tearDown')()
def test_mount_point_not_go_to_rofs(self):
"""
@@ -218,10 +210,8 @@ class VolumeSetDataSelfHealTests(GlusterBaseClass):
for volname in volume_list:
bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
self.mnode, volname))
- bricks_to_bring_offline = list(filter(None, (
- bricks_to_bring_offline_dict['hot_tier_bricks'] +
- bricks_to_bring_offline_dict['cold_tier_bricks'] +
- bricks_to_bring_offline_dict['volume_bricks'])))
+ bricks_to_bring_offline = (
+ bricks_to_bring_offline_dict['volume_bricks'])
# bring bricks offline
g.log.info("Going to bring down the brick process for %s",
@@ -251,3 +241,4 @@ class VolumeSetDataSelfHealTests(GlusterBaseClass):
self.assertTrue(
validate_io_procs(self.all_mounts_procs, self.mounts),
"IO failed on some of the clients")
+ self.io_validation_complete = True
diff --git a/tests/functional/arbiter/test_self_heal_50k_files.py b/tests/functional/arbiter/test_self_heal_50k_files.py
new file mode 100644
index 000000000..887959fa0
--- /dev/null
+++ b/tests/functional/arbiter/test_self_heal_50k_files.py
@@ -0,0 +1,140 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on)
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.volume_libs import (
+ verify_all_process_of_volume_are_online,
+ wait_for_volume_process_to_be_online)
+from glustolibs.gluster.brick_libs import (select_bricks_to_bring_offline,
+ bring_bricks_offline,
+ bring_bricks_online,
+ are_bricks_offline)
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+ is_volume_in_split_brain)
+from glustolibs.io.utils import validate_io_procs
+
+
+@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']])
+class TestSelfHeal50kFiles(GlusterBaseClass):
+ """
+ Description:
+ Arbiter self heal of 50k files
+ """
+ def setUp(self):
+ # Calling GlusterBaseClass
+ self.get_super_method(self, 'setUp')()
+
+ # Setup Volume and Mount Volume
+ g.log.info("Starting to Setup Volume and Mount Volume")
+ ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
+ if not ret:
+ raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+ g.log.info("Successful in Setup Volume and Mount Volume")
+
+ def tearDown(self):
+ """
+ Cleanup and umount volume
+ """
+
+ # Cleanup and umount volume
+ g.log.info("Starting to Unmount Volume and Cleanup Volume")
+ ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+ if not ret:
+ raise ExecutionError("Failed to umount the vol & cleanup Volume")
+ g.log.info("Successful in umounting the volume and Cleanup")
+
+ # Calling GlusterBaseClass teardown
+ self.get_super_method(self, 'tearDown')()
+
+ def test_self_heal_50k_files(self):
+ """
+ Description:
+ - Select bricks to bring offline
+ - Bring brick offline
+ - Create 50k files
+ - Validate IO
+ - Bring bricks online
+ - Monitor heal
+ - Check for split-brain
+ - Validate IO
+ """
+ # pylint: disable=too-many-statements,too-many-locals
+ # Select bricks to bring offline
+ bricks_to_bring_offline_dict = select_bricks_to_bring_offline(
+ self.mnode, self.volname)
+ bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks']
+
+ # Verify the brick list is not empty before bringing bricks offline
+ self.assertIsNotNone(bricks_to_bring_offline, "List is empty")
+
+ # Bring brick offline
+ ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
+ self.assertTrue(ret, 'Failed to bring bricks %s offline' %
+ bricks_to_bring_offline)
+
+ ret = are_bricks_offline(self.mnode, self.volname,
+ bricks_to_bring_offline)
+ self.assertTrue(ret, 'Bricks %s are not offline'
+ % bricks_to_bring_offline)
+ g.log.info('Bringing bricks %s offline is successful',
+ bricks_to_bring_offline)
+
+ # Create 50k files
+ command = ("cd %s ; "
+ "for i in `seq 1 50000` ; "
+ "do dd if=/dev/urandom of=test.$i "
+ "bs=100k count=1 ; "
+ "done ;"
+ % self.mounts[0].mountpoint)
+ proc = g.run_async(self.mounts[0].client_system, command,
+ user=self.mounts[0].user)
+
+ # Validate IO
+ self.assertTrue(
+ validate_io_procs([proc], self.mounts[0]),
+ "IO failed on some of the clients"
+ )
+
+ # Bring brick online
+ ret = bring_bricks_online(self.mnode, self.volname,
+ bricks_to_bring_offline)
+ self.assertTrue(ret, 'Failed to bring bricks %s online' %
+ bricks_to_bring_offline)
+ g.log.info('Bringing bricks %s online is successful',
+ bricks_to_bring_offline)
+
+ # Wait for volume processes to be online
+ ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
+ self.assertTrue(ret, ("Failed to wait for volume %s processes to "
+ "be online", self.volname))
+ g.log.info("Successful in waiting for volume %s processes to be "
+ "online", self.volname)
+
+ # Verify volume's all process are online
+ ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
+ self.assertTrue(ret, ("Volume %s : All process are not online"
+ % self.volname))
+ g.log.info("Volume %s : All process are online", self.volname)
+
+ # Monitor heal completion
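+ # Healing 50k files can take a while, hence the extended timeout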
+ ret = monitor_heal_completion(self.mnode, self.volname,
+ timeout_period=3000)
+ self.assertTrue(ret, 'Heal has not yet completed')
+
+ # Check for split-brain
+ ret = is_volume_in_split_brain(self.mnode, self.volname)
+ self.assertFalse(ret, 'Volume is in split-brain state')
+ g.log.info('Volume is not in split-brain state')
diff --git a/tests/functional/arbiter/test_self_heal_50k_files_heal_command_by_add_brick.py b/tests/functional/arbiter/test_self_heal_50k_files_heal_command_by_add_brick.py
index 06f1f42c0..da98c4b7f 100644
--- a/tests/functional/arbiter/test_self_heal_50k_files_heal_command_by_add_brick.py
+++ b/tests/functional/arbiter/test_self_heal_50k_files_heal_command_by_add_brick.py
@@ -107,10 +107,7 @@ class TestSelfHeal(GlusterBaseClass):
# Select bricks to bring offline
bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
self.mnode, self.volname))
- bricks_to_bring_offline = list(filter(None, (
- bricks_to_bring_offline_dict['hot_tier_bricks'] +
- bricks_to_bring_offline_dict['cold_tier_bricks'] +
- bricks_to_bring_offline_dict['volume_bricks'])))
+ bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks']
# Bring brick offline
ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
@@ -211,9 +208,10 @@ class TestSelfHeal(GlusterBaseClass):
# Checking arequals before bringing bricks online
# and after bringing bricks online
- self.assertItemsEqual(result_before_online, result_after_online,
- 'Checksums before and '
- 'after bringing bricks online are not equal')
+ self.assertEqual(sorted(result_before_online),
+ sorted(result_after_online),
+ 'Checksums before and '
+ 'after bringing bricks online are not equal')
g.log.info('Checksums before and after bringing bricks online '
'are equal')
@@ -242,8 +240,9 @@ class TestSelfHeal(GlusterBaseClass):
# Checking arequals after bringing bricks online
# and after adding bricks
- self.assertItemsEqual(result_after_online, result_after_adding_bricks,
- 'Checksums after bringing bricks online and '
- 'after adding bricks are not equal')
+ self.assertEqual(sorted(result_after_online),
+ sorted(result_after_adding_bricks),
+ 'Checksums after bringing bricks online '
+ 'and after adding bricks are not equal')
g.log.info('Checksums after bringing bricks online and '
'after adding bricks are equal')
diff --git a/tests/functional/arbiter/test_self_heal_daemon.py b/tests/functional/arbiter/test_self_heal_daemon.py
new file mode 100644
index 000000000..37470e41c
--- /dev/null
+++ b/tests/functional/arbiter/test_self_heal_daemon.py
@@ -0,0 +1,256 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on)
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.volume_libs import (
+ verify_all_process_of_volume_are_online,
+ wait_for_volume_process_to_be_online)
+from glustolibs.gluster.brick_libs import (
+ select_volume_bricks_to_bring_offline,
+ bring_bricks_offline,
+ bring_bricks_online,
+ are_bricks_offline)
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+ is_volume_in_split_brain)
+from glustolibs.io.utils import (collect_mounts_arequal)
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.gluster.glusterfile import get_file_stat
+
+
+@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']])
+class TestSelfHealDaemon(GlusterBaseClass):
+ """
+ Description:
+ Arbiter Test cases related to self heal
+ of data and hardlink
+ """
+ def setUp(self):
+ # Calling GlusterBaseClass
+ self.get_super_method(self, 'setUp')()
+
+ # Setup Volume and Mount Volume
+ ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
+ if not ret:
+ raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+ g.log.info("Successful in Setup Volume and Mount Volume")
+
+ def tearDown(self):
+ """
+ Cleanup and umount volume
+ """
+ # Cleanup and umount volume
+ ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+ if not ret:
+ raise ExecutionError("Failed to umount the vol & cleanup Volume")
+ g.log.info("Successful in umounting the volume and Cleanup")
+
+ # Calling GlusterBaseClass teardown
+ self.get_super_method(self, 'tearDown')()
+
+ def test_self_heal_daemon(self):
+ """
+ Test Data-Self-Heal(heal command)
+ Description:
+ - Create directory test_hardlink_self_heal
+ - Create directory test_data_self_heal
+ - Creating files for hardlinks and data files
+ - Get arequal before getting bricks offline
+ - Select bricks to bring offline
+ - Bring brick offline
+ - Create hardlinks and append data to data files
+ - Bring brick online
+ - Wait for volume processes to be online
+ - Verify volume's all process are online
+ - Monitor heal completion
+ - Check for split-brain
+ - Get arequal after getting bricks online
+ - Select bricks to bring offline
+ - Bring brick offline
+ - Truncate data to data files and verify hardlinks
+ - Bring brick online
+ - Wait for volume processes to be online
+ - Verify volume's all process are online
+ - Monitor heal completion
+ - Check for split-brain
+ - Get arequal again
+
+ """
+ # pylint: disable=too-many-branches,too-many-statements,too-many-locals
+ # Creating directory test_hardlink_self_heal
+ ret = mkdir(self.mounts[0].client_system, "{}/test_hardlink_self_heal"
+ .format(self.mounts[0].mountpoint))
+ self.assertTrue(ret, "Failed to create directory")
+ g.log.info("Directory 'test_hardlink_self_heal' on %s created "
+ "successfully", self.mounts[0])
+
+ # Creating directory test_data_self_heal
+ ret = mkdir(self.mounts[0].client_system, "{}/test_data_self_heal"
+ .format(self.mounts[0].mountpoint))
+ self.assertTrue(ret, "Failed to create directory")
+ g.log.info("Directory test_hardlink_self_heal on %s created "
+ "successfully", self.mounts[0])
+
+ # Creating files for hardlinks and data files
+ cmd = ('cd %s/test_hardlink_self_heal;for i in `seq 1 5`;'
+ 'do mkdir dir.$i ; for j in `seq 1 10` ; do dd if='
+ '/dev/urandom of=dir.$i/file.$j bs=1k count=$j;done; done;'
+ 'cd ..' % self.mounts[0].mountpoint)
+ ret, _, _ = g.run(self.mounts[0].client_system, cmd)
+ self.assertEqual(ret, 0, "Failed to create file on mountpoint")
+ g.log.info("Successfully created files on mountpoint")
+
+ cmd = ('cd %s/test_data_self_heal;for i in `seq 1 100`;'
+ 'do dd if=/dev/urandom of=file.$i bs=128K count=$i;done;'
+ 'cd ..' % self.mounts[0].mountpoint)
+ ret, _, _ = g.run(self.mounts[0].client_system, cmd)
+ self.assertEqual(ret, 0, "Failed to create file on mountpoint")
+ g.log.info("Successfully created files on mountpoint")
+
+ # Get arequal before getting bricks offline
+ ret, result_before_online = collect_mounts_arequal(self.mounts)
+ self.assertTrue(ret, 'Failed to get arequal')
+ g.log.info('Arequal before getting bricks online-%s',
+ result_before_online)
+
+ # Select bricks to bring offline
+ bricks_to_bring_offline = select_volume_bricks_to_bring_offline(
+ self.mnode, self.volname)
+ self.assertIsNotNone(bricks_to_bring_offline, "List is empty")
+
+ # Bring brick offline
+ ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
+ self.assertTrue(ret, 'Failed to bring bricks {} offline'.
+ format(bricks_to_bring_offline))
+
+ ret = are_bricks_offline(self.mnode, self.volname,
+ bricks_to_bring_offline)
+ self.assertTrue(ret, 'Bricks {} are not offline'.
+ format(bricks_to_bring_offline))
+ g.log.info('Bringing bricks %s offline is successful',
+ bricks_to_bring_offline)
+
+ # Append data to data files and create hardlinks
+ cmd = ('cd %s/test_data_self_heal;for i in `seq 1 100`;'
+ 'do dd if=/dev/urandom of=file.$i bs=512K count=$i ; done ;'
+ 'cd .. ' % self.mounts[0].mountpoint)
+ ret, _, _ = g.run(self.mounts[0].client_system, cmd)
+ self.assertEqual(ret, 0, "Failed to modify data files.")
+ g.log.info("Successfully modified data files")
+
+ cmd = ('cd %s/test_hardlink_self_heal;for i in `seq 1 5` ;do '
+ 'for j in `seq 1 10`;do ln dir.$i/file.$j dir.$i/link_file.$j;'
+ 'done ; done ; cd .. ' % self.mounts[0].mountpoint)
+ ret, _, _ = g.run(self.mounts[0].client_system, cmd)
+ self.assertEqual(ret, 0, "Hardlinks creation failed")
+ g.log.info("Successfully created hardlinks of files")
+
+ # Bring bricks online
+ ret = bring_bricks_online(self.mnode, self.volname,
+ bricks_to_bring_offline)
+ self.assertTrue(ret, 'Failed to bring bricks {} online'.format
+ (bricks_to_bring_offline))
+ g.log.info('Bringing bricks %s online is successful',
+ bricks_to_bring_offline)
+
+ # Wait for volume processes to be online
+ ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
+ self.assertTrue(ret, ("Failed to wait for volume {} processes to "
+ "be online".format(self.volname)))
+ g.log.info("Successful in waiting for volume %s processes to be "
+ "online", self.volname)
+
+ # Verify volume's all process are online
+ ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
+ self.assertTrue(ret, ("Volume {} : All process are not online".format
+ (self.volname)))
+ g.log.info("Volume %s : All process are online", self.volname)
+
+ # Monitor heal completion
+ ret = monitor_heal_completion(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal has not yet completed')
+
+ # Check for split-brain
+ ret = is_volume_in_split_brain(self.mnode, self.volname)
+ self.assertFalse(ret, 'Volume is in split-brain state')
+ g.log.info('Volume is not in split-brain state')
+
+ # Get arequal after getting bricks online
+ ret, result_after_online = collect_mounts_arequal(self.mounts)
+ self.assertTrue(ret, 'Failed to get arequal')
+ g.log.info('Arequal after getting bricks online '
+ 'is %s', result_after_online)
+
+ # Select bricks to bring offline
+ bricks_to_bring_offline = select_volume_bricks_to_bring_offline(
+ self.mnode, self.volname)
+ self.assertIsNotNone(bricks_to_bring_offline, "List is empty")
+
+ # Bring brick offline
+ ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
+ self.assertTrue(ret, 'Failed to bring bricks {} offline'.format
+ (bricks_to_bring_offline))
+
+ ret = are_bricks_offline(self.mnode, self.volname,
+ bricks_to_bring_offline)
+ self.assertTrue(ret, 'Bricks {} are not offline'.format
+ (bricks_to_bring_offline))
+ g.log.info('Bringing bricks %s offline is successful',
+ bricks_to_bring_offline)
+
+ # Truncate data to data files and verify hardlinks
+ cmd = ('cd %s/test_data_self_heal ; for i in `seq 1 100` ;'
+ 'do truncate -s $(( $i * 128)) file.$i ; done ; cd ..'
+ % self.mounts[0].mountpoint)
+ ret, _, _ = g.run(self.mounts[0].client_system, cmd)
+ self.assertEqual(ret, 0, "Failed to truncate files")
+ g.log.info("Successfully truncated files on mountpoint")
+
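+ # A hardlink shares its target's inode, so stat output for the files
+ # and their links must match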
+ file_path = ('%s/test_hardlink_self_heal/dir.{1..5}/file.{1..10}'
+ % (self.mounts[0].mountpoint))
+ link_path = ('%s/test_hardlink_self_heal/dir.{1..5}/link_file.{1..10}'
+ % (self.mounts[0].mountpoint))
+ file_stat = get_file_stat(self.mounts[0].client_system, file_path)
+ link_stat = get_file_stat(self.mounts[0].client_system, link_path)
+ self.assertEqual(file_stat, link_stat, "Verification of hardlinks "
+ "failed")
+ g.log.info("Successfully verified hardlinks")
+
+ # Bring brick online
+ ret = bring_bricks_online(self.mnode, self.volname,
+ bricks_to_bring_offline)
+ self.assertTrue(ret, 'Failed to bring bricks {} online'.format
+ (bricks_to_bring_offline))
+ g.log.info('Bringing bricks %s online is successful',
+ bricks_to_bring_offline)
+
+ # Wait for volume processes to be online
+ ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
+ self.assertTrue(ret, ("Failed to wait for volume {} processes to "
+ "be online".format(self.volname)))
+ g.log.info("Successful in waiting for volume %s processes to be "
+ "online", self.volname)
+
+ # Verify volume's all process are online
+ ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
+ self.assertTrue(ret, ("Volume {} : All process are not online".format
+ (self.volname)))
+ g.log.info("Volume %s : All process are online", self.volname)
+
+ # Monitor heal completion
+ ret = monitor_heal_completion(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal has not yet completed')
diff --git a/tests/functional/arbiter/test_self_heal_differing_in_file_type.py b/tests/functional/arbiter/test_self_heal_differing_in_file_type.py
index d8d93a9ee..0c49bcd8f 100755
--- a/tests/functional/arbiter/test_self_heal_differing_in_file_type.py
+++ b/tests/functional/arbiter/test_self_heal_differing_in_file_type.py
@@ -152,10 +152,7 @@ class TestSelfHeal(GlusterBaseClass):
# Select bricks to bring offline
bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
self.mnode, self.volname))
- bricks_to_bring_offline = list(filter(None, (
- bricks_to_bring_offline_dict['hot_tier_bricks'] +
- bricks_to_bring_offline_dict['cold_tier_bricks'] +
- bricks_to_bring_offline_dict['volume_bricks'])))
+ bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks']
# Bring brick offline
g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
@@ -179,9 +176,10 @@ class TestSelfHeal(GlusterBaseClass):
# Checking arequals before bringing bricks offline
# and after bringing bricks offline
- self.assertItemsEqual(result_before_offline, result_after_offline,
- 'Checksums before and after '
- 'bringing bricks offline are not equal')
+ self.assertEqual(sorted(result_before_offline),
+ sorted(result_after_offline),
+ 'Checksums before and after bringing bricks'
+ ' offline are not equal')
g.log.info('Checksums before and after '
'bringing bricks offline are equal')
@@ -271,8 +269,9 @@ class TestSelfHeal(GlusterBaseClass):
# Checking arequals before bringing bricks online
# and after bringing bricks online
- self.assertItemsEqual(result_before_online, result_after_online,
- 'Checksums before and '
- 'after bringing bricks online are not equal')
+ self.assertEqual(sorted(result_before_online),
+ sorted(result_after_online),
+ 'Checksums before and after bringing bricks'
+ ' online are not equal')
g.log.info('Checksums before and after bringing bricks online '
'are equal')
diff --git a/tests/functional/arbiter/test_self_heal_symbolic_links.py b/tests/functional/arbiter/test_self_heal_symbolic_links.py
index 6907f8805..655ea7564 100644
--- a/tests/functional/arbiter/test_self_heal_symbolic_links.py
+++ b/tests/functional/arbiter/test_self_heal_symbolic_links.py
@@ -169,10 +169,7 @@ class TestSelfHeal(GlusterBaseClass):
# Select bricks to bring offline
bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
self.mnode, self.volname))
- bricks_to_bring_offline = list(filter(None, (
- bricks_to_bring_offline_dict['hot_tier_bricks'] +
- bricks_to_bring_offline_dict['cold_tier_bricks'] +
- bricks_to_bring_offline_dict['volume_bricks'])))
+ bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks']
# Bring brick offline
g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
@@ -196,9 +193,10 @@ class TestSelfHeal(GlusterBaseClass):
# Checking arequals before bringing bricks offline
# and after bringing bricks offline
- self.assertItemsEqual(result_before_offline, result_after_offline,
- 'Checksums before and '
- 'after bringing bricks online are not equal')
+ self.assertEqual(sorted(result_before_offline),
+ sorted(result_after_offline),
+ 'Checksums before and after bringing bricks '
+ 'offline are not equal')
g.log.info('Checksums before and after bringing bricks online '
'are equal')
@@ -323,8 +321,9 @@ class TestSelfHeal(GlusterBaseClass):
# Checking arequals before bringing bricks online
# and after bringing bricks online
- self.assertItemsEqual(result_before_online, result_after_online,
- 'Checksums before and '
- 'after bringing bricks online are not equal')
+ self.assertEqual(sorted(result_before_online),
+ sorted(result_after_online),
+ 'Checksums before and after bringing bricks '
+ 'online are not equal')
g.log.info('Checksums before and after bringing bricks online '
'are equal')
diff --git a/tests/functional/arbiter/test_split_brain.py b/tests/functional/arbiter/test_split_brain.py
new file mode 100644
index 000000000..e2684be49
--- /dev/null
+++ b/tests/functional/arbiter/test_split_brain.py
@@ -0,0 +1,165 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+# pylint: disable=too-many-statements, too-many-locals
+from glusto.core import Glusto as g
+
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+ bring_bricks_online,
+ wait_for_bricks_to_be_online)
+from glustolibs.misc.misc_libs import upload_scripts
+from glustolibs.gluster.volume_ops import set_volume_options
+from glustolibs.io.utils import validate_io_procs
+from glustolibs.gluster.heal_libs import is_volume_in_split_brain
+from glustolibs.gluster.volume_libs import get_subvols
+
+
+@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']])
+class TestSplitBrain(GlusterBaseClass):
+
+ @classmethod
+ def setUpClass(cls):
+
+ # Calling GlusterBaseClass setUpClass
+ cls.get_super_method(cls, 'setUpClass')()
+
+ # Upload io scripts for running IO on mounts
+ cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
+ "file_dir_ops.py")
+ ret = upload_scripts(cls.clients, cls.script_upload_path)
+ if not ret:
+ raise ExecutionError("Failed to upload IO scripts "
+ "to clients %s" % cls.clients)
+
+ # Setup Volume and Mount Volume
+ ret = cls.setup_volume_and_mount_volume(cls.mounts, True)
+ if not ret:
+ raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+
+ @classmethod
+ def tearDownClass(cls):
+ """
+ Cleanup Volume
+ """
+ ret = cls.unmount_volume_and_cleanup_volume(cls.mounts)
+ if not ret:
+ raise ExecutionError("Failed to create volume")
+
+ cls.get_super_method(cls, 'tearDownClass')()
+
+ def _bring_bricks_online(self):
+ """
+ Bring bricks online and monitor heal completion
+ """
+ # Bring bricks online
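+ # 'volume start force' (re)starts only brick processes that are
+ # down, leaving running bricks untouched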
+ ret = bring_bricks_online(
+ self.mnode,
+ self.volname,
+ self.bricks_to_bring_offline,
+ bring_bricks_online_methods=['volume_start_force'])
+ self.assertTrue(ret, 'Failed to bring bricks online')
+
+ # Wait for volume processes to be online
+ ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
+ self.assertTrue(ret, ("Failed to wait for volume {} processes to "
+ "be online".format(self.volname)))
+
+ def test_split_brain(self):
+
+ """
+ Description: Create split-brain on files and check if IO's fail
+ - Disable self-heal and cluster-quorum-type
+ - Get the bricks from the volume
+ - Write IO and validate IO
+ - Bring 1st set of brick offline(1 Data brick and arbiter brick)
+ - Write IO and validate IO
+ - Bring 2nd set of bricks offline(1 Data brick and arbiter brick)
+ - Write IO and validate IO
+ - Check volume is in split-brain
+ - Write IO and validate IO - should fail
+ - Enable self-heal and cluster-quorum-type
+ - Write IO and validate IO - should fail
+ """
+ # Disable self-heal and cluster-quorum-type
+ options = {"self-heal-daemon": "off",
+ "cluster.quorum-type": "none"}
+ ret = set_volume_options(self.mnode, self.volname, options)
+ self.assertTrue(ret, ("Unable to set volume option %s for "
+ "volume %s" % (options, self.volname)))
+
+ # Get the bricks from the volume
+ sub_vols = get_subvols(self.mnode, self.volname)
+ self.bricks_to_bring_offline = list(sub_vols['volume_subvols'][0])
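+ # Bringing down one data brick plus the arbiter from this subvol in
+ # turn lets the two data bricks receive divergent writes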
+
+ # Write IO's
+ write_cmd = ("/usr/bin/env python %s create_files -f 1 "
+ "--base-file-name test_file --fixed-file-size 1k %s" %
+ (self.script_upload_path,
+ self.mounts[0].mountpoint))
+ ret, _, _ = g.run(self.mounts[0].client_system, write_cmd)
+
+ # Bring each set of bricks offline in turn (1 data brick and the
+ # arbiter brick) and write IO while they are down
+ for bricks in ((0, -1), (1, -1)):
+ down_bricks = []
+ for brick in bricks:
+ down_bricks.append(self.bricks_to_bring_offline[brick])
+ ret = bring_bricks_offline(self.volname, down_bricks)
+ self.assertTrue(ret, 'Failed to bring bricks {} offline'.
+ format(down_bricks))
+ proc = g.run_async(self.mounts[0].client_system, write_cmd)
+
+ # Validate I/O
+ self.assertTrue(
+ validate_io_procs([proc], self.mounts),
+ "IO failed on some of the clients"
+ )
+
+ # Bring bricks online
+ self._bring_bricks_online()
+
+ # Check volume is in split-brain
+ ret = is_volume_in_split_brain(self.mnode, self.volname)
+ self.assertTrue(ret, "unable to create split-brain scenario")
+ g.log.info("Successfully created split brain scenario")
+
+ # Write IO's
+ proc2 = g.run_async(self.mounts[0].client_system, write_cmd)
+
+ # Validate I/O
+ self.assertFalse(
+ validate_io_procs([proc2], self.mounts),
+ "IO passed on split-brain"
+ )
+ g.log.info("Expected - IO's failed due to split-brain")
+
+ # Enable self-heal and cluster-quorum-type
+ options = {"self-heal-daemon": "on",
+ "cluster.quorum-type": "auto"}
+ ret = set_volume_options(self.mnode, self.volname, options)
+ self.assertTrue(ret, ("Unable to set volume option %s for "
+ "volume %s" % (options, self.volname)))
+
+ # Write IO's
+ proc3 = g.run_async(self.mounts[0].client_system, write_cmd)
+
+ # Validate I/O
+ self.assertFalse(
+ validate_io_procs([proc3], self.mounts),
+ "IO passed on split-brain"
+ )
+ g.log.info("Expected - IO's failed due to split-brain")
diff --git a/tests/functional/arbiter/test_verify_metadata_and_data_heal.py b/tests/functional/arbiter/test_verify_metadata_and_data_heal.py
new file mode 100644
index 000000000..d48e36e73
--- /dev/null
+++ b/tests/functional/arbiter/test_verify_metadata_and_data_heal.py
@@ -0,0 +1,297 @@
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from glusto.core import Glusto as g
+
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+ bring_bricks_online,
+ get_online_bricks_list)
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.gluster.heal_libs import (
+ is_heal_complete, is_volume_in_split_brain, monitor_heal_completion,
+ wait_for_self_heal_daemons_to_be_online)
+from glustolibs.gluster.heal_ops import (disable_self_heal_daemon,
+ enable_self_heal_daemon, trigger_heal)
+from glustolibs.gluster.lib_utils import (add_user, collect_bricks_arequal,
+ del_user, group_add, group_del)
+from glustolibs.gluster.volume_libs import get_subvols
+from glustolibs.io.utils import list_all_files_and_dirs_mounts
+
+
+@runs_on([['arbiter', 'replicated'], ['glusterfs']])
+class TestMetadataAndDataHeal(GlusterBaseClass):
+ '''Description: Verify shd heals files after performing metadata and data
+ operations while a brick was down'''
+ def _dac_helper(self, host, option):
+ '''Helper for creating, deleting users and groups'''
+
+ # Permission/Ownership changes required only for `test_metadata..`
+ # tests, using random group and usernames
+ if 'metadata' not in self.test_dir:
+ return
+
+ if option == 'create':
+ # Groups
+ for group in ('qa_func', 'qa_system'):
+ if not group_add(host, group):
+ raise ExecutionError('Unable to {} group {} on '
+ '{}'.format(option, group, host))
+
+ # User
+ if not add_user(host, 'qa_all', group='qa_func'):
+ raise ExecutionError('Unable to {} user {} under {} on '
+ '{}'.format(option, 'qa_all', 'qa_func',
+ host))
+ elif option == 'delete':
+ # Groups
+ for group in ('qa_func', 'qa_system'):
+ if not group_del(host, group):
+ raise ExecutionError('Unable to {} group {} on '
+ '{}'.format(option, group, host))
+
+ # User
+ if not del_user(host, 'qa_all'):
+ raise ExecutionError('Unable to {} user on {}'.format(
+ option, host))
+
+ def setUp(self):
+ self.get_super_method(self, 'setUp')()
+
+ # A single mount is enough for all the tests
+ self.mounts = self.mounts[0:1]
+ self.client = self.mounts[0].client_system
+
+ # Use testcase name as test directory
+ self.test_dir = self.id().split('.')[-1]
+ self.fqpath = self.mounts[0].mountpoint + '/' + self.test_dir
+
+ if not self.setup_volume_and_mount_volume(mounts=self.mounts):
+ raise ExecutionError('Failed to setup and mount '
+ '{}'.format(self.volname))
+
+ # Create group and user names required for the test
+ self._dac_helper(host=self.client, option='create')
+
+ def tearDown(self):
+ # Delete group and user names created as part of setup
+ self._dac_helper(host=self.client, option='delete')
+
+ if not self.unmount_volume_and_cleanup_volume(mounts=self.mounts):
+ raise ExecutionError('Not able to unmount and cleanup '
+ '{}'.format(self.volname))
+
+ self.get_super_method(self, 'tearDown')()
+
+ def _perform_io_and_disable_self_heal(self):
+ '''Refactor of steps common to all tests: Perform IO, disable heal'''
+ ret = mkdir(self.client, self.fqpath)
+ self.assertTrue(ret,
+ 'Directory creation failed on {}'.format(self.client))
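+ # io_cmd produces printable random data; the desired size (10K here)
+ # is appended where the command is used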
+ self.io_cmd = 'cat /dev/urandom | tr -dc [:space:][:print:] | head -c '
+ # Create 6 dir's, 6 files and 6 files in each subdir with 10K data
+ file_io = ('''cd {0}; for i in `seq 1 6`;
+ do mkdir dir.$i; {1} 10K > file.$i;
+ for j in `seq 1 6`;
+ do {1} 10K > dir.$i/file.$j; done;
+ done;'''.format(self.fqpath, self.io_cmd))
+ ret, _, err = g.run(self.client, file_io)
+ self.assertEqual(ret, 0, 'Unable to create directories and data files')
+ self.assertFalse(err, '{0} failed with {1}'.format(file_io, err))
+
+ # Disable self heal daemon
+ self.assertTrue(disable_self_heal_daemon(self.mnode, self.volname),
+ 'Disabling self-heal-daemon failed')
+
+ def _perform_brick_ops_and_enable_self_heal(self, op_type):
+ '''Refactor of steps common to all tests: Brick down and perform
+ metadata/data operations'''
+ # First brick in the subvol will always be online and used for self
+ # heal, so make keys match brick index
+ self.op_cmd = {
+ # Metadata Operations (owner and permission changes)
+ 'metadata': {
+ 2:
+ '''cd {0}; for i in `seq 1 3`; do chown -R qa_all:qa_func \
+ dir.$i file.$i; chmod -R 555 dir.$i file.$i; done;''',
+ 3:
+ '''cd {0}; for i in `seq 1 3`; do chown -R :qa_system \
+ dir.$i file.$i; chmod -R 777 dir.$i file.$i; done;''',
+ # 4 - Will be used for final data consistency check
+ 4:
+ '''cd {0}; for i in `seq 1 6`; do chown -R qa_all:qa_system \
+ dir.$i file.$i; chmod -R 777 dir.$i file.$i; done;''',
+ },
+ # Data Operations (append data to the files)
+ 'data': {
+ 2:
+ '''cd {0}; for i in `seq 1 3`;
+ do {1} 2K >> file.$i;
+ for j in `seq 1 3`;
+ do {1} 2K >> dir.$i/file.$j; done;
+ done;''',
+ 3:
+ '''cd {0}; for i in `seq 1 3`;
+ do {1} 3K >> file.$i;
+ for j in `seq 1 3`;
+ do {1} 3K >> dir.$i/file.$j; done;
+ done;''',
+ # 4 - Will be used for final data consistency check
+ 4:
+ '''cd {0}; for i in `seq 1 6`;
+ do {1} 4K >> file.$i;
+ for j in `seq 1 6`;
+ do {1} 4K >> dir.$i/file.$j; done;
+ done;''',
+ },
+ }
+ bricks = get_online_bricks_list(self.mnode, self.volname)
+ self.assertIsNotNone(bricks,
+ 'Not able to get list of bricks in the volume')
+
+ # Make first brick always online and start operations from second brick
+ for index, brick in enumerate(bricks[1:], start=2):
+
+ # Bring brick offline
+ ret = bring_bricks_offline(self.volname, brick)
+ self.assertTrue(ret, 'Unable to bring {} offline'.format(brick))
+
+ # Perform metadata/data operation
+ cmd = self.op_cmd[op_type][index].format(self.fqpath, self.io_cmd)
+ ret, _, err = g.run(self.client, cmd)
+ self.assertEqual(ret, 0, '{0} failed with {1}'.format(cmd, err))
+ self.assertFalse(err, '{0} failed with {1}'.format(cmd, err))
+
+ # Bring brick online
+ ret = bring_bricks_online(
+ self.mnode,
+ self.volname,
+ brick,
+ bring_bricks_online_methods='volume_start_force')
+ self.assertTrue(ret, 'Unable to bring {} online'.format(brick))
+
+ # Assert metadata/data operations resulted in pending heals
+ self.assertFalse(is_heal_complete(self.mnode, self.volname))
+
+ # Enable and wait self heal daemon to be online
+ self.assertTrue(enable_self_heal_daemon(self.mnode, self.volname),
+ 'Enabling self heal daemon failed')
+ self.assertTrue(
+ wait_for_self_heal_daemons_to_be_online(self.mnode, self.volname),
+ 'Not all self heal daemons are online')
+
+ def _validate_heal_completion_and_arequal(self, op_type):
+ '''Refactor of steps common to all tests: Validate heal from heal
+ commands, verify arequal, perform IO and verify arequal after IO'''
+
+ # Validate heal completion
+ self.assertTrue(monitor_heal_completion(self.mnode, self.volname),
+ 'Self heal is not completed within timeout')
+ self.assertFalse(
+ is_volume_in_split_brain(self.mnode, self.volname),
+ 'Volume is in split brain even after heal completion')
+
+ subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
+ self.assertTrue(subvols, 'Not able to get list of subvols')
+ arbiter = self.volume_type.find('arbiter') >= 0
+ stop = len(subvols[0]) - 1 if arbiter else len(subvols[0])
+
+ # Validate arequal
+ self._validate_arequal_and_perform_lookup(subvols, stop)
+
+ # Perform some additional metadata/data operations
+ cmd = self.op_cmd[op_type][4].format(self.fqpath, self.io_cmd)
+ ret, _, err = g.run(self.client, cmd)
+ self.assertEqual(ret, 0, '{0} failed with {1}'.format(cmd, err))
+ self.assertFalse(err, '{0} failed with {1}'.format(cmd, err))
+
+ # Validate arequal after additional operations
+ self._validate_arequal_and_perform_lookup(subvols, stop)
+
+ def _validate_arequal_and_perform_lookup(self, subvols, stop):
+ '''Refactor of steps common to all tests: Validate arequal from bricks
+ backend and perform a lookup of all files from mount'''
+ for subvol in subvols:
+ ret, arequal = collect_bricks_arequal(subvol[0:stop])
+ self.assertTrue(
+ ret, 'Unable to get `arequal` checksum on '
+ '{}'.format(subvol[0:stop]))
+ self.assertEqual(
+ len(set(arequal)), 1, 'Mismatch of `arequal` '
+ 'checksum among {} is identified'.format(subvol[0:stop]))
+
+ # Perform a lookup of all files and directories on mounts
+ self.assertTrue(list_all_files_and_dirs_mounts(self.mounts),
+ 'Failed to list all files and dirs from mount')
+
+ def test_metadata_heal_from_shd(self):
+ '''Description: Verify files heal after switching on `self-heal-daemon`
+ when metadata operations are performed while a brick was down
+
+ Steps:
+ 1. Create, mount and run IO on volume
+ 2. Set `self-heal-daemon` to `off`, cyclic brick down and perform
+ metadata operations
+ 3. Set `self-heal-daemon` to `on` and wait for heal completion
+ 4. Validate arequal checksum on backend bricks
+ '''
+ op_type = 'metadata'
+ self._perform_io_and_disable_self_heal()
+ self._perform_brick_ops_and_enable_self_heal(op_type=op_type)
+ self._validate_heal_completion_and_arequal(op_type=op_type)
+ g.log.info('Pass: Verification of metadata heal after switching on '
+ '`self heal daemon` is complete')
+
+ def test_metadata_heal_from_heal_cmd(self):
+ '''Description: Verify files heal after triggering heal command when
+ metadata operations are performed while a brick was down
+
+ Steps:
+ 1. Create, mount and run IO on volume
+ 2. Set `self-heal-daemon` to `off`, cyclic brick down and perform
+ metadata operations
+ 3. Set `self-heal-daemon` to `on`, invoke `gluster vol <vol> heal`
+ 4. Validate arequal checksum on backend bricks
+ '''
+ op_type = 'metadata'
+ self._perform_io_and_disable_self_heal()
+ self._perform_brick_ops_and_enable_self_heal(op_type=op_type)
+
+ # Invoke `glfsheal`
+ self.assertTrue(trigger_heal(self.mnode, self.volname),
+ 'Unable to trigger index heal on the volume')
+
+ self._validate_heal_completion_and_arequal(op_type=op_type)
+ g.log.info(
+ 'Pass: Verification of metadata heal via `glfsheal` is complete')
+
+ def test_data_heal_from_shd(self):
+ '''Description: Verify files heal after switching on `self-heal-daemon`
+ when data operations are performed while a brick was down
+
+ Steps:
+ 1. Create, mount and run IO on volume
+ 2. Set `self-heal-daemon` to `off`, cyclic brick down and perform data
+ operations
+ 3. Set `self-heal-daemon` to `on` and wait for heal completion
+ 4. Validate arequal checksum on backend bricks
+ '''
+ op_type = 'data'
+ self._perform_io_and_disable_self_heal()
+ self._perform_brick_ops_and_enable_self_heal(op_type=op_type)
+ self._validate_heal_completion_and_arequal(op_type=op_type)
+ g.log.info('Pass: Verification of data heal after switching on '
+ '`self heal daemon` is complete')