Diffstat (limited to 'tests/functional/arbiter')
23 files changed, 2357 insertions, 133 deletions
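Several hunks below replace the Python 2-only unittest helper assertItemsEqual() with an equality check on sorted copies of the two arequal result lists. A minimal, self-contained sketch of that pattern follows; the test class name and checksum values are made up for illustration (assertCountEqual is the Python 3 rename of assertItemsEqual).

import unittest


class ChecksumComparisonExample(unittest.TestCase):
    def test_order_insensitive_equality(self):
        # Comparing sorted copies is order-insensitive, like the removed
        # assertItemsEqual, and works on both Python 2 and Python 3.
        result_before_online = ['checksum-b', 'checksum-a']
        result_after_online = ['checksum-a', 'checksum-b']
        self.assertEqual(sorted(result_before_online),
                         sorted(result_after_online),
                         'Checksums are not equal')


if __name__ == '__main__':
    unittest.main()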
diff --git a/tests/functional/arbiter/brick_cases/test_replica3_to_arbiter.py b/tests/functional/arbiter/brick_cases/test_replica3_to_arbiter.py
index 947c6eef7..24c014502 100755
--- a/tests/functional/arbiter/brick_cases/test_replica3_to_arbiter.py
+++ b/tests/functional/arbiter/brick_cases/test_replica3_to_arbiter.py
@@ -22,6 +22,12 @@ from glustolibs.gluster.volume_libs import (
expand_volume, wait_for_volume_process_to_be_online,
verify_all_process_of_volume_are_online, shrink_volume, get_subvols)
from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.io.utils import run_linux_untar
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+ is_heal_complete,
+ is_volume_in_split_brain)
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.gluster.heal_ops import trigger_heal
@runs_on([['replicated', 'distributed-replicated'],
@@ -37,22 +43,29 @@ class GlusterArbiterVolumeTypeClass(GlusterBaseClass):
# Calling GlusterBaseClass setUp
self.get_super_method(self, 'setUp')()
+ # Set I/O flag to false
+ self.is_io_running = False
+
# Setup Volume
- g.log.info("Starting to Setup Volume")
- ret = self.setup_volume()
+ g.log.info("Starting to Setup and Mount Volume")
+ # Creating Volume and mounting the volume
+ ret = self.setup_volume_and_mount_volume([self.mounts[0]])
if not ret:
- raise ExecutionError("Failed to Setup_Volume")
- g.log.info("Successful in Setup Volume")
+ raise ExecutionError("Volume creation or mount failed: %s"
+ % self.volname)
self.subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
def tearDown(self):
- # Cleanup Volume
- g.log.info("Starting to Unmount Volume and Cleanup Volume")
- ret = self.cleanup_volume()
+ # Wait for I/O if not completed
+ if self.is_io_running:
+ if not self._wait_for_untar_completion():
+ g.log.error("I/O failed to stop on clients")
+
+ # Unmounting and cleaning volume
+ ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]])
if not ret:
- raise ExecutionError("Failed to Cleanup Volume")
- g.log.info("Successful Cleanup Volume")
+ raise ExecutionError("Unable to delete volume %s" % self.volname)
# Calling GlusterBaseClass tearDown
self.get_super_method(self, 'tearDown')()
@@ -67,10 +80,22 @@ class GlusterArbiterVolumeTypeClass(GlusterBaseClass):
g.log.info('Clearing brick %s is successful', brick)
g.log.info('Clearing for all brick is successful')
- def test_replicated_to_arbiter_volume(self):
+ def _wait_for_untar_completion(self):
+ """Wait for untar to complete"""
+ has_process_stopped = []
+ for proc in self.io_process:
+ try:
+ ret, _, _ = proc.async_communicate()
+ if not ret:
+ has_process_stopped.append(False)
+ has_process_stopped.append(True)
+ except ValueError:
+ has_process_stopped.append(True)
+ return all(has_process_stopped)
+
+ def _convert_replicated_to_arbiter_volume(self):
"""
- Description:-
- Reduce the replica count from replica 3 to arbiter
+ Helper method to convert a replicated volume to an arbiter volume.
"""
# pylint: disable=too-many-statements
# Remove brick to reduce the replica count from replica 3
@@ -99,7 +124,7 @@ class GlusterArbiterVolumeTypeClass(GlusterBaseClass):
g.log.info("Adding bricks to convert to Arbiter Volume")
replica_arbiter = {'replica_count': 1, 'arbiter_count': 1}
ret = expand_volume(self.mnode, self.volname, self.servers,
- self.all_servers_info, add_to_hot_tier=False,
+ self.all_servers_info, force=True,
**replica_arbiter)
self.assertTrue(ret, "Failed to expand the volume %s" % self.volname)
g.log.info("Changing volume to arbiter volume is successful %s",
@@ -119,3 +144,70 @@ class GlusterArbiterVolumeTypeClass(GlusterBaseClass):
self.assertTrue(ret, "Volume %s : All process are not online"
% self.volname)
g.log.info("Volume %s : All process are online", self.volname)
+
+ def test_replicated_to_arbiter_volume(self):
+ """
+ Description:-
+ Reduce the replica count from replica 3 to arbiter
+ """
+ # pylint: disable=too-many-statements
+ self._convert_replicated_to_arbiter_volume()
+
+ def test_replica_to_arbiter_volume_with_io(self):
+ """
+ Description: Replica 3 to arbiter conversion with ongoing I/O
+
+ Steps :
+ 1) Create a replica 3 volume and start volume.
+ 2) Set client side self heal off.
+ 3) Fuse mount the volume.
+ 4) Create directory dir1 and write data.
+    Example: untar a Linux kernel tarball from the client into dir1.
+ 5) While I/O is running, execute the remove-brick command to
+    convert the replica 3 volume to replica 2.
+ 6) Execute the add-brick command to convert it to an arbiter volume,
+    providing the path of the new arbiter brick.
+ 7) Issue gluster volume heal.
+ 8) Heal should be completed with no files in split-brain.
+ """
+
+ # pylint: disable=too-many-statements
+ # Create a dir to start untar
+ self.linux_untar_dir = "{}/{}".format(self.mounts[0].mountpoint,
+ "linuxuntar")
+ ret = mkdir(self.clients[0], self.linux_untar_dir)
+ self.assertTrue(ret, "Failed to create dir linuxuntar for untar")
+
+ # Start linux untar on dir linuxuntar
+ self.io_process = run_linux_untar(self.clients[0],
+ self.mounts[0].mountpoint,
+ dirs=tuple(['linuxuntar']))
+ self.is_io_running = True
+
+ # Convert the replicated volume to an arbiter volume
+ self._convert_replicated_to_arbiter_volume()
+
+ # Wait for IO to complete.
+ ret = self._wait_for_untar_completion()
+ self.assertFalse(ret, "IO didn't complete or failed on client")
+ self.is_io_running = False
+
+ # Start healing
+ ret = trigger_heal(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal is not started')
+ g.log.info('Healing is started')
+
+ # Monitor heal completion
+ ret = monitor_heal_completion(self.mnode, self.volname,
+ timeout_period=3600)
+ self.assertTrue(ret, 'Heal has not yet completed')
+
+ # Check if heal is completed
+ ret = is_heal_complete(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal is not complete')
+ g.log.info('Heal is completed successfully')
+
+ # Check for split-brain
+ ret = is_volume_in_split_brain(self.mnode, self.volname)
+ self.assertFalse(ret, 'Volume is in split-brain state')
+ g.log.info('Volume is not in split-brain state')
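The helper above drives the conversion through glustolibs: a remove-brick step takes the volume from replica 3 to replica 2, and expand_volume() with replica_count=1 and arbiter_count=1 adds the arbiter brick. For reference, a minimal sketch of the equivalent raw gluster CLI for a single subvolume, driven through glusto's g.run(); the node, volume name and brick paths are hypothetical placeholders and are not taken from this change.

from glusto.core import Glusto as g


def convert_replica3_subvol_to_arbiter(mnode, volname, data_brick,
                                       arbiter_brick):
    """Sketch: replica 3 -> replica 2, then add an arbiter brick."""
    # Drop one data brick from the subvolume, reducing the replica count to 2
    ret, _, err = g.run(mnode, "gluster volume remove-brick %s replica 2 "
                               "%s force" % (volname, data_brick))
    assert ret == 0, err
    # Add an arbiter brick, turning the subvolume into replica 3 arbiter 1
    ret, _, err = g.run(mnode, "gluster volume add-brick %s replica 3 "
                               "arbiter 1 %s force" % (volname, arbiter_brick))
    assert ret == 0, err


# Hypothetical usage:
# convert_replica3_subvol_to_arbiter('server1.example.com', 'testvol',
#                                    'server3.example.com:/bricks/brick1/testvol',
#                                    'server3.example.com:/bricks/arbiter/testvol')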
diff --git a/tests/functional/arbiter/brick_cases/test_rmvrf_files.py b/tests/functional/arbiter/brick_cases/test_rmvrf_files.py
index 9dbaa74fc..8d7304b0b 100755
--- a/tests/functional/arbiter/brick_cases/test_rmvrf_files.py
+++ b/tests/functional/arbiter/brick_cases/test_rmvrf_files.py
@@ -145,10 +145,7 @@ class TestRmrfMount(GlusterBaseClass):
# Select bricks to bring offline
bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
self.mnode, self.volname))
- bricks_to_bring_offline = list(filter(None, (
- bricks_to_bring_offline_dict['hot_tier_bricks'] +
- bricks_to_bring_offline_dict['cold_tier_bricks'] +
- bricks_to_bring_offline_dict['volume_bricks'])))
+ bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks']
# Killing one brick from the volume set
g.log.info("Bringing bricks: %s offline", bricks_to_bring_offline)
diff --git a/tests/functional/arbiter/test_afr_read_write.py b/tests/functional/arbiter/test_afr_read_write.py
new file mode 100644
index 000000000..09e6a3a2a
--- /dev/null
+++ b/tests/functional/arbiter/test_afr_read_write.py
@@ -0,0 +1,192 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ +from random import sample +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import ( + verify_all_process_of_volume_are_online, + wait_for_volume_process_to_be_online) +from glustolibs.gluster.brick_libs import ( + get_all_bricks, + bring_bricks_offline, + bring_bricks_online, + are_bricks_offline) +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_volume_in_split_brain) +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.io.utils import validate_io_procs + + +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestAfrReadWrite(GlusterBaseClass): + + """ + Description: + Arbiter test writes and reads from a file + """ + def setUp(self): + # Calling GlusterBaseClass + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def _bring_bricks_online_heal(self, mnode, volname, bricks_list): + """ + Bring bricks online and monitor heal completion + """ + # Bring bricks online + ret = bring_bricks_online( + mnode, volname, bricks_list, + bring_bricks_online_methods=['volume_start_force']) + self.assertTrue(ret, 'Failed to bring bricks online') + + # Wait for volume processes to be online + ret = wait_for_volume_process_to_be_online(mnode, volname) + self.assertTrue(ret, ("Failed to wait for volume {} processes to " + "be online".format(volname))) + + # Verify volume's all process are online + ret = verify_all_process_of_volume_are_online(mnode, volname) + self.assertTrue(ret, ("Volume {} : All process are not online".format + (volname))) + g.log.info("Volume %s : All process are online", volname) + + # Monitor heal completion + ret = monitor_heal_completion(mnode, volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check for split-brain + ret = is_volume_in_split_brain(mnode, volname) + self.assertFalse(ret, 'Volume is in split-brain state') + + def test_afr_read_write(self): + """ + Test read and write of file + Description: + - Get the bricks from the volume + - Creating directory test_write_and_read_file + - Write from 1st client + - Read from 2nd client + - Select brick to bring offline + - Bring brick offline + - Validating IO's on client1 + - Validating IO's on client2 + - Bring bricks online + - Wait for volume processes to be online + - Verify volume's all process are online + - Monitor heal completion + - Check for split-brain + - Bring 2nd brick offline + - Check if brick is offline + - Write from 1st client + - Read from 2nd client + - Bring bricks online + - Wait for volume processes to be online + - Verify volume's all process are online + - Monitor heal completion + - Check for split-brain + + - Get arequal after getting bricks online + """ + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + # Get the bricks from the volume + bricks_list = get_all_bricks(self.mnode, self.volname) + g.log.info("Brick List : %s", bricks_list) + + # Creating directory 
test_write_and_read_file + ret = mkdir(self.mounts[0].client_system, + "{}/test_write_and_read_file" + .format(self.mounts[0].mountpoint)) + self.assertTrue(ret, "Failed to create directory") + g.log.info("Directory 'test_write_and_read_file' on %s created " + "successfully", self.mounts[0]) + + # Write from 1st client + cmd_to_write = ( + 'cd %s/test_write_and_read_file ; for i in `seq 1 5000` ;' + 'do echo -e "Date:`date`\n" >> test_file ;echo -e "' + '`cal`\n" >> test_file ; done ; cd ..' + % self.mounts[0].mountpoint) + proc1 = g.run_async(self.mounts[0].client_system, + cmd_to_write) + + # Read from 2nd client + cmd = ('cd %s/ ;for i in {1..30};' + 'do cat test_write_and_read_file/test_file;done' + % self.mounts[1].mountpoint) + proc2 = g.run_async(self.mounts[1].client_system, cmd) + + # Bring brick offline + bricks_to_bring_offline = sample(bricks_list, 2) + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline[0]) + self.assertTrue(ret, 'Failed to bring bricks {} offline'. + format(bricks_to_bring_offline)) + + # Check brick is offline + ret = are_bricks_offline(self.mnode, self.volname, + [bricks_to_bring_offline[0]]) + self.assertTrue(ret, 'Bricks {} are not offline'. + format(bricks_to_bring_offline[0])) + + # Validating IO's + for proc, mount in zip([proc1, proc2], self.mounts): + ret = validate_io_procs([proc], mount) + self.assertTrue(ret, "IO failed on client") + g.log.info("Successfully validated all IO's") + + self._bring_bricks_online_heal(self.mnode, self.volname, bricks_list) + + # Bring down second brick + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline[1]) + self.assertTrue(ret, 'Failed to bring bricks {} offline'. + format(bricks_to_bring_offline[1])) + + # Check if brick is offline + ret = are_bricks_offline(self.mnode, self.volname, + [bricks_to_bring_offline[1]]) + self.assertTrue(ret, 'Bricks {} are not offline'. + format(bricks_to_bring_offline[1])) + + # Write from 1st client + ret, _, _ = g.run(self.mounts[0].client_system, cmd_to_write) + self.assertEqual(ret, 0, "Failed to write to file") + g.log.info("Successfully written to file") + + # Read from 2nd client + cmd = ('cd %s/ ;cat test_write_and_read_file/test_file' + % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to read file on mountpoint") + g.log.info("Successfully read file on mountpoint") + + self._bring_bricks_online_heal(self.mnode, self.volname, bricks_list) diff --git a/tests/functional/arbiter/test_brick_down_cyclic.py b/tests/functional/arbiter/test_brick_down_cyclic.py new file mode 100644 index 000000000..8639a4dc5 --- /dev/null +++ b/tests/functional/arbiter/test_brick_down_cyclic.py @@ -0,0 +1,140 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +# pylint: disable=too-many-statements, too-many-locals +import time +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.heal_ops import trigger_heal +from glustolibs.gluster.heal_libs import (is_volume_in_split_brain, + is_heal_complete) +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + get_all_bricks, + are_bricks_online) +from glustolibs.gluster.heal_libs import ( + monitor_heal_completion, are_all_self_heal_daemons_are_online) + + +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestBrickDownHeal(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Setup Volume and Mount Volume + ret = cls.setup_volume_and_mount_volume(cls.mounts, True) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + @classmethod + def tearDownClass(cls): + """ + Cleanup Volume + """ + ret = cls.unmount_volume_and_cleanup_volume(cls.mounts) + if not ret: + raise ExecutionError("Failed to create volume") + + cls.get_super_method(cls, 'tearDownClass')() + + def test_brick_down_heal(self): + """ + - Run IO's from client on a single file + - Now bring down bricks in cyclic order + - kill brick 1, sleep for 5 seconds, bring brick 1 up, wait for 10s + - Now repeat step3 for brick2 and brick 3 + - Repeat the cycle a few times + - Trigger heal, check for split brain using command + """ + # Write IO's + self.all_mounts_procs = [] + cmd = ("for i in `seq 1 10`;" + "do dd if=/dev/urandom of=%s/file$i bs=1K count=1;" + "done" % self.mounts[0].mountpoint) + proc = g.run_async(self.mounts[0].client_system, cmd) + self.all_mounts_procs.append(proc) + + # Killing bricks in cyclic order + bricks_list = get_all_bricks(self.mnode, self.volname) + + # Total number of cyclic brick-down cycles to be executed + number_of_cycles = 0 + while number_of_cycles < 3: + number_of_cycles += 1 + for brick in bricks_list: + # Bring brick offline + g.log.info('Bringing bricks %s offline', brick) + ret = bring_bricks_offline(self.volname, [brick]) + self.assertTrue(ret, ("Failed to bring bricks %s offline" + % brick)) + + ret = are_bricks_offline(self.mnode, self.volname, [brick]) + self.assertTrue(ret, 'Bricks %s are not offline' % brick) + g.log.info('Bringing bricks %s offline is successful', brick) + + # Introducing 5 second sleep when brick is down + g.log.info("Waiting for 5 seconds, with ongoing IO while " + "brick %s is offline", brick) + ret = time.sleep(5) + + # Bring brick online + g.log.info('Bringing bricks %s online', brick) + ret = bring_bricks_online(self.mnode, self.volname, [brick]) + self.assertTrue(ret, ("Failed to bring bricks %s online " + % brick)) + g.log.info('Bricks %s are online', brick) + + # Introducing 10 second sleep when brick is up + g.log.info("Waiting for 10 seconds,when " + "brick %s is online", brick) + ret = time.sleep(10) + + # Check if bricks are online + ret = are_bricks_online(self.mnode, self.volname, bricks_list) + self.assertTrue(ret, 'Bricks %s are not online' % bricks_list) + g.log.info('Bricks %s are online', bricks_list) + + # Check daemons + g.log.info('Checking daemons...') + ret = are_all_self_heal_daemons_are_online(self.mnode, + self.volname) + self.assertTrue(ret, ("Some of the self-heal Daemons are " + "offline")) + g.log.info('All 
self-heal Daemons are online') + + # Trigger self heal + ret = trigger_heal(self.mnode, self.volname) + self.assertTrue(ret, 'Unable to trigger heal on volume') + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') diff --git a/tests/functional/arbiter/test_data_delete.py b/tests/functional/arbiter/test_data_delete.py new file mode 100644 index 000000000..4753efcbc --- /dev/null +++ b/tests/functional/arbiter/test_data_delete.py @@ -0,0 +1,110 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import get_all_bricks + + +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestDataDelete(GlusterBaseClass): + """ + Description: + Test data delete/rename on arbiter volume + """ + def setUp(self): + # Calling GlusterBaseClass + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_data_delete(self): + """ + Test steps: + - Get brick list + - Create files and rename + - Check if brick path contains old files + - Delete files from mountpoint + - Check .glusterfs/indices/xattrop is empty + - Check if brickpath is empty + """ + + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + # Get the bricks from the volume + bricks_list = get_all_bricks(self.mnode, self.volname) + g.log.info("Brick List : %s", bricks_list) + + # Create files and rename + cmd = ('cd %s ;for i in `seq 1 100` ;do mkdir -pv directory$i;' + 'cd directory$i;dd if=/dev/urandom of=file$i bs=1M count=5;' + 'mv file$i renamed$i;done;' % (self.mounts[0].mountpoint)) + ret, _, _ = g.run(self.clients[0], cmd) + 
self.assertEqual(ret, 0, "Fail: Not able to create files on " + "{}".format(self.mounts[0].mountpoint)) + g.log.info("Files created successfully and renamed") + + # Check if brickpath contains old files + for brick in bricks_list: + brick_node, brick_path = brick.split(":") + cmd = ("ls -1 %s |grep file |wc -l " % brick_path) + ret, out, _ = g.run(brick_node, cmd) + self.assertEqual(0, int(out.strip()), "Brick path {} contains old " + "file in node {}".format(brick_path, brick_node)) + g.log.info("Brick path contains renamed files") + + # Delete files from mountpoint + cmd = ('rm -rf -v %s/*' % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to delete files") + g.log.info("Files deleted successfully for %s", self.mounts[0]) + + # Check .glusterfs/indices/xattrop is empty + for brick in bricks_list: + brick_node, brick_path = brick.split(":") + cmd = ("ls -1 %s/.glusterfs/indices/xattrop/ | " + "grep -ve \"xattrop-\" | wc -l" % brick_path) + ret, out, _ = g.run(brick_node, cmd) + self.assertEqual(0, int(out.strip()), ".glusterfs/indices/" + "xattrop is not empty") + g.log.info("No pending heals on bricks") + + # Check if brickpath is empty + for brick in bricks_list: + brick_node, brick_path = brick.split(":") + cmd = ("ls -1 %s |wc -l " % brick_path) + ret, out, _ = g.run(brick_node, cmd) + self.assertEqual(0, int(out.strip()), "Brick path {} is not empty " + "in node {}".format(brick_path, brick_node)) + g.log.info("Brick path is empty on all nodes") diff --git a/tests/functional/arbiter/test_data_self_heal_algorithm_diff_default.py b/tests/functional/arbiter/test_data_self_heal_algorithm_diff_default.py index 17c2ba4d5..bbb30f271 100644 --- a/tests/functional/arbiter/test_data_self_heal_algorithm_diff_default.py +++ b/tests/functional/arbiter/test_data_self_heal_algorithm_diff_default.py @@ -133,10 +133,7 @@ class TestSelfHeal(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -230,7 +227,8 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks online # and after bringing bricks online - self.assertItemsEqual(result_before_online, result_after_online, - 'Checksums are not equal') + self.assertEqual(sorted(result_before_online), + sorted(result_after_online), + 'Checksums are not equal') g.log.info('Checksums before bringing bricks online ' 'and after bringing bricks online are equal') diff --git a/tests/functional/arbiter/test_data_self_heal_algorithm_diff_heal_command.py b/tests/functional/arbiter/test_data_self_heal_algorithm_diff_heal_command.py index 132b9df8a..0aa440af1 100755 --- a/tests/functional/arbiter/test_data_self_heal_algorithm_diff_heal_command.py +++ b/tests/functional/arbiter/test_data_self_heal_algorithm_diff_heal_command.py @@ -161,10 +161,7 @@ class TestSelfHeal(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - 
bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -272,6 +269,7 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks offline # and after bringing bricks online - self.assertItemsEqual(result_before_online, result_after_online, - 'Checksums are not equal') + self.assertEqual(sorted(result_before_online), + sorted(result_after_online), + 'Checksums are not equal') g.log.info('Checksums are equal') diff --git a/tests/functional/arbiter/test_data_self_heal_algorithm_full_default.py b/tests/functional/arbiter/test_data_self_heal_algorithm_full_default.py index 82538d42a..f4f13931a 100755 --- a/tests/functional/arbiter/test_data_self_heal_algorithm_full_default.py +++ b/tests/functional/arbiter/test_data_self_heal_algorithm_full_default.py @@ -132,10 +132,7 @@ class TestSelfHeal(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -229,7 +226,8 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks online # and after bringing bricks online - self.assertItemsEqual(result_before_online, result_after_online, - 'Checksums are not equal') + self.assertEqual(sorted(result_before_online), + sorted(result_after_online), + 'Checksums are not equal') g.log.info('Checksums before bringing bricks online ' 'and after bringing bricks online are equal') diff --git a/tests/functional/arbiter/test_data_self_heal_daemon_off.py b/tests/functional/arbiter/test_data_self_heal_daemon_off.py index df2e58aa6..9faae85ca 100644 --- a/tests/functional/arbiter/test_data_self_heal_daemon_off.py +++ b/tests/functional/arbiter/test_data_self_heal_daemon_off.py @@ -164,10 +164,7 @@ class TestSelfHeal(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) diff --git a/tests/functional/arbiter/test_entry_self_heal_heal_command.py b/tests/functional/arbiter/test_entry_self_heal_heal_command.py index ced2bc19c..64c6c2339 100644 --- a/tests/functional/arbiter/test_entry_self_heal_heal_command.py +++ b/tests/functional/arbiter/test_entry_self_heal_heal_command.py @@ -177,10 +177,8 @@ class TestSelfHeal(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + 
bricks_to_bring_offline = ( + bricks_to_bring_offline_dict['volume_bricks']) # Bring brick offline g.log.info('Bringing bricks %s offline...', diff --git a/tests/functional/arbiter/test_gfid_self_heal.py b/tests/functional/arbiter/test_gfid_self_heal.py new file mode 100644 index 000000000..9ed4a8767 --- /dev/null +++ b/tests/functional/arbiter/test_gfid_self_heal.py @@ -0,0 +1,206 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import ( + verify_all_process_of_volume_are_online, + wait_for_volume_process_to_be_online) +from glustolibs.gluster.brick_libs import ( + select_volume_bricks_to_bring_offline, + bring_bricks_offline, + bring_bricks_online, + are_bricks_offline) +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_volume_in_split_brain) +from glustolibs.io.utils import (collect_mounts_arequal) +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.misc.misc_libs import upload_scripts + + +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestGFIDSelfHeal(GlusterBaseClass): + + """ + Description: + Arbiter Test cases related to GFID self heal + """ + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + # Calling GlusterBaseClass + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_gfid_self_heal(self): + """ + Test GFID self heal + Description: + - Creating directory test_compilation + - Write Deep directories and files + - Get arequal before getting bricks offline + - Select bricks to bring offline + - Bring brick offline + - Delete directory on 
mountpoint where data is writte + - Create the same directory and write same data + - Bring bricks online + - Wait for volume processes to be online + - Verify volume's all process are online + - Monitor heal completion + - Check for split-brain + - Get arequal after getting bricks online + """ + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + # Creating directory test_compilation + ret = mkdir(self.mounts[0].client_system, "{}/test_gfid_self_heal" + .format(self.mounts[0].mountpoint)) + self.assertTrue(ret, "Failed to create directory") + g.log.info("Directory 'test_gfid_self_heal' on %s created " + "successfully", self.mounts[0]) + + # Write Deep directories and files + count = 1 + for mount_obj in self.mounts: + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d --dir-depth 2 " + "--dir-length 10 --max-num-of-dirs 5 " + "--num-of-files 5 %s/dir1" % ( + self.script_upload_path, count, + mount_obj.mountpoint)) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to create files on mountpoint") + g.log.info("Successfully created files on mountpoint") + count += 10 + + # Get arequal before getting bricks offline + ret, result_before_offline = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Arequal after getting bricks offline ' + 'is %s', result_before_offline) + + # Select bricks to bring offline + bricks_to_bring_offline = select_volume_bricks_to_bring_offline( + self.mnode, self.volname) + self.assertIsNotNone(bricks_to_bring_offline, "List is empty") + + # Bring brick offline + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} offline'. + format(bricks_to_bring_offline)) + + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks {} are not offline'. 
+ format(bricks_to_bring_offline)) + g.log.info('Bringing bricks %s offline is successful', + bricks_to_bring_offline) + + # Delete directory on mountpoint where data is written + cmd = ('rm -rf -v %s/test_gfid_self_heal' % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to delete directory") + g.log.info("Directory deleted successfully for %s", self.mounts[0]) + + # Create the same directory and write same data + ret = mkdir(self.mounts[0].client_system, "{}/test_gfid_self_heal" + .format(self.mounts[0].mountpoint)) + self.assertTrue(ret, "Failed to create directory") + g.log.info("Directory 'test_gfid_self_heal' on %s created " + "successfully", self.mounts[0]) + + # Write the same files again + count = 1 + for mount_obj in self.mounts: + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d --dir-depth 2 " + "--dir-length 10 --max-num-of-dirs 5 " + "--num-of-files 5 %s/dir1" % ( + self.script_upload_path, count, + mount_obj.mountpoint)) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to create files on mountpoint") + g.log.info("Successfully created files on mountpoint") + count += 10 + + # Bring bricks online + ret = bring_bricks_online( + self.mnode, self.volname, + bricks_to_bring_offline, + bring_bricks_online_methods=['volume_start_force']) + self.assertTrue(ret, 'Failed to bring bricks {} online'.format + (bricks_to_bring_offline)) + g.log.info('Bringing bricks %s online is successful', + bricks_to_bring_offline) + + # Wait for volume processes to be online + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to wait for volume {} processes to " + "be online".format(self.volname))) + g.log.info("Successful in waiting for volume %s processes to be " + "online", self.volname) + + # Verify volume's all process are online + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, ("Volume {} : All process are not online".format + (self.volname))) + g.log.info("Volume %s : All process are online", self.volname) + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + # Get arequal after getting bricks online + ret, result_after_online = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Arequal after getting bricks online ' + 'is %s', result_after_online) diff --git a/tests/functional/arbiter/test_gluster_clone_heal.py b/tests/functional/arbiter/test_gluster_clone_heal.py new file mode 100644 index 000000000..94603c701 --- /dev/null +++ b/tests/functional/arbiter/test_gluster_clone_heal.py @@ -0,0 +1,209 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import ( + verify_all_process_of_volume_are_online, + wait_for_volume_process_to_be_online) +from glustolibs.gluster.brick_libs import ( + select_volume_bricks_to_bring_offline, + bring_bricks_offline, + bring_bricks_online, + are_bricks_offline) +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_volume_in_split_brain) +from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs) +from glustolibs.gluster.glusterdir import mkdir + + +@runs_on([['arbiter', 'distributed-arbiter', + 'replicated', 'distributed-replicated'], ['glusterfs']]) +class TestGlusterCloneHeal(GlusterBaseClass): + """ + Description: + Arbiter Test cases related to self heal + of data and hardlink + """ + def setUp(self): + # Calling GlusterBaseClass + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_gluster_clone_heal(self): + """ + Test gluster compilation on mount point(Heal command) + - Creating directory test_compilation + - Compile gluster on mountpoint + - Select bricks to bring offline + - Bring brick offline + - Validate IO + - Bring bricks online + - Wait for volume processes to be online + - Verify volume's all process are online + - Monitor heal completion + - Check for split-brain + - Get arequal after getting bricks online + - Compile gluster on mountpoint again + - Select bricks to bring offline + - Bring brick offline + - Validate IO + - Bring bricks online + - Wait for volume processes to be online + - Verify volume's all process are online + - Monitor heal completion + - Check for split-brain + - Get arequal after getting bricks online + """ + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + # Creating directory test_compilation + ret = mkdir(self.mounts[0].client_system, "{}/test_compilation" + .format(self.mounts[0].mountpoint)) + self.assertTrue(ret, "Failed to create directory") + g.log.info("Directory 'test_compilation' on %s created " + "successfully", self.mounts[0]) + + # Compile gluster on mountpoint + cmd = ("cd %s/test_compilation ; rm -rf glusterfs; git clone" + " git://github.com/gluster/glusterfs.git ; cd glusterfs ;" + " ./autogen.sh ;./configure CFLAGS='-g3 -O0 -DDEBUG'; make ;" + " cd ../..;" % self.mounts[0].mountpoint) + proc = g.run_async(self.mounts[0].client_system, cmd) + + # Select bricks to bring offline + bricks_to_bring_offline = select_volume_bricks_to_bring_offline( + self.mnode, self.volname) + 
self.assertIsNotNone(bricks_to_bring_offline, "List is empty") + + # Bring brick offline + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} offline'. + format(bricks_to_bring_offline)) + + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks {} are not offline'. + format(bricks_to_bring_offline)) + g.log.info('Bringing bricks %s offline is successful', + bricks_to_bring_offline) + + # Validate IO + self.assertTrue( + validate_io_procs([proc], self.mounts[0]), + "IO failed on some of the clients" + ) + + # Bring bricks online + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} online'.format + (bricks_to_bring_offline)) + + # Wait for volume processes to be online + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to wait for volume {} processes to " + "be online".format(self.volname))) + + # Verify volume's all process are online + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, ("Volume {} : All process are not online".format + (self.volname))) + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + # Get arequal after getting bricks online + ret, result_after_online = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info("Arequal of mountpoint %s", result_after_online) + + # Compile gluster on mountpoint again + proc1 = g.run_async(self.mounts[0].client_system, cmd) + + # Select bricks to bring offline + bricks_to_bring_offline = select_volume_bricks_to_bring_offline( + self.mnode, self.volname) + self.assertIsNotNone(bricks_to_bring_offline, "List is empty") + + # Bring brick offline + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} offline'. + format(bricks_to_bring_offline)) + + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks {} are not offline'. 
+ format(bricks_to_bring_offline)) + + # Validate IO + self.assertTrue( + validate_io_procs([proc1], self.mounts[0]), + "IO failed on some of the clients" + ) + + # Bring bricks online + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} online'.format + (bricks_to_bring_offline)) + + # Wait for volume processes to be online + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to wait for volume {} processes to " + "be online".format(self.volname))) + + # Verify volume's all process are online + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, ("Volume {} : All process are not online".format + (self.volname))) + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + + # Get arequal after getting bricks online + ret, result_after_online = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info("Arequal of mountpoint %s", result_after_online) diff --git a/tests/functional/arbiter/test_heal_full_after_deleting_the_files_from_bricks.py b/tests/functional/arbiter/test_heal_full_after_deleting_the_files_from_bricks.py new file mode 100755 index 000000000..8e11af6e4 --- /dev/null +++ b/tests/functional/arbiter/test_heal_full_after_deleting_the_files_from_bricks.py @@ -0,0 +1,202 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_heal_complete, + is_volume_in_split_brain, + is_shd_daemon_running) +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import (collect_mounts_arequal, + validate_io_procs, + list_all_files_and_dirs_mounts, + wait_for_io_to_complete) +from glustolibs.gluster.gluster_init import (start_glusterd, + stop_glusterd) +from glustolibs.misc.misc_libs import kill_process + + +@runs_on([['arbiter', 'distributed-arbiter'], + ['glusterfs', 'nfs']]) +class TestArbiterSelfHeal(GlusterBaseClass): + """ + Description: + Arbiter Test cases related to + healing in default configuration of the volume + """ + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, [cls.script_upload_path]) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + self.all_mounts_procs = [] + self.io_validation_complete = False + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + If test method failed before validating IO, tearDown waits for the + IO's to complete and checks for the IO exit status + + Cleanup and umount volume + """ + if not self.io_validation_complete: + ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts) + if not ret: + raise ExecutionError("IO failed on some of the clients") + g.log.info("IO is successful on all mounts") + + # List all files and dirs created + ret = list_all_files_and_dirs_mounts(self.mounts) + if not ret: + raise ExecutionError("Failed to list all files and dirs") + + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_heal_full_after_deleting_files(self): + """ + - Create IO + - Calculate arequal from mount + - kill glusterd process and glustershd process on arbiter nodes + - Delete data from backend from the arbiter nodes + - Start glusterd process and force start the volume + to bring the processes online + - Check if heal is completed + - Check for split-brain + - Calculate arequal checksum and compare it + """ + # pylint: disable=too-many-locals,too-many-statements + # Creating files on client side + for mount_obj in self.mounts: + g.log.info("Generating data for %s:%s", + mount_obj.client_system, mount_obj.mountpoint) + # Create dirs with file + command = ("/usr/bin/env python %s create_deep_dirs_with_files " + "-d 2 -l 2 -n 2 -f 20 %s" + % (self.script_upload_path, mount_obj.mountpoint)) + + proc = 
g.run_async(mount_obj.client_system, command, + user=mount_obj.user) + self.all_mounts_procs.append(proc) + self.io_validation_complete = False + + # Validate IO + ret = validate_io_procs(self.all_mounts_procs, self.mounts) + self.assertTrue(ret, "IO failed on some of the clients") + self.io_validation_complete = True + g.log.info("IO is successful on all mounts") + + # Get arequal before killing gluster processes on arbiter node + ret, result_before_killing_procs = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal before getting bricks offline ' + 'is successful') + + # Kill glusterd process and glustershd process on arbiter node + subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + for subvol in subvols: + arbiter = subvol[-1] + node, brick_path = arbiter.split(':') + # Stop glusterd + ret = stop_glusterd(node) + self.assertTrue(ret, "Failed to stop the glusterd on arbiter node") + # Stop glustershd + ret = kill_process(node, "glustershd") + if not ret: + # Validate glustershd process is not running + self.assertFalse( + is_shd_daemon_running(self.mnode, node, self.volname), + "The glustershd process is still running.") + g.log.info('Killed glusterd and glustershd for all arbiter ' + 'brick successfully') + + # Delete data from backend from the arbiter node + for subvol in subvols: + arbiter = subvol[-1] + # Clearing the arbiter bricks + node, brick_path = arbiter.split(':') + ret, _, err = g.run(node, 'rm -rf %s/*' % brick_path) + self.assertFalse( + ret, err) + g.log.info('Clearing for all arbiter brick is successful') + + # Start glusterd process on each arbiter + for subvol in subvols: + arbiter = subvol[-1] + node, brick_path = arbiter.split(':') + ret = start_glusterd(node) + self.assertTrue( + ret, "Failed to start glusterd on the arbiter node") + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + # Get arequal after healing + ret, result_after_healing = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal after getting bricks online ' + 'is successful') + + # Comparing arequals before before killing arbiter processes + # and after healing + self.assertEqual( + result_before_killing_procs, result_after_healing, + 'Arequals arequals before before killing arbiter ' + 'processes and after healing are not equal') + + g.log.info('Arequals before killing arbiter ' + 'processes and after healing are equal') diff --git a/tests/functional/arbiter/test_metadata_self_heal.py b/tests/functional/arbiter/test_metadata_self_heal.py index 81a098fff..0b2708438 100755 --- a/tests/functional/arbiter/test_metadata_self_heal.py +++ b/tests/functional/arbiter/test_metadata_self_heal.py @@ -207,10 +207,7 @@ class TestMetadataSelfHeal(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - 
bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -332,8 +329,9 @@ class TestMetadataSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks online # and after bringing bricks online - self.assertItemsEqual(result_before_online, result_after_online, - 'Checksums are not equal') + self.assertEqual(sorted(result_before_online), + sorted(result_after_online), + 'Checksums are not equal') g.log.info('Checksums before bringing bricks online ' 'and after bringing bricks online are equal') @@ -356,11 +354,6 @@ class TestMetadataSelfHeal(GlusterBaseClass): ret, out, err = g.run(node, command) file_list = out.split() - g.log.info('Checking for user and group on %s...', node) - conn = g.rpyc_get_connection(node) - if conn is None: - raise Exception("Unable to get connection on node %s" % node) - for file_name in file_list: file_to_check = '%s/%s/%s' % (nodes_to_check[node], test_meta_data_self_heal_folder, @@ -368,26 +361,30 @@ class TestMetadataSelfHeal(GlusterBaseClass): g.log.info('Checking for permissions, user and group for %s', file_name) + # Check for permissions - permissions = oct( - conn.modules.os.stat(file_to_check).st_mode)[-3:] - self.assertEqual(permissions, '444', + cmd = ("stat -c '%a %n' {} | awk '{{print $1}}'" + .format(file_to_check)) + ret, permissions, _ = g.run(node, cmd) + self.assertEqual(permissions.split('\n')[0], '444', 'Permissions %s is not equal to 444' % permissions) g.log.info("Permissions are '444' for %s", file_name) # Check for user - uid = conn.modules.os.stat(file_to_check).st_uid - username = conn.modules.pwd.getpwuid(uid).pw_name - self.assertEqual(username, 'qa', 'User %s is not equal qa' + cmd = ("ls -ld {} | awk '{{print $3}}'" + .format(file_to_check)) + ret, username, _ = g.run(node, cmd) + self.assertEqual(username.split('\n')[0], + 'qa', 'User %s is not equal qa' % username) g.log.info("User is 'qa' for %s", file_name) # Check for group - gid = conn.modules.os.stat(file_to_check).st_gid - groupname = conn.modules.grp.getgrgid(gid).gr_name - self.assertEqual(groupname, 'qa', 'Group %s is not equal qa' + cmd = ("ls -ld {} | awk '{{print $4}}'" + .format(file_to_check)) + ret, groupname, _ = g.run(node, cmd) + self.assertEqual(groupname.split('\n')[0], + 'qa', 'Group %s is not equal qa' % groupname) g.log.info("Group is 'qa' for %s", file_name) - - g.rpyc_close_connection(host=node) diff --git a/tests/functional/arbiter/test_metadata_self_heal_on_open_fd.py b/tests/functional/arbiter/test_metadata_self_heal_on_open_fd.py new file mode 100644 index 000000000..8e4df5e9f --- /dev/null +++ b/tests/functional/arbiter/test_metadata_self_heal_on_open_fd.py @@ -0,0 +1,244 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +import os +import copy +from socket import gethostbyname +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + get_all_bricks) +from glustolibs.gluster.heal_libs import is_volume_in_split_brain +from glustolibs.gluster.heal_ops import get_heal_info_summary +from glustolibs.gluster.glusterfile import get_file_stat +from glustolibs.gluster.volume_ops import get_volume_info +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.glusterfile import get_pathinfo +from glustolibs.gluster.lib_utils import (collect_bricks_arequal, + add_user, del_user) +from glustolibs.io.utils import collect_mounts_arequal + + +@runs_on([['replicated', 'distributed-replicated', 'arbiter', + 'distributed-arbiter'], + ['glusterfs']]) +class TestMetadataSelfHealOpenfd(GlusterBaseClass): + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + self.user = "qa" + self.nodes = [] + self.nodes = copy.deepcopy(self.servers) + self.nodes.append(self.clients[0]) + + # Create user for changing ownership + for node in self.nodes: + ret = add_user(node, self.user) + self.assertTrue(ret, "Failed to create user") + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup and Mount_Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + for node in self.nodes: + del_user(node, self.user) + + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + def _verify_stat_info(self, nodes_to_check, test_file): + """ + Helper method to verify stat on all bricks and client. + """ + for node in nodes_to_check: + filepath = nodes_to_check[node] + "/" + test_file + stat_dict = get_file_stat(node, filepath) + self.assertIsNotNone(stat_dict, "stat on {} failed" + .format(test_file)) + self.assertEqual(stat_dict['username'], self.user, + "Expected qa but found {}" + .format(stat_dict['username'])) + self.assertEqual(stat_dict['groupname'], self.user, + "Expected gid qa but found {}" + .format(stat_dict['groupname'])) + self.assertEqual(stat_dict['access'], '777', + "Expected permission 777 but found {}" + .format(stat_dict['access'])) + + def test_metadata_self_heal_on_open_fd(self): + """ + Description: Pro-active metadata self heal on open fd + + Steps : + 1) Create a volume. + 2) Mount the volume using FUSE. + 3) Create test executable on volume mount. + 4) While test execution is in progress, bring down brick1. + 5) From mount point, change ownership, permission, group id of + the test file. + 6) While test execution is in progress, bring back brick1 online. + 7) Do stat on the test file to check ownership, permission, + group id on mount point and on bricks + 8) Stop test execution. 
+ 9) Do stat on the test file to check ownership, permission, + group id on mount point and on bricks. + 10) There should be no pending heals in the heal info command. + 11) There should be no split-brain. + 12) Calculate arequal of the bricks and mount point and it + should be same. + """ + # pylint: disable=too-many-statements,too-many-locals + # pylint: disable=too-many-branches + bricks_list = get_all_bricks(self.mnode, self.volname) + self.assertIsNotNone(bricks_list, 'Brick list is None') + client = self.clients[0] + + # Create test executable file on mount point + m_point = self.mounts[0].mountpoint + test_file = "testfile.sh" + cmd = ("echo 'while true; do echo 'Press CTRL+C to stop execution';" + " done' >> {}/{}".format(m_point, test_file)) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, "Failed to create test file") + + # Execute the test file + cmd = "cd {}; sh {}".format(m_point, test_file) + g.run_async(client, cmd) + + # Get pid of the test file + _cmd = "ps -aux | grep -v grep | grep testfile.sh | awk '{print $2}'" + ret, out, _ = g.run(client, _cmd) + self.assertEqual(ret, 0, "Failed to get pid of test file execution") + + # Bring brick1 offline + ret = bring_bricks_offline(self.volname, [bricks_list[1]]) + self.assertTrue(ret, 'Failed to bring bricks {} ' + 'offline'.format(bricks_list[1])) + + ret = are_bricks_offline(self.mnode, self.volname, + [bricks_list[1]]) + self.assertTrue(ret, 'Bricks {} are not ' + 'offline'.format(bricks_list[1])) + + # change uid, gid and permission from client + cmd = "chown {} {}/{}".format(self.user, m_point, test_file) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, "chown failed") + + cmd = "chgrp {} {}/{}".format(self.user, m_point, test_file) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, "chgrp failed") + + cmd = "chmod 777 {}/{}".format(m_point, test_file) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, "chown failed") + + # Bring brick1 online + ret = bring_bricks_online(self.mnode, self.volname, + [bricks_list[1]]) + self.assertTrue(ret, 'Failed to bring bricks {} online' + .format(bricks_list[1])) + + ret = get_pathinfo(client, "{}/{}" + .format(m_point, test_file)) + self.assertIsNotNone(ret, "Unable to get " + "trusted.glusterfs.pathinfo of file") + nodes_to_check = {} + bricks_list = [] + for brick in ret['brickdir_paths']: + node, brick_path = brick.split(':') + if node[0:2].isdigit(): + nodes_to_check[node] = os.path.dirname(brick_path) + path = node + ":" + os.path.dirname(brick_path) + else: + nodes_to_check[gethostbyname(node)] = (os.path.dirname( + brick_path)) + path = gethostbyname(node) + ":" + os.path.dirname(brick_path) + bricks_list.append(path) + nodes_to_check[client] = m_point + + # Verify that the changes are successful on bricks and client + self._verify_stat_info(nodes_to_check, test_file) + + # Kill the test executable file + for pid in out.split('\n')[:-1]: + cmd = "kill -s 9 {}".format(pid) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, "Failed to kill test file execution") + + # Verify that the changes are successful on bricks and client + self._verify_stat_info(nodes_to_check, test_file) + + # Verify there are no pending heals + heal_info = get_heal_info_summary(self.mnode, self.volname) + self.assertIsNotNone(heal_info, 'Unable to get heal info') + for brick in bricks_list: + self.assertEqual(int(heal_info[brick]['numberOfEntries']), + 0, ("Pending heal on brick {} ".format(brick))) + + # Check for split-brain + ret = 
is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + # Get arequal for mount + ret, arequals = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + mount_point_total = arequals[0].splitlines()[-1].split(':')[-1] + + # Collecting data bricks + vol_info = get_volume_info(self.mnode, self.volname) + self.assertIsNotNone(vol_info, 'Unable to get volume info') + data_brick_list = [] + for brick in bricks_list: + for brick_info in vol_info[self.volname]["bricks"]["brick"]: + if brick_info["name"] == brick: + if brick_info["isArbiter"] == "0": + data_brick_list.append(brick) + bricks_list = data_brick_list + + # Get arequal on bricks and compare with mount_point_total + # It should be the same + arbiter = self.volume_type.find('arbiter') >= 0 + subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + stop = len(subvols[0]) - 1 if arbiter else len(subvols[0]) + for subvol in subvols: + subvol = [i for i in subvol if i in bricks_list] + if subvol: + ret, arequal = collect_bricks_arequal(subvol[0:stop]) + self.assertTrue(ret, 'Unable to get arequal checksum ' + 'on {}'.format(subvol[0:stop])) + self.assertEqual(len(set(arequal)), 1, 'Mismatch of arequal ' + 'checksum among {} is ' + 'identified'.format(subvol[0:stop])) + brick_total = arequal[-1].splitlines()[-1].split(':')[-1] + self.assertEqual(brick_total, mount_point_total, + "Arequals for mountpoint and {} " + "are not equal".format(subvol[0:stop])) diff --git a/tests/functional/arbiter/test_mount_point_while_deleting_files.py b/tests/functional/arbiter/test_mount_point_while_deleting_files.py index 6acb8e0c8..68f880663 100755 --- a/tests/functional/arbiter/test_mount_point_while_deleting_files.py +++ b/tests/functional/arbiter/test_mount_point_while_deleting_files.py @@ -1,4 +1,4 @@ -# Copyright (C) 2016-2020 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2016-2020 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -34,8 +34,7 @@ from glustolibs.gluster.mount_ops import (mount_volume, from glustolibs.misc.misc_libs import upload_scripts -@runs_on([['arbiter'], - ['glusterfs']]) +@runs_on([['arbiter'], ['glusterfs']]) class VolumeSetDataSelfHealTests(GlusterBaseClass): @classmethod def setUpClass(cls): @@ -57,6 +56,7 @@ class VolumeSetDataSelfHealTests(GlusterBaseClass): # Setup Volumes cls.volume_configs = [] cls.mounts_dict_list = [] + cls.client = cls.clients[0] # Define two replicated volumes for i in range(1, 3): @@ -67,24 +67,22 @@ class VolumeSetDataSelfHealTests(GlusterBaseClass): cls.volume_configs.append(volume_config) # Redefine mounts - for client in cls.all_clients_info.keys(): - mount = { - 'protocol': cls.mount_type, - 'server': cls.mnode, - 'volname': volume_config['name'], - 'client': cls.all_clients_info[client], - 'mountpoint': (os.path.join( - "/mnt", '_'.join([volume_config['name'], - cls.mount_type]))), - 'options': '' - } - cls.mounts_dict_list.append(mount) - - cls.mounts = create_mount_objs(cls.mounts_dict_list) + mount = { + 'protocol': cls.mount_type, + 'server': cls.mnode, + 'volname': volume_config['name'], + 'client': cls.all_clients_info[cls.client], + 'mountpoint': (os.path.join( + "/mnt", '_'.join([volume_config['name'], + cls.mount_type]))), + 'options': '' + } + cls.mounts_dict_list.append(mount) + + cls.mounts = create_mount_objs(cls.mounts_dict_list) # Create and mount volumes cls.mount_points = [] - cls.client = cls.clients[0] for volume_config in cls.volume_configs: # Setup volume @@ -146,39 +144,33 @@ class VolumeSetDataSelfHealTests(GlusterBaseClass): raise ExecutionError("Failed to list all files and dirs") g.log.info("Listing all files and directories is successful") - @classmethod - def tearDownClass(cls): - """ - Clean up the volume and umount volume from client - """ # umount all volumes - for mount_obj in cls.mounts: + for mount_point in self.mount_points: ret, _, _ = umount_volume( - mount_obj.client_system, mount_obj.mountpoint) + self.client, mount_point) if ret: raise ExecutionError( "Failed to umount on volume %s " - % cls.volname) + % self.volname) g.log.info("Successfully umounted %s on client %s", - cls.volname, mount_obj.client_system) - ret = rmdir(mount_obj.client_system, mount_obj.mountpoint) + self.volname, self.client) + ret = rmdir(self.client, mount_point) if not ret: raise ExecutionError( - ret, "Failed to remove directory mount directory.") + "Failed to remove directory mount directory.") g.log.info("Mount directory is removed successfully") # stopping all volumes - g.log.info("Starting to Cleanup all Volumes") - volume_list = get_volume_list(cls.mnode) + volume_list = get_volume_list(self.mnode) for volume in volume_list: - ret = cleanup_volume(cls.mnode, volume) + ret = cleanup_volume(self.mnode, volume) if not ret: raise ExecutionError("Failed to cleanup Volume %s" % volume) g.log.info("Volume: %s cleanup is done", volume) g.log.info("Successfully Cleanedup all Volumes") - # calling GlusterBaseClass tearDownClass - cls.get_super_method(cls, 'tearDownClass')() + # calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() def test_mount_point_not_go_to_rofs(self): """ @@ -218,10 +210,8 @@ class VolumeSetDataSelfHealTests(GlusterBaseClass): for volname in volume_list: bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, 
volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = ( + bricks_to_bring_offline_dict['volume_bricks']) # bring bricks offline g.log.info("Going to bring down the brick process for %s", @@ -251,3 +241,4 @@ class VolumeSetDataSelfHealTests(GlusterBaseClass): self.assertTrue( validate_io_procs(self.all_mounts_procs, self.mounts), "IO failed on some of the clients") + self.io_validation_complete = True diff --git a/tests/functional/arbiter/test_self_heal_50k_files.py b/tests/functional/arbiter/test_self_heal_50k_files.py new file mode 100644 index 000000000..887959fa0 --- /dev/null +++ b/tests/functional/arbiter/test_self_heal_50k_files.py @@ -0,0 +1,140 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import ( + verify_all_process_of_volume_are_online, + wait_for_volume_process_to_be_online) +from glustolibs.gluster.brick_libs import (select_bricks_to_bring_offline, + bring_bricks_offline, + bring_bricks_online, + are_bricks_offline) +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_volume_in_split_brain) +from glustolibs.io.utils import validate_io_procs + + +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestSelfHeal50kFiles(GlusterBaseClass): + """ + Description: + Arbiter self heal of 50k files + """ + def setUp(self): + # Calling GlusterBaseClass + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + g.log.info("Starting to Setup Volume and Mount Volume") + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + + # Cleanup and umount volume + g.log.info("Starting to Unmount Volume and Cleanup Volume") + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_self_heal_50k_files(self): + """ + Description: + - Select bricks to bring offline + - Bring brick offline + - Create 50k files + - Validate IO + - Bring bricks online + - Monitor heal + - Check for split-brain + - Validate IO + """ + # pylint: disable=too-many-statements,too-many-locals + # Select bricks to bring 
offline + bricks_to_bring_offline_dict = select_bricks_to_bring_offline( + self.mnode, self.volname) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] + + # Bring brick offline + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' % + bricks_to_bring_offline) + self.assertIsNotNone(bricks_to_bring_offline, "List is empty") + + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks %s are not offline' + % bricks_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + bricks_to_bring_offline) + + # Create 50k files + command = ("cd %s ; " + "for i in `seq 1 50000` ; " + "do dd if=/dev/urandom of=test.$i " + "bs=100k count=1 ; " + "done ;" + % self.mounts[0].mountpoint) + proc = g.run_async(self.mounts[0].client_system, command, + user=self.mounts[0].user) + + # Validate IO + self.assertTrue( + validate_io_procs([proc], self.mounts[0]), + "IO failed on some of the clients" + ) + + # Bring brick online + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s online' % + bricks_to_bring_offline) + g.log.info('Bringing bricks %s online is successful', + bricks_to_bring_offline) + + # Wait for volume processes to be online + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to wait for volume %s processes to " + "be online", self.volname)) + g.log.info("Successful in waiting for volume %s processes to be " + "online", self.volname) + + # Verify volume's all process are online + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, ("Volume %s : All process are not online" + % self.volname)) + g.log.info("Volume %s : All process are online", self.volname) + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname, + timeout_period=3000) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') diff --git a/tests/functional/arbiter/test_self_heal_50k_files_heal_command_by_add_brick.py b/tests/functional/arbiter/test_self_heal_50k_files_heal_command_by_add_brick.py index 06f1f42c0..da98c4b7f 100644 --- a/tests/functional/arbiter/test_self_heal_50k_files_heal_command_by_add_brick.py +++ b/tests/functional/arbiter/test_self_heal_50k_files_heal_command_by_add_brick.py @@ -107,10 +107,7 @@ class TestSelfHeal(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) @@ -211,9 +208,10 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks online # and after bringing bricks online - self.assertItemsEqual(result_before_online, result_after_online, - 'Checksums before and ' - 'after bringing bricks online are not equal') + self.assertEqual(sorted(result_before_online), + 
sorted(result_after_online), + 'Checksums before and ' + 'after bringing bricks online are not equal') g.log.info('Checksums before and after bringing bricks online ' 'are equal') @@ -242,8 +240,9 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals after bringing bricks online # and after adding bricks - self.assertItemsEqual(result_after_online, result_after_adding_bricks, - 'Checksums after bringing bricks online and ' - 'after adding bricks are not equal') + self.assertEqual(sorted(result_after_online), + sorted(result_after_adding_bricks), + 'Checksums after bringing bricks online' + 'and after adding bricks are not equal') g.log.info('Checksums after bringing bricks online and ' 'after adding bricks are equal') diff --git a/tests/functional/arbiter/test_self_heal_daemon.py b/tests/functional/arbiter/test_self_heal_daemon.py new file mode 100644 index 000000000..37470e41c --- /dev/null +++ b/tests/functional/arbiter/test_self_heal_daemon.py @@ -0,0 +1,256 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import ( + verify_all_process_of_volume_are_online, + wait_for_volume_process_to_be_online) +from glustolibs.gluster.brick_libs import ( + select_volume_bricks_to_bring_offline, + bring_bricks_offline, + bring_bricks_online, + are_bricks_offline) +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_volume_in_split_brain) +from glustolibs.io.utils import (collect_mounts_arequal) +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.glusterfile import get_file_stat + + +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestSelfHealDaemon(GlusterBaseClass): + """ + Description: + Arbiter Test cases related to self heal + of data and hardlink + """ + def setUp(self): + # Calling GlusterBaseClass + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_self_heal_daemon(self): + """ + Test Data-Self-Heal(heal command) + Description: + - Create directory test_hardlink_self_heal + - Create directory 
test_data_self_heal + - Creating files for hardlinks and data files + - Get arequal before getting bricks offline + - Select bricks to bring offline + - Bring brick offline + - Create hardlinks and append data to data files + - Bring brick online + - Wait for volume processes to be online + - Verify volume's all process are online + - Monitor heal completion + - Check for split-brain + - Get arequal after getting bricks online + - Select bricks to bring offline + - Bring brick offline + - Truncate data to data files and verify hardlinks + - Bring brick online + - Wait for volume processes to be online + - Verify volume's all process are online + - Monitor heal completion + - Check for split-brain + - Get arequal again + + """ + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + # Creating directory test_hardlink_self_heal + ret = mkdir(self.mounts[0].client_system, "{}/test_hardlink_self_heal" + .format(self.mounts[0].mountpoint)) + self.assertTrue(ret, "Failed to create directory") + g.log.info("Directory 'test_hardlink_self_heal' on %s created " + "successfully", self.mounts[0]) + + # Creating directory test_data_self_heal + ret = mkdir(self.mounts[0].client_system, "{}/test_data_self_heal" + .format(self.mounts[0].mountpoint)) + self.assertTrue(ret, "Failed to create directory") + g.log.info("Directory test_hardlink_self_heal on %s created " + "successfully", self.mounts[0]) + + # Creating files for hardlinks and data files + cmd = ('cd %s/test_hardlink_self_heal;for i in `seq 1 5`;' + 'do mkdir dir.$i ; for j in `seq 1 10` ; do dd if=' + '/dev/urandom of=dir.$i/file.$j bs=1k count=$j;done; done;' + 'cd ..' % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to create file on mountpoint") + g.log.info("Successfully created files on mountpoint") + + cmd = ('cd %s/test_data_self_heal;for i in `seq 1 100`;' + 'do dd if=/dev/urandom of=file.$i bs=128K count=$i;done;' + 'cd ..' % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to create file on mountpoint") + g.log.info("Successfully created files on mountpoint") + + # Get arequal before getting bricks offline + ret, result_before_online = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Arequal before getting bricks online-%s', + result_before_online) + + # Select bricks to bring offline + bricks_to_bring_offline = select_volume_bricks_to_bring_offline( + self.mnode, self.volname) + self.assertIsNotNone(bricks_to_bring_offline, "List is empty") + + # Bring brick offline + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} offline'. + format(bricks_to_bring_offline)) + + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks {} are not offline'. + format(bricks_to_bring_offline)) + g.log.info('Bringing bricks %s offline is successful', + bricks_to_bring_offline) + + # Append data to data files and create hardlinks + cmd = ('cd %s/test_data_self_heal;for i in `seq 1 100`;' + 'do dd if=/dev/urandom of=file.$i bs=512K count=$i ; done ;' + 'cd .. 
' % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to modify data files.") + g.log.info("Successfully modified data files") + + cmd = ('cd %s/test_hardlink_self_heal;for i in `seq 1 5` ;do ' + 'for j in `seq 1 10`;do ln dir.$i/file.$j dir.$i/link_file.$j;' + 'done ; done ; cd .. ' % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Hardlinks creation failed") + g.log.info("Successfully created hardlinks of files") + + # Bring bricks online + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} online'.format + (bricks_to_bring_offline)) + g.log.info('Bringing bricks %s online is successful', + bricks_to_bring_offline) + + # Wait for volume processes to be online + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to wait for volume {} processes to " + "be online".format(self.volname))) + g.log.info("Successful in waiting for volume %s processes to be " + "online", self.volname) + + # Verify volume's all process are online + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, ("Volume {} : All process are not online".format + (self.volname))) + g.log.info("Volume %s : All process are online", self.volname) + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + # Get arequal after getting bricks online + ret, result_after_online = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Arequal after getting bricks online ' + 'is %s', result_after_online) + + # Select bricks to bring offline + bricks_to_bring_offline = select_volume_bricks_to_bring_offline( + self.mnode, self.volname) + self.assertIsNotNone(bricks_to_bring_offline, "List is empty") + + # Bring brick offline + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} offline'.format + (bricks_to_bring_offline)) + + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks {} are not offline'.format + (bricks_to_bring_offline)) + g.log.info('Bringing bricks %s offline is successful', + bricks_to_bring_offline) + + # Truncate data to data files and verify hardlinks + cmd = ('cd %s/test_data_self_heal ; for i in `seq 1 100` ;' + 'do truncate -s $(( $i * 128)) file.$i ; done ; cd ..' 
+ % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to truncate files") + g.log.info("Successfully truncated files on mountpoint") + + file_path = ('%s/test_hardlink_self_heal/dir{1..5}/file{1..10}' + % (self.mounts[0].mountpoint)) + link_path = ('%s/test_hardlink_self_heal/dir{1..5}/link_file{1..10}' + % (self.mounts[0].mountpoint)) + file_stat = get_file_stat(self.mounts[0], file_path) + link_stat = get_file_stat(self.mounts[0], link_path) + self.assertEqual(file_stat, link_stat, "Verification of hardlinks " + "failed") + g.log.info("Successfully verified hardlinks") + + # Bring brick online + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} online'.format + (bricks_to_bring_offline)) + g.log.info('Bringing bricks %s online is successful', + bricks_to_bring_offline) + + # Wait for volume processes to be online + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to wait for volume {} processes to " + "be online".format(self.volname))) + g.log.info("Successful in waiting for volume %s processes to be " + "online", self.volname) + + # Verify volume's all process are online + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, ("Volume {} : All process are not online".format + (self.volname))) + g.log.info("Volume %s : All process are online", self.volname) + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') diff --git a/tests/functional/arbiter/test_self_heal_differing_in_file_type.py b/tests/functional/arbiter/test_self_heal_differing_in_file_type.py index d8d93a9ee..0c49bcd8f 100755 --- a/tests/functional/arbiter/test_self_heal_differing_in_file_type.py +++ b/tests/functional/arbiter/test_self_heal_differing_in_file_type.py @@ -152,10 +152,7 @@ class TestSelfHeal(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -179,9 +176,10 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks offline # and after bringing bricks offline - self.assertItemsEqual(result_before_offline, result_after_offline, - 'Checksums before and after ' - 'bringing bricks offline are not equal') + self.assertEqual(sorted(result_before_offline), + sorted(result_after_offline), + 'Checksums before and after bringing bricks' + ' offline are not equal') g.log.info('Checksums before and after ' 'bringing bricks offline are equal') @@ -271,8 +269,9 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks online # and after bringing bricks online - self.assertItemsEqual(result_before_online, result_after_online, - 'Checksums before and ' - 'after bringing bricks online are not equal') + self.assertEqual(sorted(result_before_online), + sorted(result_after_online), + 'Checksums before and after bringing bricks' + ' online are not equal') g.log.info('Checksums before and after bringing bricks 
online ' 'are equal') diff --git a/tests/functional/arbiter/test_self_heal_symbolic_links.py b/tests/functional/arbiter/test_self_heal_symbolic_links.py index 6907f8805..655ea7564 100644 --- a/tests/functional/arbiter/test_self_heal_symbolic_links.py +++ b/tests/functional/arbiter/test_self_heal_symbolic_links.py @@ -169,10 +169,7 @@ class TestSelfHeal(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -196,9 +193,10 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks offline # and after bringing bricks offline - self.assertItemsEqual(result_before_offline, result_after_offline, - 'Checksums before and ' - 'after bringing bricks online are not equal') + self.assertEqual(sorted(result_before_offline), + sorted(result_after_offline), + 'Checksums before and after bringing bricks ' + 'online are not equal') g.log.info('Checksums before and after bringing bricks online ' 'are equal') @@ -323,8 +321,9 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks online # and after bringing bricks online - self.assertItemsEqual(result_before_online, result_after_online, - 'Checksums before and ' - 'after bringing bricks online are not equal') + self.assertEqual(sorted(result_before_online), + sorted(result_after_online), + 'Checksums before and after bringing bricks ' + 'online are not equal') g.log.info('Checksums before and after bringing bricks online ' 'are equal') diff --git a/tests/functional/arbiter/test_split_brain.py b/tests/functional/arbiter/test_split_brain.py new file mode 100644 index 000000000..e2684be49 --- /dev/null +++ b/tests/functional/arbiter/test_split_brain.py @@ -0,0 +1,165 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
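The arequal comparison hunks in the self-heal test modules above replace assertItemsEqual with assertEqual on sorted lists, since assertItemsEqual is not available in Python 3 (it became assertCountEqual). A minimal standalone sketch of the same order-insensitive comparison, assuming two lists of arequal checksum strings such as those returned by collect_mounts_arequal (the checksum values below are placeholders, not real arequal output):

import unittest

class ArequalComparisonSketch(unittest.TestCase):
    def test_checksums_match_regardless_of_order(self):
        # One checksum string per mount/brick; placeholder values only.
        result_before_online = ['c0ffee01', 'c0ffee02']
        result_after_online = ['c0ffee02', 'c0ffee01']

        # Sorting both lists and using assertEqual gives the same
        # order-insensitive semantics as the old assertItemsEqual.
        self.assertEqual(sorted(result_before_online),
                         sorted(result_after_online),
                         'Checksums are not equal')

if __name__ == '__main__':
    unittest.main()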
+ +# pylint: disable=too-many-statements, too-many-locals +from glusto.core import Glusto as g + +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + wait_for_bricks_to_be_online) +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.gluster.volume_ops import set_volume_options +from glustolibs.io.utils import validate_io_procs +from glustolibs.gluster.heal_libs import is_volume_in_split_brain +from glustolibs.gluster.volume_libs import get_subvols + + +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestSplitBrain(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts " + "to clients %s" % cls.clients) + + # Setup Volume and Mount Volume + ret = cls.setup_volume_and_mount_volume(cls.mounts, True) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + @classmethod + def tearDownClass(cls): + """ + Cleanup Volume + """ + ret = cls.unmount_volume_and_cleanup_volume(cls.mounts) + if not ret: + raise ExecutionError("Failed to create volume") + + cls.get_super_method(cls, 'tearDownClass')() + + def _bring_bricks_online(self): + """ + Bring bricks online and monitor heal completion + """ + # Bring bricks online + ret = bring_bricks_online( + self.mnode, + self.volname, + self.bricks_to_bring_offline, + bring_bricks_online_methods=['volume_start_force']) + self.assertTrue(ret, 'Failed to bring bricks online') + + # Wait for volume processes to be online + ret = wait_for_bricks_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to wait for volume {} processes to " + "be online".format(self.volname))) + + def test_split_brain(self): + + """ + Description: Create split-brain on files and check if IO's fail + - Disable self-heal and cluster-quorum-type + - Get the bricks from the volume + - Write IO and validate IO + - Bring 1st set of brick offline(1 Data brick and arbiter brick) + - Write IO and validate IO + - Bring 2nd set of bricks offline(1 Data brick and arbiter brick) + - Write IO and validate IO + - Check volume is in split-brain + - Write IO and validate IO - should fail + - Enable self-heal and cluster-quorum-type + - Write IO and validate IO - should fail + """ + # Disable self-heal and cluster-quorum-type + options = {"self-heal-daemon": "off", + "cluster.quorum-type": "none"} + ret = set_volume_options(self.mnode, self.volname, options) + self.assertTrue(ret, ("Unable to set volume option %s for " + "volume %s" % (options, self.volname))) + + # Get the bricks from the volume + sub_vols = get_subvols(self.mnode, self.volname) + self.bricks_to_bring_offline = list(sub_vols['volume_subvols'][0]) + + # Write IO's + write_cmd = ("/usr/bin/env python %s create_files -f 1 " + "--base-file-name test_file --fixed-file-size 1k %s" % + (self.script_upload_path, + self.mounts[0].mountpoint)) + ret, _, _ = g.run(self.mounts[0].client_system, write_cmd) + + # Bring 1st set of brick offline(1 Data brick and arbiter brick) + for bricks in ((0, -1), (1, -1)): + down_bricks = [] + for brick in bricks: + 
down_bricks.append(self.bricks_to_bring_offline[brick]) + ret = bring_bricks_offline(self.volname, down_bricks) + self.assertTrue(ret, 'Failed to bring bricks {} offline'. + format(down_bricks)) + proc = g.run_async(self.mounts[0].client_system, write_cmd) + + # Validate I/O + self.assertTrue( + validate_io_procs([proc], self.mounts), + "IO failed on some of the clients" + ) + + # Bring bricks online + self._bring_bricks_online() + + # Check volume is in split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertTrue(ret, "unable to create split-brain scenario") + g.log.info("Successfully created split brain scenario") + + # Write IO's + proc2 = g.run_async(self.mounts[0].client_system, write_cmd) + + # Validate I/O + self.assertFalse( + validate_io_procs([proc2], self.mounts), + "IO passed on split-brain" + ) + g.log.info("Expected - IO's failed due to split-brain") + + # Enable self-heal and cluster-quorum-type + options = {"self-heal-daemon": "on", + "cluster.quorum-type": "auto"} + ret = set_volume_options(self.mnode, self.volname, options) + self.assertTrue(ret, ("Unable to set volume option %s for " + "volume %s" % (options, self.volname))) + + # Write IO's + proc3 = g.run_async(self.mounts[0].client_system, write_cmd) + + # Validate I/O + self.assertFalse( + validate_io_procs([proc3], self.mounts), + "IO passed on split-brain" + ) + g.log.info("Expected - IO's failed due to split-brain") diff --git a/tests/functional/arbiter/test_verify_metadata_and_data_heal.py b/tests/functional/arbiter/test_verify_metadata_and_data_heal.py new file mode 100644 index 000000000..d48e36e73 --- /dev/null +++ b/tests/functional/arbiter/test_verify_metadata_and_data_heal.py @@ -0,0 +1,297 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
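In test_split_brain above, the index pairs ((0, -1), (1, -1)) select one data brick plus the arbiter brick (the last brick of an arbiter subvol) for each offline cycle, so each data brick accumulates writes the other never saw. A small sketch of that selection logic, assuming a subvol listed in "host:/path" order with the arbiter last; the brick paths and helper name are illustrative only:

def pick_bricks_to_kill(subvol_bricks):
    """Yield (data_brick, arbiter_brick) pairs used to create split-brain.

    Each cycle pairs a different data brick with the arbiter, mirroring
    the ((0, -1), (1, -1)) indices used in the test above.
    """
    arbiter_brick = subvol_bricks[-1]
    for data_index in (0, 1):
        yield subvol_bricks[data_index], arbiter_brick

subvol = ['server1:/bricks/brick0', 'server2:/bricks/brick1',
          'server3:/bricks/arbiter0']
for data_brick, arbiter in pick_bricks_to_kill(subvol):
    print('Would bring offline:', data_brick, arbiter)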
+ +from glusto.core import Glusto as g + +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + get_online_bricks_list) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.heal_libs import ( + is_heal_complete, is_volume_in_split_brain, monitor_heal_completion, + wait_for_self_heal_daemons_to_be_online) +from glustolibs.gluster.heal_ops import (disable_self_heal_daemon, + enable_self_heal_daemon, trigger_heal) +from glustolibs.gluster.lib_utils import (add_user, collect_bricks_arequal, + del_user, group_add, group_del) +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.io.utils import list_all_files_and_dirs_mounts + + +@runs_on([['arbiter', 'replicated'], ['glusterfs']]) +class TestMetadataAndDataHeal(GlusterBaseClass): + '''Description: Verify shd heals files after performing metadata and data + operations while a brick was down''' + def _dac_helper(self, host, option): + '''Helper for creating, deleting users and groups''' + + # Permission/Ownership changes required only for `test_metadata..` + # tests, using random group and usernames + if 'metadata' not in self.test_dir: + return + + if option == 'create': + # Groups + for group in ('qa_func', 'qa_system'): + if not group_add(host, group): + raise ExecutionError('Unable to {} group {} on ' + '{}'.format(option, group, host)) + + # User + if not add_user(host, 'qa_all', group='qa_func'): + raise ExecutionError('Unable to {} user {} under {} on ' + '{}'.format(option, 'qa_all', 'qa_func', + host)) + elif option == 'delete': + # Groups + for group in ('qa_func', 'qa_system'): + if not group_del(host, group): + raise ExecutionError('Unable to {} group {} on ' + '{}'.format(option, group, host)) + + # User + if not del_user(host, 'qa_all'): + raise ExecutionError('Unable to {} user on {}'.format( + option, host)) + + def setUp(self): + self.get_super_method(self, 'setUp')() + + # A single mount is enough for all the tests + self.mounts = self.mounts[0:1] + self.client = self.mounts[0].client_system + + # Use testcase name as test directory + self.test_dir = self.id().split('.')[-1] + self.fqpath = self.mounts[0].mountpoint + '/' + self.test_dir + + if not self.setup_volume_and_mount_volume(mounts=self.mounts): + raise ExecutionError('Failed to setup and mount ' + '{}'.format(self.volname)) + + # Crete group and user names required for the test + self._dac_helper(host=self.client, option='create') + + def tearDown(self): + # Delete group and user names created as part of setup + self._dac_helper(host=self.client, option='delete') + + if not self.unmount_volume_and_cleanup_volume(mounts=self.mounts): + raise ExecutionError('Not able to unmount and cleanup ' + '{}'.format(self.volname)) + + self.get_super_method(self, 'tearDown')() + + def _perform_io_and_disable_self_heal(self): + '''Refactor of steps common to all tests: Perform IO, disable heal''' + ret = mkdir(self.client, self.fqpath) + self.assertTrue(ret, + 'Directory creation failed on {}'.format(self.client)) + self.io_cmd = 'cat /dev/urandom | tr -dc [:space:][:print:] | head -c ' + # Create 6 dir's, 6 files and 6 files in each subdir with 10K data + file_io = ('''cd {0}; for i in `seq 1 6`; + do mkdir dir.$i; {1} 10K > file.$i; + for j in `seq 1 6`; + do {1} 10K > dir.$i/file.$j; done; + done;'''.format(self.fqpath, self.io_cmd)) + ret, _, err = g.run(self.client, 
file_io) + self.assertEqual(ret, 0, 'Unable to create directories and data files') + self.assertFalse(err, '{0} failed with {1}'.format(file_io, err)) + + # Disable self heal deamon + self.assertTrue(disable_self_heal_daemon(self.mnode, self.volname), + 'Disabling self-heal-daemon falied') + + def _perform_brick_ops_and_enable_self_heal(self, op_type): + '''Refactor of steps common to all tests: Brick down and perform + metadata/data operations''' + # First brick in the subvol will always be online and used for self + # heal, so make keys match brick index + self.op_cmd = { + # Metadata Operations (owner and permission changes) + 'metadata': { + 2: + '''cd {0}; for i in `seq 1 3`; do chown -R qa_all:qa_func \ + dir.$i file.$i; chmod -R 555 dir.$i file.$i; done;''', + 3: + '''cd {0}; for i in `seq 1 3`; do chown -R :qa_system \ + dir.$i file.$i; chmod -R 777 dir.$i file.$i; done;''', + # 4 - Will be used for final data consistency check + 4: + '''cd {0}; for i in `seq 1 6`; do chown -R qa_all:qa_system \ + dir.$i file.$i; chmod -R 777 dir.$i file.$i; done;''', + }, + # Data Operations (append data to the files) + 'data': { + 2: + '''cd {0}; for i in `seq 1 3`; + do {1} 2K >> file.$i; + for j in `seq 1 3`; + do {1} 2K >> dir.$i/file.$j; done; + done;''', + 3: + '''cd {0}; for i in `seq 1 3`; + do {1} 3K >> file.$i; + for j in `seq 1 3`; + do {1} 3K >> dir.$i/file.$j; done; + done;''', + # 4 - Will be used for final data consistency check + 4: + '''cd {0}; for i in `seq 1 6`; + do {1} 4K >> file.$i; + for j in `seq 1 6`; + do {1} 4K >> dir.$i/file.$j; done; + done;''', + }, + } + bricks = get_online_bricks_list(self.mnode, self.volname) + self.assertIsNotNone(bricks, + 'Not able to get list of bricks in the volume') + + # Make first brick always online and start operations from second brick + for index, brick in enumerate(bricks[1:], start=2): + + # Bring brick offline + ret = bring_bricks_offline(self.volname, brick) + self.assertTrue(ret, 'Unable to bring {} offline'.format(bricks)) + + # Perform metadata/data operation + cmd = self.op_cmd[op_type][index].format(self.fqpath, self.io_cmd) + ret, _, err = g.run(self.client, cmd) + self.assertEqual(ret, 0, '{0} failed with {1}'.format(cmd, err)) + self.assertFalse(err, '{0} failed with {1}'.format(cmd, err)) + + # Bring brick online + ret = bring_bricks_online( + self.mnode, + self.volname, + brick, + bring_bricks_online_methods='volume_start_force') + + # Assert metadata/data operations resulted in pending heals + self.assertFalse(is_heal_complete(self.mnode, self.volname)) + + # Enable and wait self heal daemon to be online + self.assertTrue(enable_self_heal_daemon(self.mnode, self.volname), + 'Enabling self heal daemon failed') + self.assertTrue( + wait_for_self_heal_daemons_to_be_online(self.mnode, self.volname), + 'Not all self heal daemons are online') + + def _validate_heal_completion_and_arequal(self, op_type): + '''Refactor of steps common to all tests: Validate heal from heal + commands, verify arequal, perform IO and verify arequal after IO''' + + # Validate heal completion + self.assertTrue(monitor_heal_completion(self.mnode, self.volname), + 'Self heal is not completed within timeout') + self.assertFalse( + is_volume_in_split_brain(self.mnode, self.volname), + 'Volume is in split brain even after heal completion') + + subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + self.assertTrue(subvols, 'Not able to get list of subvols') + arbiter = self.volume_type.find('arbiter') >= 0 + stop = len(subvols[0]) - 1 if 
arbiter else len(subvols[0]) + + # Validate arequal + self._validate_arequal_and_perform_lookup(subvols, stop) + + # Perform some additional metadata/data operations + cmd = self.op_cmd[op_type][4].format(self.fqpath, self.io_cmd) + ret, _, err = g.run(self.client, cmd) + self.assertEqual(ret, 0, '{0} failed with {1}'.format(cmd, err)) + self.assertFalse(err, '{0} failed with {1}'.format(cmd, err)) + + # Validate arequal after additional operations + self._validate_arequal_and_perform_lookup(subvols, stop) + + def _validate_arequal_and_perform_lookup(self, subvols, stop): + '''Refactor of steps common to all tests: Validate arequal from bricks + backend and perform a lookup of all files from mount''' + for subvol in subvols: + ret, arequal = collect_bricks_arequal(subvol[0:stop]) + self.assertTrue( + ret, 'Unable to get `arequal` checksum on ' + '{}'.format(subvol[0:stop])) + self.assertEqual( + len(set(arequal)), 1, 'Mismatch of `arequal` ' + 'checksum among {} is identified'.format(subvol[0:stop])) + + # Perform a lookup of all files and directories on mounts + self.assertTrue(list_all_files_and_dirs_mounts(self.mounts), + 'Failed to list all files and dirs from mount') + + def test_metadata_heal_from_shd(self): + '''Description: Verify files heal after switching on `self-heal-daemon` + when metadata operations are performed while a brick was down + + Steps: + 1. Create, mount and run IO on volume + 2. Set `self-heal-daemon` to `off`, cyclic brick down and perform + metadata operations + 3. Set `self-heal-daemon` to `on` and wait for heal completion + 4. Validate areequal checksum on backend bricks + ''' + op_type = 'metadata' + self._perform_io_and_disable_self_heal() + self._perform_brick_ops_and_enable_self_heal(op_type=op_type) + self._validate_heal_completion_and_arequal(op_type=op_type) + g.log.info('Pass: Verification of metadata heal after switching on ' + '`self heal daemon` is complete') + + def test_metadata_heal_from_heal_cmd(self): + '''Description: Verify files heal after triggering heal command when + metadata operations are performed while a brick was down + + Steps: + 1. Create, mount and run IO on volume + 2. Set `self-heal-daemon` to `off`, cyclic brick down and perform + metadata operations + 3. Set `self-heal-daemon` to `on`, invoke `gluster vol <vol> heal` + 4. Validate areequal checksum on backend bricks + ''' + op_type = 'metadata' + self._perform_io_and_disable_self_heal() + self._perform_brick_ops_and_enable_self_heal(op_type=op_type) + + # Invoke `glfsheal` + self.assertTrue(trigger_heal(self.mnode, self.volname), + 'Unable to trigger index heal on the volume') + + self._validate_heal_completion_and_arequal(op_type=op_type) + g.log.info( + 'Pass: Verification of metadata heal via `glfsheal` is complete') + + def test_data_heal_from_shd(self): + '''Description: Verify files heal after triggering heal command when + data operations are performed while a brick was down + + Steps: + 1. Create, mount and run IO on volume + 2. Set `self-heal-daemon` to `off`, cyclic brick down and perform data + operations + 3. Set `self-heal-daemon` to `on` and wait for heal completion + 4. Validate areequal checksum on backend bricks + ''' + op_type = 'data' + self._perform_io_and_disable_self_heal() + self._perform_brick_ops_and_enable_self_heal(op_type=op_type) + self._validate_heal_completion_and_arequal(op_type=op_type) + g.log.info('Pass: Verification of data heal after switching on ' + '`self heal daemon` is complete') |
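Both this test and test_metadata_self_heal_on_open_fd.py above compare arequal only across the data bricks of each subvol, slicing off the arbiter brick (which holds metadata but no file data) via subvol[0:stop]. A minimal sketch of that slicing and equality check, assuming a mapping of brick to checksum string; the helper name, brick paths, and checksum values are placeholders, not real arequal output:

def data_bricks_match(subvols, per_brick_checksum, is_arbiter_volume):
    """Return True when all data bricks in every subvol report one checksum."""
    for subvol in subvols:
        # For arbiter volumes the last brick of each subvol is the arbiter,
        # so it is excluded from the data checksum comparison.
        stop = len(subvol) - 1 if is_arbiter_volume else len(subvol)
        checksums = {per_brick_checksum[brick] for brick in subvol[0:stop]}
        if len(checksums) != 1:
            return False
    return True

subvols = [['s1:/bricks/b0', 's2:/bricks/b1', 's3:/bricks/arb0']]
checksums = {'s1:/bricks/b0': 'abc123', 's2:/bricks/b1': 'abc123',
             's3:/bricks/arb0': 'ffff00'}
print(data_bricks_match(subvols, checksums, is_arbiter_volume=True))  # True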