Diffstat (limited to 'tests/functional/arbiter')
34 files changed, 3319 insertions, 729 deletions
diff --git a/tests/functional/arbiter/brick_cases/test_brickcases.py b/tests/functional/arbiter/brick_cases/test_brickcases.py
index ec5ac0a0e..766012bd5 100755
--- a/tests/functional/arbiter/brick_cases/test_brickcases.py
+++ b/tests/functional/arbiter/brick_cases/test_brickcases.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com>
+# Copyright (C) 2017-2020 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -19,20 +19,20 @@ from glusto.core import Glusto as g
from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on)
from glustolibs.gluster.volume_libs import (
- log_volume_info_and_status, replace_brick_from_volume,
- expand_volume, wait_for_volume_process_to_be_online,
- verify_all_process_of_volume_are_online, shrink_volume)
+ replace_brick_from_volume, expand_volume, shrink_volume,
+ wait_for_volume_process_to_be_online,
+ verify_all_process_of_volume_are_online)
from glustolibs.gluster.rebalance_ops import (
rebalance_start, rebalance_status, wait_for_rebalance_to_complete)
-from glustolibs.gluster.heal_libs import monitor_heal_completion
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+ is_heal_complete)
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.io.utils import (validate_io_procs,
- list_all_files_and_dirs_mounts,
wait_for_io_to_complete)
from glustolibs.misc.misc_libs import upload_scripts
-@runs_on([['replicated', 'distributed-replicated'],
+@runs_on([['arbiter', 'distributed-arbiter'],
['glusterfs', 'cifs', 'nfs']])
class GlusterArbiterVolumeTypeChangeClass(GlusterBaseClass):
"""Class for testing Volume Type Change from replicated to
@@ -41,32 +41,14 @@ class GlusterArbiterVolumeTypeChangeClass(GlusterBaseClass):
@classmethod
def setUpClass(cls):
# Calling GlusterBaseClass setUpClass
- GlusterBaseClass.setUpClass.im_func(cls)
-
- # Overriding the volume type to specifically test the volume type
- # change from replicated to arbiter
- if cls.volume_type == "replicated":
- cls.volume['voltype'] = {
- 'type': 'replicated',
- 'replica_count': 2,
- 'dist_count': 1,
- 'transport': 'tcp'}
-
- if cls.volume_type == "distributed-replicated":
- cls.volume['voltype'] = {
- 'type': 'distributed-replicated',
- 'dist_count': 2,
- 'replica_count': 2,
- 'transport': 'tcp'}
+ cls.get_super_method(cls, 'setUpClass')()
# Upload io scripts for running IO on mounts
g.log.info("Upload io scripts to clients %s for running IO on mounts",
cls.clients)
- script_local_path = ("/usr/share/glustolibs/io/scripts/"
- "file_dir_ops.py")
cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
"file_dir_ops.py")
- ret = upload_scripts(cls.clients, [script_local_path])
+ ret = upload_scripts(cls.clients, cls.script_upload_path)
if not ret:
raise ExecutionError("Failed to upload IO scripts to clients %s"
% cls.clients)
@@ -91,8 +73,8 @@ class GlusterArbiterVolumeTypeChangeClass(GlusterBaseClass):
"""
- Setup Volume and Mount Volume
"""
- # Calling GlusterBaseClass setUpClass
- GlusterBaseClass.setUpClass.im_func(self)
+ # Calling GlusterBaseClass setUp
+ self.get_super_method(self, 'setUp')()
self.all_mounts_procs = []
self.io_validation_complete = False
@@ -120,13 +102,6 @@ class GlusterArbiterVolumeTypeChangeClass(GlusterBaseClass):
raise ExecutionError("IO failed on some of the clients")
g.log.info("IO is successful on all mounts")
- # List all files and dirs created
- g.log.info("List all files and directories:")
- ret = list_all_files_and_dirs_mounts(self.mounts)
- if not ret:
- raise ExecutionError("Failed to list all files and dirs")
- g.log.info("Listing all files and directories is successful")
-
# Unmount Volume and Cleanup Volume
g.log.info("Starting to Unmount Volume and Cleanup Volume")
ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
@@ -135,7 +110,7 @@ class GlusterArbiterVolumeTypeChangeClass(GlusterBaseClass):
g.log.info("Successful in Unmount Volume and Cleanup Volume")
# Calling GlusterBaseClass tearDown
- GlusterBaseClass.tearDown.im_func(self)
+ self.get_super_method(self, 'tearDown')()
def test_replicated_to_arbiter_volume_change_with_volume_ops(self):
"""
@@ -147,29 +122,25 @@ class GlusterArbiterVolumeTypeChangeClass(GlusterBaseClass):
# pylint: disable=too-many-statements
# Start IO on mounts
- g.log.info("Starting IO on all mounts...")
self.all_mounts_procs = []
- for mount_obj in self.mounts:
- g.log.info("Starting IO on %s:%s", mount_obj.client_system,
- mount_obj.mountpoint)
- cmd = ("python %s create_deep_dirs_with_files "
- "--dirname-start-num %d "
- "--dir-depth 2 "
- "--dir-length 15 "
- "--max-num-of-dirs 5 "
- "--num-of-files 5 %s" % (self.script_upload_path,
- self.counter,
- mount_obj.mountpoint))
- proc = g.run_async(mount_obj.client_system, cmd,
- user=mount_obj.user)
- self.all_mounts_procs.append(proc)
- self.counter = self.counter + 10
+ g.log.info("Starting IO on %s:%s", self.mounts[0].client_system,
+ self.mounts[0].mountpoint)
+ cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
+ "--dirname-start-num 10 --dir-depth 1 --dir-length 1 "
+ "--max-num-of-dirs 1 --num-of-files 5 %s" % (
+ self.script_upload_path,
+ self.mounts[0].mountpoint))
+ proc = g.run_async(self.mounts[0].client_system, cmd,
+ user=self.mounts[0].user)
+ self.all_mounts_procs.append(proc)
self.io_validation_complete = False
# Validate IO
- ret = validate_io_procs(self.all_mounts_procs, self.mounts)
+ self.assertTrue(
+ validate_io_procs(self.all_mounts_procs, self.mounts[0]),
+ "IO failed on some of the clients"
+ )
self.io_validation_complete = True
- self.assertTrue(ret, "IO failed on some of the clients")
# Adding bricks to make an Arbiter Volume
g.log.info("Adding bricks to convert to Arbiter Volume")
@@ -180,16 +151,6 @@ class GlusterArbiterVolumeTypeChangeClass(GlusterBaseClass):
g.log.info("Changing volume to arbiter volume is successful %s",
self.volname)
- # Log Volume Info and Status after changing the volume type from
- # replicated to arbitered
- g.log.info("Logging volume info and Status after changing to "
- "arbitered volume")
- ret = log_volume_info_and_status(self.mnode, self.volname)
- self.assertTrue(ret, ("Logging volume info and status failed on "
- "volume %s", self.volname))
- g.log.info("Successful in logging volume info and status of volume %s",
- self.volname)
-
# Wait for volume processes to be online
g.log.info("Wait for volume processes to be online")
ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
@@ -215,34 +176,25 @@ class GlusterArbiterVolumeTypeChangeClass(GlusterBaseClass):
g.log.info("self-heal is successful after changing the volume type "
"from replicated to arbitered volume")
+ # Check if heal is completed
+ ret = is_heal_complete(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal is not complete')
+ g.log.info('Heal is completed successfully')
+
# Start IO on mounts
- g.log.info("Starting IO on all mounts...")
self.all_mounts_procs = []
- for mount_obj in self.mounts:
- g.log.info("Starting IO on %s:%s", mount_obj.client_system,
- mount_obj.mountpoint)
- cmd = ("python %s create_deep_dirs_with_files "
- "--dirname-start-num %d "
- "--dir-depth 2 "
- "--dir-length 35 "
- "--max-num-of-dirs 5 "
- "--num-of-files 5 %s" % (self.script_upload_path,
- self.counter,
- mount_obj.mountpoint))
- proc = g.run_async(mount_obj.client_system, cmd,
- user=mount_obj.user)
- self.all_mounts_procs.append(proc)
- self.counter = self.counter + 10
+ g.log.info("Starting IO on %s:%s", self.mounts[0].client_system,
+ self.mounts[0].mountpoint)
+ cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
+ "--dirname-start-num 10 --dir-depth 1 --dir-length 1 "
+ "--max-num-of-dirs 1 --num-of-files 5 %s" % (
+ self.script_upload_path,
+ self.mounts[0].mountpoint))
+ proc = g.run_async(self.mounts[0].client_system, cmd,
+ user=self.mounts[0].user)
+ self.all_mounts_procs.append(proc)
self.io_validation_complete = False
- # Log Volume Info and Status before expanding the volume.
- g.log.info("Logging volume info and Status before expanding volume")
- ret = log_volume_info_and_status(self.mnode, self.volname)
- self.assertTrue(ret, ("Logging volume info and status failed on "
- "volume %s", self.volname))
- g.log.info("Successful in logging volume info and status of volume %s",
- self.volname)
-
# Start add-brick (subvolume-increase)
g.log.info("Start adding bricks to volume when IO in progress")
ret = expand_volume(self.mnode, self.volname, self.servers,
@@ -252,14 +204,6 @@ class GlusterArbiterVolumeTypeChangeClass(GlusterBaseClass):
g.log.info("Expanding volume when IO in progress is successful on "
"volume %s", self.volname)
- # Log Volume Info and Status after expanding the volume
- g.log.info("Logging volume info and Status after expanding volume")
- ret = log_volume_info_and_status(self.mnode, self.volname)
- self.assertTrue(ret, ("Logging volume info and status failed on "
- "volume %s", self.volname))
- g.log.info("Successful in logging volume info and status of volume %s",
- self.volname)
-
# Wait for volume processes to be online
g.log.info("Wait for volume processes to be online")
ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
@@ -295,15 +239,6 @@ class GlusterArbiterVolumeTypeChangeClass(GlusterBaseClass):
g.log.info("Rebalance is successfully complete on the volume %s",
self.volname)
- # Log Volume Info and Status before replacing brick from the volume.
- g.log.info("Logging volume info and Status before replacing brick "
- "from the volume %s", self.volname)
- ret = log_volume_info_and_status(self.mnode, self.volname)
- self.assertTrue(ret, ("Logging volume info and status failed on "
- "volume %s", self.volname))
- g.log.info("Successful in logging volume info and status of volume %s",
- self.volname)
-
# Replace brick from a sub-volume
g.log.info("Replace a faulty brick from the volume")
ret = replace_brick_from_volume(self.mnode, self.volname,
@@ -311,15 +246,6 @@ class GlusterArbiterVolumeTypeChangeClass(GlusterBaseClass):
self.assertTrue(ret, "Failed to replace faulty brick from the volume")
g.log.info("Successfully replaced faulty brick from the volume")
- # Log Volume Info and Status after replacing the brick
- g.log.info("Logging volume info and Status after replacing brick "
- "from the volume %s", self.volname)
- ret = log_volume_info_and_status(self.mnode, self.volname)
- self.assertTrue(ret, ("Logging volume info and status failed on "
- "volume %s", self.volname))
- g.log.info("Successful in logging volume info and status of volume %s",
- self.volname)
-
# Wait for volume processes to be online
g.log.info("Wait for volume processes to be online")
ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
@@ -343,13 +269,10 @@ class GlusterArbiterVolumeTypeChangeClass(GlusterBaseClass):
"current test workload")
g.log.info("self-heal is successful after replace-brick operation")
- # Log Volume Info and Status before shrinking the volume.
- g.log.info("Logging volume info and Status before shrinking volume")
- ret = log_volume_info_and_status(self.mnode, self.volname)
- self.assertTrue(ret, ("Logging volume info and status failed on "
- "volume %s", self.volname))
- g.log.info("Successful in logging volume info and status of volume %s",
- self.volname)
+ # Check if heal is completed
+ ret = is_heal_complete(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal is not complete')
+ g.log.info('Heal is completed successfully')
# Shrinking volume by removing bricks from volume when IO in progress
g.log.info("Start removing bricks from volume when IO in progress")
@@ -359,14 +282,6 @@ class GlusterArbiterVolumeTypeChangeClass(GlusterBaseClass):
g.log.info("Shrinking volume when IO in progress is successful on "
"volume %s", self.volname)
- # Log Volume Info and Status after shrinking the volume
- g.log.info("Logging volume info and Status after shrinking volume")
- ret = log_volume_info_and_status(self.mnode, self.volname)
- self.assertTrue(ret, ("Logging volume info and status failed on "
- "volume %s", self.volname))
- g.log.info("Successful in logging volume info and status of volume %s",
- self.volname)
-
# Wait for volume processes to be online
g.log.info("Wait for volume processes to be online")
ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
@@ -386,7 +301,7 @@ class GlusterArbiterVolumeTypeChangeClass(GlusterBaseClass):
# Validate IO
self.assertTrue(
- validate_io_procs(self.all_mounts_procs, self.mounts),
+ validate_io_procs(self.all_mounts_procs, self.mounts[0]),
"IO failed on some of the clients"
)
self.io_validation_complete = True
diff --git a/tests/functional/arbiter/brick_cases/test_cyclic_brick_kill_list.py b/tests/functional/arbiter/brick_cases/test_cyclic_brick_kill_list.py
index 8dbf0bcac..642c6f011 100755
--- a/tests/functional/arbiter/brick_cases/test_cyclic_brick_kill_list.py
+++ b/tests/functional/arbiter/brick_cases/test_cyclic_brick_kill_list.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com>
+# Copyright (C) 2017-2020 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -15,7 +15,9 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
import time
+
from glusto.core import Glusto as g
+
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.volume_libs import log_volume_info_and_status
from glustolibs.gluster.brick_libs import (
@@ -41,16 +43,14 @@ class ListMount(GlusterBaseClass):
@classmethod
def setUpClass(cls):
# Calling GlusterBaseClass setUpClass
- GlusterBaseClass.setUpClass.im_func(cls)
+ cls.get_super_method(cls, 'setUpClass')()
# Upload io scripts for running IO on mounts
g.log.info("Upload io scripts to clients %s for running IO on mounts",
cls.clients)
- script_local_path = ("/usr/share/glustolibs/io/scripts/"
- "fd_writes.py")
cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
"fd_writes.py")
- ret = upload_scripts(cls.clients, [script_local_path])
+ ret = upload_scripts(cls.clients, cls.script_upload_path)
if not ret:
raise ExecutionError("Failed to upload IO scripts to clients %s"
% cls.clients)
@@ -73,7 +73,7 @@ class ListMount(GlusterBaseClass):
def setUp(self):
# Calling GlusterBaseClass setUp
- GlusterBaseClass.setUp.im_func(self)
+ self.get_super_method(self, 'setUp')()
self.all_mounts_procs = []
self.io_validation_complete = False
@@ -117,7 +117,7 @@ class ListMount(GlusterBaseClass):
g.log.info("Successful in Unmount Volume and Cleanup Volume")
# Calling GlusterBaseClass tearDown
- GlusterBaseClass.tearDown.im_func(self)
+ self.get_super_method(self, 'tearDown')()
def test_files_on_mount(self):
"""""
@@ -134,7 +134,7 @@ class ListMount(GlusterBaseClass):
for mount_obj in self.mounts:
g.log.info("Starting IO on %s:%s", mount_obj.client_system,
mount_obj.mountpoint)
- cmd = ("python %s "
+ cmd = ("/usr/bin/env python %s "
"--file-sizes-list 1G "
"--chunk-sizes-list 128 "
"--write-time 900 "
diff --git a/tests/functional/arbiter/brick_cases/test_impact_of_replace_brick_for_glustershd.py b/tests/functional/arbiter/brick_cases/test_impact_of_replace_brick_for_glustershd.py
index a8a222c24..56d2edbd9 100644
--- a/tests/functional/arbiter/brick_cases/test_impact_of_replace_brick_for_glustershd.py
+++ b/tests/functional/arbiter/brick_cases/test_impact_of_replace_brick_for_glustershd.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2017 Red Hat, Inc. <http://www.redhat.com>
+# Copyright (C) 2016-2020 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -30,7 +30,7 @@ from glustolibs.gluster.heal_libs import (get_self_heal_daemon_pid,
is_shd_daemonized)
-@runs_on([['replicated', 'distributed-replicated'],
+@runs_on([['arbiter', 'distributed-arbiter'],
['glusterfs', 'nfs']])
class ImpactOfReplaceBrickForGlustershdTests(GlusterBaseClass):
"""
@@ -41,18 +41,7 @@ class ImpactOfReplaceBrickForGlustershdTests(GlusterBaseClass):
@classmethod
def setUpClass(cls):
# Calling GlusterBaseClass setUpClass
- GlusterBaseClass.setUpClass.im_func(cls)
-
- # Override Volumes
- if cls.volume_type == "distributed-replicated":
- # Define distributed-replicated volume
- cls.volume['voltype'] = {
- 'type': 'distributed-replicated',
- 'dist_count': 2,
- 'replica_count': 3,
- 'arbiter_count': 1,
- 'transport': 'tcp'}
-
+ cls.get_super_method(cls, 'setUpClass')()
cls.glustershd = "/var/lib/glusterd/glustershd/glustershd-server.vol"
def setUp(self):
@@ -61,7 +50,7 @@ class ImpactOfReplaceBrickForGlustershdTests(GlusterBaseClass):
"""
# calling GlusterBaseClass setUp
- GlusterBaseClass.setUp.im_func(self)
+ self.get_super_method(self, 'setUp')()
self.all_mounts_procs = []
self.io_validation_complete = False
@@ -88,7 +77,7 @@ class ImpactOfReplaceBrickForGlustershdTests(GlusterBaseClass):
g.log.info("Successful in umounting the volume and Cleanup")
# Calling GlusterBaseClass teardown
- GlusterBaseClass.tearDown.im_func(self)
+ self.get_super_method(self, 'tearDown')()
def test_impact_of_replace_brick_for_glustershd(self):
# pylint: disable=too-many-statements,too-many-branches,too-many-locals
diff --git a/tests/functional/arbiter/brick_cases/test_replica3_to_arbiter.py b/tests/functional/arbiter/brick_cases/test_replica3_to_arbiter.py
index 33e92e9ee..24c014502 100755
--- a/tests/functional/arbiter/brick_cases/test_replica3_to_arbiter.py
+++ b/tests/functional/arbiter/brick_cases/test_replica3_to_arbiter.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2015-2018 Red Hat, Inc. <http://www.redhat.com>
+# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -22,6 +22,12 @@ from glustolibs.gluster.volume_libs import (
expand_volume, wait_for_volume_process_to_be_online,
verify_all_process_of_volume_are_online, shrink_volume, get_subvols)
from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.io.utils import run_linux_untar
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+ is_heal_complete,
+ is_volume_in_split_brain)
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.gluster.heal_ops import trigger_heal
@runs_on([['replicated', 'distributed-replicated'],
@@ -30,45 +36,39 @@ class GlusterArbiterVolumeTypeClass(GlusterBaseClass):
"""Class for testing Volume Type Change from replicated to
Arbitered volume
"""
- @classmethod
- def setUpClass(cls):
- # Calling GlusterBaseClass setUpClass
- GlusterBaseClass.setUpClass.im_func(cls)
-
- # Overriding the volume type to specifically test the volume type
-
- if cls.volume_type == "replicated":
- cls.volume['voltype'] = {
- 'type': 'replicated',
- 'replica_count': 3,
- 'transport': 'tcp'}
-
def setUp(self):
"""
Setup Volume
"""
- # Calling GlusterBaseClass setUpClass
- GlusterBaseClass.setUpClass.im_func(self)
+ # Calling GlusterBaseClass setUp
+ self.get_super_method(self, 'setUp')()
+
+ # Set I/O flag to false
+ self.is_io_running = False
# Setup Volume
- g.log.info("Starting to Setup Volume")
- ret = self.setup_volume()
+ g.log.info("Starting to Setup and Mount Volume")
+ # Creating Volume and mounting the volume
+ ret = self.setup_volume_and_mount_volume([self.mounts[0]])
if not ret:
- raise ExecutionError("Failed to Setup_Volume")
- g.log.info("Successful in Setup Volume")
+ raise ExecutionError("Volume creation or mount failed: %s"
+ % self.volname)
self.subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
def tearDown(self):
- # Cleanup Volume
- g.log.info("Starting to Unmount Volume and Cleanup Volume")
- ret = self.cleanup_volume()
+ # Wait for I/O if not completed
+ if self.is_io_running:
+ if not self._wait_for_untar_completion():
+ g.log.error("I/O failed to stop on clients")
+
+ # Unmounting and cleaning volume
+ ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]])
if not ret:
- raise ExecutionError("Failed to Cleanup Volume")
- g.log.info("Successful Cleanup Volume")
+ raise ExecutionError("Unable to delete volume % s" % self.volname)
# Calling GlusterBaseClass tearDown
- GlusterBaseClass.tearDown.im_func(self)
+ self.get_super_method(self, 'tearDown')()
# Clearing bricks
for subvol in self.subvols:
@@ -80,10 +80,22 @@ class GlusterArbiterVolumeTypeClass(GlusterBaseClass):
g.log.info('Clearing brick %s is successful', brick)
g.log.info('Clearing for all brick is successful')
- def test_replicated_to_arbiter_volume(self):
+ def _wait_for_untar_completion(self):
+ """Wait for untar to complete"""
+ has_process_stopped = []
+ for proc in self.io_process:
+ try:
+ ret, _, _ = proc.async_communicate()
+ if not ret:
+ has_process_stopped.append(False)
+ has_process_stopped.append(True)
+ except ValueError:
+ has_process_stopped.append(True)
+ return all(has_process_stopped)
+
+ def _convert_replicated_to_arbiter_volume(self):
"""
- Description:-
- Reduce the replica count from replica 3 to arbiter
+ Helper module to convert replicated to arbiter volume.
"""
# pylint: disable=too-many-statements
# Remove brick to reduce the replica count from replica 3
@@ -112,7 +124,7 @@ class GlusterArbiterVolumeTypeClass(GlusterBaseClass):
g.log.info("Adding bricks to convert to Arbiter Volume")
replica_arbiter = {'replica_count': 1, 'arbiter_count': 1}
ret = expand_volume(self.mnode, self.volname, self.servers,
- self.all_servers_info, add_to_hot_tier=False,
+ self.all_servers_info, force=True,
**replica_arbiter)
self.assertTrue(ret, "Failed to expand the volume %s" % self.volname)
g.log.info("Changing volume to arbiter volume is successful %s",
@@ -132,3 +144,70 @@ class GlusterArbiterVolumeTypeClass(GlusterBaseClass):
self.assertTrue(ret, "Volume %s : All process are not online"
% self.volname)
g.log.info("Volume %s : All process are online", self.volname)
+
+ def test_replicated_to_arbiter_volume(self):
+ """
+ Description:-
+ Reduce the replica count from replica 3 to arbiter
+ """
+ # pylint: disable=too-many-statements
+ self._convert_replicated_to_arbiter_volume()
+
+ def test_replica_to_arbiter_volume_with_io(self):
+ """
+ Description: Replica 3 to arbiter conversion with ongoing IO's
+
+ Steps :
+ 1) Create a replica 3 volume and start volume.
+ 2) Set client side self heal off.
+ 3) Fuse mount the volume.
+ 4) Create directory dir1 and write data.
+ Example: untar linux tar from the client into the dir1
+ 5) When IO's is running, execute remove-brick command,
+ and convert replica 3 to replica 2 volume
+ 6) Execute add-brick command and convert to arbiter volume,
+ provide the path of new arbiter brick.
+ 7) Issue gluster volume heal.
+ 8) Heal should be completed with no files in split-brain.
+ """
+
+ # pylint: disable=too-many-statements
+ # Create a dir to start untar
+ self.linux_untar_dir = "{}/{}".format(self.mounts[0].mountpoint,
+ "linuxuntar")
+ ret = mkdir(self.clients[0], self.linux_untar_dir)
+ self.assertTrue(ret, "Failed to create dir linuxuntar for untar")
+
+ # Start linux untar on dir linuxuntar
+ self.io_process = run_linux_untar(self.clients[0],
+ self.mounts[0].mountpoint,
+ dirs=tuple(['linuxuntar']))
+ self.is_io_running = True
+
+ # Convert relicated to arbiter volume
+ self._convert_replicated_to_arbiter_volume()
+
+ # Wait for IO to complete.
+ ret = self._wait_for_untar_completion()
+ self.assertFalse(ret, "IO didn't complete or failed on client")
+ self.is_io_running = False
+
+ # Start healing
+ ret = trigger_heal(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal is not started')
+ g.log.info('Healing is started')
+
+ # Monitor heal completion
+ ret = monitor_heal_completion(self.mnode, self.volname,
+ timeout_period=3600)
+ self.assertTrue(ret, 'Heal has not yet completed')
+
+ # Check if heal is completed
+ ret = is_heal_complete(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal is not complete')
+ g.log.info('Heal is completed successfully')
+
+ # Check for split-brain
+ ret = is_volume_in_split_brain(self.mnode, self.volname)
+ self.assertFalse(ret, 'Volume is in split-brain state')
+ g.log.info('Volume is not in split-brain state')
diff --git a/tests/functional/arbiter/brick_cases/test_rmvrf_files.py b/tests/functional/arbiter/brick_cases/test_rmvrf_files.py index 5d8e87ed5..8d7304b0b 100755 --- a/tests/functional/arbiter/brick_cases/test_rmvrf_files.py +++ b/tests/functional/arbiter/brick_cases/test_rmvrf_files.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,6 +15,7 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. from glusto.core import Glusto as g + from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) from glustolibs.gluster.volume_libs import ( log_volume_info_and_status) @@ -30,7 +31,7 @@ from glustolibs.io.utils import (validate_io_procs, from glustolibs.misc.misc_libs import upload_scripts -@runs_on([['replicated', 'distributed-replicated'], +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs', 'cifs', 'nfs']]) class TestRmrfMount(GlusterBaseClass): """ @@ -40,16 +41,14 @@ class TestRmrfMount(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -72,7 +71,7 @@ class TestRmrfMount(GlusterBaseClass): def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() self.all_mounts_procs = [] self.io_validation_complete = False @@ -110,7 +109,7 @@ class TestRmrfMount(GlusterBaseClass): g.log.info("Successful in Unmount Volume and Cleanup Volume") # Calling GlusterBaseClass tearDown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_self_heal(self): """ @@ -130,14 +129,14 @@ class TestRmrfMount(GlusterBaseClass): for mount_obj in self.mounts: g.log.info("Starting IO on %s:%s", mount_obj.client_system, mount_obj.mountpoint) - cmd = ("python %s create_deep_dirs_with_files " + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " "--dirname-start-num %d " "--dir-depth 2 " "--dir-length 35 " "--max-num-of-dirs 5 " - "--num-of-files 5 %s" % (self.script_upload_path, - self.counter, - mount_obj.mountpoint)) + "--num-of-files 5 %s" % ( + self.script_upload_path, + self.counter, mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) self.all_mounts_procs.append(proc) @@ -146,10 +145,7 @@ class TestRmrfMount(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks'])) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Killing one brick from the volume set 
g.log.info("Bringing bricks: %s offline", bricks_to_bring_offline) diff --git a/tests/functional/arbiter/test_afr_read_write.py b/tests/functional/arbiter/test_afr_read_write.py new file mode 100644 index 000000000..09e6a3a2a --- /dev/null +++ b/tests/functional/arbiter/test_afr_read_write.py @@ -0,0 +1,192 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from random import sample +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import ( + verify_all_process_of_volume_are_online, + wait_for_volume_process_to_be_online) +from glustolibs.gluster.brick_libs import ( + get_all_bricks, + bring_bricks_offline, + bring_bricks_online, + are_bricks_offline) +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_volume_in_split_brain) +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.io.utils import validate_io_procs + + +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestAfrReadWrite(GlusterBaseClass): + + """ + Description: + Arbiter test writes and reads from a file + """ + def setUp(self): + # Calling GlusterBaseClass + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def _bring_bricks_online_heal(self, mnode, volname, bricks_list): + """ + Bring bricks online and monitor heal completion + """ + # Bring bricks online + ret = bring_bricks_online( + mnode, volname, bricks_list, + bring_bricks_online_methods=['volume_start_force']) + self.assertTrue(ret, 'Failed to bring bricks online') + + # Wait for volume processes to be online + ret = wait_for_volume_process_to_be_online(mnode, volname) + self.assertTrue(ret, ("Failed to wait for volume {} processes to " + "be online".format(volname))) + + # Verify volume's all process are online + ret = verify_all_process_of_volume_are_online(mnode, volname) + self.assertTrue(ret, ("Volume {} : All process are not online".format + (volname))) + g.log.info("Volume %s : All process are online", volname) + + # Monitor heal completion + ret = monitor_heal_completion(mnode, volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check for split-brain + ret = is_volume_in_split_brain(mnode, volname) + self.assertFalse(ret, 'Volume is in 
split-brain state') + + def test_afr_read_write(self): + """ + Test read and write of file + Description: + - Get the bricks from the volume + - Creating directory test_write_and_read_file + - Write from 1st client + - Read from 2nd client + - Select brick to bring offline + - Bring brick offline + - Validating IO's on client1 + - Validating IO's on client2 + - Bring bricks online + - Wait for volume processes to be online + - Verify volume's all process are online + - Monitor heal completion + - Check for split-brain + - Bring 2nd brick offline + - Check if brick is offline + - Write from 1st client + - Read from 2nd client + - Bring bricks online + - Wait for volume processes to be online + - Verify volume's all process are online + - Monitor heal completion + - Check for split-brain + + - Get arequal after getting bricks online + """ + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + # Get the bricks from the volume + bricks_list = get_all_bricks(self.mnode, self.volname) + g.log.info("Brick List : %s", bricks_list) + + # Creating directory test_write_and_read_file + ret = mkdir(self.mounts[0].client_system, + "{}/test_write_and_read_file" + .format(self.mounts[0].mountpoint)) + self.assertTrue(ret, "Failed to create directory") + g.log.info("Directory 'test_write_and_read_file' on %s created " + "successfully", self.mounts[0]) + + # Write from 1st client + cmd_to_write = ( + 'cd %s/test_write_and_read_file ; for i in `seq 1 5000` ;' + 'do echo -e "Date:`date`\n" >> test_file ;echo -e "' + '`cal`\n" >> test_file ; done ; cd ..' + % self.mounts[0].mountpoint) + proc1 = g.run_async(self.mounts[0].client_system, + cmd_to_write) + + # Read from 2nd client + cmd = ('cd %s/ ;for i in {1..30};' + 'do cat test_write_and_read_file/test_file;done' + % self.mounts[1].mountpoint) + proc2 = g.run_async(self.mounts[1].client_system, cmd) + + # Bring brick offline + bricks_to_bring_offline = sample(bricks_list, 2) + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline[0]) + self.assertTrue(ret, 'Failed to bring bricks {} offline'. + format(bricks_to_bring_offline)) + + # Check brick is offline + ret = are_bricks_offline(self.mnode, self.volname, + [bricks_to_bring_offline[0]]) + self.assertTrue(ret, 'Bricks {} are not offline'. + format(bricks_to_bring_offline[0])) + + # Validating IO's + for proc, mount in zip([proc1, proc2], self.mounts): + ret = validate_io_procs([proc], mount) + self.assertTrue(ret, "IO failed on client") + g.log.info("Successfully validated all IO's") + + self._bring_bricks_online_heal(self.mnode, self.volname, bricks_list) + + # Bring down second brick + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline[1]) + self.assertTrue(ret, 'Failed to bring bricks {} offline'. + format(bricks_to_bring_offline[1])) + + # Check if brick is offline + ret = are_bricks_offline(self.mnode, self.volname, + [bricks_to_bring_offline[1]]) + self.assertTrue(ret, 'Bricks {} are not offline'. 
+ format(bricks_to_bring_offline[1])) + + # Write from 1st client + ret, _, _ = g.run(self.mounts[0].client_system, cmd_to_write) + self.assertEqual(ret, 0, "Failed to write to file") + g.log.info("Successfully written to file") + + # Read from 2nd client + cmd = ('cd %s/ ;cat test_write_and_read_file/test_file' + % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to read file on mountpoint") + g.log.info("Successfully read file on mountpoint") + + self._bring_bricks_online_heal(self.mnode, self.volname, bricks_list) diff --git a/tests/functional/arbiter/test_brick_down_cyclic.py b/tests/functional/arbiter/test_brick_down_cyclic.py new file mode 100644 index 000000000..8639a4dc5 --- /dev/null +++ b/tests/functional/arbiter/test_brick_down_cyclic.py @@ -0,0 +1,140 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +# pylint: disable=too-many-statements, too-many-locals +import time +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.heal_ops import trigger_heal +from glustolibs.gluster.heal_libs import (is_volume_in_split_brain, + is_heal_complete) +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + get_all_bricks, + are_bricks_online) +from glustolibs.gluster.heal_libs import ( + monitor_heal_completion, are_all_self_heal_daemons_are_online) + + +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestBrickDownHeal(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Setup Volume and Mount Volume + ret = cls.setup_volume_and_mount_volume(cls.mounts, True) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + @classmethod + def tearDownClass(cls): + """ + Cleanup Volume + """ + ret = cls.unmount_volume_and_cleanup_volume(cls.mounts) + if not ret: + raise ExecutionError("Failed to create volume") + + cls.get_super_method(cls, 'tearDownClass')() + + def test_brick_down_heal(self): + """ + - Run IO's from client on a single file + - Now bring down bricks in cyclic order + - kill brick 1, sleep for 5 seconds, bring brick 1 up, wait for 10s + - Now repeat step3 for brick2 and brick 3 + - Repeat the cycle a few times + - Trigger heal, check for split brain using command + """ + # Write IO's + self.all_mounts_procs = [] + cmd = ("for i in `seq 1 10`;" + "do dd if=/dev/urandom of=%s/file$i bs=1K count=1;" + "done" % self.mounts[0].mountpoint) + proc = g.run_async(self.mounts[0].client_system, cmd) + self.all_mounts_procs.append(proc) + + # Killing bricks in cyclic order + bricks_list = 
get_all_bricks(self.mnode, self.volname) + + # Total number of cyclic brick-down cycles to be executed + number_of_cycles = 0 + while number_of_cycles < 3: + number_of_cycles += 1 + for brick in bricks_list: + # Bring brick offline + g.log.info('Bringing bricks %s offline', brick) + ret = bring_bricks_offline(self.volname, [brick]) + self.assertTrue(ret, ("Failed to bring bricks %s offline" + % brick)) + + ret = are_bricks_offline(self.mnode, self.volname, [brick]) + self.assertTrue(ret, 'Bricks %s are not offline' % brick) + g.log.info('Bringing bricks %s offline is successful', brick) + + # Introducing 5 second sleep when brick is down + g.log.info("Waiting for 5 seconds, with ongoing IO while " + "brick %s is offline", brick) + ret = time.sleep(5) + + # Bring brick online + g.log.info('Bringing bricks %s online', brick) + ret = bring_bricks_online(self.mnode, self.volname, [brick]) + self.assertTrue(ret, ("Failed to bring bricks %s online " + % brick)) + g.log.info('Bricks %s are online', brick) + + # Introducing 10 second sleep when brick is up + g.log.info("Waiting for 10 seconds,when " + "brick %s is online", brick) + ret = time.sleep(10) + + # Check if bricks are online + ret = are_bricks_online(self.mnode, self.volname, bricks_list) + self.assertTrue(ret, 'Bricks %s are not online' % bricks_list) + g.log.info('Bricks %s are online', bricks_list) + + # Check daemons + g.log.info('Checking daemons...') + ret = are_all_self_heal_daemons_are_online(self.mnode, + self.volname) + self.assertTrue(ret, ("Some of the self-heal Daemons are " + "offline")) + g.log.info('All self-heal Daemons are online') + + # Trigger self heal + ret = trigger_heal(self.mnode, self.volname) + self.assertTrue(ret, 'Unable to trigger heal on volume') + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') diff --git a/tests/functional/arbiter/test_create_snapshot_and_verify_content.py b/tests/functional/arbiter/test_create_snapshot_and_verify_content.py index 53b433ef6..ec7e801f5 100755 --- a/tests/functional/arbiter/test_create_snapshot_and_verify_content.py +++ b/tests/functional/arbiter/test_create_snapshot_and_verify_content.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,6 +15,7 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
from glusto.core import Glusto as g + from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.snap_ops import (snap_create, snap_restore) @@ -24,10 +25,12 @@ from glustolibs.gluster.volume_libs import ( wait_for_volume_process_to_be_online, get_subvols) from glustolibs.misc.misc_libs import upload_scripts -from glustolibs.io.utils import collect_mounts_arequal +from glustolibs.io.utils import ( + collect_mounts_arequal, + validate_io_procs) -@runs_on([['distributed-replicated', 'replicated'], +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs', 'nfs']]) class TestArbiterSelfHeal(GlusterBaseClass): """ @@ -36,16 +39,14 @@ class TestArbiterSelfHeal(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -54,19 +55,7 @@ class TestArbiterSelfHeal(GlusterBaseClass): def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) - - # Setup Volumes - if self.volume_type == "distributed-replicated": - self.volume_configs = [] - - # Redefine distributed-replicated volume - self.volume['voltype'] = { - 'type': 'distributed-replicated', - 'replica_count': 3, - 'dist_count': 2, - 'arbiter_count': 1, - 'transport': 'tcp'} + self.get_super_method(self, 'setUp')() # Setup Volume and Mount Volume g.log.info("Starting to Setup Volume and Mount Volume") @@ -100,7 +89,7 @@ class TestArbiterSelfHeal(GlusterBaseClass): g.log.info('Clearing for all brick is successful') # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_create_snapshot_and_verify_content(self): """ @@ -117,20 +106,20 @@ class TestArbiterSelfHeal(GlusterBaseClass): g.log.info("Generating data for %s:%s", self.mounts[0].client_system, self.mounts[0].mountpoint) # Create dirs with file + all_mounts_procs = [] g.log.info('Creating dirs with file...') - command = ("python %s create_deep_dirs_with_files " - "-d 2 " - "-l 2 " - "-n 2 " - "-f 20 " - "%s" - % (self.script_upload_path, self.mounts[0].mountpoint)) - - ret, _, err = g.run(self.mounts[0].client_system, command, - user=self.mounts[0].user) - - self.assertFalse(ret, err) - g.log.info("IO is successful") + command = ("/usr/bin/env python %s create_deep_dirs_with_files " + "-d 2 -l 2 -n 2 -f 20 %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) + proc = g.run_async(self.mounts[0].client_system, command, + user=self.mounts[0].user) + all_mounts_procs.append(proc) + + # Validate IO + self.assertTrue( + validate_io_procs(all_mounts_procs, self.mounts), + "IO failed on some of the clients") # Get arequal before snapshot g.log.info('Getting arequal before snapshot...') @@ -149,21 +138,20 @@ class TestArbiterSelfHeal(GlusterBaseClass): g.log.info("Generating data for %s:%s", self.mounts[0].client_system, self.mounts[0].mountpoint) # Create dirs with file + 
all_mounts_procs = [] g.log.info('Adding dirs with file...') - command = ("python %s create_deep_dirs_with_files " - "-d 2 " - "-l 2 " - "-n 2 " - "-f 20 " - "%s" - % (self.script_upload_path, - self.mounts[0].mountpoint+'/new_files')) - - ret, _, err = g.run(self.mounts[0].client_system, command, - user=self.mounts[0].user) - - self.assertFalse(ret, err) - g.log.info("IO is successful") + command = ("/usr/bin/env python %s create_deep_dirs_with_files " + "-d 2 -l 2 -n 2 -f 20 %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint+'/new_files')) + proc = g.run_async(self.mounts[0].client_system, command, + user=self.mounts[0].user) + all_mounts_procs.append(proc) + + # Validate IO + self.assertTrue( + validate_io_procs(all_mounts_procs, self.mounts), + "IO failed on some of the clients") # Stop the volume g.log.info("Stopping %s ...", self.volname) diff --git a/tests/functional/arbiter/test_data_delete.py b/tests/functional/arbiter/test_data_delete.py new file mode 100644 index 000000000..4753efcbc --- /dev/null +++ b/tests/functional/arbiter/test_data_delete.py @@ -0,0 +1,110 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import get_all_bricks + + +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestDataDelete(GlusterBaseClass): + """ + Description: + Test data delete/rename on arbiter volume + """ + def setUp(self): + # Calling GlusterBaseClass + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_data_delete(self): + """ + Test steps: + - Get brick list + - Create files and rename + - Check if brick path contains old files + - Delete files from mountpoint + - Check .glusterfs/indices/xattrop is empty + - Check if brickpath is empty + """ + + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + # Get the bricks from the volume + bricks_list = get_all_bricks(self.mnode, self.volname) + g.log.info("Brick List : %s", bricks_list) + + # Create files and rename + cmd = ('cd %s ;for i in `seq 1 100` ;do mkdir -pv directory$i;' + 'cd directory$i;dd if=/dev/urandom of=file$i bs=1M count=5;' + 'mv file$i renamed$i;done;' % (self.mounts[0].mountpoint)) + ret, _, _ = g.run(self.clients[0], cmd) + self.assertEqual(ret, 0, "Fail: Not able to create files on " + "{}".format(self.mounts[0].mountpoint)) + g.log.info("Files created successfully and renamed") + + # Check if brickpath contains old files + for brick in bricks_list: + brick_node, brick_path = brick.split(":") + cmd = ("ls -1 %s |grep file |wc -l " % brick_path) + ret, out, _ = g.run(brick_node, cmd) + self.assertEqual(0, int(out.strip()), "Brick path {} contains old " + "file in node {}".format(brick_path, brick_node)) + g.log.info("Brick path contains renamed files") + + # Delete files from mountpoint + cmd = ('rm -rf -v %s/*' % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to delete files") + g.log.info("Files deleted successfully for %s", self.mounts[0]) + + # Check .glusterfs/indices/xattrop is empty + for brick in bricks_list: + brick_node, brick_path = brick.split(":") + cmd = ("ls -1 %s/.glusterfs/indices/xattrop/ | " + "grep -ve \"xattrop-\" | wc -l" % brick_path) + ret, out, _ = g.run(brick_node, cmd) + self.assertEqual(0, int(out.strip()), ".glusterfs/indices/" + "xattrop is not empty") + g.log.info("No pending heals on bricks") + + # Check if brickpath is empty + for brick in bricks_list: + brick_node, brick_path = brick.split(":") + cmd = ("ls -1 %s |wc -l " % brick_path) + ret, out, _ = g.run(brick_node, cmd) + self.assertEqual(0, int(out.strip()), "Brick path {} is not empty " + "in node {}".format(brick_path, brick_node)) + g.log.info("Brick path is empty on all nodes") diff --git a/tests/functional/arbiter/test_data_self_heal_algorithm_diff_default.py 
b/tests/functional/arbiter/test_data_self_heal_algorithm_diff_default.py index fd6aaa98e..bbb30f271 100644 --- a/tests/functional/arbiter/test_data_self_heal_algorithm_diff_default.py +++ b/tests/functional/arbiter/test_data_self_heal_algorithm_diff_default.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,6 +15,7 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. from glusto.core import Glusto as g + from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.volume_ops import set_volume_options @@ -34,7 +35,7 @@ from glustolibs.misc.misc_libs import upload_scripts from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs) -@runs_on([['replicated', 'distributed-replicated'], +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs', 'cifs', 'nfs']]) class TestSelfHeal(GlusterBaseClass): """ @@ -46,26 +47,14 @@ class TestSelfHeal(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) - - # Overriding the volume type to specifically test the volume type - # Change from distributed-replicated to arbiter - if cls.volume_type == "distributed-replicated": - cls.volume['voltype'] = { - 'type': 'distributed-replicated', - 'dist_count': 2, - 'replica_count': 3, - 'arbiter_count': 1, - 'transport': 'tcp'} + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -74,7 +63,7 @@ class TestSelfHeal(GlusterBaseClass): def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() # Setup Volume and Mount Volume g.log.info("Starting to Setup Volume and Mount Volume") @@ -96,7 +85,7 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_data_self_heal_algorithm_diff_default(self): """ @@ -127,8 +116,9 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("Generating data for %s:%s", self.mounts[0].client_system, self.mounts[0].mountpoint) # Creating files - command = ("python %s create_files -f 100 %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + command = "/usr/bin/env python %s create_files -f 100 %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint) proc = g.run_async(self.mounts[0].client_system, command, user=self.mounts[0].user) @@ -143,10 +133,7 @@ class TestSelfHeal(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - 
bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks'])) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -165,8 +152,10 @@ class TestSelfHeal(GlusterBaseClass): all_mounts_procs = [] g.log.info("Modifying data for %s:%s", self.mounts[0].client_system, self.mounts[0].mountpoint) - command = ("python %s create_files -f 100 --fixed-file-size 1M %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + command = ("/usr/bin/env python %s create_files -f 100 " + "--fixed-file-size 1M %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, command, user=self.mounts[0].user) @@ -238,7 +227,8 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks online # and after bringing bricks online - self.assertItemsEqual(result_before_online, result_after_online, - 'Checksums are not equal') + self.assertEqual(sorted(result_before_online), + sorted(result_after_online), + 'Checksums are not equal') g.log.info('Checksums before bringing bricks online ' 'and after bringing bricks online are equal') diff --git a/tests/functional/arbiter/test_data_self_heal_algorithm_diff_heal_command.py b/tests/functional/arbiter/test_data_self_heal_algorithm_diff_heal_command.py index fadfc1150..0aa440af1 100755 --- a/tests/functional/arbiter/test_data_self_heal_algorithm_diff_heal_command.py +++ b/tests/functional/arbiter/test_data_self_heal_algorithm_diff_heal_command.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,6 +15,7 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
from glusto.core import Glusto as g + from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.volume_ops import set_volume_options @@ -35,7 +36,7 @@ from glustolibs.misc.misc_libs import upload_scripts from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs) -@runs_on([['replicated', 'distributed-replicated'], +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs', 'cifs', 'nfs']]) class TestSelfHeal(GlusterBaseClass): """ @@ -47,26 +48,14 @@ class TestSelfHeal(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) - - # Overriding the volume type to specifically test the volume type - # Change from distributed-replicated to arbiter - if cls.volume_type == "distributed-replicated": - cls.volume['voltype'] = { - 'type': 'distributed-replicated', - 'dist_count': 2, - 'replica_count': 3, - 'arbiter_count': 1, - 'transport': 'tcp'} + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -75,7 +64,7 @@ class TestSelfHeal(GlusterBaseClass): def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() # Setup Volume and Mount Volume g.log.info("Starting to Setup Volume and Mount Volume") @@ -97,7 +86,7 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_data_self_heal_algorithm_diff_heal_command(self): """ @@ -148,8 +137,9 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("Generating data for %s:%s", self.mounts[0].client_system, self.mounts[0].mountpoint) # Creating files - command = ("python %s create_files -f 100 %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + command = "/usr/bin/env python %s create_files -f 100 %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint) proc = g.run_async(self.mounts[0].client_system, command, user=self.mounts[0].user) @@ -171,10 +161,7 @@ class TestSelfHeal(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks'])) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -193,8 +180,10 @@ class TestSelfHeal(GlusterBaseClass): all_mounts_procs = [] g.log.info("Modifying data for %s:%s", self.mounts[0].client_system, self.mounts[0].mountpoint) - command = ("python %s create_files -f 100 --fixed-file-size 1M %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + command = ("/usr/bin/env python %s create_files -f 100 
" + "--fixed-file-size 1M %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, command, user=self.mounts[0].user) @@ -280,6 +269,7 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks offline # and after bringing bricks online - self.assertItemsEqual(result_before_online, result_after_online, - 'Checksums are not equal') + self.assertEqual(sorted(result_before_online), + sorted(result_after_online), + 'Checksums are not equal') g.log.info('Checksums are equal') diff --git a/tests/functional/arbiter/test_data_self_heal_algorithm_full_default.py b/tests/functional/arbiter/test_data_self_heal_algorithm_full_default.py index 710673a51..f4f13931a 100755 --- a/tests/functional/arbiter/test_data_self_heal_algorithm_full_default.py +++ b/tests/functional/arbiter/test_data_self_heal_algorithm_full_default.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2016 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,6 +15,7 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. from glusto.core import Glusto as g + from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.volume_ops import set_volume_options @@ -33,7 +34,7 @@ from glustolibs.misc.misc_libs import upload_scripts from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs) -@runs_on([['replicated', 'distributed-replicated'], +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs', 'cifs', 'nfs']]) class TestSelfHeal(GlusterBaseClass): """ @@ -45,26 +46,14 @@ class TestSelfHeal(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) - - # Overriding the volume type to specifically test the volume type - # Change from distributed-replicated to arbiter - if cls.volume_type == "distributed-replicated": - cls.volume['voltype'] = { - 'type': 'distributed-replicated', - 'dist_count': 2, - 'replica_count': 3, - 'arbiter_count': 1, - 'transport': 'tcp'} + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -73,7 +62,7 @@ class TestSelfHeal(GlusterBaseClass): def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() # Setup Volume and Mount Volume g.log.info("Starting to Setup Volume and Mount Volume") @@ -95,7 +84,7 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_data_self_heal_algorithm_full_default(self): """ @@ -126,8 +115,9 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("Generating data for %s:%s", 
self.mounts[0].client_system, self.mounts[0].mountpoint) # Creating files - command = ("python %s create_files -f 100 %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + command = "/usr/bin/env python %s create_files -f 100 %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint) proc = g.run_async(self.mounts[0].client_system, command, user=self.mounts[0].user) @@ -142,10 +132,7 @@ class TestSelfHeal(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks'])) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -164,8 +151,10 @@ class TestSelfHeal(GlusterBaseClass): all_mounts_procs = [] g.log.info("Modifying data for %s:%s", self.mounts[0].client_system, self.mounts[0].mountpoint) - command = ("python %s create_files -f 100 --fixed-file-size 1M %s" - % (self.script_upload_path, self.mounts[0].mountpoint)) + command = ("/usr/bin/env python %s create_files -f 100 " + "--fixed-file-size 1M %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) proc = g.run_async(self.mounts[0].client_system, command, user=self.mounts[0].user) @@ -237,7 +226,8 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks online # and after bringing bricks online - self.assertItemsEqual(result_before_online, result_after_online, - 'Checksums are not equal') + self.assertEqual(sorted(result_before_online), + sorted(result_after_online), + 'Checksums are not equal') g.log.info('Checksums before bringing bricks online ' 'and after bringing bricks online are equal') diff --git a/tests/functional/arbiter/test_data_self_heal_daemon_off.py b/tests/functional/arbiter/test_data_self_heal_daemon_off.py new file mode 100644 index 000000000..9faae85ca --- /dev/null +++ b/tests/functional/arbiter/test_data_self_heal_daemon_off.py @@ -0,0 +1,351 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
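A change repeated throughout this patch set replaces the Python-2-only call style GlusterBaseClass.setUpClass.im_func(cls) with cls.get_super_method(cls, 'setUpClass')(). The short sketch below is illustrative only (the class and method names are invented and it is not the glustolibs implementation of get_super_method); it shows why the old style had to go: Python 3 removed the im_func attribute from methods, while delegating through super() works on both interpreters.

# Illustrative sketch, not glustolibs code.
class BaseTestClass(object):
    @classmethod
    def setUpClass(cls):
        print("base setUpClass for %s" % cls.__name__)


class ArbiterTestClass(BaseTestClass):
    @classmethod
    def setUpClass(cls):
        # Old pattern removed by this patch; raises AttributeError on Python 3
        # because functions there have no im_func attribute:
        #     BaseTestClass.setUpClass.im_func(cls)
        # Py2/3-safe equivalent of cls.get_super_method(cls, 'setUpClass')():
        super(ArbiterTestClass, cls).setUpClass()


ArbiterTestClass.setUpClass()  # prints: base setUpClass for ArbiterTestClass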
+ +from time import sleep +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_ops import set_volume_options +from glustolibs.gluster.volume_libs import ( + verify_all_process_of_volume_are_online, + wait_for_volume_process_to_be_online) +from glustolibs.gluster.volume_libs import expand_volume +from glustolibs.gluster.brick_libs import (select_bricks_to_bring_offline, + bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + get_all_bricks) +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_heal_complete, + is_volume_in_split_brain, + is_shd_daemonized) +from glustolibs.gluster.rebalance_ops import (rebalance_start, + wait_for_rebalance_to_complete) +from glustolibs.gluster.heal_ops import trigger_heal +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs) + + +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs', 'nfs']]) +class TestSelfHeal(GlusterBaseClass): + """ + Description: + Arbiter Test cases related to + healing in default configuration of the volume + """ + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + g.log.info("Upload io scripts to clients %s for running IO on mounts", + cls.clients) + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + g.log.info("Starting to Setup Volume and Mount Volume") + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + + # Cleanup and umount volume + g.log.info("Starting to Unmount Volume and Cleanup Volume") + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_data_self_heal_daemon_off(self): + """ + Test Data-Self-Heal (heal command) + + Description: + - set the volume option + "metadata-self-heal": "off" + "entry-self-heal": "off" + "data-self-heal": "off" + - create IO + - Get arequal before getting bricks offline + - set the volume option + "self-heal-daemon": "off" + - bring down all brick processes from the selected set + - Get arequal after getting bricks offline and compare with + arequal before getting bricks offline + - modify the data + - bring bricks online + - set the volume option + "self-heal-daemon": "on" + - check daemons and start healing + - check if heal is completed + - check for split-brain + - add bricks + - do rebalance + - create 1k files + - while creating files - kill bricks and bring bricks online one by one + in cycle + - validate IO + """ + # pylint: 
disable=too-many-statements,too-many-locals + + # Setting options + g.log.info('Setting options...') + options = {"metadata-self-heal": "off", + "entry-self-heal": "off", + "data-self-heal": "off"} + ret = set_volume_options(self.mnode, self.volname, options) + self.assertTrue(ret, 'Failed to set options %s' % options) + g.log.info("Successfully set %s for volume %s", + options, self.volname) + + # Creating files on client side + g.log.info("Starting IO on %s:%s", self.mounts[0].client_system, + self.mounts[0].mountpoint) + cmd = ("/usr/bin/env python %s create_files -f 100" + " --fixed-file-size 1k %s" + % (self.script_upload_path, + self.mounts[0].mountpoint)) + ret, _, err = g.run(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + self.assertFalse(ret, 'Failed to create the data for %s: %s' + % (self.mounts[0].mountpoint, err)) + g.log.info('Created IO for %s successfully', + self.mounts[0].mountpoint) + + # Get arequal before getting bricks offline + g.log.info('Getting arequal before getting bricks offline...') + ret, arequals = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + result_before_offline = arequals[0].splitlines()[-1].split(':')[-1] + g.log.info('Getting arequal before getting bricks offline ' + 'is successful') + + # Setting options + g.log.info('Setting options...') + options = {"self-heal-daemon": "off"} + ret = set_volume_options(self.mnode, self.volname, options) + self.assertTrue(ret, 'Failed to set options %s' % options) + g.log.info("Option 'self-heal-daemon' is set to 'off' successfully") + + # Select bricks to bring offline + bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( + self.mnode, self.volname)) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] + + # Bring brick offline + g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' % + bricks_to_bring_offline) + + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks %s are not offline' + % bricks_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + bricks_to_bring_offline) + + # Get arequal after getting bricks offline + g.log.info('Getting arequal after getting bricks offline...') + ret, arequals = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + result_after_offline = arequals[0].splitlines()[-1].split(':')[-1] + g.log.info('Getting arequal after getting bricks offline ' + 'is successful') + + # Checking arequals before bringing bricks offline + # and after bringing bricks offline + self.assertEqual(result_before_offline, result_after_offline, + 'Checksums before and ' + 'after bringing bricks offline are not equal') + g.log.info('Checksums before and after bringing bricks offline ' + 'are equal') + + # Modify the data + g.log.info("Modifying data for %s:%s", self.mounts[0].client_system, + self.mounts[0].mountpoint) + cmd = ("/usr/bin/env python %s create_files -f 100" + " --fixed-file-size 10k %s" + % (self.script_upload_path, + self.mounts[0].mountpoint)) + ret, _, err = g.run(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + self.assertFalse(ret, 'Failed to modify the data for %s: %s' + % (self.mounts[0].mountpoint, err)) + g.log.info('Modified IO for %s successfully', + self.mounts[0].mountpoint) + + # Bring brick online + g.log.info('Bringing 
bricks %s online...', bricks_to_bring_offline) + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s online' % + bricks_to_bring_offline) + g.log.info('Bringing bricks %s online is successful', + bricks_to_bring_offline) + + # Setting options + g.log.info('Setting options...') + options = {"self-heal-daemon": "on"} + ret = set_volume_options(self.mnode, self.volname, options) + self.assertTrue(ret, 'Failed to set options %s' % options) + g.log.info("Option 'self-heal-daemon' is set to 'on' successfully") + + # Wait for volume processes to be online + g.log.info("Wait for volume processes to be online") + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to wait for volume %s processes to " + "be online", self.volname)) + g.log.info("Successful in waiting for volume %s processes to be " + "online", self.volname) + + # Verify volume's all process are online + g.log.info("Verifying volume's all process are online") + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, ("Volume %s : All process are not online" + % self.volname)) + g.log.info("Volume %s : All process are online", self.volname) + + # Wait for self-heal-daemons to be online + g.log.info("Waiting for self-heal-daemons to be online") + ret = is_shd_daemonized(self.all_servers) + self.assertTrue(ret, "Either No self heal daemon process found") + g.log.info("All self-heal-daemons are online") + + # Start healing + ret = trigger_heal(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not started') + g.log.info('Healing is started') + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + # Add bricks + g.log.info("Start adding bricks to volume...") + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, ("Failed to expand the volume %s", self.volname)) + g.log.info("Expanding volume is successful on " + "volume %s", self.volname) + + # Do rebalance + ret, _, _ = rebalance_start(self.mnode, self.volname) + self.assertEqual(ret, 0, 'Failed to start rebalance') + g.log.info('Rebalance is started') + + ret = wait_for_rebalance_to_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Rebalance is not completed') + g.log.info('Rebalance is completed successfully') + + # Create 1k files + all_mounts_procs = [] + g.log.info("Modifying data for %s:%s", self.mounts[0].client_system, + self.mounts[0].mountpoint) + command = ("/usr/bin/env python %s create_files -f 1000" + " --base-file-name newfile %s" + % (self.script_upload_path, self.mounts[0].mountpoint)) + + proc = g.run_async(self.mounts[0].client_system, command, + user=self.mounts[0].user) + all_mounts_procs.append(proc) + + # Kill all bricks in cycle + bricks_list = get_all_bricks(self.mnode, self.volname) + for brick in bricks_list: + # Bring brick offline + g.log.info('Bringing bricks %s offline', brick) + ret = bring_bricks_offline(self.volname, [brick]) + self.assertTrue(ret, 'Failed to bring 
bricks %s offline' % brick) + + ret = are_bricks_offline(self.mnode, self.volname, [brick]) + self.assertTrue(ret, 'Bricks %s are not offline' % brick) + g.log.info('Bringing bricks %s offline is successful', brick) + + # Introducing 30 second sleep when brick is down + g.log.info("Waiting for 30 seconds, with ongoing IO while " + "brick %s is offline", brick) + sleep(30) + + # Bring brick online + g.log.info('Bringing bricks %s online...', brick) + ret = bring_bricks_online(self.mnode, self.volname, + [brick]) + self.assertTrue(ret, 'Failed to bring bricks %s online' % + brick) + g.log.info('Bringing bricks %s online is successful', + brick) + + # Wait for volume processes to be online + g.log.info("Wait for volume processes to be online") + ret = wait_for_volume_process_to_be_online(self.mnode, + self.volname) + self.assertTrue(ret, ("Failed to wait for volume %s processes to " + "be online", self.volname)) + g.log.info("Successful in waiting for volume %s processes to be " + "online", self.volname) + + # Verify volume's all process are online + g.log.info("Verifying volume's all process are online") + ret = verify_all_process_of_volume_are_online(self.mnode, + self.volname) + self.assertTrue(ret, ("Volume %s : All process are not online" + % self.volname)) + g.log.info("Volume %s : All process are online", self.volname) + + # Wait for self-heal-daemons to be online + g.log.info("Waiting for self-heal-daemons to be online") + ret = is_shd_daemonized(self.all_servers) + self.assertTrue(ret, "Either no self heal daemon process found or " + "more than one self heal daemon process " + "found") + g.log.info("All self-heal-daemons are online") + + # Validate IO + self.assertTrue( + validate_io_procs(all_mounts_procs, self.mounts), + "IO failed on some of the clients") diff --git a/tests/functional/arbiter/test_entry_self_heal_heal_command.py b/tests/functional/arbiter/test_entry_self_heal_heal_command.py index d1b8cb3e8..64c6c2339 100644 --- a/tests/functional/arbiter/test_entry_self_heal_heal_command.py +++ b/tests/functional/arbiter/test_entry_self_heal_heal_command.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,6 +15,7 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
from glusto.core import Glusto as g + from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.volume_ops import set_volume_options @@ -36,7 +37,7 @@ from glustolibs.io.utils import (collect_mounts_arequal, list_all_files_and_dirs_mounts) -@runs_on([['replicated', 'distributed-replicated'], +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs', 'cifs', 'nfs']]) class TestSelfHeal(GlusterBaseClass): """ @@ -48,26 +49,14 @@ class TestSelfHeal(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) - - # Overriding the volume type to specifically test the volume type - # Change from distributed-replicated to arbiter - if cls.volume_type == "distributed-replicated": - cls.volume['voltype'] = { - 'type': 'distributed-replicated', - 'dist_count': 2, - 'replica_count': 3, - 'arbiter_count': 1, - 'transport': 'tcp'} + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -76,7 +65,7 @@ class TestSelfHeal(GlusterBaseClass): def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() # Setup Volume and Mount Volume g.log.info("Starting to Setup Volume and Mount Volume") @@ -97,7 +86,7 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_entry_self_heal_heal_command(self): """ @@ -145,12 +134,13 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("Starting IO on all mounts...") g.log.info("Starting IO on %s:%s", self.mounts[0].client_system, self.mounts[0].mountpoint) - cmd = ("python %s create_deep_dirs_with_files " + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " "--dir-length 2 " "--dir-depth 2 " "--max-num-of-dirs 2 " - "--num-of-files 20 %s/files" % (self.script_upload_path, - self.mounts[0].mountpoint)) + "--num-of-files 20 %s/files" % ( + self.script_upload_path, + self.mounts[0].mountpoint)) ret, _, err = g.run(self.mounts[0].client_system, cmd, user=self.mounts[0].user) self.assertFalse(ret, 'Failed to create the data for %s: %s' @@ -160,13 +150,14 @@ class TestSelfHeal(GlusterBaseClass): # Command list to do different operations with data - # create, rename, copy and delete - cmd_list = ["python %s create_files -f 20 %s/files", - "python %s mv %s/files", - # 'copy' command works incorrect. disable until fixed - # "python %s copy --dest-dir %s/new_dir %s/files", - "python %s delete %s"] - - for cmd in cmd_list: + cmds = ( + "/usr/bin/env python %s create_files -f 20 %s/files", + "/usr/bin/env python %s mv %s/files", + # 'copy' command works incorrect. 
disable until fixed + # "/usr/bin/env python %s copy --dest-dir %s/new_dir %s/files", + "/usr/bin/env python %s delete %s", + ) + for cmd in cmds: # Get arequal before getting bricks offline g.log.info('Getting arequal before getting bricks offline...') ret, arequals = collect_mounts_arequal(self.mounts) @@ -186,10 +177,8 @@ class TestSelfHeal(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks'])) + bricks_to_bring_offline = ( + bricks_to_bring_offline_dict['volume_bricks']) # Bring brick offline g.log.info('Bringing bricks %s offline...', diff --git a/tests/functional/arbiter/test_gfid_self_heal.py b/tests/functional/arbiter/test_gfid_self_heal.py new file mode 100644 index 000000000..9ed4a8767 --- /dev/null +++ b/tests/functional/arbiter/test_gfid_self_heal.py @@ -0,0 +1,206 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
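Another pattern repeated across these tests: the old brick-selection code built its list with filter(None, hot_tier_bricks + cold_tier_bricks + volume_bricks), which on Python 3 returns a lazy filter object rather than a list, and the tier keys no longer apply, so the patch keeps only the 'volume_bricks' entry. A small self-contained illustration follows; the dictionary contents are invented for the example and do not come from a real select_bricks_to_bring_offline() call.

# Hypothetical sample of the selection dictionary; real values depend on the
# volume layout.
offline_selection = {'volume_bricks': ['server1:/bricks/brick0',
                                       'server2:/bricks/brick1']}

# New style: take the volume bricks directly -- already a plain list.
bricks_to_bring_offline = offline_selection['volume_bricks']

# Old-style gotcha on Python 3: filter() is lazy, so without an explicit
# list() the result has no len()/indexing and is exhausted after one pass.
lazy = filter(None, offline_selection['volume_bricks'])
print(type(lazy).__name__)                    # 'filter' on Py3, 'list' on Py2
print(list(lazy) == bricks_to_bring_offline)  # True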
+ +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import ( + verify_all_process_of_volume_are_online, + wait_for_volume_process_to_be_online) +from glustolibs.gluster.brick_libs import ( + select_volume_bricks_to_bring_offline, + bring_bricks_offline, + bring_bricks_online, + are_bricks_offline) +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_volume_in_split_brain) +from glustolibs.io.utils import (collect_mounts_arequal) +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.misc.misc_libs import upload_scripts + + +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestGFIDSelfHeal(GlusterBaseClass): + + """ + Description: + Arbiter Test cases related to GFID self heal + """ + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_gfid_self_heal(self): + """ + Test GFID self heal + Description: + - Creating directory test_gfid_self_heal + - Write Deep directories and files + - Get arequal before getting bricks offline + - Select bricks to bring offline + - Bring brick offline + - Delete directory on mountpoint where data is written + - Create the same directory and write same data + - Bring bricks online + - Wait for volume processes to be online + - Verify volume's all process are online + - Monitor heal completion + - Check for split-brain + - Get arequal after getting bricks online + """ + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + # Creating directory test_gfid_self_heal + ret = mkdir(self.mounts[0].client_system, "{}/test_gfid_self_heal" + .format(self.mounts[0].mountpoint)) + self.assertTrue(ret, "Failed to create directory") + g.log.info("Directory 'test_gfid_self_heal' on %s created " + "successfully", self.mounts[0]) + + # Write Deep directories and files + count = 1 + for mount_obj in self.mounts: + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d --dir-depth 2 " + "--dir-length 10 --max-num-of-dirs 5 " + "--num-of-files 5 %s/dir1" % ( + self.script_upload_path, count, + mount_obj.mountpoint)) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to create files on mountpoint") + g.log.info("Successfully created files on 
mountpoint") + count += 10 + + # Get arequal before getting bricks offline + ret, result_before_offline = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Arequal after getting bricks offline ' + 'is %s', result_before_offline) + + # Select bricks to bring offline + bricks_to_bring_offline = select_volume_bricks_to_bring_offline( + self.mnode, self.volname) + self.assertIsNotNone(bricks_to_bring_offline, "List is empty") + + # Bring brick offline + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} offline'. + format(bricks_to_bring_offline)) + + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks {} are not offline'. + format(bricks_to_bring_offline)) + g.log.info('Bringing bricks %s offline is successful', + bricks_to_bring_offline) + + # Delete directory on mountpoint where data is written + cmd = ('rm -rf -v %s/test_gfid_self_heal' % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to delete directory") + g.log.info("Directory deleted successfully for %s", self.mounts[0]) + + # Create the same directory and write same data + ret = mkdir(self.mounts[0].client_system, "{}/test_gfid_self_heal" + .format(self.mounts[0].mountpoint)) + self.assertTrue(ret, "Failed to create directory") + g.log.info("Directory 'test_gfid_self_heal' on %s created " + "successfully", self.mounts[0]) + + # Write the same files again + count = 1 + for mount_obj in self.mounts: + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d --dir-depth 2 " + "--dir-length 10 --max-num-of-dirs 5 " + "--num-of-files 5 %s/dir1" % ( + self.script_upload_path, count, + mount_obj.mountpoint)) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to create files on mountpoint") + g.log.info("Successfully created files on mountpoint") + count += 10 + + # Bring bricks online + ret = bring_bricks_online( + self.mnode, self.volname, + bricks_to_bring_offline, + bring_bricks_online_methods=['volume_start_force']) + self.assertTrue(ret, 'Failed to bring bricks {} online'.format + (bricks_to_bring_offline)) + g.log.info('Bringing bricks %s online is successful', + bricks_to_bring_offline) + + # Wait for volume processes to be online + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to wait for volume {} processes to " + "be online".format(self.volname))) + g.log.info("Successful in waiting for volume %s processes to be " + "online", self.volname) + + # Verify volume's all process are online + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, ("Volume {} : All process are not online".format + (self.volname))) + g.log.info("Volume %s : All process are online", self.volname) + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + # Get arequal after getting bricks online + ret, result_after_online = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Arequal after getting bricks online ' + 'is %s', 
result_after_online) diff --git a/tests/functional/arbiter/test_gluster_clone_heal.py b/tests/functional/arbiter/test_gluster_clone_heal.py new file mode 100644 index 000000000..94603c701 --- /dev/null +++ b/tests/functional/arbiter/test_gluster_clone_heal.py @@ -0,0 +1,209 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import ( + verify_all_process_of_volume_are_online, + wait_for_volume_process_to_be_online) +from glustolibs.gluster.brick_libs import ( + select_volume_bricks_to_bring_offline, + bring_bricks_offline, + bring_bricks_online, + are_bricks_offline) +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_volume_in_split_brain) +from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs) +from glustolibs.gluster.glusterdir import mkdir + + +@runs_on([['arbiter', 'distributed-arbiter', + 'replicated', 'distributed-replicated'], ['glusterfs']]) +class TestGlusterCloneHeal(GlusterBaseClass): + """ + Description: + Arbiter Test cases related to self heal + of data and hardlink + """ + def setUp(self): + # Calling GlusterBaseClass + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_gluster_clone_heal(self): + """ + Test gluster compilation on mount point(Heal command) + - Creating directory test_compilation + - Compile gluster on mountpoint + - Select bricks to bring offline + - Bring brick offline + - Validate IO + - Bring bricks online + - Wait for volume processes to be online + - Verify volume's all process are online + - Monitor heal completion + - Check for split-brain + - Get arequal after getting bricks online + - Compile gluster on mountpoint again + - Select bricks to bring offline + - Bring brick offline + - Validate IO + - Bring bricks online + - Wait for volume processes to be online + - Verify volume's all process are online + - Monitor heal completion + - Check for split-brain + - Get arequal after getting bricks online + """ + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + # 
Creating directory test_compilation + ret = mkdir(self.mounts[0].client_system, "{}/test_compilation" + .format(self.mounts[0].mountpoint)) + self.assertTrue(ret, "Failed to create directory") + g.log.info("Directory 'test_compilation' on %s created " + "successfully", self.mounts[0]) + + # Compile gluster on mountpoint + cmd = ("cd %s/test_compilation ; rm -rf glusterfs; git clone" + " git://github.com/gluster/glusterfs.git ; cd glusterfs ;" + " ./autogen.sh ;./configure CFLAGS='-g3 -O0 -DDEBUG'; make ;" + " cd ../..;" % self.mounts[0].mountpoint) + proc = g.run_async(self.mounts[0].client_system, cmd) + + # Select bricks to bring offline + bricks_to_bring_offline = select_volume_bricks_to_bring_offline( + self.mnode, self.volname) + self.assertIsNotNone(bricks_to_bring_offline, "List is empty") + + # Bring brick offline + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} offline'. + format(bricks_to_bring_offline)) + + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks {} are not offline'. + format(bricks_to_bring_offline)) + g.log.info('Bringing bricks %s offline is successful', + bricks_to_bring_offline) + + # Validate IO + self.assertTrue( + validate_io_procs([proc], self.mounts[0]), + "IO failed on some of the clients" + ) + + # Bring bricks online + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} online'.format + (bricks_to_bring_offline)) + + # Wait for volume processes to be online + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to wait for volume {} processes to " + "be online".format(self.volname))) + + # Verify volume's all process are online + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, ("Volume {} : All process are not online".format + (self.volname))) + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + # Get arequal after getting bricks online + ret, result_after_online = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info("Arequal of mountpoint %s", result_after_online) + + # Compile gluster on mountpoint again + proc1 = g.run_async(self.mounts[0].client_system, cmd) + + # Select bricks to bring offline + bricks_to_bring_offline = select_volume_bricks_to_bring_offline( + self.mnode, self.volname) + self.assertIsNotNone(bricks_to_bring_offline, "List is empty") + + # Bring brick offline + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} offline'. + format(bricks_to_bring_offline)) + + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks {} are not offline'. 
+ format(bricks_to_bring_offline)) + + # Validate IO + self.assertTrue( + validate_io_procs([proc1], self.mounts[0]), + "IO failed on some of the clients" + ) + + # Bring bricks online + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} online'.format + (bricks_to_bring_offline)) + + # Wait for volume processes to be online + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to wait for volume {} processes to " + "be online".format(self.volname))) + + # Verify volume's all process are online + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, ("Volume {} : All process are not online".format + (self.volname))) + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + + # Get arequal after getting bricks online + ret, result_after_online = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info("Arequal of mountpoint %s", result_after_online) diff --git a/tests/functional/arbiter/test_handling_data_split_brain_of_files_heal_command.py b/tests/functional/arbiter/test_handling_data_split_brain_of_files_heal_command.py index 37820ba1a..1cb4f2c49 100755 --- a/tests/functional/arbiter/test_handling_data_split_brain_of_files_heal_command.py +++ b/tests/functional/arbiter/test_handling_data_split_brain_of_files_heal_command.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,6 +15,7 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
from glusto.core import Glusto as g + from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.volume_ops import set_volume_options @@ -31,7 +32,7 @@ from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs, from glustolibs.misc.misc_libs import upload_scripts -@runs_on([['replicated'], +@runs_on([['arbiter'], ['glusterfs', 'nfs', 'cifs']]) class TestArbiterSelfHeal(GlusterBaseClass): """ @@ -43,16 +44,14 @@ class TestArbiterSelfHeal(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -61,7 +60,7 @@ class TestArbiterSelfHeal(GlusterBaseClass): def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() self.all_mounts_procs = [] self.io_validation_complete = False @@ -103,7 +102,7 @@ class TestArbiterSelfHeal(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_handling_data_split_brain(self): """ @@ -314,9 +313,9 @@ class TestArbiterSelfHeal(GlusterBaseClass): for mount_obj in self.mounts: g.log.info("Start heal for %s:%s", mount_obj.client_system, mount_obj.mountpoint) - command = ("python %s read %s" - % (self.script_upload_path, - self.mounts[0].mountpoint)) + command = "/usr/bin/env python %s read %s" % ( + self.script_upload_path, + self.mounts[0].mountpoint) ret, _, err = g.run(mount_obj.client_system, command) self.assertFalse(ret, err) g.log.info("Heal triggered for %s:%s", diff --git a/tests/functional/arbiter/test_heal_full_after_deleting_the_files_from_bricks.py b/tests/functional/arbiter/test_heal_full_after_deleting_the_files_from_bricks.py new file mode 100755 index 000000000..8e11af6e4 --- /dev/null +++ b/tests/functional/arbiter/test_heal_full_after_deleting_the_files_from_bricks.py @@ -0,0 +1,202 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_heal_complete, + is_volume_in_split_brain, + is_shd_daemon_running) +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import (collect_mounts_arequal, + validate_io_procs, + list_all_files_and_dirs_mounts, + wait_for_io_to_complete) +from glustolibs.gluster.gluster_init import (start_glusterd, + stop_glusterd) +from glustolibs.misc.misc_libs import kill_process + + +@runs_on([['arbiter', 'distributed-arbiter'], + ['glusterfs', 'nfs']]) +class TestArbiterSelfHeal(GlusterBaseClass): + """ + Description: + Arbiter Test cases related to + healing in default configuration of the volume + """ + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, [cls.script_upload_path]) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + self.all_mounts_procs = [] + self.io_validation_complete = False + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + If test method failed before validating IO, tearDown waits for the + IO's to complete and checks for the IO exit status + + Cleanup and umount volume + """ + if not self.io_validation_complete: + ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts) + if not ret: + raise ExecutionError("IO failed on some of the clients") + g.log.info("IO is successful on all mounts") + + # List all files and dirs created + ret = list_all_files_and_dirs_mounts(self.mounts) + if not ret: + raise ExecutionError("Failed to list all files and dirs") + + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_heal_full_after_deleting_files(self): + """ + - Create IO + - Calculate arequal from mount + - kill glusterd process and glustershd process on arbiter nodes + - Delete data from backend from the arbiter nodes + - Start glusterd process and force start the volume + to bring the processes online + - Check if heal is completed + - Check for split-brain + - Calculate arequal checksum and compare it + """ + # pylint: disable=too-many-locals,too-many-statements + # Creating files on client side + for mount_obj in self.mounts: + g.log.info("Generating data for %s:%s", + mount_obj.client_system, mount_obj.mountpoint) + # Create dirs with file + command = ("/usr/bin/env python %s create_deep_dirs_with_files " + "-d 2 -l 2 -n 2 -f 20 %s" + % (self.script_upload_path, mount_obj.mountpoint)) + + proc = 
g.run_async(mount_obj.client_system, command, + user=mount_obj.user) + self.all_mounts_procs.append(proc) + self.io_validation_complete = False + + # Validate IO + ret = validate_io_procs(self.all_mounts_procs, self.mounts) + self.assertTrue(ret, "IO failed on some of the clients") + self.io_validation_complete = True + g.log.info("IO is successful on all mounts") + + # Get arequal before killing gluster processes on arbiter node + ret, result_before_killing_procs = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal before killing gluster processes ' + 'is successful') + + # Kill glusterd process and glustershd process on arbiter node + subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + for subvol in subvols: + arbiter = subvol[-1] + node, brick_path = arbiter.split(':') + # Stop glusterd + ret = stop_glusterd(node) + self.assertTrue(ret, "Failed to stop the glusterd on arbiter node") + # Stop glustershd + ret = kill_process(node, "glustershd") + if not ret: + # Validate glustershd process is not running + self.assertFalse( + is_shd_daemon_running(self.mnode, node, self.volname), + "The glustershd process is still running.") + g.log.info('Killed glusterd and glustershd for all arbiter ' + 'brick successfully') + + # Delete data from backend from the arbiter node + for subvol in subvols: + arbiter = subvol[-1] + # Clearing the arbiter bricks + node, brick_path = arbiter.split(':') + ret, _, err = g.run(node, 'rm -rf %s/*' % brick_path) + self.assertFalse( + ret, err) + g.log.info('Clearing for all arbiter brick is successful') + + # Start glusterd process on each arbiter + for subvol in subvols: + arbiter = subvol[-1] + node, brick_path = arbiter.split(':') + ret = start_glusterd(node) + self.assertTrue( + ret, "Failed to start glusterd on the arbiter node") + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + # Get arequal after healing + ret, result_after_healing = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal after healing ' + 'is successful') + + # Comparing arequals before killing arbiter processes + # and after healing + self.assertEqual( + result_before_killing_procs, result_after_healing, + 'Arequals before killing arbiter ' + 'processes and after healing are not equal') + + g.log.info('Arequals before killing arbiter ' + 'processes and after healing are equal') diff --git a/tests/functional/arbiter/test_metadata_self_heal.py b/tests/functional/arbiter/test_metadata_self_heal.py index 3d0a37447..0b2708438 100755 --- a/tests/functional/arbiter/test_metadata_self_heal.py +++ b/tests/functional/arbiter/test_metadata_self_heal.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -32,10 +32,11 @@ from glustolibs.gluster.heal_libs import (monitor_heal_completion, is_shd_daemonized) from glustolibs.gluster.heal_ops import trigger_heal from glustolibs.misc.misc_libs import upload_scripts -from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs) +from glustolibs.io.utils import (collect_mounts_arequal, + wait_for_io_to_complete) -@runs_on([['replicated', 'distributed-replicated'], +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs', 'nfs']]) class TestMetadataSelfHeal(GlusterBaseClass): """ @@ -75,26 +76,14 @@ class TestMetadataSelfHeal(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) - - # Overriding the volume type to specifically test the volume type - # Change from distributed-replicated to arbiter - if cls.volume_type == "distributed-replicated": - cls.volume['voltype'] = { - 'type': 'distributed-replicated', - 'dist_count': 2, - 'replica_count': 3, - 'arbiter_count': 1, - 'transport': 'tcp'} + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -103,7 +92,7 @@ class TestMetadataSelfHeal(GlusterBaseClass): def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() # Create user qa for mount_object in self.mounts: @@ -139,7 +128,7 @@ class TestMetadataSelfHeal(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_metadata_self_heal(self): """ @@ -203,10 +192,10 @@ class TestMetadataSelfHeal(GlusterBaseClass): user=self.mounts[0].user) all_mounts_procs.append(proc) - # Validate IO + # wait for io to complete self.assertTrue( - validate_io_procs(all_mounts_procs, self.mounts), - "IO failed on some of the clients") + wait_for_io_to_complete(all_mounts_procs, self.mounts), + "Io failed to complete on some of the clients") # Setting options g.log.info('Setting options...') @@ -218,10 +207,7 @@ class TestMetadataSelfHeal(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks'])) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -343,8 +329,9 @@ class TestMetadataSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks online # and after bringing bricks online - self.assertItemsEqual(result_before_online, result_after_online, - 'Checksums are not equal') + 
self.assertEqual(sorted(result_before_online), + sorted(result_after_online), + 'Checksums are not equal') g.log.info('Checksums before bringing bricks online ' 'and after bringing bricks online are equal') @@ -367,11 +354,6 @@ class TestMetadataSelfHeal(GlusterBaseClass): ret, out, err = g.run(node, command) file_list = out.split() - g.log.info('Checking for user and group on %s...', node) - conn = g.rpyc_get_connection(node) - if conn is None: - raise Exception("Unable to get connection on node %s" % node) - for file_name in file_list: file_to_check = '%s/%s/%s' % (nodes_to_check[node], test_meta_data_self_heal_folder, @@ -379,26 +361,30 @@ class TestMetadataSelfHeal(GlusterBaseClass): g.log.info('Checking for permissions, user and group for %s', file_name) + # Check for permissions - permissions = oct( - conn.modules.os.stat(file_to_check).st_mode)[-3:] - self.assertEqual(permissions, '444', + cmd = ("stat -c '%a %n' {} | awk '{{print $1}}'" + .format(file_to_check)) + ret, permissions, _ = g.run(node, cmd) + self.assertEqual(permissions.split('\n')[0], '444', 'Permissions %s is not equal to 444' % permissions) g.log.info("Permissions are '444' for %s", file_name) # Check for user - uid = conn.modules.os.stat(file_to_check).st_uid - username = conn.modules.pwd.getpwuid(uid).pw_name - self.assertEqual(username, 'qa', 'User %s is not equal qa' + cmd = ("ls -ld {} | awk '{{print $3}}'" + .format(file_to_check)) + ret, username, _ = g.run(node, cmd) + self.assertEqual(username.split('\n')[0], + 'qa', 'User %s is not equal qa' % username) g.log.info("User is 'qa' for %s", file_name) # Check for group - gid = conn.modules.os.stat(file_to_check).st_gid - groupname = conn.modules.grp.getgrgid(gid).gr_name - self.assertEqual(groupname, 'qa', 'Group %s is not equal qa' + cmd = ("ls -ld {} | awk '{{print $4}}'" + .format(file_to_check)) + ret, groupname, _ = g.run(node, cmd) + self.assertEqual(groupname.split('\n')[0], + 'qa', 'Group %s is not equal qa' % groupname) g.log.info("Group is 'qa' for %s", file_name) - - g.rpyc_close_connection(host=node) diff --git a/tests/functional/arbiter/test_metadata_self_heal_on_open_fd.py b/tests/functional/arbiter/test_metadata_self_heal_on_open_fd.py new file mode 100644 index 000000000..8e4df5e9f --- /dev/null +++ b/tests/functional/arbiter/test_metadata_self_heal_on_open_fd.py @@ -0,0 +1,244 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
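The arequal comparisons in these hunks switch from assertItemsEqual, which exists only in Python 2's unittest (Python 3 renamed it assertCountEqual), to comparing sorted lists. A minimal self-contained check of the replacement pattern is below; the checksum strings are invented for illustration and do not come from a real arequal run.

import unittest


class ChecksumComparisonSketch(unittest.TestCase):
    """Illustrative only: order-insensitive comparison without assertItemsEqual."""

    def test_sorted_comparison(self):
        result_before_online = ['subvol1: 0000abcd', 'subvol0: 0000ef01']
        result_after_online = ['subvol0: 0000ef01', 'subvol1: 0000abcd']
        # Same effect as the removed assertItemsEqual for these lists;
        # assertCountEqual would be the direct Python 3 replacement.
        self.assertEqual(sorted(result_before_online),
                         sorted(result_after_online),
                         'Checksums are not equal')


if __name__ == '__main__':
    unittest.main()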
+ +import os +import copy +from socket import gethostbyname +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + get_all_bricks) +from glustolibs.gluster.heal_libs import is_volume_in_split_brain +from glustolibs.gluster.heal_ops import get_heal_info_summary +from glustolibs.gluster.glusterfile import get_file_stat +from glustolibs.gluster.volume_ops import get_volume_info +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.glusterfile import get_pathinfo +from glustolibs.gluster.lib_utils import (collect_bricks_arequal, + add_user, del_user) +from glustolibs.io.utils import collect_mounts_arequal + + +@runs_on([['replicated', 'distributed-replicated', 'arbiter', + 'distributed-arbiter'], + ['glusterfs']]) +class TestMetadataSelfHealOpenfd(GlusterBaseClass): + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + self.user = "qa" + self.nodes = [] + self.nodes = copy.deepcopy(self.servers) + self.nodes.append(self.clients[0]) + + # Create user for changing ownership + for node in self.nodes: + ret = add_user(node, self.user) + self.assertTrue(ret, "Failed to create user") + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup and Mount_Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + for node in self.nodes: + del_user(node, self.user) + + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + def _verify_stat_info(self, nodes_to_check, test_file): + """ + Helper method to verify stat on all bricks and client. + """ + for node in nodes_to_check: + filepath = nodes_to_check[node] + "/" + test_file + stat_dict = get_file_stat(node, filepath) + self.assertIsNotNone(stat_dict, "stat on {} failed" + .format(test_file)) + self.assertEqual(stat_dict['username'], self.user, + "Expected qa but found {}" + .format(stat_dict['username'])) + self.assertEqual(stat_dict['groupname'], self.user, + "Expected gid qa but found {}" + .format(stat_dict['groupname'])) + self.assertEqual(stat_dict['access'], '777', + "Expected permission 777 but found {}" + .format(stat_dict['access'])) + + def test_metadata_self_heal_on_open_fd(self): + """ + Description: Pro-active metadata self heal on open fd + + Steps : + 1) Create a volume. + 2) Mount the volume using FUSE. + 3) Create test executable on volume mount. + 4) While test execution is in progress, bring down brick1. + 5) From mount point, change ownership, permission, group id of + the test file. + 6) While test execution is in progress, bring back brick1 online. + 7) Do stat on the test file to check ownership, permission, + group id on mount point and on bricks + 8) Stop test execution. + 9) Do stat on the test file to check ownership, permission, + group id on mount point and on bricks. + 10) There should be no pending heals in the heal info command. + 11) There should be no split-brain. + 12) Calculate arequal of the bricks and mount point and it + should be same. 
+ """ + # pylint: disable=too-many-statements,too-many-locals + # pylint: disable=too-many-branches + bricks_list = get_all_bricks(self.mnode, self.volname) + self.assertIsNotNone(bricks_list, 'Brick list is None') + client = self.clients[0] + + # Create test executable file on mount point + m_point = self.mounts[0].mountpoint + test_file = "testfile.sh" + cmd = ("echo 'while true; do echo 'Press CTRL+C to stop execution';" + " done' >> {}/{}".format(m_point, test_file)) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, "Failed to create test file") + + # Execute the test file + cmd = "cd {}; sh {}".format(m_point, test_file) + g.run_async(client, cmd) + + # Get pid of the test file + _cmd = "ps -aux | grep -v grep | grep testfile.sh | awk '{print $2}'" + ret, out, _ = g.run(client, _cmd) + self.assertEqual(ret, 0, "Failed to get pid of test file execution") + + # Bring brick1 offline + ret = bring_bricks_offline(self.volname, [bricks_list[1]]) + self.assertTrue(ret, 'Failed to bring bricks {} ' + 'offline'.format(bricks_list[1])) + + ret = are_bricks_offline(self.mnode, self.volname, + [bricks_list[1]]) + self.assertTrue(ret, 'Bricks {} are not ' + 'offline'.format(bricks_list[1])) + + # change uid, gid and permission from client + cmd = "chown {} {}/{}".format(self.user, m_point, test_file) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, "chown failed") + + cmd = "chgrp {} {}/{}".format(self.user, m_point, test_file) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, "chgrp failed") + + cmd = "chmod 777 {}/{}".format(m_point, test_file) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, "chown failed") + + # Bring brick1 online + ret = bring_bricks_online(self.mnode, self.volname, + [bricks_list[1]]) + self.assertTrue(ret, 'Failed to bring bricks {} online' + .format(bricks_list[1])) + + ret = get_pathinfo(client, "{}/{}" + .format(m_point, test_file)) + self.assertIsNotNone(ret, "Unable to get " + "trusted.glusterfs.pathinfo of file") + nodes_to_check = {} + bricks_list = [] + for brick in ret['brickdir_paths']: + node, brick_path = brick.split(':') + if node[0:2].isdigit(): + nodes_to_check[node] = os.path.dirname(brick_path) + path = node + ":" + os.path.dirname(brick_path) + else: + nodes_to_check[gethostbyname(node)] = (os.path.dirname( + brick_path)) + path = gethostbyname(node) + ":" + os.path.dirname(brick_path) + bricks_list.append(path) + nodes_to_check[client] = m_point + + # Verify that the changes are successful on bricks and client + self._verify_stat_info(nodes_to_check, test_file) + + # Kill the test executable file + for pid in out.split('\n')[:-1]: + cmd = "kill -s 9 {}".format(pid) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, "Failed to kill test file execution") + + # Verify that the changes are successful on bricks and client + self._verify_stat_info(nodes_to_check, test_file) + + # Verify there are no pending heals + heal_info = get_heal_info_summary(self.mnode, self.volname) + self.assertIsNotNone(heal_info, 'Unable to get heal info') + for brick in bricks_list: + self.assertEqual(int(heal_info[brick]['numberOfEntries']), + 0, ("Pending heal on brick {} ".format(brick))) + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + # Get arequal for mount + ret, arequals = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + mount_point_total 
= arequals[0].splitlines()[-1].split(':')[-1] + + # Collecting data bricks + vol_info = get_volume_info(self.mnode, self.volname) + self.assertIsNotNone(vol_info, 'Unable to get volume info') + data_brick_list = [] + for brick in bricks_list: + for brick_info in vol_info[self.volname]["bricks"]["brick"]: + if brick_info["name"] == brick: + if brick_info["isArbiter"] == "0": + data_brick_list.append(brick) + bricks_list = data_brick_list + + # Get arequal on bricks and compare with mount_point_total + # It should be the same + arbiter = self.volume_type.find('arbiter') >= 0 + subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + stop = len(subvols[0]) - 1 if arbiter else len(subvols[0]) + for subvol in subvols: + subvol = [i for i in subvol if i in bricks_list] + if subvol: + ret, arequal = collect_bricks_arequal(subvol[0:stop]) + self.assertTrue(ret, 'Unable to get arequal checksum ' + 'on {}'.format(subvol[0:stop])) + self.assertEqual(len(set(arequal)), 1, 'Mismatch of arequal ' + 'checksum among {} is ' + 'identified'.format(subvol[0:stop])) + brick_total = arequal[-1].splitlines()[-1].split(':')[-1] + self.assertEqual(brick_total, mount_point_total, + "Arequals for mountpoint and {} " + "are not equal".format(subvol[0:stop])) diff --git a/tests/functional/arbiter/test_mount_point_while_deleting_files.py b/tests/functional/arbiter/test_mount_point_while_deleting_files.py index 1bbdf279f..68f880663 100755 --- a/tests/functional/arbiter/test_mount_point_while_deleting_files.py +++ b/tests/functional/arbiter/test_mount_point_while_deleting_files.py @@ -1,4 +1,4 @@ -# Copyright (C) 2016-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2016-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,10 +14,12 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
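The new test_metadata_self_heal_on_open_fd.py above compares the mount arequal with the arequal of each replica's data bricks while skipping the arbiter brick, which holds no file data. A sketch of that comparison factored into a helper, assuming the same glustolibs functions the test imports; the helper name is illustrative:

from glustolibs.gluster.volume_libs import get_subvols
from glustolibs.gluster.lib_utils import collect_bricks_arequal
from glustolibs.io.utils import collect_mounts_arequal


def mount_matches_data_bricks(mnode, volname, mounts, arbiter=True):
    """Return True when every data brick checksum equals the mount checksum."""
    ret, arequals = collect_mounts_arequal(mounts)
    if not ret:
        return False
    mount_total = arequals[0].splitlines()[-1].split(':')[-1]

    subvols = get_subvols(mnode, volname)['volume_subvols']
    for subvol in subvols:
        # The arbiter brick is the last brick of each subvolume; drop it,
        # since it stores metadata only and its checksum will differ.
        data_bricks = subvol[:-1] if arbiter else subvol
        ret, brick_arequals = collect_bricks_arequal(data_bricks)
        if not ret:
            return False
        for brick_arequal in brick_arequals:
            brick_total = brick_arequal.splitlines()[-1].split(':')[-1]
            if brick_total != mount_total:
                return False
    return True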
- import os + from glusto.core import Glusto as g + from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.glusterdir import rmdir from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on from glustolibs.gluster.volume_libs import setup_volume, cleanup_volume from glustolibs.gluster.volume_ops import get_volume_list @@ -32,22 +34,19 @@ from glustolibs.gluster.mount_ops import (mount_volume, from glustolibs.misc.misc_libs import upload_scripts -@runs_on([['replicated'], - ['glusterfs']]) +@runs_on([['arbiter'], ['glusterfs']]) class VolumeSetDataSelfHealTests(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -55,69 +54,62 @@ class VolumeSetDataSelfHealTests(GlusterBaseClass): cls.clients) # Setup Volumes - if cls.volume_type == "replicated": - cls.volume_configs = [] - cls.mounts_dict_list = [] - # Define two replicated volumes - for i in range(1, 3): - cls.volume['voltype'] = { - 'type': 'replicated', - 'replica_count': 3, - 'arbiter_count': 1, - 'transport': 'tcp'} - - volume_config = {'name': 'testvol_%s_%d' - % (cls.volume['voltype']['type'], i), - 'servers': cls.servers, - 'voltype': cls.volume['voltype']} - cls.volume_configs.append(volume_config) - - # redefine mounts - for client in cls.all_clients_info.keys(): - mount = { - 'protocol': cls.mount_type, - 'server': cls.mnode, - 'volname': volume_config['name'], - 'client': cls.all_clients_info[client], - 'mountpoint': (os.path.join( - "/mnt", '_'.join([volume_config['name'], - cls.mount_type]))), - 'options': '' - } - cls.mounts_dict_list.append(mount) - - cls.mounts = create_mount_objs(cls.mounts_dict_list) - - # Create and mount volumes - cls.mount_points = [] - cls.client = cls.clients[0] - for volume_config in cls.volume_configs: - # Setup volume - ret = setup_volume(mnode=cls.mnode, - all_servers_info=cls.all_servers_info, - volume_config=volume_config, - force=False) - if not ret: - raise ExecutionError("Failed to setup Volume %s" - % volume_config['name']) - g.log.info("Successful in setting volume %s", - volume_config['name']) - - # Mount volume - mount_point = (os.path.join("/mnt", '_'.join( - [volume_config['name'], cls.mount_type]))) - cls.mount_points.append(mount_point) - ret, _, _ = mount_volume(volume_config['name'], - cls.mount_type, - mount_point, - cls.mnode, - cls.client) - if ret: - raise ExecutionError( - "Failed to do gluster mount on volume %s " - % cls.volname) - g.log.info("Successfully mounted %s on client %s", - cls.volname, cls.client) + cls.volume_configs = [] + cls.mounts_dict_list = [] + cls.client = cls.clients[0] + + # Define two replicated volumes + for i in range(1, 3): + volume_config = { + 'name': 'testvol_%s_%d' % (cls.volume['voltype']['type'], i), + 'servers': cls.servers, + 'voltype': cls.volume['voltype']} + cls.volume_configs.append(volume_config) + + # Redefine mounts + mount = { + 'protocol': cls.mount_type, + 'server': 
cls.mnode, + 'volname': volume_config['name'], + 'client': cls.all_clients_info[cls.client], + 'mountpoint': (os.path.join( + "/mnt", '_'.join([volume_config['name'], + cls.mount_type]))), + 'options': '' + } + cls.mounts_dict_list.append(mount) + + cls.mounts = create_mount_objs(cls.mounts_dict_list) + + # Create and mount volumes + cls.mount_points = [] + for volume_config in cls.volume_configs: + + # Setup volume + ret = setup_volume(mnode=cls.mnode, + all_servers_info=cls.all_servers_info, + volume_config=volume_config, + force=False) + if not ret: + raise ExecutionError("Failed to setup Volume %s" + % volume_config['name']) + g.log.info("Successful in setting volume %s", + volume_config['name']) + + # Mount volume + mount_point = (os.path.join("/mnt", '_'.join( + [volume_config['name'], cls.mount_type]))) + cls.mount_points.append(mount_point) + ret, _, _ = mount_volume(volume_config['name'], + cls.mount_type, + mount_point, + cls.mnode, cls.client) + if ret: + raise ExecutionError( + "Failed to do gluster mount on volume %s " + % cls.volname) + g.log.info("Successfully mounted %s on client %s", + cls.volname, cls.client) def setUp(self): """ @@ -125,7 +117,7 @@ class VolumeSetDataSelfHealTests(GlusterBaseClass): """ # calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() self.all_mounts_procs = [] self.io_validation_complete = False @@ -152,33 +144,33 @@ class VolumeSetDataSelfHealTests(GlusterBaseClass): raise ExecutionError("Failed to list all files and dirs") g.log.info("Listing all files and directories is successful") - @classmethod - def tearDownClass(cls): - """ - Clean up the volume and umount volume from client - """ + # umount all volumes + for mount_point in self.mount_points: + ret, _, _ = umount_volume( + self.client, mount_point) + if ret: + raise ExecutionError( + "Failed to umount on volume %s " + % self.volname) + g.log.info("Successfully umounted %s on client %s", + self.volname, self.client) + ret = rmdir(self.client, mount_point) + if not ret: + raise ExecutionError( + "Failed to remove directory mount directory.") + g.log.info("Mount directory is removed successfully") + # stopping all volumes - g.log.info("Starting to Cleanup all Volumes") - volume_list = get_volume_list(cls.mnode) + volume_list = get_volume_list(self.mnode) for volume in volume_list: - ret = cleanup_volume(cls.mnode, volume) + ret = cleanup_volume(self.mnode, volume) if not ret: raise ExecutionError("Failed to cleanup Volume %s" % volume) g.log.info("Volume: %s cleanup is done", volume) g.log.info("Successfully Cleanedup all Volumes") - # umount all volumes - for mount_point in cls.mount_points: - ret, _, _ = umount_volume(cls.client, mount_point) - if ret: - raise ExecutionError( - "Failed to umount on volume %s " - % cls.volname) - g.log.info("Successfully umounted %s on client %s", - cls.volname, cls.client) - - # calling GlusterBaseClass tearDownClass - GlusterBaseClass.tearDownClass.im_func(cls) + # calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() def test_mount_point_not_go_to_rofs(self): """ @@ -189,39 +181,37 @@ class VolumeSetDataSelfHealTests(GlusterBaseClass): - Check if all the files are deleted from the mount point from both the servers """ + # pylint: disable=too-many-locals,too-many-statements # create files on all mounts g.log.info("Starting IO on all mounts...") - all_mounts_procs = [] for mount_obj in self.mounts: g.log.info("Generating data for %s:%s", mount_obj.client_system, 
mount_obj.mountpoint) # Create files g.log.info('Creating files...') - command = ("python %s create_files " + command = ("/usr/bin/env python %s create_files " "-f 100 " "--fixed-file-size 1M " - "%s" - % (self.script_upload_path, mount_obj.mountpoint)) + "%s" % ( + self.script_upload_path, + mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, command, user=mount_obj.user) - all_mounts_procs.append(proc) + self.all_mounts_procs.append(proc) # Validate IO self.assertTrue( - validate_io_procs(all_mounts_procs, self.mounts), - "IO failed on some of the clients" - ) + validate_io_procs(self.all_mounts_procs, self.mounts), + "IO failed on some of the clients") # select bricks to bring offline volume_list = get_volume_list(self.mnode) for volname in volume_list: bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, volname)) - bricks_to_bring_offline = filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks'])) + bricks_to_bring_offline = ( + bricks_to_bring_offline_dict['volume_bricks']) # bring bricks offline g.log.info("Going to bring down the brick process for %s", @@ -234,20 +224,21 @@ class VolumeSetDataSelfHealTests(GlusterBaseClass): # delete files on all mounts g.log.info("Deleting IO on all mounts...") - all_mounts_procs = [] + self.all_mounts_procs = [] for mount_obj in self.mounts: g.log.info("Deleting data for %s:%s", mount_obj.client_system, mount_obj.mountpoint) # Delete files g.log.info('Deleting files...') - command = ("python %s delete %s" - % (self.script_upload_path, mount_obj.mountpoint)) + command = "/usr/bin/env python %s delete %s" % ( + self.script_upload_path, + mount_obj.mountpoint) proc = g.run_async(mount_obj.client_system, command, user=mount_obj.user) - all_mounts_procs.append(proc) + self.all_mounts_procs.append(proc) # Validate IO self.assertTrue( - validate_io_procs(all_mounts_procs, self.mounts), - "IO failed on some of the clients" - ) + validate_io_procs(self.all_mounts_procs, self.mounts), + "IO failed on some of the clients") + self.io_validation_complete = True diff --git a/tests/functional/arbiter/test_no_data_loss_arbiter_vol_after_rename_file.py b/tests/functional/arbiter/test_no_data_loss_arbiter_vol_after_rename_file.py index 80339e5d3..4f30249d0 100755 --- a/tests/functional/arbiter/test_no_data_loss_arbiter_vol_after_rename_file.py +++ b/tests/functional/arbiter/test_no_data_loss_arbiter_vol_after_rename_file.py @@ -1,4 +1,4 @@ -# Copyright (C) 2016-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2016-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,8 +14,8 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
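With tiering gone, the tests in this patch read only the 'volume_bricks' key from select_bricks_to_bring_offline() instead of filtering hot and cold tier lists. A short sketch of the resulting offline-and-verify pattern, assuming the glustolibs brick helpers these tests already import; the wrapper name is illustrative:

from glustolibs.gluster.brick_libs import (select_bricks_to_bring_offline,
                                           bring_bricks_offline,
                                           are_bricks_offline)


def bring_selected_bricks_offline(mnode, volname):
    """Select bricks, take them offline and confirm they really are offline."""
    offline_dict = select_bricks_to_bring_offline(mnode, volname)
    bricks = offline_dict['volume_bricks']
    if not bricks:
        raise RuntimeError("No bricks selected to bring offline")
    if not bring_bricks_offline(volname, bricks):
        raise RuntimeError("Failed to bring bricks %s offline" % bricks)
    if not are_bricks_offline(mnode, volname, bricks):
        raise RuntimeError("Bricks %s are still online" % bricks)
    return bricks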
- from glusto.core import Glusto as g + from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.volume_ops import (set_volume_options, @@ -30,7 +30,7 @@ from glustolibs.gluster.heal_libs import (monitor_heal_completion, from glustolibs.misc.misc_libs import upload_scripts -@runs_on([['replicated'], +@runs_on([['arbiter'], ['glusterfs']]) class ArbiterSelfHealTests(GlusterBaseClass): """ @@ -39,16 +39,14 @@ class ArbiterSelfHealTests(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -61,7 +59,7 @@ class ArbiterSelfHealTests(GlusterBaseClass): """ # calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() # Setup Volume and Mount Volume g.log.info("Starting to Setup Volume %s", self.volname) @@ -86,12 +84,12 @@ class ArbiterSelfHealTests(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_no_data_loss_arbiter_vol_after_rename_file(self): """ - Create a 1x(2+1) arbiter replicate volume - - Turn off self-heal daemon + - Turn off Clients side healing option - Create a directory 'test_dir' - Bring down the 1-st data brick - Create a file under 'test_dir' @@ -99,6 +97,7 @@ class ArbiterSelfHealTests(GlusterBaseClass): - Bring up the 1-st data brick - Rename file under 'test_dir' - Bring up the 2-nd data brick + - Turn on Clients side healing option - Trigger heal - Check if no pending heals - Check if md5sum on mountpoint is the same for md5sum_node on nodes @@ -107,7 +106,9 @@ class ArbiterSelfHealTests(GlusterBaseClass): test_dir = 'test_dir' # Setting options - options = {"self-heal-daemon": "off"} + options = {"cluster.metadata-self-heal": "off", + "cluster.entry-self-heal": "off", + "cluster.data-self-heal": "off"} g.log.info('Setting options %s for volume %s...', options, self.volname) ret = set_volume_options(self.mnode, self.volname, options) @@ -115,11 +116,11 @@ class ArbiterSelfHealTests(GlusterBaseClass): % (options, self.volname)) g.log.info("Successfully set %s for volume %s", options, self.volname) - - # Check if options are set to off options_dict = get_volume_options(self.mnode, self.volname) - self.assertEqual(options_dict['cluster.self-heal-daemon'], 'off', - 'Option self-heal-daemon is not set to off') + # validating options are off + for opt in options: + self.assertEqual(options_dict[opt], 'off', + 'options are not set to off') g.log.info('Option are set to off for volume %s: %s', options, self.volname) @@ -128,10 +129,10 @@ class ArbiterSelfHealTests(GlusterBaseClass): self.mounts[0].client_system, self.mounts[0].mountpoint) # Create dir g.log.info('Creating dir...') - command = ('python %s create_deep_dir -d 1 -l 0 -n 1 %s/%s' - % 
(self.script_upload_path, - self.mounts[0].mountpoint, - test_dir)) + command = ('/usr/bin/env python %s create_deep_dir -d 1 -l 0 -n 1 ' + '%s/%s' % ( + self.script_upload_path, + self.mounts[0].mountpoint, test_dir)) ret, _, err = g.run(self.mounts[0].client_system, command, user=self.mounts[0].user) @@ -163,10 +164,9 @@ class ArbiterSelfHealTests(GlusterBaseClass): self.mounts[0].client_system, self.mounts[0].mountpoint) # Create file g.log.info('Creating file...') - command = ("python %s create_files -f 1 %s/%s" - % (self.script_upload_path, - self.mounts[0].mountpoint, - test_dir)) + command = "/usr/bin/env python %s create_files -f 1 %s/%s" % ( + self.script_upload_path, + self.mounts[0].mountpoint, test_dir) ret, _, err = g.run(self.mounts[0].client_system, command, user=self.mounts[0].user) @@ -212,12 +212,11 @@ class ArbiterSelfHealTests(GlusterBaseClass): # Rename file under test_dir g.log.info("Renaming file for %s:%s", self.mounts[0].client_system, self.mounts[0].mountpoint) - command = ("python %s mv %s/%s" - % (self.script_upload_path, - self.mounts[0].mountpoint, - test_dir)) + command = "/usr/bin/env python %s mv %s/%s" % ( + self.script_upload_path, + self.mounts[0].mountpoint, test_dir) ret, _, err = g.run(self.mounts[0].client_system, command) - self.assertFalse(ret, err) + self.assertEqual(ret, 0, err) g.log.info("Renaming file for %s:%s is successful", self.mounts[0].client_system, self.mounts[0].mountpoint) @@ -237,17 +236,25 @@ class ArbiterSelfHealTests(GlusterBaseClass): ret = self.mount_volume(self.mounts) self.assertTrue(ret, 'Unable to mount %s' % self.volname) + # Enable client side healing + g.log.info("Enable client side healing options") + options = {"metadata-self-heal": "on", + "entry-self-heal": "on", + "data-self-heal": "on"} + ret = set_volume_options(self.mnode, self.volname, options) + self.assertTrue(ret, 'Failed to set options %s' % options) + g.log.info("Successfully set %s for volume %s", + options, self.volname) # Trigger heal from mount point g.log.info("Triggering heal for %s:%s", self.mounts[0].client_system, self.mounts[0].mountpoint) - command = ("python %s read %s/%s" - % (self.script_upload_path, - self.mounts[0].mountpoint, + command = ("cd %s/%s ; find . | xargs getfattr -d -m . -e hex" + % (self.mounts[0].mountpoint, test_dir)) ret, _, err = g.run(self.mounts[0].client_system, command) - self.assertFalse(ret, 'Failed to start "find . | xargs stat" ' - 'on %s' + self.assertFalse(ret, 'Failed to trigger heal using ' + '"find . | xargs getfattr -d -m . 
-e hex" on %s' % self.mounts[0].client_system) # Monitor heal completion @@ -275,10 +282,10 @@ class ArbiterSelfHealTests(GlusterBaseClass): self.assertFalse(ret, err) g.log.info('md5sum for the node: %s', md5sum_node) - # comparing md5sum_node result with mountpoint + # Comparing md5sum_node result with mountpoint g.log.info('Comparing md5sum result with mountpoint...') - self.assertEqual(md5sum, md5sum_node, 'File contents are not equal' + self.assertEqual(md5sum, md5sum_node, 'md5sums are not equal' ' on %s and %s' % (self.mounts[0].mountpoint, brick)) - g.log.info('File contents are equal on %s and %s', + g.log.info('md5sums are equal on %s and %s', self.mounts[0].mountpoint, brick) diff --git a/tests/functional/arbiter/test_oom_on_client_heal_is_in_progress_arbiter.py b/tests/functional/arbiter/test_oom_on_client_heal_is_in_progress_arbiter.py index 0e55cdf8a..00988386b 100755 --- a/tests/functional/arbiter/test_oom_on_client_heal_is_in_progress_arbiter.py +++ b/tests/functional/arbiter/test_oom_on_client_heal_is_in_progress_arbiter.py @@ -1,4 +1,4 @@ -# Copyright (C) 2016-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2016-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,8 +14,8 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - from glusto.core import Glusto as g + from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.brick_libs import (bring_bricks_offline, @@ -29,7 +29,7 @@ from glustolibs.gluster.lib_utils import list_files from glustolibs.misc.misc_libs import upload_scripts -@runs_on([['replicated'], +@runs_on([['arbiter'], ['glusterfs', 'nfs', 'cifs']]) class ArbiterSelfHealTests(GlusterBaseClass): """ @@ -38,16 +38,14 @@ class ArbiterSelfHealTests(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -59,7 +57,7 @@ class ArbiterSelfHealTests(GlusterBaseClass): setUp method for every test """ # calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() self.all_mounts_procs = [] self.io_validation_complete = False @@ -101,7 +99,7 @@ class ArbiterSelfHealTests(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_oom_on_client_heal_in_progress(self): """ @@ -120,11 +118,12 @@ class ArbiterSelfHealTests(GlusterBaseClass): mount_obj.client_system, mount_obj.mountpoint) # Create files g.log.info('Creating files...') - command = ("python %s create_files " + command = ("/usr/bin/env python %s create_files " "-f 1000 " 
"--fixed-file-size 10k " - "%s" - % (self.script_upload_path, mount_obj.mountpoint)) + "%s" % ( + self.script_upload_path, + mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, command, user=mount_obj.user) diff --git a/tests/functional/arbiter/test_remove_faulty_subvol_and_add_new_subvol.py b/tests/functional/arbiter/test_remove_faulty_subvol_and_add_new_subvol.py index 6768d7660..06b494e9b 100644 --- a/tests/functional/arbiter/test_remove_faulty_subvol_and_add_new_subvol.py +++ b/tests/functional/arbiter/test_remove_faulty_subvol_and_add_new_subvol.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,6 +15,7 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. from glusto.core import Glusto as g + from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.volume_libs import (expand_volume, shrink_volume, @@ -29,7 +30,7 @@ from glustolibs.io.utils import (collect_mounts_arequal, wait_for_io_to_complete) -@runs_on([['distributed-replicated'], +@runs_on([['distributed-arbiter'], ['glusterfs', 'nfs', 'cifs']]) class TestArbiterSelfHeal(GlusterBaseClass): """ @@ -41,16 +42,14 @@ class TestArbiterSelfHeal(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -59,20 +58,7 @@ class TestArbiterSelfHeal(GlusterBaseClass): def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) - - # Setup Volumes - if self.volume_type == "distributed-replicated": - self.volume_configs = [] - - # Redefine distributed-replicated volume - self.volume['voltype'] = { - 'type': 'distributed-replicated', - 'replica_count': 3, - 'dist_count': 2, - 'arbiter_count': 1, - 'transport': 'tcp'} - + self.get_super_method(self, 'setUp')() self.all_mounts_procs = [] self.io_validation_complete = False @@ -113,7 +99,7 @@ class TestArbiterSelfHeal(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_remove_faulty_subvol_and_add_new_subvol(self): """ @@ -132,13 +118,14 @@ class TestArbiterSelfHeal(GlusterBaseClass): mount_obj.client_system, mount_obj.mountpoint) # Create dirs with file g.log.info('Creating dirs with file...') - command = ("python %s create_deep_dirs_with_files " + command = ("/usr/bin/env python %s create_deep_dirs_with_files " "-d 2 " "-l 2 " "-n 2 " "-f 20 " - "%s" - % (self.script_upload_path, mount_obj.mountpoint)) + "%s" % ( + self.script_upload_path, + mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, command, 
user=mount_obj.user) diff --git a/tests/functional/arbiter/test_replacing_all_arbiter_bricks_in_the_volume.py b/tests/functional/arbiter/test_replacing_all_arbiter_bricks_in_the_volume.py index c1a1b5d7c..26c848171 100755 --- a/tests/functional/arbiter/test_replacing_all_arbiter_bricks_in_the_volume.py +++ b/tests/functional/arbiter/test_replacing_all_arbiter_bricks_in_the_volume.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,7 +14,9 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + from glusto.core import Glusto as g + from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.volume_libs import ( @@ -33,7 +35,7 @@ from glustolibs.io.utils import (validate_io_procs, wait_for_io_to_complete) -@runs_on([['distributed-replicated'], +@runs_on([['distributed-arbiter'], ['glusterfs']]) class TestArbiterSelfHeal(GlusterBaseClass): """ @@ -44,16 +46,14 @@ class TestArbiterSelfHeal(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -62,20 +62,7 @@ class TestArbiterSelfHeal(GlusterBaseClass): def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) - - # Setup Volumes - if self.volume_type == "distributed-replicated": - self.volume_configs = [] - - # Redefine distributed-replicated volume - self.volume['voltype'] = { - 'type': 'distributed-replicated', - 'replica_count': 3, - 'dist_count': 4, - 'arbiter_count': 1, - 'transport': 'tcp'} - + self.get_super_method(self, 'setUp')() self.all_mounts_procs = [] self.io_validation_complete = False self.bricks_to_clean = [] @@ -127,7 +114,7 @@ class TestArbiterSelfHeal(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_replacing_all_arbiters(self): """ @@ -159,12 +146,8 @@ class TestArbiterSelfHeal(GlusterBaseClass): mount_obj.client_system, mount_obj.mountpoint) # Create dirs with file g.log.info('Creating dirs with file...') - command = ("python %s create_deep_dirs_with_files " - "-d 3 " - "-l 3 " - "-n 3 " - "-f 20 " - "%s" + command = ("/usr/bin/env python %s create_deep_dirs_with_files " + "-d 3 -l 3 -n 3 -f 20 %s" % (self.script_upload_path, mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, command, diff --git a/tests/functional/arbiter/test_resolving_meta_data_split_brain_extended_attributes.py b/tests/functional/arbiter/test_resolving_meta_data_split_brain_extended_attributes.py index 
0e9f945c2..954098677 100644 --- a/tests/functional/arbiter/test_resolving_meta_data_split_brain_extended_attributes.py +++ b/tests/functional/arbiter/test_resolving_meta_data_split_brain_extended_attributes.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -30,7 +30,7 @@ from glustolibs.io.utils import (validate_io_procs, wait_for_io_to_complete) -@runs_on([['distributed-replicated'], +@runs_on([['distributed-arbiter'], ['glusterfs', 'nfs', 'cifs']]) class TestArbiterSelfHeal(GlusterBaseClass): """ @@ -41,18 +41,7 @@ class TestArbiterSelfHeal(GlusterBaseClass): def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) - - # Setup Volumes - if self.volume_type == "distributed-replicated": - # Redefine distributed-replicated volume - self.volume['voltype'] = { - 'type': 'distributed-replicated', - 'replica_count': 3, - 'dist_count': 2, - 'arbiter_count': 1, - 'transport': 'tcp'} - + self.get_super_method(self, 'setUp')() self.all_mounts_procs = [] self.io_validation_complete = False @@ -93,7 +82,7 @@ class TestArbiterSelfHeal(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_resolving_meta_data(self): """ diff --git a/tests/functional/arbiter/test_self_heal_50k_files.py b/tests/functional/arbiter/test_self_heal_50k_files.py new file mode 100644 index 000000000..887959fa0 --- /dev/null +++ b/tests/functional/arbiter/test_self_heal_50k_files.py @@ -0,0 +1,140 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
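The change repeated across all of these files replaces the Python 2 only GlusterBaseClass.setUp.im_func(self) idiom with get_super_method(), which works on both Python 2 and 3. A minimal skeleton of the resulting test-class shape, with an illustrative class name and the volume setup reduced to the calls the patch itself uses:

from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.exceptions import ExecutionError


@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']])
class ExampleArbiterTest(GlusterBaseClass):

    def setUp(self):
        # Portable replacement for GlusterBaseClass.setUp.im_func(self)
        self.get_super_method(self, 'setUp')()
        if not self.setup_volume_and_mount_volume(mounts=self.mounts):
            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")

    def tearDown(self):
        if not self.unmount_volume_and_cleanup_volume(mounts=self.mounts):
            raise ExecutionError("Failed to umount the vol & cleanup Volume")
        self.get_super_method(self, 'tearDown')()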
+ +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import ( + verify_all_process_of_volume_are_online, + wait_for_volume_process_to_be_online) +from glustolibs.gluster.brick_libs import (select_bricks_to_bring_offline, + bring_bricks_offline, + bring_bricks_online, + are_bricks_offline) +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_volume_in_split_brain) +from glustolibs.io.utils import validate_io_procs + + +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestSelfHeal50kFiles(GlusterBaseClass): + """ + Description: + Arbiter self heal of 50k files + """ + def setUp(self): + # Calling GlusterBaseClass + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + g.log.info("Starting to Setup Volume and Mount Volume") + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + + # Cleanup and umount volume + g.log.info("Starting to Unmount Volume and Cleanup Volume") + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_self_heal_50k_files(self): + """ + Description: + - Select bricks to bring offline + - Bring brick offline + - Create 50k files + - Validate IO + - Bring bricks online + - Monitor heal + - Check for split-brain + - Validate IO + """ + # pylint: disable=too-many-statements,too-many-locals + # Select bricks to bring offline + bricks_to_bring_offline_dict = select_bricks_to_bring_offline( + self.mnode, self.volname) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] + + # Bring brick offline + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' % + bricks_to_bring_offline) + self.assertIsNotNone(bricks_to_bring_offline, "List is empty") + + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks %s are not offline' + % bricks_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + bricks_to_bring_offline) + + # Create 50k files + command = ("cd %s ; " + "for i in `seq 1 50000` ; " + "do dd if=/dev/urandom of=test.$i " + "bs=100k count=1 ; " + "done ;" + % self.mounts[0].mountpoint) + proc = g.run_async(self.mounts[0].client_system, command, + user=self.mounts[0].user) + + # Validate IO + self.assertTrue( + validate_io_procs([proc], self.mounts[0]), + "IO failed on some of the clients" + ) + + # Bring brick online + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s online' % + bricks_to_bring_offline) + g.log.info('Bringing bricks %s online is successful', + bricks_to_bring_offline) + + # Wait for volume processes to be online + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to wait for volume %s processes to " + "be online", self.volname)) + g.log.info("Successful in waiting for volume %s processes to 
be " + "online", self.volname) + + # Verify volume's all process are online + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, ("Volume %s : All process are not online" + % self.volname)) + g.log.info("Volume %s : All process are online", self.volname) + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname, + timeout_period=3000) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') diff --git a/tests/functional/arbiter/test_self_heal_50k_files_heal_command_by_add_brick.py b/tests/functional/arbiter/test_self_heal_50k_files_heal_command_by_add_brick.py new file mode 100644 index 000000000..da98c4b7f --- /dev/null +++ b/tests/functional/arbiter/test_self_heal_50k_files_heal_command_by_add_brick.py @@ -0,0 +1,248 @@ +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_ops import set_volume_options +from glustolibs.gluster.volume_libs import ( + verify_all_process_of_volume_are_online, + wait_for_volume_process_to_be_online) +from glustolibs.gluster.volume_libs import expand_volume +from glustolibs.gluster.brick_libs import (select_bricks_to_bring_offline, + bring_bricks_offline, + bring_bricks_online, + are_bricks_offline) +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_heal_complete, + is_volume_in_split_brain, + is_shd_daemonized) +from glustolibs.gluster.rebalance_ops import (rebalance_start, + wait_for_rebalance_to_complete) +from glustolibs.gluster.heal_ops import trigger_heal +from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs) + + +@runs_on([['arbiter', 'distributed-arbiter'], + ['glusterfs', 'nfs']]) +class TestSelfHeal(GlusterBaseClass): + """ + Description: + Arbiter Test cases related to + healing in default configuration of the volume + """ + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + 
# Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_self_heal_50k_files_heal_command_by_add_brick(self): + """ + Test self-heal of 50k files (heal command) + Description: + - Set the volume option + "metadata-self-heal": "off" + "entry-self-heal": "off" + "data-self-heal": "off" + "self-heal-daemon": "off" + - Bring down all bricks processes from selected set + - Create IO (50k files) + - Get arequal before getting bricks online + - Bring bricks online + - Set the volume option + "self-heal-daemon": "on" + - Check for daemons + - Start healing + - Check if heal is completed + - Check for split-brain + - Get arequal after getting bricks online and compare with + arequal before getting bricks online + - Add bricks + - Do rebalance + - Get arequal after adding bricks and compare with + arequal after getting bricks online + """ + # pylint: disable=too-many-locals,too-many-statements + # Setting options + options = {"metadata-self-heal": "off", + "entry-self-heal": "off", + "data-self-heal": "off", + "self-heal-daemon": "off"} + ret = set_volume_options(self.mnode, self.volname, options) + self.assertTrue(ret, 'Failed to set options') + g.log.info("Successfully set %s for volume %s", options, self.volname) + + # Select bricks to bring offline + bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( + self.mnode, self.volname)) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] + + # Bring brick offline + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' % + bricks_to_bring_offline) + + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks %s are not offline' + % bricks_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + bricks_to_bring_offline) + + # Creating files on client side + all_mounts_procs = [] + + # Create 50k files + g.log.info('Creating files...') + command = ("cd %s ; " + "for i in `seq 1 50000` ; " + "do dd if=/dev/urandom of=test.$i " + "bs=100k count=1 ; " + "done ;" + % self.mounts[0].mountpoint) + proc = g.run_async(self.mounts[0].client_system, command, + user=self.mounts[0].user) + all_mounts_procs.append(proc) + + # Validate IO + self.assertTrue( + validate_io_procs(all_mounts_procs, self.mounts[0]), + "IO failed on some of the clients" + ) + + # Get arequal before getting bricks online + ret, result_before_online = collect_mounts_arequal(self.mounts[0]) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal before getting bricks online ' + 'is successful') + + # Bring brick online + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s online' % + bricks_to_bring_offline) + g.log.info('Bringing bricks %s online is successful', + bricks_to_bring_offline) + + # Setting options + ret = set_volume_options(self.mnode, self.volname, + {"self-heal-daemon": "on"}) + self.assertTrue(ret, 'Failed to set option self-heal-daemon to ON.') + g.log.info("Option 'self-heal-daemon' is set to 'on' successfully") + + # Wait for volume processes to be online + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to wait for volume %s processes to " + "be online", self.volname)) + g.log.info("Successful in waiting for volume %s processes to be " + "online", self.volname) + + # Verify volume's all process are online + ret = 
verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, ("Volume %s : All process are not online" + % self.volname)) + g.log.info("Volume %s : All process are online", self.volname) + + # Wait for self-heal-daemons to be online + ret = is_shd_daemonized(self.all_servers) + self.assertTrue(ret, "Either No self heal daemon process found") + g.log.info("All self-heal-daemons are online") + + # Start healing + ret = trigger_heal(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not started') + g.log.info('Healing is started') + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname, + timeout_period=3600) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + # Get arequal after getting bricks online + ret, result_after_online = collect_mounts_arequal(self.mounts[0]) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal after getting bricks online ' + 'is successful') + + # Checking arequals before bringing bricks online + # and after bringing bricks online + self.assertEqual(sorted(result_before_online), + sorted(result_after_online), + 'Checksums before and ' + 'after bringing bricks online are not equal') + g.log.info('Checksums before and after bringing bricks online ' + 'are equal') + + # Add bricks + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, ("Failed to expand the volume when IO in " + "progress on volume %s", self.volname)) + g.log.info("Expanding volume is successful on volume %s", self.volname) + + # Do rebalance and wait for it to complete + ret, _, _ = rebalance_start(self.mnode, self.volname) + self.assertEqual(ret, 0, 'Failed to start rebalance') + g.log.info('Rebalance is started') + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=3600) + self.assertTrue(ret, 'Rebalance is not completed') + g.log.info('Rebalance is completed successfully') + + # Get arequal after adding bricks + ret, result_after_adding_bricks = collect_mounts_arequal( + self.mounts[0]) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal after getting bricks ' + 'is successful') + + # Checking arequals after bringing bricks online + # and after adding bricks + self.assertEqual(sorted(result_after_online), + sorted(result_after_adding_bricks), + 'Checksums after bringing bricks online' + 'and after adding bricks are not equal') + g.log.info('Checksums after bringing bricks online and ' + 'after adding bricks are equal') diff --git a/tests/functional/arbiter/test_self_heal_algorithm_full_daemon_off.py b/tests/functional/arbiter/test_self_heal_algorithm_full_daemon_off.py index 577961f36..99c0f400c 100644 --- a/tests/functional/arbiter/test_self_heal_algorithm_full_daemon_off.py +++ b/tests/functional/arbiter/test_self_heal_algorithm_full_daemon_off.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -32,7 +32,7 @@ from glustolibs.misc.misc_libs import upload_scripts from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs) -@runs_on([['replicated', 'distributed-replicated'], +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs', 'cifs', 'nfs']]) class TestSelfHeal(GlusterBaseClass): """ @@ -44,26 +44,14 @@ class TestSelfHeal(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) - - # Overriding the volume type to specifically test the volume type - # Change from distributed-replicated to arbiter - if cls.volume_type == "distributed-replicated": - cls.volume['voltype'] = { - 'type': 'distributed-replicated', - 'dist_count': 2, - 'replica_count': 3, - 'arbiter_count': 1, - 'transport': 'tcp'} + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -72,7 +60,7 @@ class TestSelfHeal(GlusterBaseClass): def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() # Setup Volume and Mount Volume g.log.info("Starting to Setup Volume and Mount Volume") @@ -97,7 +85,7 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_self_heal_algorithm_full_daemon_off(self): """"" diff --git a/tests/functional/arbiter/test_self_heal_daemon.py b/tests/functional/arbiter/test_self_heal_daemon.py new file mode 100644 index 000000000..37470e41c --- /dev/null +++ b/tests/functional/arbiter/test_self_heal_daemon.py @@ -0,0 +1,256 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
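Another recurring cleanup: upload_scripts() is now given the script path directly instead of a single-element list, and the duplicate script_local_path variable is dropped. A condensed sketch of the resulting setUpClass, assuming the standard glustolibs script location used throughout these tests; the class name is illustrative:

from glusto.core import Glusto as g
from glustolibs.gluster.gluster_base_class import GlusterBaseClass
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.misc.misc_libs import upload_scripts


class ExampleIOScriptSetup(GlusterBaseClass):

    @classmethod
    def setUpClass(cls):
        cls.get_super_method(cls, 'setUpClass')()
        cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
                                  "file_dir_ops.py")
        # upload_scripts() accepts the path string directly
        if not upload_scripts(cls.clients, cls.script_upload_path):
            raise ExecutionError("Failed to upload IO scripts to clients %s"
                                 % cls.clients)
        g.log.info("Successfully uploaded IO scripts to clients %s",
                   cls.clients)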
+ +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import ( + verify_all_process_of_volume_are_online, + wait_for_volume_process_to_be_online) +from glustolibs.gluster.brick_libs import ( + select_volume_bricks_to_bring_offline, + bring_bricks_offline, + bring_bricks_online, + are_bricks_offline) +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_volume_in_split_brain) +from glustolibs.io.utils import (collect_mounts_arequal) +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.glusterfile import get_file_stat + + +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestSelfHealDaemon(GlusterBaseClass): + """ + Description: + Arbiter Test cases related to self heal + of data and hardlink + """ + def setUp(self): + # Calling GlusterBaseClass + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_self_heal_daemon(self): + """ + Test Data-Self-Heal(heal command) + Description: + - Create directory test_hardlink_self_heal + - Create directory test_data_self_heal + - Creating files for hardlinks and data files + - Get arequal before getting bricks offline + - Select bricks to bring offline + - Bring brick offline + - Create hardlinks and append data to data files + - Bring brick online + - Wait for volume processes to be online + - Verify volume's all process are online + - Monitor heal completion + - Check for split-brain + - Get arequal after getting bricks online + - Select bricks to bring offline + - Bring brick offline + - Truncate data to data files and verify hardlinks + - Bring brick online + - Wait for volume processes to be online + - Verify volume's all process are online + - Monitor heal completion + - Check for split-brain + - Get arequal again + + """ + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + # Creating directory test_hardlink_self_heal + ret = mkdir(self.mounts[0].client_system, "{}/test_hardlink_self_heal" + .format(self.mounts[0].mountpoint)) + self.assertTrue(ret, "Failed to create directory") + g.log.info("Directory 'test_hardlink_self_heal' on %s created " + "successfully", self.mounts[0]) + + # Creating directory test_data_self_heal + ret = mkdir(self.mounts[0].client_system, "{}/test_data_self_heal" + .format(self.mounts[0].mountpoint)) + self.assertTrue(ret, "Failed to create directory") + g.log.info("Directory test_hardlink_self_heal on %s created " + "successfully", self.mounts[0]) + + # Creating files for hardlinks and data files + cmd = ('cd %s/test_hardlink_self_heal;for i in `seq 1 5`;' + 'do mkdir dir.$i ; for j in `seq 1 10` ; do dd if=' + '/dev/urandom of=dir.$i/file.$j bs=1k count=$j;done; done;' + 'cd ..' 
% self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to create file on mountpoint") + g.log.info("Successfully created files on mountpoint") + + cmd = ('cd %s/test_data_self_heal;for i in `seq 1 100`;' + 'do dd if=/dev/urandom of=file.$i bs=128K count=$i;done;' + 'cd ..' % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to create file on mountpoint") + g.log.info("Successfully created files on mountpoint") + + # Get arequal before getting bricks offline + ret, result_before_online = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Arequal before getting bricks online-%s', + result_before_online) + + # Select bricks to bring offline + bricks_to_bring_offline = select_volume_bricks_to_bring_offline( + self.mnode, self.volname) + self.assertIsNotNone(bricks_to_bring_offline, "List is empty") + + # Bring brick offline + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} offline'. + format(bricks_to_bring_offline)) + + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks {} are not offline'. + format(bricks_to_bring_offline)) + g.log.info('Bringing bricks %s offline is successful', + bricks_to_bring_offline) + + # Append data to data files and create hardlinks + cmd = ('cd %s/test_data_self_heal;for i in `seq 1 100`;' + 'do dd if=/dev/urandom of=file.$i bs=512K count=$i ; done ;' + 'cd .. ' % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to modify data files.") + g.log.info("Successfully modified data files") + + cmd = ('cd %s/test_hardlink_self_heal;for i in `seq 1 5` ;do ' + 'for j in `seq 1 10`;do ln dir.$i/file.$j dir.$i/link_file.$j;' + 'done ; done ; cd .. 
' % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Hardlinks creation failed") + g.log.info("Successfully created hardlinks of files") + + # Bring bricks online + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} online'.format + (bricks_to_bring_offline)) + g.log.info('Bringing bricks %s online is successful', + bricks_to_bring_offline) + + # Wait for volume processes to be online + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to wait for volume {} processes to " + "be online".format(self.volname))) + g.log.info("Successful in waiting for volume %s processes to be " + "online", self.volname) + + # Verify volume's all process are online + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, ("Volume {} : All process are not online".format + (self.volname))) + g.log.info("Volume %s : All process are online", self.volname) + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + # Get arequal after getting bricks online + ret, result_after_online = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Arequal after getting bricks online ' + 'is %s', result_after_online) + + # Select bricks to bring offline + bricks_to_bring_offline = select_volume_bricks_to_bring_offline( + self.mnode, self.volname) + self.assertIsNotNone(bricks_to_bring_offline, "List is empty") + + # Bring brick offline + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} offline'.format + (bricks_to_bring_offline)) + + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks {} are not offline'.format + (bricks_to_bring_offline)) + g.log.info('Bringing bricks %s offline is successful', + bricks_to_bring_offline) + + # Truncate data to data files and verify hardlinks + cmd = ('cd %s/test_data_self_heal ; for i in `seq 1 100` ;' + 'do truncate -s $(( $i * 128)) file.$i ; done ; cd ..' 
+ % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to truncate files") + g.log.info("Successfully truncated files on mountpoint") + + file_path = ('%s/test_hardlink_self_heal/dir{1..5}/file{1..10}' + % (self.mounts[0].mountpoint)) + link_path = ('%s/test_hardlink_self_heal/dir{1..5}/link_file{1..10}' + % (self.mounts[0].mountpoint)) + file_stat = get_file_stat(self.mounts[0], file_path) + link_stat = get_file_stat(self.mounts[0], link_path) + self.assertEqual(file_stat, link_stat, "Verification of hardlinks " + "failed") + g.log.info("Successfully verified hardlinks") + + # Bring brick online + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} online'.format + (bricks_to_bring_offline)) + g.log.info('Bringing bricks %s online is successful', + bricks_to_bring_offline) + + # Wait for volume processes to be online + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to wait for volume {} processes to " + "be online".format(self.volname))) + g.log.info("Successful in waiting for volume %s processes to be " + "online", self.volname) + + # Verify volume's all process are online + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, ("Volume {} : All process are not online".format + (self.volname))) + g.log.info("Volume %s : All process are online", self.volname) + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') diff --git a/tests/functional/arbiter/test_self_heal_differing_in_file_type.py b/tests/functional/arbiter/test_self_heal_differing_in_file_type.py index 7a7480002..0c49bcd8f 100755 --- a/tests/functional/arbiter/test_self_heal_differing_in_file_type.py +++ b/tests/functional/arbiter/test_self_heal_differing_in_file_type.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -29,10 +29,12 @@ from glustolibs.gluster.heal_libs import (monitor_heal_completion, is_volume_in_split_brain, is_shd_daemonized) from glustolibs.misc.misc_libs import upload_scripts -from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs) +from glustolibs.io.utils import ( + collect_mounts_arequal, validate_io_procs, + wait_for_io_to_complete) -@runs_on([['replicated', 'distributed-replicated'], +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs', 'cifs', 'nfs']]) class TestSelfHeal(GlusterBaseClass): """ @@ -44,26 +46,14 @@ class TestSelfHeal(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) - - # Overriding the volume type to specifically test the volume type - # Change from distributed-replicated to arbiter - if cls.volume_type == "distributed-replicated": - cls.volume['voltype'] = { - 'type': 'distributed-replicated', - 'dist_count': 2, - 'replica_count': 3, - 'arbiter_count': 1, - 'transport': 'tcp'} + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -72,7 +62,7 @@ class TestSelfHeal(GlusterBaseClass): def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() self.all_mounts_procs = [] @@ -96,7 +86,7 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_self_heal_differing_in_file_type(self): """ @@ -147,11 +137,10 @@ class TestSelfHeal(GlusterBaseClass): user=self.mounts[0].user) all_mounts_procs.append(proc) - # Validate IO + # wait for io to complete self.assertTrue( - validate_io_procs(all_mounts_procs, self.mounts), - "IO failed on some of the clients" - ) + wait_for_io_to_complete(all_mounts_procs, self.mounts), + "Io failed to complete on some of the clients") # Get arequal before getting bricks offline g.log.info('Getting arequal before getting bricks offline...') @@ -163,10 +152,7 @@ class TestSelfHeal(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks'])) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -190,9 +176,10 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks offline # and after bringing bricks offline - self.assertItemsEqual(result_before_offline, result_after_offline, - 'Checksums before and after ' - 'bringing bricks offline 
are not equal') + self.assertEqual(sorted(result_before_offline), + sorted(result_after_offline), + 'Checksums before and after bringing bricks' + ' offline are not equal') g.log.info('Checksums before and after ' 'bringing bricks offline are equal') @@ -282,8 +269,9 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks online # and after bringing bricks online - self.assertItemsEqual(result_before_online, result_after_online, - 'Checksums before and ' - 'after bringing bricks online are not equal') + self.assertEqual(sorted(result_before_online), + sorted(result_after_online), + 'Checksums before and after bringing bricks' + ' online are not equal') g.log.info('Checksums before and after bringing bricks online ' 'are equal') diff --git a/tests/functional/arbiter/test_self_heal_symbolic_links.py b/tests/functional/arbiter/test_self_heal_symbolic_links.py index 46b1889d3..655ea7564 100644 --- a/tests/functional/arbiter/test_self_heal_symbolic_links.py +++ b/tests/functional/arbiter/test_self_heal_symbolic_links.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -31,10 +31,11 @@ from glustolibs.gluster.heal_libs import (monitor_heal_completion, is_shd_daemonized) from glustolibs.gluster.heal_ops import trigger_heal from glustolibs.misc.misc_libs import upload_scripts -from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs) +from glustolibs.io.utils import (collect_mounts_arequal, + wait_for_io_to_complete) -@runs_on([['replicated', 'distributed-replicated'], +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs', 'nfs']]) class TestSelfHeal(GlusterBaseClass): """ @@ -46,26 +47,14 @@ class TestSelfHeal(GlusterBaseClass): @classmethod def setUpClass(cls): # Calling GlusterBaseClass setUpClass - GlusterBaseClass.setUpClass.im_func(cls) - - # Overriding the volume type to specifically test the volume type - # Change from distributed-replicated to arbiter - if cls.volume_type == "distributed-replicated": - cls.volume['voltype'] = { - 'type': 'distributed-replicated', - 'dist_count': 2, - 'replica_count': 3, - 'arbiter_count': 1, - 'transport': 'tcp'} + cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on mounts", cls.clients) - script_local_path = ("/usr/share/glustolibs/io/scripts/" - "file_dir_ops.py") cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") - ret = upload_scripts(cls.clients, [script_local_path]) + ret = upload_scripts(cls.clients, cls.script_upload_path) if not ret: raise ExecutionError("Failed to upload IO scripts to clients %s" % cls.clients) @@ -74,7 +63,7 @@ class TestSelfHeal(GlusterBaseClass): def setUp(self): # Calling GlusterBaseClass setUp - GlusterBaseClass.setUp.im_func(self) + self.get_super_method(self, 'setUp')() # Setup Volume and Mount Volume g.log.info("Starting to Setup Volume and Mount Volume") @@ -96,7 +85,7 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass teardown - GlusterBaseClass.tearDown.im_func(self) + self.get_super_method(self, 'tearDown')() def test_self_heal_symbolic_links(self): """ @@ -165,11 +154,10 @@ class TestSelfHeal(GlusterBaseClass): 
user=self.mounts[0].user) all_mounts_procs.append(proc) - # Validate IO + # wait for io to complete self.assertTrue( - validate_io_procs(all_mounts_procs, self.mounts), - "IO failed on some of the clients" - ) + wait_for_io_to_complete(all_mounts_procs, self.mounts), + "Io failed to complete on some of the clients") # Get arequal before getting bricks offline g.log.info('Getting arequal before getting bricks offline...') @@ -181,10 +169,7 @@ class TestSelfHeal(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks'])) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -208,9 +193,10 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks offline # and after bringing bricks offline - self.assertItemsEqual(result_before_offline, result_after_offline, - 'Checksums before and ' - 'after bringing bricks online are not equal') + self.assertEqual(sorted(result_before_offline), + sorted(result_after_offline), + 'Checksums before and after bringing bricks ' + 'online are not equal') g.log.info('Checksums before and after bringing bricks online ' 'are equal') @@ -335,8 +321,9 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks online # and after bringing bricks online - self.assertItemsEqual(result_before_online, result_after_online, - 'Checksums before and ' - 'after bringing bricks online are not equal') + self.assertEqual(sorted(result_before_online), + sorted(result_after_online), + 'Checksums before and after bringing bricks ' + 'online are not equal') g.log.info('Checksums before and after bringing bricks online ' 'are equal') diff --git a/tests/functional/arbiter/test_split_brain.py b/tests/functional/arbiter/test_split_brain.py new file mode 100644 index 000000000..e2684be49 --- /dev/null +++ b/tests/functional/arbiter/test_split_brain.py @@ -0,0 +1,165 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
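The `assertItemsEqual` replacements in the two files above follow one pattern: `assertItemsEqual` was removed in Python 3 (its successor is `assertCountEqual`), and comparing the sorted arequal lists with `assertEqual` gives the same order-insensitive check. A minimal, self-contained illustration of the equivalence, using placeholder checksum values:

import unittest


class ChecksumComparison(unittest.TestCase):
    def test_order_insensitive_equality(self):
        result_before = ['c1a2', 'b3d4', 'e5f6']
        result_after = ['e5f6', 'c1a2', 'b3d4']
        # Equivalent to the removed assertItemsEqual(before, after, msg)
        self.assertEqual(sorted(result_before), sorted(result_after),
                         'Checksums before and after bringing bricks '
                         'offline are not equal')


if __name__ == '__main__':
    unittest.main()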
+ +# pylint: disable=too-many-statements, too-many-locals +from glusto.core import Glusto as g + +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + wait_for_bricks_to_be_online) +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.gluster.volume_ops import set_volume_options +from glustolibs.io.utils import validate_io_procs +from glustolibs.gluster.heal_libs import is_volume_in_split_brain +from glustolibs.gluster.volume_libs import get_subvols + + +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestSplitBrain(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts " + "to clients %s" % cls.clients) + + # Setup Volume and Mount Volume + ret = cls.setup_volume_and_mount_volume(cls.mounts, True) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + @classmethod + def tearDownClass(cls): + """ + Cleanup Volume + """ + ret = cls.unmount_volume_and_cleanup_volume(cls.mounts) + if not ret: + raise ExecutionError("Failed to create volume") + + cls.get_super_method(cls, 'tearDownClass')() + + def _bring_bricks_online(self): + """ + Bring bricks online and monitor heal completion + """ + # Bring bricks online + ret = bring_bricks_online( + self.mnode, + self.volname, + self.bricks_to_bring_offline, + bring_bricks_online_methods=['volume_start_force']) + self.assertTrue(ret, 'Failed to bring bricks online') + + # Wait for volume processes to be online + ret = wait_for_bricks_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to wait for volume {} processes to " + "be online".format(self.volname))) + + def test_split_brain(self): + + """ + Description: Create split-brain on files and check if IO's fail + - Disable self-heal and cluster-quorum-type + - Get the bricks from the volume + - Write IO and validate IO + - Bring 1st set of brick offline(1 Data brick and arbiter brick) + - Write IO and validate IO + - Bring 2nd set of bricks offline(1 Data brick and arbiter brick) + - Write IO and validate IO + - Check volume is in split-brain + - Write IO and validate IO - should fail + - Enable self-heal and cluster-quorum-type + - Write IO and validate IO - should fail + """ + # Disable self-heal and cluster-quorum-type + options = {"self-heal-daemon": "off", + "cluster.quorum-type": "none"} + ret = set_volume_options(self.mnode, self.volname, options) + self.assertTrue(ret, ("Unable to set volume option %s for " + "volume %s" % (options, self.volname))) + + # Get the bricks from the volume + sub_vols = get_subvols(self.mnode, self.volname) + self.bricks_to_bring_offline = list(sub_vols['volume_subvols'][0]) + + # Write IO's + write_cmd = ("/usr/bin/env python %s create_files -f 1 " + "--base-file-name test_file --fixed-file-size 1k %s" % + (self.script_upload_path, + self.mounts[0].mountpoint)) + ret, _, _ = g.run(self.mounts[0].client_system, write_cmd) + + # Bring 1st set of brick offline(1 Data brick and arbiter brick) + for bricks in ((0, -1), (1, -1)): + down_bricks = [] + for brick in bricks: + 
down_bricks.append(self.bricks_to_bring_offline[brick]) + ret = bring_bricks_offline(self.volname, down_bricks) + self.assertTrue(ret, 'Failed to bring bricks {} offline'. + format(down_bricks)) + proc = g.run_async(self.mounts[0].client_system, write_cmd) + + # Validate I/O + self.assertTrue( + validate_io_procs([proc], self.mounts), + "IO failed on some of the clients" + ) + + # Bring bricks online + self._bring_bricks_online() + + # Check volume is in split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertTrue(ret, "unable to create split-brain scenario") + g.log.info("Successfully created split brain scenario") + + # Write IO's + proc2 = g.run_async(self.mounts[0].client_system, write_cmd) + + # Validate I/O + self.assertFalse( + validate_io_procs([proc2], self.mounts), + "IO passed on split-brain" + ) + g.log.info("Expected - IO's failed due to split-brain") + + # Enable self-heal and cluster-quorum-type + options = {"self-heal-daemon": "on", + "cluster.quorum-type": "auto"} + ret = set_volume_options(self.mnode, self.volname, options) + self.assertTrue(ret, ("Unable to set volume option %s for " + "volume %s" % (options, self.volname))) + + # Write IO's + proc3 = g.run_async(self.mounts[0].client_system, write_cmd) + + # Validate I/O + self.assertFalse( + validate_io_procs([proc3], self.mounts), + "IO passed on split-brain" + ) + g.log.info("Expected - IO's failed due to split-brain") diff --git a/tests/functional/arbiter/test_verify_metadata_and_data_heal.py b/tests/functional/arbiter/test_verify_metadata_and_data_heal.py new file mode 100644 index 000000000..d48e36e73 --- /dev/null +++ b/tests/functional/arbiter/test_verify_metadata_and_data_heal.py @@ -0,0 +1,297 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
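In test_split_brain above, the index pairs (0, -1) and (1, -1) rely on the arbiter brick being listed last in each replica set, so each pass takes down one data brick plus the arbiter while the remaining data brick keeps accepting writes. A small sketch of that selection, with placeholder brick paths:

# Placeholder subvolume layout for a replica 3 arbiter 1 set:
# two data bricks followed by the arbiter brick (index -1).
subvol = ['server1:/bricks/brick1', 'server2:/bricks/brick2',
          'server3:/bricks/arbiter1']

for bricks in ((0, -1), (1, -1)):
    down_bricks = [subvol[index] for index in bricks]
    # Pass 1: data brick 1 + arbiter offline -> writes land only on brick2
    # Pass 2: data brick 2 + arbiter offline -> writes land only on brick1
    print(down_bricks)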
+ +from glusto.core import Glusto as g + +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + get_online_bricks_list) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.heal_libs import ( + is_heal_complete, is_volume_in_split_brain, monitor_heal_completion, + wait_for_self_heal_daemons_to_be_online) +from glustolibs.gluster.heal_ops import (disable_self_heal_daemon, + enable_self_heal_daemon, trigger_heal) +from glustolibs.gluster.lib_utils import (add_user, collect_bricks_arequal, + del_user, group_add, group_del) +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.io.utils import list_all_files_and_dirs_mounts + + +@runs_on([['arbiter', 'replicated'], ['glusterfs']]) +class TestMetadataAndDataHeal(GlusterBaseClass): + '''Description: Verify shd heals files after performing metadata and data + operations while a brick was down''' + def _dac_helper(self, host, option): + '''Helper for creating, deleting users and groups''' + + # Permission/Ownership changes required only for `test_metadata..` + # tests, using random group and usernames + if 'metadata' not in self.test_dir: + return + + if option == 'create': + # Groups + for group in ('qa_func', 'qa_system'): + if not group_add(host, group): + raise ExecutionError('Unable to {} group {} on ' + '{}'.format(option, group, host)) + + # User + if not add_user(host, 'qa_all', group='qa_func'): + raise ExecutionError('Unable to {} user {} under {} on ' + '{}'.format(option, 'qa_all', 'qa_func', + host)) + elif option == 'delete': + # Groups + for group in ('qa_func', 'qa_system'): + if not group_del(host, group): + raise ExecutionError('Unable to {} group {} on ' + '{}'.format(option, group, host)) + + # User + if not del_user(host, 'qa_all'): + raise ExecutionError('Unable to {} user on {}'.format( + option, host)) + + def setUp(self): + self.get_super_method(self, 'setUp')() + + # A single mount is enough for all the tests + self.mounts = self.mounts[0:1] + self.client = self.mounts[0].client_system + + # Use testcase name as test directory + self.test_dir = self.id().split('.')[-1] + self.fqpath = self.mounts[0].mountpoint + '/' + self.test_dir + + if not self.setup_volume_and_mount_volume(mounts=self.mounts): + raise ExecutionError('Failed to setup and mount ' + '{}'.format(self.volname)) + + # Crete group and user names required for the test + self._dac_helper(host=self.client, option='create') + + def tearDown(self): + # Delete group and user names created as part of setup + self._dac_helper(host=self.client, option='delete') + + if not self.unmount_volume_and_cleanup_volume(mounts=self.mounts): + raise ExecutionError('Not able to unmount and cleanup ' + '{}'.format(self.volname)) + + self.get_super_method(self, 'tearDown')() + + def _perform_io_and_disable_self_heal(self): + '''Refactor of steps common to all tests: Perform IO, disable heal''' + ret = mkdir(self.client, self.fqpath) + self.assertTrue(ret, + 'Directory creation failed on {}'.format(self.client)) + self.io_cmd = 'cat /dev/urandom | tr -dc [:space:][:print:] | head -c ' + # Create 6 dir's, 6 files and 6 files in each subdir with 10K data + file_io = ('''cd {0}; for i in `seq 1 6`; + do mkdir dir.$i; {1} 10K > file.$i; + for j in `seq 1 6`; + do {1} 10K > dir.$i/file.$j; done; + done;'''.format(self.fqpath, self.io_cmd)) + ret, _, err = g.run(self.client, 
file_io) + self.assertEqual(ret, 0, 'Unable to create directories and data files') + self.assertFalse(err, '{0} failed with {1}'.format(file_io, err)) + + # Disable self heal deamon + self.assertTrue(disable_self_heal_daemon(self.mnode, self.volname), + 'Disabling self-heal-daemon falied') + + def _perform_brick_ops_and_enable_self_heal(self, op_type): + '''Refactor of steps common to all tests: Brick down and perform + metadata/data operations''' + # First brick in the subvol will always be online and used for self + # heal, so make keys match brick index + self.op_cmd = { + # Metadata Operations (owner and permission changes) + 'metadata': { + 2: + '''cd {0}; for i in `seq 1 3`; do chown -R qa_all:qa_func \ + dir.$i file.$i; chmod -R 555 dir.$i file.$i; done;''', + 3: + '''cd {0}; for i in `seq 1 3`; do chown -R :qa_system \ + dir.$i file.$i; chmod -R 777 dir.$i file.$i; done;''', + # 4 - Will be used for final data consistency check + 4: + '''cd {0}; for i in `seq 1 6`; do chown -R qa_all:qa_system \ + dir.$i file.$i; chmod -R 777 dir.$i file.$i; done;''', + }, + # Data Operations (append data to the files) + 'data': { + 2: + '''cd {0}; for i in `seq 1 3`; + do {1} 2K >> file.$i; + for j in `seq 1 3`; + do {1} 2K >> dir.$i/file.$j; done; + done;''', + 3: + '''cd {0}; for i in `seq 1 3`; + do {1} 3K >> file.$i; + for j in `seq 1 3`; + do {1} 3K >> dir.$i/file.$j; done; + done;''', + # 4 - Will be used for final data consistency check + 4: + '''cd {0}; for i in `seq 1 6`; + do {1} 4K >> file.$i; + for j in `seq 1 6`; + do {1} 4K >> dir.$i/file.$j; done; + done;''', + }, + } + bricks = get_online_bricks_list(self.mnode, self.volname) + self.assertIsNotNone(bricks, + 'Not able to get list of bricks in the volume') + + # Make first brick always online and start operations from second brick + for index, brick in enumerate(bricks[1:], start=2): + + # Bring brick offline + ret = bring_bricks_offline(self.volname, brick) + self.assertTrue(ret, 'Unable to bring {} offline'.format(bricks)) + + # Perform metadata/data operation + cmd = self.op_cmd[op_type][index].format(self.fqpath, self.io_cmd) + ret, _, err = g.run(self.client, cmd) + self.assertEqual(ret, 0, '{0} failed with {1}'.format(cmd, err)) + self.assertFalse(err, '{0} failed with {1}'.format(cmd, err)) + + # Bring brick online + ret = bring_bricks_online( + self.mnode, + self.volname, + brick, + bring_bricks_online_methods='volume_start_force') + + # Assert metadata/data operations resulted in pending heals + self.assertFalse(is_heal_complete(self.mnode, self.volname)) + + # Enable and wait self heal daemon to be online + self.assertTrue(enable_self_heal_daemon(self.mnode, self.volname), + 'Enabling self heal daemon failed') + self.assertTrue( + wait_for_self_heal_daemons_to_be_online(self.mnode, self.volname), + 'Not all self heal daemons are online') + + def _validate_heal_completion_and_arequal(self, op_type): + '''Refactor of steps common to all tests: Validate heal from heal + commands, verify arequal, perform IO and verify arequal after IO''' + + # Validate heal completion + self.assertTrue(monitor_heal_completion(self.mnode, self.volname), + 'Self heal is not completed within timeout') + self.assertFalse( + is_volume_in_split_brain(self.mnode, self.volname), + 'Volume is in split brain even after heal completion') + + subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + self.assertTrue(subvols, 'Not able to get list of subvols') + arbiter = self.volume_type.find('arbiter') >= 0 + stop = len(subvols[0]) - 1 if 
arbiter else len(subvols[0]) + + # Validate arequal + self._validate_arequal_and_perform_lookup(subvols, stop) + + # Perform some additional metadata/data operations + cmd = self.op_cmd[op_type][4].format(self.fqpath, self.io_cmd) + ret, _, err = g.run(self.client, cmd) + self.assertEqual(ret, 0, '{0} failed with {1}'.format(cmd, err)) + self.assertFalse(err, '{0} failed with {1}'.format(cmd, err)) + + # Validate arequal after additional operations + self._validate_arequal_and_perform_lookup(subvols, stop) + + def _validate_arequal_and_perform_lookup(self, subvols, stop): + '''Refactor of steps common to all tests: Validate arequal from bricks + backend and perform a lookup of all files from mount''' + for subvol in subvols: + ret, arequal = collect_bricks_arequal(subvol[0:stop]) + self.assertTrue( + ret, 'Unable to get `arequal` checksum on ' + '{}'.format(subvol[0:stop])) + self.assertEqual( + len(set(arequal)), 1, 'Mismatch of `arequal` ' + 'checksum among {} is identified'.format(subvol[0:stop])) + + # Perform a lookup of all files and directories on mounts + self.assertTrue(list_all_files_and_dirs_mounts(self.mounts), + 'Failed to list all files and dirs from mount') + + def test_metadata_heal_from_shd(self): + '''Description: Verify files heal after switching on `self-heal-daemon` + when metadata operations are performed while a brick was down + + Steps: + 1. Create, mount and run IO on volume + 2. Set `self-heal-daemon` to `off`, cyclic brick down and perform + metadata operations + 3. Set `self-heal-daemon` to `on` and wait for heal completion + 4. Validate areequal checksum on backend bricks + ''' + op_type = 'metadata' + self._perform_io_and_disable_self_heal() + self._perform_brick_ops_and_enable_self_heal(op_type=op_type) + self._validate_heal_completion_and_arequal(op_type=op_type) + g.log.info('Pass: Verification of metadata heal after switching on ' + '`self heal daemon` is complete') + + def test_metadata_heal_from_heal_cmd(self): + '''Description: Verify files heal after triggering heal command when + metadata operations are performed while a brick was down + + Steps: + 1. Create, mount and run IO on volume + 2. Set `self-heal-daemon` to `off`, cyclic brick down and perform + metadata operations + 3. Set `self-heal-daemon` to `on`, invoke `gluster vol <vol> heal` + 4. Validate areequal checksum on backend bricks + ''' + op_type = 'metadata' + self._perform_io_and_disable_self_heal() + self._perform_brick_ops_and_enable_self_heal(op_type=op_type) + + # Invoke `glfsheal` + self.assertTrue(trigger_heal(self.mnode, self.volname), + 'Unable to trigger index heal on the volume') + + self._validate_heal_completion_and_arequal(op_type=op_type) + g.log.info( + 'Pass: Verification of metadata heal via `glfsheal` is complete') + + def test_data_heal_from_shd(self): + '''Description: Verify files heal after triggering heal command when + data operations are performed while a brick was down + + Steps: + 1. Create, mount and run IO on volume + 2. Set `self-heal-daemon` to `off`, cyclic brick down and perform data + operations + 3. Set `self-heal-daemon` to `on` and wait for heal completion + 4. Validate areequal checksum on backend bricks + ''' + op_type = 'data' + self._perform_io_and_disable_self_heal() + self._perform_brick_ops_and_enable_self_heal(op_type=op_type) + self._validate_heal_completion_and_arequal(op_type=op_type) + g.log.info('Pass: Verification of data heal after switching on ' + '`self heal daemon` is complete') |
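As a closing note on _validate_arequal_and_perform_lookup above: the arbiter brick stores only metadata, so its arequal checksum never matches the data bricks and is skipped via the computed `stop` index. A minimal sketch of that comparison, with placeholder checksum values:

def data_bricks_match(arequal_by_brick, is_arbiter_volume):
    # arequal_by_brick: checksums in brick order, arbiter brick last
    stop = (len(arequal_by_brick) - 1 if is_arbiter_volume
            else len(arequal_by_brick))
    # Heal is considered consistent when all compared checksums agree
    return len(set(arequal_by_brick[0:stop])) == 1


print(data_bricks_match(['abc123', 'abc123', '000000'], True))   # True
print(data_bricks_match(['abc123', 'abc123', '000000'], False))  # False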