summaryrefslogtreecommitdiffstats
path: root/tests/functional/afr
diff options
context:
space:
mode:
authorVitalii Koriakov <vkoriako@redhat.com>2018-02-16 16:09:50 +0200
committerNigel Babu <nigelb@redhat.com>2018-05-04 07:34:36 +0000
commit72812c9f72c69fa8083b7f79c49507a2df9817cf (patch)
treea30714669a59e9b635af49c6048bef4d20e4ecb8 /tests/functional/afr
parent0952b6138fca369ebb213ece251b04f20a5109cb (diff)
Test write I/O on mount point is resumed when client side quorum is restored ( x3)
Change-Id: Ic0aaccdbf6938702ec1dbb44e888e45eb9f21e28 Signed-off-by: Vitalii Koriakov <vkoriako@redhat.com>
Diffstat (limited to 'tests/functional/afr')
-rwxr-xr-xtests/functional/afr/test_write_io_mount_point_resumed_quorum_restored.py561
1 files changed, 561 insertions, 0 deletions
diff --git a/tests/functional/afr/test_write_io_mount_point_resumed_quorum_restored.py b/tests/functional/afr/test_write_io_mount_point_resumed_quorum_restored.py
new file mode 100755
index 0000000..4e60208
--- /dev/null
+++ b/tests/functional/afr/test_write_io_mount_point_resumed_quorum_restored.py
@@ -0,0 +1,561 @@
+# Copyright (C) 2016-2017 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import time
+from glusto.core import Glusto as g
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.volume_libs import (
+ set_volume_options, get_subvols,
+ wait_for_volume_process_to_be_online,
+ verify_all_process_of_volume_are_online)
+from glustolibs.misc.misc_libs import (upload_scripts,
+ are_nodes_online,
+ reboot_nodes)
+from glustolibs.io.utils import (validate_io_procs,
+ is_io_procs_fail_with_rofs,
+ list_all_files_and_dirs_mounts,
+ wait_for_io_to_complete)
+
+
+@runs_on([['distributed-replicated'],
+          ['glusterfs']])
+class ClientSideQuorumRestored(GlusterBaseClass):
+    """Client-side quorum tests on a distributed-replicated volume.
+
+    With cluster.quorum-type set to auto, the test reboots the nodes
+    hosting the first two bricks of a subvolume and checks that writes
+    from the mount point fail with a read-only file system while those
+    nodes are down, and succeed again once they are back online.
+    """
+
+    @classmethod
+    def setUpClass(cls):
+        """Upload the IO helper script to every client.
+
+        Raises:
+            ExecutionError: if the script upload fails.
+        """
+        # Calling GlusterBaseClass setUpClass
+        GlusterBaseClass.setUpClass.im_func(cls)
+
+        # Upload io scripts for running IO on mounts
+        g.log.info("Upload io scripts to clients %s for running IO on mounts",
+                   cls.clients)
+        script_local_path = ("/usr/share/glustolibs/io/scripts/"
+                             "file_dir_ops.py")
+        cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
+                                  "file_dir_ops.py")
+        ret = upload_scripts(cls.clients, [script_local_path])
+        if not ret:
+            raise ExecutionError("Failed to upload IO scripts to clients %s"
+                                 % cls.clients)
+        g.log.info("Successfully uploaded IO scripts to clients %s",
+                   cls.clients)
+
+    def setUp(self):
+        """Reset the IO bookkeeping and set up and mount the volume.
+
+        Raises:
+            ExecutionError: if volume setup or mounting fails.
+        """
+        # Calling GlusterBaseClass setUp
+        GlusterBaseClass.setUp.im_func(self)
+
+        self.all_mounts_procs = []
+        self.io_validation_complete = False
+
+        # Setup Volume and Mount Volume
+        g.log.info("Starting to Setup Volume and Mount Volume")
+        ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
+        if not ret:
+            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+        g.log.info("Successful in Setup Volume and Mount Volume")
+
+    def tearDown(self):
+        """
+        If test method failed before validating IO, tearDown waits for the
+        IO's to complete and checks for the IO exit status
+
+        Cleanup and umount volume
+
+        Raises:
+            ExecutionError: if pending IO failed, listing the mounts
+                failed, or unmount/cleanup of the volume failed.
+        """
+        if not self.io_validation_complete:
+            g.log.info("Wait for IO to complete as IO validation did not "
+                       "succeed in test method")
+            ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts)
+            if not ret:
+                raise ExecutionError("IO failed on some of the clients")
+            g.log.info("IO is successful on all mounts")
+
+            # List all files and dirs created
+            g.log.info("List all files and directories:")
+            ret = list_all_files_and_dirs_mounts(self.mounts)
+            if not ret:
+                raise ExecutionError("Failed to list all files and dirs")
+            g.log.info("Listing all files and directories is successful")
+
+        # Cleanup and umount volume
+        g.log.info("Starting to Unmount Volume and Cleanup Volume")
+        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+        if not ret:
+            raise ExecutionError("Failed to umount the vol & cleanup Volume")
+        g.log.info("Successful in umounting the volume and Cleanup")
+
+        # Calling GlusterBaseClass teardown
+        GlusterBaseClass.tearDown.im_func(self)
+
+    @staticmethod
+    def _nodes_of(bricks):
+        """Return the host part of every 'host:path' brick in bricks."""
+        return [brick.split(':')[0] for brick in bricks]
+
+    def _list_brick_files(self, brick):
+        """Return the names that 'ls' reports in the brick's directory."""
+        node, brick_path = brick.split(':')
+        ret, brick_file_list, _ = g.run(node, 'ls %s' % brick_path)
+        self.assertFalse(ret, 'Failed to ls files on %s' % node)
+        return brick_file_list.splitlines()
+
+    def _delete_files_from_mounts(self, file_list):
+        """Remove file_list from every mount point, failing on rm error."""
+        for mount_obj in self.mounts:
+            g.log.info("Deleting data for %s:%s",
+                       mount_obj.client_system, mount_obj.mountpoint)
+            cmd = ('cd %s/ ; rm -rf %s'
+                   % (mount_obj.mountpoint, ' '.join(file_list)))
+            ret, _, _ = g.run(mount_obj.client_system, cmd)
+            # Report the client the command ran on, not a brick node.
+            self.assertFalse(ret, 'Failed to rm file on %s'
+                             % mount_obj.client_system)
+        g.log.info('Files %s are deleted', file_list)
+
+    def _reboot(self, nodes):
+        """Trigger a reboot of nodes and fail the test if that fails."""
+        g.log.info("Rebooting the nodes %s", nodes)
+        ret = reboot_nodes(nodes)
+        self.assertTrue(ret, 'Failed to reboot nodes %s ' % nodes)
+
+    def _start_touch_on_mounts(self, file_list):
+        """Start an async 'touch' of file_list on every mount.
+
+        Returns:
+            list: one async process handle per mount, in self.mounts order.
+        """
+        procs = []
+        for mount_obj in self.mounts:
+            g.log.info("Creating data for %s:%s",
+                       mount_obj.client_system, mount_obj.mountpoint)
+            cmd = ("cd %s/ ;"
+                   "touch %s"
+                   % (mount_obj.mountpoint, ' '.join(file_list)))
+            procs.append(g.run_async(mount_obj.client_system, cmd,
+                                     user=mount_obj.user))
+        return procs
+
+    def _validate_io_success(self, procs):
+        """Wait for procs and assert that IO succeeded on all mounts."""
+        # The flag tells tearDown whether it still has to wait for IO.
+        self.io_validation_complete = False
+        g.log.info("Wait for IO to complete and validate IO ...")
+        ret = validate_io_procs(procs, self.mounts)
+        self.assertTrue(ret, "IO failed on some of the clients")
+        self.io_validation_complete = True
+        g.log.info("IO is successful on all mounts")
+
+    def _validate_io_fails_with_rofs(self, procs):
+        """Assert that procs failed with a read-only file system error."""
+        self.io_validation_complete = False
+        g.log.info("Validating if IO failed with read-only filesystem")
+        ret = is_io_procs_fail_with_rofs(self, procs, self.mounts)
+        self.assertTrue(ret, ("Unexpected error and IO successful"
+                              " on read-only filesystem"))
+        self.io_validation_complete = True
+        g.log.info("EXPECTED: "
+                   "Read-only file system in IO while creating file")
+
+    def _wait_for_nodes_online(self, nodes, timeout=300):
+        """Poll every 5 seconds, up to timeout seconds, for nodes to be up.
+
+        Best effort: when the timeout expires the per-node state is only
+        logged. The volume-process check that follows is what fails the
+        test if the nodes are still down.
+        """
+        counter = 0
+        # Initialised so the report below is safe even if the loop body
+        # never runs.
+        reboot_results = {}
+        while counter < timeout:
+            ret, reboot_results = are_nodes_online(nodes)
+            if ret:
+                g.log.info("All nodes %s are up and running", nodes)
+                return
+            g.log.info("Nodes are offline, Retry after 5 seconds ... ")
+            time.sleep(5)
+            counter = counter + 5
+
+        for node in reboot_results:
+            if reboot_results[node]:
+                g.log.info("Node %s is online", node)
+            else:
+                g.log.error("Node %s is offline even after "
+                            "%d minutes", node, timeout / 60.0)
+
+    def _wait_for_volume_online(self):
+        """Wait for, then verify, that all volume processes are online."""
+        g.log.info("Wait for volume processes to be online")
+        ret = wait_for_volume_process_to_be_online(self.mnode,
+                                                   self.volname)
+        # The message must be a formatted string; a (format, arg) tuple
+        # would be shown unformatted on failure.
+        self.assertTrue(ret,
+                        ("Failed to wait for volume %s processes to "
+                         "be online" % self.volname))
+        g.log.info("Successful in waiting for volume %s processes to be "
+                   "online", self.volname)
+
+        # Verify volume's all process are online
+        g.log.info("Verifying volume's all process are online")
+        ret = verify_all_process_of_volume_are_online(self.mnode,
+                                                      self.volname)
+        self.assertTrue(ret, ("Volume %s : All process are not online"
+                              % self.volname))
+        g.log.info("Volume %s : All process are online", self.volname)
+
+    def test_write_io_mount_point_resumed_quorum_restored_x3(self):
+        """
+        - set cluster.quorum-type to auto
+        - start I/O from the mount point
+        - Do IO and check on subvols with two nodes to reboot
+        (do for each subvol)
+        - get files to delete/create for nodes to be offline
+        - delete files from mountpoint
+        - reboot nodes
+        - creating files on nodes while rebooting
+        - validate for rofs
+        - wait for volume processes to be online
+        - creating files on nodes after rebooting
+        - validate IO
+        - Do IO and check on subvols without nodes to reboot
+        (do for each subvol)
+        - get files to delete/create for nodes to be online
+        - delete files from mountpoint
+        - reboot nodes
+        - creating files on online nodes while rebooting other nodes
+        - validate IO
+        - Do IO and check and reboot two nodes on all subvols
+        - get files to delete/create for nodes to be offline
+        - delete files from mountpoint
+        - reboot nodes
+        - creating files on nodes while rebooting
+        - validate for rofs
+        - wait for volume processes to be online
+        - creating files on nodes after rebooting
+        - validate IO
+        """
+        # set cluster.quorum-type to auto
+        options = {"cluster.quorum-type": "auto"}
+        g.log.info("setting cluster.quorum-type to auto on volume %s",
+                   self.volname)
+        ret = set_volume_options(self.mnode, self.volname, options)
+        self.assertTrue(ret, ("Unable to set volume option %s for "
+                              "volume %s" % (options, self.volname)))
+        g.log.info("Sucessfully set %s for volume %s",
+                   options, self.volname)
+
+        # Creating files on client side
+        for mount_obj in self.mounts:
+            g.log.info("Generating data for %s:%s",
+                       mount_obj.client_system, mount_obj.mountpoint)
+
+            # Creating files
+            cmd = ("python %s create_files -f 30 %s"
+                   % (self.script_upload_path, mount_obj.mountpoint))
+
+            proc = g.run_async(mount_obj.client_system, cmd,
+                               user=mount_obj.user)
+            self.all_mounts_procs.append(proc)
+
+        # Validate IO
+        self._validate_io_success(self.all_mounts_procs)
+
+        # Do IO and check on subvols with nodes to reboot
+        subvols_dict = get_subvols(self.mnode, self.volname)
+        for subvol in subvols_dict['volume_subvols']:
+            # define nodes to reboot: hosts of the first two bricks
+            brick_list = subvol[0:2]
+            nodes_to_reboot = self._nodes_of(brick_list)
+
+            # get files to delete/create for nodes to be offline
+            file_list = self._list_brick_files(brick_list[0])
+
+            # delete files from mountpoint
+            self._delete_files_from_mounts(file_list)
+
+            # reboot nodes on subvol and wait while rebooting
+            self._reboot(nodes_to_reboot)
+
+            # Creating files on nodes while rebooting must fail with rofs
+            self.all_mounts_procs = self._start_touch_on_mounts(file_list)
+            self._validate_io_fails_with_rofs(self.all_mounts_procs)
+
+            # check if nodes are online, then wait for the volume
+            self._wait_for_nodes_online(nodes_to_reboot)
+            self._wait_for_volume_online()
+
+            # Creating files on nodes after rebooting must succeed
+            self.all_mounts_procs = self._start_touch_on_mounts(file_list)
+            self._validate_io_success(self.all_mounts_procs)
+
+        # Do IO and check on subvols without nodes to reboot
+        subvols_dict = get_subvols(self.mnode, self.volname)
+        for subvol in subvols_dict['volume_subvols']:
+            # define nodes to reboot
+            nodes_to_reboot = self._nodes_of(subvol[0:2])
+
+            # get files to delete/create for nodes to be online: use the
+            # first subvol other than the one being rebooted
+            new_subvols_dict = get_subvols(self.mnode, self.volname)
+            subvol_to_operate = new_subvols_dict['volume_subvols']
+            subvol_to_operate.remove(subvol)
+            brick_list_subvol_online = subvol_to_operate[0]
+            file_list = self._list_brick_files(brick_list_subvol_online[0])
+
+            # delete files from mountpoint
+            self._delete_files_from_mounts(file_list)
+
+            # reboot nodes on subvol and wait while rebooting
+            self._reboot(nodes_to_reboot)
+
+            # Creating files that belong to the online subvol while the
+            # other nodes reboot must succeed
+            self.all_mounts_procs = self._start_touch_on_mounts(file_list)
+            self._validate_io_success(self.all_mounts_procs)
+
+            # check if nodes are online, then wait for the volume
+            self._wait_for_nodes_online(nodes_to_reboot)
+            self._wait_for_volume_online()
+
+        # Do IO and check and reboot nodes on all subvols
+        subvols_dict = get_subvols(self.mnode, self.volname)
+        nodes_to_reboot = []
+        file_list_for_all_subvols = []
+        for subvol in subvols_dict['volume_subvols']:
+            # define nodes to reboot
+            brick_list = subvol[0:2]
+            nodes_to_reboot.extend(self._nodes_of(brick_list))
+
+            # get files to delete/create for nodes to be offline
+            file_list = self._list_brick_files(brick_list[0])
+            file_list_for_all_subvols.append(file_list)
+
+            # delete files from mountpoint
+            self._delete_files_from_mounts(file_list)
+
+        # reboot nodes on subvol and wait while rebooting
+        self._reboot(nodes_to_reboot)
+
+        # Creating files on nodes while rebooting: start the IO for every
+        # subvol first, then validate each batch. The batches stay in a
+        # local list, as before, and are not stored on self.
+        all_mounts_procs = [self._start_touch_on_mounts(files)
+                            for files in file_list_for_all_subvols]
+        for mounts_procs in all_mounts_procs:
+            self._validate_io_fails_with_rofs(mounts_procs)
+
+        # check if nodes are online, then wait for the volume
+        self._wait_for_nodes_online(nodes_to_reboot)
+        self._wait_for_volume_online()
+
+        # Creating files on nodes after rebooting
+        all_mounts_procs = [self._start_touch_on_mounts(files)
+                            for files in file_list_for_all_subvols]
+        for mounts_procs in all_mounts_procs:
+            self._validate_io_success(mounts_procs)