From b0de1e89029b2af15b1fdc92e83c378c5a353346 Mon Sep 17 00:00:00 2001
From: Vitalii Koriakov
Date: Tue, 20 Feb 2018 15:12:50 +0200
Subject: Test brick process should not be started on read only storage_node disks

Change-Id: Id0d9e468aaf0061e9ff0f5cc534c06017e97b793
Signed-off-by: Vitalii Koriakov
---
 .../afr/heal/test_self_heal_daemon_process.py | 174 ++++++++++++++++++++-
 1 file changed, 171 insertions(+), 3 deletions(-)
 mode change 100644 => 100755 tests/functional/afr/heal/test_self_heal_daemon_process.py

diff --git a/tests/functional/afr/heal/test_self_heal_daemon_process.py b/tests/functional/afr/heal/test_self_heal_daemon_process.py
old mode 100644
new mode 100755
index 3412c1b49..1a9fa0987
--- a/tests/functional/afr/heal/test_self_heal_daemon_process.py
+++ b/tests/functional/afr/heal/test_self_heal_daemon_process.py
@@ -18,6 +18,8 @@
 Test Cases in this module tests the self heal daemon process.
 """
 
+import time
+import calendar
 from glusto.core import Glusto as g
 from glustolibs.gluster.exceptions import ExecutionError
 from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
@@ -31,7 +33,8 @@ from glustolibs.gluster.brick_libs import (get_all_bricks,
                                            bring_bricks_offline,
                                            bring_bricks_online,
                                            are_bricks_online,
-                                           select_bricks_to_bring_offline)
+                                           select_bricks_to_bring_offline,
+                                           are_bricks_offline)
 from glustolibs.gluster.brick_ops import replace_brick
 from glustolibs.gluster.heal_libs import (get_self_heal_daemon_pid,
                                           do_bricks_exist_in_shd_volfile,
@@ -39,6 +42,8 @@ from glustolibs.gluster.heal_libs import (get_self_heal_daemon_pid,
                                           are_all_self_heal_daemons_are_online)
 from glustolibs.gluster.volume_ops import (volume_stop, volume_start)
 from glustolibs.gluster.gluster_init import restart_glusterd
+from glustolibs.io.utils import validate_io_procs
+from glustolibs.misc.misc_libs import upload_scripts
 
 
 @runs_on([['replicated', 'distributed-replicated', 'dispersed',
@@ -48,6 +53,26 @@ class SelfHealDaemonProcessTests(GlusterBaseClass):
     SelfHealDaemonProcessTests contains tests which verifies the self-heal
     daemon process of the nodes
     """
+
+    @classmethod
+    def setUpClass(cls):
+        # Calling GlusterBaseClass setUpClass
+        GlusterBaseClass.setUpClass.im_func(cls)
+
+        # Upload io scripts for running IO on mounts
+        g.log.info("Upload io scripts to clients %s for running IO on mounts",
+                   cls.clients)
+        script_local_path = ("/usr/share/glustolibs/io/scripts/"
+                             "file_dir_ops.py")
+        cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
+                                  "file_dir_ops.py")
+        ret = upload_scripts(cls.clients, [script_local_path])
+        if not ret:
+            raise ExecutionError("Failed to upload IO scripts to clients %s"
+                                 % cls.clients)
+        g.log.info("Successfully uploaded IO scripts to clients %s",
+                   cls.clients)
+
     def setUp(self):
         """
         setup volume, mount volume and initialize necessary variables
@@ -57,6 +82,9 @@ class SelfHealDaemonProcessTests(GlusterBaseClass):
         # calling GlusterBaseClass setUpClass
         GlusterBaseClass.setUp.im_func(self)
 
+        self.all_mounts_procs = []
+        self.io_validation_complete = False
+
         # Setup Volume and Mount Volume
         g.log.info("Starting to Setup Volume and Mount Volume")
         ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
@@ -486,6 +514,147 @@ class SelfHealDaemonProcessTests(GlusterBaseClass):
         self.assertTrue(ret, ("Not all bricks are online"))
         g.log.info("All bricks are online.")
 
+    def test_brick_process_not_started_on_read_only_node_disks(self):
+        """
+        * create volume and start
+        * kill one brick
+        * start IO
+        * unmount the brick directory from the node
+        * remount the brick directory with read-only option
+        * start the volume with "force" option
+        * check for error 'posix: initializing translator failed' in log file
+        * remount the brick directory with read-write option
+        * start the volume with "force" option
+        * validate IO
+        """
+        # pylint: disable=too-many-locals,too-many-statements
+        # Select bricks to bring offline
+        bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
+            self.mnode, self.volname))
+        bricks_to_bring_offline = filter(None, (
+            bricks_to_bring_offline_dict['hot_tier_bricks'] +
+            bricks_to_bring_offline_dict['cold_tier_bricks'] +
+            bricks_to_bring_offline_dict['volume_bricks']))
+
+        # Bring bricks offline
+        g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
+        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
+        self.assertTrue(ret, 'Failed to bring bricks %s offline' %
+                        bricks_to_bring_offline)
+
+        ret = are_bricks_offline(self.mnode, self.volname,
+                                 bricks_to_bring_offline)
+        self.assertTrue(ret, 'Bricks %s are not offline'
+                        % bricks_to_bring_offline)
+        g.log.info('Bringing bricks %s offline was successful',
+                   bricks_to_bring_offline)
+
+        # Start creating files on all mounts
+        for mount_obj in self.mounts:
+            g.log.info("Starting IO on %s:%s",
+                       mount_obj.client_system, mount_obj.mountpoint)
+            cmd = ("python %s create_files -f 100 %s/test_dir"
+                   % (self.script_upload_path, mount_obj.mountpoint))
+            proc = g.run_async(mount_obj.client_system, cmd,
+                               user=mount_obj.user)
+            self.all_mounts_procs.append(proc)
+
+        # unmount the brick
+        brick_node, volume_brick = bricks_to_bring_offline[0].split(':')
+        node_brick = '/'.join(volume_brick.split('/')[0:3])
+        g.log.info('Starting to unmount brick %s...', node_brick)
+        ret, _, _ = g.run(brick_node, 'umount %s' % node_brick)
+        self.assertFalse(ret, 'Failed to unmount brick %s' % node_brick)
+        g.log.info('Successfully unmounted %s', node_brick)
+
+        # get the time before remounting the directory and checking logs
+        g.log.info('Getting time before remounting the directory and '
+                   'checking logs for the error...')
+        _, time_before_checking_logs, _ = g.run(brick_node, 'date -u +%s')
+        g.log.info('Time before remounting the directory and checking logs '
+                   '- %s', time_before_checking_logs)
+
+        # remount the directory with the read-only option
+        g.log.info('Starting to remount brick %s with the read-only '
+                   'option...', node_brick)
+        ret, _, _ = g.run(brick_node, 'mount -o ro %s' % node_brick)
+        self.assertFalse(ret, 'Failed to remount brick %s' % node_brick)
+        g.log.info('Successfully remounted %s with the read-only option',
+                   node_brick)
+
+        # start the volume with the "force" option
+        g.log.info('Starting volume with the "force" option...')
+        ret, _, _ = volume_start(self.mnode, self.volname, force=True)
+        self.assertFalse(ret, 'Failed to start volume %s with "force" option'
+                         % self.volname)
+        g.log.info('Successfully started volume %s with "force" option',
+                   self.volname)
+
+        # check logs for an 'initializing translator failed' error
+        g.log.info("Checking logs for an 'initializing translator failed' "
+                   "error for %s brick...", node_brick)
+        error_msg = 'posix: initializing translator failed'
+        cmd = ("cat /var/log/glusterfs/bricks/bricks-%s-%s.log | "
+               "grep '%s'"
+               % (volume_brick.split('/')[-2], volume_brick.split('/')[-1],
+                  error_msg))
+        ret, log_msgs, _ = g.run(brick_node, cmd)
+        log_msg = log_msgs.rstrip().split('\n')[-1]
+
+        self.assertIn(error_msg, log_msg,
+                      'Expected error message not found in the brick log')
+        g.log.info('EXPECTED: %s', error_msg)
+
+        # get the timestamp from the log message
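+        # Note: the parsing below assumes the default gluster brick log
+        # prefix, e.g. '[2018-02-20 13:35:14.024525] E ...': the text before
+        # the 'E' level marker is the bracketed timestamp, and the
+        # microseconds after the '.' are dropped before the epoch conversion.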
+        log_time_msg = log_msg.split('E')[0][1:-2].split('.')[0]
+        log_time_msg_converted = calendar.timegm(time.strptime(
+            log_time_msg, '%Y-%m-%d %H:%M:%S'))
+        g.log.info('Timestamp from logs - %s', log_time_msg)
+        g.log.info('Epoch time from logs - %s', log_time_msg_converted)
+
+        # get the time after remounting the directory and checking logs
+        g.log.info('Getting time after remounting the directory and '
+                   'checking logs for the error...')
+        _, time_after_checking_logs, _ = g.run(brick_node, 'date -u +%s')
+        g.log.info('Time after remounting the directory and checking logs '
+                   '- %s', time_after_checking_logs)
+
+        # check that the error falls within the time period
+        g.log.info('Checking if the error is in the right time period...')
+        self.assertTrue(int(time_before_checking_logs) <=
+                        int(log_time_msg_converted) <=
+                        int(time_after_checking_logs),
+                        'Expected error is not in the right time period')
+        g.log.info('Expected error is in the right time period')
+
+        # unmount the brick
+        g.log.info('Starting to unmount brick %s...', node_brick)
+        ret, _, _ = g.run(brick_node, 'umount %s' % node_brick)
+        self.assertFalse(ret, 'Failed to unmount brick %s' % node_brick)
+        g.log.info('Successfully unmounted %s', node_brick)
+
+        # remount the directory with the read-write option
+        g.log.info('Starting to remount brick %s with the read-write '
+                   'option...', node_brick)
+        ret, _, _ = g.run(brick_node, 'mount %s' % node_brick)
+        self.assertFalse(ret, 'Failed to remount brick %s' % node_brick)
+        g.log.info('Successfully remounted %s with the read-write option',
+                   node_brick)
+
+        # start the volume with the "force" option
+        g.log.info('Starting volume with the "force" option...')
+        ret, _, _ = volume_start(self.mnode, self.volname, force=True)
+        self.assertFalse(ret, 'Failed to start volume %s with "force" option'
+                         % self.volname)
+        g.log.info('Successfully started volume %s with "force" option',
+                   self.volname)
+
+        # Validate IO
+        g.log.info("Waiting for IO to complete and validating IO...")
+        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
+        self.assertTrue(ret, "IO failed on some of the clients")
+        self.io_validation_complete = True
+        g.log.info("IO is successful on all mounts")
+
 
 @runs_on([['replicated', 'distributed-replicated'],
           ['glusterfs', 'nfs', 'cifs']])
@@ -525,8 +694,7 @@ class ImpactOfReplaceBrickForGlustershdTests(GlusterBaseClass):
 
         # Setup Volume and Mount Volume
         g.log.info("Starting to Setup Volume %s", self.volname)
-        ret = self.setup_volume_and_mount_volume(self.mounts,
-                                                 volume_create_force=False)
+        ret = self.setup_volume_and_mount_volume(self.mounts)
         if not ret:
             raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
         g.log.info("Successful in Setup Volume and Mount Volume")
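
A minimal standalone sketch of the timestamp-window check the new test performs:
parse the leading timestamp of the matched brick-log line, convert it to UTC
epoch seconds, and verify it falls between the two 'date -u +%s' samples taken
around the read-only remount. log_time_in_window is a hypothetical helper, and
the sample log line, MSGID, and epoch bounds are illustrative assumptions, not
output captured from a real run.

    import time
    import calendar


    def log_time_in_window(log_msg, epoch_before, epoch_after):
        # Strip the brackets and microseconds from the leading timestamp,
        # then convert it to UTC epoch seconds for comparison.
        timestamp = log_msg.split('E')[0][1:-2].split('.')[0]
        log_epoch = calendar.timegm(time.strptime(timestamp,
                                                  '%Y-%m-%d %H:%M:%S'))
        return int(epoch_before) <= log_epoch <= int(epoch_after)


    sample_line = ('[2018-02-20 13:35:14.024525] E [MSGID: 138001] '
                   '[posix.c:init] 0-testvol-posix: posix: initializing '
                   'translator failed')
    # 'date -u +%s' samples taken just before and after the remount
    print(log_time_in_window(sample_line, 1519133700, 1519133800))  # True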