From 0214a4b1ab6f195c348ad7170a9f62bf8acfb09d Mon Sep 17 00:00:00 2001
From: Ashish Pandey
Date: Wed, 20 Jun 2018 16:08:59 +0530
Subject: functional/disperse: Verify IO does not hang during server side heal.

When IO is performed with client side heal disabled, it should not
hang; pending heal should complete via the server side heal instead.

RHG3-11098

Change-Id: I2f180dd1ba2f45ae0f302a730a02b90ae77b99ad
Signed-off-by: Ashish Pandey
---
 .../disperse/test_ec_io_hang_serverside_heal.py | 139 +++++++++++++++++++++
 1 file changed, 139 insertions(+)
 create mode 100644 tests/functional/disperse/test_ec_io_hang_serverside_heal.py

diff --git a/tests/functional/disperse/test_ec_io_hang_serverside_heal.py b/tests/functional/disperse/test_ec_io_hang_serverside_heal.py
new file mode 100644
index 000000000..dc7781a8a
--- /dev/null
+++ b/tests/functional/disperse/test_ec_io_hang_serverside_heal.py
@@ -0,0 +1,139 @@
+# Copyright (C) 2018 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+"""
+test_ec_io_hang_serverside_heal:
+    Disable client side heal.
+    Perform IO on the mount point, kill some bricks and bring them up.
+    Check that heal completes via the server side heal (self-heal
+    daemon) and that no IO hangs.
+""" +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.brick_libs import ( + bring_bricks_offline, + bring_bricks_online, + are_bricks_online, + get_all_bricks) +from glustolibs.gluster.volume_libs import ( + log_volume_info_and_status, + set_volume_options) +from glustolibs.gluster.heal_libs import ( + monitor_heal_completion) +from glustolibs.gluster.heal_ops import get_heal_info + + +def ec_check_heal_comp(self): + g.log.info("Get the pending heal info for the volume %s", + self.volname) + heal_info = get_heal_info(self.mnode, self.volname) + g.log.info("Successfully got heal info for the volume %s", + self.volname) + g.log.info("Heal Entries %s : %s", self.volname, heal_info) + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + +@runs_on([['dispersed', 'distributed-dispersed'], ['glusterfs']]) +class EcClientHealHangTest(GlusterBaseClass): + # Method to setup the environment for test case + def setUp(self): + # Setup Volume and Mount Volume + g.log.info("Starting to Setup Volume and Mount Volume") + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def test_heal_io_hang(self): + mountpoint = self.mounts[0].mountpoint + + # disable client side heal + options = {"disperse.background-heals": 0} + ret = set_volume_options(self.mnode, self.volname, options) + self.assertTrue(ret, 'Failed to disable client side heal') + g.log.info("Successfully disabled client side heal") + + # Log Volume Info and Status after disabling client side heal + g.log.info("Logging volume info and status") + ret = log_volume_info_and_status(self.mnode, self.volname) + self.assertTrue(ret, ("Logging volume info and status failed " + "on volume %s", self.volname)) + + bricks_list = get_all_bricks(self.mnode, self.volname) + self.assertIsNotNone(bricks_list, "Failed to get the bricks list") + + # Create files + cmd = ("cd %s; mkdir test; cd test; for i in `seq 1 100` ;" + "do touch file$i; done" % mountpoint) + + ret, _, err = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, err) + g.log.info('Finished creating files while all the bricks are UP') + + # Bring bricks offline + ret = bring_bricks_offline(self.volname, bricks_list[0:1]) + self.assertTrue(ret, "Failed to bring down the bricks") + g.log.info("Successfully brought the bricks down") + + # Start pumping IO from client + cmd = ("cd %s; mkdir test; cd test; for i in `seq 1 100` ;" + "do dd if=/dev/urandom of=file$i bs=1M " + "count=5;done" % mountpoint) + + ret, _, err = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, err) + g.log.info('Finished writing on files while a brick is DOWN') + + # Bring bricks online + ret = bring_bricks_online(self.mnode, self.volname, bricks_list[0:1]) + self.assertTrue(ret, "Failed to bring up the bricks") + g.log.info("Successfully brought the bricks up") + + # Verifying all bricks online + ret = are_bricks_online(self.mnode, self.volname, bricks_list) + self.assertTrue(ret, "All bricks are not online") + + appendcmd = ("cd %s; mkdir test; cd test; for i in `seq 1 100` ;" + "do dd if=/dev/urandom of=file$i bs=1M " + "count=1 oflag=append conv=notrunc;done" % mountpoint) + + ret, _, err = 
+        self.assertEqual(ret, 0, err)
+        g.log.info('Finished append on files after bringing bricks online')
+
+        # Check the heal info and completion
+        ec_check_heal_comp(self)
+
+        # Log Volume Info and Status after bringing the brick up
+        g.log.info("Logging volume info and status")
+        ret = log_volume_info_and_status(self.mnode, self.volname)
+        self.assertTrue(ret, ("Logging volume info and status failed "
+                              "on volume %s" % self.volname))
+        g.log.info("Successful in logging volume info and status "
+                   "of volume %s", self.volname)
+
+    # Method to cleanup test setup
+    def tearDown(self):
+        # Unmount and clean up the volume
+        g.log.info("Starting to Unmount Volume and Cleanup Volume")
+        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+        if not ret:
+            raise ExecutionError("Failed to unmount and cleanup Volume")
+        g.log.info("Successful in Unmount Volume and Cleanup Volume")
-- 
cgit
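
Reviewer's note, not part of the patch: the test detects a hang only indirectly, since a truly hung append would block the synchronous g.run() call and stall the whole run rather than fail it. Below is a minimal sketch of how such a hang could be converted into a test failure instead, using only the Python standard library plus the names already defined in the test (g, appendcmd, self.mounts); the 600-second timeout and the helper name do_append are illustrative assumptions, not values taken from this change.

    import threading

    result = {}

    def do_append():
        # Run the same append command the test uses and stash rc/stderr
        result["rc"], _, result["err"] = g.run(
            self.mounts[0].client_system, appendcmd)

    worker = threading.Thread(target=do_append)
    worker.daemon = True   # do not keep a hung command alive at interpreter exit
    worker.start()
    worker.join(600)       # illustrative timeout in seconds; tune per environment
    if worker.is_alive():
        self.fail("Append IO appears hung: did not complete within timeout")
    self.assertEqual(result["rc"], 0, result.get("err"))

The thread-with-timeout guard turns an IO hang into an immediate assertion failure; the patch as written instead relies on dd returning, which is adequate when the test run is externally supervised.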