author    Sunil Kumar Acharya <sheggodu@redhat.com>    2018-01-31 12:42:55 +0530
committer Vijay Avuthu <vavuthu@redhat.com>    2018-06-25 11:32:12 +0000
commit    26016caac5295d8478447a0f1de8201c8ea61b5e (patch)
tree      3c669a00e8f559386446e6ec2dcd8d60f2f519d9    /tests/functional/disperse/test_ec_io_hang_clientside_heal.py
parent    f0cacd170305a0f34466f12869b07ddd0eb7935c (diff)
functional/disperse: verify IO hang during client side heal
When IO is done with server side heal disabled, it should not hang.

The ec_check_heal_comp function will fail because of bug 1593224
(client side heal is not removing the dirty flag for some of the files).
While this bug has been raised and is being investigated by dev, this
patch does its job and tests the target functionality.

RHG3-11097

Change-Id: I841285c9b1a747f5800ec8cdd29a099e5fcc08c5
Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
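For context, a minimal sketch of the scenario this test automates, using only the glustolibs helpers the patch itself imports; the mnode, volname and brick values below are hypothetical placeholders, and in the test these calls run against self.mnode/self.volname inside GlusterBaseClass:

    from glustolibs.gluster.heal_ops import disable_heal
    from glustolibs.gluster.brick_libs import (bring_bricks_offline,
                                               bring_bricks_online)
    from glustolibs.gluster.heal_libs import monitor_heal_completion

    mnode = "server0.example.com"   # hypothetical management node
    volname = "testvol_dispersed"   # hypothetical dispersed (EC) volume
    bricks = ["server0.example.com:/bricks/brick0/testvol_brick0"]  # hypothetical

    disable_heal(mnode, volname)                      # turn off server side heal
    bring_bricks_offline(volname, bricks[0:1])        # take one brick down
    # ... perform IO on the mount point while the brick is down ...
    bring_bricks_online(mnode, volname, bricks[0:1])  # bring the brick back
    # ... read/append the same files from the client to trigger client side heal ...
    assert monitor_heal_completion(mnode, volname), "heal did not complete"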
Diffstat (limited to 'tests/functional/disperse/test_ec_io_hang_clientside_heal.py')
-rw-r--r--  tests/functional/disperse/test_ec_io_hang_clientside_heal.py  150
1 file changed, 150 insertions(+), 0 deletions(-)
diff --git a/tests/functional/disperse/test_ec_io_hang_clientside_heal.py b/tests/functional/disperse/test_ec_io_hang_clientside_heal.py
new file mode 100644
index 000000000..3e40010d3
--- /dev/null
+++ b/tests/functional/disperse/test_ec_io_hang_clientside_heal.py
@@ -0,0 +1,150 @@
+# Copyright (C) 2018 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+"""
+ec_io_hang_during_clientside_heal:
+ Disable server side heal.
+    Perform IO on the mount point, kill some bricks and bring them back up.
+    Check that heal completes via client side heal and that no IO hangs.
+"""
+from glusto.core import Glusto as g
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on)
+from glustolibs.gluster.brick_libs import (
+ bring_bricks_offline,
+ bring_bricks_online,
+ are_bricks_online,
+ get_all_bricks)
+from glustolibs.gluster.volume_libs import (
+ log_volume_info_and_status)
+from glustolibs.gluster.heal_ops import (
+    disable_heal,
+    get_heal_info)
+from glustolibs.gluster.heal_libs import (
+    monitor_heal_completion)
+
+
+def ec_check_heal_comp(self):
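+    """Log the pending heal info and assert that heal has completed.
+
+    Module-level helper, called with the test case instance so that its
+    mnode, volname and assertTrue are available.
+    """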
+ g.log.info("Get the pending heal info for the volume %s",
+ self.volname)
+ heal_info = get_heal_info(self.mnode, self.volname)
+ g.log.info("Successfully got heal info for the volume %s",
+ self.volname)
+ g.log.info("Heal Entries %s : %s", self.volname, heal_info)
+
+ # Monitor heal completion
+ ret = monitor_heal_completion(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal has not yet completed')
+
+
+@runs_on([['dispersed'], ['glusterfs']])
+class EcClientHealHangTest(GlusterBaseClass):
+    # Set up the environment for the test case
+ def setUp(self):
+ # Setup Volume and Mount Volume
+ g.log.info("Starting to Setup Volume and Mount Volume")
+ ret = self.setup_volume_and_mount_volume(mounts=self.mounts,
+ volume_create_force=True)
+ if not ret:
+ raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+ g.log.info("Successful in Setup Volume and Mount Volume")
+
+ # Test Case
+ def test_heal_io_hang(self):
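+        """Verify that client side heal completes and does not hang IO.
+
+        Flow: disable server side heal, create files, bring one brick
+        offline, write to the files, bring the brick back online, then
+        append/read the files so client side heal is triggered, and
+        check that heal completes.
+        """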
+ mountpoint = self.mounts[0].mountpoint
+
+        # Disable server side heal
+        ret = disable_heal(self.mnode, self.volname)
+        self.assertTrue(ret, "Failed to disable server side heal")
+ g.log.info("Successfully disabled server side heal")
+
+        # Log volume info and status after disabling server side heal
+ g.log.info("Logging volume info and status")
+ ret = log_volume_info_and_status(self.mnode, self.volname)
+ self.assertTrue(ret, ("Logging volume info and status failed "
+ "on volume %s", self.volname))
+
+ bricks_list = get_all_bricks(self.mnode, self.volname)
+ self.assertIsNotNone(bricks_list, "Failed to get the bricks list")
+
+ # Create files
+ cmd = ("cd %s; mkdir test; cd test; for i in `seq 1 100` ;"
+ "do touch file$i; done" % mountpoint)
+
+ ret, _, err = g.run(self.mounts[0].client_system, cmd)
+ self.assertEqual(ret, 0, err)
+ g.log.info('Finished creating files while all the bricks are UP')
+
+ # Bring bricks offline
+ ret = bring_bricks_offline(self.volname, bricks_list[0:1])
+ self.assertTrue(ret, "Failed to bring down the bricks")
+ g.log.info("Successfully brought the bricks down")
+
+ # Start pumping IO from client
+ cmd = ("cd %s; mkdir test; cd test; for i in `seq 1 100` ;"
+ "do dd if=/dev/urandom of=file$i bs=1M "
+ "count=5;done" % mountpoint)
+
+ ret, _, err = g.run(self.mounts[0].client_system, cmd)
+ self.assertEqual(ret, 0, err)
+ g.log.info('Finished writing on files while a brick is DOWN')
+
+ # Bring bricks online
+ ret = bring_bricks_online(self.mnode, self.volname, bricks_list[0:1])
+ self.assertTrue(ret, "Failed to bring up the bricks")
+ g.log.info("Successfully brought the bricks up")
+
+ # Verifying all bricks online
+ ret = are_bricks_online(self.mnode, self.volname, bricks_list)
+ self.assertTrue(ret, "All bricks are not online")
+
+ # Start client side heal by reading/writing files.
+ appendcmd = ("cd %s; mkdir test; cd test; for i in `seq 1 100` ;"
+ "do dd if=/dev/urandom of=file$i bs=1M "
+ "count=1 oflag=append conv=notrunc;done" % mountpoint)
+
+ readcmd = ("cd %s; mkdir test; cd test; for i in `seq 1 100` ;"
+ "do dd if=file$i of=/dev/zero bs=1M "
+ "count=5;done" % mountpoint)
+
+ ret, _, err = g.run(self.mounts[0].client_system, appendcmd)
+ self.assertEqual(ret, 0, err)
+ g.log.info('Finished append on files after bringing bricks online')
+
+ ret, _, err = g.run(self.mounts[0].client_system, readcmd)
+ self.assertEqual(ret, 0, err)
+ g.log.info('Finished read on files after bringing bricks online')
+
+ # check the heal info and completion
+ ec_check_heal_comp(self)
+
+ # Log Volume Info and Status after bringing the brick up
+ g.log.info("Logging volume info and status")
+ ret = log_volume_info_and_status(self.mnode, self.volname)
+ self.assertTrue(ret, ("Logging volume info and status failed "
+ "on volume %s", self.volname))
+ g.log.info("Successful in logging volume info and status "
+ "of volume %s", self.volname)
+
+    # Clean up the test setup
+ def tearDown(self):
+ # Stopping the volume
+ g.log.info("Starting to Unmount Volume and Cleanup Volume")
+ ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+ if not ret:
+ raise ExecutionError("Failed to Unmount Volume and Cleanup Volume")
+ g.log.info("Successful in Unmount Volume and Cleanup Volume")