From 924fce6697ec469bddf2e65824131f0a3d3ac5a5 Mon Sep 17 00:00:00 2001
From: Manisha Saini
Date: Tue, 16 Feb 2021 23:41:22 +0530
Subject: [Test] Bring down data bricks in cyclic order and trigger heal

Change-Id: Ibf0391a2f7709fb08326f57a0c4c899e28faf62f
Signed-off-by: Manisha Saini
---
 tests/functional/arbiter/test_brick_down_cyclic.py | 140 +++++++++++++++++++++
 1 file changed, 140 insertions(+)
 create mode 100644 tests/functional/arbiter/test_brick_down_cyclic.py

diff --git a/tests/functional/arbiter/test_brick_down_cyclic.py b/tests/functional/arbiter/test_brick_down_cyclic.py
new file mode 100644
index 000000000..8639a4dc5
--- /dev/null
+++ b/tests/functional/arbiter/test_brick_down_cyclic.py
@@ -0,0 +1,140 @@
+# Copyright (C) 2021 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+# pylint: disable=too-many-statements, too-many-locals
+import time
+from glusto.core import Glusto as g
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.heal_ops import trigger_heal
+from glustolibs.gluster.heal_libs import (
+    is_volume_in_split_brain, is_heal_complete,
+    monitor_heal_completion,
+    are_all_self_heal_daemons_are_online)
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+                                           bring_bricks_online,
+                                           are_bricks_offline,
+                                           get_all_bricks,
+                                           are_bricks_online)
+
+
+@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']])
+class TestBrickDownHeal(GlusterBaseClass):
+
+    @classmethod
+    def setUpClass(cls):
+
+        # Calling GlusterBaseClass setUpClass
+        cls.get_super_method(cls, 'setUpClass')()
+
+        # Setup Volume and Mount Volume
+        ret = cls.setup_volume_and_mount_volume(cls.mounts, True)
+        if not ret:
+            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+
+    @classmethod
+    def tearDownClass(cls):
+        """
+        Cleanup Volume
+        """
+        ret = cls.unmount_volume_and_cleanup_volume(cls.mounts)
+        if not ret:
+            raise ExecutionError("Failed to unmount and cleanup volume")
+
+        cls.get_super_method(cls, 'tearDownClass')()
+
+    def test_brick_down_heal(self):
+        """
+        - Start IO from the client on the mount point
+        - Bring down the bricks in cyclic order:
+          kill brick 1, sleep for 5 seconds, bring brick 1 up, wait for 10s,
+          then repeat the same for brick 2 and brick 3
+        - Repeat the cycle three times
+        - Trigger heal, wait for heal completion and check for split-brain
+        """
+        # Start IO on the mount point
+        self.all_mounts_procs = []
+        cmd = ("for i in `seq 1 10`;"
+               "do dd if=/dev/urandom of=%s/file$i bs=1K count=1;"
+               "done" % self.mounts[0].mountpoint)
+        proc = g.run_async(self.mounts[0].client_system, cmd)
+        self.all_mounts_procs.append(proc)
+
+        # Get the list of bricks to bring down in cyclic order
+        bricks_list = get_all_bricks(self.mnode, self.volname)
+
+        # Run three cycles of bringing each brick down and back up
+        number_of_cycles = 0
+        while number_of_cycles < 3:
+            number_of_cycles += 1
+            for brick in bricks_list:
+                # Bring brick offline
+                g.log.info('Bringing brick %s offline', brick)
+                ret = bring_bricks_offline(self.volname, [brick])
+                self.assertTrue(ret, ("Failed to bring brick %s offline"
+                                      % brick))
+
+                ret = are_bricks_offline(self.mnode, self.volname, [brick])
+                self.assertTrue(ret, 'Brick %s is not offline' % brick)
+                g.log.info('Brick %s was brought offline successfully', brick)
+
+                # Keep the brick down for 5 seconds while IO is in progress
+                g.log.info("Waiting for 5 seconds, with ongoing IO, while "
+                           "brick %s is offline", brick)
+                time.sleep(5)
+
+                # Bring brick online
+                g.log.info('Bringing brick %s online', brick)
+                ret = bring_bricks_online(self.mnode, self.volname, [brick])
+                self.assertTrue(ret, ("Failed to bring brick %s online"
+                                      % brick))
+                g.log.info('Brick %s is online', brick)
+
+                # Keep the brick up for 10 seconds before the next brick
+                g.log.info("Waiting for 10 seconds while "
+                           "brick %s is online", brick)
+                time.sleep(10)
+
+        # Check if all bricks are online
+        ret = are_bricks_online(self.mnode, self.volname, bricks_list)
+        self.assertTrue(ret, 'Bricks %s are not online' % bricks_list)
+        g.log.info('Bricks %s are online', bricks_list)
+
+        # Check if all self-heal daemons are online
+        g.log.info('Checking self-heal daemons...')
+        ret = are_all_self_heal_daemons_are_online(self.mnode,
+                                                   self.volname)
+        self.assertTrue(ret, ("Some of the self-heal daemons are "
+                              "offline"))
+        g.log.info('All self-heal daemons are online')
+
+        # Trigger self heal
+        ret = trigger_heal(self.mnode, self.volname)
+        self.assertTrue(ret, 'Unable to trigger heal on volume')
+
+        # Monitor heal completion
+        ret = monitor_heal_completion(self.mnode, self.volname)
+        self.assertTrue(ret, 'Heal has not yet completed')
+
+        # Check if heal is completed
+        ret = is_heal_complete(self.mnode, self.volname)
+        self.assertTrue(ret, 'Heal is not complete')
+        g.log.info('Heal is completed successfully')
+
+        # Check for split-brain
+        ret = is_volume_in_split_brain(self.mnode, self.volname)
+        self.assertFalse(ret, 'Volume is in split-brain state')
+        g.log.info('Volume is not in split-brain state')
-- cgit
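
One gap a reviewer might flag: the dd writer started with g.run_async at the
top of test_brick_down_heal() is never reaped, so a write failure during the
brick cycling would go unnoticed. Below is a minimal sketch of the usual
glusto-tests pattern for closing that gap; it assumes validate_io_procs from
glustolibs.io.utils (used elsewhere in this repo) and is a suggestion, not
part of the patch as posted.

    # Suggested addition, not in this patch: reap the async dd writer so
    # a failed write fails the test. Assumes the import block gains
    #     from glustolibs.io.utils import validate_io_procs
    # and that these lines close out test_brick_down_heal():

            # Wait for the async IO started on the mount and validate it
            ret = validate_io_procs(self.all_mounts_procs, [self.mounts[0]])
            self.assertTrue(ret, 'IO failed on the client')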