From 9439573ea62f2fe0221f953db21ea7991609d679 Mon Sep 17 00:00:00 2001
From: Anees Patel
Date: Mon, 17 Dec 2018 10:42:48 +0530
Subject: Per BZ 1401969, modifying the IO pattern to write single large files

When eager-lock is on and two writes happen in parallel on an FD, the
arbiter brick has been observed to become the source of heal, so the IO
pattern is modified to write single large files. Because this is a race
condition, the same brick-offline/online cycle is executed three times
per BZ 1401969. This patch also makes sure that multiple clients write
to different files/dirs, with no two clients writing to the same file.

Change-Id: If0003afb675bbcf9f6b555b43e9a11e4def5435c
Signed-off-by: Anees Patel
---
 .../brick_cases/test_cyclic_brick_kill_list.py | 93 +++++++++++++---------
 1 file changed, 55 insertions(+), 38 deletions(-)

diff --git a/tests/functional/arbiter/brick_cases/test_cyclic_brick_kill_list.py b/tests/functional/arbiter/brick_cases/test_cyclic_brick_kill_list.py
index 5a828efb1..8dbf0bcac 100755
--- a/tests/functional/arbiter/brick_cases/test_cyclic_brick_kill_list.py
+++ b/tests/functional/arbiter/brick_cases/test_cyclic_brick_kill_list.py
@@ -14,6 +14,7 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 
+import time
 from glusto.core import Glusto as g
 from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
 from glustolibs.gluster.volume_libs import log_volume_info_and_status
@@ -31,7 +32,7 @@ from glustolibs.misc.misc_libs import upload_scripts
 
 
 @runs_on([['replicated', 'distributed-replicated'],
-          ['glusterfs', 'cifs', 'nfs']])
+          ['glusterfs', 'nfs']])
 class ListMount(GlusterBaseClass):
     """
     Tetstcase involves killing brick in cyclic order and
@@ -46,9 +47,9 @@ class ListMount(GlusterBaseClass):
         g.log.info("Upload io scripts to clients %s for running IO on mounts",
                    cls.clients)
         script_local_path = ("/usr/share/glustolibs/io/scripts/"
-                             "file_dir_ops.py")
+                             "fd_writes.py")
         cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
-                                  "file_dir_ops.py")
+                                  "fd_writes.py")
         ret = upload_scripts(cls.clients, [script_local_path])
         if not ret:
             raise ExecutionError("Failed to upload IO scripts to clients %s"
@@ -127,18 +128,21 @@ class ListMount(GlusterBaseClass):
         """""
 
         # IO on the mount point
+        # Each client will write 2 files each of 1 GB and keep
+        # modifying the same file
         g.log.info("Starting IO on all mounts...")
         for mount_obj in self.mounts:
             g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                        mount_obj.mountpoint)
-            cmd = ("python %s create_deep_dirs_with_files "
-                   "--dirname-start-num %d "
-                   "--dir-depth 2 "
-                   "--dir-length 35 "
-                   "--max-num-of-dirs 5 "
-                   "--num-of-files 5 %s" % (self.script_upload_path,
-                                            self.counter,
-                                            mount_obj.mountpoint))
+            cmd = ("python %s "
+                   "--file-sizes-list 1G "
+                   "--chunk-sizes-list 128 "
+                   "--write-time 900 "
+                   "--num-of-files 2 "
+                   "--base-file-name test_brick_down_from_client_%s.txt "
+                   "--dir %s " % (self.script_upload_path,
+                                  mount_obj.client_system,
+                                  mount_obj.mountpoint))
             proc = g.run_async(mount_obj.client_system, cmd,
                                user=mount_obj.user)
             self.all_mounts_procs.append(proc)
@@ -147,33 +151,46 @@ class ListMount(GlusterBaseClass):
 
         # Killing bricks in cyclic order
         bricks_list = get_all_bricks(self.mnode, self.volname)
-        for brick in bricks_list:
-            # Bring brick offline
-            g.log.info('Bringing bricks %s offline', brick)
-            ret = bring_bricks_offline(self.volname, [brick])
-            self.assertTrue(ret, 'Failed to bring bricks %s offline' % brick)
-
-            ret = are_bricks_offline(self.mnode, self.volname, [brick])
-            self.assertTrue(ret, 'Bricks %s are not offline' % brick)
-            g.log.info('Bringing bricks %s offline is successful', brick)
-
-            # Bring brick online
-            g.log.info('Bringing bricks %s online', brick)
-            ret = bring_bricks_online(self.mnode, self.volname, [brick])
-            self.assertTrue(ret, 'Failed to bring bricks %s online' % brick)
-            g.log.info('Bricks %s are online', brick)
-
-            # Check if bricks are online
-            ret = are_bricks_online(self.mnode, self.volname, bricks_list)
-            self.assertTrue(ret, 'Bricks %s are not online' % bricks_list)
-            g.log.info('Bricks %s are online', bricks_list)
-
-            # Check daemons
-            g.log.info('Checking daemons...')
-            ret = are_all_self_heal_daemons_are_online(self.mnode,
-                                                       self.volname)
-            self.assertTrue(ret, 'Some of the self-heal Daemons are offline')
-            g.log.info('All self-heal Daemons are online')
+
+        # Total number of cyclic brick-down cycles to be executed
+        number_of_cycles = 0
+        while number_of_cycles < 3:
+            number_of_cycles += 1
+            for brick in bricks_list:
+                # Bring brick offline
+                g.log.info('Bringing bricks %s offline', brick)
+                ret = bring_bricks_offline(self.volname, [brick])
+                self.assertTrue(ret, ("Failed to bring bricks %s offline"
+                                      % brick))
+
+                ret = are_bricks_offline(self.mnode, self.volname, [brick])
+                self.assertTrue(ret, 'Bricks %s are not offline' % brick)
+                g.log.info('Bringing bricks %s offline is successful', brick)
+
+                # Introduce a 30 second sleep while the brick is down
+                g.log.info("Waiting for 30 seconds, with ongoing IO while "
+                           "brick %s is offline", brick)
+                time.sleep(30)
+
+                # Bring brick online
+                g.log.info('Bringing bricks %s online', brick)
+                ret = bring_bricks_online(self.mnode, self.volname, [brick])
+                self.assertTrue(ret, ("Failed to bring bricks %s online"
+                                      % brick))
+                g.log.info('Bricks %s are online', brick)
+
+                # Check if bricks are online
+                ret = are_bricks_online(self.mnode, self.volname, bricks_list)
+                self.assertTrue(ret, 'Bricks %s are not online' % bricks_list)
+                g.log.info('Bricks %s are online', bricks_list)
+
+                # Check daemons
+                g.log.info('Checking daemons...')
+                ret = are_all_self_heal_daemons_are_online(self.mnode,
+                                                           self.volname)
+                self.assertTrue(ret, ("Some of the self-heal Daemons are "
+                                      "offline"))
+                g.log.info('All self-heal Daemons are online')
 
         # Validate IO
         self.assertTrue(
--
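
For context on the new IO pattern: the fd_writes.py command above boils
down to one writer per client, each repeatedly rewriting its own large
file in fixed-size chunks. The sketch below is illustrative only, not
fd_writes.py itself. The helper name, the mount path, and the 128 KB
chunk size are assumptions; only the 1 GB file size, the 900-second
write window, and the per-client file naming come from the test's
command line.

    import socket
    import time

    def rewrite_single_file(path, file_size=1024 ** 3,
                            chunk_size=128 * 1024, write_time=900):
        """Rewrite `path` in fixed-size chunks until `write_time` expires."""
        chunk = b'\0' * chunk_size
        deadline = time.time() + write_time
        while time.time() < deadline:
            # One writer, one FD: no other process touches this file, so
            # eager-lock never sees two parallel writes on the same FD.
            with open(path, 'wb') as fd:
                written = 0
                while written < file_size:
                    fd.write(chunk)
                    written += chunk_size

    if __name__ == '__main__':
        # Mirrors --base-file-name test_brick_down_from_client_<client>.txt;
        # the mount point is a placeholder.
        rewrite_single_file('/mnt/glusterfs/test_brick_down_from_client_%s.txt'
                            % socket.gethostname())

Since each client owns its files exclusively, parallel writes on a
shared FD (the trigger the commit message links to the arbiter becoming
a heal source) cannot come from the workload itself; any heal observed
during the brick-down cycles is then down to the race under test.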
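
The precondition in the commit message, eager-lock being on, is the
GlusterFS default, but a test that depends on it can pin it explicitly.
A minimal sketch using glustolibs follows; set_volume_options is the
glustolibs.gluster.volume_ops helper used across this repo, and the
mnode/volname values are placeholders supplied by the caller.

    from glustolibs.gluster.volume_ops import set_volume_options

    def pin_eager_lock(mnode, volname, state='on'):
        # cluster.eager-lock defaults to 'on'; setting it explicitly keeps
        # the BZ 1401969 code path (parallel writes on one FD) reachable
        # even if an earlier test changed the option.
        return set_volume_options(mnode, volname,
                                  {'cluster.eager-lock': state})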