From 9439573ea62f2fe0221f953db21ea7991609d679 Mon Sep 17 00:00:00 2001
From: Anees Patel
Date: Mon, 17 Dec 2018 10:42:48 +0530
Subject: Per BZ 1401969, modifying the IO pattern to write single large files

When eager-lock is on and two writes happen in parallel on an FD, the
arbiter brick has been observed to become the source of heal, so the IO
pattern is modified to write single large files. Because this is a race
condition, the same brick-offline/online cycle is executed three times
per BZ 1401969. This patch also makes sure that multiple clients write
to different files/dirs, with no two clients writing to the same file.

Change-Id: If0003afb675bbcf9f6b555b43e9a11e4def5435c
Signed-off-by: Anees Patel
---
 .../brick_cases/test_cyclic_brick_kill_list.py | 93 +++++++++++++---------
 1 file changed, 55 insertions(+), 38 deletions(-)

diff --git a/tests/functional/arbiter/brick_cases/test_cyclic_brick_kill_list.py b/tests/functional/arbiter/brick_cases/test_cyclic_brick_kill_list.py
index 5a828efb1..8dbf0bcac 100755
--- a/tests/functional/arbiter/brick_cases/test_cyclic_brick_kill_list.py
+++ b/tests/functional/arbiter/brick_cases/test_cyclic_brick_kill_list.py
@@ -14,6 +14,7 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 
+import time
 from glusto.core import Glusto as g
 from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
 from glustolibs.gluster.volume_libs import log_volume_info_and_status
@@ -31,7 +32,7 @@ from glustolibs.misc.misc_libs import upload_scripts
 
 
 @runs_on([['replicated', 'distributed-replicated'],
-          ['glusterfs', 'cifs', 'nfs']])
+          ['glusterfs', 'nfs']])
 class ListMount(GlusterBaseClass):
     """
     Tetstcase involves killing brick in cyclic order and
@@ -46,9 +47,9 @@ class ListMount(GlusterBaseClass):
         g.log.info("Upload io scripts to clients %s for running IO on mounts",
                    cls.clients)
         script_local_path = ("/usr/share/glustolibs/io/scripts/"
-                             "file_dir_ops.py")
+                             "fd_writes.py")
         cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
-                                  "file_dir_ops.py")
+                                  "fd_writes.py")
         ret = upload_scripts(cls.clients, [script_local_path])
         if not ret:
             raise ExecutionError("Failed to upload IO scripts to clients %s"
@@ -127,18 +128,21 @@ class ListMount(GlusterBaseClass):
         """""
 
         # IO on the mount point
+        # Each client will write 2 files each of 1 GB and keep
+        # modifying the same file
         g.log.info("Starting IO on all mounts...")
         for mount_obj in self.mounts:
             g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                        mount_obj.mountpoint)
-            cmd = ("python %s create_deep_dirs_with_files "
-                   "--dirname-start-num %d "
-                   "--dir-depth 2 "
-                   "--dir-length 35 "
-                   "--max-num-of-dirs 5 "
-                   "--num-of-files 5 %s" % (self.script_upload_path,
-                                            self.counter,
-                                            mount_obj.mountpoint))
+            cmd = ("python %s "
+                   "--file-sizes-list 1G "
+                   "--chunk-sizes-list 128 "
+                   "--write-time 900 "
+                   "--num-of-files 2 "
+                   "--base-file-name test_brick_down_from_client_%s.txt "
+                   "--dir %s " % (self.script_upload_path,
+                                  mount_obj.client_system,
+                                  mount_obj.mountpoint))
             proc = g.run_async(mount_obj.client_system, cmd,
                                user=mount_obj.user)
             self.all_mounts_procs.append(proc)
@@ -147,33 +151,46 @@ class ListMount(GlusterBaseClass):
 
         # Killing bricks in cyclic order
         bricks_list = get_all_bricks(self.mnode, self.volname)
-        for brick in bricks_list:
-            # Bring brick offline
-            g.log.info('Bringing bricks %s offline', brick)
-            ret = bring_bricks_offline(self.volname, [brick])
-            self.assertTrue(ret, 'Failed to bring bricks %s offline' % brick)
-
-            ret = are_bricks_offline(self.mnode, self.volname, [brick])
-            self.assertTrue(ret, 'Bricks %s are not offline' % brick)
-            g.log.info('Bringing bricks %s offline is successful', brick)
-
-            # Bring brick online
-            g.log.info('Bringing bricks %s online', brick)
-            ret = bring_bricks_online(self.mnode, self.volname, [brick])
-            self.assertTrue(ret, 'Failed to bring bricks %s online' % brick)
-            g.log.info('Bricks %s are online', brick)
-
-            # Check if bricks are online
-            ret = are_bricks_online(self.mnode, self.volname, bricks_list)
-            self.assertTrue(ret, 'Bricks %s are not online' % bricks_list)
-            g.log.info('Bricks %s are online', bricks_list)
-
-            # Check daemons
-            g.log.info('Checking daemons...')
-            ret = are_all_self_heal_daemons_are_online(self.mnode,
-                                                       self.volname)
-            self.assertTrue(ret, 'Some of the self-heal Daemons are offline')
-            g.log.info('All self-heal Daemons are online')
+
+        # Total number of cyclic brick-down cycles to be executed
+        number_of_cycles = 0
+        while number_of_cycles < 3:
+            number_of_cycles += 1
+            for brick in bricks_list:
+                # Bring brick offline
+                g.log.info('Bringing bricks %s offline', brick)
+                ret = bring_bricks_offline(self.volname, [brick])
+                self.assertTrue(ret, ("Failed to bring bricks %s offline"
+                                      % brick))
+
+                ret = are_bricks_offline(self.mnode, self.volname, [brick])
+                self.assertTrue(ret, 'Bricks %s are not offline' % brick)
+                g.log.info('Bringing bricks %s offline is successful', brick)
+
+                # Introduce a 30 second sleep while the brick is down
+                g.log.info("Waiting for 30 seconds, with ongoing IO while "
+                           "brick %s is offline", brick)
+                time.sleep(30)
+
+                # Bring brick online
+                g.log.info('Bringing bricks %s online', brick)
+                ret = bring_bricks_online(self.mnode, self.volname, [brick])
+                self.assertTrue(ret, ("Failed to bring bricks %s online"
+                                      % brick))
+                g.log.info('Bricks %s are online', brick)
+
+                # Check if bricks are online
+                ret = are_bricks_online(self.mnode, self.volname, bricks_list)
+                self.assertTrue(ret, 'Bricks %s are not online' % bricks_list)
+                g.log.info('Bricks %s are online', bricks_list)
+
+                # Check daemons
+                g.log.info('Checking daemons...')
+                ret = are_all_self_heal_daemons_are_online(self.mnode,
+                                                           self.volname)
+                self.assertTrue(ret, ("Some of the self-heal Daemons are "
+                                      "offline"))
+                g.log.info('All self-heal Daemons are online')
 
         # Validate IO
         self.assertTrue(
--
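
For context on the new IO pattern: the fd_writes.py command above boils
down to one writer per client, each repeatedly rewriting its own large
file in fixed-size chunks. The sketch below is illustrative only, not
fd_writes.py itself. The helper name, the mount path, and the 128 KB
chunk size are assumptions; only the 1 GB file size, the 900-second
write window, and the per-client file naming come from the test's
command line.

    import socket
    import time

    def rewrite_single_file(path, file_size=1024 ** 3,
                            chunk_size=128 * 1024, write_time=900):
        """Rewrite `path` in fixed-size chunks until `write_time` expires."""
        chunk = b'\0' * chunk_size
        deadline = time.time() + write_time
        while time.time() < deadline:
            # One writer, one FD: no other process touches this file, so
            # eager-lock never sees two parallel writes on the same FD.
            with open(path, 'wb') as fd:
                written = 0
                while written < file_size:
                    fd.write(chunk)
                    written += chunk_size

    if __name__ == '__main__':
        # Mirrors --base-file-name test_brick_down_from_client_<client>.txt;
        # the mount point is a placeholder.
        rewrite_single_file('/mnt/glusterfs/test_brick_down_from_client_%s.txt'
                            % socket.gethostname())

Since each client owns its files exclusively, parallel writes on a
shared FD (the trigger the commit message links to the arbiter becoming
a heal source) cannot come from the workload itself; any heal observed
during the brick-down cycles is then down to the race under test.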
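
The precondition in the commit message, eager-lock being on, is the
GlusterFS default, but a test that depends on it can pin it explicitly.
A minimal sketch using glustolibs follows; set_volume_options is the
glustolibs.gluster.volume_ops helper used across this repo, and the
mnode/volname values are placeholders supplied by the caller.

    from glustolibs.gluster.volume_ops import set_volume_options

    def pin_eager_lock(mnode, volname, state='on'):
        # cluster.eager-lock defaults to 'on'; setting it explicitly keeps
        # the BZ 1401969 code path (parallel writes on one FD) reachable
        # even if an earlier test changed the option.
        return set_volume_options(mnode, volname,
                                  {'cluster.eager-lock': state})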