Diffstat (limited to 'tests/functional/disperse')
24 files changed, 5423 insertions, 37 deletions
diff --git a/tests/functional/disperse/test_disperse_eager_lock.py b/tests/functional/disperse/test_disperse_eager_lock.py new file mode 100644 index 000000000..7f7ee84f5 --- /dev/null +++ b/tests/functional/disperse/test_disperse_eager_lock.py @@ -0,0 +1,71 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from random import choice +import string + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, + runs_on) +from glustolibs.gluster.volume_ops import set_volume_options + + +@runs_on([['dispersed', 'distributed-dispersed'], ['glusterfs']]) +class TestDisperseEagerLock(GlusterBaseClass): + def setUp(self): + ret = self.setup_volume() + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + @staticmethod + def get_random_string(chars, str_len=4): + return ''.join((choice(chars) for _ in range(str_len))) + + def test_disperse_eager_lock_cli(self): + """ + Testcase Steps: + 1. Create an EC volume + 2. Set the eager lock option by turning + on disperse.eager-lock by using different inputs: + - Try non-boolean values (must fail) + - Try boolean values + """ + # Set the eager lock option by turning + # on disperse.eager-lock by using different inputs + key = 'disperse.eager-lock' + + # Set eager lock option with non-boolean values + for char_type in (string.ascii_letters, string.punctuation, + string.printable, string.digits): + temp_val = self.get_random_string(char_type) + value = "{}".format(temp_val) + ret = set_volume_options(self.mnode, self.volname, {key: value}) + self.assertFalse(ret, "Unexpected: Erroneous value {}, to option " + "{} should result in failure".format(value, key)) + + # Set eager lock option with boolean values + for value in ('1', '0', 'off', 'on', 'disable', 'enable'): + ret = set_volume_options(self.mnode, self.volname, {key: value}) + self.assertTrue(ret, "Unexpected: Boolean value {}," + " to option {} shouldn't result in failure" .format(value, key)) + g.log.info("Only boolean values are accepted by eager lock.") + + def tearDown(self): + ret = self.cleanup_volume() + if not ret: + raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") diff --git a/tests/functional/disperse/test_ec_all_healtypes.py b/tests/functional/disperse/test_ec_all_healtypes.py new file mode 100644 index 000000000..f3210b6a7 --- /dev/null +++ b/tests/functional/disperse/test_ec_all_healtypes.py @@ -0,0 +1,285 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version.
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +Test Description: + Tests FOps and all heal types on an EC volume +""" +from random import sample +from glusto.core import Glusto as g + +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import validate_io_procs, collect_mounts_arequal +from glustolibs.gluster.brick_libs import (get_all_bricks, + bring_bricks_online, + wait_for_bricks_to_be_online, + get_offline_bricks_list, + bring_bricks_offline) +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.heal_libs import monitor_heal_completion + + +@runs_on([['dispersed', 'distributed-dispersed'], + ['glusterfs']]) +class TestEcAllHealTypes(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_ec_all_healtypes(self): + """ + Test steps: + - Create directory dir1 + - Create files inside dir1 + - Rename all files inside dir1 + - Create softlink and hardlink of files in mountpoint + - Create tiny, small, medium and large file + - Get arequal of dir1 + - Create directory dir2 + - Creating files on dir2 + - Bring down other bricks to max redundancy + - Create directory dir3 + - Start pumping IO to dir3 + - Validating IO's on dir2 and waiting to complete + - Bring bricks online + - Wait for bricks to come online + - Check if bricks are online + - Monitor heal completion + - Get arequal of dir1 + - Compare arequals of dir1 + """ + + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + # Get the bricks from the volume + bricks_list = get_all_bricks(self.mnode, self.volname) + g.log.info("Brick List : %s", bricks_list) + + mountpoint = self.mounts[0].mountpoint + client = self.mounts[0].client_system + + # Creating dir1 + ret = mkdir(client, "%s/dir1" + % mountpoint) + self.assertTrue(ret, "Failed to
create dir1") + g.log.info("Directory dir1 on %s created successfully", self.mounts[0]) + + # Create files inside dir1 + cmd = ('touch %s/dir1/file{1..5};' + % mountpoint) + ret, _, _ = g.run(client, cmd) + self.assertFalse(ret, "File creation failed") + g.log.info("File created successfull") + + # Rename all files inside dir1 + cmd = ('cd %s/dir1/; ' + 'for FILENAME in *;' + 'do mv $FILENAME Unix_$FILENAME; cd ~;' + 'done;' + % mountpoint) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, "Failed to rename files on " + "client") + g.log.info("Successfully renamed files on client") + + # Create softlink and hardlink of files in mountpoint + cmd = ('cd %s/dir1/; ' + 'for FILENAME in *; ' + 'do ln -s $FILENAME softlink_$FILENAME; cd ~;' + 'done;' + % mountpoint) + ret, _, _ = g.run(client, cmd) + self.assertFalse(ret, "Creating Softlinks have failed") + g.log.info("Softlink of files have been changed successfully") + + cmd = ('cd %s/dir1/; ' + 'for FILENAME in *; ' + 'do ln $FILENAME hardlink_$FILENAME; cd ~;' + 'done;' + % mountpoint) + ret, _, _ = g.run(client, cmd) + self.assertFalse(ret, "Creating Hardlinks have failed") + g.log.info("Hardlink of files have been changed successfully") + + # Create tiny, small, medium and large file + # at mountpoint. Offset to differ filenames + # at diff clients. + offset = 1 + for mount_obj in self.mounts: + cmd = 'fallocate -l 100 tiny_file%s.txt' % str(offset) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Fallocate for tiny files failed") + g.log.info("Fallocate for tiny files successfully") + + cmd = 'fallocate -l 20M small_file%s.txt' % str(offset) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Fallocate for small files failed") + g.log.info("Fallocate for small files successfully") + + cmd = 'fallocate -l 200M medium_file%s.txt' % str(offset) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Fallocate for medium files failed") + g.log.info("Fallocate for medium files successfully") + + cmd = 'fallocate -l 1G large_file%s.txt' % str(offset) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Fallocate for large files failed") + g.log.info("Fallocate for large files successfully") + offset += 1 + + # Get arequal of dir1 + ret, result_before_brick_down = ( + collect_mounts_arequal(self.mounts[0], path='dir1/')) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal of dir1 ' + 'is successful') + + # Creating dir2 + ret = mkdir(self.mounts[0].client_system, "%s/dir2" + % mountpoint) + self.assertTrue(ret, "Failed to create dir2") + g.log.info("Directory dir2 on %s created successfully", self.mounts[0]) + + # Creating files on dir2 + # Write IO + all_mounts_procs, count = [], 1 + for mount_obj in self.mounts: + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d --dir-depth 2 " + "--dir-length 10 --max-num-of-dirs 5 " + "--num-of-files 5 %s/dir2" % ( + self.script_upload_path, count, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + all_mounts_procs.append(proc) + count = count + 10 + + # Bring down other bricks to max redundancy + # Bringing bricks offline + bricks_to_offline = sample(bricks_list, 2) + ret = bring_bricks_offline(self.volname, + bricks_to_offline) + self.assertTrue(ret, 'Bricks not offline') + g.log.info('Bricks are offline successfully') + + # Creating dir3 + ret = mkdir(self.mounts[0].client_system, "%s/dir3" + % 
mountpoint) + self.assertTrue(ret, "Failed to create dir3") + g.log.info("Directory dir3 on %s created successfully", self.mounts[0]) + + # Start pumping IO to dir3 + cmd = ("cd %s/dir3; for i in `seq 1 100` ;" + "do dd if=/dev/urandom of=file$i bs=1M " + "count=5;done" % mountpoint) + + ret, _, err = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, err) + g.log.info('Finished writing on files while a brick is DOWN') + + appendcmd = ("cd %s/dir3; for i in `seq 1 100` ;" + "do dd if=/dev/urandom of=file$i bs=1M " + "count=1 oflag=append conv=notrunc;done" % mountpoint) + + readcmd = ("cd %s/dir3; for i in `seq 1 100` ;" + "do dd if=file$i of=/dev/null bs=1M " + "count=5;done" % mountpoint) + + ret, _, err = g.run(self.mounts[0].client_system, appendcmd) + self.assertEqual(ret, 0, err) + g.log.info('Finished append on files after redundant bricks offline') + + ret, _, err = g.run(self.mounts[0].client_system, readcmd) + self.assertEqual(ret, 0, err) + g.log.info('Finished read on files after redundant bricks offline') + + # Validating IO's on dir2 and waiting to complete + ret = validate_io_procs(all_mounts_procs, self.mounts) + self.assertTrue(ret, "IO failed on some of the clients") + g.log.info("Successfully validated all IO's") + + # Bring bricks online + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_offline) + self.assertTrue(ret, 'Bricks not brought online') + g.log.info('Bricks are online successfully') + + # Wait for bricks to come online + ret = wait_for_bricks_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, "Bricks are not online") + g.log.info("EXPECTED : Bricks are online") + + # Check if bricks are online + ret = get_offline_bricks_list(self.mnode, self.volname) + self.assertListEqual(ret, [], 'Not all bricks are online') + g.log.info('All bricks are online') + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + g.log.info('Heal has completed successfully') + + # Get arequal of dir1 + ret, result_after_brick_up = ( + collect_mounts_arequal(self.mounts[0], path='dir1/')) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal of dir1 ' + 'is successful') + + # Comparing arequals of dir1 + self.assertEqual(result_before_brick_down, + result_after_brick_up, + 'Arequals are not equal before and after ' + 'bringing down redundant bricks') + g.log.info('Arequals are equal before and after ' + 'bringing down redundant bricks') diff --git a/tests/functional/disperse/test_ec_brick_consumable_size.py b/tests/functional/disperse/test_ec_brick_consumable_size.py index e2cee80b1..c37dc834b 100644..100755 --- a/tests/functional/disperse/test_ec_brick_consumable_size.py +++ b/tests/functional/disperse/test_ec_brick_consumable_size.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2018-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -21,8 +21,10 @@ EcBrickConsumableSize: 'number of data bricks * least of brick size'.
""" +from unittest import skip from glusto.core import Glusto as g -from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.brick_libs import (get_all_bricks, + bring_bricks_offline) from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.volume_libs import (get_volume_info) from glustolibs.gluster.lib_utils import get_size_of_mountpoint @@ -41,68 +43,96 @@ class EcBrickConsumableSize(GlusterBaseClass): raise ExecutionError("Failed to Setup_Volume and Mount_Volume") g.log.info("Successful in Setup Volume and Mount Volume") - # Test Case - def test_disperse_vol_size(self): - # pylint: disable=too-many-locals - mnode = self.mnode - volname = self.volname - client = self.mounts[0].client_system - mountpoint = self.mounts[0].mountpoint - - # Obtain the volume size - vol_size = get_size_of_mountpoint(client, mountpoint) - self.assertIsNotNone(vol_size, ("Unable to get the volsize \ - of %s.", volname)) - - # Retrieve the minimum brick size + def _get_min_brick(self): + # Returns the brick with min size + bricks_list = get_all_bricks(self.mnode, self.volname) min_brick_size = -1 - bricks_list = get_all_bricks(mnode, volname) + min_size_brick = None for brick in bricks_list: brick_node, brick_path = brick.split(":") brick_size = get_size_of_mountpoint(brick_node, brick_path) if ((brick_size is not None) and (min_brick_size == -1) or (int(min_brick_size) > int(brick_size))): min_brick_size = brick_size + min_size_brick = brick + return min_size_brick, min_brick_size - # Calculate the consumable size - vol_info = get_volume_info(mnode, volname) + def _get_consumable_vol_size(self, min_brick_size): + # Calculates the consumable size of the volume created + vol_info = get_volume_info(self.mnode, self.volname) self.assertIsNotNone(vol_info, ("Unable to get the volinfo \ - of %s.", volname)) - - disp_data_bricks = (int(vol_info[volname]['disperseCount']) - - int(vol_info[volname]['redundancyCount'])) - dist_count = (int(vol_info[volname]['brickCount']) / - int(vol_info[volname]['disperseCount'])) + of %s.", self.volname)) + disp_data_bricks = (int(vol_info[self.volname]['disperseCount']) - + int(vol_info[self.volname]['redundancyCount'])) + dist_count = (int(vol_info[self.volname]['brickCount']) / + int(vol_info[self.volname]['disperseCount'])) consumable_size = ((int(min_brick_size) * int(disp_data_bricks)) * int(dist_count)) + return consumable_size, dist_count + + @skip('Skipping this test due to Bug 1883429') + def test_disperse_vol_size(self): + # pylint: disable=too-many-locals + client = self.mounts[0].client_system + mount_point = self.mounts[0].mountpoint + + # Obtain the volume size + vol_size = get_size_of_mountpoint(client, mount_point) + self.assertIsNotNone(vol_size, ("Unable to get the volsize " + "of %s.", self.volname)) + + # Retrieve the minimum brick size + min_size_brick, min_brick_size = self._get_min_brick() + + # Calculate the consumable size + consumable_size, dist_count = ( + self._get_consumable_vol_size(min_brick_size)) # Verify the volume size is in allowable range # Volume size should be above 98% of consumable size. 
delta = (100 - ((float(vol_size)/float(consumable_size)) * 100)) - self.assertTrue(delta < 2, ("Volume size is not in allowable range")) - + self.assertTrue(delta < 2, "Volume size is not in allowable range") g.log.info("Volume size is in allowable range") + + # Write to the available size + block_size = 1024 - write_size = ((int(vol_size) * (0.95) * int(block_size)) / + write_size = ((int(vol_size) * 0.95 * int(block_size)) / (int(dist_count))) for i in range(1, int(dist_count)): - ret, _, _ = g.run(client, "fallocate -l {} {}/testfile{} \ - ".format(int(write_size), mountpoint, i)) - self.assertTrue(ret == 0, ("Writing file of available size failed \ - on volume %s", volname)) + ret, _, _ = g.run(client, "fallocate -l {} {}/testfile{} " + .format(int(write_size), mount_point, i)) + self.assertTrue(ret == 0, ("Writing file of available size " + "failed on volume %s", self.volname)) g.log.info("Successfully verified volume size") + + # Try writing more than the available size + write_size = ((int(vol_size) * int(block_size)) * 1.2) - ret, _, _ = g.run(client, "fallocate -l {} {}/testfile1 \ - ".format(int(write_size), mountpoint)) - self.assertTrue(ret != 0, ("Writing file of more than available \ - size passed on volume %s", volname)) - + ret, _, _ = g.run(client, "fallocate -l {} {}/testfile1 " + .format(int(write_size), mount_point)) + self.assertTrue(ret != 0, ("Writing file of more than available " + "size passed on volume %s", self.volname)) g.log.info("Successfully verified brick consumable size") + # Cleanup the mount point before the brick-down verification + cmd = ('rm -rf %s' % mount_point) + ret, _, _ = g.run(client, cmd) + if ret: + g.log.error("Failed to cleanup vol data on %s", mount_point) + # Bring down the smallest brick + ret = bring_bricks_offline(self.volname, min_size_brick) + self.assertTrue(ret, "Failed to bring down the smallest brick") + + # Find the volume size post brick down + post_vol_size = get_size_of_mountpoint(client, mount_point) + self.assertIsNotNone(post_vol_size, ("Unable to get the volsize " "of %s.", self.volname)) + + # Vol size after bringing down the brick with smallest size should + # not be greater than the initial size + self.assertGreater(vol_size, post_vol_size, + ("The volume size after bringing down the smallest " + "brick is not less than the initial size")) + # Method to cleanup test setup def tearDown(self): # Stopping the volume diff --git a/tests/functional/disperse/test_ec_check_lock_granted_to_2_different_client.py b/tests/functional/disperse/test_ec_check_lock_granted_to_2_different_client.py new file mode 100755 index 000000000..dd5f3b6da --- /dev/null +++ b/tests/functional/disperse/test_ec_check_lock_granted_to_2_different_client.py @@ -0,0 +1,135 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
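+# Note: file_lock.py (uploaded in setUpClass below) is not part of this
+# change. Judging from the exit codes this test asserts on, it is assumed
+# to attempt a non-blocking exclusive lock on the file passed via -f, hold
+# it for -t seconds, and exit non-zero when the lock is already held. A
+# minimal sketch of such a helper, under those assumptions only:
+#
+#     import fcntl, sys, time
+#     handle = open(sys.argv[1], 'a')      # lock target (the -f argument)
+#     try:
+#         fcntl.flock(handle, fcntl.LOCK_EX | fcntl.LOCK_NB)
+#     except IOError:
+#         sys.exit(1)                      # lock held by another client
+#     time.sleep(int(sys.argv[2]))         # hold for -t seconds
+#     fcntl.flock(handle, fcntl.LOCK_UN)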
+ + +import time +import itertools +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.gluster.volume_ops import (set_volume_options, + get_volume_options) + + +@runs_on([['dispersed', 'distributed-dispersed'], ['glusterfs']]) +class EcVerifyLock(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + cls.script = "/usr/share/glustolibs/io/scripts/file_lock.py" + if not upload_scripts(cls.clients, [cls.script]): + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + # Setup Volume and Mount Volume + if not self.setup_volume_and_mount_volume(mounts=self.mounts): + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def test_verify_lock_granted_from_2_clients(self): + """ + - Create disperse volume and mount it to 2 clients + - Create file from 1 client on mount point + - Take lock from client 1 => lock is acquired + - Try taking lock from client 2 => lock is blocked (as it is already + held by client 1) + - Release lock from client 1 => lock is released + - Take lock from client 2 + - Again try taking lock from client 1 + - Verify the test once with eager-lock and other-eager-lock disabled, + and once with both left enabled (the default) + """ + mpoint = self.mounts[0].mountpoint + + # Create a file on client 1 + cmd = "touch {}/test_file".format(mpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to create file on client 1") + + # Verifying optimistic-change-log (OCL) is on + option = "optimistic-change-log" + option_dict = get_volume_options(self.mnode, self.volname, option) + self.assertIsNotNone(option_dict, ("Failed to get %s volume option" + " for volume %s" + % (option, self.volname))) + self.assertEqual(option_dict['disperse.optimistic-change-log'], 'on', + ("%s is not ON for volume %s" % (option, + self.volname))) + g.log.info("Successfully verified %s value for volume %s", + option, self.volname) + + # Repeat the test with eager-lock and other-eager-lock 'on' & 'off' + for lock_status in ('on', 'off'): + options = {'disperse.eager-lock': lock_status, + 'disperse.other-eager-lock': lock_status} + ret = set_volume_options(self.mnode, self.volname, options) + + self.assertTrue(ret, ("Failed to set eagerlock and other " + "eagerlock value as %s " % lock_status)) + g.log.info("Successfully set eagerlock and other eagerlock value" + " to %s", lock_status) + + # Repeat the test for both the combinations of clients + for client_1, client_2 in list(itertools.permutations( + [self.mounts[0].client_system, + self.mounts[1].client_system], r=2)): + # Get lock on file from one client + lock_cmd = ("/usr/bin/env python {} -f {}/" + "test_file -t 30".format(self.script, mpoint)) + proc = g.run_async(client_1, lock_cmd) + time.sleep(5) + + # As the lock has been acquired by one client, + # try to get lock from the other + ret, _, _ = g.run(client_2, lock_cmd) + self.assertEqual(ret, 1, ("Unexpected: {} acquired the lock " + "before it was released by {}" .format(client_2, client_1))) + g.log.info("Expected : Lock can't be acquired by %s before " + "being released by %s", client_2,
client_1) + + # Wait for first client to release the lock. + ret, _, _ = proc.async_communicate() + self.assertEqual(ret, 0, ("File lock process failed on %s:%s", + client_1, mpoint)) + + # Try taking the lock from other client and releasing it + lock_cmd = ("/usr/bin/env python {} -f " + "{}/test_file -t 1".format(self.script, mpoint)) + ret, _, _ = g.run(client_2, lock_cmd) + self.assertEqual(ret, 0, + ("Unexpected: {} can't acquire the lock even " + "after it was released by {}" .format(client_2, client_1))) + g.log.info("Successful, lock acquired by %s after being " + "released by %s", client_2, client_1) + + def tearDown(self): + # Stopping the volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Unmount Volume and Cleanup " + "Volume") + g.log.info("Successful in Unmount Volume and Cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/disperse/test_ec_data_delete.py b/tests/functional/disperse/test_ec_data_delete.py new file mode 100644 index 000000000..662a94b57 --- /dev/null +++ b/tests/functional/disperse/test_ec_data_delete.py @@ -0,0 +1,270 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ +""" +Test Description: + Tests FOps and Data Deletion on a healthy EC volume +""" + +from glusto.core import Glusto as g + +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import validate_io_procs +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.glusterdir import mkdir + + +@runs_on([['dispersed', 'distributed-dispersed'], + ['glusterfs']]) +class TestEcDataDelete(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_ec_data_delete(self): + """ + Test steps: + - Create directory dir1 + - Create 5 dir and 5 files in each dir in directory 1 + - Rename all file inside dir1 + - Truncate at any dir in mountpoint inside dir1 + - Create softlink and hardlink of files in mountpoint + - Delete op for deleting all file in one of the dirs + - chmod, chown, chgrp inside dir1 + - Create tiny, small, medium nd large file + - Creating files on client side for dir1 + - Validating IO's and waiting to complete + - Deleting dir1 + - Check .glusterfs/indices/xattrop is empty + - Check if brickpath is empty + """ + + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + # Get the bricks from the volume + bricks_list = get_all_bricks(self.mnode, self.volname) + g.log.info("Brick List : %s", bricks_list) + + # Creating dir1 + ret = mkdir(self.mounts[0].client_system, "%s/dir1" + % self.mounts[0].mountpoint) + self.assertTrue(ret, "Failed to create dir1") + g.log.info("Directory dir1 on %s created successfully", self.mounts[0]) + + # Create 5 dir and 5 files in each dir at mountpoint on dir1 + start, end = 1, 5 + for mount_obj in self.mounts: + # Number of dir and files to be created. + dir_range = ("%s..%s" % (str(start), str(end))) + file_range = ("%s..%s" % (str(start), str(end))) + # Create dir 1-5 at mountpoint. + ret = mkdir(mount_obj.client_system, "%s/dir1/dir{%s}" + % (mount_obj.mountpoint, dir_range)) + self.assertTrue(ret, "Failed to create directory") + g.log.info("Directory created successfully") + + # Create files inside each dir. 
+ cmd = ('touch %s/dir1/dir{%s}/file{%s};' + % (mount_obj.mountpoint, dir_range, file_range)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "File creation failed") + g.log.info("Files created successfully") + + # Increment counter so that at next client dir and files are made + # with diff offset. Like at next client dir will be named + # dir6, dir7...dir10. Same with files. + start += 5 + end += 5 + + # Rename all files inside dir1 at mountpoint on dir1 + cmd = ('cd %s/dir1/dir1/; ' + 'for FILENAME in *;' + 'do mv $FILENAME Unix_$FILENAME;' + 'done; cd ~;' + % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to rename files on " + "client") + g.log.info("Successfully renamed files on client") + + # Truncate at any dir in mountpoint inside dir1 + # start is an offset to be added to dirname to act on + # diff files at diff clients. + start = 1 + for mount_obj in self.mounts: + cmd = ('cd %s/dir1/dir%s/; ' + 'for FILENAME in *;' + 'do echo > $FILENAME;' + 'done; cd ~;' + % (mount_obj.mountpoint, str(start))) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Truncate failed") + g.log.info("Truncate of files successful") + + # Create softlink and hardlink of files in mountpoint + start = 1 + for mount_obj in self.mounts: + cmd = ('cd %s/dir1/dir%s; ' + 'for FILENAME in *; ' + 'do ln -s $FILENAME softlink_$FILENAME;' + 'done; cd ~;' + % (mount_obj.mountpoint, str(start))) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Creating softlinks failed") + g.log.info("Softlinks of files created successfully") + + cmd = ('cd %s/dir1/dir%s; ' + 'for FILENAME in *; ' + 'do ln $FILENAME hardlink_$FILENAME;' + 'done; cd ~;' + % (mount_obj.mountpoint, str(start + 1))) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Creating hardlinks failed") + g.log.info("Hardlinks of files created successfully") + start += 5 + + # chmod, chown, chgrp inside dir1 + # start and end used as offset to access diff files + # at diff clients. + start, end = 2, 5 + for mount_obj in self.mounts: + dir_file_range = '%s..%s' % (str(start), str(end)) + cmd = ('chmod 777 %s/dir1/dir{%s}/file{%s}' + % (mount_obj.mountpoint, dir_file_range, dir_file_range)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Changing mode of files has failed") + g.log.info("Modes of files changed successfully") + + cmd = ('chown root %s/dir1/dir{%s}/file{%s}' + % (mount_obj.mountpoint, dir_file_range, dir_file_range)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Changing owner of files has failed") + g.log.info("Owner of files changed successfully") + + cmd = ('chgrp root %s/dir1/dir{%s}/file{%s}' + % (mount_obj.mountpoint, dir_file_range, dir_file_range)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Changing group of files has failed") + g.log.info("Group of files changed successfully") + start += 5 + end += 5 + + # Create tiny, small, medium and large file + # inside dir1 (so the later delete removes them). Offset to differ + # filenames at diff clients.
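+        # Note: fallocate preallocates the requested length without writing
+        # data, so even the 1G file below is created almost instantly; the
+        # size suffixes are human-readable (20M ~ 20 MB, 1G ~ 1 GB).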
+ + offset = 1 + for mount_obj in self.mounts: + cmd = 'fallocate -l 100 %s/dir1/tiny_file%s.txt' % ( + mount_obj.mountpoint, str(offset)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Fallocate for tiny files failed") + g.log.info("Fallocate for tiny files successful") + + cmd = 'fallocate -l 20M %s/dir1/small_file%s.txt' % ( + mount_obj.mountpoint, str(offset)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Fallocate for small files failed") + g.log.info("Fallocate for small files successful") + + cmd = 'fallocate -l 200M %s/dir1/medium_file%s.txt' % ( + mount_obj.mountpoint, str(offset)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Fallocate for medium files failed") + g.log.info("Fallocate for medium files successful") + + cmd = 'fallocate -l 1G %s/dir1/large_file%s.txt' % ( + mount_obj.mountpoint, str(offset)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Fallocate for large files failed") + g.log.info("Fallocate for large files successful") + offset += 1 + + # Creating files on client side for dir1 + # Write IO + all_mounts_procs, count = [], 1 + for mount_obj in self.mounts: + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d --dir-depth 2 " + "--dir-length 10 --max-num-of-dirs 5 " + "--num-of-files 5 %s/dir1" % ( + self.script_upload_path, count, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + all_mounts_procs.append(proc) + count = count + 10 + + # Validating IO's and waiting to complete + ret = validate_io_procs(all_mounts_procs, self.mounts) + self.assertTrue(ret, "IO failed on some of the clients") + g.log.info("Successfully validated all IO's") + + # Deleting dir1 + cmd = ('rm -rf -v %s/dir1' % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to delete dir1") + g.log.info("Directory dir1 deleted successfully for %s", self.mounts[0]) + + # Check .glusterfs/indices/xattrop is empty + for brick in bricks_list: + brick_node, brick_path = brick.split(":") + cmd = ("ls -1 %s/.glusterfs/indices/xattrop/ | " + "grep -ve \"xattrop-\" | wc -l" % brick_path) + ret, out, _ = g.run(brick_node, cmd) + self.assertEqual(0, int(out.strip()), ".glusterfs/indices/" + "xattrop is not empty") + g.log.info("No pending heals on %s", brick) + + # Check if brick path is empty + for brick in bricks_list: + brick_node, brick_path = brick.split(":") + cmd = ("ls -1 %s |wc -l " % brick_path) + ret, out, _ = g.run(brick_node, cmd) + self.assertEqual(0, int(out.strip()), "Brick path {} is not empty " + "in node {}".format(brick_path, brick_node)) + g.log.info("Brick path is empty in node %s", brick_node) diff --git a/tests/functional/disperse/test_ec_data_intergrity.py b/tests/functional/disperse/test_ec_data_intergrity.py new file mode 100644 index 000000000..5241e8d80 --- /dev/null +++ b/tests/functional/disperse/test_ec_data_intergrity.py @@ -0,0 +1,314 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details.
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +Test Description: + Tests Data Consistency and Integrity +""" +from random import sample +from glusto.core import Glusto as g + +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import validate_io_procs, collect_mounts_arequal +from glustolibs.gluster.brick_libs import (are_bricks_offline, + bring_bricks_offline, + bring_bricks_online, + wait_for_bricks_to_be_online, + get_offline_bricks_list) +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.volume_libs import get_subvols + + +@runs_on([['dispersed', 'distributed-dispersed'], + ['glusterfs']]) +class TestEcDataIntegrity(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def _bring_redundant_bricks_offline(self, mnode, volname): + """ + Bring redundant bricks offline + """ + brickset_to_offline = [] + # List two bricks in each subvol + all_subvols_dict = get_subvols(mnode, volname) + subvols = all_subvols_dict['volume_subvols'] + for subvol in subvols: + self.assertTrue(subvol, "List is empty") + brickset_to_offline.extend(sample(subvol, 2)) + + # Bring two bricks of each subvol down + ret = bring_bricks_offline(volname, brickset_to_offline) + self.assertTrue(ret, "Bricks are still online") + + # Validating the bricks are offline + ret = are_bricks_offline(mnode, volname, + brickset_to_offline) + self.assertTrue(ret, "Some of the bricks are still online in" + " {} ".format(brickset_to_offline)) + return brickset_to_offline + + def test_ec_data_integrity(self): + """ + Test steps: + - Create directory dir1 + - Create 5 dirs and 5 files in each dir inside dir1 + - Rename all files inside dir1 + - Truncate at any dir in mountpoint inside dir1 + - Create softlink and hardlink of files in mountpoint + - chmod, chown, chgrp inside dir1 + - Create tiny, small, medium and large file + - Creating files on client side for dir1 + - Validating IO's and waiting to complete + - Get arequal of dir1 + - Bring redundant bricks offline + - Get arequal of dir1 after 1st set of
bricks down + - Bring bricks online and wait for them to be online + - Bring redundant bricks offline + - Get arequal of dir1 after 2nd set of bricks down + - Compare the arequals + """ + + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + brickset_to_offline = [] + + # Creating dir1 + ret = mkdir(self.mounts[0].client_system, "%s/dir1" + % self.mounts[0].mountpoint) + self.assertTrue(ret, "Failed to create dir1") + g.log.info("Directory dir1 on %s created successfully", self.mounts[0]) + + # Create 5 dirs and 5 files in each dir at mountpoint on dir1 + start, end = 1, 5 + for mount_obj in self.mounts: + # Number of dirs and files to be created. + dir_range = ("%s..%s" % (str(start), str(end))) + file_range = ("%s..%s" % (str(start), str(end))) + # Create dir 1-5 at mountpoint. + ret = mkdir(mount_obj.client_system, "%s/dir1/dir{%s}" + % (mount_obj.mountpoint, dir_range)) + self.assertTrue(ret, "Failed to create directory") + g.log.info("Directory created successfully") + + # Create files inside each dir. + cmd = ('touch %s/dir1/dir{%s}/file{%s};' + % (mount_obj.mountpoint, dir_range, file_range)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "File creation failed") + g.log.info("Files created successfully") + + # Increment counter so that at next client dir and files are made + # with diff offset. Like at next client dir will be named + # dir6, dir7...dir10. Same with files. + start += 5 + end += 5 + + # Rename all files inside dir1 at mountpoint on dir1 + cmd = ('cd %s/dir1/dir1/; ' + 'for FILENAME in *;' + 'do mv $FILENAME Unix_$FILENAME;' + 'done; cd ~;' + % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to rename files on " + "client") + g.log.info("Successfully renamed files on client") + + # Truncate at any dir in mountpoint inside dir1 + # start is an offset to be added to dirname to act on + # diff files at diff clients. + start = 1 + for mount_obj in self.mounts: + cmd = ('cd %s/dir1/dir%s/; ' + 'for FILENAME in *;' + 'do echo > $FILENAME;' + 'done; cd ~;' + % (mount_obj.mountpoint, str(start))) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Truncate failed") + g.log.info("Truncate of files successful") + + # Create softlink and hardlink of files in mountpoint + start = 1 + for mount_obj in self.mounts: + for link_type, ln_mode in (('softlink', 'ln -s'), + ('hardlink', 'ln')): + cmd = ('cd %s/dir1/dir%s; ' + 'for FILENAME in *; ' + 'do %s $FILENAME %s_$FILENAME;' + 'done; cd ~;' + % (mount_obj.mountpoint, str(start), ln_mode, + link_type)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Creating %s failed" % link_type) + g.log.info("%s of files created successfully", link_type) + start += 5 + + # chmod, chown, chgrp inside dir1 + # start and end used as offset to access diff files + # at diff clients.
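+        # For example (hypothetical mount path): with start=2 and end=5 the
+        # chmod command below becomes
+        #     chmod 777 /mnt/glusterfs/dir1/dir{2..5}/file{2..5}
+        # which the client shell brace-expands to file2..file5 inside each
+        # of dir2..dir5.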
+ start, end = 2, 5 + for mount_obj in self.mounts: + dir_file_range = '%s..%s' % (str(start), str(end)) + cmd = ('chmod 777 %s/dir1/dir{%s}/file{%s}' + % (mount_obj.mountpoint, dir_file_range, dir_file_range)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Changing mode of files has failed") + g.log.info("Modes of files changed successfully") + + cmd = ('chown root %s/dir1/dir{%s}/file{%s}' + % (mount_obj.mountpoint, dir_file_range, dir_file_range)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Changing owner of files has failed") + g.log.info("Owner of files changed successfully") + + cmd = ('chgrp root %s/dir1/dir{%s}/file{%s}' + % (mount_obj.mountpoint, dir_file_range, dir_file_range)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Changing group of files has failed") + g.log.info("Group of files changed successfully") + start += 5 + end += 5 + + # Create tiny, small, medium and large file + # at mountpoint. Offset to differ filenames + # at diff clients. + offset = 1 + for mount_obj in self.mounts: + for size, filename in (('100', 'tiny_file'), ('20M', 'small_file'), + ('200M', 'medium_file'), + ('1G', 'large_file')): + cmd = 'fallocate -l {} {}/{}{}.txt'.format( + size, mount_obj.mountpoint, filename, offset) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Fallocate for files failed") + g.log.info("Fallocate for files successful") + offset += 1 + + # Creating files on client side for dir1 + # Write IO + all_mounts_procs, count = [], 1 + for mount_obj in self.mounts: + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d --dir-depth 2 " + "--dir-length 10 --max-num-of-dirs 5 " + "--num-of-files 5 %s/dir1" % ( + self.script_upload_path, count, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + all_mounts_procs.append(proc) + count += 10 + + # Validating IO's and waiting to complete + ret = validate_io_procs(all_mounts_procs, self.mounts) + self.assertTrue(ret, "IO failed on some of the clients") + g.log.info("Successfully validated all IO's") + + # Get arequal of dir1 + ret, result_before_bricks_down = ( + collect_mounts_arequal(self.mounts[0], path='dir1/')) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal of dir1 ' + 'is successful') + + # Bring redundant bricks offline + brickset_to_offline = self._bring_redundant_bricks_offline( + self.mnode, self.volname) + + # Get arequal of dir1 after 1st set of bricks down + ret, result_after_1st_brickset_down = ( + collect_mounts_arequal(self.mounts[0], path='dir1/')) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal of dir1 ' + 'is successful') + + # Bring bricks online + ret = bring_bricks_online(self.mnode, self.volname, + brickset_to_offline) + self.assertTrue(ret, 'Bricks not brought online') + g.log.info('Bricks are online successfully') + + # Wait for bricks to come online + ret = wait_for_bricks_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, "Bricks are not online") + g.log.info("EXPECTED : Bricks are online") + + # Check if bricks are online + ret = get_offline_bricks_list(self.mnode, self.volname) + self.assertListEqual(ret, [], 'Not all bricks are online') + g.log.info('All bricks are online') + + # Bring redundant bricks offline + brickset_to_offline = self._bring_redundant_bricks_offline( + self.mnode, self.volname) + + # Get arequal of dir1 after 2nd set of bricks
down + ret, result_after_2nd_brickset_down = ( + collect_mounts_arequal(self.mounts[0], path='dir1/')) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal of dir1 ' + 'is successful') + + # Comparing arequals + self.assertEqual(result_before_bricks_down, + result_after_1st_brickset_down, + 'Arequals are not equal before bricks down ' + 'and after 1st brick set down') + g.log.info('Arequals are equal before bricks down ' + 'and after 1st brick set down') + + self.assertEqual(result_after_2nd_brickset_down, + result_after_1st_brickset_down, + 'Arequals are not equal after 1st brick set ' + 'down and after 2nd brick set down') + g.log.info('Arequals are equal for 1st brick set down ' + 'and 2nd brick set down') diff --git a/tests/functional/disperse/test_ec_eager_lock_functional_validation.py b/tests/functional/disperse/test_ec_eager_lock_functional_validation.py new file mode 100644 index 000000000..b4fb4c9d9 --- /dev/null +++ b/tests/functional/disperse/test_ec_eager_lock_functional_validation.py @@ -0,0 +1,161 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ +""" +Test Description: + Verify Eager lock reduces the number of locks + being taken when writing to the file continuosly +""" +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, + runs_on) +from glustolibs.gluster.volume_ops import set_volume_options +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.profile_ops import (profile_start, profile_stop) +from glustolibs.gluster.dht_test_utils import find_hashed_subvol +from glustolibs.gluster.lib_utils import get_extended_attributes_info + + +@runs_on([['dispersed', 'distributed-dispersed'], ['glusterfs']]) +class EagerlockFunctionalValidationTest(GlusterBaseClass): + # Method to setup the environment for test case + + def setUp(self): + self.get_super_method(self, 'setUp')() + ret = self.setup_volume_and_mount_volume(mounts=self.mounts, + volume_create_force=True) + if not ret: + raise ExecutionError("Failed to setup and mount volume") + + def _check_dirty_xattr(self, filename): + """Get trusted.ec.dirty xattr value to validate eagerlock behavior""" + # Find the hashed subvol of the file created + # for distributed disperse case + subvols_info = get_subvols(self.mnode, self.volname) + subvols_info = subvols_info['volume_subvols'] + if len(subvols_info) > 1: + _, hashed_subvol = find_hashed_subvol(subvols_info, + '', filename) + if hashed_subvol is None: + g.log.error("Error in finding hash value of %s", filename) + return None + else: + hashed_subvol = 0 + + # Collect ec.dirty xattr value from each brick + result = [] + for subvol in subvols_info[hashed_subvol]: + host, brickpath = subvol.split(':') + brickpath = brickpath + '/' + filename + ret = get_extended_attributes_info(host, [brickpath], + encoding='hex', + attr_name='trusted.ec.dirty') + ret = ret[brickpath]['trusted.ec.dirty'] + result.append(ret) + + # Check if xattr values are same across all bricks + if result.count(result[0]) == len(result): + return ret + g.log.error("trusted.ec.dirty value is not consistent across the " + "disperse set %s", result) + return None + + def _file_create_and_profile_info(self, status): + """Create a file and check the volume profile for inode lock count.""" + # Creating file + mountpoint = self.mounts[0].mountpoint + client = self.mounts[0].client_system + + filename = 'f1_EagerLock_' + status + cmd = ("dd if=/dev/urandom of=%s/%s bs=100M count=10" + % (mountpoint, filename)) + + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, "Failed to create file on mountpoint") + g.log.info("Successfully created files on mountpoint") + + # Getting and checking output of profile info. + cmd = "gluster volume profile %s info | grep -i INODELK" % self.volname + ret, rout, _ = g.run(self.mnode, cmd) + self.assertEqual(ret, 0, "Failed to grep INODELK count from profile " + "info") + g.log.info("The lock counts on all bricks with eager-lock %s: %s", + status, rout) + + return filename + + def test_validate_profile_for_inodelk(self): + """ + Test Steps: + 1) Create an ecvolume and mount it + 2) Set the eagerlock option + 3) Create a 1GB file + 4) View the profile of the volume for INODELK count must be about + 2-10 locks for each brick. + 5) check backend bricks for trusted.ec.dirty xattr must be non-zero + 6) Disable the eagerlock option + 7) Repeat steps 3-5 and now dirty xattr must be zero and + INODELK count in range of 100-5k. 
+ """ + + # Enable EagerLock + ret = set_volume_options(self.mnode, self.volname, + {'disperse.eager-lock': 'on', + 'disperse.eager-lock-timeout': '10'}) + self.assertTrue(ret, "Failed to turn on eagerlock" + "on %s" % self.volname) + + # Start profile on volume. + ret, _, _ = profile_start(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to start profile on volume: %s" + % self.volname) + + # Test behavior with EagerLock on + filename = self._file_create_and_profile_info("on") + self.assertIsNotNone(filename, "Failed to get filename") + + # Test dirty bit with EagerLock on + ret = self._check_dirty_xattr(filename) + self.assertEqual(ret, '0x00000000000000010000000000000001', + "Unexpected dirty xattr value is %s on %s" + % (ret, filename)) + + # Disable EagerLock + ret = set_volume_options(self.mnode, self.volname, + {'disperse.eager-lock': 'off'}) + self.assertTrue(ret, "Failed to turn off eagerlock " + "on %s" % self.volname) + + # Test behavior with EagerLock off + filename = self._file_create_and_profile_info("off") + self.assertIsNotNone(filename, "Failed to get filename") + + # Test dirty bit with EagerLock off + ret = self._check_dirty_xattr(filename) + self.assertEqual(ret, '0x00000000000000000000000000000000', + "Unexpected dirty xattr value is %s on %s" + % (ret, filename)) + + # Stop profile on volume. + ret, _, _ = profile_stop(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to stop profile on volume: %s" + % self.volname) + + def tearDown(self): + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") diff --git a/tests/functional/disperse/test_ec_eagerlock.py b/tests/functional/disperse/test_ec_eagerlock.py new file mode 100644 index 000000000..3da2d67b7 --- /dev/null +++ b/tests/functional/disperse/test_ec_eagerlock.py @@ -0,0 +1,264 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +""" +Test Description: + Verify Eagerlock and other-eagerlock behavior +""" +from unittest import SkipTest +from random import choice +from time import sleep +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, + runs_on) +from glustolibs.gluster.dht_test_utils import find_hashed_subvol +from glustolibs.gluster.glusterdir import rmdir +from glustolibs.gluster.lib_utils import (append_string_to_file, + get_extended_attributes_info) +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.volume_ops import (set_volume_options, + get_volume_options) +from glustolibs.misc.misc_libs import (yum_install_packages, + upload_scripts) + + +@runs_on([['dispersed', 'distributed-dispersed'], ['glusterfs']]) +class DisperseEagerlockTest(GlusterBaseClass): + # Method to setup the environment for test case + + @classmethod + def setUpClass(cls): + cls.get_super_method(cls, 'setUpClass')() + # Check for availability of atleast 4 clients + if len(cls.clients) < 4: + raise SkipTest("This test requires atleast 4 clients") + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients {}" + .format(cls.clients)) + # Install time package on all clients needed for measurement of ls + + ret = yum_install_packages(cls.clients, 'time') + if not ret: + raise ExecutionError("Failed to install TIME package on all nodes") + + def setUp(self): + """ + setUp method + """ + # Setup_Volume + self.get_super_method(self, 'setUp')() + ret = self.setup_volume_and_mount_volume(mounts=self.mounts, + volume_create_force=False) + if not ret: + raise ExecutionError("Failed to setup and mount volume") + g.log.info("Volume %s has been setup successfully", self.volname) + + def _filecreate_and_hashcheck(self, timeoutval): + """Create a file and check on which subvol it is hashed to""" + # Create and write to a file to test the eagerlock timeout behavior + objectname = 'EagerLockTimeoutCheck-file-' + timeoutval + objectpath = ("{}/{}".format(self.mounts[0].mountpoint, objectname)) + ret = append_string_to_file(self.mounts[0].client_system, + objectpath, 'EagerLockTest') + self.assertTrue(ret, 'create and append of %s failed' % objectname) + ret = get_subvols(self.mnode, self.volname) + # Find the hashed subvol of the file created + if len(ret['volume_subvols']) > 1: + _, hashed_subvol = find_hashed_subvol(ret['volume_subvols'], + '', objectname) + if hashed_subvol is None: + g.log.error("Error in finding hash value of %s", objectname) + return None + return (objectname, ret['volume_subvols'], hashed_subvol) + # Set subvol to 0 for plain(non-distributed) disperse volume + hashed_subvol = 0 + return (objectname, ret['volume_subvols'], hashed_subvol) + + @staticmethod + def _get_dirty_xattr_value(ret, hashed_subvol, objectname): + """Get trusted.ec.dirty xattr value to validate eagerlock behavior""" + # Collect ec.dirty xattr value from each brick + hashvals = [] + for subvol in ret[hashed_subvol]: + host, brickpath = subvol.split(':') + brickpath = brickpath + '/' + objectname + ret = get_extended_attributes_info(host, [brickpath], + encoding='hex', + attr_name='trusted.ec.dirty') + ret = ret[brickpath]['trusted.ec.dirty'] + hashvals.append(ret) + # Check if xattr values are same across all bricks + 
+        if hashvals.count(hashvals[0]) == len(hashvals):
+            del hashvals
+            return ret
+        g.log.error("trusted.ec.dirty value is not consistent across the "
+                    "disperse set %s", hashvals)
+        return None
+
+    def _change_eagerlock_timeouts(self, timeoutval):
+        """Change eagerlock and other-eagerlock timeout values as per input"""
+        ret = set_volume_options(self.mnode, self.volname,
+                                 {'disperse.eager-lock-timeout': timeoutval,
+                                  'disperse.other-eager-lock-timeout':
+                                  timeoutval})
+        self.assertTrue(ret, 'failed to change eager-lock timeout values to '
+                             '%s sec on %s' % (timeoutval, self.volname))
+        g.log.info("SUCCESS:Changed eager-lock timeout vals to %s sec on %s",
+                   timeoutval, self.volname)
+
+    def _file_dir_create(self, clients, mountpoint):
+        """Create directories and files which will be used for
+        checking response time of lookups"""
+        client = choice(clients)
+        cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
+               "--dirname-start-num 0 "
+               "--dir-depth 2 "
+               "--dir-length 4 "
+               "--max-num-of-dirs 4 "
+               "--num-of-files 100 %s" % (self.script_upload_path, mountpoint))
+        ret, _, _ = g.run(client, cmd)
+        self.assertEqual(ret, 0, "FAILED to create data needed for lookups")
+
+    @staticmethod
+    def _lookup_response_time(clients, mountpoint):
+        """Check lookup response time, which should be around 2-3 sec"""
+        # Sleeping to allow some cache timeout
+        sleep(60)
+        cmd = '/usr/bin/time -f "%e" ls -lRt ' + mountpoint + ' >>/dev/null'
+        results = g.run_parallel(clients, cmd)
+        # Checking the actual time taken for lookup
+        for ret_values in results.values():
+            _, _, ret = ret_values
+            calc = float(ret.strip())
+            if calc > 2:
+                g.log.error("lookups taking more than 2 seconds."
+                            " Actual time: %s", calc)
+
+    def _rmdir_on_mountpoint(self, clients, mountpoint):
+        """Perform rm of created files as part of sanity check"""
+        # Skipping below lines of code as running rm -rf in parallel
+        # from multiple clients is a known bug. Refer BZ-1787328
+        # cmd = 'rm -rf ' + mountpoint
+        # results = g.run_parallel(clients, cmd)
+        # for client, ret_values in results.items():
+        #     ret, out, err = ret_values
+        #     self.assertEqual(ret, 0, "rm -rf failed on %s with %s"
+        #                      % (client, err))
+        ret = rmdir(choice(clients), mountpoint + '/*', force=True)
+        self.assertTrue(ret, "rm -rf failed")
+        ret, out, err = g.run(choice(clients), 'ls ' + mountpoint)
+        self.assertEqual((ret, out, err), (0, '', ''),
+                         "Some entries still exist even after rm -rf;"
+                         " the entries are %s and error msg is %s"
+                         % (out, err))
+        g.log.info("rm -rf was successful")
+
+    def test_eagerlock(self):
+        """
+        Test Steps:
+        1) Create an ecvolume
+        2) Test EagerLock and Other-EagerLock default values and
+           timeout-values
+        3) Set the timeout values to 60
+        4) Write to a file and check the backend bricks for
+           "trusted.ec.dirty"; it must be non-zero while the lock is held
+        5) Create some dirs and files in each dir
+        6) Do ls -lRt * --> must not take more than 2-3 sec
+        7) Disable eager lock
+        8) Retest write to a file; this time the lock must be released
+           immediately, with the dirty xattr value all zeros
+        """
+        # Get list of clients
+        clients = []
+        for mount_obj in self.mounts:
+            clients.append(mount_obj.client_system)
+            mountpoint = mount_obj.mountpoint
+
+        # Check if EC Eagerlock set of options enabled with correct values
+        ret = get_volume_options(self.mnode, self.volname)
+        self.assertTrue(bool((ret['disperse.eager-lock'] ==
+                              ret['disperse.other-eager-lock'] == 'on') and
+                             (ret['disperse.eager-lock-timeout'] ==
+                              ret['disperse.other-eager-lock-timeout'] ==
+                              '1')),
+                        'Some EC-eagerlock options set are not correct')
+        # Test behavior with default timeout value of 1 sec
+        objectname, ret, hashed_subvol = self._filecreate_and_hashcheck('1sec')
+        sleep(2)
+        ret = self._get_dirty_xattr_value(ret, hashed_subvol, objectname)
+        self.assertEqual(ret, '0x00000000000000000000000000000000',
+                         "Unexpected dirty xattr value is %s on %s"
+                         % (ret, objectname))
+        self._file_dir_create(clients, mountpoint)
+        # Now test the performance issue wrt lookups
+        self._lookup_response_time(clients, mountpoint)
+        # Do rm -rf of created data as sanity test
+        self._rmdir_on_mountpoint(clients, mountpoint)
+
+        # Increase timeout values to 60 sec in order to test the
+        # functionality
+        self._change_eagerlock_timeouts('60')
+        self._file_dir_create(clients, mountpoint)
+        objectname, ret, hashed_subvol =\
+            self._filecreate_and_hashcheck('60seconds')
+        # Check the "trusted.ec.dirty" value on all the bricks;
+        # it should be "0x00000000000000010000000000000001"
+        _ = self._get_dirty_xattr_value(ret, hashed_subvol, objectname)
+        self.assertEqual(_, '0x00000000000000010000000000000001',
+                         "Unexpected dirty xattr value %s on %s"
+                         % (_, objectname))
+        # Sleep 60 sec, after which dirty_val should reset to "0x00000..."
+        sleep(62)
+        _ = self._get_dirty_xattr_value(ret, hashed_subvol, objectname)
+        self.assertEqual(_, '0x00000000000000000000000000000000',
+                         "Unexpected dirty xattr value is %s on %s"
+                         % (_, objectname))
+        # Test the performance issue wrt lookups
+        self._lookup_response_time(clients, mountpoint)
+        # Do rm -rf of created data as sanity test
+        self._rmdir_on_mountpoint(clients, mountpoint)
+
+        # Disable EagerLock and other-Eagerlock
+        ret = set_volume_options(self.mnode, self.volname,
+                                 {'disperse.eager-lock': 'off',
+                                  'disperse.other-eager-lock': 'off'})
+        self.assertTrue(ret, "failed to turn off eagerlock and "
+                             "other eagerlock on %s" % self.volname)
+        g.log.info("SUCCESS: Turned off eagerlock and other-eagerlock on %s",
+                   self.volname)
+        # Again create the same dataset and retest ls -lRt; it shouldn't
+        # take much time
+        self._file_dir_create(clients, mountpoint)
+        # Create a new file and see the dirty flag getting unset immediately
+        objectname, ret, hashed_subvol = self._filecreate_and_hashcheck(
+            'Eagerlock_Off')
+        # Check the "trusted.ec.dirty" value on all the bricks;
+        # it should be "0x00000000000000000000000000000000"
+        ret = self._get_dirty_xattr_value(ret, hashed_subvol, objectname)
+        self.assertEqual(ret, '0x00000000000000000000000000000000',
+                         "Unexpected dirty xattr value is %s on %s"
+                         % (ret, objectname))
+        # Test the performance issue wrt ls
+        self._lookup_response_time(clients, mountpoint)
+        # Cleanup created data as sanity test
+        self._rmdir_on_mountpoint(clients, mountpoint)
+
+    def tearDown(self):
+        ret = self.cleanup_volume()
+        if not ret:
+            raise ExecutionError("Failed to Unmount Volume and Cleanup Volume")
diff --git a/tests/functional/disperse/test_ec_file_rename_on_brick_down.py b/tests/functional/disperse/test_ec_file_rename_on_brick_down.py
new file mode 100644
index 000000000..be82ceeed
--- /dev/null
+++ b/tests/functional/disperse/test_ec_file_rename_on_brick_down.py
@@ -0,0 +1,221 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from random import choice
+from time import sleep
+
+from glusto.core import Glusto as g
+
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.brick_libs import get_all_bricks, bring_bricks_offline
+from glustolibs.gluster.volume_libs import volume_start
+from glustolibs.gluster.glusterfile import create_link_file
+
+
+@runs_on([['dispersed', 'distributed-dispersed'], ['glusterfs']])
+class TestECRenameFilesOnBrickDown(GlusterBaseClass):
+
+    # pylint: disable=too-many-statements,too-many-locals
+    def setUp(self):
+        self.get_super_method(self, 'setUp')()
+
+        # Remove on fixing BZ 1596165
+        if 'dispersed' in self.volname:
+            self.skipTest("Test will fail due to BZ 1596165")
+
+        # Setup and mount volume
+        ret = self.setup_volume_and_mount_volume(self.mounts)
+        if not ret:
+            raise ExecutionError("Failed to setup and mount volume")
+
+    def tearDown(self):
+
+        # Unmount and cleanup volume
+        ret = self.unmount_volume_and_cleanup_volume(self.mounts)
+        if not ret:
+            raise ExecutionError("Failed to unmount and cleanup volume")
+
+        self.get_super_method(self, 'tearDown')()
+
+    def create_links(self, client, path):
+
+        # Soft links
+        for i in range(4, 7):
+            ret = create_link_file(client,
+                                   '{}/file{}_or'.format(path, i),
+                                   '{}/file{}_sl'.format(path, i), soft=True)
+            self.assertTrue(ret, "Fail: Not able to create soft link for "
+                            "{}/file{}_or".format(path, i))
+        g.log.info("Created soft links for files successfully")
+
+        # Hard links
+        for i in range(7, 10):
+            ret = create_link_file(client,
+                                   '{}/file{}_or'.format(path, i),
+                                   '{}/file{}_hl'.format(path, i),)
+            self.assertTrue(ret, "Fail: Not able to create hard link for "
+                            "{}/file{}_or".format(path, i))
+        g.log.info("Created hard links for files successfully")
+
+    def test_ec_rename_files_with_brick_down(self):
+        """
+        Description: Test to check no errors on file/dir renames when one of
+                     the bricks is down in the volume.
+        Steps:
+        1. Create an EC volume
+        2. Mount the volume using FUSE on two different clients
+        3. Create 9 files from one of the clients
+        4. Create 9 dirs with 9 files each from another client
+        5. Create soft-links, hard-links for file{4..6}, file{7..9}
+        6. Create soft-links for dir{4..6}
+        7. Begin renaming the files, in multiple iterations
+        8. Bring down a brick while renaming the files
+        9. Bring the brick online after renaming some of the files
+        10. Wait for renaming of the files
+        11. Validate no data loss and files are renamed successfully
+        """
+
+        # Create 9 files from client 1 on mount
+        m_point = self.mounts[0].mountpoint
+        cmd = 'cd %s; touch file{1..9}_or' % m_point
+        ret, _, _ = g.run(self.clients[0], cmd)
+        self.assertEqual(ret, 0, "Fail: Not able to create files on "
+                         "{}".format(m_point))
+        g.log.info("Files created successfully on mount point")
+
+        # Create 9 dirs with 9 files each from client 2
+        cmd = ('cd %s; mkdir -p dir{1..9}_or; touch '
+               'dir{1..9}_or/file{1..9}_or' % m_point)
+        ret, _, _ = g.run(self.clients[1], cmd)
+        self.assertEqual(ret, 0, "Fail: Not able to create dir with files on "
+                         "{}".format(m_point))
+        g.log.info("Dirs with files are created successfully on mount point")
+
+        # Create required soft links and hard links from client 1 on mount
+        client, path = self.clients[0], m_point
+        self.create_links(client, path)
+
+        client = self.clients[1]
+        for i in range(1, 10):
+
+            # Create required soft and hard links in nested dirs
+            path = '{}/dir{}_or'.format(m_point, i)
+            self.create_links(client, path)
+
+        # Create soft links for dirs
+        path = m_point
+        for i in range(4, 7):
+            ret = create_link_file(client,
+                                   '{}/dir{}_or'.format(path, i),
+                                   '{}/dir{}_sl'.format(path, i), soft=True)
+            self.assertTrue(ret, "Fail: Not able to create soft link for "
+                            "{}/dir{}_or".format(path, i))
+        g.log.info("Created nested soft and hard links for files successfully")
+
+        # Calculate the total file count against each section: original,
+        # hard and soft links
+        cmd = ('cd %s; arr=(or sl hl); '
+               'for i in ${arr[*]}; do find . -name "*$i" | wc -l ; '
+               'done; ' % m_point)
+        ret, out, _ = g.run(client, cmd)
+        self.assertEqual(ret, 0, "Not able to get list of soft and hard links "
+                         "created on the mount point")
+        all_org, all_soft, all_hard = out.split()
+
+        # Rename 2 out of 3 dir's soft links from client 1
+        client = self.clients[0]
+        cmd = ('cd %s; sl=0; '
+               'for line in `ls -R | grep -P "dir(4|5)_sl"`; '
+               'do mv -f "$line" "$line""_renamed"; ((sl++)); done; '
+               'echo $sl;' % m_point)
+        ret, out, _ = g.run(client, cmd)
+        self.assertEqual(ret, 0, "Not able to rename directory soft links")
+        temp_soft = out.strip()
+
+        # Start renaming original files from client 1 and
+        # softlinks, hardlinks from client 2
+        cmd = ('cd %s; arr=(. dir{1..9}_or); or=0; '
+               'for item in ${arr[*]}; do '
+               'cd $item; '
+               'for line in `ls | grep -P "file(1|2)_or"`; '
+               'do mv -f "$line" "$line""_renamed"; ((or++)); sleep 2; done;'
+               'cd - > /dev/null; sleep 1; done; echo $or ' % m_point)
+        proc_or = g.run_async(client, cmd)
+
+        client = self.clients[1]
+        cmd = ('cd %s; arr=(. dir{1..9}_or); sl=0; hl=0; '
+               'for item in ${arr[*]}; do '
+               'cd $item; '
+               'for line in `ls | grep -P "file(4|5)_sl"`; '
+               'do mv -f "$line" "$line""_renamed"; ((sl++)); sleep 1; done; '
+               'for line in `ls | grep -P "file(7|8)_hl"`; '
+               'do mv -f "$line" "$line""_renamed"; ((hl++)); sleep 1; done; '
+               'cd - > /dev/null; sleep 1; done; echo $sl $hl; ' % m_point)
+        proc_sl_hl = g.run_async(client, cmd)
+
+        # Wait for some files to be renamed
+        sleep(20)
+
+        # Kill one of the bricks
+        brick_list = get_all_bricks(self.mnode, self.volname)
+        ret = bring_bricks_offline(self.volname, choice(brick_list))
+        self.assertTrue(ret, "Failed to bring one of the bricks offline")
+
+        # Wait for some more files to be renamed
+        sleep(20)
+
+        # Bring brick online
+        ret, _, _ = volume_start(self.mnode, self.volname, force=True)
+        self.assertEqual(ret, 0, "Not able to start volume with force option")
+
+        # Wait for rename to complete and take count of file operations
+        ret, out, _ = proc_or.async_communicate()
+        self.assertEqual(ret, 0, "Fail: Original files are not renamed")
+        ren_org = out.strip()
+
+        ret, out, _ = proc_sl_hl.async_communicate()
+        self.assertEqual(ret, 0, "Fail: Soft and hard links are not renamed")
+        ren_soft, ren_hard = out.strip().split()
+        ren_soft = str(int(ren_soft) + int(temp_soft))
+
+        # Count the actual number of renamed links/files
+        cmd = ('cd %s; arr=(or or_renamed sl sl_renamed hl hl_renamed); '
+               'for i in ${arr[*]}; do find . -name "*$i" | wc -l ; '
+               'done; ' % m_point)
+        ret, out, _ = g.run(client, cmd)
+        self.assertEqual(ret, 0, "Not able to get count of original and link "
+                         "files after brick was brought up")
+        (act_org, act_org_ren, act_soft,
+         act_soft_ren, act_hard, act_hard_ren) = out.split()
+
+        # Validate that the expected and actual rename counts of
+        # links/files match
+        for exp, act, msg in ((ren_org, act_org_ren, 'original'),
+                              (ren_soft, act_soft_ren, 'soft links'),
+                              (ren_hard, act_hard_ren, 'hard links')):
+            self.assertEqual(exp, act, "Count of {} files renamed while brick "
+                             "was offline is not matching".format(msg))
+
+        # Validate no data is lost in the rename process
+        for exp, act, msg in (
+                (int(all_org)-int(act_org_ren), int(act_org), 'original'),
+                (int(all_soft)-int(act_soft_ren), int(act_soft), 'soft links'),
+                (int(all_hard)-int(act_hard_ren), int(act_hard), 'hard links'),
+        ):
+            self.assertEqual(exp, act, "Count of {} files which are not "
+                             "renamed while brick was offline "
+                             "is not matching".format(msg))
diff --git a/tests/functional/disperse/test_ec_heal_on_file_appends.py b/tests/functional/disperse/test_ec_heal_on_file_appends.py
new file mode 100644
index 000000000..b39e6dc0f
--- /dev/null
+++ b/tests/functional/disperse/test_ec_heal_on_file_appends.py
@@ -0,0 +1,186 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
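+
+# Note on the xattrs this module validates (a hedged summary, not
+# authoritative xlator documentation): the EC xlator stores metadata such
+# as trusted.ec.size (logical file size) and trusted.ec.version
+# (data/metadata version counters) as extended attributes on every brick;
+# once self-heal catches an offlined brick up, these values are expected
+# to match across all online bricks.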
+
+from random import sample
+from time import sleep
+
+from glusto.core import Glusto as g
+
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.brick_libs import (
+    bring_bricks_offline,
+    bring_bricks_online,
+    are_bricks_offline,
+    validate_xattr_on_all_bricks,
+    get_online_bricks_list)
+from glustolibs.gluster.glusterfile import get_file_stat
+from glustolibs.misc.misc_libs import kill_process
+
+
+@runs_on([['dispersed'], ['glusterfs']])
+class TestHealOnFileAppends(GlusterBaseClass):
+    """
+    Test to verify heal on dispersed volume on file appends
+    """
+
+    def setUp(self):
+
+        self.get_super_method(self, 'setUp')()
+        self.mount_obj = self.mounts[0]
+        self.client = self.mount_obj.client_system
+
+        # Setup and mount the volume
+        ret = self.setup_volume_and_mount_volume(mounts=[self.mount_obj])
+        if not ret:
+            raise ExecutionError("Failed to create and mount volume")
+        g.log.info("Created and Mounted volume successfully")
+
+        self.offline_bricks = []
+        self.is_io_started = False
+        self.file_name = 'test_file'
+
+    def tearDown(self):
+
+        # Kill the IO on client
+        if self.is_io_started:
+            ret = kill_process(self.client, process_names=[self.file_name])
+            if not ret:
+                raise ExecutionError("Not able to kill/stop IO in client")
+            g.log.info('Successfully stopped IO in client')
+
+        if self.offline_bricks:
+            ret = bring_bricks_online(self.mnode, self.volname,
+                                      self.offline_bricks)
+            if not ret:
+                raise ExecutionError('Not able to bring bricks {} '
+                                     'online'.format(self.offline_bricks))
+
+        # Cleanup and unmount volume
+        ret = self.unmount_volume_and_cleanup_volume(mounts=[self.mount_obj])
+        if not ret:
+            raise ExecutionError("Failed to unmount and cleanup volume")
+        g.log.info("Unmount and Cleanup of volume is successful")
+
+        self.get_super_method(self, 'tearDown')()
+
+    def test_heal_on_file_appends(self):
+        """
+        Test steps:
+        - Create and mount an EC volume 4+2
+        - Start appending to a file from the client
+        - Bring down one of the bricks (say b1)
+        - Wait for ~minute and bring down another brick (say b2)
+        - After ~minute bring up the first brick (b1)
+        - Check the xattrs 'ec.size', 'ec.version'
+        - The xattrs of the online bricks should be same, as an indication
+          of heal
+        """
+
+        # Get bricks list
+        bricks_list = get_online_bricks_list(self.mnode, self.volname)
+        self.assertIsNotNone(bricks_list, 'Not able to get bricks list')
+
+        # Create a file, then generate and append data to it
+        self.file_name = 'test_file'
+        cmd = ("cd %s ;"
+               "while true; do "
+               "cat /dev/urandom | tr -dc [:space:][:print:] "
+               "| head -c 4K >> %s; sleep 2; "
+               "done;"
+               % (self.mount_obj.mountpoint, self.file_name))
+        ret = g.run_async(self.client, cmd,
+                          user=self.mount_obj.user)
+        self.assertIsNotNone(ret, "Not able to start IO on client")
+        g.log.info('Started generating and appending data to the file')
+        self.is_io_started = True
+
+        # Select 3 bricks: 2 to be brought offline and 1 to stay healthy
+        brick_1, brick_2, brick_3 = sample(bricks_list, 3)
+
+        # Wait for IO to fill the bricks
+        sleep(30)
+
+        # Bring first brick offline and validate
+        ret = bring_bricks_offline(self.volname, [brick_1])
+        self.assertTrue(
+            ret, 'Failed to bring brick {} offline'.format(brick_1))
+        ret = are_bricks_offline(self.mnode, self.volname, [brick_1])
+        self.assertTrue(ret, 'Not able to validate brick {} being '
+                        'offline'.format(brick_1))
+        g.log.info("Brick %s is brought offline successfully", brick_1)
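+        # Record the offlined brick so tearDown can force it back online
+        # even if an assertion fails midway through the test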
+        self.offline_bricks.append(brick_1)
+
+        # Wait for IO to fill the bricks
+        sleep(30)
+
+        # Bring second brick offline and validate
+        ret = bring_bricks_offline(self.volname, [brick_2])
+        self.assertTrue(
+            ret, 'Failed to bring brick {} offline'.format(brick_2))
+        ret = are_bricks_offline(self.mnode, self.volname, [brick_2])
+        self.assertTrue(ret, 'Not able to validate brick {} being '
+                        'offline'.format(brick_2))
+        g.log.info("Brick %s is brought offline successfully", brick_2)
+        self.offline_bricks.append(brick_2)
+
+        # Wait for IO to fill the bricks
+        sleep(30)
+
+        # Bring first brick online and validate peer status
+        ret = bring_bricks_online(
+            self.mnode,
+            self.volname,
+            [brick_1],
+            bring_bricks_online_methods=['glusterd_restart'])
+        self.assertTrue(ret, 'Not able to bring brick {} '
+                        'online'.format(brick_1))
+        g.log.info("Offlined brick %s is brought online successfully", brick_1)
+        ret = self.validate_peers_are_connected()
+        self.assertTrue(ret, "Peers are not in connected state after bringing "
+                        "an offline brick to online via `glusterd restart`")
+        g.log.info("Successfully validated peers are in connected state")
+
+        # Let the onlined brick catch up with the healthy bricks
+        sleep(30)
+
+        # Validate the xattrs are same on the onlined and healthy bricks
+        online_bricks = get_online_bricks_list(self.mnode, self.volname)
+        self.assertIsNotNone(online_bricks, 'Unable to fetch online bricks')
+        g.log.info('All online bricks are fetched successfully')
+        for xattr in ('trusted.ec.size', 'trusted.ec.version'):
+            ret = validate_xattr_on_all_bricks(
+                [brick_1, brick_3], self.file_name, xattr)
+            self.assertTrue(ret, "{} is not same on all online "
+                            "bricks".format(xattr))
+
+        # Get epoch time on the client
+        ret, prev_ctime, _ = g.run(self.client, 'date +%s')
+        self.assertEqual(ret, 0, 'Not able to get epoch time from client')
+
+        # Headroom for file ctime to get updated
+        sleep(5)
+
+        # Validate the file was being appended while checking for xattrs
+        ret = get_file_stat(
+            self.client,
+            '{}/{}'.format(self.mount_obj.mountpoint, self.file_name))
+        self.assertIsNotNone(ret, "Not able to get stats of the file")
+        curr_ctime = ret['epoch_ctime']
+        self.assertGreater(int(curr_ctime), int(prev_ctime), "Not able "
+                           "to validate data is appended to the file "
+                           "while checking for xattrs")
+
+        g.log.info("Data on all online bricks is healed and consistent")
diff --git a/tests/functional/disperse/test_ec_io_continuity.py b/tests/functional/disperse/test_ec_io_continuity.py
new file mode 100644
index 000000000..2a1510ce0
--- /dev/null
+++ b/tests/functional/disperse/test_ec_io_continuity.py
@@ -0,0 +1,215 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
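+
+# A brief note on the checksum used by this module (a hedged summary of
+# the helper's behavior, not its documented contract):
+# collect_mounts_arequal() runs the arequal-checksum tool over a path,
+# producing a single checksum across file data and metadata; matching
+# values before and after a brick failure indicate the static data set
+# was untouched.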
+
+from datetime import datetime, timedelta
+from time import sleep
+
+from glusto.core import Glusto as g
+
+from glustolibs.gluster.brick_libs import bring_bricks_offline
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.gluster.glusterfile import create_link_file
+from glustolibs.gluster.heal_libs import monitor_heal_completion
+from glustolibs.gluster.heal_ops import heal_info
+from glustolibs.gluster.volume_libs import get_subvols, volume_start
+from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs,
+                                 wait_for_io_to_complete)
+from glustolibs.misc.misc_libs import upload_scripts
+
+
+@runs_on([['dispersed', 'distributed-dispersed'], ['glusterfs', 'nfs']])
+class TestIOsOnECVolume(GlusterBaseClass):
+    @classmethod
+    def setUpClass(cls):
+        cls.get_super_method(cls, 'setUpClass')()
+        cls.script_path = '/usr/share/glustolibs/io/scripts'
+        for file_ops in ('file_dir_ops.py', 'fd_writes.py'):
+            ret = upload_scripts(cls.clients,
+                                 '{}/{}'.format(cls.script_path, file_ops))
+            if not ret:
+                raise ExecutionError('Failed to upload IO scripts to client')
+
+    def setUp(self):
+        self.get_super_method(self, 'setUp')()
+        ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
+        self.all_mounts_procs = []
+        if not ret:
+            raise ExecutionError('Failed to setup and mount volume')
+
+    def tearDown(self):
+        if self.all_mounts_procs:
+            ret = wait_for_io_to_complete(self.all_mounts_procs,
+                                          [self.mounts[1]] *
+                                          len(self.all_mounts_procs))
+            if not ret:
+                raise ExecutionError('Wait for IO completion failed on some '
+                                     'of the clients')
+        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+        if not ret:
+            raise ExecutionError("Not able to unmount and cleanup volume")
+        self.get_super_method(self, 'tearDown')()
+
+    def _bring_bricks_online_and_monitor_heal(self, bricks):
+        """Bring the bricks online and monitor heal until completion"""
+        ret, _, _ = volume_start(self.mnode, self.volname, force=True)
+        self.assertEqual(ret, 0, 'Not able to force start volume')
+        ret = monitor_heal_completion(self.mnode,
+                                      self.volname,
+                                      bricks=list(bricks))
+        self.assertTrue(ret, 'Heal is not complete for {}'.format(bricks))
+
+    # pylint: disable=too-many-locals
+    def test_io_with_cyclic_brick_down(self):
+        """
+        Description: To check the heal process on an EC volume when bricks
+                     are brought down in a cyclic fashion
+        Steps:
+        - Create, start and mount an EC volume on two clients
+        - Create multiple files and directories, including all file types,
+          in one directory from client 1
+        - Take an arequal checksum of the above data
+        - Create another folder and pump different fops from client 2
+        - Fail and bring up redundant bricks in a cyclic fashion in all of
+          the subvols, maintaining a minimum delay between each operation
+        - In every cycle create a new dir when a brick is down and wait
+          for heal
+        - Validate heal info on the volume responds quickly (within 5s)
+          while a brick is down
+        - Validate arequal on bringing the brick offline
+        """
+
+        # Create a directory structure on mount from client 1
+        mount_obj = self.mounts[0]
+        cmd = ('/usr/bin/env python {}/file_dir_ops.py '
+               'create_deep_dirs_with_files --dir-depth 3 '
+               '--max-num-of-dirs 5 --fixed-file-size 10k '
+               '--num-of-files 9 {}'.format(
+                   self.script_path,
+                   mount_obj.mountpoint,
+               ))
+        ret, _, _ = g.run(mount_obj.client_system, cmd)
+        self.assertEqual(ret, 0, 'Not able to create directory structure')
+        dir_name = 'user1'
+        for i in range(5):
+            ret = create_link_file(
+                mount_obj.client_system,
+                '{}/{}/testfile{}.txt'.format(mount_obj.mountpoint, dir_name,
+                                              i),
+                '{}/{}/testfile{}_sl.txt'.format(mount_obj.mountpoint,
+                                                 dir_name, i),
+                soft=True)
+            self.assertTrue(ret, 'Not able to create soft links')
+        for i in range(5, 9):
+            ret = create_link_file(
+                mount_obj.client_system,
+                '{}/{}/testfile{}.txt'.format(mount_obj.mountpoint, dir_name,
+                                              i),
+                '{}/{}/testfile{}_hl.txt'.format(mount_obj.mountpoint,
+                                                 dir_name, i))
+            self.assertTrue(ret, 'Not able to create hard links')
+        g.log.info('Successfully created directory structure consisting all '
+                   'file types on mount')
+
+        # Take note of arequal checksum
+        ret, exp_arequal = collect_mounts_arequal(mount_obj, path=dir_name)
+        self.assertTrue(ret, 'Failed to get arequal checksum on mount')
+
+        # Get all the subvols in the volume
+        subvols = get_subvols(self.mnode, self.volname)
+        self.assertTrue(subvols.get('volume_subvols'), 'Not able to get '
+                        'subvols of the volume')
+
+        # Create a dir, pump IO in that dir, offline b1, wait for IO and
+        # online b1, wait for heal of b1, bring b2 offline...
+        m_point, m_client = (self.mounts[1].mountpoint,
+                             self.mounts[1].client_system)
+        cur_off_bricks = ''
+        for count, off_brick in enumerate(zip(*subvols.get('volume_subvols')),
+                                          start=1):
+
+            # Bring offline bricks online by force starting the volume
+            if cur_off_bricks:
+                self._bring_bricks_online_and_monitor_heal(cur_off_bricks)
+
+            # Create a dir for running IO
+            ret = mkdir(m_client, '{}/dir{}'.format(m_point, count))
+            self.assertTrue(
+                ret, 'Not able to create directory for '
+                'starting IO before offline of brick')
+
+            # Start IO in the newly created directory
+            cmd = ('/usr/bin/env python {}/fd_writes.py -n 10 -t 480 -d 5 -c '
+                   '16 --dir {}/dir{}'.format(self.script_path, m_point,
+                                              count))
+            proc = g.run_async(m_client, cmd)
+            self.all_mounts_procs.append(proc)
+
+            # Wait for IO to partially fill the dir
+            sleep(10)
+
+            # Bring a single brick offline from all of the subvols
+            ret = bring_bricks_offline(self.volname, list(off_brick))
+            self.assertTrue(ret,
+                            'Not able to bring {} offline'.format(off_brick))
+
+            # Validate heal info responds within 5s while bricks are offline
+            start_time = datetime.now().replace(microsecond=0)
+            ret, _, _ = heal_info(self.mnode, self.volname)
+            end_time = datetime.now().replace(microsecond=0)
+            self.assertEqual(
+                ret, 0, 'Not able to query heal info status '
+                'of volume when a brick is offline')
+            self.assertLess(
+                end_time - start_time, timedelta(seconds=5),
+                'Query of heal info of volume when a brick is '
+                'offline is taking more than 5 seconds')
+
+            # Wait for some more IO to fill the dir
+            sleep(10)
+
+            # Validate arequal on the initial static dir
+            ret, act_arequal = collect_mounts_arequal(mount_obj,
+                                                      path=dir_name)
+            self.assertTrue(
+                ret, 'Failed to get arequal checksum on bringing '
+                'a brick offline')
+            self.assertEqual(
+                exp_arequal, act_arequal, 'Mismatch of arequal '
+                'checksum before and after killing a brick')
+
+            cur_off_bricks = off_brick
+
+        # Take note of ctime on mount
+        ret, prev_ctime, _ = g.run(m_client, 'date +%s')
+        self.assertEqual(ret, 0, 'Not able to get epoch time from client')
+
+        self._bring_bricks_online_and_monitor_heal(cur_off_bricks)
+
+        # Validate IO was happening during brick operations
+        # and compare ctime of the most recent file to the earlier epoch time
+        ret = validate_io_procs(self.all_mounts_procs,
+                                [self.mounts[0]] * len(self.all_mounts_procs))
+        self.assertTrue(ret, 'Not able to validate completion of IO on '
+                        'mounts')
+        self.all_mounts_procs *= 0  # don't validate IO in tearDown
+        ret, curr_ctime, _ = g.run(
+            m_client, "find {} -type f -printf '%C@\n' | "
+            'sort -r | head -n 1'.format(m_point))
+        self.assertEqual(
+            ret, 0, 'Not able to get ctime of last edited file from the mount')
+        self.assertGreater(
+            float(curr_ctime), float(prev_ctime), 'Not able '
+            'to validate IO was happening during brick operations')
+
+        g.log.info('Completed IO continuity test on EC volume successfully')
diff --git a/tests/functional/disperse/test_ec_lookup_and_move_operations.py b/tests/functional/disperse/test_ec_lookup_and_move_operations.py
new file mode 100644
index 000000000..f9925bfae
--- /dev/null
+++ b/tests/functional/disperse/test_ec_lookup_and_move_operations.py
@@ -0,0 +1,259 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from random import sample
+from unittest import SkipTest
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+                                           are_bricks_offline,
+                                           are_bricks_online)
+from glustolibs.gluster.heal_libs import monitor_heal_completion
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.gluster.volume_ops import volume_start
+from glustolibs.gluster.volume_libs import get_subvols
+from glustolibs.misc.misc_libs import upload_scripts
+from glustolibs.io.utils import (validate_io_procs, wait_for_io_to_complete)
+
+
+@runs_on([['dispersed', 'distributed-dispersed'], ['glusterfs', 'nfs']])
+class TestEcLookupAndMoveOperations(GlusterBaseClass):
+
+    @classmethod
+    def setUpClass(cls):
+        # Calling GlusterBaseClass setUpClass
+        cls.get_super_method(cls, 'setUpClass')()
+
+        # Check for availability of at least 3 clients
+        if len(cls.clients) < 3:
+            raise SkipTest("This test requires at least 3 clients")
+
+        # Upload IO scripts for running IO on mounts
+        cls.script_upload_path = (
+            "/usr/share/glustolibs/io/scripts/file_dir_ops.py")
+        ret = upload_scripts(cls.clients, cls.script_upload_path)
+        if not ret:
+            raise ExecutionError("Failed to upload IO scripts to clients %s" %
+                                 cls.clients)
+        g.log.info("Successfully uploaded IO scripts to clients %s",
+                   cls.clients)
+
+    def setUp(self):
+        # calling GlusterBaseClass setUp
+        self.get_super_method(self, 'setUp')()
+
+        # Setup volume and mount it on three clients.
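+        # (The three mounts double as the clients c1, c2 and c3 referred
+        # to in the test step docstrings below.)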
+        if not self.setup_volume_and_mount_volume(self.mounts):
+            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+
+    def tearDown(self):
+        # Calling GlusterBaseClass tearDown
+        self.get_super_method(self, 'tearDown')()
+
+        if getattr(self, 'mount_procs', None):
+            ret = wait_for_io_to_complete(self.mount_procs, self.mounts)
+            if not ret:
+                raise ExecutionError(
+                    "Wait for IO completion failed on some of the clients")
+
+        # Unmount and cleanup the volume
+        if not self.unmount_volume_and_cleanup_volume(self.mounts):
+            raise ExecutionError("Unable to unmount and cleanup volume")
+
+    def _run_create_files(self, file_count, base_name, mpoint, client):
+        """Run create files using file_dir_ops.py"""
+        cmd = ("/usr/bin/env python {} create_files -f {} --fixed-file-size"
+               " 1k --base-file-name {} {}".format(self.script_upload_path,
+                                                   file_count, base_name,
+                                                   mpoint))
+        proc = g.run_async(client, cmd)
+        self.mount_procs.append(proc)
+
+    def test_ec_lookup_and_move_operations_all_bricks_online(self):
+        """
+        Test Steps:
+        1. Create volume and mount the volume on 3 clients, c1(client1),
+           c2(client2) and c3(client3)
+        2. On c1, mkdir /c1/dir
+        3. On c2, create 4000 files on mount point i.e. "/"
+        4. After step 3, create the next 4000 files on c2 on mount point
+           i.e. "/"
+        5. On c1, create 10000 files on /dir/
+        6. On c3, start moving the 4000 files created in step 3 from mount
+           point to /dir/
+        7. On c3, start ls in a loop for 20 iterations
+        """
+        # Create directory on client1
+        dir_on_mount = self.mounts[0].mountpoint + '/dir'
+        ret = mkdir(self.mounts[0].client_system, dir_on_mount)
+        self.assertTrue(ret, "unable to create directory on client"
+                        " 1 {}".format(self.mounts[0].client_system))
+        g.log.info("Directory created on %s successfully",
+                   self.mounts[0].client_system)
+
+        # Create 4000 files on the mountpoint of client2
+        cmd = ("/usr/bin/env python {} create_files -f 4000"
+               " --fixed-file-size 10k --base-file-name file_from_client2_"
+               " {}".format(self.script_upload_path,
+                            self.mounts[1].mountpoint))
+        ret, _, err = g.run(self.mounts[1].client_system, cmd)
+        self.assertEqual(ret, 0, "File creation on {} failed with {}".
+                         format(self.mounts[1].client_system, err))
+        g.log.info("File creation successful on %s",
+                   self.mounts[1].client_system)
+
+        # The next IO is to be run in the background, so using the
+        # mount_procs list
+        self.mount_procs = []
+        # Create the next 4000 files on the mountpoint of client2
+        self._run_create_files(file_count=4000,
+                               base_name="files_on_client2_background_",
+                               mpoint=self.mounts[1].mountpoint,
+                               client=self.mounts[1].client_system)
+
+        # Create 10000 files from client 1 on dir1
+        self._run_create_files(file_count=10000,
+                               base_name="files_on_client1_background_",
+                               mpoint=dir_on_mount,
+                               client=self.mounts[0].client_system)
+
+        # Move the files created on client2 to dir from client3
+        cmd = ("for i in `seq 0 3999`; do mv {}/file_from_client2_$i.txt {}; "
+               "done".format(self.mounts[2].mountpoint, dir_on_mount))
+        proc = g.run_async(self.mounts[2].client_system, cmd)
+        self.mount_procs.append(proc)
+
+        # Perform a lookup in loop from client3 for 20 iterations
+        cmd = ("ls -R {}".format(self.mounts[2].mountpoint))
+        counter = 20
+        while counter:
+            ret, _, err = g.run(self.mounts[2].client_system, cmd)
+            self.assertEqual(ret, 0, "ls while mv operation being carried"
+                             " out failed with {}".format(err))
+            g.log.debug("ls successful for the %s time", 21-counter)
+            counter -= 1
+
+        self.assertTrue(validate_io_procs(self.mount_procs, self.mounts),
+                        "IO failed on the clients")
+        # Emptying mount_procs for not validating IO in tearDown
+        self.mount_procs *= 0
+
+    def test_ec_lookup_and_move_operations_few_bricks_are_offline(self):
+        """
+        Test Steps:
+        1. Mount this volume on 3 mount points, c1, c2 and c3
+        2. Bring two bricks offline in each subvol
+        3. On client1: under dir1 create files f{1..10000}, run in background
+        4. On client2: under root dir of mountpoint touch x{1..1000}
+        5. On client3: after step 4 has completed, start creating
+           x{1001..10000}
+        6. Bring online the bricks which were offline (2 in each of the two
+           subvols)
+        7. While IO on client1 and client3 is happening, on client2 move all
+           the x* files into dir1
+        8. Perform lookup from client 3
+        """
+        # List two bricks in each subvol
+        all_subvols_dict = get_subvols(self.mnode, self.volname)
+        subvols = all_subvols_dict['volume_subvols']
+        bricks_to_bring_offline = []
+        for subvol in subvols:
+            self.assertTrue(subvol, "List is empty")
+            bricks_to_bring_offline.extend(sample(subvol, 2))
+
+        # Bring two bricks of each subvol offline
+        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
+        self.assertTrue(ret, "Bricks are still online")
+        g.log.info("Bricks are offline %s", bricks_to_bring_offline)
+
+        # Validate the bricks are offline
+        ret = are_bricks_offline(self.mnode, self.volname,
+                                 bricks_to_bring_offline)
+        self.assertTrue(ret, "Few of the bricks are still online:"
+                        " {}".format(bricks_to_bring_offline))
+        g.log.info("%s bricks are offline as expected",
+                   bricks_to_bring_offline)
+
+        # Create directory on client1
+        dir_on_mount = self.mounts[0].mountpoint + '/dir1'
+        ret = mkdir(self.mounts[0].client_system, dir_on_mount)
+        self.assertTrue(ret, "unable to create directory on client"
+                        " 1 {}".format(self.mounts[0].client_system))
+        g.log.info("Dir1 created on %s successfully",
+                   self.mounts[0].client_system)
+
+        # The next IO is to be run in the background, so using the
+        # mount_procs list and run_async.
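+        # (Procs collected in mount_procs are validated by validate_io_procs
+        # at the end of the test, or reaped by wait_for_io_to_complete in
+        # tearDown if an assertion fails first.)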
+        self.mount_procs = []
+
+        # On client1: under dir1 create files f{1..10000} run in background
+        self._run_create_files(file_count=10000, base_name="f_",
+                               mpoint=dir_on_mount,
+                               client=self.mounts[0].client_system)
+
+        # On client2: under root dir of the mountpoint touch x{1..1000}
+        cmd = ("/usr/bin/env python {} create_files -f 1000 --fixed-file-size"
+               " 10k --base-file-name x {}".format(self.script_upload_path,
+                                                   self.mounts[1].mountpoint))
+        ret, _, err = g.run(self.mounts[1].client_system, cmd)
+        self.assertEqual(ret, 0, "File creation failed on {} with {}".
+                         format(self.mounts[1].client_system, err))
+        g.log.info("File creation successful on %s",
+                   self.mounts[1].client_system)
+
+        # On client3: start creating x{1001..10000}
+        cmd = ("cd {}; for i in `seq 1000 10000`; do touch x$i; done; "
+               "cd -".format(self.mounts[2].mountpoint))
+        proc = g.run_async(self.mounts[2].client_system, cmd)
+        self.mount_procs.append(proc)
+
+        # Bring bricks online with volume start force
+        ret, _, err = volume_start(self.mnode, self.volname, force=True)
+        self.assertEqual(ret, 0, err)
+        g.log.info("Volume: %s started successfully", self.volname)
+
+        # Check whether bricks are online or not
+        ret = are_bricks_online(self.mnode, self.volname,
+                                bricks_to_bring_offline)
+        self.assertTrue(ret, "Bricks {} are still offline".
+                        format(bricks_to_bring_offline))
+        g.log.info("Bricks %s are online now", bricks_to_bring_offline)
+
+        # From client2 move all the files with name starting with x into dir1
+        cmd = ("for i in `seq 0 999`; do mv {}/x$i.txt {}; "
+               "done".format(self.mounts[1].mountpoint, dir_on_mount))
+        proc = g.run_async(self.mounts[1].client_system, cmd)
+        self.mount_procs.append(proc)
+
+        # Perform a lookup in loop from client3 for 20 iterations
+        cmd = ("ls -R {}".format(self.mounts[2].mountpoint))
+        counter = 20
+        while counter:
+            ret, _, err = g.run(self.mounts[2].client_system, cmd)
+            self.assertEqual(ret, 0, "ls while mv operation being carried"
+                             " out failed with {}".format(err))
+            g.log.debug("ls successful for the %s time", 21-counter)
+            counter -= 1
+
+        self.assertTrue(validate_io_procs(self.mount_procs, self.mounts),
+                        "IO failed on the clients")
+        # Emptying mount_procs for not validating IO in tearDown
+        self.mount_procs *= 0
+
+        # Wait for heal to complete
+        ret = monitor_heal_completion(self.mnode, self.volname,)
+        self.assertTrue(ret, "Heal didn't complete in the expected time")
+        g.log.info("Heal completed successfully on %s volume", self.volname)
diff --git a/tests/functional/disperse/test_ec_open_fd.py b/tests/functional/disperse/test_ec_open_fd.py
new file mode 100644
index 000000000..218713c6f
--- /dev/null
+++ b/tests/functional/disperse/test_ec_open_fd.py
@@ -0,0 +1,174 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
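+
+# A hedged sketch of the client-side pattern this test drives via the
+# open_file_fd helper (the snippet below only illustrates what such a
+# helper might do, not the helper's actual source):
+#
+#     import os, time
+#     fd = os.open('file_openfd', os.O_WRONLY)   # FD opened while brick down
+#     time.sleep(100)                            # held across brick down/up
+#     os.write(fd, b'xyz')                       # write lands after bring-up
+#     os.close(fd)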
+
+"""
+Test Description:
+    Tests open FD heal for EC volume
+"""
+
+import os
+from random import choice
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.heal_libs import monitor_heal_completion
+from glustolibs.gluster.brick_libs import (bring_bricks_online,
+                                           bring_bricks_offline,
+                                           validate_xattr_on_all_bricks)
+from glustolibs.gluster.heal_ops import disable_heal
+from glustolibs.gluster.volume_ops import set_volume_options
+from glustolibs.gluster.volume_libs import (get_subvols,
+                                            log_volume_info_and_status)
+from glustolibs.gluster.glusterfile import check_if_pattern_in_file
+from glustolibs.io.utils import open_file_fd
+
+
+@runs_on([['dispersed', 'distributed-dispersed'],
+          ['glusterfs']])
+class TestEcOpenFd(GlusterBaseClass):
+
+    def setUp(self):
+        # Calling GlusterBaseClass setUp
+        self.get_super_method(self, 'setUp')()
+
+        # Setup Volume and Mount Volume
+        ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
+        if not ret:
+            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+        g.log.info("Successful in Setup Volume and Mount Volume")
+
+    def tearDown(self):
+        """
+        Cleanup and umount volume
+        """
+        # Calling GlusterBaseClass teardown
+        self.get_super_method(self, 'tearDown')()
+
+        # Cleanup and umount volume
+        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+        if not ret:
+            raise ExecutionError("Failed to umount the vol & cleanup Volume")
+        g.log.info("Successful in umounting the volume and Cleanup")
+
+    def test_ec_open_fd(self):
+        """
+        Test Steps:
+        - Disable server side heal
+        - Create a file
+        - Set volume option to implement open FD on file
+        - Bring a brick down, say b1
+        - Open FD on file
+        - Bring brick b1 up
+        - Write to the open FD file
+        - Monitor heal
+        - Check xattr, ec.version and ec.size of file
+        - Check stat of file
+        """
+
+        # pylint: disable=too-many-branches,too-many-statements,too-many-locals
+
+        mountpoint = self.mounts[0].mountpoint
+
+        # Disable server side heal
+        ret = disable_heal(self.mnode, self.volname)
+        self.assertTrue(ret, "Failed to disable server side heal")
+        g.log.info("Successfully disabled server side heal")
+
+        # Log Volume Info and Status after disabling server side heal
+        ret = log_volume_info_and_status(self.mnode, self.volname)
+        self.assertTrue(ret, ("Logging volume info and status failed "
+                              "on volume %s", self.volname))
+
+        # Create a file
+        cmd = ("cd %s; touch 'file_openfd';" % mountpoint)
+        ret, _, err = g.run(self.mounts[0].client_system, cmd)
+        self.assertEqual(ret, 0, err)
+        g.log.info('Finished creating a file while all the bricks are UP')
+
+        # Set volume options
+        ret = set_volume_options(self.mnode, self.volname,
+                                 {"performance.read-after-open": "yes"})
+        self.assertTrue(ret, 'Failed to set volume {}'
+                        ' options'.format(self.volname))
+        g.log.info('Successfully set %s volume options', self.volname)
+
+        # Bring brick b1 offline
+        sub_vols = get_subvols(self.mnode, self.volname)
+        subvols_list = sub_vols['volume_subvols']
+        bricks_list1 = subvols_list[0]
+        brick_b1_down = choice(bricks_list1)
+        ret = bring_bricks_offline(self.volname,
+                                   brick_b1_down)
+        self.assertTrue(ret, 'Brick %s is not offline' % brick_b1_down)
+        g.log.info('Brick %s is offline successfully', brick_b1_down)
+
+        node = self.mounts[0].client_system
+        # Open FD
+        proc = open_file_fd(mountpoint, time=100,
+                            client=node)
+
+        # Bring brick b1 online
+        ret = bring_bricks_online(self.mnode, self.volname,
+                                  [brick_b1_down],
+                                  'glusterd_restart')
+        self.assertTrue(ret, 'Brick {} is not brought '
+                        'online'.format(brick_b1_down))
+        g.log.info('Brick %s is online successfully', brick_b1_down)
+
+        # Validate peers are connected
+        ret = self.validate_peers_are_connected()
+        self.assertTrue(ret, "Peers are not in connected state after bringing"
+                        " an offline brick to online via `glusterd restart`")
+        g.log.info("Successfully validated peers are in connected state")
+
+        # Check if write to FD is successful
+        g.log.info('Open FD on file successful')
+        ret, _, _ = proc.async_communicate()
+        self.assertEqual(ret, 0, "Write to FD failed")
+
+        # Monitor heal completion
+        ret = monitor_heal_completion(self.mnode, self.volname)
+        self.assertTrue(ret, 'Heal has not yet completed')
+        g.log.info('Heal has completed successfully')
+
+        file_openfd = os.path.join(mountpoint, 'file_openfd')
+
+        # Check if data exists on file
+        ret = check_if_pattern_in_file(node, 'xyz', file_openfd)
+        self.assertEqual(ret, 0, 'xyz does not exist in file')
+        g.log.info('xyz exists in file')
+
+        file_fd = 'file_openfd'
+
+        # Check if EC version is same on all bricks which are up
+        ret = validate_xattr_on_all_bricks(bricks_list1, file_fd,
+                                           'trusted.ec.version')
+        self.assertTrue(ret, "Healing not completed and EC version is "
+                        "not updated")
+        g.log.info("Healing is completed and EC version is updated")
+
+        # Check if EC size is same on all bricks which are up
+        ret = validate_xattr_on_all_bricks(bricks_list1, file_fd,
+                                           'trusted.ec.size')
+        self.assertTrue(ret, "Healing not completed and EC size is "
+                        "not updated")
+        g.log.info("Healing is completed and EC size is updated")
+
+        # Check stat of file
+        cmd = "cd %s; du -kh file_openfd" % mountpoint
+        ret, _, err = g.run(self.mounts[0].client_system, cmd)
+        self.assertEqual(ret, 0, err)
+        g.log.info('File %s is accessible', file_fd)
diff --git a/tests/functional/disperse/test_ec_quorumcount_5.py b/tests/functional/disperse/test_ec_quorumcount_5.py
new file mode 100644
index 000000000..c4aadd602
--- /dev/null
+++ b/tests/functional/disperse/test_ec_quorumcount_5.py
@@ -0,0 +1,309 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
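+
+# Background for this test (a hedged summary): disperse.quorum-count sets
+# the minimum number of bricks that must be up in a disperse subvol for
+# writes to be allowed. Assuming the usual 4+2 layout (6 bricks per
+# subvol), a quorum-count of 5 tolerates one brick going down; losing a
+# second brick drops the subvol below quorum, so writes start failing
+# while reads are still served.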
+
+"""
+Test Description:
+    Test Disperse Quorum Count Set to 5
+"""
+from random import sample, choice
+from glusto.core import Glusto as g
+
+from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on)
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.misc.misc_libs import upload_scripts
+from glustolibs.io.utils import validate_io_procs
+from glustolibs.gluster.brick_libs import (bring_bricks_online,
+                                           wait_for_bricks_to_be_online,
+                                           get_offline_bricks_list,
+                                           bring_bricks_offline)
+from glustolibs.gluster.rebalance_ops import (rebalance_start,
+                                              wait_for_rebalance_to_complete)
+from glustolibs.gluster.volume_ops import (volume_reset,
+                                           set_volume_options)
+from glustolibs.gluster.volume_libs import (
+    log_volume_info_and_status, expand_volume,
+    get_subvols)
+
+
+@runs_on([['dispersed', 'distributed-dispersed'],
+          ['glusterfs']])
+class TestEcQuorumCount5(GlusterBaseClass):
+
+    @classmethod
+    def setUpClass(cls):
+        # Calling GlusterBaseClass setUpClass
+        cls.get_super_method(cls, 'setUpClass')()
+
+        # Upload io scripts for running IO on mounts
+        cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
+                                  "file_dir_ops.py")
+        ret = upload_scripts(cls.clients, cls.script_upload_path)
+        if not ret:
+            raise ExecutionError("Failed to upload IO scripts to clients %s"
+                                 % cls.clients)
+        g.log.info("Successfully uploaded IO scripts to clients %s",
+                   cls.clients)
+
+    def setUp(self):
+        # Calling GlusterBaseClass setUp
+        self.get_super_method(self, 'setUp')()
+
+        # Setup Volume and Mount Volume
+        ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
+        if not ret:
+            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+        g.log.info("Successful in Setup Volume and Mount Volume")
+
+    def tearDown(self):
+        """
+        Cleanup and umount volume
+        """
+        # Cleanup and umount volume
+        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+        if not ret:
+            raise ExecutionError("Failed to umount the vol & cleanup Volume")
+        g.log.info("Successful in umounting the volume and Cleanup")
+
+        # Calling GlusterBaseClass teardown
+        self.get_super_method(self, 'tearDown')()
+
+    def generate_read_cmd(self, mountpoint, start, end):
+        """Function which generates readcmd"""
+        self.readcmd = ("cd {}; for i in `seq {} {}` ;"
+                        "do dd if=file$i of=/dev/null bs=1M "
+                        "count=5;done".format(mountpoint, start, end))
+
+    def test_ec_quorumcount_5(self):
+        """
+        Test Steps:
+        - Write IO's when all bricks are online
+        - Get the subvol from which bricks are to be brought down
+        - Set volume disperse quorum count to 5
+        - Start writing and reading IO's
+        - Bring a brick down, say b1
+        - Validate write and read are successful
+        - Bring a brick down, say b2
+        - Validate write has failed and read is successful
+        - Start IO's again while quorum is not met on volume;
+          write should fail and read should pass
+        - Add-brick and log
+        - Start rebalance
+        - Wait for rebalance, which should fail as quorum is not met
+        - Bring bricks online
+        - Wait for bricks to come online
+        - Check if bricks are online
+        - Start IO's again when all bricks are online
+        - IO's should complete successfully
+        - Start IO's again and reset volume
+        - Bring down other bricks to max redundancy
+        - Validate IO's and wait for completion
+        """
+
+        # pylint: disable=too-many-branches,too-many-statements,too-many-locals
+
+        mountpoint = self.mounts[0].mountpoint
+        client1 = self.mounts[0].client_system
+        client2 = self.mounts[1].client_system
+
+        # Write IO's when all bricks are online
+        writecmd = ("cd %s; for i in `seq 1 100` ;"
+                    "do dd if=/dev/urandom of=file$i bs=1M "
+                    "count=5;done" % mountpoint)
+
+        # IO's should complete successfully
+        ret, _, err = g.run(client1, writecmd)
+        self.assertEqual(ret, 0, err)
+        g.log.info('Finished writes on files successfully')
+
+        # Select a subvol from which bricks are to be brought down
+        sub_vols = get_subvols(self.mnode, self.volname)
+        bricks_list1 = list(choice(sub_vols['volume_subvols']))
+        brick_1, brick_2 = sample(bricks_list1, 2)
+
+        # Set volume disperse quorum count to 5
+        ret = set_volume_options(self.mnode, self.volname,
+                                 {"disperse.quorum-count": "5"})
+        self.assertTrue(ret, 'Failed to set volume {}'
+                        ' options'.format(self.volname))
+        g.log.info('Successfully set disperse quorum on %s', self.volname)
+
+        # Start writing and reading IO's
+        procwrite, procread, count = [], [], 1
+        for mount_obj in self.mounts:
+            writecmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
+                        "--dirname-start-num %d --dir-depth 5 "
+                        "--dir-length 10 --max-num-of-dirs 2 "
+                        "--num-of-files 15 %s" % (
+                            self.script_upload_path, count,
+                            mount_obj.mountpoint))
+            proc = g.run_async(mount_obj.client_system, writecmd,
+                               user=mount_obj.user)
+            procwrite.append(proc)
+            count += 10
+
+        self.generate_read_cmd(mountpoint, '1', '10')
+        ret = g.run_async(client2, self.readcmd)
+        procread.append(ret)
+
+        # Bring the 1st brick down
+        ret = bring_bricks_offline(self.volname,
+                                   brick_1)
+        self.assertTrue(ret, 'Brick {} is not offline'.format(brick_1))
+        g.log.info('Brick %s is offline successfully', brick_1)
+
+        writecmd = ("cd %s; for i in `seq 101 110` ;"
+                    "do dd if=/dev/urandom of=file$i bs=1M "
+                    "count=5;done" % mountpoint)
+
+        # IO's should complete successfully
+        ret, _, err = g.run(client1, writecmd)
+        self.assertEqual(ret, 0, err)
+        g.log.info('Finished writes on files successfully')
+
+        self.generate_read_cmd(mountpoint, '101', '110')
+        ret, _, err = g.run(client1, self.readcmd)
+        self.assertEqual(ret, 0, err)
+        g.log.info('Finished reads on files successfully')
+
+        # Bring the 2nd brick down
+        ret = bring_bricks_offline(self.volname,
+                                   brick_2)
+        self.assertTrue(ret, 'Brick {} is not offline'.format(brick_2))
+        g.log.info('Brick %s is offline successfully', brick_2)
+
+        # Validate write has failed and read is successful
+        ret = validate_io_procs(procwrite, self.mounts)
+        self.assertFalse(ret, 'Write successful even after disperse quorum is '
+                         'not met')
+        g.log.info('EXPECTED - Writes failed as disperse quorum is not met')
+
+        ret = validate_io_procs(procread, self.mounts[1])
+        self.assertTrue(ret, 'Read operation failed on the client')
+        g.log.info('Reads on files successful')
+
+        # Start IO's again while quorum is not met on volume
+        procwrite = []
+        writecmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
+                    "--dirname-start-num 20 --dir-depth 1 "
+                    "--dir-length 10 --max-num-of-dirs 1 "
+                    "--num-of-files 10 %s" % (
+                        self.script_upload_path,
+                        mountpoint))
+        proc = g.run_async(client1, writecmd)
+        procwrite.append(proc)
+        ret = validate_io_procs(procwrite, self.mounts[0])
+        self.assertFalse(ret, 'Write successful even after disperse quorum is '
+                         'not met')
+        g.log.info('EXPECTED - Writes failed as disperse quorum is not met')
+
+        self.generate_read_cmd(mountpoint, '1', '100')
+        ret, _, err = g.run(client2, self.readcmd)
+        self.assertEqual(ret, 0, err)
+        g.log.info('Reads on files successful')
+
+        # Add brick
+        ret = expand_volume(self.mnode, self.volname, self.servers,
+                            self.all_servers_info, force=True)
+        self.assertTrue(ret, ("Failed to expand the volume {}".format
+                              (self.volname)))
{}".format + (self.volname))) + g.log.info("Expanding volume %s is successful", self.volname) + + # Log Volume Info and Status after expanding the volume + ret = log_volume_info_and_status(self.mnode, self.volname) + self.assertTrue(ret, ("Logging volume info and status failed on " + "volume {}".format(self.volname))) + g.log.info("Successful in logging volume info and status of volume %s", + self.volname) + + # Start Rebalance + ret, _, _ = rebalance_start(self.mnode, self.volname) + self.assertEqual(ret, 0, ('Rebalance failed on the volume' + ' {}'.format(self.volname))) + g.log.info('Rebalance has started on volume %s', + self.volname) + + # Wait for rebalance to complete + # Which should also fail as quorum is not met + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=600) + self.assertFalse(ret, "Rebalance passed though disperse quorum " + "is not met on volume") + g.log.info("Expected: Rebalance failed on the volume %s,disperse" + " quorum is not met", self.volname) + + # Bring brick online + brick_list = brick_1, brick_2 + ret = bring_bricks_online(self.mnode, self.volname, + brick_list) + self.assertTrue(ret, 'Brick not brought online') + g.log.info('Brick brought online successfully') + + # Wait for brick to come online + ret = wait_for_bricks_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, 'Bricks are not online') + g.log.info('EXPECTED : Bricks are online') + + # Check if bricks are online + ret = get_offline_bricks_list(self.mnode, self.volname) + self.assertListEqual(ret, [], 'All bricks are not online') + g.log.info('All bricks are online') + + # Start IO's again when all bricks are online + writecmd = ("cd %s; for i in `seq 101 200` ;" + "do dd if=/dev/urandom of=file$i bs=1M " + "count=5;done" % mountpoint) + self.generate_read_cmd(mountpoint, '101', '120') + + # IO's should complete successfully + ret, _, err = g.run(client1, writecmd) + self.assertEqual(ret, 0, err) + g.log.info('Writes on client % successful', client1) + + ret, _, err = g.run(client2, self.readcmd) + self.assertEqual(ret, 0, err) + g.log.info('Read on client % successful', client2) + + # Start IO's again + all_mounts_procs, count = [], 30 + for mount_obj in self.mounts: + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d --dir-depth 2 " + "--dir-length 10 --max-num-of-dirs 5 " + "--num-of-files 5 %s" % ( + self.script_upload_path, count, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + all_mounts_procs.append(proc) + count += 10 + + # Reset volume + ret, _, err = volume_reset(self.mnode, self.volname) + self.assertEqual(ret, 0, err) + g.log.info('Reset of volume %s successful', self.volname) + + # Bring down other bricks to max redundancy + # Bringing bricks offline + bricks_to_offline = sample(bricks_list1, 2) + ret = bring_bricks_offline(self.volname, + bricks_to_offline) + self.assertTrue(ret, 'Redundant bricks not offline') + g.log.info('Redundant bricks are offline successfully') + + # Validating IO's and waiting to complete + ret = validate_io_procs(all_mounts_procs, self.mounts) + self.assertTrue(ret, 'IO failed on some of the clients') + g.log.info("Successfully validated all IO's") diff --git a/tests/functional/disperse/test_ec_quorumcount_6.py b/tests/functional/disperse/test_ec_quorumcount_6.py new file mode 100644 index 000000000..5ccc59180 --- /dev/null +++ b/tests/functional/disperse/test_ec_quorumcount_6.py @@ -0,0 +1,286 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +Test Description: + Test Disperse Quorum Count Set to 6 +""" +from random import sample, choice +from glusto.core import Glusto as g + +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import validate_io_procs +from glustolibs.gluster.brick_libs import (bring_bricks_online, + wait_for_bricks_to_be_online, + get_offline_bricks_list, + bring_bricks_offline) +from glustolibs.gluster.rebalance_ops import (rebalance_start, + wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_ops import (volume_reset, + set_volume_options) +from glustolibs.gluster.volume_libs import ( + log_volume_info_and_status, expand_volume, + get_subvols) + + +@runs_on([['dispersed', 'distributed-dispersed'], + ['glusterfs']]) +class TestEcQuorumCount6(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_ec_quorumcount_6(self): + """ + Test Steps: + - Write IO's when all bricks are online + - Get subvol from which bricks to be brought down + - Set volume disperse quorum count to 6 + - Start writing and reading IO's + - Bring a brick down,say b1 + - Validate write has failed and read is successful + - Start IO's again while quorum is not met on volume + write should fail and read should pass + - Add-brick and log + - Start Rebalance + - Wait for rebalance,which should fail as quorum is not met + - Bring brick online + - Wait for brick to come online + - Check if bricks are online + - Start IO's again when all bricks are online + - IO's should complete 
successfully
+        - Start IO's again and reset volume
+        - Bring down other bricks to max redundancy
+        - Validating IO's and waiting to complete
+        """
+
+        # pylint: disable=too-many-branches,too-many-statements,too-many-locals
+
+        mountpoint = self.mounts[0].mountpoint
+        client1 = self.mounts[0].client_system
+        client2 = self.mounts[1].client_system
+
+        # Write IO's when all bricks are online
+        writecmd = ("cd %s; for i in `seq 1 100` ;"
+                    "do dd if=/dev/urandom of=file$i bs=1M "
+                    "count=5;done" % mountpoint)
+
+        # IO's should complete successfully
+        ret, _, err = g.run(client1, writecmd)
+        self.assertEqual(ret, 0, err)
+        g.log.info('Finished writes on files successfully')
+
+        # Select a subvol from which a brick is to be brought down
+        sub_vols = get_subvols(self.mnode, self.volname)
+        bricks_list1 = list(choice(sub_vols['volume_subvols']))
+        brick_1 = sample(bricks_list1, 1)
+
+        # Set volume disperse quorum count to 6
+        ret = set_volume_options(self.mnode, self.volname,
+                                 {"disperse.quorum-count": "6"})
+        self.assertTrue(ret, 'Failed to set volume {}'
+                        ' options'.format(self.volname))
+        g.log.info('Successfully set disperse quorum on %s', self.volname)
+
+        # Start writing and reading IO's
+        procwrite, procread, count = [], [], 1
+        for mount_obj in self.mounts:
+            writecmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
+                        "--dirname-start-num %d --dir-depth 1 "
+                        "--dir-length 10 --max-num-of-dirs 1 "
+                        "--num-of-files 10 %s" % (
+                            self.script_upload_path, count,
+                            mount_obj.mountpoint))
+            proc = g.run_async(mount_obj.client_system, writecmd,
+                               user=mount_obj.user)
+            procwrite.append(proc)
+            count = count + 10
+
+        readcmd = ("cd %s; for i in `seq 1 100` ;"
+                   "do dd if=file$i of=/dev/null bs=1M "
+                   "count=5;done" % mountpoint)
+        ret = g.run_async(client2, readcmd)
+        procread.append(ret)
+
+        # Bring first brick down
+        ret = bring_bricks_offline(self.volname,
+                                   brick_1)
+        self.assertTrue(ret, 'Brick {} is not offline'.format(brick_1))
+        g.log.info('Brick %s is brought offline successfully', brick_1)
+
+        # Validate write has failed and read is successful
+        ret = validate_io_procs(procwrite, self.mounts)
+        self.assertFalse(ret, 'Write successful even after disperse quorum is '
+                         'not met')
+        g.log.info('EXPECTED - Writes failed as disperse quorum is not met')
+
+        ret = validate_io_procs(procread, self.mounts[1])
+        self.assertTrue(ret, 'Read operation failed on the client')
+        g.log.info('Reads on files successful')
+
+        # Start IO's again while quorum is not met on volume
+
+        writecmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
+                    "--dirname-start-num 20 --dir-depth 1 "
+                    "--dir-length 10 --max-num-of-dirs 1 "
+                    "--num-of-files 10 %s" % (
+                        self.script_upload_path,
+                        mountpoint))
+        readcmd = ("cd %s; for i in `seq 1 100` ;"
+                   "do dd if=file$i of=/dev/null bs=1M "
+                   "count=5;done" % mountpoint)
+
+        ret, _, err = g.run(client1, writecmd)
+        self.assertNotEqual(ret, 0, 'Writes passed even after disperse quorum '
+                            'not met')
+        g.log.info('Expected: Writes failed as disperse quorum is not '
+                   'met with %s error', err)
+
+        ret, _, err = g.run(client2, readcmd)
+        self.assertEqual(ret, 0, err)
+        g.log.info('Reads on files successful')
+
+        # Add brick
+        ret = expand_volume(self.mnode, self.volname, self.servers,
+                            self.all_servers_info, force=True)
+        self.assertTrue(ret, ("Failed to expand the volume "
+                              "{}".format(self.volname)))
+        g.log.info("Expanding volume %s is successful", self.volname)
+
+        # Log Volume Info and Status after expanding the volume
+        ret = log_volume_info_and_status(self.mnode, self.volname)
+        self.assertTrue(ret, ("Logging volume info and status failed on "
+                              "volume {}".format(self.volname)))
+        g.log.info("Successful in logging volume info and status of "
+                   "volume %s", self.volname)
+
+        # Start Rebalance
+        ret, _, _ = rebalance_start(self.mnode, self.volname)
+        self.assertEqual(ret, 0, ('Rebalance failed on the volume'
+                                  ' {}'.format(self.volname)))
+        g.log.info('Rebalance has started on volume %s',
+                   self.volname)
+
+        # Wait for rebalance to complete
+        # Which should also fail as quorum is not met
+        ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
+                                             timeout=600)
+        self.assertFalse(ret, "Rebalance passed even though disperse quorum "
+                         "is not met on volume")
+        g.log.info("Expected: Rebalance failed on the volume %s, disperse"
+                   " quorum is not met", self.volname)
+
+        # Bring brick online
+        ret = bring_bricks_online(self.mnode, self.volname,
+                                  brick_1)
+        self.assertTrue(ret, 'Brick not brought online')
+        g.log.info('Brick brought online successfully')
+
+        # Wait for brick to come online
+        ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
+        self.assertTrue(ret, 'Bricks are not online')
+        g.log.info('EXPECTED : Bricks are online')
+
+        # Check if bricks are online
+        ret = get_offline_bricks_list(self.mnode, self.volname)
+        self.assertListEqual(ret, [], 'All bricks are not online')
+        g.log.info('All bricks are online')
+
+        # Start IO's again when all bricks are online
+        writecmd = ("cd %s; for i in `seq 101 200` ;"
+                    "do dd if=/dev/urandom of=file$i bs=1M "
+                    "count=5;done" % mountpoint)
+        readcmd = ("cd %s; for i in `seq 101 200` ;"
+                   "do dd if=file$i of=/dev/null bs=1M "
+                   "count=5;done" % mountpoint)
+
+        # IO's should complete successfully
+        ret, _, err = g.run(client1, writecmd)
+        self.assertEqual(ret, 0, err)
+        g.log.info('Writes on client %s successful', client1)
+
+        ret, _, err = g.run(client2, readcmd)
+        self.assertEqual(ret, 0, err)
+        g.log.info('Read on client %s successful', client2)
+
+        # Start IO's again
+        all_mounts_procs, count = [], 30
+        for mount_obj in self.mounts:
+            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
+                   "--dirname-start-num %d --dir-depth 2 "
+                   "--dir-length 10 --max-num-of-dirs 5 "
+                   "--num-of-files 5 %s" % (
+                       self.script_upload_path, count,
+                       mount_obj.mountpoint))
+            proc = g.run_async(mount_obj.client_system, cmd,
+                               user=mount_obj.user)
+            all_mounts_procs.append(proc)
+            count = count + 10
+
+        # Reset volume
+        ret, _, err = volume_reset(self.mnode, self.volname)
+        self.assertEqual(ret, 0, err)
+        g.log.info('Reset of volume %s successful', self.volname)
+
+        # Bring down other bricks to max redundancy
+        # Bringing bricks offline
+        bricks_to_offline = sample(bricks_list1, 2)
+        ret = bring_bricks_offline(self.volname,
+                                   bricks_to_offline)
+        self.assertTrue(ret, 'Redundant bricks not offline')
+        g.log.info('Redundant bricks are offline successfully')
+
+        # Validating IO's and waiting to complete
+        ret = validate_io_procs(all_mounts_procs, self.mounts)
+        self.assertTrue(ret, 'IO failed on some of the clients')
+        g.log.info("Successfully validated all IO's")
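Both quorum tests finish by taking a subvol down to maximum redundancy before the final IO validation. A reduced sketch of that brick selection, using the same helpers the tests import (the function and its arguments are illustrative, not part of the patch):

from random import sample

from glustolibs.gluster.brick_libs import bring_bricks_offline
from glustolibs.gluster.volume_libs import get_subvols

def bring_subvols_to_max_redundancy(mnode, volname, redundancy_count):
    # An EC subvol stays readable with up to redundancy_count bricks
    # down, so pick exactly that many from each subvol and stop them.
    subvols = get_subvols(mnode, volname)['volume_subvols']
    to_offline = [brick for subvol in subvols
                  for brick in sample(subvol, redundancy_count)]
    assert bring_bricks_offline(volname, to_offline), 'bricks still online'
    return to_offline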
diff --git a/tests/functional/disperse/test_ec_quota.py b/tests/functional/disperse/test_ec_quota.py
new file mode 100644
index 000000000..8b3440780
--- /dev/null
+++ b/tests/functional/disperse/test_ec_quota.py
@@ -0,0 +1,159 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+"""
+Test Description:
+    Test quota on an EC volume
+"""
+
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.quota_ops import (quota_enable,
+                                          quota_disable,
+                                          quota_set_soft_timeout,
+                                          quota_set_hard_timeout,
+                                          quota_limit_usage)
+from glustolibs.gluster.glusterdir import mkdir
+
+
+@runs_on([['dispersed', 'distributed-dispersed'],
+          ['glusterfs']])
+class TestEcQuota(GlusterBaseClass):
+
+    def setUp(self):
+        # Calling GlusterBaseClass setUp
+        self.get_super_method(self, 'setUp')()
+
+        # Setup Volume and Mount Volume
+        ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
+        if not ret:
+            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+        g.log.info("Successful in Setup Volume and Mount Volume")
+
+    def tearDown(self):
+        """
+        Cleanup and umount volume
+        """
+        # Disable Quota
+        ret, _, _ = quota_disable(self.mnode, self.volname)
+        if ret:
+            raise ExecutionError("Failed to disable quota on the volume "
+                                 "%s" % self.volname)
+        g.log.info("Successfully disabled quota on the volume %s",
+                   self.volname)
+
+        # Cleanup and umount volume
+        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+        if not ret:
+            raise ExecutionError("Failed to umount the vol & cleanup Volume")
+        g.log.info("Successful in umounting the volume and Cleanup")
+
+        # Calling GlusterBaseClass teardown
+        self.get_super_method(self, 'tearDown')()
+
+    def set_quota_limit(self, limit):
+        """
+        Set Quota limit on the volume
+        """
+        # Path to set quota limit
+        path = "/"
+
+        # Set Quota limit
+        ret, _, _ = quota_limit_usage(self.mnode, self.volname,
+                                      path, limit=limit)
+        self.assertEqual(ret, 0, ("Failed to set quota limit on path %s of "
+                                  "the volume %s", path, self.volname))
+        g.log.info("Successfully set the Quota limit on %s of the volume %s",
+                   path, self.volname)
+
+    def read_write_files(self, files, mount_dir, client):
+        """
+        Read and write files on the volume
+        """
+        # Write files
+        for i in range(1, 5):
+            writecmd = ("cd %s/dir%s; for i in `seq 1 %s` ;"
+                        "do dd if=/dev/urandom of=file$i bs=1M "
+                        "count=5;done" % (mount_dir, i, files))
+            ret, _, _ = g.run(client, writecmd)
+            self.assertEqual(ret, 0, "Unexpected: File creation failed")
+            g.log.info("Expected: File creation succeeded")
+
+        # Reading files
+        for i in range(1, 5):
+            readcmd = ("cd %s/dir%s; for i in `seq 1 %s` ;"
+                       "do dd if=file$i of=/dev/null bs=1M "
+                       "count=5;done" % (mount_dir, i, files))
+            ret, _, _ = g.run(client, readcmd)
+            self.assertEqual(ret, 0, "Unexpected: Reading of file failed")
+            g.log.info("Expected: Able to read file successfully")
+
+    def test_ec_quota(self):
+        """
+        - Enable quota on the volume
+        - Set a limit of 4 GB on the root of the volume
+        - Set Quota soft-timeout to 0
seconds + - Set Quota hard-timeout to 0 second + - Create 10 directories from the mount point + - Create files of around 2.5 GB + - Reading files + - Decrease quota limit to 3 GB on the root of the volume + - Writing files of around 500 MB + - Reading files + """ + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + # Enable quota on the volume + ret, _, _ = quota_enable(self.mnode, self.volname) + self.assertEqual(ret, 0, ("Failed to enable quota on the volume %s", + self.volname)) + g.log.info("Successfully enabled quota on the volume %s", self.volname) + + # Set a limit of 4 GB on the root of the volume + self.set_quota_limit(limit="4GB") + + # Set Quota soft-timeout to 0 seconds + ret, _, _ = quota_set_soft_timeout(self.mnode, self.volname, '0sec') + self.assertEqual(ret, 0, "Failed to set soft timeout") + g.log.info("Quota soft timeout set successful") + + # Set Quota hard-timeout to 0 second + ret, _, _ = quota_set_hard_timeout(self.mnode, self.volname, '0sec') + self.assertEqual(ret, 0, "Failed to set hard timeout") + g.log.info("Quota hard timeout set successful") + + # Create 10 directories from the mount point + mount_obj = self.mounts[0] + mount_dir = mount_obj.mountpoint + client = mount_obj.client_system + + for i in range(1, 11): + ret = mkdir(client, "%s/dir%s" % (mount_dir, i)) + self.assertTrue(ret, ("Failed to create dir under %s-%s", + client, mount_dir)) + g.log.info("Directory 'dir%s' created successfully", i) + g.log.info("Successfully created directories on %s:%s", + client, mount_dir) + + # Create files of around 2.5 GB and reading + self.read_write_files(files=100, mount_dir=mount_dir, + client=client) + + # Decrease quota limit to 3 GB on the root of the volume + self.set_quota_limit(limit="3GB") + + # Writing files of around 500 MB and reading + self.read_write_files(files=10, mount_dir=mount_dir, + client=client) diff --git a/tests/functional/disperse/test_ec_quota_errors.py b/tests/functional/disperse/test_ec_quota_errors.py new file mode 100644 index 000000000..8aa333878 --- /dev/null +++ b/tests/functional/disperse/test_ec_quota_errors.py @@ -0,0 +1,415 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
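Condensing the quota workflow that test_ec_quota drives above into its glustolibs calls, a sketch for reference (the wrapper function and default limit are assumptions; the helper signatures match their use in the tests):

from glustolibs.gluster.quota_ops import (quota_enable, quota_limit_usage,
                                          quota_set_hard_timeout,
                                          quota_set_soft_timeout)

def apply_immediate_quota(mnode, volname, limit='4GB'):
    # Enable quota, zero both timeouts so limit violations surface at
    # once, then cap the volume root; shrinking the limit later is the
    # same quota_limit_usage call with a smaller value.
    ret, _, _ = quota_enable(mnode, volname)
    assert ret == 0, 'failed to enable quota'
    for set_timeout in (quota_set_soft_timeout, quota_set_hard_timeout):
        ret, _, _ = set_timeout(mnode, volname, '0sec')
        assert ret == 0, 'failed to zero a quota timeout'
    ret, _, _ = quota_limit_usage(mnode, volname, '/', limit=limit)
    assert ret == 0, 'failed to set the quota limit'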
+ +from math import ceil +from random import sample +from time import sleep, time +from unittest import SkipTest + +from glusto.core import Glusto as g + +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + get_online_bricks_list) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.glusterfile import remove_file +from glustolibs.gluster.lib_utils import (append_string_to_file, + get_disk_usage, + search_pattern_in_file) +from glustolibs.gluster.quota_ops import (quota_enable, quota_fetch_list, + quota_limit_usage, + quota_set_alert_time, + quota_set_hard_timeout, + quota_set_soft_timeout) +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.volume_ops import volume_start +from glustolibs.io.utils import validate_io_procs, wait_for_io_to_complete +from glustolibs.misc.misc_libs import upload_scripts + + +@runs_on([['dispersed', 'distributed-dispersed'], ['glusterfs']]) +class TestEcQuotaError(GlusterBaseClass): + """ + Description: To check EIO errors changes to EDQUOTE errors when the + specified quota limits are breached + """ + # pylint: disable=too-many-instance-attributes, too-many-statements + @classmethod + def setUpClass(cls): + cls.get_super_method(cls, 'setUpClass')() + if cls.volume_type == 'distributed-dispersed': + raise SkipTest('BZ #1707813 limits the functionality of fallocate') + if cls.volume_type == 'dispersed': + raise SkipTest('BZ #1339144 is being hit intermittently') + cls.script_path = '/usr/share/glustolibs/io/scripts/fd_writes.py' + ret = upload_scripts(cls.clients, cls.script_path) + if not ret: + raise ExecutionError('Failed to upload IO script to client') + + def setUp(self): + self.get_super_method(self, 'setUp')() + self.num_of_dirs = 2 + + # For test_ec_quota_errors_on_limit only one client is needed + if 'on_limit' in self.id().split('.')[-1]: + self.num_of_dirs = 1 + self.mounts = [self.mounts[0]] + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + self.all_mount_procs = [] + self.offline_bricks = [] + if not ret: + raise ExecutionError('Failed to setup and mount volume') + + def tearDown(self): + if self.offline_bricks: + ret, _, _ = volume_start(self.mnode, self.volname, force=True) + if ret: + raise ExecutionError('Not able to force start volume to bring ' + 'offline bricks online') + if self.all_mount_procs: + ret = wait_for_io_to_complete(self.all_mount_procs, self.mounts) + if not ret: + raise ExecutionError('Wait for IO completion failed') + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError('Failed to unmount and cleanup volume') + self.get_super_method(self, 'tearDown')() + + def _get_space_in_gb(self, host, path, size='free'): + """ + Return available or total space on the provided `path` + Kwargs: + size (str) : total/free(default) size to be queried on `path` + """ + space_avail = get_disk_usage(host, path) + self.assertIsNotNone( + space_avail, 'Failed to get disk usage stats of ' + '{} on {}'.format(host, path)) + if size == 'total': + return int(ceil(space_avail['total'])) + return int(ceil(space_avail['free'])) + + def _insert_bp(self, host, logpath): + """ + Generates and inserts a breakpoint in the given logpath on the host + """ + append_string = self.bp_text + str(self.bp_count) + ret = append_string_to_file(host, logpath, append_string) + self.assertTrue( + ret, 'Not able to 
append string to the file {} '
+            'on {}'.format(logpath, host))
+        self.bp_count += 1
+
+    def _fallocate_file(self):
+        """
+        Perform `fallocate -l <alloc_size> <fqpath>` on <client>
+        """
+
+        # Delete the files if they exist (sparsefile is created on
+        # absolute sizes)
+        ret = remove_file(self.client, self.fqpath + '*', force=True)
+        self.assertTrue(
+            ret, 'Unable to delete existing file for `fallocate` of new file')
+        sleep(5)
+
+        # Create multiple sparsefiles rather than one big file
+        sizes = [self.alloc_size]
+        if self.alloc_size >= self.brick_size:
+            sizes = ([self.brick_size // 2] *
+                     (self.alloc_size // self.brick_size))
+            sizes *= 2
+            sizes.append(self.alloc_size % self.brick_size)
+        rem_size = self.alloc_size - sum(sizes)
+        if rem_size:
+            sizes.append(rem_size)
+
+        for count, size in enumerate(sizes, start=1):
+            ret, _, _ = g.run(
+                self.client,
+                'fallocate -l {}G {}{}'.format(size, self.fqpath, count))
+            self.assertEqual(
+                ret, 0, 'Not able to fallocate {}* file on {}'.format(
+                    self.fqpath, self.client))
+
+    def _validate_error_in_mount_log(self, pattern, exp_pre=True):
+        """
+        Validate type of error from mount log on setting quota
+        """
+        assert_method = self.assertTrue
+        assert_msg = ('Fail: Not able to validate presence of "{}" '
+                      'in mount log'.format(pattern))
+        if not exp_pre:
+            assert_method = self.assertFalse
+            assert_msg = ('Fail: Not able to validate absence of "{}" '
+                          'in mount log'.format(pattern))
+        ret = search_pattern_in_file(self.client, pattern, self.logpath,
+                                     self.bp_text + str(self.bp_count - 2),
+                                     self.bp_text + str(self.bp_count - 1))
+        assert_method(ret, assert_msg)
+
+        # Validate against `quota list` command
+        if 'quota' in pattern.lower():
+            dir_path = '/dir/dir1'
+            ret = quota_fetch_list(self.mnode, self.volname)
+            self.assertIsNotNone(
+                ret.get(dir_path),
+                'Not able to get quota list for the path {}'.format(dir_path))
+            ret = ret.get(dir_path)
+            verified = False
+            if ret['sl_exceeded'] is exp_pre and ret['hl_exceeded'] is exp_pre:
+                verified = True
+            self.assertTrue(
+                verified, 'Failed to validate Quota list command against '
+                'soft and hard limits')
+
+    def _perform_quota_ops_before_brick_down(self):
+        """
+        Refactor of common test steps across three test functions
+        """
+        self.client, self.m_point = (self.mounts[0].client_system,
+                                     self.mounts[0].mountpoint)
+        ret = mkdir(self.client, '%s/dir/dir1' % self.m_point, parents=True)
+        self.assertTrue(ret, 'Failed to create first dir on mountpoint')
+        if self.num_of_dirs == 2:
+            ret = mkdir(self.client, '%s/dir/dir2' % self.m_point)
+            self.assertTrue(ret, 'Failed to create second dir on mountpoint')
+
+        # Types of errors
+        self.space_error = 'Input/output error|No space left on device'
+        self.quota_error = 'Disk quota exceeded'
+
+        # Start IO from the clients
+        cmd = ('/usr/bin/env python {} -n 10 -t 480 -d 10 -c 256 --dir '
+               '{}/dir/dir{}')
+        for count, mount in enumerate(self.mounts, start=1):
+            proc = g.run_async(
+                mount.client_system,
+                cmd.format(self.script_path, mount.mountpoint, count))
+            self.all_mount_procs.append(proc)
+
+        # fallocate a large file and perform IO on remaining space
+        online_bricks = get_online_bricks_list(self.mnode, self.volname)
+        self.assertIsNotNone(online_bricks, 'Failed to get list of online '
+                             'bricks')
+        brick_node, brick_path = online_bricks[0].split(':')
+        self.brick_size = self._get_space_in_gb(brick_node,
+                                                brick_path,
+                                                size='total')
+        self.free_disk_size = self._get_space_in_gb(self.client, self.m_point)
+        self.fqpath = self.m_point + '/sparsefile'
self.rem_size = 1 # Only 1G will be available to the mount + self.alloc_size = self.free_disk_size - self.rem_size + self._fallocate_file() + + # Insert breakpoint in the log + self.bp_text = 'breakpoint_' + str(ceil(time())) + '_' + self.bp_count = 1 + self.logpath = ('/var/log/glusterfs/mnt-' + self.volname + + '_glusterfs.log') + self._insert_bp(self.client, self.logpath) + + # Create file with size greater than available mount space + self.cmd = ('cd {}; cat /dev/urandom | tr -dc [:space:][:print:] ' + '| head -c {}G > datafile_{};') + self.fqpath = self.m_point + '/dir/dir1' + proc = g.run_async( + self.client, + self.cmd.format(self.fqpath, self.rem_size * 2, self.bp_count)) + self.assertFalse( + validate_io_procs([proc], self.mounts[0]), + 'Fail: Process should not allow data more ' + 'than available space to be written') + sleep(10) + self._insert_bp(self.client, self.logpath) + + # Validate space error in the mount log + self._validate_error_in_mount_log(pattern=self.space_error) + + # Enable quota and set all alert timeouts to 0secs + ret, _, _ = quota_enable(self.mnode, self.volname) + self.assertEqual(ret, 0, 'Not able to enable quota on the volume') + for alert_type, msg in ((quota_set_alert_time, + 'alert'), (quota_set_soft_timeout, 'soft'), + (quota_set_hard_timeout, 'hard')): + ret, _, _ = alert_type(self.mnode, self.volname, '0sec') + self.assertEqual( + ret, 0, 'Failed to set quota {} timeout to 0sec'.format(msg)) + + # Expose only 20G and set quota's on the dir + self.rem_size = 20 # Only 20G will be available to whole mount + self.alloc_size = self.free_disk_size - self.rem_size + self.fqpath = self.m_point + '/sparsefile' + self._fallocate_file() + + self._insert_bp(self.client, self.logpath) + ret, _, _ = quota_limit_usage(self.mnode, + self.volname, + path='/dir/dir1', + limit='10GB') + self.assertEqual(ret, 0, 'Not able to set quota limit on /dir/dir1') + if self.num_of_dirs == 2: + ret, _, _ = quota_limit_usage(self.mnode, + self.volname, + path='/dir/dir2', + limit='5GB') + self.assertEqual(ret, 0, 'Not able to set quota limit on ' + '/dir/dir2') + + # Write data more than available quota and validate error + sleep(10) + self.rem_size = 1 # Only 1G will be availble to /dir/dir1 + self.alloc_size = 9 + self.fqpath = self.m_point + '/dir/dir1/sparsefile' + self._fallocate_file() + + self.fqpath = self.m_point + '/dir/dir1' + proc = g.run_async( + self.client, + self.cmd.format(self.fqpath, self.rem_size * 2, self.bp_count)) + self.assertFalse( + validate_io_procs([proc], self.mounts[0]), + 'Fail: Process should not allow data more ' + 'than available space to be written') + sleep(10) + self._insert_bp(self.client, self.logpath) + self._validate_error_in_mount_log(pattern=self.quota_error) + self._validate_error_in_mount_log(pattern=self.space_error, + exp_pre=False) + + def _perform_quota_ops_after_brick_down(self): + """ + Refactor of common test steps across three test functions + """ + # Increase the quota limit on dir/dir1 and validate no errors on writes + self.alloc_size = self.free_disk_size - 50 + self.fqpath = self.m_point + '/sparsefile' + self._fallocate_file() + ret, _, _ = quota_limit_usage(self.mnode, + self.volname, + path='/dir/dir1', + limit='40GB') + self.assertEqual(ret, 0, 'Not able to expand quota limit on /dir/dir1') + sleep(15) + self._insert_bp(self.client, self.logpath) + self.fqpath = self.m_point + '/dir/dir1' + proc = g.run_async( + self.client, + self.cmd.format(self.fqpath, self.rem_size * 3, self.bp_count)) + self.assertTrue( + 
validate_io_procs([proc], self.mounts[0]), + 'Fail: Not able to write data even after expanding quota limit') + sleep(10) + self._insert_bp(self.client, self.logpath) + self._validate_error_in_mount_log(pattern=self.quota_error, + exp_pre=False) + self._validate_error_in_mount_log(pattern=self.space_error, + exp_pre=False) + + # Decrease the quota limit and validate error on reaching quota + self._insert_bp(self.client, self.logpath) + ret, _, _ = quota_limit_usage(self.mnode, + self.volname, + path='/dir/dir1', + limit='15GB') + self.assertEqual(ret, 0, 'Not able to expand quota limit on /dir/dir1') + sleep(10) + self.fqpath = self.m_point + '/dir/dir1' + self.rem_size = self._get_space_in_gb(self.client, self.fqpath) + proc = g.run_async( + self.client, + self.cmd.format(self.fqpath, self.rem_size * 3, self.bp_count)) + self.assertFalse( + validate_io_procs([proc], self.mounts[0]), + 'Fail: Process should not allow data more ' + 'than available space to be written') + sleep(10) + self._insert_bp(self.client, self.logpath) + self._validate_error_in_mount_log(pattern=self.quota_error) + self._validate_error_in_mount_log(pattern=self.space_error, + exp_pre=False) + + def test_ec_quota_errors_on_brick_down(self): + """ + Steps: + - Create and mount EC volume on two clients + - Create two dirs on the mount and perform parallel IO from clients + - Simulate disk full to validate EIO errors when no space is left + - Remove simulation and apply different quota limits on two dirs + - Bring down redundant bricks from the volume + - Validate EDQUOTE error on reaching quota limit and extend quota to + validate absence of EDQUOTE error + - Reduce the quota limit and validate EDQUOTE error upon reaching quota + - Remove quota limits, unmount and cleanup the volume + """ + self._perform_quota_ops_before_brick_down() + + # Bring redundant bricks offline + subvols = get_subvols(self.mnode, self.volname) + self.assertTrue(subvols.get('volume_subvols'), 'Not able to get ' + 'subvols of the volume') + self.offline_bricks = [] + for subvol in subvols['volume_subvols']: + self.offline_bricks.extend( + sample(subvol, + self.volume.get('voltype')['redundancy_count'])) + ret = bring_bricks_offline(self.volname, self.offline_bricks) + self.assertTrue(ret, 'Not able to bring redundant bricks offline') + + self._perform_quota_ops_after_brick_down() + + # Bring offline bricks online + ret, _, _ = volume_start(self.mnode, self.volname, force=True) + self.assertEqual(ret, 0, 'Not able to bring offline bricks online') + self.offline_bricks *= 0 + + g.log.info('Pass: Validating quota errors on brick down is successful') + + def test_ec_quota_errors_with_multiple_ios(self): + """ + Steps: + - Create and mount EC volume on two clients + - Create two dirs on the mount and perform parallel IO from clients + - Simulate disk full to validate EIO errors when no space is left + - Remove simulation and apply quota limits on base dir + - Validate EDQUOTE error on reaching quota limit and extend quota to + validate absence of EDQUOTE error + - Reduce the quota limit and validate EDQUOTE error upon reaching quota + - Remove quota limits, unmount and cleanup the volume + """ + self._perform_quota_ops_before_brick_down() + self._perform_quota_ops_after_brick_down() + g.log.info('Pass: Validating quota errors with multiple IOs is ' + 'successful') + + def test_ec_quota_errors_on_limit(self): + """ + Steps: + - Create and mount EC volume on one client + - Create a dir on the mount and perform IO from clients + - Simulate disk full to 
validate EIO errors when no space is left
+        - Remove simulation and apply quota limits on the dir
+        - Validate EDQUOTE error on reaching quota limit and extend quota to
+          validate absence of EDQUOTE error
+        - Reduce the quota limit and validate EDQUOTE error upon reaching
+          quota
+        - Remove quota limits, unmount and cleanup the volume
+        """
+
+        # Only a single client is used
+        self._perform_quota_ops_before_brick_down()
+        self._perform_quota_ops_after_brick_down()
+        g.log.info('Pass: Validating quota errors on limit breach is '
+                   'successful')
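The breakpoint trick used throughout the quota-errors test deserves a standalone illustration: unique markers are appended to the mount log around each operation so that error patterns can be asserted within, and only within, that window. A reduced sketch with the same helpers (the wrapper function is an assumption; client and logpath are placeholders for the fixture values):

from glustolibs.gluster.lib_utils import (append_string_to_file,
                                          search_pattern_in_file)

def assert_error_between_markers(client, logpath, pattern, run_io):
    # Bracket the operation with markers, then search only between them
    # so stale errors from earlier steps cannot satisfy the assertion.
    assert append_string_to_file(client, logpath, 'breakpoint_1')
    run_io()  # the IO expected to trip the error, e.g. an EDQUOT breach
    assert append_string_to_file(client, logpath, 'breakpoint_2')
    found = search_pattern_in_file(client, pattern, logpath,
                                   'breakpoint_1', 'breakpoint_2')
    assert found, '{} not seen between the breakpoints'.format(pattern)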
+ """ + options = {'group': 'metadata-cache'} + # Set metadata-cache options as group + ret = set_volume_options(self.mnode, self.volname, options) + self.assertTrue(ret, "Unable to set the volume options {}". + format(options)) + g.log.info("Able to set the %s options", options) + + # Mounting the volume on one client + ret, _, _ = mount_volume(self.volname, mtype=self.mount_type, + mpoint=self.mounts[0].mountpoint, + mserver=self.mnode, + mclient=self.mounts[0].client_system) + self.assertEqual(ret, 0, ("Volume {} is not mounted"). + format(self.volname)) + g.log.info("Volume mounted successfully : %s", self.volname) + + file_name = self.mounts[0].mountpoint + "/test1" + content = "testfile" + hard_link = self.mounts[0].mountpoint + "/test1_hlink" + cmd = 'echo "{content}" > {file}'.format(file=file_name, + content=content) + + # Creating a file with data + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Sucessful in creating a file with data") + g.log.info("file created successfully on %s", + self.mounts[0].mountpoint) + + # Creating a hardlink for the file created + ret = create_link_file(self.mounts[0].client_system, + file_name, hard_link) + self.assertTrue(ret, "Link file creation failed") + g.log.info("Link file creation for %s is successful", file_name) + + # Reading from the file as well as the hardlink + for each in (file_name, hard_link): + ret, out, _ = g.run(self.mounts[0].client_system, + "cat {}".format(each)) + self.assertEqual(ret, 0, "Unable to read the {}".format(each)) + self.assertEqual(content, out.strip('\n'), "The content {} and" + " data in file {} is not same". + format(content, each)) + g.log.info("Read of %s file is successful", each) diff --git a/tests/functional/disperse/test_ec_remove_brick_operations.py b/tests/functional/disperse/test_ec_remove_brick_operations.py new file mode 100644 index 000000000..184c72b63 --- /dev/null +++ b/tests/functional/disperse/test_ec_remove_brick_operations.py @@ -0,0 +1,324 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +Description: + + This test verifies remove brick operation on EC + volume. 
+ +""" +from time import sleep +from glusto.core import Glusto as g +from glustolibs.io.utils import (validate_io_procs, + wait_for_io_to_complete) +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.gluster.brick_ops import (remove_brick) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import (get_subvols, + get_volume_info, + log_volume_info_and_status) +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.rebalance_ops import ( + wait_for_remove_brick_to_complete) + + +@runs_on([['distributed-dispersed'], ['glusterfs']]) +class EcRemoveBrickOperations(GlusterBaseClass): + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" % + cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Ensure we have sufficient subvols + self.volume['voltype']['dist_count'] = 4 + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + # Start IO on mounts + self.counter = 1 + self.all_mounts_procs = [] + for mount_obj in self.mounts: + g.log.info("Starting IO on %s:%s", mount_obj.client_system, + mount_obj.mountpoint) + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d " + "--dir-depth 2 " + "--dir-length 5 " + "--max-num-of-dirs 3 " + "--num-of-files 3 %s" % (self.script_upload_path, + self.counter, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + self.all_mounts_procs.append(proc) + self.counter += 10 + self.io_validation_complete = False + + # Adding a delay of 10 seconds before test method starts. This + # is to ensure IO's are in progress and giving some time to fill data + sleep(10) + + def test_remove_brick_operations(self): + """ + Steps: + 1. Remove data brick count number of bricks from the volume + should fail + 2. step 1 with force option should fail + 3. Remove redundant brick count number of bricks from the volume + should fail + 4. step 3 with force option should fail + 5. Remove data brick count+1 number of bricks from the volume + should fail + 6. step 5 with force option should fail + 7. Remove disperse count number of bricks from the volume with + one wrong brick path should fail + 8. step 7 with force option should fail + 9. Start remove brick on first subvol bricks + 10. Remove all the subvols to make a pure EC vol + by start remove brick on second subvol bricks + 11. Start remove brick on third subvol bricks + 12. 
Write files and perform read on mountpoints + """ + # pylint: disable=too-many-locals + # pylint: disable=too-many-statements + + subvols_list = get_subvols(self.mnode, self.volname) + volinfo = get_volume_info(self.mnode, self.volname) + initial_brickcount = volinfo[self.volname]['brickCount'] + data_brick_count = (self.volume['voltype']['disperse_count'] - + self.volume['voltype']['redundancy_count']) + + # Try to remove data brick count number of bricks from the volume + bricks_list_to_remove = (subvols_list['volume_subvols'][0] + [0:data_brick_count]) + ret, _, _ = remove_brick(self.mnode, self.volname, + bricks_list_to_remove, + option="start") + self.assertEqual( + ret, 1, ("ERROR: Removed bricks %s from the volume " + "%s" % (bricks_list_to_remove, self.volname))) + + # Trying with force option + ret, _, _ = remove_brick( + self.mnode, self.volname, bricks_list_to_remove, option="force") + self.assertEqual( + ret, 1, ("ERROR: Removed bricks %s from the volume " + "%s" % (bricks_list_to_remove, self.volname))) + + # Try to remove redundant brick count number of bricks from the volume + bricks_list_to_remove = (subvols_list['volume_subvols'][0] + [0:self.volume['voltype'] + ['redundancy_count']]) + ret, _, _ = remove_brick( + self.mnode, self.volname, bricks_list_to_remove, option="start") + self.assertEqual( + ret, 1, ("ERROR: Removed bricks %s from the volume " + "%s" % (bricks_list_to_remove, self.volname))) + + # Trying with force option + ret, _, _ = remove_brick( + self.mnode, self.volname, bricks_list_to_remove, option="force") + self.assertEqual( + ret, 1, ("ERROR: Removed bricks %s from the volume" + "%s" % (bricks_list_to_remove, self.volname))) + + # Try to remove data brick count+1 number of bricks from the volume + bricks_list_to_remove = (subvols_list['volume_subvols'][0] + [0:data_brick_count + 1]) + ret, _, _ = remove_brick( + self.mnode, self.volname, bricks_list_to_remove, option="start") + self.assertEqual( + ret, 1, ("ERROR: Removed bricks %s from the volume " + "%s" % (bricks_list_to_remove, self.volname))) + + # Trying with force option + ret, _, _ = remove_brick( + self.mnode, self.volname, bricks_list_to_remove, option="force") + self.assertEqual( + ret, 1, ("ERROR: Removed bricks %s from the volume " + "%s" % (bricks_list_to_remove, self.volname))) + + # Try to remove disperse count number of bricks from the volume with + # one wrong brick path + bricks_list_to_remove = (subvols_list['volume_subvols'][0] + [0:self.volume['voltype']['disperse_count']]) + bricks_list_to_remove[0] = bricks_list_to_remove[0] + "wrong_path" + ret, _, _ = remove_brick( + self.mnode, self.volname, bricks_list_to_remove, option="start") + self.assertEqual( + ret, 1, ("ERROR: Removed bricks %s from the volume " + "%s" % (bricks_list_to_remove, self.volname))) + + # Trying with force option + ret, _, _ = remove_brick( + self.mnode, self.volname, bricks_list_to_remove, option="force") + self.assertEqual( + ret, 1, ("ERROR: Removed bricks %s from the volume " + "%s" % (bricks_list_to_remove, self.volname))) + + # Verify that the brick count is intact + volinfo = get_volume_info(self.mnode, self.volname) + latest_brickcount = volinfo[self.volname]['brickCount'] + self.assertEqual(initial_brickcount, latest_brickcount, + ("Brick count is not expected to " + "change, but changed")) + + # Start remove brick on first subvol bricks + bricks_list_to_remove = subvols_list['volume_subvols'][0] + ret, _, _ = remove_brick( + self.mnode, self.volname, bricks_list_to_remove, option="start") + 
self.assertEqual( + ret, 0, ("Failed to remove bricks %s from the volume " + "%s" % (bricks_list_to_remove, self.volname))) + + # Verify that the brick count is intact + volinfo = get_volume_info(self.mnode, self.volname) + latest_brickcount = volinfo[self.volname]['brickCount'] + self.assertEqual(initial_brickcount, latest_brickcount, + ("Brick count is not expected to " + "change, but changed")) + + # Wait for remove brick to complete + ret = wait_for_remove_brick_to_complete(self.mnode, self.volname, + bricks_list_to_remove) + self.assertTrue( + ret, ("Remove brick is not yet complete on the volume " + "%s" % self.volname)) + g.log.info("Remove brick is successfully complete on the volume %s", + self.volname) + + # Commit the remove brick operation + ret, _, _ = remove_brick( + self.mnode, self.volname, bricks_list_to_remove, option="commit") + self.assertEqual( + ret, 0, ("Failed to commit remove bricks %s from the volume " + "%s" % (bricks_list_to_remove, self.volname))) + + # Remove all the subvols to make a pure EC vol + # Start remove brick on second subvol bricks + bricks_list_to_remove = subvols_list['volume_subvols'][1] + ret, _, _ = remove_brick( + self.mnode, self.volname, bricks_list_to_remove, option="start") + self.assertEqual( + ret, 0, ("Failed to remove bricks %s from the volume " + "%s" % (bricks_list_to_remove, self.volname))) + + # Wait for remove brick to complete + ret = wait_for_remove_brick_to_complete(self.mnode, self.volname, + bricks_list_to_remove) + self.assertTrue( + ret, ("Remove brick is not yet complete on the volume " + "%s", self.volname)) + + # Commit the remove brick operation + ret, _, _ = remove_brick( + self.mnode, self.volname, bricks_list_to_remove, option="commit") + self.assertEqual( + ret, 0, ("Failed to commit remove bricks %s from the volume" + " %s" % (bricks_list_to_remove, self.volname))) + g.log.info("Remove brick is successfully complete on the volume %s", + self.volname) + + # Start remove brick on third subvol bricks + bricks_list_to_remove = subvols_list['volume_subvols'][2] + ret, _, _ = remove_brick( + self.mnode, self.volname, bricks_list_to_remove, option="start") + self.assertEqual(ret, 0, ("Failed to remove bricks %s from " + "the volume %s" % ( + bricks_list_to_remove, self.volname))) + + # Wait for remove brick to complete + ret = wait_for_remove_brick_to_complete(self.mnode, self.volname, + bricks_list_to_remove) + self.assertTrue( + ret, ("Remove brick is not yet complete on the volume " + "%s" % self.volname)) + g.log.info("Remove brick is successfully complete on the volume %s", + self.volname) + + # Commit the remove brick operation + ret, _, _ = remove_brick( + self.mnode, self.volname, bricks_list_to_remove, option="commit") + self.assertEqual( + ret, 0, ("Failed to commit remove bricks %s from the volume " + "%s" % (bricks_list_to_remove, self.volname))) + g.log.info("Remove brick is successfully complete on the volume %s", + self.volname) + + # Log volume info and status + ret = log_volume_info_and_status(self.mnode, self.volname) + self.assertTrue(ret, ("Logging volume info and status failed " + "on volume %s" % self.volname)) + g.log.info("Successful in logging volume info and status " + "of volume %s", self.volname) + + # Validate IO + ret = validate_io_procs(self.all_mounts_procs, self.mounts) + self.io_validation_complete = True + self.assertTrue(ret, "IO failed on some of the clients") + g.log.info("IO is successful on all mounts") + + # Write some files on the mount point + cmd1 = ("cd %s; mkdir test; cd test; 
for i in `seq 1 100` ;" + "do touch file$i; done" % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd1) + self.assertEqual(ret, 0, ("Write operation failed on client " + "%s " % self.mounts[0].client_system)) + g.log.info("Writes on mount point successful") + + # Perform read operation on mountpoint + cmd2 = ("cd %s; ls -lRt;" % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd2) + self.assertEqual(ret, 0, ("Read operation failed on client " + "%s " % self.mounts[0].client_system)) + g.log.info("Read on mount point successful") + + def tearDown(self): + # Wait for IO to complete if io validation is not executed in the + # test method + if not self.io_validation_complete: + ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts) + if not ret: + raise ExecutionError("IO failed on some of the clients") + g.log.info("IO is successful on all mounts") + + # Stopping the volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") + g.log.info("Successful in Unmount Volume and Cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/disperse/test_ec_replace_brick.py b/tests/functional/disperse/test_ec_replace_brick.py new file mode 100644 index 000000000..b695cc03d --- /dev/null +++ b/tests/functional/disperse/test_ec_replace_brick.py @@ -0,0 +1,373 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
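The replace-brick test that follows leans on arequal checksums to prove that healing rebuilt the replaced brick without data loss. A condensed sketch of that check (the wrapper function is an assumption; its arguments stand in for the GlusterBaseClass fixtures):

from glustolibs.gluster.heal_libs import monitor_heal_completion
from glustolibs.gluster.volume_libs import replace_brick_from_volume
from glustolibs.io.utils import collect_mounts_arequal

def verify_replace_brick_preserves_data(mnode, volname, servers,
                                        all_servers_info, mount):
    # Checksum the mount, replace one brick, wait for heal, then
    # re-checksum; equal arequals mean the brick was rebuilt intact.
    ret, before = collect_mounts_arequal(mount)
    assert ret, 'failed to collect arequal before replace-brick'
    assert replace_brick_from_volume(mnode, volname, servers,
                                     all_servers_info), 'replace failed'
    assert monitor_heal_completion(mnode, volname), 'heal did not complete'
    ret, after = collect_mounts_arequal(mount)
    assert ret and before == after, 'checksums diverged across replace'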
+ +""" +Test Description: + Tests replace brick on an EC volume +""" + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.heal_libs import monitor_heal_completion +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import (collect_mounts_arequal, + validate_io_procs) +from glustolibs.gluster.brick_libs import (get_all_bricks, + wait_for_bricks_to_be_online, + are_bricks_online) +from glustolibs.gluster.volume_libs import replace_brick_from_volume +from glustolibs.gluster.glusterfile import file_exists +from glustolibs.gluster.glusterdir import mkdir + + +@runs_on([['dispersed', 'distributed-dispersed'], + ['glusterfs']]) +class TestEcBrickReplace(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path1 = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + cls.script_upload_path2 = ("/usr/share/glustolibs/io/scripts/" + "fd_writes.py") + ret = upload_scripts(cls.clients, [cls.script_upload_path1, + cls.script_upload_path2]) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + self.all_mounts_procs = [] + self.io_validation_complete = False + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts, + volume_create_force=False) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + If test method failed before validating IO, tearDown waits for the + IO's to complete and checks for the IO exit status + Cleanup and umount volume + """ + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_ec_replace_brick(self): + """ + - Start resource consumption tool + - Create directory dir1 + - Create 5 directory and 5 files in dir of mountpoint + - Rename all files inside dir1 at mountpoint + - Create softlink and hardlink of files in dir1 of mountpoint + - Delete op for deleting all file in one of the dirs inside dir1 + - Change chmod, chown, chgrp + - Create tiny, small, medium and large file + - Get arequal before replacing brick + - Replace brick + - Get arequal after replacing brick + - Compare Arequal's + - Create IO's + - Replace brick while IO's are going on + - Validating IO's and waiting for it to complete + """ + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + # Starting resource consumption using top + log_file_mem_monitor = '/var/log/glusterfs/mem_usage.log' + cmd = ("for i in {1..20};do top -n 1 -b|egrep " + "'RES|gluster' & free -h 2>&1 >> %s ;" + "sleep 10;done" % (log_file_mem_monitor)) + g.log.info(cmd) + cmd_list_procs = [] + for server in self.servers: + proc = g.run_async(server, cmd) + cmd_list_procs.append(proc) + + # Creating dir1 + ret = 
mkdir(self.mounts[0].client_system, "%s/dir1"
+                    % self.mounts[0].mountpoint)
+        self.assertTrue(ret, "Failed to create dir1")
+        g.log.info("Directory dir1 on %s created successfully",
+                   self.mounts[0])
+
+        # Create 5 dirs and 5 files in each dir under dir1 at the mountpoint
+        start, end = 1, 5
+        for mount_obj in self.mounts:
+            # Number of dirs and files to be created.
+            dir_range = ("%s..%s" % (str(start), str(end)))
+            file_range = ("%s..%s" % (str(start), str(end)))
+            # Create dirs 1-5 at mountpoint.
+            ret = mkdir(mount_obj.client_system, "%s/dir1/dir{%s}"
+                        % (mount_obj.mountpoint, dir_range))
+            self.assertTrue(ret, "Failed to create directory")
+            g.log.info("Directory created successfully")
+
+            # Create files inside each dir.
+            cmd = ('touch %s/dir1/dir{%s}/file{%s};'
+                   % (mount_obj.mountpoint, dir_range, file_range))
+            ret, _, _ = g.run(mount_obj.client_system, cmd)
+            self.assertFalse(ret, "File creation failed")
+            g.log.info("Files created successfully")
+
+            # Increment counter so that at the next client dirs and files are
+            # made with a different offset. At the next client dirs will be
+            # named dir6, dir7...dir10. Same with files.
+            start += 5
+            end += 5
+
+        # Rename all files inside dir1/dir1 at the mountpoint
+        cmd = ('cd %s/dir1/dir1/; '
+               'for FILENAME in *;'
+               'do mv $FILENAME Unix_$FILENAME; cd ~;'
+               'done;'
+               % self.mounts[0].mountpoint)
+        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
+        self.assertEqual(ret, 0, "Failed to rename files on "
+                         "client")
+        g.log.info("Successfully renamed files on client")
+
+        # Truncate files at any dir in mountpoint inside dir1
+        # start is an offset to be added to dirname to act on
+        # diff files at diff clients.
+        start = 1
+        for mount_obj in self.mounts:
+            cmd = ('cd %s/dir1/dir%s/; '
+                   'for FILENAME in *;'
+                   'do echo > $FILENAME; cd ~;'
+                   'done;'
+                   % (mount_obj.mountpoint, str(start)))
+            ret, _, _ = g.run(mount_obj.client_system, cmd)
+            self.assertFalse(ret, "Truncate failed")
+            g.log.info("Truncate of files successful")
+
+        # Create softlink and hardlink of files in mountpoint. Start is an
+        # offset to be added to dirname to act on diff files at diff clients.
+        start = 1
+        for mount_obj in self.mounts:
+            cmd = ('cd %s/dir1/dir%s; '
+                   'for FILENAME in *; '
+                   'do ln -s $FILENAME softlink_$FILENAME; cd ~;'
+                   'done;'
+                   % (mount_obj.mountpoint, str(start)))
+            ret, _, _ = g.run(mount_obj.client_system, cmd)
+            self.assertFalse(ret, "Creating softlinks failed")
+            g.log.info("Softlinks of files created successfully")
+
+            cmd = ('cd %s/dir1/dir%s; '
+                   'for FILENAME in *; '
+                   'do ln $FILENAME hardlink_$FILENAME; cd ~;'
+                   'done;'
+                   % (mount_obj.mountpoint, str(start + 1)))
+            ret, _, _ = g.run(mount_obj.client_system, cmd)
+            self.assertFalse(ret, "Creating hardlinks failed")
+            g.log.info("Hardlinks of files created successfully")
+            start += 5
+
+        # chmod, chown, chgrp inside dir1
+        # start and end used as offset to access diff files
+        # at diff clients.
+        start, end = 2, 5
+        for mount_obj in self.mounts:
+            dir_file_range = '%s..%s' % (str(start), str(end))
+            cmd = ('chmod 777 %s/dir1/dir{%s}/file{%s}'
+                   % (mount_obj.mountpoint, dir_file_range, dir_file_range))
+            ret, _, _ = g.run(mount_obj.client_system, cmd)
+            self.assertFalse(ret, "Changing mode of files has failed")
+            g.log.info("Mode of files changed successfully")
+
+            cmd = ('chown root %s/dir1/dir{%s}/file{%s}'
+                   % (mount_obj.mountpoint, dir_file_range, dir_file_range))
+            ret, _, _ = g.run(mount_obj.client_system, cmd)
+            self.assertFalse(ret, "Changing owner of files has failed")
+            g.log.info("Owner of files changed successfully")
+
+            cmd = ('chgrp root %s/dir1/dir{%s}/file{%s}'
+                   % (mount_obj.mountpoint, dir_file_range, dir_file_range))
+            ret, _, _ = g.run(mount_obj.client_system, cmd)
+            self.assertFalse(ret, "Changing group of files has failed")
+            g.log.info("Group of files changed successfully")
+            start += 5
+            end += 5
+
+        # Create tiny, small, medium and large files
+        # at mountpoint. Offset to differ filenames
+        # at diff clients.
+        offset = 1
+        for mount_obj in self.mounts:
+            cmd = 'fallocate -l 100 tiny_file%s.txt' % str(offset)
+            ret, _, _ = g.run(mount_obj.client_system, cmd)
+            self.assertFalse(ret, "Fallocate for tiny files failed")
+            g.log.info("Fallocate for tiny files successful")
+
+            cmd = 'fallocate -l 20M small_file%s.txt' % str(offset)
+            ret, _, _ = g.run(mount_obj.client_system, cmd)
+            self.assertFalse(ret, "Fallocate for small files failed")
+            g.log.info("Fallocate for small files successful")
+
+            cmd = 'fallocate -l 200M medium_file%s.txt' % str(offset)
+            ret, _, _ = g.run(mount_obj.client_system, cmd)
+            self.assertFalse(ret, "Fallocate for medium files failed")
+            g.log.info("Fallocate for medium files successful")
+
+            cmd = 'fallocate -l 1G large_file%s.txt' % str(offset)
+            ret, _, _ = g.run(mount_obj.client_system, cmd)
+            self.assertFalse(ret, "Fallocate for large files failed")
+            g.log.info("Fallocate for large files successful")
+            offset += 1
+
+        # Get arequal before replacing brick
+        ret, result_before_replacing_brick = (
+            collect_mounts_arequal(self.mounts[0]))
+        self.assertTrue(ret, 'Failed to get arequal')
+        g.log.info('Getting arequal before replacing brick '
+                   'is successful')
+
+        # Replacing a brick of random choice
+        ret = replace_brick_from_volume(self.mnode, self.volname,
+                                        self.servers,
+                                        self.all_servers_info)
+        self.assertTrue(ret, "Unexpected: Replace brick is not successful")
+        g.log.info("Expected : Replace brick is successful")
+
+        # Wait for bricks to come online
+        ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
+        self.assertTrue(ret, "Unexpected: Bricks are not online")
+        g.log.info("Expected : Bricks are online")
+
+        # Monitor heal completion
+        ret = monitor_heal_completion(self.mnode, self.volname)
+        self.assertTrue(ret, 'Unexpected: Heal has not yet completed')
+        g.log.info('Heal has completed successfully')
+
+        # Check if bricks are online
+        all_bricks = get_all_bricks(self.mnode, self.volname)
+        ret = are_bricks_online(self.mnode, self.volname, all_bricks)
+        self.assertTrue(ret, 'Unexpected: All bricks are not online')
+        g.log.info('All bricks are online')
+
+        # Get arequal after replacing brick
+        ret, result_after_replacing_brick = (
+            collect_mounts_arequal(self.mounts[0]))
+        self.assertTrue(ret, 'Failed to get arequal')
+        g.log.info('Getting arequal after replacing brick '
+                   'is successful')
+
+        # Comparing arequals
+        self.assertEqual(result_before_replacing_brick,
+                         result_after_replacing_brick,
+                         'Arequals before and after replacing brick '
+                         'are not equal')
+        g.log.info('Arequals before and after replacing brick are equal')
+
+        # Create files on the client side in dir1
+        # Write IO
+        all_mounts_procs, count = [], 1
+        for mount_obj in self.mounts:
+            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
+                       mount_obj.mountpoint)
+            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
+                   "--dirname-start-num %d "
+                   "--dir-depth 2 "
+                   "--dir-length 10 "
+                   "--max-num-of-dirs 5 "
+                   "--num-of-files 5 %s/dir1" % (
+                       self.script_upload_path1, count,
+                       mount_obj.mountpoint))
+            proc = g.run_async(mount_obj.client_system, cmd,
+                               user=mount_obj.user)
+            all_mounts_procs.append(proc)
+            count += 10
+
+        # Replace a brick while IO is going on
+        ret = replace_brick_from_volume(self.mnode, self.volname,
+                                        self.servers,
+                                        self.all_servers_info)
+        self.assertTrue(ret, "Unexpected: Replace brick is not successful")
+        g.log.info("Expected: Replace brick is successful")
+
+        # Wait for bricks to come online
+        ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
+        self.assertTrue(ret, "Unexpected: Bricks are not online")
+        g.log.info("Expected: Bricks are online")
+
+        # Validate IO and wait for it to complete
+        ret = validate_io_procs(all_mounts_procs, self.mounts)
+        self.assertTrue(ret, "IO failed on some of the clients")
+        g.log.info("Successfully validated all IO")
+
+        # Create 2 directories and start IO which keeps FDs open
+        ret = mkdir(self.mounts[0].client_system, "%s/count{1..2}"
+                    % self.mounts[0].mountpoint)
+        self.assertTrue(ret, "Failed to create directories")
+        g.log.info("Directories created on %s successfully", self.mounts[0])
+
+        all_fd_procs, count = [], 1
+        for mount_obj in self.mounts:
+            cmd = ("cd %s ;/usr/bin/env python %s -n 10 -t 120 "
+                   "-d 5 -c 16 --dir count%s" % (
+                       mount_obj.mountpoint,
+                       self.script_upload_path2, count))
+            proc = g.run_async(mount_obj.client_system, cmd,
+                               user=mount_obj.user)
+            all_fd_procs.append(proc)
+            count += 1
+
+        # Replace a brick while the open-FD IO is going on
+        ret = replace_brick_from_volume(self.mnode, self.volname,
+                                        self.servers,
+                                        self.all_servers_info)
+        self.assertTrue(ret, "Unexpected: Replace brick is not successful")
+        g.log.info("Expected: Replace brick is successful")
+
+        # Wait for bricks to come online
+        ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
+        self.assertTrue(ret, "Unexpected: Bricks are not online")
+        g.log.info("Expected: Bricks are online")
+
+        # Validate IO and wait for it to complete
+        ret = validate_io_procs(all_fd_procs, self.mounts)
+        self.assertTrue(ret, "IO failed on some of the clients")
+        g.log.info("Successfully validated all IO")
+
+        # Close the connections and check that the memory log file exists
+        ret = file_exists(self.mnode,
+                          '/var/log/glusterfs/mem_usage.log')
+        self.assertTrue(ret, "Unexpected: Memory log file does "
+                             "not exist")
+        g.log.info("Memory log file exists")
+        for proc in cmd_list_procs:
+            ret, _, _ = proc.async_communicate()
+            self.assertEqual(ret, 0, "Memory logging failed")
+        g.log.info("Memory logging is successful")
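The memory-usage monitor that brackets this test is a generic async pattern: start a sampling loop on every server with g.run_async and reap it with async_communicate() once the workload finishes. A standalone sketch, assuming the same log path:

from glusto.core import Glusto as g

MEM_LOG = '/var/log/glusterfs/mem_usage.log'


def start_mem_logging(servers, samples=20, interval=10):
    """Kick off a top/free sampling loop on every server."""
    cmd = ("for i in $(seq 1 {0}); do top -n 1 -b | egrep 'RES|gluster' "
           ">> {1}; free -h >> {1}; sleep {2}; done"
           .format(samples, MEM_LOG, interval))
    return [g.run_async(server, cmd) for server in servers]


def stop_mem_logging(procs):
    """Reap the async loops and fail loudly if any of them errored."""
    for proc in procs:
        ret, _, _ = proc.async_communicate()
        assert ret == 0, "Memory logging failed"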
diff --git a/tests/functional/disperse/test_ec_replace_brick_after_add_brick.py b/tests/functional/disperse/test_ec_replace_brick_after_add_brick.py
new file mode 100644
index 000000000..8aae20b0b
--- /dev/null
+++ b/tests/functional/disperse/test_ec_replace_brick_after_add_brick.py
@@ -0,0 +1,168 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from random import choice
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.brick_libs import get_all_bricks
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.gluster.glusterfile import (remove_file,
+                                            occurences_of_pattern_in_file)
+from glustolibs.gluster.heal_libs import monitor_heal_completion
+from glustolibs.gluster.volume_libs import (replace_brick_from_volume,
+                                            expand_volume)
+from glustolibs.misc.misc_libs import upload_scripts
+from glustolibs.io.utils import (validate_io_procs, wait_for_io_to_complete)
+
+
+@runs_on([['dispersed'], ['glusterfs']])
+class TestEcReplaceBrickAfterAddBrick(GlusterBaseClass):
+
+    @classmethod
+    def setUpClass(cls):
+        # Calling GlusterBaseClass setUpClass
+        cls.get_super_method(cls, 'setUpClass')()
+
+        # Upload IO scripts for running IO on mounts
+        cls.script_upload_path = (
+            "/usr/share/glustolibs/io/scripts/file_dir_ops.py")
+        ret = upload_scripts(cls.clients, cls.script_upload_path)
+        if not ret:
+            raise ExecutionError("Failed to upload IO scripts to clients {}".
+                                 format(cls.clients))
+
+    @classmethod
+    def tearDownClass(cls):
+        for each_client in cls.clients:
+            ret = remove_file(each_client, cls.script_upload_path)
+            if not ret:
+                raise ExecutionError("Failed to delete file {}".
+                                     format(cls.script_upload_path))
+        cls.get_super_method(cls, 'tearDownClass')()
+
+    def setUp(self):
+        # Calling GlusterBaseClass setUp
+        self.get_super_method(self, 'setUp')()
+
+        # Initialise the IO proc list so tearDown can always reference it
+        self.all_mounts_procs = []
+
+        # Setup volume and mount it on the clients
+        if not self.setup_volume_and_mount_volume(self.mounts):
+            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+
+    def tearDown(self):
+        if self.all_mounts_procs:
+            ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts)
+            if not ret:
+                raise ExecutionError(
+                    "Wait for IO completion failed on some of the clients")
+
+        # Unmount and cleanup the volume
+        if not self.unmount_volume_and_cleanup_volume(self.mounts):
+            raise ExecutionError("Unable to unmount and cleanup volume")
+
+        self.get_super_method(self, 'tearDown')()
+
+    def test_ec_replace_brick_after_add_brick(self):
+        """
+        Test Steps:
+        1. Create a pure-ec volume (say 1x(4+2))
+        2. Mount the volume on two clients
+        3. Create some files and dirs from both mounts
+        4. Add bricks, in this case (4+2), i.e. 6 bricks
+        5. Create a new dir (common_dir) and in that directory create a
+           distinct directory (using the hostname as dirname) for each
+           client, then pump IO from the clients (dd)
+        6. While IO is in progress, replace any of the bricks
+        7. Check for errors, if any, collected after step 6
+        """
+        # pylint: disable=unsubscriptable-object,too-many-locals
+        all_bricks = get_all_bricks(self.mnode, self.volname)
+        self.assertIsNotNone(all_bricks, "Unable to get the bricks from the {}"
+                             " volume".format(self.volname))
+
+        self.all_mounts_procs = []
+        for count, mount_obj in enumerate(self.mounts):
+            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
+                       mount_obj.mountpoint)
+            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
+                   "--dirname-start-num %d --dir-depth 3 --dir-length 5 "
+                   "--max-num-of-dirs 5 --num-of-files 5 %s" % (
+                       self.script_upload_path, count,
+                       mount_obj.mountpoint))
+            proc = g.run_async(mount_obj.client_system, cmd,
+                               user=mount_obj.user)
+            self.all_mounts_procs.append(proc)
+
+        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
+        self.assertTrue(ret, "IO failed on the mounts")
+        self.all_mounts_procs = []
+
+        # Expand the volume
+        ret = expand_volume(self.mnode, self.volname, self.servers,
+                            self.all_servers_info)
+        self.assertTrue(ret, "Expanding volume failed")
+
+        # Create a new dir (common_dir) on the mountpoint
+        common_dir = self.mounts[0].mountpoint + "/common_dir"
+        ret = mkdir(self.mounts[0].client_system, common_dir)
+        self.assertTrue(ret, "Directory creation failed")
+
+        # Create a distinct directory for each client under common_dir
+        distinct_dir = common_dir + "/$HOSTNAME"
+        for each_client in self.clients:
+            ret = mkdir(each_client, distinct_dir)
+            self.assertTrue(ret, "Directory creation failed")
+
+        # Run dd in the background, redirecting stdout and stderr to
+        # error.txt so that any errors can be validated after IO completes
+        run_dd_cmd = ("cd {}; for i in `seq 1 1000`; do dd if=/dev/urandom "
+                      "of=file$i bs=4096 count=10 &>> error.txt; done".
+                      format(distinct_dir))
+        for each_client in self.clients:
+            proc = g.run_async(each_client, run_dd_cmd)
+            self.all_mounts_procs.append(proc)
+
+        # Pick a random brick from the bricks
+        brick_to_replace = choice(all_bricks)
+        node_from_brick_replace, _ = brick_to_replace.split(":")
+
+        # Replace the brick with a new brick from the same node
+        servers_info_of_replaced_node = {}
+        servers_info_of_replaced_node[node_from_brick_replace] = (
+            self.all_servers_info[node_from_brick_replace])
+
+        ret = replace_brick_from_volume(self.mnode, self.volname,
+                                        node_from_brick_replace,
+                                        servers_info_of_replaced_node,
+                                        src_brick=brick_to_replace)
+        self.assertTrue(ret, "Replace brick failed")
+
+        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
+                        "IO failed on the mounts")
+        self.all_mounts_procs = []
+
+        err_msg = "Too many levels of symbolic links"
+        dd_log_file = distinct_dir + "/error.txt"
+        for each_client in self.clients:
+            ret = occurences_of_pattern_in_file(each_client, err_msg,
+                                                dd_log_file)
+            self.assertEqual(ret, 0, "Either the file {} doesn't exist or "
+                             "'{}' messages were seen while the replace "
+                             "brick operation was in progress".
+                             format(dd_log_file, err_msg))
+
+        self.assertTrue(monitor_heal_completion(self.mnode, self.volname),
+                        "Heal failed on the volume {}".format(self.volname))
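Replacing a brick "in place" (new brick on the same node) reduces to splitting the host out of the host:path brick id and passing a one-entry servers-info dict, exactly as the test does above; a minimal sketch:

from random import choice

from glustolibs.gluster.volume_libs import replace_brick_from_volume


def replace_random_brick_same_node(mnode, volname, bricks, all_servers_info):
    """Replace a randomly chosen brick with a new brick on the same node."""
    src_brick = choice(bricks)
    node = src_brick.split(":")[0]
    node_info = {node: all_servers_info[node]}
    return replace_brick_from_volume(mnode, volname, node, node_info,
                                     src_brick=src_brick)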
diff --git a/tests/functional/disperse/test_ec_truncate_file_with_brick_down.py b/tests/functional/disperse/test_ec_truncate_file_with_brick_down.py
new file mode 100755
index 000000000..ac9db90fa
--- /dev/null
+++ b/tests/functional/disperse/test_ec_truncate_file_with_brick_down.py
@@ -0,0 +1,145 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from random import sample
+import time
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on)
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.volume_ops import volume_start
+from glustolibs.gluster.volume_libs import get_subvols
+from glustolibs.gluster.lib_utils import is_core_file_created
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+                                           are_bricks_offline,
+                                           are_bricks_online)
+from glustolibs.gluster.heal_libs import monitor_heal_completion
+from glustolibs.misc.misc_libs import reboot_nodes_and_wait_to_come_online
+
+
+@runs_on([['dispersed', 'distributed-dispersed'],
+          ['glusterfs', 'nfs']])
+class TestEcTruncateFileWithBrickDown(GlusterBaseClass):
+
+    def setUp(self):
+        # Calling GlusterBaseClass setUp
+        self.get_super_method(self, 'setUp')()
+
+        # Setup volume and mount it on one client
+        if not self.setup_volume_and_mount_volume([self.mounts[0]]):
+            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+
+    def tearDown(self):
+        # Calling GlusterBaseClass tearDown
+        self.get_super_method(self, 'tearDown')()
+
+        # Unmount and cleanup the volume
+        if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]):
+            raise ExecutionError("Unable to unmount and cleanup volume")
+
+    def test_ec_truncate_file_with_brick_down(self):
+        """
+        Test steps:
+        1. Create a volume, start it and mount it on a client
+        2. Bring down redundant bricks in the subvol
+        3. Create a file on the volume using "touch"
+        4. Truncate the file using "O_TRUNC"
+        5. Bring the bricks online
+        6. Write data to the file and wait for heal completion
+        7. Check for crashes and coredumps
+        """
+        # pylint: disable=unsubscriptable-object
+        for restart_type in ("volume_start", "node_reboot"):
+            # Timestamp from mnode for checking cores at the end of the test
+            ret, test_timestamp, _ = g.run(self.mnode, "date +%s")
+            self.assertEqual(ret, 0, "date command failed")
+            test_timestamp = test_timestamp.strip()
+
+            # Create a file using touch
+            file_name = self.mounts[0].mountpoint + "/test_1"
+            ret, _, err = g.run(self.mounts[0].client_system, "touch {}".
+                                format(file_name))
+            self.assertEqual(ret, 0, "File creation failed")
+            g.log.info("File created successfully")
+
+            # List two bricks in each subvol
+            subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
+            bricks_to_bring_offline = []
+            for subvol in subvols:
+                self.assertTrue(subvol, "List is empty")
+                bricks_to_bring_offline.extend(sample(subvol, 2))
+
+            # Bring two bricks of each subvol offline
+            ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
+            self.assertTrue(ret, "Bricks are still online")
+
+            # Validate that the bricks are offline
+            ret = are_bricks_offline(self.mnode, self.volname,
+                                     bricks_to_bring_offline)
+            self.assertTrue(ret, "Some of the bricks in {} are still "
+                                 "online".format(bricks_to_bring_offline))
+
+            # Truncate the file
+            cmd = (
+                'python -c "import os, sys; fd = os.open(\'{}\', os.O_TRUNC)'
+                '; os.close(fd)"').format(file_name)
+            ret, _, err = g.run(self.mounts[0].client_system, cmd)
+            self.assertEqual(ret, 0, err)
+            g.log.info("File truncated successfully")
+
+            # Bring the bricks back online
+            if restart_type == "volume_start":
+                # Bring the bricks online by volume start
+                ret, _, err = volume_start(self.mnode, self.volname,
+                                           force=True)
+                self.assertEqual(ret, 0, err)
+                g.log.info("All bricks are online")
+            elif restart_type == "node_reboot":
+                # Bring the bricks online by node reboot
+                for brick in bricks_to_bring_offline:
+                    node_to_reboot = brick.split(":")[0]
+                    ret = reboot_nodes_and_wait_to_come_online(node_to_reboot)
+                    self.assertTrue(ret, "Reboot Failed on node: "
+                                         "{}".format(node_to_reboot))
+                    g.log.info("Node: %s rebooted successfully",
+                               node_to_reboot)
+                time.sleep(60)
+
+            # Check whether the bricks are online
+            ret = are_bricks_online(self.mnode, self.volname,
+                                    bricks_to_bring_offline)
+            self.assertTrue(ret, "Bricks {} are still offline".
+                            format(bricks_to_bring_offline))
+
+            # Write data to the file
+            cmd = ('python -c "import os, sys;fd = os.open(\'{}\', '
+                   'os.O_RDWR) ;'
+                   'os.write(fd, \'This is test after truncate\'.encode());'
+                   ' os.close(fd)"').format(file_name)
+
+            ret, _, err = g.run(self.mounts[0].client_system, cmd)
+            self.assertEqual(ret, 0, err)
+            g.log.info("Data written successfully to the file")
+
+            # Monitor heal completion
+            ret = monitor_heal_completion(self.mnode, self.volname)
+            self.assertTrue(ret, "Heal pending for file {}".format(file_name))
+
+            # Check for any crashes on the servers and the client
+            for nodes in (self.servers, [self.clients[0]]):
+                ret = is_core_file_created(nodes, test_timestamp)
+                self.assertTrue(ret,
+                                "Cores found on the {} nodes".format(nodes))
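The remote python -c one-liners above amount to the following local sequence: truncate via the O_TRUNC open flag, then reopen and write fresh data (a sketch; the path and payload are placeholders mirroring the test):

import os


def truncate_then_write(path, data=b"This is test after truncate"):
    """Truncate a file via O_TRUNC, then reopen it and write fresh data."""
    fd = os.open(path, os.O_TRUNC)  # same flag usage as the test above
    os.close(fd)
    fd = os.open(path, os.O_RDWR)
    os.write(fd, data)
    os.close(fd)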
diff --git a/tests/functional/disperse/test_ec_uss_snapshot.py b/tests/functional/disperse/test_ec_uss_snapshot.py
new file mode 100644
index 000000000..fec1754f6
--- /dev/null
+++ b/tests/functional/disperse/test_ec_uss_snapshot.py
@@ -0,0 +1,328 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+"""
+Test Description:
+    Test USS and snapshot on an EC volume
+"""
+
+from unittest import skip
+from time import sleep
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.misc.misc_libs import upload_scripts
+from glustolibs.io.utils import validate_io_procs
+from glustolibs.gluster.snap_ops import (snap_create,
+                                         snap_activate,
+                                         snap_delete,
+                                         snap_delete_all)
+from glustolibs.gluster.glusterfile import file_exists
+from glustolibs.gluster.uss_ops import (enable_uss,
+                                        uss_list_snaps,
+                                        disable_uss)
+from glustolibs.gluster.glusterdir import mkdir
+
+
+@runs_on([['dispersed', 'distributed-dispersed'],
+          ['glusterfs']])
+class TestEcUssSnapshot(GlusterBaseClass):
+
+    @classmethod
+    def setUpClass(cls):
+        # Calling GlusterBaseClass setUpClass
+        cls.get_super_method(cls, 'setUpClass')()
+
+        # Upload IO scripts for running IO on mounts
+        cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
+                                  "file_dir_ops.py")
+        ret = upload_scripts(cls.clients, [cls.script_upload_path])
+        if not ret:
+            raise ExecutionError("Failed to upload IO scripts to clients %s"
+                                 % cls.clients)
+        g.log.info("Successfully uploaded IO scripts to clients %s",
+                   cls.clients)
+
+    def setUp(self):
+        # Calling GlusterBaseClass setUp
+        self.get_super_method(self, 'setUp')()
+
+        self.all_mounts_procs = []
+        self.io_validation_complete = False
+
+        # Setup volume and mount volume
+        ret = self.setup_volume_and_mount_volume(mounts=self.mounts,
+                                                 volume_create_force=False)
+        if not ret:
+            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+        g.log.info("Successful in Setup Volume and Mount Volume")
+
+    def tearDown(self):
+        """
+        Delete all snapshots and disable USS
+        Cleanup and umount the volume
+        """
+        # Delete all snapshots
+        ret, _, _ = snap_delete_all(self.mnode)
+        if ret:
+            raise ExecutionError("Snapshot delete failed")
+        g.log.info("Successfully deleted all snapshots")
+
+        # Disable USS for the volume
+        ret, _, _ = disable_uss(self.mnode, self.volname)
+        if ret:
+            raise ExecutionError("Failed to disable USS")
+        g.log.info("Successfully disabled USS for volume %s", self.volname)
+
+        # Cleanup and umount the volume
+        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+        if not ret:
+            raise ExecutionError("Failed to umount the vol & cleanup volume")
+        g.log.info("Successful in umounting the volume and cleanup")
+
+        # Calling GlusterBaseClass tearDown
+        self.get_super_method(self, 'tearDown')()
+
+    @skip('Skipping due to Bug 1828820')
+    def test_ec_uss_snapshot(self):
+        """
+        - Start the resource consumption tool
+        - Create directory dir1
+        - Create 5 directories and 5 files in each dir of the mountpoint
+        - Rename all files inside dir1 at the mountpoint
+        - Create softlinks and hardlinks of files in dir1 of the mountpoint
+        - Delete all files in one of the dirs inside dir1
+        - Create tiny, small, medium and large files
+        - Start IO
+        - Enable USS
+        - Create a snapshot
+        - Activate the snapshot
+        - List the snapshot and the contents inside the snapshot
+        - Delete the snapshot
+        - Create a snapshot with the same name
+        - Activate the snapshot
+        - List the snapshot and the contents inside the snapshot
+        - Validate IO and wait for it to complete
+        - Close the connections and check that the memory log file exists
+        """
+        # pylint: disable=too-many-branches,too-many-statements,too-many-locals
+        # Start resource consumption monitoring using top
+        log_file_mem_monitor = '/var/log/glusterfs/mem_usage.log'
+        cmd = ("for i in {1..20};do top -n 1 -b|egrep "
+               "'RES|gluster' & free -h 2>&1 >> %s ;"
+               "sleep 10;done" % (log_file_mem_monitor))
+        g.log.info(cmd)
+        cmd_list_procs = []
+        for server in self.servers:
+            proc = g.run_async(server, cmd)
+            cmd_list_procs.append(proc)
+
+        # Create dir1
+        ret = mkdir(self.mounts[0].client_system, "%s/dir1"
+                    % self.mounts[0].mountpoint)
+        self.assertTrue(ret, "Failed to create dir1")
+        g.log.info("Directory dir1 on %s created successfully",
+                   self.mounts[0])
+
+        # Create 5 dirs and 5 files in each dir at mountpoint under dir1
+        start, end = 1, 5
+        for mount_obj in self.mounts:
+            # Number of dirs and files to be created
+            dir_range = ("%s..%s" % (str(start), str(end)))
+            file_range = ("%s..%s" % (str(start), str(end)))
+            # Create dirs 1-5 at mountpoint
+            ret = mkdir(mount_obj.client_system, "%s/dir1/dir{%s}"
+                        % (mount_obj.mountpoint, dir_range))
+            self.assertTrue(ret, "Failed to create directory")
+            g.log.info("Directory created successfully")
+
+            # Create files inside each dir
+            cmd = ('touch %s/dir1/dir{%s}/file{%s};'
+                   % (mount_obj.mountpoint, dir_range, file_range))
+            ret, _, _ = g.run(mount_obj.client_system, cmd)
+            self.assertFalse(ret, "File creation failed")
+            g.log.info("Files created successfully")
+
+            # Increment the counters so that the next client creates dirs
+            # and files with a different offset, e.g. the next client will
+            # create dir6, dir7...dir10. Same with files.
+            start += 5
+            end += 5
+
+        # Rename all files inside dir1/dir1 at the mountpoint
+        cmd = ('cd %s/dir1/dir1/; '
+               'for FILENAME in *; '
+               'do mv $FILENAME Unix_$FILENAME; '
+               'done;'
+               % self.mounts[0].mountpoint)
+        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
+        self.assertEqual(ret, 0, "Failed to rename files on client")
+        g.log.info("Successfully renamed files on client")
+
+        # Truncate files in one dir under dir1 per client. start is an
+        # offset added to the dirname so that different files are acted
+        # on at different clients.
+        start = 1
+        for mount_obj in self.mounts:
+            cmd = ('cd %s/dir1/dir%s/; '
+                   'for FILENAME in *; '
+                   'do echo > $FILENAME; '
+                   'done;'
+                   % (mount_obj.mountpoint, str(start)))
+            ret, _, _ = g.run(mount_obj.client_system, cmd)
+            self.assertFalse(ret, "Truncate failed")
+            g.log.info("Truncate of files successful")
+
+        # Create softlinks and hardlinks of files at the mountpoint. start
+        # is an offset added to the dirname so that different files are
+        # acted on at different clients.
+        start = 1
+        for mount_obj in self.mounts:
+            cmd = ('cd %s/dir1/dir%s; '
+                   'for FILENAME in *; '
+                   'do ln -s $FILENAME softlink_$FILENAME; '
+                   'done;'
+                   % (mount_obj.mountpoint, str(start)))
+            ret, _, _ = g.run(mount_obj.client_system, cmd)
+            self.assertFalse(ret, "Creating softlinks failed")
+            g.log.info("Softlinks of files created successfully")
+
+            cmd = ('cd %s/dir1/dir%s; '
+                   'for FILENAME in *; '
+                   'do ln $FILENAME hardlink_$FILENAME; '
+                   'done;'
+                   % (mount_obj.mountpoint, str(start + 1)))
+            ret, _, _ = g.run(mount_obj.client_system, cmd)
+            self.assertFalse(ret, "Creating hardlinks failed")
+            g.log.info("Hardlinks of files created successfully")
+            start += 5
+        # Create tiny, small, medium and large files at the mountpoint
+        # (cd first so the files land on the volume). offset is used to
+        # differ filenames at different clients.
+        offset = 1
+        for mount_obj in self.mounts:
+            cmd = ('cd %s; fallocate -l 100 tiny_file%s.txt'
+                   % (mount_obj.mountpoint, str(offset)))
+            ret, _, _ = g.run(mount_obj.client_system, cmd)
+            self.assertFalse(ret, "Fallocate of tiny file failed")
+            g.log.info("Fallocate of tiny file successful")
+
+            cmd = ('cd %s; fallocate -l 20M small_file%s.txt'
+                   % (mount_obj.mountpoint, str(offset)))
+            ret, _, _ = g.run(mount_obj.client_system, cmd)
+            self.assertFalse(ret, "Fallocate of small file failed")
+            g.log.info("Fallocate of small file successful")
+
+            cmd = ('cd %s; fallocate -l 200M medium_file%s.txt'
+                   % (mount_obj.mountpoint, str(offset)))
+            ret, _, _ = g.run(mount_obj.client_system, cmd)
+            self.assertFalse(ret, "Fallocate of medium file failed")
+            g.log.info("Fallocate of medium file successful")
+
+            cmd = ('cd %s; fallocate -l 1G large_file%s.txt'
+                   % (mount_obj.mountpoint, str(offset)))
+            ret, _, _ = g.run(mount_obj.client_system, cmd)
+            self.assertFalse(ret, "Fallocate of large file failed")
+            g.log.info("Fallocate of large file successful")
+            offset += 1
+
+        # Create files on the client side in dir1
+        # Write IO
+        all_mounts_procs, count = [], 1
+        for mount_obj in self.mounts:
+            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
+                       mount_obj.mountpoint)
+            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
+                   "--dirname-start-num %d "
+                   "--dir-depth 2 "
+                   "--dir-length 10 "
+                   "--max-num-of-dirs 5 "
+                   "--num-of-files 5 %s/dir1" % (
+                       self.script_upload_path, count,
+                       mount_obj.mountpoint))
+            proc = g.run_async(mount_obj.client_system, cmd,
+                               user=mount_obj.user)
+            all_mounts_procs.append(proc)
+            count += 10
+
+        # Enable USS
+        ret, _, _ = enable_uss(self.mnode, self.volname)
+        self.assertEqual(ret, 0, "Failed to enable USS on volume")
+        g.log.info("Successfully enabled USS on volume")
+
+        # Create a snapshot
+        ret, _, _ = snap_create(self.mnode, self.volname,
+                                "ec_snap", timestamp=False)
+        self.assertEqual(ret, 0, "Failed to create snapshot ec_snap")
+        g.log.info("Snapshot ec_snap of volume %s created "
+                   "successfully.", self.volname)
+
+        # Activate the snapshot
+        ret, _, _ = snap_activate(self.mnode, "ec_snap")
+        self.assertEqual(ret, 0, "Failed to activate snapshot ec_snap")
+        g.log.info("Snapshot activated successfully")
+
+        # Wait briefly, then list the contents inside the snapshot
+        sleep(5)
+        for mount_obj in self.mounts:
+            ret, out, _ = uss_list_snaps(mount_obj.client_system,
+                                         mount_obj.mountpoint)
+            self.assertEqual(ret, 0, "Directory listing failed for"
+                             " activated snapshot")
+            self.assertIn("ec_snap", out.split("\n"), "Failed to "
+                          "validate ec_snap under the .snaps directory")
+            g.log.info("Activated snapshot listed successfully")
+
+        # Delete snapshot ec_snap
+        ret, _, _ = snap_delete(self.mnode, "ec_snap")
+        self.assertEqual(ret, 0, "Failed to delete snapshot")
+        g.log.info("Snapshot deleted successfully")
+
+        # Create a snapshot with the same name
+        ret, _, _ = snap_create(self.mnode, self.volname,
+                                "ec_snap", timestamp=False)
+        self.assertEqual(ret, 0, "Failed to create snapshot ec_snap")
+        g.log.info("Snapshot ec_snap of volume %s created "
+                   "successfully.", self.volname)
+
+        # Activate snapshot ec_snap
+        ret, _, _ = snap_activate(self.mnode, "ec_snap")
+        self.assertEqual(ret, 0, "Failed to activate snapshot ec_snap")
+        g.log.info("Snapshot activated successfully")
+
+        # Wait briefly, then list the contents inside ec_snap
+        sleep(5)
+        for mount_obj in self.mounts:
+            ret, out, _ = uss_list_snaps(mount_obj.client_system,
+                                         mount_obj.mountpoint)
+            self.assertEqual(ret, 0, "Directory listing failed for"
+                             " activated snapshot")
+            self.assertIn("ec_snap", out.split('\n'), "Failed to "
+                          "validate ec_snap under the .snaps directory")
+            g.log.info("Activated snapshot listed successfully")
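The USS round trip exercised above, condensed into one predicate (a sketch built from the same glustolibs calls; the snapshot name is arbitrary):

from glustolibs.gluster.snap_ops import snap_activate, snap_create
from glustolibs.gluster.uss_ops import enable_uss, uss_list_snaps


def snap_visible_under_dot_snaps(mnode, volname, mount, snapname="ec_snap"):
    """Enable USS, create and activate a snap, then check .snaps lists it."""
    assert enable_uss(mnode, volname)[0] == 0
    assert snap_create(mnode, volname, snapname, timestamp=False)[0] == 0
    assert snap_activate(mnode, snapname)[0] == 0
    ret, out, _ = uss_list_snaps(mount.client_system, mount.mountpoint)
    return ret == 0 and snapname in out.split('\n')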
+ "validate ec_snap under .snaps directory") + g.log.info("Activated Snapshot listed Successfully") + + # Validating IO's and waiting to complete + ret = validate_io_procs(all_mounts_procs, self.mounts) + self.assertTrue(ret, "IO failed on some of the clients") + g.log.info("Successfully validated all io's") + + # Close connection and check file exist for memory log + ret = file_exists(self.mnode, + '/var/log/glusterfs/mem_usage.log') + self.assertTrue(ret, "Unexpected:Memory log file does " + "not exist") + g.log.info("Memory log file exists") + for proc in cmd_list_procs: + ret, _, _ = proc.async_communicate() + self.assertEqual(ret, 0, "Memory logging failed") + g.log.info("Memory logging is successful") diff --git a/tests/functional/disperse/test_no_fresh_lookup_on_directory.py b/tests/functional/disperse/test_no_fresh_lookup_on_directory.py new file mode 100644 index 000000000..7be18b4e1 --- /dev/null +++ b/tests/functional/disperse/test_no_fresh_lookup_on_directory.py @@ -0,0 +1,183 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from random import choice + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.brickdir import file_exists +from glustolibs.gluster.mount_ops import mount_volume, umount_volume +from glustolibs.gluster.volume_libs import set_volume_options, get_subvols +from glustolibs.gluster.glusterfile import occurences_of_pattern_in_file +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + get_online_bricks_list) +from glustolibs.gluster.volume_ops import volume_start +from glustolibs.gluster.volume_libs import wait_for_volume_process_to_be_online + + +@runs_on([['distributed-dispersed', 'distributed-replicated', + 'distributed-arbiter'], ['glusterfs']]) +class TestNoFreshLookUpBrickDown(GlusterBaseClass): + + def setUp(self): + """ + setUp method for every test + """ + # calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setting client log-level to Debug + self.volume['options'] = {'diagnostics.client-log-level': 'DEBUG'} + + # Creating Volume and mounting + ret = self.setup_volume() + if not ret: + raise ExecutionError("Volume creation failed: %s" % self.volname) + g.log.info("Volume is created and started") + + def tearDown(self): + """ + tearDown method for every test + """ + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + ret = umount_volume(mclient=self.mounts[0].client_system, + mpoint=self.mountpoint) + if not ret: + raise ExecutionError("Unable to umount the volume") + g.log.info("Unmounting of the volume %s succeeded", self.volname) + + # Resetting the volume option set in the setup + 
+        ret = set_volume_options(self.mnode, self.volname,
+                                 {'diagnostics.client-log-level': 'INFO'})
+        if not ret:
+            raise ExecutionError("Unable to set the client log level to INFO")
+        g.log.info("Volume option is set successfully.")
+
+        ret = self.cleanup_volume()
+        if not ret:
+            raise ExecutionError("Unable to perform volume cleanup")
+        g.log.info("Volume cleanup is successful")
+
+    def do_lookup(self, dirname):
+        """
+        Perform a lookup on the directory.
+        """
+        ret = file_exists(self.mounts[0].client_system, dirname)
+        self.assertTrue(ret, "Directory %s doesn't exist" % dirname)
+        g.log.info("Directory present on the %s",
+                   self.mounts[0].client_system)
+
+    def match_occurences(self, first_count, search_pattern, filename):
+        """
+        Validate that the count of the search pattern is unchanged by
+        the latest lookup.
+        """
+        newcount = occurences_of_pattern_in_file(self.mounts[0].client_system,
+                                                 search_pattern, filename)
+        self.assertEqual(first_count, newcount, "Failed: more lookups than "
+                         "expected were logged for the directory")
+        g.log.info("No new lookups for dir1")
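The two helpers above combine into a before/after log-count check; the same flow, generically (a sketch, names hypothetical):

from glustolibs.gluster.glusterfile import occurences_of_pattern_in_file


def causes_no_new_lookups(client, pattern, logfile, trigger):
    """Return True if trigger() adds no new matches of pattern to logfile."""
    before = occurences_of_pattern_in_file(client, pattern, logfile)
    trigger()  # e.g. a lookup (stat) of the directory under test
    after = occurences_of_pattern_in_file(client, pattern, logfile)
    return after == before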
+    def test_no_fresh_lookup(self):
+        """
+        The testcase covers negative lookup of a directory in
+        distributed-replicated and distributed-dispersed volumes
+        1. Mount the volume on one client.
+        2. Create a directory
+        3. Validate the number of lookups for the directory creation from
+           the log file.
+        4. Perform a new lookup of the directory
+        5. No new lookups should have happened on the directory; validate
+           from the log file.
+        6. Bring down one subvol of the volume and repeat steps 4, 5
+        7. Bring down one brick from the online bricks and repeat steps 4, 5
+        8. Start the volume with force and wait for all processes to be
+           online.
+        """
+
+        # Mounting the volume on a distinct directory for the validation of
+        # the testcase
+        self.mountpoint = "/mnt/" + self.volname
+        ret, _, _ = mount_volume(self.volname, mtype=self.mount_type,
+                                 mpoint=self.mountpoint,
+                                 mserver=self.mnode,
+                                 mclient=self.mounts[0].client_system)
+        self.assertEqual(ret, 0, ("Volume %s is not mounted") % self.volname)
+        g.log.info("Volume mounted successfully: %s", self.volname)
+
+        # Distinct log file for the validation of this test
+        filename = "/var/log/glusterfs/mnt-" + self.volname + ".log"
+
+        # Create a dir on the mount point
+        dirname = self.mountpoint + "/dir1"
+        ret = mkdir(host=self.mounts[0].client_system, fqpath=dirname)
+        self.assertTrue(ret, "Failed to create dir1")
+        g.log.info("dir1 created successfully for %s",
+                   self.mounts[0].client_system)
+
+        search_pattern = "/dir1: Calling fresh lookup"
+
+        # Check the log file for the pattern
+        first_count = occurences_of_pattern_in_file(
+            self.mounts[0].client_system, search_pattern, filename)
+        self.assertGreater(first_count, 0, "Unable to find "
+                           "the pattern in the given file")
+        g.log.info("Searched for the pattern in the log file successfully")
+
+        # Perform a lookup of the directory dir1
+        self.do_lookup(dirname)
+
+        # Recheck the number of lookups from the log file
+        self.match_occurences(first_count, search_pattern, filename)
+
+        # Bring down one subvol of the volume
+        ret = get_subvols(self.mnode, self.volname)
+        brick_list = choice(ret['volume_subvols'])
+        ret = bring_bricks_offline(self.volname, brick_list)
+        self.assertTrue(ret, "Unable to bring the given bricks offline")
+        g.log.info("Able to bring all the bricks in the subvol offline")
+
+        # Do a lookup on the mountpoint for the directory dir1
+        self.do_lookup(dirname)
+
+        # Recheck the number of occurrences of the lookup pattern
+        self.match_occurences(first_count, search_pattern, filename)
+
+        # From the online bricks, bring down one brick
+        online_bricks = get_online_bricks_list(self.mnode, self.volname)
+        self.assertIsNotNone(online_bricks, "Unable to fetch online bricks")
+        g.log.info("Able to fetch the online bricks")
+        offline_brick = choice(online_bricks)
+        ret = bring_bricks_offline(self.volname, [offline_brick])
+        self.assertTrue(ret, "Unable to bring the brick %s offline " %
+                        offline_brick)
+        g.log.info("Successfully brought the brick %s offline", offline_brick)
+
+        # Do a lookup on the mountpoint and check for new lookups in the log
+        self.do_lookup(dirname)
+        self.match_occurences(first_count, search_pattern, filename)
+
+        # Start the volume with force
+        ret, _, err = volume_start(self.mnode, self.volname, force=True)
+        self.assertEqual(ret, 0, "Unable to force start the volume %s " % err)
+        g.log.info("Volume started successfully")
+
+        # Wait for all the processes to be online
+        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
+        self.assertTrue(ret, "Some processes are offline")
+        g.log.info("All processes of the volume are online")
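For reference, the subvol fault-injection pattern this last test leans on, in isolation (a sketch assuming a started volume and the glustolibs helpers imported above):

from random import choice

from glustolibs.gluster.brick_libs import bring_bricks_offline
from glustolibs.gluster.volume_libs import (
    get_subvols, wait_for_volume_process_to_be_online)
from glustolibs.gluster.volume_ops import volume_start


def kill_one_subvol_and_restore(mnode, volname):
    """Take a random subvol offline, then force-start to bring it back."""
    subvol = choice(get_subvols(mnode, volname)['volume_subvols'])
    assert bring_bricks_offline(volname, subvol)
    assert volume_start(mnode, volname, force=True)[0] == 0
    assert wait_for_volume_process_to_be_online(mnode, volname)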