| author | Leela Venkaiah G <lgangava@redhat.com> | 2020-08-10 16:59:28 +0530 |
|---|---|---|
| committer | Arthy Loganathan <aloganat@redhat.com> | 2020-08-19 06:50:42 +0000 |
| commit | 45fce18404361aa7093b3f8a85f50eda87fa5b82 | |
| tree | f5bb7865040b7bf1f162473fa276c260956f2e9c /tests | |
| parent | 81440d1bab4d43785b37d285877b235ddd9ac6b6 | |
[Test] Validate AFR, arbiter self-heal with IO
- Validate `heal info` returns before timeout with IO
- Validate `heal info` returns before timeout with IO and brick down
- Validate data heal on file append in AFR, arbiter
- Validate entry heal on file append in AFR, arbiter
Change-Id: I803b931cd82d97b5c20bd23cd5670cb9e6f04176
Signed-off-by: Leela Venkaiah G <lgangava@redhat.com>
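
The first two validations hinge on a timing bound for `heal info` while IO is in flight. Below is a minimal standalone sketch of that check, assuming the glustolibs `heal_info` wrapper; the helper name is hypothetical and `mnode`/`volname` are placeholders for a live setup:

```python
# Minimal sketch of the timing check described above: `heal info` must
# exit cleanly and return within a small bound even while IO keeps the
# volume busy. `mnode` and `volname` are placeholders for a live setup.
from time import time

from glustolibs.gluster.heal_ops import heal_info


def heal_info_returns_in_time(mnode, volname, timeout=8):
    """Return True if `gluster volume heal <volname> info` succeeds
    within `timeout` seconds."""
    start = time()
    ret, _, _ = heal_info(mnode, volname)
    return ret == 0 and (time() - start) < timeout
```

The 8-second bound mirrors the default `timeout` used by `_validate_heal` in the patch below.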
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/functional/afr/test_repl_heal_with_io.py | 306 |
|---|---|---|

1 file changed, 306 insertions, 0 deletions
```diff
diff --git a/tests/functional/afr/test_repl_heal_with_io.py b/tests/functional/afr/test_repl_heal_with_io.py
new file mode 100644
index 000000000..efcb4a4e3
--- /dev/null
+++ b/tests/functional/afr/test_repl_heal_with_io.py
@@ -0,0 +1,306 @@
+#  Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License along
+#  with this program; if not, write to the Free Software Foundation, Inc.,
+#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from random import choice
+from time import sleep, time
+
+from glusto.core import Glusto as g
+
+from glustolibs.gluster.brick_libs import bring_bricks_offline
+from glustolibs.gluster.dht_test_utils import find_hashed_subvol
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.heal_libs import monitor_heal_completion
+from glustolibs.gluster.heal_ops import heal_info
+from glustolibs.gluster.volume_libs import (
+    get_subvols, wait_for_volume_process_to_be_online)
+from glustolibs.gluster.volume_ops import volume_start
+from glustolibs.io.utils import wait_for_io_to_complete
+
+
+@runs_on([[
+    'arbiter', 'distributed-arbiter', 'replicated', 'distributed-replicated'
+], ['glusterfs', 'nfs']])
+class TestHealWithIO(GlusterBaseClass):
+    def setUp(self):
+        self.get_super_method(self, 'setUp')()
+
+        # A single mount is enough for all the tests
+        self.mounts = [self.mounts[0]]
+
+        # The `test_heal_info_...` tests need a distribute count of 6
+        if ('test_heal_info' in self.id().split('.')[-1]
+                and self.volume_type.find('distributed') >= 0):
+            self.volume['voltype']['dist_count'] = 6
+
+        if not self.setup_volume_and_mount_volume(mounts=self.mounts):
+            raise ExecutionError('Failed to setup and mount '
+                                 '{}'.format(self.volname))
+
+        self.client, self.m_point = (self.mounts[0].client_system,
+                                     self.mounts[0].mountpoint)
+        self.file_path = self.m_point + '/test_file'
+        self._io_cmd = ('cat /dev/urandom | tr -dc [:space:][:print:] | '
+                        'head -c {} ')
+        # IO has to run for a longer duration to cover both scenarios in
+        # the arbiter volume type
+        self.io_time = 600 if self.volume_type.find('arbiter') >= 0 else 300
+        self.proc = ''
+
+    def tearDown(self):
+        if self.proc:
+            ret = wait_for_io_to_complete([self.proc], [self.mounts[0]])
+            if not ret:
+                raise ExecutionError('Wait for IO completion failed on client')
+
+        if not self.unmount_volume_and_cleanup_volume(mounts=self.mounts):
+            raise ExecutionError('Not able to unmount and cleanup '
+                                 '{}'.format(self.volname))
+        self.get_super_method(self, 'tearDown')()
+
+    def _validate_heal(self, timeout=8):
+        """
+        Validate that `heal info` returns in less than `timeout` seconds
+        """
+        start_time = time()
+        ret, _, _ = heal_info(self.mnode, self.volname)
+        end_time = time()
+        self.assertEqual(ret, 0, 'Not able to query heal info status')
+        self.assertLess(
+            end_time - start_time, timeout,
+            'Query of heal info of volume took more than {} '
+            'seconds'.format(timeout))
+
+    def _validate_io(self, delay=5):
+        """
+        Validate IO was happening during the main test, measured by the
+        time delay between issuing and returning of `async_communicate`
+        """
+        start_time = time()
+        ret, _, err = self.proc.async_communicate()
+        end_time = time()
+        self.assertEqual(ret, 0, 'IO failed to complete with error '
+                         '{}'.format(err))
+        self.assertGreater(
+            end_time - start_time, delay,
+            'Unable to validate IO was happening during main test')
+        self.proc = ''
+
+    def _bring_brick_offline(self, bricks_list, arb_brick=False):
+        """
+        Bring the arbiter brick offline if `arb_brick` is true, else bring
+        one of the data bricks offline
+        """
+        # Pick up only `data` brick
+        off_brick, b_type = bricks_list[:-1], 'data'
+        if arb_brick:
+            # Pick only `arbiter` brick
+            off_brick, b_type = [bricks_list[-1]], 'arbiter'
+        elif not arb_brick and self.volume_type.find('replicated') >= 0:
+            # Should pick all bricks if voltype is `replicated`
+            off_brick = bricks_list
+
+        ret = bring_bricks_offline(self.volname, choice(off_brick))
+        self.assertTrue(ret,
+                        'Unable to bring `{}` brick offline'.format(b_type))
+
+    def _get_hashed_subvol_index(self, subvols):
+        """
+        Return `index` of the hashed subvolume from the list of subvols
+        """
+        index = 0
+        if self.volume_type.find('distributed') >= 0:
+            hashed_subvol, index = find_hashed_subvol(
+                subvols, '',
+                self.file_path.rsplit('/', maxsplit=1)[1])
+            self.assertIsNotNone(hashed_subvol,
+                                 'Unable to find hashed subvolume')
+        return index
+
+    def _validate_brick_down_scenario(self,
+                                      validate_heal=False,
+                                      monitor_heal=False):
+        """
+        Common steps across volume types for validating the brick down
+        scenario
+        """
+        if validate_heal:
+            # Wait for ample amount of IO to be written to file
+            sleep(180)
+
+            # Validate heal info shows o/p and exits in <8s
+            self._validate_heal()
+
+        # Force start volume and verify all processes are online
+        ret, _, _ = volume_start(self.mnode, self.volname, force=True)
+        self.assertEqual(ret, 0, 'Unable to force start volume')
+
+        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
+        self.assertTrue(
+            ret, 'Not able to confirm all processes of volume are online')
+
+        if monitor_heal:
+            # Wait for IO to be written to file
+            sleep(30)
+
+            # Monitor heal and validate data was appended successfully to file
+            ret = monitor_heal_completion(self.mnode, self.volname)
+            self.assertTrue(ret,
+                            'Self heal not completed after brick came online')
+
+    def _perform_heal_append_scenario(self):
+        """
+        Common steps shared by the `entry_heal` and `data_heal` tests
+        """
+        # Find hashed subvol of the file with IO
+        subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
+        index = self._get_hashed_subvol_index(subvols)
+
+        # Bring down one of the `data` bricks of hashed subvol
+        self._bring_brick_offline(bricks_list=subvols[index])
+
+        cmd = ('{} >> {}; '.format(self._io_cmd.format('1G'), self.file_path))
+        ret, _, _ = g.run(self.client, cmd)
+        self.assertEqual(
+            ret, 0, 'Unable to append 1G of data to existing '
+            'file on mount post offline of a brick')
+
+        # Start volume and verify all processes are online
+        self._validate_brick_down_scenario()
+
+        # Start continuous IO and monitor heal completion
+        cmd = ('count={}; while [ $count -gt 1 ]; do {} >> {}; sleep 1; '
+               '((count--)); done;'.format(self.io_time,
+                                           self._io_cmd.format('1M'),
+                                           self.file_path))
+        self.proc = g.run_async(self.client, cmd)
+        self._validate_brick_down_scenario(monitor_heal=True)
+
+        # Bring down `arbiter` brick and perform validation
+        if self.volume_type.find('arbiter') >= 0:
+            self._bring_brick_offline(bricks_list=subvols[index],
+                                      arb_brick=True)
+            self._validate_brick_down_scenario(monitor_heal=True)
+
+        self._validate_io()
+
+    def test_heal_info_with_io(self):
+        """
+        Description: Validate heal info command with IO
+
+        Steps:
+        - Create and mount a 6x3 replicated volume
+        - Create a file and perform IO continuously on this file
+        - While IOs are happening, issue the `heal info` command and
+          validate its o/p does not take much time
+        """
+        cmd = ('count=90; while [ $count -gt 1 ]; do {} >> {}; sleep 1; '
+               '((count--)); done;'.format(self._io_cmd.format('5M'),
+                                           self.file_path))
+        self.proc = g.run_async(self.client, cmd)
+
+        # Wait for IO to be written to file
+        sleep(30)
+
+        # Validate heal info shows o/p and exits in <8s
+        self._validate_heal()
+
+        # Validate IO was happening
+        self._validate_io()
+
+        g.log.info('Pass: Test heal info with IO is complete')
+
+    def test_heal_info_with_io_and_brick_down(self):
+        """
+        Description: Validate heal info command with IO and brick down
+
+        Steps:
+        - Create and mount a 6x3 replicated volume
+        - Create a file and perform IO continuously on this file
+        - While IOs are happening, bring down one of the bricks where the
+          file is getting hashed to
+        - After a period of ~5 min, issue the `heal info` command and
+          validate its o/p does not take much time
+        - Repeat the steps for arbiter on bringing arbiter brick down
+        """
+        cmd = ('count={}; while [ $count -gt 1 ]; do {} >> {}; sleep 1; '
+               '((count--)); done;'.format(self.io_time,
+                                           self._io_cmd.format('5M'),
+                                           self.file_path))
+        self.proc = g.run_async(self.client, cmd)
+
+        # Wait for IO to be written to file
+        sleep(30)
+
+        # Find hashed subvol of the file with IO
+        subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
+        index = self._get_hashed_subvol_index(subvols)
+
+        # Bring down one of the `data` bricks of hashed subvol
+        self._bring_brick_offline(bricks_list=subvols[index])
+
+        # Validate heal and bring volume online
+        self._validate_brick_down_scenario(validate_heal=True)
+
+        # Bring down `arbiter` brick and perform validation
+        if self.volume_type.find('arbiter') >= 0:
+            self._bring_brick_offline(bricks_list=subvols[index],
+                                      arb_brick=True)
+
+            # Validate heal and bring volume online
+            self._validate_brick_down_scenario(validate_heal=True)
+
+        self._validate_io()
+
+        g.log.info('Pass: Test heal info with IO and brick down is complete')
+
+    def test_data_heal_on_file_append(self):
+        """
+        Description: Validate appends to a self healing file (data heal check)
+
+        Steps:
+        - Create and mount a 1x2 replicated volume
+        - Create a file of ~1 GB from the mount
+        - Bring down a brick and write more data to the file
+        - Bring up the offline brick and validate appending data to the file
+          succeeds while the file self heals
+        - Repeat the steps for arbiter on bringing arbiter brick down
+        """
+        cmd = ('{} >> {}; '.format(self._io_cmd.format('1G'), self.file_path))
+        ret, _, _ = g.run(self.client, cmd)
+        self.assertEqual(ret, 0, 'Unable to create 1G of file on mount')
+
+        # Perform `data_heal` test
+        self._perform_heal_append_scenario()
+
+        g.log.info('Pass: Test data heal on file append is complete')
+
+    def test_entry_heal_on_file_append(self):
+        """
+        Description: Validate appends to a self healing file (entry heal check)
+
+        Steps:
+        - Create and mount a 1x2 replicated volume
+        - Bring down a brick and write data to the file
+        - Bring up the offline brick and validate appending data to the file
+          succeeds while the file self heals
+        - Repeat the steps for arbiter on bringing arbiter brick down
+        """
+
+        # Perform `entry_heal` test
+        self._perform_heal_append_scenario()
+
+        g.log.info('Pass: Test entry heal on file append is complete')
```
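
For reference, all the IO in these tests comes from the single `_io_cmd` shell template above. A minimal sketch of how the timed append loop is assembled (the mount path in the example is illustrative, not from the commit):

```python
# Illustration of the IO pattern the tests drive from the mount point:
# append pseudo-random printable data to one file in a one-second loop,
# so the file keeps changing while `heal info` and self-heal run.
IO_CMD = 'cat /dev/urandom | tr -dc [:space:][:print:] | head -c {} '


def build_append_loop(file_path, size='5M', seconds=90):
    """Build the shell one-liner the tests hand to `g.run_async`."""
    return ('count={}; while [ $count -gt 1 ]; do {} >> {}; sleep 1; '
            '((count--)); done;'.format(seconds, IO_CMD.format(size),
                                        file_path))


# Example: ~90 seconds of 5M appends to a file on the mount point
print(build_append_loop('/mnt/testvol/test_file'))
```

Appending in one-second ticks keeps the file dirty for the whole window, which is what lets `_validate_io` assert that IO genuinely overlapped the heal checks.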
