path: root/tests/functional/arbiter/test_verify_metadata_and_data_heal.py
Diffstat (limited to 'tests/functional/arbiter/test_verify_metadata_and_data_heal.py')
-rw-r--r--  tests/functional/arbiter/test_verify_metadata_and_data_heal.py  297
1 file changed, 297 insertions, 0 deletions
diff --git a/tests/functional/arbiter/test_verify_metadata_and_data_heal.py b/tests/functional/arbiter/test_verify_metadata_and_data_heal.py
new file mode 100644
index 0000000..d48e36e
--- /dev/null
+++ b/tests/functional/arbiter/test_verify_metadata_and_data_heal.py
@@ -0,0 +1,297 @@
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from glusto.core import Glusto as g
+
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+ bring_bricks_online,
+ get_online_bricks_list)
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.gluster.heal_libs import (
+ is_heal_complete, is_volume_in_split_brain, monitor_heal_completion,
+ wait_for_self_heal_daemons_to_be_online)
+from glustolibs.gluster.heal_ops import (disable_self_heal_daemon,
+ enable_self_heal_daemon, trigger_heal)
+from glustolibs.gluster.lib_utils import (add_user, collect_bricks_arequal,
+ del_user, group_add, group_del)
+from glustolibs.gluster.volume_libs import get_subvols
+from glustolibs.io.utils import list_all_files_and_dirs_mounts
+
+
+@runs_on([['arbiter', 'replicated'], ['glusterfs']])
+class TestMetadataAndDataHeal(GlusterBaseClass):
+ '''Description: Verify shd heals files after performing metadata and data
+ operations while a brick was down'''
+ def _dac_helper(self, host, option):
+ '''Helper for creating, deleting users and groups'''
+
+        # Permission/ownership changes are required only for the
+        # `test_metadata*` tests; they use dedicated test groups and a user
+ if 'metadata' not in self.test_dir:
+ return
+
+ if option == 'create':
+ # Groups
+ for group in ('qa_func', 'qa_system'):
+ if not group_add(host, group):
+ raise ExecutionError('Unable to {} group {} on '
+ '{}'.format(option, group, host))
+
+ # User
+ if not add_user(host, 'qa_all', group='qa_func'):
+ raise ExecutionError('Unable to {} user {} under {} on '
+ '{}'.format(option, 'qa_all', 'qa_func',
+ host))
+ elif option == 'delete':
+ # Groups
+ for group in ('qa_func', 'qa_system'):
+ if not group_del(host, group):
+ raise ExecutionError('Unable to {} group {} on '
+ '{}'.format(option, group, host))
+
+ # User
+ if not del_user(host, 'qa_all'):
+ raise ExecutionError('Unable to {} user on {}'.format(
+ option, host))
+
+ def setUp(self):
+ self.get_super_method(self, 'setUp')()
+
+ # A single mount is enough for all the tests
+ self.mounts = self.mounts[0:1]
+ self.client = self.mounts[0].client_system
+
+ # Use testcase name as test directory
+ self.test_dir = self.id().split('.')[-1]
+ self.fqpath = self.mounts[0].mountpoint + '/' + self.test_dir
+
+ if not self.setup_volume_and_mount_volume(mounts=self.mounts):
+ raise ExecutionError('Failed to setup and mount '
+ '{}'.format(self.volname))
+
+        # Create group and user names required for the test
+ self._dac_helper(host=self.client, option='create')
+
+ def tearDown(self):
+ # Delete group and user names created as part of setup
+ self._dac_helper(host=self.client, option='delete')
+
+ if not self.unmount_volume_and_cleanup_volume(mounts=self.mounts):
+ raise ExecutionError('Not able to unmount and cleanup '
+ '{}'.format(self.volname))
+
+ self.get_super_method(self, 'tearDown')()
+
+ def _perform_io_and_disable_self_heal(self):
+ '''Refactor of steps common to all tests: Perform IO, disable heal'''
+ ret = mkdir(self.client, self.fqpath)
+ self.assertTrue(ret,
+ 'Directory creation failed on {}'.format(self.client))
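+        # Generates random printable data; the required size (10K, 2K etc.)
+        # is appended to this command at the point of use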
+ self.io_cmd = 'cat /dev/urandom | tr -dc [:space:][:print:] | head -c '
+        # Create 6 dirs, 6 files, and 6 files in each subdir with 10K data
+ file_io = ('''cd {0}; for i in `seq 1 6`;
+ do mkdir dir.$i; {1} 10K > file.$i;
+ for j in `seq 1 6`;
+ do {1} 10K > dir.$i/file.$j; done;
+ done;'''.format(self.fqpath, self.io_cmd))
+ ret, _, err = g.run(self.client, file_io)
+ self.assertEqual(ret, 0, 'Unable to create directories and data files')
+ self.assertFalse(err, '{0} failed with {1}'.format(file_io, err))
+
+        # Disable self heal daemon
+        self.assertTrue(disable_self_heal_daemon(self.mnode, self.volname),
+                        'Disabling self-heal-daemon failed')
+
+ def _perform_brick_ops_and_enable_self_heal(self, op_type):
+ '''Refactor of steps common to all tests: Brick down and perform
+ metadata/data operations'''
+ # First brick in the subvol will always be online and used for self
+ # heal, so make keys match brick index
+ self.op_cmd = {
+ # Metadata Operations (owner and permission changes)
+ 'metadata': {
+ 2:
+ '''cd {0}; for i in `seq 1 3`; do chown -R qa_all:qa_func \
+ dir.$i file.$i; chmod -R 555 dir.$i file.$i; done;''',
+ 3:
+ '''cd {0}; for i in `seq 1 3`; do chown -R :qa_system \
+ dir.$i file.$i; chmod -R 777 dir.$i file.$i; done;''',
+ # 4 - Will be used for final data consistency check
+ 4:
+ '''cd {0}; for i in `seq 1 6`; do chown -R qa_all:qa_system \
+ dir.$i file.$i; chmod -R 777 dir.$i file.$i; done;''',
+ },
+ # Data Operations (append data to the files)
+ 'data': {
+ 2:
+ '''cd {0}; for i in `seq 1 3`;
+ do {1} 2K >> file.$i;
+ for j in `seq 1 3`;
+ do {1} 2K >> dir.$i/file.$j; done;
+ done;''',
+ 3:
+ '''cd {0}; for i in `seq 1 3`;
+ do {1} 3K >> file.$i;
+ for j in `seq 1 3`;
+ do {1} 3K >> dir.$i/file.$j; done;
+ done;''',
+ # 4 - Will be used for final data consistency check
+ 4:
+ '''cd {0}; for i in `seq 1 6`;
+ do {1} 4K >> file.$i;
+ for j in `seq 1 6`;
+ do {1} 4K >> dir.$i/file.$j; done;
+ done;''',
+ },
+ }
+ bricks = get_online_bricks_list(self.mnode, self.volname)
+ self.assertIsNotNone(bricks,
+ 'Not able to get list of bricks in the volume')
+
+ # Make first brick always online and start operations from second brick
+ for index, brick in enumerate(bricks[1:], start=2):
+
+ # Bring brick offline
+ ret = bring_bricks_offline(self.volname, brick)
+            self.assertTrue(ret, 'Unable to bring {} offline'.format(brick))
+
+ # Perform metadata/data operation
+ cmd = self.op_cmd[op_type][index].format(self.fqpath, self.io_cmd)
+ ret, _, err = g.run(self.client, cmd)
+ self.assertEqual(ret, 0, '{0} failed with {1}'.format(cmd, err))
+ self.assertFalse(err, '{0} failed with {1}'.format(cmd, err))
+
+ # Bring brick online
+            ret = bring_bricks_online(
+                self.mnode,
+                self.volname,
+                brick,
+                bring_bricks_online_methods='volume_start_force')
+            self.assertTrue(ret, 'Unable to bring {} online'.format(brick))
+
+ # Assert metadata/data operations resulted in pending heals
+ self.assertFalse(is_heal_complete(self.mnode, self.volname))
+
+ # Enable and wait self heal daemon to be online
+ self.assertTrue(enable_self_heal_daemon(self.mnode, self.volname),
+ 'Enabling self heal daemon failed')
+ self.assertTrue(
+ wait_for_self_heal_daemons_to_be_online(self.mnode, self.volname),
+ 'Not all self heal daemons are online')
+
+ def _validate_heal_completion_and_arequal(self, op_type):
+ '''Refactor of steps common to all tests: Validate heal from heal
+ commands, verify arequal, perform IO and verify arequal after IO'''
+
+ # Validate heal completion
+ self.assertTrue(monitor_heal_completion(self.mnode, self.volname),
+ 'Self heal is not completed within timeout')
+ self.assertFalse(
+ is_volume_in_split_brain(self.mnode, self.volname),
+ 'Volume is in split brain even after heal completion')
+
+ subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
+ self.assertTrue(subvols, 'Not able to get list of subvols')
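+        # Arbiter bricks store only metadata and no file data, so exclude
+        # the last brick (arbiter) of each subvol from arequal comparison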
+ arbiter = self.volume_type.find('arbiter') >= 0
+ stop = len(subvols[0]) - 1 if arbiter else len(subvols[0])
+
+ # Validate arequal
+ self._validate_arequal_and_perform_lookup(subvols, stop)
+
+ # Perform some additional metadata/data operations
+ cmd = self.op_cmd[op_type][4].format(self.fqpath, self.io_cmd)
+ ret, _, err = g.run(self.client, cmd)
+ self.assertEqual(ret, 0, '{0} failed with {1}'.format(cmd, err))
+ self.assertFalse(err, '{0} failed with {1}'.format(cmd, err))
+
+ # Validate arequal after additional operations
+ self._validate_arequal_and_perform_lookup(subvols, stop)
+
+ def _validate_arequal_and_perform_lookup(self, subvols, stop):
+ '''Refactor of steps common to all tests: Validate arequal from bricks
+ backend and perform a lookup of all files from mount'''
+ for subvol in subvols:
+ ret, arequal = collect_bricks_arequal(subvol[0:stop])
+ self.assertTrue(
+ ret, 'Unable to get `arequal` checksum on '
+ '{}'.format(subvol[0:stop]))
+            self.assertEqual(
+                len(set(arequal)), 1, 'Mismatch of `arequal` '
+                'checksums among {}'.format(subvol[0:stop]))
+
+ # Perform a lookup of all files and directories on mounts
+ self.assertTrue(list_all_files_and_dirs_mounts(self.mounts),
+ 'Failed to list all files and dirs from mount')
+
+ def test_metadata_heal_from_shd(self):
+ '''Description: Verify files heal after switching on `self-heal-daemon`
+ when metadata operations are performed while a brick was down
+
+ Steps:
+ 1. Create, mount and run IO on volume
+ 2. Set `self-heal-daemon` to `off`, cyclic brick down and perform
+ metadata operations
+ 3. Set `self-heal-daemon` to `on` and wait for heal completion
+        4. Validate arequal checksum on backend bricks
+ '''
+ op_type = 'metadata'
+ self._perform_io_and_disable_self_heal()
+ self._perform_brick_ops_and_enable_self_heal(op_type=op_type)
+ self._validate_heal_completion_and_arequal(op_type=op_type)
+ g.log.info('Pass: Verification of metadata heal after switching on '
+ '`self heal daemon` is complete')
+
+ def test_metadata_heal_from_heal_cmd(self):
+ '''Description: Verify files heal after triggering heal command when
+ metadata operations are performed while a brick was down
+
+ Steps:
+ 1. Create, mount and run IO on volume
+ 2. Set `self-heal-daemon` to `off`, cyclic brick down and perform
+ metadata operations
+ 3. Set `self-heal-daemon` to `on`, invoke `gluster vol <vol> heal`
+        4. Validate arequal checksum on backend bricks
+ '''
+ op_type = 'metadata'
+ self._perform_io_and_disable_self_heal()
+ self._perform_brick_ops_and_enable_self_heal(op_type=op_type)
+
+        # Trigger heal via `gluster volume heal <volname>`
+ self.assertTrue(trigger_heal(self.mnode, self.volname),
+ 'Unable to trigger index heal on the volume')
+
+ self._validate_heal_completion_and_arequal(op_type=op_type)
+        g.log.info(
+            'Pass: Verification of metadata heal via `gluster volume heal` '
+            'is complete')
+
+ def test_data_heal_from_shd(self):
+        '''Description: Verify files heal after switching on `self-heal-daemon`
+        when data operations are performed while a brick was down
+
+ Steps:
+ 1. Create, mount and run IO on volume
+ 2. Set `self-heal-daemon` to `off`, cyclic brick down and perform data
+ operations
+ 3. Set `self-heal-daemon` to `on` and wait for heal completion
+        4. Validate arequal checksum on backend bricks
+ '''
+ op_type = 'data'
+ self._perform_io_and_disable_self_heal()
+ self._perform_brick_ops_and_enable_self_heal(op_type=op_type)
+ self._validate_heal_completion_and_arequal(op_type=op_type)
+ g.log.info('Pass: Verification of data heal after switching on '
+ '`self heal daemon` is complete')