Diffstat (limited to 'tests')
-rw-r--r-- | tests/functional/afr/test_metadata_self_heal_client_side_heal.py | 606 |
1 files changed, 606 insertions, 0 deletions
diff --git a/tests/functional/afr/test_metadata_self_heal_client_side_heal.py b/tests/functional/afr/test_metadata_self_heal_client_side_heal.py
new file mode 100644
index 000000000..166059276
--- /dev/null
+++ b/tests/functional/afr/test_metadata_self_heal_client_side_heal.py
@@ -0,0 +1,606 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+# pylint: disable=too-many-locals,too-many-statements,too-many-branches
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on)
+from glustolibs.gluster.glusterdir import get_dir_contents
+from glustolibs.gluster.glusterfile import get_file_stat
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.volume_ops import set_volume_options
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+                                          is_heal_complete,
+                                          is_volume_in_split_brain)
+from glustolibs.gluster.lib_utils import (add_user, del_user,
+                                          collect_bricks_arequal)
+from glustolibs.gluster.mount_ops import (umount_volume,
+                                          mount_volume)
+from glustolibs.gluster.volume_libs import get_subvols
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+                                           bring_bricks_online,
+                                           are_bricks_offline,
+                                           get_all_bricks)
+from glustolibs.misc.misc_libs import upload_scripts
+from glustolibs.io.utils import (validate_io_procs,
+                                 list_all_files_and_dirs_mounts,
+                                 wait_for_io_to_complete,
+                                 collect_mounts_arequal)
+
+
+@runs_on([['distributed-replicated', 'replicated'],
+          ['glusterfs']])
+class TestAFRMetaDataSelfHealClientSideHeal(GlusterBaseClass):
+
+    @classmethod
+    def setUpClass(cls):
+
+        cls.get_super_method(cls, 'setUpClass')()
+
+        # Upload IO scripts for running IO on mounts
+        cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
+                                  "file_dir_ops.py")
+        if not upload_scripts(cls.clients, [cls.script_upload_path]):
+            raise ExecutionError("Failed to upload IO scripts to clients %s"
+                                 % cls.clients)
+        g.log.info("Successfully uploaded IO scripts to clients %s",
+                   cls.clients)
+
+    def setUp(self):
+
+        self.get_super_method(self, 'setUp')()
+        self.all_mounts_procs, self.io_validation_complete = [], False
+
+        # Create users
+        self.users = ['qa_func', 'qa_system', 'qa_perf', 'qa_all']
+        for mount_object in self.mounts:
+            for user in self.users:
+                if not add_user(mount_object.client_system, user):
+                    raise ExecutionError("Failed to create user "
+                                         "{}".format(user))
+        g.log.info("Successfully created all users.")
+
+        # Setup Volume and Mount Volume
+        ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
+        if not ret:
+            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+        g.log.info("Successful in Setup Volume and Mount Volume")
+
+    def tearDown(self):
+        """
+        If the test method failed before validating IO, tearDown waits
+        for the IOs to complete and checks their exit status.
+        Cleanup and umount volume.
+        """
+        if not self.io_validation_complete:
+            ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts)
+            if not ret:
+                raise ExecutionError("IO failed on some of the clients")
+            g.log.info("IO is successful on all mounts")
+
+            # List all files and dirs created
+            if not list_all_files_and_dirs_mounts(self.mounts):
+                raise ExecutionError("Failed to list all files and dirs")
+            g.log.info("Listing all files and directories is successful")
+
+        # Delete users
+        for mount_object in self.mounts:
+            for user in self.users:
+                if not del_user(mount_object.client_system, user):
+                    raise ExecutionError("Failed to delete user: {}"
+                                         .format(user))
+        g.log.info("Successfully deleted all users")
+
+        # Cleanup and umount volume
+        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+        if not ret:
+            raise ExecutionError("Failed to umount the vol & cleanup Volume")
+        g.log.info("Successful in umounting the volume and Cleanup")
+
+        self.get_super_method(self, 'tearDown')()
+
+    def trigger_heal_from_mount_point(self):
+        """
+        Trigger heal from mount point using read.
+        """
+        # Unmounting and remounting volume to update the volume graph
+        # in client.
+        ret, _, _ = umount_volume(
+            self.mounts[0].client_system, self.mounts[0].mountpoint)
+        self.assertFalse(ret, "Failed to unmount volume.")
+
+        ret, _, _ = mount_volume(
+            self.volname, 'glusterfs', self.mounts[0].mountpoint,
+            self.mnode, self.mounts[0].client_system)
+        self.assertFalse(ret, "Failed to remount volume.")
+        g.log.info('Successfully umounted and remounted volume.')
+
+        # Trigger heal from client side
+        cmd = ("/usr/bin/env python {0} read {1}/{2}".format(
+            self.script_upload_path, self.mounts[0].mountpoint,
+            self.test_meta_data_self_heal_folder))
+        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
+        self.assertFalse(ret, 'Failed to trigger heal on %s'
+                         % self.mounts[0].client_system)
+        g.log.info("Successfully triggered heal from mount point.")
+
+    def validate_io_on_clients(self):
+        """
+        Validate I/O on client mount points.
+        """
+        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
+        self.assertTrue(ret, "IO failed on some of the clients")
+        self.io_validation_complete = True
+        g.log.info("IO is successful on all mounts")
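
Steps 9 and 17 of the test plan further down describe the same read trigger as a raw shell pipeline rather than via file_dir_ops.py. For reference, a minimal sketch of that variant, assuming the same g.run transport and the first mount only (not part of the patch):

    # Hypothetical stat-based trigger; assumes self.mounts[0] is mounted
    # and the test folder already exists on it.
    cmd = ("cd {0}/{1}; find . | xargs stat > /dev/null".format(
        self.mounts[0].mountpoint, self.test_meta_data_self_heal_folder))
    ret, _, _ = g.run(self.mounts[0].client_system, cmd)
    self.assertFalse(ret, "Failed to trigger heal via find | xargs stat")
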
+ """ + # Check arequals for "replicated" + all_bricks = get_all_bricks(self.mnode, self.volname) + if self.volume_type == "replicated": + # Get arequal before getting bricks offline + ret, arequals = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal before getting bricks offline ' + 'is successful') + mount_point_total = arequals[0].splitlines()[-1].split(':')[-1] + + # Get arequal on bricks and compare with mount_point_total + ret, arequals = collect_bricks_arequal(all_bricks) + self.assertTrue(ret, 'Failed to get arequal on bricks') + for arequal in arequals: + brick_total = arequal.splitlines()[-1].split(':')[-1] + self.assertEqual(mount_point_total, brick_total, + 'Arequals for mountpoint and brick ' + 'are not equal') + g.log.info('Arequals for mountpoint and brick are equal') + g.log.info('All arequals are equal for replicated') + + # Check arequals for "distributed-replicated" + if self.volume_type == "distributed-replicated": + # get the subvolumes + subvols_dict = get_subvols(self.mnode, self.volname) + num_subvols = len(subvols_dict['volume_subvols']) + g.log.info("Number of subvolumes in volume %s:", num_subvols) + + # Get arequals and compare + for i in range(0, num_subvols): + # Get arequal for first brick + subvol_brick_list = subvols_dict['volume_subvols'][i] + ret, arequal = collect_bricks_arequal([subvol_brick_list[0]]) + self.assertTrue(ret, 'Failed to get arequal on first') + + # Get arequal for every brick and compare with first brick + first_brick_total = arequal[0].splitlines()[-1].split(':')[-1] + ret, arequals = collect_bricks_arequal(subvol_brick_list) + self.assertTrue(ret, 'Failed to get arequal on bricks') + for arequal in arequals: + brick_total = arequal.splitlines()[-1].split(':')[-1] + self.assertEqual(first_brick_total, brick_total, + 'Arequals for subvol and brick are ' + 'not equal') + g.log.info('Arequals for subvol and brick are equal') + g.log.info('All arequals are equal for distributed-replicated') + + def check_permssions_on_bricks(self, bricks_list): + """ + Check permssions on a given set of bricks. 
+ """ + for brick in bricks_list: + node, brick_path = brick.split(':') + dir_list = get_dir_contents(node, "{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder)) + self.assertIsNotNone(dir_list, "Dir list from " + "brick is empty") + g.log.info("Successfully got dir list from bick") + + # Verify changes for dirs + for folder in dir_list: + ret = get_file_stat(node, "{}/{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, folder)) + + self.assertEqual('555', ret['access'], + "Permissions mismatch on node {}" + .format(node)) + + self.assertEqual('1003', ret['gid'], + "Group mismatch on node {}" + .format(node)) + + # Get list of files for each dir + file_list = get_dir_contents(node, "{}/{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, + folder)) + self.assertIsNotNone(file_list, "File list from " + "brick is empty.") + g.log.info("Successfully got file list from bick.") + + # Verify for group for each file + if file_list: + for file_name in file_list: + ret = get_file_stat(node, "{}/{}/{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, + folder, file_name)) + + self.assertEqual('1003', ret['gid'], + "Group mismatch on node {}" + .format(node)) + + # Verify permissions for files in dirs 1..50 + for i in range(1, 51): + + file_list = get_dir_contents(node, "{}/{}/dir.{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i))) + self.assertIsNotNone(file_list, "File list from " + "brick is empty.") + g.log.info("Successfully got file list from bick.") + + if file_list: + for file_name in file_list: + + ret = get_file_stat(node, "{}/{}/dir.{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i), file_name)) + self.assertEqual('666', ret['access'], + "Permissions mismatch on node {}" + .format(node)) + + # Verify permissions for files in dirs 51..100 + for i in range(51, 101): + + file_list = get_dir_contents(node, "{}/{}/dir.{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i))) + self.assertIsNotNone(file_list, "File list from " + "brick is empty.") + g.log.info("Successfully got file list from bick.") + + if file_list: + for file_name in file_list: + + ret = get_file_stat(node, "{}/{}/dir.{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i), file_name)) + self.assertEqual('444', ret['access'], + "Permissions mismatch on node {}" + .format(node)) + + # Verify ownership for dirs 1..35 + for i in range(1, 36): + + ret = get_file_stat(node, "{}/{}/dir.{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i))) + self.assertEqual('1000', ret['uid'], + "User id mismatch on node {}" + .format(node)) + + # Verify ownership for files in dirs + file_list = get_dir_contents(node, "{}/{}/dir.{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i))) + self.assertIsNotNone(file_list, "File list from " + "brick is empty.") + g.log.info("Successfully got file list from bick.") + + if file_list: + for file_name in file_list: + + ret = get_file_stat(node, "{}/{}/dir.{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i), file_name)) + self.assertEqual('1000', ret['uid'], + "User id mismatch on node {}" + .format(node)) + + # Verify ownership for dirs 36..70 + for i in range(36, 71): + + ret = get_file_stat(node, "{}/{}/dir.{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i))) + self.assertEqual('1001', ret['uid'], + "User id mismatch on node {}" + .format(node)) + + # Verify ownership for files in 
+
+            # Verify ownership for dirs 36..70
+            for i in range(36, 71):
+                ret = get_file_stat(node, "{}/{}/dir.{}".format(
+                    brick_path, self.test_meta_data_self_heal_folder,
+                    str(i)))
+                self.assertEqual('1001', ret['uid'],
+                                 "User id mismatch on node {}"
+                                 .format(node))
+
+                # Verify ownership for files in dirs
+                file_list = get_dir_contents(node, "{}/{}/dir.{}".format(
+                    brick_path, self.test_meta_data_self_heal_folder,
+                    str(i)))
+                self.assertIsNotNone(file_list, "File list from "
+                                     "brick is empty.")
+                g.log.info("Successfully got file list from brick.")
+
+                if file_list:
+                    for file_name in file_list:
+                        ret = get_file_stat(node, "{}/{}/dir.{}/{}".format(
+                            brick_path, self.test_meta_data_self_heal_folder,
+                            str(i), file_name))
+                        self.assertEqual('1001', ret['uid'],
+                                         "User id mismatch on node {}"
+                                         .format(node))
+
+            # Verify ownership for dirs 71..100
+            for i in range(71, 101):
+                ret = get_file_stat(node, "{}/{}/dir.{}".format(
+                    brick_path, self.test_meta_data_self_heal_folder,
+                    str(i)))
+                self.assertEqual('1002', ret['uid'],
+                                 "User id mismatch on node {}"
+                                 .format(node))
+
+                # Verify ownership for files in dirs
+                file_list = get_dir_contents(node, "{}/{}/dir.{}".format(
+                    brick_path, self.test_meta_data_self_heal_folder,
+                    str(i)))
+                self.assertIsNotNone(file_list, "File list from "
+                                     "brick is empty.")
+                g.log.info("Successfully got file list from brick.")
+
+                if file_list:
+                    for file_name in file_list:
+                        ret = get_file_stat(node, "{}/{}/dir.{}/{}".format(
+                            brick_path, self.test_meta_data_self_heal_folder,
+                            str(i), file_name))
+                        self.assertEqual('1002', ret['uid'],
+                                         "User id mismatch on node {}"
+                                         .format(node))
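
The literal uids and gids asserted in this helper ('1000' through '1003') hinge on the creation order of the qa_* users in setUp and on the clients having no other local users already holding those ids; that assumption, together with the modes applied later in the test, reduces to roughly this map (illustrative comments, not part of the patch):

    # Assumed id mapping (first free uid/gid on a fresh node is 1000):
    #   qa_func   -> uid 1000, owner of dir.1  .. dir.35
    #   qa_system -> uid 1001, owner of dir.36 .. dir.70
    #   qa_perf   -> uid 1002, owner of dir.71 .. dir.100
    #   qa_all    -> gid 1003, group of every dir and file
    # Expected modes after the metadata changes:
    #   all dirs 555; files in dir.1..dir.50 666; dir.51..dir.100 444
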
| xargs md5sum" from the mount point to trgger heal + 18.Wait for heal to complete + 19.Verify the changes in permissions are self-healed on brick1 + 20.Verify the changes in ownership are successfully self-healed + on brick1 + 21.Verify the changes in group are successfully self-healed on brick1 + 22.Calculate arequal check on all the bricks and mount point + """ + # Setting options + ret = set_volume_options(self.mnode, self.volname, + {"self-heal-daemon": "off"}) + self.assertTrue(ret, 'Failed to set options self-heal-daemon ' + 'and metadata-self-heal to OFF') + g.log.info("Options are set successfully") + + # Creating files on client side + self.test_meta_data_self_heal_folder = 'test_meta_data_self_heal' + for mount_object in self.mounts: + command = ("cd {0}/ ; mkdir {1} ; cd {1}/ ;" + "for i in `seq 1 100` ; " + "do mkdir dir.$i ; " + "for j in `seq 1 5` ; " + "do dd if=/dev/urandom of=dir.$i/file.$j " + "bs=1K count=$j ; done ; done ;".format + (mount_object.mountpoint, + self.test_meta_data_self_heal_folder)) + proc = g.run_async(mount_object.client_system, command, + user=mount_object.user) + self.all_mounts_procs.append(proc) + + # Validate IO + self.validate_io_on_clients() + + # Calculate and check arequal of the bricks and mount point + self.check_arequal_from_mount_point_and_bricks() + + # Select bricks to bring offline from a replica set + subvols_dict = get_subvols(self.mnode, self.volname) + subvols = subvols_dict['volume_subvols'] + bricks_to_bring_offline = [] + bricks_to_be_online = [] + for subvol in subvols: + bricks_to_bring_offline.append(subvol[0]) + for brick in subvol[1:]: + bricks_to_be_online.append(brick) + + # Bring bricks offline + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' % + bricks_to_bring_offline) + + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks %s are not offline' + % bricks_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + bricks_to_bring_offline) + + # Change the permissions of the directories and files + self.all_mounts_procs = [] + for mount_obj in self.mounts: + command = ('cd {}/{}; ' + 'for i in `seq 1 100` ; ' + 'do chmod 555 dir.$i ; done ; ' + 'for i in `seq 1 50` ; ' + 'do for j in `seq 1 5` ; ' + 'do chmod 666 dir.$i/file.$j ; done ; done ; ' + 'for i in `seq 51 100` ; ' + 'do for j in `seq 1 5` ; ' + 'do chmod 444 dir.$i/file.$j ; done ; done ;' + .format(mount_obj.mountpoint, + self.test_meta_data_self_heal_folder)) + + proc = g.run_async(mount_obj.client_system, command, + user=mount_obj.user) + self.all_mounts_procs.append(proc) + self.io_validation_complete = False + + # Validate IO + self.validate_io_on_clients() + + # Change the ownership of the directories and files + self.all_mounts_procs = [] + for mount_obj in self.mounts: + command = ('cd {}/{} ; ' + 'for i in `seq 1 35` ; ' + 'do chown -R qa_func dir.$i ; done ; ' + 'for i in `seq 36 70` ; ' + 'do chown -R qa_system dir.$i ; done ; ' + 'for i in `seq 71 100` ; ' + 'do chown -R qa_perf dir.$i ; done ;' + .format(mount_obj.mountpoint, + self.test_meta_data_self_heal_folder)) + proc = g.run_async(mount_obj.client_system, command, + user=mount_obj.user) + self.all_mounts_procs.append(proc) + self.io_validation_complete = False + + # Validate IO + self.validate_io_on_clients() + + # Change the group of the directories and files + self.all_mounts_procs = [] + for mount_obj in self.mounts: + command = ('cd {}/{}; 
+
+        # Change the group of the directories and files
+        self.all_mounts_procs = []
+        for mount_obj in self.mounts:
+            command = ('cd {}/{}; '
+                       'for i in `seq 1 100` ; '
+                       'do chgrp -R qa_all dir.$i ; done ;'
+                       .format(mount_obj.mountpoint,
+                               self.test_meta_data_self_heal_folder))
+            proc = g.run_async(mount_obj.client_system, command,
+                               user=mount_obj.user)
+            self.all_mounts_procs.append(proc)
+            self.io_validation_complete = False
+
+        # Validate IO
+        self.validate_io_on_clients()
+
+        # Bring bricks online
+        ret = bring_bricks_online(self.mnode, self.volname,
+                                  bricks_to_bring_offline)
+        self.assertTrue(ret, 'Failed to bring bricks %s online' %
+                        bricks_to_bring_offline)
+        g.log.info('Bringing bricks %s online is successful',
+                   bricks_to_bring_offline)
+
+        # Trigger heal from mount point
+        self.trigger_heal_from_mount_point()
+
+        # Verify the changes are not self healed on brick1 for each subvol
+        for brick in bricks_to_bring_offline:
+            node, brick_path = brick.split(':')
+
+            dir_list = get_dir_contents(node, "{}/{}".format(
+                brick_path, self.test_meta_data_self_heal_folder))
+            self.assertIsNotNone(dir_list, "Dir list from "
+                                 "brick is empty")
+            g.log.info("Successfully got dir list from brick")
+
+            # Verify changes for dirs
+            for folder in dir_list:
+                ret = get_file_stat(node, "{}/{}/{}".format(
+                    brick_path, self.test_meta_data_self_heal_folder,
+                    folder))
+
+                self.assertEqual('755', ret['access'],
+                                 "Permissions mismatch on node {}"
+                                 .format(node))
+
+                self.assertEqual('root', ret['username'],
+                                 "User id mismatch on node {}"
+                                 .format(node))
+
+                self.assertEqual('root', ret['groupname'],
+                                 "Group id mismatch on node {}"
+                                 .format(node))
+
+                # Get list of files for each dir
+                file_list = get_dir_contents(node, "{}/{}/{}".format(
+                    brick_path, self.test_meta_data_self_heal_folder,
+                    folder))
+                self.assertIsNotNone(file_list, "File list from "
+                                     "brick is empty.")
+                g.log.info("Successfully got file list from brick.")
+
+                if file_list:
+                    for file_name in file_list:
+                        ret = get_file_stat(node, "{}/{}/{}/{}".format(
+                            brick_path, self.test_meta_data_self_heal_folder,
+                            folder, file_name))
+
+                        self.assertEqual('644', ret['access'],
+                                         "Permissions mismatch on node"
+                                         " {} for file {}".format(node,
+                                                                  file_name))
+
+                        self.assertEqual('root', ret['username'],
+                                         "User id mismatch on node"
+                                         " {} for file {}".format(node,
+                                                                  file_name))
+
+                        self.assertEqual('root', ret['groupname'],
+                                         "Group id mismatch on node"
+                                         " {} for file {}".format(node,
+                                                                  file_name))
+
+        # Verify the changes are self healed on all bricks except brick1
+        # for each subvol
+        self.check_permissions_on_bricks(bricks_to_be_online)
+
+        # Setting options
+        ret = set_volume_options(self.mnode, self.volname,
+                                 {"metadata-self-heal": "on"})
+        self.assertTrue(ret, 'Failed to set option metadata-self-heal '
+                        'to ON.')
+        g.log.info("Option is set successfully")
+
+        # Trigger heal from mount point
+        self.trigger_heal_from_mount_point()
+
+        # Monitor heal completion
+        ret = monitor_heal_completion(self.mnode, self.volname)
+        self.assertTrue(ret, 'Heal has not yet completed')
+
+        # Check if heal is completed
+        ret = is_heal_complete(self.mnode, self.volname)
+        self.assertTrue(ret, 'Heal is not complete')
+        g.log.info('Heal is completed successfully')
+
+        # Check for split-brain
+        ret = is_volume_in_split_brain(self.mnode, self.volname)
+        self.assertFalse(ret, 'Volume is in split-brain state')
+        g.log.info('Volume is not in split-brain state')
+
+        # Verify the changes are self healed on brick1 for each subvol
+        self.check_permissions_on_bricks(bricks_to_bring_offline)
+
+        # Calculate and check arequal of the bricks and mount point
+        self.check_arequal_from_mount_point_and_bricks()
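
In outline, the client-side heal scenario this test drives can be summarised with the same glustolibs helpers it imports; a rough sketch, with mnode, volname and first_bricks as placeholders rather than real fixtures:

    # Condensed flow of the scenario above (placeholders, not fixtures).
    set_volume_options(mnode, volname, {"self-heal-daemon": "off"})
    bring_bricks_offline(volname, first_bricks)  # one brick per replica set
    # ... change permissions, ownership and group from a mount point ...
    bring_bricks_online(mnode, volname, first_bricks)
    # With metadata-self-heal still off, reads must NOT heal the metadata:
    # the previously offline bricks keep the stale permissions.
    set_volume_options(mnode, volname, {"metadata-self-heal": "on"})
    # A fresh mount plus a read (file_dir_ops.py read, or find | xargs stat)
    # now triggers client-side heal; wait for it and re-verify the replicas.
    monitor_heal_completion(mnode, volname)
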