From 94dd903a6829041b5791bbc36b309892f4cf3616 Mon Sep 17 00:00:00 2001
From: kshithijiyer
Date: Wed, 29 Apr 2020 10:45:13 +0530
Subject: [Test] Add tc to check volume metadata-self-heal

Testcase steps:
1.Turn off the option self-heal-daemon
2.Create IO
3.Calculate arequal of the bricks and mount point
4.Bring down "brick1" process
5.Change the permissions of the directories and files
6.Change the ownership of the directories and files
7.Change the group of the directories and files
8.Bring back the brick "brick1" process
9.Execute "find . | xargs stat" from the mount point to trigger heal
10.Verify the changes in permissions are not self healed on brick1
11.Verify the changes in permissions on all bricks but brick1
12.Verify the changes in ownership are not self healed on brick1
13.Verify the changes in ownership on all the bricks but brick1
14.Verify the changes in group are not successfully self-healed on brick1
15.Verify the changes in group on all the bricks but brick1
16.Turn on the option metadata-self-heal
17.Execute "find . | xargs md5sum" from the mount point to trigger heal
18.Wait for heal to complete
19.Verify the changes in permissions are self-healed on brick1
20.Verify the changes in ownership are successfully self-healed on brick1
21.Verify the changes in group are successfully self-healed on brick1
22.Calculate arequal check on all the bricks and mount point

Change-Id: Ia7fb1b272c3c6bf85093690819b68bd83efefe14
Co-authored-by: Vitalii Koriakov
Signed-off-by: Vitalii Koriakov
Signed-off-by: kshithijiyer
---
 .../test_metadata_self_heal_client_side_heal.py | 606 +++++++++++++++++++++
 1 file changed, 606 insertions(+)
 create mode 100644 tests/functional/afr/test_metadata_self_heal_client_side_heal.py

diff --git a/tests/functional/afr/test_metadata_self_heal_client_side_heal.py b/tests/functional/afr/test_metadata_self_heal_client_side_heal.py
new file mode 100644
index 000000000..166059276
--- /dev/null
+++ b/tests/functional/afr/test_metadata_self_heal_client_side_heal.py
@@ -0,0 +1,606 @@
+# Copyright (C) 2020 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
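+
+# Description:
+#   Verifies client-side metadata self-heal (permissions, ownership and
+#   group) on replicated and distributed-replicated volumes.  The roughly
+#   equivalent manual flow, sketched here only for illustration (volume and
+#   mount point names are placeholders), is:
+#     gluster volume set <volname> self-heal-daemon off
+#     (kill one brick per replica set, change permissions/ownership/group
+#      from the mount point, then: gluster volume start <volname> force)
+#     gluster volume set <volname> metadata-self-heal on
+#     find <mountpoint> | xargs stat    # client-side lookups trigger heal
+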
+# pylint: disable=too-many-locals,too-many-statements,too-many-branches + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.glusterdir import get_dir_contents +from glustolibs.gluster.glusterfile import get_file_stat +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_ops import set_volume_options +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_heal_complete, + is_volume_in_split_brain) +from glustolibs.gluster.lib_utils import (add_user, del_user, + collect_bricks_arequal) +from glustolibs.gluster.mount_ops import (umount_volume, + mount_volume) +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + get_all_bricks) +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import (validate_io_procs, + list_all_files_and_dirs_mounts, + wait_for_io_to_complete, + collect_mounts_arequal) + + +@runs_on([['distributed-replicated', 'replicated'], + ['glusterfs']]) +class TestAFRMetaDataSelfHealClientSideHeal(GlusterBaseClass): + @classmethod + def setUpClass(cls): + + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + if not upload_scripts(cls.clients, [cls.script_upload_path]): + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + + self.get_super_method(self, 'setUp')() + self.all_mounts_procs, self.io_validation_complete = [], False + + # Create users + self.users = ['qa_func', 'qa_system', 'qa_perf', 'qa_all'] + for mount_object in self.mounts: + for user in self.users: + if not add_user(mount_object.client_system, user): + raise ExecutionError("Failed to create user " + "{}".format(user)) + g.log.info("Successfully created all users.") + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + If test method failed before validating IO, tearDown waits for the + IO's to complete and checks for the IO exit status. 
+        Cleanup and umount volume
+        """
+        if not self.io_validation_complete:
+            ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts)
+            if not ret:
+                raise ExecutionError("IO failed on some of the clients")
+            g.log.info("IO is successful on all mounts")
+
+            # List all files and dirs created
+            if not list_all_files_and_dirs_mounts(self.mounts):
+                raise ExecutionError("Failed to list all files and dirs")
+            g.log.info("Listing all files and directories is successful")
+
+        # Delete users
+        for mount_object in self.mounts:
+            for user in self.users:
+                if not del_user(mount_object.client_system, user):
+                    raise ExecutionError("Failed to delete user: {}"
+                                         .format(user))
+        g.log.info("Successfully deleted all users")
+
+        # Cleanup and umount volume
+        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+        if not ret:
+            raise ExecutionError("Failed to umount the vol & cleanup Volume")
+        g.log.info("Successful in umounting the volume and Cleanup")
+
+        self.get_super_method(self, 'tearDown')()
+
+    def trigger_heal_from_mount_point(self):
+        """
+        Trigger heal from mount point using read.
+        """
+        # Unmounting and remounting volume to update the volume graph
+        # in client.
+        ret, _, _ = umount_volume(
+            self.mounts[0].client_system, self.mounts[0].mountpoint)
+        self.assertFalse(ret, "Failed to unmount volume.")
+
+        ret, _, _ = mount_volume(
+            self.volname, 'glusterfs', self.mounts[0].mountpoint,
+            self.mnode, self.mounts[0].client_system)
+        self.assertFalse(ret, "Failed to remount volume.")
+        g.log.info('Successfully umounted and remounted volume.')
+
+        # Trigger heal from client side
+        cmd = ("/usr/bin/env python {0} read {1}/{2}".format(
+            self.script_upload_path, self.mounts[0].mountpoint,
+            self.test_meta_data_self_heal_folder))
+        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
+        self.assertFalse(ret, 'Failed to trigger heal on %s'
+                         % self.mounts[0].client_system)
+        g.log.info("Successfully triggered heal from mount point.")
+
+    def validate_io_on_clients(self):
+        """
+        Validate I/O on client mount points.
+        """
+        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
+        self.assertTrue(ret, "IO failed on some of the clients")
+        self.io_validation_complete = True
+        g.log.info("IO is successful on all mounts")
+
+    def check_arequal_from_mount_point_and_bricks(self):
+        """
+        Check if arequals of mount point and bricks are the same.
+        """
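+        # Note: an 'arequal' checksum summarises an entire directory tree
+        # (file data and metadata) in a single value, so identical totals on
+        # the mount point and on every brick imply the replicas are in sync.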
+        # Check arequals for "replicated"
+        all_bricks = get_all_bricks(self.mnode, self.volname)
+        if self.volume_type == "replicated":
+            # Get arequal before getting bricks offline
+            ret, arequals = collect_mounts_arequal(self.mounts)
+            self.assertTrue(ret, 'Failed to get arequal')
+            g.log.info('Getting arequal before getting bricks offline '
+                       'is successful')
+            mount_point_total = arequals[0].splitlines()[-1].split(':')[-1]
+
+            # Get arequal on bricks and compare with mount_point_total
+            ret, arequals = collect_bricks_arequal(all_bricks)
+            self.assertTrue(ret, 'Failed to get arequal on bricks')
+            for arequal in arequals:
+                brick_total = arequal.splitlines()[-1].split(':')[-1]
+                self.assertEqual(mount_point_total, brick_total,
+                                 'Arequals for mountpoint and brick '
+                                 'are not equal')
+                g.log.info('Arequals for mountpoint and brick are equal')
+            g.log.info('All arequals are equal for replicated')
+
+        # Check arequals for "distributed-replicated"
+        if self.volume_type == "distributed-replicated":
+            # Get the subvolumes
+            subvols_dict = get_subvols(self.mnode, self.volname)
+            num_subvols = len(subvols_dict['volume_subvols'])
+            g.log.info("Number of subvolumes in volume %s:", num_subvols)
+
+            # Get arequals and compare
+            for i in range(0, num_subvols):
+                # Get arequal for first brick
+                subvol_brick_list = subvols_dict['volume_subvols'][i]
+                ret, arequal = collect_bricks_arequal([subvol_brick_list[0]])
+                self.assertTrue(ret, 'Failed to get arequal on first brick')
+
+                # Get arequal for every brick and compare with first brick
+                first_brick_total = arequal[0].splitlines()[-1].split(':')[-1]
+                ret, arequals = collect_bricks_arequal(subvol_brick_list)
+                self.assertTrue(ret, 'Failed to get arequal on bricks')
+                for arequal in arequals:
+                    brick_total = arequal.splitlines()[-1].split(':')[-1]
+                    self.assertEqual(first_brick_total, brick_total,
+                                     'Arequals for subvol and brick are '
+                                     'not equal')
+                    g.log.info('Arequals for subvol and brick are equal')
+            g.log.info('All arequals are equal for distributed-replicated')
+
+    def check_permssions_on_bricks(self, bricks_list):
+        """
+        Check permissions on a given set of bricks.
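+
+        Expected state, as set up by the test case below: directories have
+        mode 555 and gid 1003 (the qa_all group); files under dir.1-50 have
+        mode 666 and files under dir.51-100 mode 444; dir.1-35 are owned by
+        qa_func (uid 1000), dir.36-70 by qa_system (uid 1001) and dir.71-100
+        by qa_perf (uid 1002).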
+ """ + for brick in bricks_list: + node, brick_path = brick.split(':') + dir_list = get_dir_contents(node, "{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder)) + self.assertIsNotNone(dir_list, "Dir list from " + "brick is empty") + g.log.info("Successfully got dir list from bick") + + # Verify changes for dirs + for folder in dir_list: + ret = get_file_stat(node, "{}/{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, folder)) + + self.assertEqual('555', ret['access'], + "Permissions mismatch on node {}" + .format(node)) + + self.assertEqual('1003', ret['gid'], + "Group mismatch on node {}" + .format(node)) + + # Get list of files for each dir + file_list = get_dir_contents(node, "{}/{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, + folder)) + self.assertIsNotNone(file_list, "File list from " + "brick is empty.") + g.log.info("Successfully got file list from bick.") + + # Verify for group for each file + if file_list: + for file_name in file_list: + ret = get_file_stat(node, "{}/{}/{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, + folder, file_name)) + + self.assertEqual('1003', ret['gid'], + "Group mismatch on node {}" + .format(node)) + + # Verify permissions for files in dirs 1..50 + for i in range(1, 51): + + file_list = get_dir_contents(node, "{}/{}/dir.{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i))) + self.assertIsNotNone(file_list, "File list from " + "brick is empty.") + g.log.info("Successfully got file list from bick.") + + if file_list: + for file_name in file_list: + + ret = get_file_stat(node, "{}/{}/dir.{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i), file_name)) + self.assertEqual('666', ret['access'], + "Permissions mismatch on node {}" + .format(node)) + + # Verify permissions for files in dirs 51..100 + for i in range(51, 101): + + file_list = get_dir_contents(node, "{}/{}/dir.{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i))) + self.assertIsNotNone(file_list, "File list from " + "brick is empty.") + g.log.info("Successfully got file list from bick.") + + if file_list: + for file_name in file_list: + + ret = get_file_stat(node, "{}/{}/dir.{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i), file_name)) + self.assertEqual('444', ret['access'], + "Permissions mismatch on node {}" + .format(node)) + + # Verify ownership for dirs 1..35 + for i in range(1, 36): + + ret = get_file_stat(node, "{}/{}/dir.{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i))) + self.assertEqual('1000', ret['uid'], + "User id mismatch on node {}" + .format(node)) + + # Verify ownership for files in dirs + file_list = get_dir_contents(node, "{}/{}/dir.{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i))) + self.assertIsNotNone(file_list, "File list from " + "brick is empty.") + g.log.info("Successfully got file list from bick.") + + if file_list: + for file_name in file_list: + + ret = get_file_stat(node, "{}/{}/dir.{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i), file_name)) + self.assertEqual('1000', ret['uid'], + "User id mismatch on node {}" + .format(node)) + + # Verify ownership for dirs 36..70 + for i in range(36, 71): + + ret = get_file_stat(node, "{}/{}/dir.{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i))) + self.assertEqual('1001', ret['uid'], + "User id mismatch on node {}" + .format(node)) + + # Verify ownership for files in 
dirs + file_list = get_dir_contents(node, "{}/{}/dir.{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i))) + self.assertIsNotNone(file_list, "File list from " + "brick is empty.") + g.log.info("Successfully got file list from bick.") + + if file_list: + for file_name in file_list: + + ret = get_file_stat(node, "{}/{}/dir.{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i), file_name)) + self.assertEqual('1001', ret['uid'], + "User id mismatch on node {}" + .format(node)) + + # Verify ownership for dirs 71..100 + for i in range(71, 101): + + ret = get_file_stat(node, "{}/{}/dir.{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i))) + self.assertEqual('1002', ret['uid'], + "User id mismatch on node {}" + .format(node)) + + # Verify ownership for files in dirs + file_list = get_dir_contents(node, "{}/{}/dir.{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i))) + self.assertIsNotNone(file_list, "File list from " + "brick is empty.") + g.log.info("Successfully got file list from bick.") + + if file_list: + for file_name in file_list: + + ret = get_file_stat(node, "{}/{}/dir.{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, + str(i), file_name)) + self.assertEqual('1002', ret['uid'], + "User id mismatch on node {}" + .format(node)) + + def test_metadata_self_heal_client_side_heal(self): + """ + Testcase steps: + 1.Turn off the options self heal daemon + 2.Create IO + 3.Calculate arequal of the bricks and mount point + 4.Bring down "brick1" process + 5.Change the permissions of the directories and files + 6.Change the ownership of the directories and files + 7.Change the group of the directories and files + 8.Bring back the brick "brick1" process + 9.Execute "find . | xargs stat" from the mount point to trigger heal + 10.Verify the changes in permissions are not self healed on brick1 + 11.Verify the changes in permissions on all bricks but brick1 + 12.Verify the changes in ownership are not self healed on brick1 + 13.Verify the changes in ownership on all the bricks but brick1 + 14.Verify the changes in group are not successfully self-healed + on brick1 + 15.Verify the changes in group on all the bricks but brick1 + 16.Turn on the option metadata-self-heal + 17.Execute "find . 
| xargs md5sum" from the mount point to trgger heal + 18.Wait for heal to complete + 19.Verify the changes in permissions are self-healed on brick1 + 20.Verify the changes in ownership are successfully self-healed + on brick1 + 21.Verify the changes in group are successfully self-healed on brick1 + 22.Calculate arequal check on all the bricks and mount point + """ + # Setting options + ret = set_volume_options(self.mnode, self.volname, + {"self-heal-daemon": "off"}) + self.assertTrue(ret, 'Failed to set options self-heal-daemon ' + 'and metadata-self-heal to OFF') + g.log.info("Options are set successfully") + + # Creating files on client side + self.test_meta_data_self_heal_folder = 'test_meta_data_self_heal' + for mount_object in self.mounts: + command = ("cd {0}/ ; mkdir {1} ; cd {1}/ ;" + "for i in `seq 1 100` ; " + "do mkdir dir.$i ; " + "for j in `seq 1 5` ; " + "do dd if=/dev/urandom of=dir.$i/file.$j " + "bs=1K count=$j ; done ; done ;".format + (mount_object.mountpoint, + self.test_meta_data_self_heal_folder)) + proc = g.run_async(mount_object.client_system, command, + user=mount_object.user) + self.all_mounts_procs.append(proc) + + # Validate IO + self.validate_io_on_clients() + + # Calculate and check arequal of the bricks and mount point + self.check_arequal_from_mount_point_and_bricks() + + # Select bricks to bring offline from a replica set + subvols_dict = get_subvols(self.mnode, self.volname) + subvols = subvols_dict['volume_subvols'] + bricks_to_bring_offline = [] + bricks_to_be_online = [] + for subvol in subvols: + bricks_to_bring_offline.append(subvol[0]) + for brick in subvol[1:]: + bricks_to_be_online.append(brick) + + # Bring bricks offline + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' % + bricks_to_bring_offline) + + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks %s are not offline' + % bricks_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + bricks_to_bring_offline) + + # Change the permissions of the directories and files + self.all_mounts_procs = [] + for mount_obj in self.mounts: + command = ('cd {}/{}; ' + 'for i in `seq 1 100` ; ' + 'do chmod 555 dir.$i ; done ; ' + 'for i in `seq 1 50` ; ' + 'do for j in `seq 1 5` ; ' + 'do chmod 666 dir.$i/file.$j ; done ; done ; ' + 'for i in `seq 51 100` ; ' + 'do for j in `seq 1 5` ; ' + 'do chmod 444 dir.$i/file.$j ; done ; done ;' + .format(mount_obj.mountpoint, + self.test_meta_data_self_heal_folder)) + + proc = g.run_async(mount_obj.client_system, command, + user=mount_obj.user) + self.all_mounts_procs.append(proc) + self.io_validation_complete = False + + # Validate IO + self.validate_io_on_clients() + + # Change the ownership of the directories and files + self.all_mounts_procs = [] + for mount_obj in self.mounts: + command = ('cd {}/{} ; ' + 'for i in `seq 1 35` ; ' + 'do chown -R qa_func dir.$i ; done ; ' + 'for i in `seq 36 70` ; ' + 'do chown -R qa_system dir.$i ; done ; ' + 'for i in `seq 71 100` ; ' + 'do chown -R qa_perf dir.$i ; done ;' + .format(mount_obj.mountpoint, + self.test_meta_data_self_heal_folder)) + proc = g.run_async(mount_obj.client_system, command, + user=mount_obj.user) + self.all_mounts_procs.append(proc) + self.io_validation_complete = False + + # Validate IO + self.validate_io_on_clients() + + # Change the group of the directories and files + self.all_mounts_procs = [] + for mount_obj in self.mounts: + command = ('cd {}/{}; 
' + 'for i in `seq 1 100` ; ' + 'do chgrp -R qa_all dir.$i ; done ;' + .format(mount_obj.mountpoint, + self.test_meta_data_self_heal_folder)) + + proc = g.run_async(mount_obj.client_system, command, + user=mount_obj.user) + self.all_mounts_procs.append(proc) + self.io_validation_complete = False + + # Validate IO + self.validate_io_on_clients() + + # Bring brick online + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s online' % + bricks_to_bring_offline) + g.log.info('Bringing bricks %s online is successful', + bricks_to_bring_offline) + + # Trigger heal from mount point + self.trigger_heal_from_mount_point() + + # Verify the changes are not self healed on brick1 for each subvol + for brick in bricks_to_bring_offline: + node, brick_path = brick.split(':') + + dir_list = get_dir_contents(node, "{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder)) + self.assertIsNotNone(dir_list, "Dir list from " + "brick is empty") + g.log.info("Successfully got dir list from bick") + + # Verify changes for dirs + for folder in dir_list: + + ret = get_file_stat(node, "{}/{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, + folder)) + + self.assertEqual('755', ret['access'], + "Permissions mismatch on node {}" + .format(node)) + + self.assertEqual('root', ret['username'], + "User id mismatch on node {}" + .format(node)) + + self.assertEqual('root', ret['groupname'], + "Group id mismatch on node {}" + .format(node)) + + # Get list of files for each dir + file_list = get_dir_contents(node, "{}/{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, + folder)) + self.assertIsNotNone(file_list, "File list from " + "brick is empty.") + g.log.info("Successfully got file list from bick.") + + if file_list: + for file_name in file_list: + + ret = get_file_stat(node, "{}/{}/{}/{}".format( + brick_path, self.test_meta_data_self_heal_folder, + folder, file_name)) + + self.assertEqual('644', ret['access'], + "Permissions mismatch on node" + " {} for file {}".format(node, + file_name)) + + self.assertEqual('root', ret['username'], + "User id mismatch on node" + " {} for file {}".format(node, + file_name)) + + self.assertEqual('root', ret['groupname'], + "Group id mismatch on node" + " {} for file {}".format(node, + file_name)) + + # Verify the changes are self healed on all bricks except brick1 + # for each subvol + self.check_permssions_on_bricks(bricks_to_be_online) + + # Setting options + ret = set_volume_options(self.mnode, self.volname, + {"metadata-self-heal": "on"}) + self.assertTrue(ret, 'Failed to set options to ON.') + g.log.info("Options are set successfully") + + # Trigger heal from mount point + self.trigger_heal_from_mount_point() + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + # Verify the changes are self healed on brick1 for each subvol + self.check_permssions_on_bricks(bricks_to_bring_offline) + + # Calculate and check arequal of the bricks and mount point + self.check_arequal_from_mount_point_and_bricks() -- cgit
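
Usage note: with a standard glusto-tests setup this module can be run on its
own through the glusto runner; the config path below is illustrative:

    glusto -c /tmp/glusto_config.yml \
        --pytest='-v tests/functional/afr/test_metadata_self_heal_client_side_heal.py'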