# Copyright (C) 2016-2017 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

from glusto.core import Glusto as g
from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on)
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.volume_ops import (set_volume_options,
                                           get_volume_options)
from glustolibs.gluster.volume_libs import get_subvols
from glustolibs.gluster.brick_libs import (bring_bricks_offline,
                                           bring_bricks_online,
                                           are_bricks_offline,
                                           get_all_bricks)
from glustolibs.gluster.heal_libs import (monitor_heal_completion,
                                          is_heal_complete,
                                          is_volume_in_split_brain)
from glustolibs.io.utils import (validate_io_procs,
                                 list_all_files_and_dirs_mounts,
                                 wait_for_io_to_complete)


@runs_on([['distributed-replicated'],
          ['glusterfs', 'nfs', 'cifs']])
class VolumeSetDataSelfHealTests(GlusterBaseClass):
    """Check how the 'data-self-heal' volume option affects healing of a
    brick that was offline while files were rewritten from the mounts.
    """

    def setUp(self):
        """
        setUp method for every test
        """
        # calling GlusterBaseClass setUp
        GlusterBaseClass.setUp.im_func(self)

        # Async IO handles started by the test; tearDown waits on them when
        # the test aborted before validating the IO itself.
        self.all_mounts_procs = []
        self.io_validation_complete = False

        # Setup Volume and Mount Volume
        g.log.info("Starting to Setup Volume %s", self.volname)
        ret = self.setup_volume_and_mount_volume(self.mounts)
        if not ret:
            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
        g.log.info("Successful in Setup Volume and Mount Volume")

    def tearDown(self):
        """
        If test method failed before validating IO, tearDown waits for the
        IO's to complete and checks for the IO exit status

        Cleanup and umount volume

        Raises:
            ExecutionError: if pending IO failed, listing the mounts failed,
                or the volume could not be unmounted and cleaned up.
        """
        if not self.io_validation_complete:
            g.log.info("Wait for IO to complete as IO validation did not "
                       "succeed in test method")
            ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts)
            if not ret:
                raise ExecutionError("IO failed on some of the clients")
            g.log.info("IO is successful on all mounts")

            # List all files and dirs created
            g.log.info("List all files and directories:")
            ret = list_all_files_and_dirs_mounts(self.mounts)
            if not ret:
                raise ExecutionError("Failed to list all files and dirs")
            g.log.info("Listing all files and directories is successful")

        # Cleanup and umount volume
        g.log.info("Starting to Unmount Volume and Cleanup Volume")
        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
        if not ret:
            raise ExecutionError("Failed to umount the vol & cleanup Volume")
        g.log.info("Successful in umounting the volume and Cleanup")

        # Calling GlusterBaseClass teardown
        GlusterBaseClass.tearDown.im_func(self)

    def _get_arequal_total(self, brick):
        """Run arequal-checksum on one brick and return its total checksum.

        Args:
            brick (str): brick in 'node:/brick/path' form.

        Returns:
            str: text after the last ':' on the last line of the
                arequal-checksum output.
        """
        node, brick_path = brick.split(':')
        command = ('arequal-checksum -p %s '
                   '-i .glusterfs -i .landfill -i .trashcan' % brick_path)
        ret, arequal, _ = g.run(node, command)
        # Fail on the exit status before parsing, so an empty output does not
        # surface as an unrelated IndexError.
        self.assertFalse(ret, 'Failed to get arequal on brick %s' % brick)
        g.log.info('Getting arequal for %s is successful', brick)
        return arequal.splitlines()[-1].split(':')[-1]

    def _assert_arequals_match_in_subvols(self, subvols):
        """Assert that, within every given subvolume, each brick has the same
        arequal total as the first brick of that subvolume.

        Args:
            subvols (list): list of subvolumes, each a list of
                'node:/brick/path' strings.
        """
        for subvol_brick_list in subvols:
            first_brick_total = self._get_arequal_total(subvol_brick_list[0])

            # The first brick is the reference; comparing it with itself
            # would only repeat the checksum run.
            for brick in subvol_brick_list[1:]:
                brick_total = self._get_arequal_total(brick)
                self.assertEqual(first_brick_total, brick_total,
                                 'Arequals for subvol and %s are not equal'
                                 % brick)
                g.log.info('Arequals for subvol and %s are equal', brick)
        g.log.info('All arequals are equal for distributed-replicated')

    def _get_entry_sizes_on_brick(self, node, brick_path, entries):
        """Return {entry: size} using 'du -h' for entries under a brick root.

        Args:
            node (str): host on which the brick lives.
            brick_path (str): brick root directory on that host.
            entries (list): names (relative to brick_path) to measure.

        Returns:
            dict: entry name -> first tab-separated field of the du output.
        """
        sizes = {}
        for entry in entries:
            command = 'cd %s ; du -h %s' % (brick_path, entry)
            ret, file_info, _ = g.run(node, command)
            self.assertFalse(ret, 'Failed to get file size on %s' % node)
            sizes[entry] = file_info.split('\t')[0]
        return sizes

    def test_volume_set_option_data_self_heal(self):
        """
        - turn off self-heal-daemon option
        - turn off data-self-heal option
        - check if the options are set correctly
        - create IO
        - calculate arequal
        If it is distribute-replicate, the arequal-checksum of nodes
        in each replica set should match
        - bring down "brick1"
        - modify IO
        - bring back the brick1
        - execute "find . | xargs stat" from the mount point
        to trigger background data self-heal
        - calculate arequal
        If it is distribute-replicate, arequal's checksum of brick which
        was down should not match with the bricks which was up
        in the replica set but for other replicaset where all bricks are up
        should match the arequal-checksum
        - check if the data of existing files are not modified in brick1
        - turn on the option data-self-heal
        - execute "find . | xargs md5sum" from the mount point
        - wait for heal to complete
        - calculate arequal
        If it is distribute-replicate, the arequal-checksum of nodes
        in each replica set should match
        """
        # pylint: disable=too-many-locals,too-many-statements,too-many-branches
        all_bricks = get_all_bricks(self.mnode, self.volname)
        # Guard against a missing/empty brick list before indexing into it.
        self.assertTrue(all_bricks, 'Failed to get the brick list for '
                        'volume %s' % self.volname)

        # Setting options
        options = {"self-heal-daemon": "off",
                   "data-self-heal": "off"}
        g.log.info('Setting options %s...', options)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options %s' % options)
        g.log.info("Successfully set %s for volume %s",
                   options, self.volname)

        # Check if options are set to off
        options_dict = get_volume_options(self.mnode, self.volname)
        self.assertEqual(options_dict['cluster.self-heal-daemon'], 'off',
                         'Option self-heal-daemon is not set to off')
        self.assertEqual(options_dict['cluster.data-self-heal'], 'off',
                         'Option data-self-heal is not set to off')
        g.log.info('Option are set to off: %s', options)

        # Creating files on client side
        for mount_obj in self.mounts:
            g.log.info("Generating data for %s:%s",
                       mount_obj.client_system, mount_obj.mountpoint)
            # Create files
            g.log.info('Creating files and dirs...')
            command = ('cd %s ; '
                       'mkdir test_data_self_heal ;'
                       'cd test_data_self_heal ; '
                       'for i in `seq 1 100` ; '
                       'do dd if=/dev/urandom of=file.$i bs=128K count=$i ; '
                       'done ;'
                       % mount_obj.mountpoint)

            proc = g.run_async(mount_obj.client_system, command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Validate IO
        g.log.info("Wait for IO to complete and validate IO ...")
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        self.io_validation_complete = True
        g.log.info("IO is successful on all mounts")

        # Check arequals
        # get the subvolumes
        g.log.info("Starting to get sub-volumes for volume %s", self.volname)
        subvols_dict = get_subvols(self.mnode, self.volname)
        num_subvols = len(subvols_dict['volume_subvols'])
        g.log.info("Number of subvolumes in volume: %s", num_subvols)

        # Get arequals and compare
        self._assert_arequals_match_in_subvols(subvols_dict['volume_subvols'])

        # Select bricks to bring offline
        offline_brick = all_bricks[0]
        bricks_to_bring_offline = [offline_brick]

        # Keep the offline brick's location in dedicated names so later
        # size checks are guaranteed to look at the same brick.
        g.log.info('Getting file/dir list on brick to be offline')
        offline_node, offline_brick_path = offline_brick.split(':')

        # Get files/dir list
        command = 'cd %s ; ls' % offline_brick_path
        ret, file_list, _ = g.run(offline_node, command)
        self.assertFalse(ret, 'Failed to ls files on %s' % offline_node)
        brick_file_dir_list = file_list.splitlines()

        # Get files/dir size before bringing brick offline
        g.log.info('Getting file/dir size on brick to be offline')
        brick_file_dir_dict_before_offline = self._get_entry_sizes_on_brick(
            offline_node, offline_brick_path, brick_file_dir_list)

        # Bring brick 1 offline
        g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
        self.assertTrue(ret, 'Failed to bring bricks %s offline'
                        % bricks_to_bring_offline)

        ret = are_bricks_offline(self.mnode, self.volname,
                                 bricks_to_bring_offline)
        self.assertTrue(ret, 'Bricks %s are not offline'
                        % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s offline is successful',
                   bricks_to_bring_offline)

        # Modify data
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Adding data for %s:%s",
                       mount_obj.client_system, mount_obj.mountpoint)
            # changing files
            g.log.info('Creating dirs and files...')
            # The mount point must appear in the command: without a '%s'
            # placeholder the '%' operator raises TypeError.
            command = ('cd %s/test_data_self_heal ; '
                       'for i in `seq 1 100` ; '
                       'do dd if=/dev/urandom of=file.$i bs=512K count=$i ; '
                       'done ;'
                       % mount_obj.mountpoint)

            proc = g.run_async(mount_obj.client_system, command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Validate IO
        g.log.info("Wait for IO to complete and validate IO ...")
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        self.io_validation_complete = True
        g.log.info("IO is successful on all mounts")

        # Bring brick online
        g.log.info('Bringing bricks %s online...', bricks_to_bring_offline)
        ret = bring_bricks_online(self.mnode, self.volname,
                                  bricks_to_bring_offline)
        self.assertTrue(ret, 'Failed to bring bricks %s online'
                        % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s online is successful',
                   bricks_to_bring_offline)

        # Trigger heal from mount point
        g.log.info('Triggering heal from mount point...')
        for mount_obj in self.mounts:
            g.log.info("Triggering heal for %s:%s",
                       mount_obj.client_system, mount_obj.mountpoint)
            command = ('cd %s/test_data_self_heal ; find . | xargs stat'
                       % mount_obj.mountpoint)

            ret, _, _ = g.run(mount_obj.client_system, command)
            self.assertFalse(ret, 'Failed to start "find . | xargs stat" '
                             'on %s' % mount_obj.client_system)

        # Check arequals
        g.log.info("Starting to get sub-volumes for volume %s", self.volname)
        subvols_dict = get_subvols(self.mnode, self.volname)
        subvols = subvols_dict['volume_subvols']

        # First subvol: with data-self-heal off, the brick that was offline
        # must differ from every other brick of its replica set.
        offline_brick_total = self._get_arequal_total(offline_brick)
        for brick in subvols[0]:
            if brick == offline_brick:
                continue
            g.log.info('Getting arequal on bricks %s...', brick)
            brick_total = self._get_arequal_total(brick)
            self.assertNotEqual(offline_brick_total, brick_total,
                                'Arequals for mountpoint and %s '
                                'are equal' % brick)
            g.log.info('Arequals for mountpoint and %s are not equal', brick)

        # Get arequals for all subvol except first and compare
        self._assert_arequals_match_in_subvols(subvols[1:])

        # Get files/dir size after bringing brick online, on the very brick
        # that was taken offline (not on whichever brick was checked last).
        g.log.info('Getting file/dir size on brick after bringing online')
        brick_file_dir_dict_after_online = self._get_entry_sizes_on_brick(
            offline_node, offline_brick_path, brick_file_dir_list)

        # Compare dicts with file size
        g.log.info('Compare file/dir size on brick before bringing offline and'
                   ' after bringing online')
        self.assertEqual(brick_file_dir_dict_before_offline,
                         brick_file_dir_dict_after_online,
                         'file/dir size on brick before bringing offline and '
                         'after bringing online are not equal')
        g.log.info('file/dir size on brick before bringing offline and '
                   'after bringing online are equal')

        # Setting options
        options = {"data-self-heal": "on"}
        g.log.info('Setting options %s...', options)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options %s' % options)
        g.log.info("Option 'data-self-heal' is set to 'on' successfully")

        # Start heal from mount point
        g.log.info('Starting heal from mount point...')
        for mount_obj in self.mounts:
            g.log.info("Start heal for %s:%s",
                       mount_obj.client_system, mount_obj.mountpoint)
            command = ('cd %s/test_data_self_heal ; '
                       ' find . | xargs md5sum'
                       % mount_obj.mountpoint)
            # Exit status is deliberately not checked here; only the read
            # access from the mount matters for this step.
            g.run(mount_obj.client_system, command)

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')

        # Check arequals
        # get the subvolumes
        g.log.info("Starting to get sub-volumes for volume %s", self.volname)
        subvols_dict = get_subvols(self.mnode, self.volname)
        num_subvols = len(subvols_dict['volume_subvols'])
        g.log.info("Number of subvolumes in volume: %s", num_subvols)

        # Get arequals and compare
        self._assert_arequals_match_in_subvols(subvols_dict['volume_subvols'])