diff options
Diffstat (limited to 'tests/functional/afr')
| -rwxr-xr-x | tests/functional/afr/heal/test_self_heal.py | 251 | 
1 files changed, 251 insertions, 0 deletions
# Reconstructed from the diff hunk:
#   diff --git a/tests/functional/afr/heal/test_self_heal.py
#              b/tests/functional/afr/heal/test_self_heal.py
#   index cf4ac6b17..7837d958c 100755
#   @@ -414,3 +414,254 @@ class TestSelfHeal(GlusterBaseClass):
# Unchanged context preceding the addition (tail of the previous test):
#         self.assertTrue(ret, "IO failed on some of the clients")
#         self.io_validation_complete = True
#         g.log.info("IO is successful on all mounts")
#
# The function below is the method added to TestSelfHeal by this hunk.
def test_entry_self_heal_heal_command(self):
    """
    Test Entry-Self-Heal (heal command)

    Description:
    - set the volume options
      "metadata-self-heal": "off"
      "entry-self-heal": "off"
      "data-self-heal": "off"
    - create IO
    - for each data operation (create, rename, copy, delete):
      - get arequal before getting bricks offline
      - set the volume option "self-heal-daemon": "off"
      - bring down all brick processes from the selected set
      - get arequal after getting bricks offline and compare with
        arequal before bringing bricks offline
      - modify the data
      - get arequal before getting bricks online
      - bring bricks online
      - set the volume option "self-heal-daemon": "on"
      - check daemons and start healing
      - check if heal is completed
      - check for split-brain
      - get arequal after getting bricks online and compare with
        arequal before bringing bricks online

    Raises:
        ExecutionError: if listing files and directories on the mounts
            fails. All other failures are reported through unittest
            assertions.
    """

    # Setting options
    g.log.info('Setting options...')
    options = {"metadata-self-heal": "off",
               "entry-self-heal": "off",
               "data-self-heal": "off"}
    ret = set_volume_options(self.mnode, self.volname, options)
    self.assertTrue(ret, 'Failed to set options %s' % options)
    g.log.info("Options "
               "'metadata-self-heal', "
               "'entry-self-heal', "
               "'data-self-heal', "
               "are set to 'off'")

    # Start IO on mounts
    g.log.info("Starting IO on all mounts...")
    self.all_mounts_procs = []
    for mount_obj in self.mounts:
        g.log.info("Starting IO on %s:%s",
                   mount_obj.client_system, mount_obj.mountpoint)
        cmd = ("python %s create_deep_dirs_with_files "
               "--dirname-start-num %d "
               "--dir-length 2 "
               "--dir-depth 2 "
               "--max-num-of-dirs 2 "
               "--num-of-files 20 %s" % (self.script_upload_path,
                                         self.counter,
                                         mount_obj.mountpoint))
        proc = g.run_async(mount_obj.client_system, cmd,
                           user=mount_obj.user)
        self.all_mounts_procs.append(proc)
        # Advance the start number so each mount gets its own dir names.
        self.counter += 10
        g.log.info("IO on %s:%s is started successfully",
                   mount_obj.client_system, mount_obj.mountpoint)
    self.io_validation_complete = False

    # Validate IO
    g.log.info("Wait for IO to complete and validate IO ...")
    ret = validate_io_procs(self.all_mounts_procs, self.mounts)
    self.assertTrue(ret, "IO failed on some of the clients")
    self.io_validation_complete = True
    g.log.info("IO is successful on all mounts")

    # Command list to do different operations with data -
    # create, rename, copy and delete.
    # Each entry is a template taking (script path, mountpoint).
    cmd_list = ["python %s create_files -f 20 %s",
                "python %s mv -i '.trashcan' %s",
                "python %s copy --dest-dir new_dir %s",
                "python %s delete %s"]

    for cmd_template in cmd_list:
        # Get areequal before getting bricks offline
        g.log.info('Getting areequal before getting bricks offline...')
        ret, result_before_offline = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting areequal before getting bricks offline '
                   'is successful')

        # Setting options
        g.log.info('Setting options...')
        options = {"self-heal-daemon": "off"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options %s' % options)
        g.log.info("Option 'self-heal-daemon' "
                   "is set to 'off' successfully")

        # Select bricks to bring offline.
        # Materialise the result as a list: it is reused several times
        # below, and on Python 3 a bare filter() is a one-shot iterator.
        bricks_to_bring_offline_dict = select_bricks_to_bring_offline(
            self.mnode, self.volname)
        bricks_to_bring_offline = list(filter(None, (
            bricks_to_bring_offline_dict['hot_tier_bricks'] +
            bricks_to_bring_offline_dict['cold_tier_bricks'] +
            bricks_to_bring_offline_dict['volume_bricks'])))

        # Bring brick offline
        g.log.info('Bringing bricks %s offline...',
                   bricks_to_bring_offline)
        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
        self.assertTrue(ret, 'Failed to bring bricks %s offline' %
                        bricks_to_bring_offline)

        ret = are_bricks_offline(self.mnode, self.volname,
                                 bricks_to_bring_offline)
        self.assertTrue(ret, 'Bricks %s are not offline'
                        % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s offline is successful',
                   bricks_to_bring_offline)

        # Get areequal after getting bricks offline
        g.log.info('Getting areequal after getting bricks offline...')
        ret, result_after_offline = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting areequal after getting bricks offline '
                   'is successful')

        # Checking areequals before bringing bricks offline
        # and after bringing bricks offline
        self.assertEqual(result_before_offline, result_after_offline,
                         'Checksums are not equal')
        g.log.info('Checksums before bringing bricks offline '
                   'and after bringing bricks offline are equal')

        # Modify the data
        g.log.info("Start modifying IO on all mounts...")
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Modifying IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            # Format into a fresh name so the template stays intact for
            # the next mount; re-formatting an already formatted string
            # raises TypeError.
            cmd = cmd_template % (self.script_upload_path,
                                  mount_obj.mountpoint)
            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            g.log.info("IO on %s:%s is modified successfully",
                       mount_obj.client_system, mount_obj.mountpoint)
        self.io_validation_complete = False

        # Validate IO
        g.log.info("Wait for IO to complete and validate IO ...")
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        self.io_validation_complete = True
        g.log.info("IO is successful on all mounts")

        # Get areequal before getting bricks online
        g.log.info('Getting areequal before getting bricks online...')
        ret, result_before_online = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting areequal before getting bricks online '
                   'is successful')

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        if not ret:
            raise ExecutionError("Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")

        # Bring brick online
        g.log.info('Bringing bricks %s online...',
                   bricks_to_bring_offline)
        ret = bring_bricks_online(self.mnode, self.volname,
                                  bricks_to_bring_offline)
        self.assertTrue(ret, 'Failed to bring bricks %s online'
                        % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s online is successful',
                   bricks_to_bring_offline)

        # Setting options
        g.log.info('Setting options...')
        options = {"self-heal-daemon": "on"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options %s' % options)
        g.log.info("Option 'self-heal-daemon' is set to 'on' successfully")

        # Wait for volume processes to be online
        g.log.info("Wait for volume processes to be online")
        ret = wait_for_volume_process_to_be_online(self.mnode,
                                                   self.volname)
        # The message must be a formatted string, not a (fmt, arg) tuple.
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online" % self.volname))
        g.log.info("Successful in waiting for volume %s processes to be "
                   "online", self.volname)

        # Verify volume's all process are online
        g.log.info("Verifying volume's all process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode,
                                                      self.volname)
        self.assertTrue(ret, ("Volume %s : All process are not online"
                              % self.volname))
        g.log.info("Volume %s : All process are online", self.volname)

        # Wait for self-heal-daemons to be online
        g.log.info("Waiting for self-heal-daemons to be online")
        ret = is_shd_daemonized(self.all_servers)
        self.assertTrue(ret, "Either No self heal daemon process found")
        g.log.info("All self-heal-daemons are online")

        # Start healing
        ret = trigger_heal(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not started')
        g.log.info('Healing is started')

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')

        # Get areequal after getting bricks online
        g.log.info('Getting areequal after getting bricks online...')
        ret, result_after_online = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting areequal after getting bricks online '
                   'is successful')

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        if not ret:
            raise ExecutionError("Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")

        # Checking areequals before bringing bricks online
        # and after bringing bricks online
        self.assertEqual(result_before_online, result_after_online,
                         'Checksums are not equal')
        g.log.info('Checksums before bringing bricks online '
                   'and after bringing bricks online are equal')
