diff options
308 files changed, 37625 insertions, 4437 deletions
diff --git a/.github/ISSUE_TEMPLATE b/.github/ISSUE_TEMPLATE new file mode 100644 index 000000000..758a11961 --- /dev/null +++ b/.github/ISSUE_TEMPLATE @@ -0,0 +1,38 @@ +<!-- Please use this template while reporting an issue, providing as much information as possible to make debugging quicker. Thank you! --> + +**Description of problem:** + + +**Operating system**: + +**Python Version**: + +**Terminal dump of the issue observed:**: +<details> + +</details> + +**Exact glusto-tests log where the issue was observed:**: +<details> + + + +</details> + +**Setup info:** +<details> +Number of nodes: + +Number of clients: + +Number of servers: + +Testcase or library: + +How reproducible: + +</details> + +**Additional info:** + + diff --git a/.github/stale.yml b/.github/stale.yml new file mode 100644 index 000000000..4a204b599 --- /dev/null +++ b/.github/stale.yml @@ -0,0 +1,25 @@ +# Number of days of inactivity before an issue becomes stale +daysUntilStale: 90 +# Number of days of inactivity before a stale issue is closed +daysUntilClose: 14 +# Issues with these labels will never be considered stale +exemptLabels: + - ToBeTriaged + - ToBeFixed +# Label to use when marking an issue as stale +staleLabel: wontfix + +# Comment to post when marking an issue as stale. Set to `false` to disable +markComment: > + Thank you for your contributions. + + Noticed that this issue is not having any activity in last 3 months! We + are marking this issue as stale because it has not had any recent activity + and we have decided not to fix it while triaging. + + It will be closed in 14 days if no one responds with a comment here. + +# Comment to post when closing a stale issue. Set to `false` to disable +closeComment: > + Closing this issue as there was no update since my last update on issue. + If this is an issue which is still valid, feel free to reopen it. 
@@ -138,15 +138,11 @@ For more info about running tests on PyUnit, Pytest and Nose Tests, refer the [d glusto-tests can also be executed using `tox`: -``` # tox -e functional -- glusto -c 'config.yml' --pytest='-v -s -k test_demo1' -``` glusto-tests can also be executed with python3 using `tox`: -``` # tox -e functional3 -- glusto -c 'config.yml' --pytest='-v -s -k test_demo1' -``` **NOTE:** - Please note that glusto-tests is not completely compatible with python3. diff --git a/glustolibs-gluster/glustolibs/gluster/brick_libs.py b/glustolibs-gluster/glustolibs/gluster/brick_libs.py index c3e5afed8..b92832dd1 100644 --- a/glustolibs-gluster/glustolibs/gluster/brick_libs.py +++ b/glustolibs-gluster/glustolibs/gluster/brick_libs.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2016 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,20 +17,20 @@ """ Description: Module for gluster brick related helper functions. """ import random -from math import ceil +from math import floor import time from glusto.core import Glusto as g from glustolibs.gluster.brickmux_ops import is_brick_mux_enabled +from glustolibs.gluster.gluster_init import restart_glusterd from glustolibs.gluster.volume_ops import (get_volume_info, get_volume_status) -from glustolibs.gluster.volume_libs import (get_subvols, is_tiered_volume, +from glustolibs.gluster.volume_libs import (get_subvols, get_client_quorum_info, get_volume_type_info) +from glustolibs.gluster.lib_utils import (get_extended_attributes_info) def get_all_bricks(mnode, volname): """Get list of all the bricks of the specified volume. - If the volume is 'Tier' volume, the list will contain both - 'hot tier' and 'cold tier' bricks. 
Args: mnode (str): Node on which command has to be executed @@ -45,19 +45,7 @@ def get_all_bricks(mnode, volname): g.log.error("Unable to get the volinfo of %s.", volname) return None - if 'Tier' in volinfo[volname]['typeStr']: - # Get bricks from hot-tier in case of Tier volume - hot_tier_bricks = get_hot_tier_bricks(mnode, volname) - if hot_tier_bricks is None: - return None - # Get cold-tier bricks in case of Tier volume - cold_tier_bricks = get_cold_tier_bricks(mnode, volname) - if cold_tier_bricks is None: - return None - - return hot_tier_bricks + cold_tier_bricks - - # Get bricks from a non Tier volume + # Get bricks from a volume all_bricks = [] if 'bricks' in volinfo[volname]: if 'brick' in volinfo[volname]['bricks']: @@ -76,88 +64,6 @@ def get_all_bricks(mnode, volname): return None -def get_hot_tier_bricks(mnode, volname): - """Get list of hot-tier bricks of the specified volume - - Args: - mnode (str): Node on which command has to be executed - volname (str): Name of the volume - - Returns: - list : List of hot-tier bricks of the volume on Success. - NoneType: None on failure. 
- """ - volinfo = get_volume_info(mnode, volname) - if volinfo is None: - g.log.error("Unable to get the volinfo of %s.", volname) - return None - - if 'Tier' not in volinfo[volname]['typeStr']: - g.log.error("Volume %s is not a tiered volume", volname) - return None - - hot_tier_bricks = [] - if 'bricks' in volinfo[volname]: - if 'hotBricks' in volinfo[volname]['bricks']: - if 'brick' in volinfo[volname]['bricks']['hotBricks']: - for brick in volinfo[volname]['bricks']['hotBricks']['brick']: - if 'name' in brick: - hot_tier_bricks.append(brick['name']) - else: - g.log.error("brick %s doesn't have the key 'name' " - "for the volume: %s", brick, volname) - return None - else: - g.log.error("Bricks not found in hotBricks section of volume " - "info for the volume %s", volname) - return None - return hot_tier_bricks - else: - g.log.error("Bricks not found for the volume %s", volname) - return None - - -def get_cold_tier_bricks(mnode, volname): - """Get list of cold-tier bricks of the specified volume - - Args: - mnode (str): Node on which command has to be executed - volname (str): Name of the volume - - Returns: - list : List of cold-tier bricks of the volume on Success. - NoneType: None on failure. 
- """ - volinfo = get_volume_info(mnode, volname) - if volinfo is None: - g.log.error("Unable to get the volinfo of %s.", volname) - return None - - if 'Tier' not in volinfo[volname]['typeStr']: - g.log.error("Volume %s is not a tiered volume", volname) - return None - - cold_tier_bricks = [] - if 'bricks' in volinfo[volname]: - if 'coldBricks' in volinfo[volname]['bricks']: - if 'brick' in volinfo[volname]['bricks']['coldBricks']: - for brick in volinfo[volname]['bricks']['coldBricks']['brick']: - if 'name' in brick: - cold_tier_bricks.append(brick['name']) - else: - g.log.error("brick %s doesn't have the key 'name' " - "for the volume: %s", brick, volname) - return None - else: - g.log.error("Bricks not found in coldBricks section of volume " - "info for the volume %s", volname) - return None - return cold_tier_bricks - else: - g.log.error("Bricks not found for the volume %s", volname) - return None - - def bring_bricks_offline(volname, bricks_list, bring_bricks_offline_methods=None): """Bring the bricks specified in the bricks_list offline. @@ -304,10 +210,9 @@ def bring_bricks_online(mnode, volname, bricks_list, "the bricks '%s' online", volname, bricks_list) elif bring_brick_online_method == 'glusterd_restart': - bring_brick_online_command = "service glusterd restart" brick_node, _ = brick.split(":") - ret, _, _ = g.run(brick_node, bring_brick_online_command) - if ret != 0: + ret = restart_glusterd(brick_node) + if not ret: g.log.error("Unable to restart glusterd on node %s", brick_node) _rc = False @@ -504,41 +409,29 @@ def select_bricks_to_bring_offline(mnode, volname): being empty list. 
Example: brick_to_bring_offline = { - 'is_tier': False, - 'hot_tier_bricks': [], - 'cold_tier_bricks': [], 'volume_bricks': [] } """ # Defaulting the values to empty list bricks_to_bring_offline = { - 'is_tier': False, - 'hot_tier_bricks': [], - 'cold_tier_bricks': [], 'volume_bricks': [] - } + } volinfo = get_volume_info(mnode, volname) if volinfo is None: g.log.error("Unable to get the volume info for volume %s", volname) return bricks_to_bring_offline - if is_tiered_volume(mnode, volname): - bricks_to_bring_offline['is_tier'] = True - # Select bricks from tiered volume. - bricks_to_bring_offline = ( - select_tier_volume_bricks_to_bring_offline(mnode, volname)) - else: - # Select bricks from non-tiered volume. - volume_bricks = select_volume_bricks_to_bring_offline(mnode, volname) - bricks_to_bring_offline['volume_bricks'] = volume_bricks + # Select bricks from the volume. + volume_bricks = select_volume_bricks_to_bring_offline(mnode, volname) + bricks_to_bring_offline['volume_bricks'] = volume_bricks return bricks_to_bring_offline def select_volume_bricks_to_bring_offline(mnode, volname): """Randomly selects bricks to bring offline without affecting the cluster - from a non-tiered volume. + from a volume. Args: mnode (str): Node on which commands will be executed. @@ -546,14 +439,10 @@ def select_volume_bricks_to_bring_offline(mnode, volname): Returns: list: On success returns list of bricks that can be brough offline. 
- If volume doesn't exist or is a tiered volume returns empty list + If volume doesn't exist returns empty list """ volume_bricks_to_bring_offline = [] - # Check if volume is tiered - if is_tiered_volume(mnode, volname): - return volume_bricks_to_bring_offline - # get volume type volume_type_info = get_volume_type_info(mnode, volname) volume_type = volume_type_info['volume_type_info']['typeStr'] @@ -598,162 +487,6 @@ def select_volume_bricks_to_bring_offline(mnode, volname): return volume_bricks_to_bring_offline -def select_tier_volume_bricks_to_bring_offline(mnode, volname): - """Randomly selects bricks to bring offline without affecting the cluster - from a tiered volume. - - Args: - mnode (str): Node on which commands will be executed. - volname (str): Name of the volume. - - Returns: - dict: On success returns dict. Value of each key is list of bricks to - bring offline. - If volume doesn't exist or is not a tiered volume returns dict - with value of each item being empty list. - Example: - brick_to_bring_offline = { - 'hot_tier_bricks': [], - 'cold_tier_bricks': [], - } - """ - # Defaulting the values to empty list - bricks_to_bring_offline = { - 'hot_tier_bricks': [], - 'cold_tier_bricks': [], - } - - volinfo = get_volume_info(mnode, volname) - if volinfo is None: - g.log.error("Unable to get the volume info for volume %s", volname) - return bricks_to_bring_offline - - if is_tiered_volume(mnode, volname): - # Select bricks from both hot tier and cold tier. - hot_tier_bricks = (select_hot_tier_bricks_to_bring_offline - (mnode, volname)) - cold_tier_bricks = (select_cold_tier_bricks_to_bring_offline - (mnode, volname)) - bricks_to_bring_offline['hot_tier_bricks'] = hot_tier_bricks - bricks_to_bring_offline['cold_tier_bricks'] = cold_tier_bricks - return bricks_to_bring_offline - - -def select_hot_tier_bricks_to_bring_offline(mnode, volname): - """Randomly selects bricks to bring offline without affecting the cluster - from a hot tier. 
- - Args: - mnode (str): Node on which commands will be executed. - volname (str): Name of the volume. - - Returns: - list: On success returns list of bricks that can be brough offline - from hot tier. If volume doesn't exist or is a non tiered volume - returns empty list. - """ - hot_tier_bricks_to_bring_offline = [] - - # Check if volume is tiered - if not is_tiered_volume(mnode, volname): - return hot_tier_bricks_to_bring_offline - - # get volume type - volume_type_info = get_volume_type_info(mnode, volname) - hot_tier_type = volume_type_info['hot_tier_type_info']['hotBrickType'] - - # get subvols - subvols_dict = get_subvols(mnode, volname) - hot_tier_subvols = subvols_dict['hot_tier_subvols'] - - # select bricks from distribute volume - if hot_tier_type == 'Distribute': - hot_tier_bricks_to_bring_offline = [] - - # select bricks from replicated, distributed-replicated volume - if (hot_tier_type == 'Replicate' or - hot_tier_type == 'Distributed-Replicate'): - # Get replica count - hot_tier_replica_count = (volume_type_info - ['hot_tier_type_info']['hotreplicaCount']) - - # Get quorum info - quorum_info = get_client_quorum_info(mnode, volname) - hot_tier_quorum_info = quorum_info['hot_tier_quorum_info'] - - # Get list of bricks to bring offline - hot_tier_bricks_to_bring_offline = ( - get_bricks_to_bring_offline_from_replicated_volume( - hot_tier_subvols, hot_tier_replica_count, - hot_tier_quorum_info)) - - return hot_tier_bricks_to_bring_offline - - -def select_cold_tier_bricks_to_bring_offline(mnode, volname): - """Randomly selects bricks to bring offline without affecting the cluster - from a cold tier. - - Args: - mnode (str): Node on which commands will be executed. - volname (str): Name of the volume. - - Returns: - list: On success returns list of bricks that can be brough offline - from cold tier. If volume doesn't exist or is a non tiered volume - returns empty list. 
- """ - cold_tier_bricks_to_bring_offline = [] - - # Check if volume is tiered - if not is_tiered_volume(mnode, volname): - return cold_tier_bricks_to_bring_offline - - # get volume type - volume_type_info = get_volume_type_info(mnode, volname) - cold_tier_type = volume_type_info['cold_tier_type_info']['coldBrickType'] - - # get subvols - subvols_dict = get_subvols(mnode, volname) - cold_tier_subvols = subvols_dict['cold_tier_subvols'] - - # select bricks from distribute volume - if cold_tier_type == 'Distribute': - cold_tier_bricks_to_bring_offline = [] - - # select bricks from replicated, distributed-replicated volume - elif (cold_tier_type == 'Replicate' or - cold_tier_type == 'Distributed-Replicate'): - # Get replica count - cold_tier_replica_count = (volume_type_info['cold_tier_type_info'] - ['coldreplicaCount']) - - # Get quorum info - quorum_info = get_client_quorum_info(mnode, volname) - cold_tier_quorum_info = quorum_info['cold_tier_quorum_info'] - - # Get list of bricks to bring offline - cold_tier_bricks_to_bring_offline = ( - get_bricks_to_bring_offline_from_replicated_volume( - cold_tier_subvols, cold_tier_replica_count, - cold_tier_quorum_info)) - - # select bricks from Disperse, Distribured-Disperse volume - elif (cold_tier_type == 'Disperse' or - cold_tier_type == 'Distributed-Disperse'): - - # Get redundancy count - cold_tier_redundancy_count = (volume_type_info['cold_tier_type_info'] - ['coldredundancyCount']) - - # Get list of bricks to bring offline - cold_tier_bricks_to_bring_offline = ( - get_bricks_to_bring_offline_from_disperse_volume( - cold_tier_subvols, cold_tier_redundancy_count)) - - return cold_tier_bricks_to_bring_offline - - def get_bricks_to_bring_offline_from_replicated_volume(subvols_list, replica_count, quorum_info): @@ -761,13 +494,10 @@ def get_bricks_to_bring_offline_from_replicated_volume(subvols_list, for a replicated volume. Args: - subvols_list: list of subvols. 
It can be volume_subvols, - hot_tier_subvols or cold_tier_subvols. + subvols_list: list of subvols. For example: subvols = volume_libs.get_subvols(mnode, volname) volume_subvols = subvols_dict['volume_subvols'] - hot_tier_subvols = subvols_dict['hot_tier_subvols'] - cold_tier_subvols = subvols_dict['cold_tier_subvols'] replica_count: Replica count of a Replicate or Distributed-Replicate volume. quorum_info: dict containing quorum info of the volume. The dict should @@ -776,8 +506,6 @@ def get_bricks_to_bring_offline_from_replicated_volume(subvols_list, For example: quorum_dict = get_client_quorum_info(mnode, volname) volume_quorum_info = quorum_info['volume_quorum_info'] - hot_tier_quorum_info = quorum_info['hot_tier_quorum_info'] - cold_tier_quorum_info = quorum_info['cold_tier_quorum_info'] Returns: list: List of bricks that can be brought offline without affecting the @@ -805,7 +533,7 @@ def get_bricks_to_bring_offline_from_replicated_volume(subvols_list, offline_bricks_limit = int(replica_count) - int(quorum_count) elif 'auto' in quorum_type: - offline_bricks_limit = ceil(int(replica_count) / 2) + offline_bricks_limit = floor(int(replica_count) // 2) elif quorum_type is None: offline_bricks_limit = int(replica_count) - 1 @@ -835,18 +563,15 @@ def get_bricks_to_bring_offline_from_disperse_volume(subvols_list, for a disperse volume. Args: - subvols_list: list of subvols. It can be volume_subvols, - hot_tier_subvols or cold_tier_subvols. + subvols_list: list of subvols. For example: subvols = volume_libs.get_subvols(mnode, volname) volume_subvols = subvols_dict['volume_subvols'] - hot_tier_subvols = subvols_dict['hot_tier_subvols'] - cold_tier_subvols = subvols_dict['cold_tier_subvols'] redundancy_count: Redundancy count of a Disperse or Distributed-Disperse volume. Returns: - list: List of bricks that can be brought offline without affecting the + list: List of bricks that can be brought offline without affecting the cluster.On any failure return empty list. 
""" list_of_bricks_to_bring_offline = [] @@ -927,3 +652,42 @@ def is_broken_symlinks_present_on_bricks(mnode, volname): "%s on node %s.", (brick_path, brick_node)) return True return False + + +def validate_xattr_on_all_bricks(bricks_list, file_path, xattr): + """Checks if the xattr of the file/dir is same on all bricks. + + Args: + bricks_list (list): List of bricks. + file_path (str): The path to the file/dir. + xattr (str): The file attribute to get from file. + + Returns: + True if the xattr is same on all the fqpath. False otherwise + + Example: + validate_xattr_on_all_bricks("bricks_list", + "dir1/file1", + "xattr") + """ + + time_counter = 250 + g.log.info("The heal monitoring timeout is : %d minutes", + (time_counter // 60)) + while time_counter > 0: + attr_vals = {} + for brick in bricks_list: + brick_node, brick_path = brick.split(":") + attr_vals[brick] = ( + get_extended_attributes_info(brick_node, + ["{0}/{1}".format(brick_path, + file_path)], + attr_name=xattr)) + ec_version_vals = [list(val.values())[0][xattr] for val in + list(attr_vals.values())] + if len(set(ec_version_vals)) == 1: + return True + else: + time.sleep(120) + time_counter -= 120 + return False diff --git a/glustolibs-gluster/glustolibs/gluster/brickdir.py b/glustolibs-gluster/glustolibs/gluster/brickdir.py index ffc868b93..e864e8247 100644 --- a/glustolibs-gluster/glustolibs/gluster/brickdir.py +++ b/glustolibs-gluster/glustolibs/gluster/brickdir.py @@ -20,7 +20,6 @@ import os from glusto.core import Glusto as g -from glustolibs.gluster.gluster_init import get_gluster_version from glustolibs.gluster.volume_libs import get_volume_type @@ -81,11 +80,12 @@ def get_hashrange(brickdir_path): """ (host, _) = brickdir_path.split(':') - gluster_version = get_gluster_version(host) - # Check for the Gluster version and then volume type - """If the GLuster version is lower than 6.0, the hash range - can be calculated for all volume types""" - if gluster_version < 6.0: + ret = 
get_volume_type(brickdir_path) + if ret in ('Replicate', 'Disperse', 'Arbiter'): + g.log.info("Cannot find hash-range for Replicate/Disperse/Arbiter" + " volume type on Gluster 6.0 and higher.") + return "Skipping for Replicate/Disperse/Arbiter volume type" + else: ret = check_hashrange(brickdir_path) hash_range_low = ret[0] hash_range_high = ret[1] @@ -94,24 +94,6 @@ def get_hashrange(brickdir_path): else: g.log.error("Could not get hashrange") return None - elif gluster_version >= 6.0: - ret = get_volume_type(brickdir_path) - if ret in ('Replicate', 'Disperse', 'Arbiter'): - g.log.info("Cannot find hash-range for Replicate/Disperse/Arbiter" - " volume type on Gluster 6.0 and higher.") - return "Skipping for Replicate/Disperse/Arbiter volume type" - else: - ret = check_hashrange(brickdir_path) - hash_range_low = ret[0] - hash_range_high = ret[1] - if ret is not None: - return (hash_range_low, hash_range_high) - else: - g.log.error("Could not get hashrange") - return None - else: - g.log.info("Failed to get hash range") - return None def file_exists(host, filename): @@ -149,22 +131,14 @@ class BrickDir(object): def _get_hashrange(self): """get the hash range for a brick from a remote system""" - gluster_version = get_gluster_version(self._host) - if gluster_version < 6.0: + ret = get_volume_type(self._path) + if ret in ('Replicate', 'Disperse', 'Arbiter'): + g.log.info("Cannot find hash-range as the volume type under" + " test is Replicate/Disperse/Arbiter") + else: self._hashrange = get_hashrange(self._path) self._hashrange_low = self._hashrange[0] self._hashrange_high = self._hashrange[1] - elif gluster_version >= 6.0: - ret = get_volume_type(self._path) - if ret in ('Replicate', 'Disperse', 'Arbiter'): - g.log.info("Cannot find hash-range as the volume type under" - " test is Replicate/Disperse/Arbiter") - else: - self._hashrange = get_hashrange(self._path) - self._hashrange_low = self._hashrange[0] - self._hashrange_high = self._hashrange[1] - else: - 
g.log.info("Failed to get hashrange") @property def path(self): @@ -207,12 +181,10 @@ class BrickDir(object): if self.hashrange is None or self._hashrange_high is None: self._get_hashrange() if self._get_hashrange() is None: - gluster_version = get_gluster_version(self._host) - if gluster_version >= 6.0: - ret = get_volume_type(self._path) - if ret in ('Replicate', 'Disperse', 'Arbiter'): - g.log.info("Cannot find hash-range as the volume type" - " under test is Replicate/Disperse/Arbiter") + ret = get_volume_type(self._path) + if ret in ('Replicate', 'Disperse', 'Arbiter'): + g.log.info("Cannot find hash-range as the volume type" + " under test is Replicate/Disperse/Arbiter") else: return self._hashrange_high diff --git a/glustolibs-gluster/glustolibs/gluster/brickmux_libs.py b/glustolibs-gluster/glustolibs/gluster/brickmux_libs.py index 1206b4682..cb82d8434 100644 --- a/glustolibs-gluster/glustolibs/gluster/brickmux_libs.py +++ b/glustolibs-gluster/glustolibs/gluster/brickmux_libs.py @@ -66,7 +66,10 @@ def get_all_bricks_from_servers_multivol(servers, servers_info): for item in list(zip_longest(*list(servers_bricks.values()))): for brick in item: - server = server_ip.next() + try: + server = server_ip.next() # Python 2 + except AttributeError: + server = next(server_ip) # Python 3 if brick: bricks_list.append(server + ":" + brick) brickCount += 1 diff --git a/glustolibs-gluster/glustolibs/gluster/brickmux_ops.py b/glustolibs-gluster/glustolibs/gluster/brickmux_ops.py index eeb4e2a50..b56434741 100755 --- a/glustolibs-gluster/glustolibs/gluster/brickmux_ops.py +++ b/glustolibs-gluster/glustolibs/gluster/brickmux_ops.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (C) 2017-2019 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2020 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -119,7 +119,7 @@ def check_brick_pid_matches_glusterfsd_pid(mnode, volname): "of brick path %s", brick_node, brick_path) _rc = False - cmd = "pidof glusterfsd" + cmd = "pgrep -x glusterfsd" ret, pid, _ = g.run(brick_node, cmd) if ret != 0: g.log.error("Failed to run the command %s on " @@ -127,7 +127,7 @@ def check_brick_pid_matches_glusterfsd_pid(mnode, volname): _rc = False else: - glusterfsd_pid = pid.split() + glusterfsd_pid = pid.split('\n')[:-1] if brick_pid not in glusterfsd_pid: g.log.error("Brick pid %s doesn't match glusterfsd " @@ -149,8 +149,10 @@ def get_brick_processes_count(mnode): int: Number of brick processes running on the node. None: If the command fails to execute. """ - ret, out, _ = g.run(mnode, "pidof glusterfsd") + ret, out, _ = g.run(mnode, "pgrep -x glusterfsd") if not ret: - return len(out.split(" ")) + list_of_pids = out.split("\n") + list_of_pids.pop() + return len(list_of_pids) else: return None diff --git a/glustolibs-gluster/glustolibs/gluster/ctdb_libs.py b/glustolibs-gluster/glustolibs/gluster/ctdb_libs.py new file mode 100644 index 000000000..9dfa5f8f6 --- /dev/null +++ b/glustolibs-gluster/glustolibs/gluster/ctdb_libs.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" + Description: + Samba ctdb base classes. + Pre-requisite: + Please install samba ctdb packages + on all servers +""" + +from glusto.core import Glusto as g +from glustolibs.gluster.ctdb_ops import ( + edit_hook_script, + enable_ctdb_cluster, + create_nodes_file, + create_public_address_file, + start_ctdb_service, + is_ctdb_status_healthy, + teardown_samba_ctdb_cluster) +from glustolibs.gluster.gluster_base_class import GlusterBaseClass +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import ( + setup_volume, + wait_for_volume_process_to_be_online) + + +class SambaCtdbBaseClass(GlusterBaseClass): + """ + Creates samba ctdb cluster + """ + @classmethod + def setUpClass(cls): + """ + Setup variable for samba ctdb test. + """ + super(SambaCtdbBaseClass, cls).setUpClass() + + cls.ctdb_volume_rep_count = int(len(cls.ctdb_nodes)) + cls.primary_node = cls.servers[0] + g.log.info("VOLUME REP COUNT %s", cls.ctdb_volume_rep_count) + + cls.ctdb_vips = (g.config['gluster']['cluster_config'] + ['smb']['ctdb_vips']) + cls.ctdb_nodes = (g.config['gluster']['cluster_config'] + ['smb']['ctdb_nodes']) + cls.ctdb_volname = (g.config['gluster']['cluster_config'] + ['smb']['ctdb_volname']) + cls.ctdb_volume_config = (g.config['gluster']['cluster_config']['smb'] + ['ctdb_volume_config']) + + @classmethod + def setup_samba_ctdb_cluster(cls): + """ + Create ctdb-samba cluster if doesn't exists + + Returns: + bool: True if successfully setup samba else false + """ + # Check if ctdb setup is up and running + if is_ctdb_status_healthy(cls.primary_node): + g.log.info("ctdb setup already up skipping " + "ctdb setup creation") + return True + g.log.info("Proceeding with ctdb setup creation") + for mnode in cls.servers: + ret = 
edit_hook_script(mnode, cls.ctdb_volname) + if not ret: + return False + ret = enable_ctdb_cluster(mnode) + if not ret: + return False + ret = create_nodes_file(mnode, cls.ctdb_nodes) + if not ret: + return False + ret = create_public_address_file(mnode, cls.ctdb_vips) + if not ret: + return False + server_info = cls.all_servers_info + ctdb_config = cls.ctdb_volume_config + g.log.info("Setting up ctdb volume %s", cls.ctdb_volname) + ret = setup_volume(mnode=cls.primary_node, + all_servers_info=server_info, + volume_config=ctdb_config) + if not ret: + g.log.error("Failed to setup ctdb volume %s", cls.ctdb_volname) + return False + g.log.info("Successful in setting up volume %s", cls.ctdb_volname) + + # Wait for volume processes to be online + g.log.info("Wait for volume %s processes to be online", + cls.ctdb_volname) + ret = wait_for_volume_process_to_be_online(cls.mnode, cls.ctdb_volname) + if not ret: + g.log.error("Failed to wait for volume %s processes to " + "be online", cls.ctdb_volname) + return False + g.log.info("Successful in waiting for volume %s processes to be " + "online", cls.ctdb_volname) + + # start ctdb services + ret = start_ctdb_service(cls.servers) + if not ret: + return False + + ret = is_ctdb_status_healthy(cls.primary_node) + if not ret: + g.log.error("CTDB setup creation failed - exiting") + return False + g.log.info("CTDB setup creation successfull") + return True + + @classmethod + def tearDownClass(cls, delete_samba_ctdb_cluster=False): + """ + Teardown samba ctdb cluster. 
+ """ + super(SambaCtdbBaseClass, cls).tearDownClass() + + if delete_samba_ctdb_cluster: + ret = teardown_samba_ctdb_cluster( + cls.servers, cls.ctdb_volname) + if not ret: + raise ExecutionError("Cleanup of samba ctdb " + "cluster failed") + g.log.info("Teardown samba ctdb cluster succeeded") + else: + g.log.info("Skipping teardown samba ctdb cluster...") diff --git a/glustolibs-gluster/glustolibs/gluster/ctdb_ops.py b/glustolibs-gluster/glustolibs/gluster/ctdb_ops.py new file mode 100644 index 000000000..8bf57ba05 --- /dev/null +++ b/glustolibs-gluster/glustolibs/gluster/ctdb_ops.py @@ -0,0 +1,478 @@ +#!/usr/bin/env python +# Copyright (C) 2020 Red Hat, Inc. <http://www.redeat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +CTDB library operations +pre-requisite : CTDB and Samba packages +needs to be installed on all the server nodes. +""" + +import re +from time import sleep +from glusto.core import Glusto as g +from glustolibs.gluster.lib_utils import (add_services_to_firewall, + is_rhel6, list_files) +from glustolibs.gluster.mount_ops import umount_volume +from glustolibs.gluster.volume_libs import cleanup_volume + + +def edit_hook_script(mnode, ctdb_volname): + """ + Edit the hook scripts with ctdb volume name + + Args: + mnode (str): Node on which commands has to be executed. 
def enable_ctdb_cluster(mnode):
    """
    Edit the smb.conf to add clustering = yes

    Args:
        mnode (str): Node on which commands has to be executed.

    Returns:
        bool: True if successfully enable ctdb cluster else false
    """
    # Append 'clustering = yes' under [global] only if it is not already
    # present; sed -i.bak keeps a backup of the original smb.conf.
    cmd = (r"grep -q 'clustering = yes' "
           r"/etc/samba/smb.conf || sed -i.bak '/\[global\]/a "
           r"clustering = yes' /etc/samba/smb.conf")
    ret, _, _ = g.run(mnode, cmd)
    if ret:
        g.log.error("Failed to add cluster = yes to smb.conf in %s", mnode)
        return False
    g.log.info("Successfully added 'clustering = yes' to smb.conf "
               "in all nodes")
    return True


def check_file_availability(mnode, file_path, filename):
    """
    Delete the given ctdb config file if present under /etc/ctdb/

    Args:
        mnode (str): Node on which command is executed
        file_path (str): Absolute path of the file to be validated
        filename (str): File to be deleted if available in /etc/ctdb/

    Returns:
        bool: True if the file is absent or was deleted successfully,
              False if the deletion failed.
    """
    if file_path in list_files(mnode, "/etc/ctdb/", filename):
        ret, _, _ = g.run(mnode, "rm -rf %s" % file_path)
        if ret:
            return False
    return True


def create_nodes_file(mnode, node_ips):
    """
    Create nodes file and add node ips

    Args:
        mnode (str): Node on which commands has to be executed.
        node_ips (list): List of node ips to be added to /etc/ctdb/nodes

    Returns:
        bool: True if successfully create nodes file else false
    """
    # check if nodes file is available and delete
    node_file_path = "/etc/ctdb/nodes"
    ret = check_file_availability(mnode, node_file_path, "nodes")
    if not ret:
        g.log.info("Failed to delete pre-existing nodes file in %s", mnode)
        return False
    g.log.info("Deleted pre-existing nodes file in %s", mnode)
    for node_ip in node_ips:
        ret, _, _ = g.run(mnode, "echo -e %s "
                          ">> %s" % (node_ip, node_file_path))
        if ret:
            g.log.error("Failed to add nodes list in %s", mnode)
            return False
    g.log.info("Nodes list added successfully to %s "
               "file in all servers", node_file_path)
    return True


def create_public_address_file(mnode, vips):
    """
    Create public_addresses file and add vips

    Args:
        mnode (str): Node on which commands has to be executed.
        vips (list): List of virtual ips

    Returns:
        bool: True if successfully creates public_address file else false
    """
    publicip_file_path = "/etc/ctdb/public_addresses"
    ret = check_file_availability(mnode,
                                  publicip_file_path,
                                  "public_addresses")
    if not ret:
        g.log.info("Failed to delete pre-existing public_addresses "
                   "file in %s", mnode)
        return False
    g.log.info("Deleted pre-existing public_addresses "
               "file in %s", mnode)
    for vip in vips:
        ret, _, _ = g.run(mnode, "echo -e %s >>"
                          " %s" % (vip, publicip_file_path))
        if ret:
            g.log.error("Failed to add vip list in %s", mnode)
            return False
    g.log.info("vip list added successfully to %s "
               "file in all node", publicip_file_path)
    return True


def ctdb_service_status(servers, mnode):
    """
    Status of ctdb service on the specified node.

    Args:
        servers (str|list): list|str of cluster nodes, used only to
            detect a RHEL6 (SysV init) cluster.
        mnode (str): Node on which ctdb status needs to be checked

    Returns:
        tuple: Tuple containing three elements (ret, out, err).
            The first element 'ret' is of type 'int' and is the return value
            of command execution.

            The second element 'out' is of type 'str' and is the stdout value
            of the command execution.

            The third element 'err' is of type 'str' and is the stderr value
            of the command execution.
    """
    g.log.info("Getting ctdb service status on %s", mnode)
    # RHEL6 has no systemd; fall back to the SysV service command.
    if is_rhel6(servers):
        return g.run(mnode, "service ctdb status")
    return g.run(mnode, "systemctl status ctdb")


def is_ctdb_service_running(servers, mnode):
    """
    Check if ctdb service is running on node

    Args:
        servers (str|list): list|str of cluster nodes
        mnode (str): Node on which ctdb service has to be checked

    Returns:
        bool: True if ctdb service running else False
    """
    g.log.info("Check if ctdb service is running on %s", mnode)
    ret, out, _ = ctdb_service_status(servers, mnode)
    if ret:
        g.log.error("Execution error service ctdb status "
                    "on %s", mnode)
        return False
    if "Active: active (running)" in out:
        g.log.info("ctdb service is running on %s", mnode)
        return True
    g.log.error("ctdb service is not "
                "running on %s", mnode)
    return False


def start_ctdb_service(servers):
    """
    start ctdb services on all nodes &
    wait for 40 seconds

    Args:
        servers (list): IP of samba nodes

    Returns:
        bool: True if successfully starts ctdb service else false
    """
    # 'pgrep ctdb ||' makes the start a no-op when ctdb is already running
    cmd = "pgrep ctdb || service ctdb start"
    for mnode in servers:
        ret, out, _ = g.run(mnode, cmd)
        if ret:
            g.log.error("Unable to start ctdb on server %s", str(out))
            return False
        if not is_ctdb_service_running(servers, mnode):
            g.log.error("ctdb services not running %s", str(out))
            return False
        g.log.info("Start ctdb on server %s successful", mnode)
    # sleep for 40sec as ctdb status takes time to enable
    sleep(40)
    return True


def stop_ctdb_service(servers):
    """
    stop ctdb services on all nodes

    Args:
        servers (list): IP of samba nodes

    Returns:
        bool: True if successfully stops ctdb service else false
    """
    cmd = "service ctdb stop"
    for mnode in servers:
        ret, out, _ = g.run(mnode, cmd)
        if ret:
            g.log.error("Unable to stop ctdb on server %s", str(out))
            return False
        if is_ctdb_service_running(servers, mnode):
            g.log.error("ctdb services still running %s", str(out))
            return False
        g.log.info("Stop ctdb on server %s successful", mnode)
    return True


def ctdb_server_firewall_settings(servers):
    """
    Do firewall settings for ctdb

    Args:
        servers (list): IP of samba nodes

    Returns:
        bool: True if successfully added firewall services else false
    """
    # List of services to enable
    services = ['samba', 'rpc-bind']
    ret = add_services_to_firewall(servers, services, True)
    if not ret:
        g.log.error("Failed to set firewall zone "
                    "permanently on ctdb nodes")
        return False

    # Add ctdb (4379) and samba (139) ports; firewalld is absent on RHEL6
    if not is_rhel6(servers):
        for mnode in servers:
            ret, _, _ = g.run(mnode, "firewall-cmd --add-port=4379/tcp "
                                     "--add-port=139/tcp")
            if ret:
                g.log.error("Failed to add firewall port in %s", mnode)
                return False
            g.log.info("samba ctdb port added successfully in %s", mnode)
            ret, _, _ = g.run(mnode, "firewall-cmd --add-port=4379/tcp "
                                     "--add-port=139/tcp --permanent")
            if ret:
                g.log.error("Failed to add firewall port permanently in %s",
                            mnode)
                return False
    return True


def parse_ctdb_status(status):
    """
    Parse the ctdb status output

    Sample output parsed:
    Number of nodes:4
    pnn:0 <ip> OK (THIS NODE)
    pnn:1 <ip> OK
    pnn:2 <ip> OK
    pnn:3 <ip> UNHEALTHY
    Generation:763624485
    Size:4
    hash:0 lmaster:0
    hash:1 lmaster:1
    hash:2 lmaster:2
    hash:3 lmaster:3
    Recovery mode:NORMAL (0)
    Recovery master:3

    Args:
        status: output of ctdb status(string)

    Returns:
        dict: {<ip>: status}, empty dict when no 'pnn:' lines matched
    """
    # Each node line looks like 'pnn:<n> <ip> <STATUS>'
    pattern = r'pnn\:\d+\s*(\S+)\s*(\S+)'
    ip_nodes = re.findall(pattern, status, re.S)
    if not ip_nodes:
        return {}
    node_status = {ip: state for ip, state in ip_nodes}
    g.log.info("ctdb node status %s", node_status)
    return node_status


def ctdb_status(mnode):
    """
    Execute ctdb status

    Args:
        mnode(str): primary node out of the servers

    Returns:
        tuple: Tuple containing three elements (ret, out, err).
            The first element 'ret' is of type 'int' and is the return value
            of command execution.

            The second element 'out' is of type 'str' and is the stdout value
            of the command execution.

            The third element 'err' is of type 'str' and is the stderr value
            of the command execution.
    """
    cmd = "ctdb status"
    return g.run(mnode, cmd)


def is_ctdb_status_healthy(mnode):
    """
    Check if ctdb is up & running

    Args:
        mnode(str): primary node out of the servers

    Returns:
        bool: True if ctdb status healthy else false
    """
    # Get the ctdb status details
    ret, output, _ = ctdb_status(mnode)
    if ret:
        g.log.info("CTDB is not enabled for the cluster")
        return False
    # Parse the ctdb status output
    node_status = parse_ctdb_status(output)
    if not node_status:
        g.log.error("ctdb status return empty list")
        return False
    # dict.items() (iteritems() was Python-2 only and breaks on Python 3)
    for node_ip, status in node_status.items():
        # Every node must report OK for the cluster to be healthy
        if status != 'OK':
            g.log.error("CTDB node %s is %s",
                        str(node_ip), status)
            return False
        g.log.info("CTDB node %s is %s",
                   str(node_ip), status)
    return True


def edit_hookscript_for_teardown(mnode, ctdb_volname):
    """
    Edit the hook scripts with ctdb volume name

    Args:
        mnode (str): Node on which commands has to be executed.
        ctdb_volname (str): Name of ctdb volume

    Returns:
        bool: True if successfully edits hook-scripts else false
    """
    # Revert META="ctdb_volname" back to META="all" in the setup hook script
    cmd = ("sed -i -- 's/META=\"%s\"/META=\"all\"/g' "
           "/var/lib/glusterd/hooks/1"
           "/start/post/S29CTDBsetup.sh" % ctdb_volname)
    ret, _, _ = g.run(mnode, cmd)
    if ret:
        g.log.error("Hook script - S29CTDBsetup edit failed on %s", mnode)
        return False

    g.log.info("Hook script - S29CTDBsetup edit success on %s", mnode)
    # Revert META="ctdb_volname" back to META="all" in the teardown hook
    # script
    cmd = ("sed -i -- 's/META=\"%s\"/META=\"all\"/g' "
           "/var/lib/glusterd/hooks/1"
           "/stop/pre/S29CTDB-teardown.sh" % ctdb_volname)
    ret, _, _ = g.run(mnode, cmd)
    if ret:
        g.log.error("Hook script - S29CTDB-teardown edit failed on %s", mnode)
        return False
    g.log.info("Hook script - S29CTDBteardown edit success on %s", mnode)
    return True


def teardown_samba_ctdb_cluster(servers, ctdb_volname):
    """
    Tear down samba ctdb setup

    Args:
        servers (list): Nodes in ctdb cluster to teardown entire
            cluster
        ctdb_volname (str): Name of ctdb volume

    Returns:
        bool: True if successfully tear downs ctdb cluster else false
    """
    node_file_path = "/etc/ctdb/nodes"
    publicip_file_path = "/etc/ctdb/public_addresses"
    ctdb_mount = '/gluster/lock'
    g.log.info("Executing force cleanup...")
    # Stop ctdb service; abort teardown if it cannot be stopped
    if not stop_ctdb_service(servers):
        return False
    for mnode in servers:
        # check if nodes file is available and delete
        ret = check_file_availability(mnode, node_file_path, "nodes")
        if not ret:
            g.log.info("Failed to delete existing "
                       "nodes file in %s", mnode)
            return False
        g.log.info("Deleted existing nodes file in %s", mnode)

        # check if public_addresses file is available and delete
        ret = check_file_availability(mnode, publicip_file_path,
                                      "public_addresses")
        if not ret:
            g.log.info("Failed to delete existing public_addresses"
                       " file in %s", mnode)
            return False
        g.log.info("Deleted existing public_addresses "
                   "file in %s", mnode)

        # Unmount the ctdb lock volume from every node
        ret, _, _ = umount_volume(mnode, ctdb_mount, 'glusterfs')
        if ret:
            g.log.error("Unable to unmount lock volume in %s", mnode)
            return False
        # Restore META="all" in the hook scripts
        if not edit_hookscript_for_teardown(mnode, ctdb_volname):
            return False
    # Delete the ctdb volume itself from the first node
    mnode = servers[0]
    ret = cleanup_volume(mnode, ctdb_volname)
    if not ret:
        g.log.error("Failed to delete ctdb volume - %s", ctdb_volname)
        return False
    return True
+ + Args: + subvols(list): list of subvols + parent_path(str): parent path (relative to mount) of "oldname" + subvol(str): The subvol to which the new name has to be hashed + existing_names(int|list): The name(s) already hashed to subvol + + Returns: + (Class Object): For success returns an object of type NewHashed + holding information pertaining to new name. + None, otherwise + Note: The new hash will be searched under the same parent + """ + # pylint: disable=protected-access + if not isinstance(existing_names, list): + existing_names = [existing_names] + brickobject = create_brickobjectlist(subvols, parent_path) + if brickobject is None: + g.log.error("could not form brickobject list") + return None + count = -1 + for item in range(1, 5000, 1): + newhash = calculate_hash(brickobject[0]._host, str(item)) + for brickdir in brickobject: + count += 1 + if (subvol._fqpath == brickdir._fqpath and + item not in existing_names): + ret = brickdir.hashrange_contains_hash(newhash) + if ret: + g.log.debug("oldhashed %s new %s count %s", + subvol, brickdir._host, str(count)) + return NewHashed(item, brickdir, count) + count = -1 + return None + + class NewHashed(object): ''' Helper Class to hold new hashed info @@ -420,3 +456,5 @@ def is_layout_complete(mnode, volname, dirpath): return False elif hash_difference < 1: g.log.error("Layout has overlaps") + + return True diff --git a/glustolibs-gluster/glustolibs/gluster/geo_rep_ops.py b/glustolibs-gluster/glustolibs/gluster/geo_rep_ops.py index 7e12113c9..7d0f5a73e 100755 --- a/glustolibs-gluster/glustolibs/gluster/geo_rep_ops.py +++ b/glustolibs-gluster/glustolibs/gluster/geo_rep_ops.py @@ -292,7 +292,7 @@ def georep_config_set(mnode, mastervol, slaveip, slavevol, config, value, """ if user: - cmd = ("gluster volume geo-replication %s %s::%s config %s %s" % + cmd = ("gluster volume geo-replication %s %s@%s::%s config %s %s" % (mastervol, user, slaveip, slavevol, config, value)) else: cmd = ("gluster volume geo-replication %s 
%s::%s config %s %s" % diff --git a/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py b/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py index 0d8731994..65061cb13 100644..100755 --- a/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py +++ b/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018-2020 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2018-2021 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -47,6 +47,7 @@ from glustolibs.gluster.peer_ops import ( from glustolibs.gluster.gluster_init import ( restart_glusterd, stop_glusterd, wait_for_glusterd_to_start) from glustolibs.gluster.samba_libs import share_volume_over_smb +from glustolibs.gluster.shared_storage_ops import is_shared_volume_mounted from glustolibs.gluster.volume_libs import ( cleanup_volume, log_volume_info_and_status, @@ -59,6 +60,9 @@ from glustolibs.gluster.volume_ops import ( set_volume_options, volume_reset, volume_start) from glustolibs.io.utils import log_mounts_info from glustolibs.gluster.geo_rep_libs import setup_master_and_slave_volumes +from glustolibs.gluster.nfs_ganesha_ops import ( + teardown_nfs_ganesha_cluster) +from glustolibs.misc.misc_libs import kill_process class runs_on(g.CarteTestClass): @@ -192,6 +196,11 @@ class GlusterBaseClass(TestCase): Returns (bool): True if all peers are in connected with other peers. False otherwise. """ + + # If the setup has single node server, by pass this validation. + if len(cls.servers) == 1: + return True + # Validate if peer is connected from all the servers g.log.info("Validating if servers %s are connected from other servers " "in the cluster", cls.servers) @@ -256,10 +265,13 @@ class GlusterBaseClass(TestCase): False otherwise. 
""" if error_or_failure_exists: + shared_storage_mounted = False + if is_shared_volume_mounted(cls.mnode): + shared_storage_mounted = True ret = stop_glusterd(cls.servers) if not ret: g.log.error("Failed to stop glusterd") - cmd_list = ("pkill pidof glusterd", + cmd_list = ("pkill `pidof glusterd`", "rm /var/run/glusterd.socket") for server in cls.servers: for cmd in cmd_list: @@ -268,11 +280,29 @@ class GlusterBaseClass(TestCase): g.log.error("Failed to stop glusterd") return False for server in cls.servers: - cmd_list = ("rm -rf /var/lib/glusterd/vols/*", - "rm -rf /var/lib/glusterd/snaps/*", - "rm -rf /var/lib/glusterd/peers/*", - "rm -rf {}/*/*".format( - cls.all_servers_info[server]['brick_root'])) + ret, out, _ = g.run(server, "pgrep glusterfsd", "root") + if not ret: + ret = kill_process(server, + process_ids=out.strip().split('\n')) + if not ret: + g.log.error("Unable to kill process {}".format( + out.strip().split('\n'))) + return False + if not shared_storage_mounted: + cmd_list = ( + "rm -rf /var/lib/glusterd/vols/*", + "rm -rf /var/lib/glusterd/snaps/*", + "rm -rf /var/lib/glusterd/peers/*", + "rm -rf {}/*/*".format( + cls.all_servers_info[server]['brick_root'])) + else: + cmd_list = ( + "for vol in `ls /var/lib/glusterd/vols/ | " + "grep -v gluster_shared_storage`;do " + "rm -rf /var/lib/glusterd/vols/$vol;done", + "rm -rf /var/lib/glusterd/snaps/*" + "rm -rf {}/*/*".format( + cls.all_servers_info[server]['brick_root'])) for cmd in cmd_list: ret, _, _ = g.run(server, cmd, "root") if ret: @@ -288,10 +318,11 @@ class GlusterBaseClass(TestCase): if not ret: g.log.error("Failed to bring glusterd up") return False - ret = peer_probe_servers(cls.mnode, cls.servers) - if not ret: - g.log.error("Failed to peer probe servers") - return False + if not shared_storage_mounted: + ret = peer_probe_servers(cls.mnode, cls.servers) + if not ret: + g.log.error("Failed to peer probe servers") + return False for client in cls.clients: cmd_list = ("umount /mnt/*", "rm -rf 
/mnt/*") for cmd in cmd_list: @@ -303,10 +334,10 @@ class GlusterBaseClass(TestCase): return True @classmethod - def setup_volume(cls, volume_create_force=False): + def setup_volume(cls, volume_create_force=False, only_volume_create=False): """Setup the volume: - Create the volume, Start volume, Set volume - options, enable snapshot/quota/tier if specified in the config + options, enable snapshot/quota if specified in the config file. - Wait for volume processes to be online - Export volume as NFS/SMB share if mount_type is NFS or SMB @@ -315,6 +346,9 @@ class GlusterBaseClass(TestCase): Args: volume_create_force(bool): True if create_volume should be executed with 'force' option. + only_volume_create(bool): True, only volume creation is needed + False, by default volume creation and + start. Returns (bool): True if all the steps mentioned in the descriptions passes. False otherwise. @@ -337,12 +371,19 @@ class GlusterBaseClass(TestCase): g.log.info("Setting up volume %s", cls.volname) ret = setup_volume(mnode=cls.mnode, all_servers_info=cls.all_servers_info, - volume_config=cls.volume, force=force_volume_create) + volume_config=cls.volume, force=force_volume_create, + create_only=only_volume_create) if not ret: g.log.error("Failed to Setup volume %s", cls.volname) return False g.log.info("Successful in setting up volume %s", cls.volname) + # Returning the value without proceeding for next steps + if only_volume_create and ret: + g.log.info("Setup volume with volume creation {} " + "successful".format(cls.volname)) + return True + # Wait for volume processes to be online g.log.info("Wait for volume %s processes to be online", cls.volname) ret = wait_for_volume_process_to_be_online(cls.mnode, cls.volname) @@ -433,6 +474,9 @@ class GlusterBaseClass(TestCase): """ g.log.info("Starting to mount volume %s", cls.volname) for mount_obj in mounts: + # For nfs-ganesha, mount is done via vip + if cls.enable_nfs_ganesha: + mount_obj.server_system = cls.vips[0] 
g.log.info("Mounting volume '%s:%s' on '%s:%s'", mount_obj.server_system, mount_obj.volname, mount_obj.client_system, mount_obj.mountpoint) @@ -952,8 +996,8 @@ class GlusterBaseClass(TestCase): mount_dict['volname'] = cls.slave_volume mount_dict['server'] = cls.mnode_slave mount_dict['mountpoint'] = path_join( - "/mnt", '_'.join([cls.slave_volname, - cls.mount_type])) + "/mnt", '_'.join([cls.slave_volname, + cls.mount_type])) cls.slave_mounts = create_mount_objs(slave_mount_dict_list) # Defining clients from mounts. @@ -993,6 +1037,31 @@ class GlusterBaseClass(TestCase): datetime.now().strftime('%H_%M_%d_%m_%Y')) cls.glustotest_run_id = g.config['glustotest_run_id'] + if cls.enable_nfs_ganesha: + g.log.info("Setup NFS_Ganesha") + cls.num_of_nfs_ganesha_nodes = int(cls.num_of_nfs_ganesha_nodes) + cls.servers_in_nfs_ganesha_cluster = ( + cls.servers[:cls.num_of_nfs_ganesha_nodes]) + cls.vips_in_nfs_ganesha_cluster = ( + cls.vips[:cls.num_of_nfs_ganesha_nodes]) + + # Obtain hostname of servers in ganesha cluster + cls.ganesha_servers_hostname = [] + for ganesha_server in cls.servers_in_nfs_ganesha_cluster: + ret, hostname, _ = g.run(ganesha_server, "hostname") + if ret: + raise ExecutionError("Failed to obtain hostname of %s" + % ganesha_server) + hostname = hostname.strip() + g.log.info("Obtained hostname: IP- %s, hostname- %s", + ganesha_server, hostname) + cls.ganesha_servers_hostname.append(hostname) + from glustolibs.gluster.nfs_ganesha_libs import setup_nfs_ganesha + ret = setup_nfs_ganesha(cls) + if not ret: + raise ExecutionError("Failed to setup nfs ganesha") + g.log.info("Successful in setting up NFS Ganesha Cluster") + msg = "Setupclass: %s : %s" % (cls.__name__, cls.glustotest_run_id) g.log.info(msg) cls.inject_msg_in_gluster_logs(msg) @@ -1020,7 +1089,7 @@ class GlusterBaseClass(TestCase): if (self.error_or_failure_exists or self._is_error_or_failure_exists()): ret = self.scratch_cleanup(self.error_or_failure_exists) - g.log.warn(ret) + g.log.info(ret) 
return self.get_super_method(self, 'doCleanups')() @classmethod @@ -1029,5 +1098,250 @@ class GlusterBaseClass(TestCase): cls._is_error_or_failure_exists()): ret = cls.scratch_cleanup( GlusterBaseClass.error_or_failure_exists) - g.log.warn(ret) + g.log.info(ret) return cls.get_super_method(cls, 'doClassCleanups')() + + @classmethod + def delete_nfs_ganesha_cluster(cls): + ret = teardown_nfs_ganesha_cluster( + cls.servers_in_nfs_ganesha_cluster) + if not ret: + g.log.error("Teardown got failed. Hence, cleaning up " + "nfs-ganesha cluster forcefully") + ret = teardown_nfs_ganesha_cluster( + cls.servers_in_nfs_ganesha_cluster, force=True) + if not ret: + raise ExecutionError("Force cleanup of nfs-ganesha " + "cluster failed") + g.log.info("Teardown nfs ganesha cluster succeeded") + + @classmethod + def start_memory_and_cpu_usage_logging(cls, test_id, interval=60, + count=100): + """Upload logger script and start logging usage on cluster + + Args: + test_id(str): ID of the test running fetched from self.id() + + Kawrgs: + interval(int): Time interval after which logs are to be collected + (Default: 60) + count(int): Number of samples to be collected(Default: 100) + + Returns: + proc_dict(dict):Dictionary of logging processes + """ + # imports are added inside function to make it them + # optional and not cause breakage on installation + # which don't use the resource leak library + from glustolibs.io.memory_and_cpu_utils import ( + check_upload_memory_and_cpu_logger_script, + log_memory_and_cpu_usage_on_cluster) + + # Checking if script is present on servers or not if not then + # upload it to servers. + if not check_upload_memory_and_cpu_logger_script(cls.servers): + return None + + # Checking if script is present on clients or not if not then + # upload it to clients. 
+ if not check_upload_memory_and_cpu_logger_script(cls.clients): + return None + + # Start logging on servers and clients + proc_dict = log_memory_and_cpu_usage_on_cluster( + cls.servers, cls.clients, test_id, interval, count) + + return proc_dict + + @classmethod + def compute_and_print_usage_stats(cls, test_id, proc_dict, + kill_proc=False): + """Compute and print CPU and memory usage statistics + + Args: + proc_dict(dict):Dictionary of logging processes + test_id(str): ID of the test running fetched from self.id() + + Kwargs: + kill_proc(bool): Kill logging process if true else wait + for process to complete execution + """ + # imports are added inside function to make it them + # optional and not cause breakage on installation + # which don't use the resource leak library + from glustolibs.io.memory_and_cpu_utils import ( + wait_for_logging_processes_to_stop, kill_all_logging_processes, + compute_data_usage_stats_on_servers, + compute_data_usage_stats_on_clients) + + # Wait or kill running logging process + if kill_proc: + nodes = cls.servers + cls.clients + ret = kill_all_logging_processes(proc_dict, nodes, cluster=True) + if not ret: + g.log.error("Unable to stop logging processes.") + else: + ret = wait_for_logging_processes_to_stop(proc_dict, cluster=True) + if not ret: + g.log.error("Processes didn't complete still running.") + + # Compute and print stats for servers + ret = compute_data_usage_stats_on_servers(cls.servers, test_id) + g.log.info('*' * 50) + g.log.info(ret) # TODO: Make logged message more structured + g.log.info('*' * 50) + + # Compute and print stats for clients + ret = compute_data_usage_stats_on_clients(cls.clients, test_id) + g.log.info('*' * 50) + g.log.info(ret) # TODO: Make logged message more structured + g.log.info('*' * 50) + + @classmethod + def check_for_memory_leaks_and_oom_kills_on_servers(cls, test_id, + gain=30.0): + """Check for memory leaks and OOM kills on servers + + Args: + test_id(str): ID of the test running fetched 
from self.id() + + Kwargs: + gain(float): Accepted amount of leak for a given testcase in MB + (Default:30) + + Returns: + bool: True if memory leaks or OOM kills are observed else false + """ + # imports are added inside function to make it them + # optional and not cause breakage on installation + # which don't use the resource leak library + from glustolibs.io.memory_and_cpu_utils import ( + check_for_memory_leaks_in_glusterd, + check_for_memory_leaks_in_glusterfs, + check_for_memory_leaks_in_glusterfsd, + check_for_oom_killers_on_servers) + + # Check for memory leaks on glusterd + if check_for_memory_leaks_in_glusterd(cls.servers, test_id, gain): + g.log.error("Memory leak on glusterd.") + return True + + if cls.volume_type != "distributed": + # Check for memory leaks on shd + if check_for_memory_leaks_in_glusterfs(cls.servers, test_id, + gain): + g.log.error("Memory leak on shd.") + return True + + # Check for memory leaks on brick processes + if check_for_memory_leaks_in_glusterfsd(cls.servers, test_id, gain): + g.log.error("Memory leak on brick process.") + return True + + # Check OOM kills on servers for all gluster server processes + if check_for_oom_killers_on_servers(cls.servers): + g.log.error('OOM kills present on servers.') + return True + return False + + @classmethod + def check_for_memory_leaks_and_oom_kills_on_clients(cls, test_id, gain=30): + """Check for memory leaks and OOM kills on clients + + Args: + test_id(str): ID of the test running fetched from self.id() + + Kwargs: + gain(float): Accepted amount of leak for a given testcase in MB + (Default:30) + + Returns: + bool: True if memory leaks or OOM kills are observed else false + """ + # imports are added inside function to make it them + # optional and not cause breakage on installation + # which don't use the resource leak library + from glustolibs.io.memory_and_cpu_utils import ( + check_for_memory_leaks_in_glusterfs_fuse, + check_for_oom_killers_on_clients) + + # Check for memory leak on 
glusterfs fuse process + if check_for_memory_leaks_in_glusterfs_fuse(cls.clients, test_id, + gain): + g.log.error("Memory leaks observed on FUSE clients.") + return True + + # Check for oom kills on clients + if check_for_oom_killers_on_clients(cls.clients): + g.log.error("OOM kills present on clients.") + return True + return False + + @classmethod + def check_for_cpu_usage_spikes_on_servers(cls, test_id, threshold=3): + """Check for CPU usage spikes on servers + + Args: + test_id(str): ID of the test running fetched from self.id() + + Kwargs: + threshold(int): Accepted amount of instances of 100% CPU usage + (Default:3) + Returns: + bool: True if CPU spikes are more than threshold else False + """ + # imports are added inside function to make it them + # optional and not cause breakage on installation + # which don't use the resource leak library + from glustolibs.io.memory_and_cpu_utils import ( + check_for_cpu_usage_spikes_on_glusterd, + check_for_cpu_usage_spikes_on_glusterfs, + check_for_cpu_usage_spikes_on_glusterfsd) + + # Check for CPU usage spikes on glusterd + if check_for_cpu_usage_spikes_on_glusterd(cls.servers, test_id, + threshold): + g.log.error("CPU usage spikes observed more than threshold " + "on glusterd.") + return True + + if cls.volume_type != "distributed": + # Check for CPU usage spikes on shd + if check_for_cpu_usage_spikes_on_glusterfs(cls.servers, test_id, + threshold): + g.log.error("CPU usage spikes observed more than threshold " + "on shd.") + return True + + # Check for CPU usage spikes on brick processes + if check_for_cpu_usage_spikes_on_glusterfsd(cls.servers, test_id, + threshold): + g.log.error("CPU usage spikes observed more than threshold " + "on shd.") + return True + return False + + @classmethod + def check_for_cpu_spikes_on_clients(cls, test_id, threshold=3): + """Check for CPU usage spikes on clients + + Args: + test_id(str): ID of the test running fetched from self.id() + + Kwargs: + threshold(int): Accepted amount of 
instances of 100% CPU usage + (Default:3) + Returns: + bool: True if CPU spikes are more than threshold else False + """ + # imports are added inside function to make it them + # optional and not cause breakage on installation + # which don't use the resource leak library + from glustolibs.io.memory_and_cpu_utils import ( + check_for_cpu_usage_spikes_on_glusterfs_fuse) + + ret = check_for_cpu_usage_spikes_on_glusterfs_fuse(cls.clients, + test_id, + threshold) + return ret diff --git a/glustolibs-gluster/glustolibs/gluster/gluster_init.py b/glustolibs-gluster/glustolibs/gluster/gluster_init.py index 29059e6a1..6a49ffc8b 100644 --- a/glustolibs-gluster/glustolibs/gluster/gluster_init.py +++ b/glustolibs-gluster/glustolibs/gluster/gluster_init.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -23,13 +23,17 @@ from time import sleep from glusto.core import Glusto as g -def start_glusterd(servers): +def start_glusterd(servers, enable_retry=True): """Starts glusterd on specified servers if they are not running. Args: servers (str|list): A server|List of server hosts on which glusterd has to be started. + Kwargs: + enable_retry(Bool): If set to True then runs reset-failed else + do nothing. + Returns: bool : True if starting glusterd is successful on all servers. False otherwise. 
@@ -46,10 +50,13 @@ def start_glusterd(servers): if retcode != 0: g.log.error("Unable to start glusterd on server %s", server) _rc = False - if not _rc: - return False + if not _rc and enable_retry: + ret = reset_failed_glusterd(servers) + if ret: + ret = start_glusterd(servers) + return ret - return True + return _rc def stop_glusterd(servers): @@ -81,13 +88,17 @@ def stop_glusterd(servers): return True -def restart_glusterd(servers): +def restart_glusterd(servers, enable_retry=True): """Restart the glusterd on specified servers. Args: servers (str|list): A server|List of server hosts on which glusterd has to be restarted. + Kwargs: + enable_retry(Bool): If set to True than runs reset-failed else + do nothing. + Returns: bool : True if restarting glusterd is successful on all servers. False otherwise. @@ -104,9 +115,35 @@ def restart_glusterd(servers): if retcode != 0: g.log.error("Unable to restart glusterd on server %s", server) _rc = False - if not _rc: - return False + if not _rc and enable_retry: + ret = reset_failed_glusterd(servers) + if ret: + ret = restart_glusterd(servers) + return ret + return _rc + + +def reset_failed_glusterd(servers): + """Reset-failed glusterd on specified servers. + + Args: + servers (str|list): A server|List of server hosts on which glusterd + has to be reset-failed. + + Returns: + bool : True if reset-failed glusterd is successful on all servers. + False otherwise. + """ + if not isinstance(servers, list): + servers = [servers] + + cmd = "systemctl reset-failed glusterd" + results = g.run_parallel(servers, cmd) + for server, (retcode, _, _) in results.items(): + if retcode: + g.log.error("Unable to reset glusterd on server %s", server) + return False return True @@ -260,10 +297,10 @@ def get_gluster_version(host): host(str): IP of the host whose gluster version has to be checked. Returns: - (float): The gluster version value. + str: The gluster version value. 
""" command = 'gluster --version' _, out, _ = g.run(host, command) g.log.info("The Gluster verion of the cluster under test is %s", out) - return float(out.split(' ')[1]) + return out.split(' ')[1] diff --git a/glustolibs-gluster/glustolibs/gluster/glusterdir.py b/glustolibs-gluster/glustolibs/gluster/glusterdir.py index f2981cb93..5618926c8 100644 --- a/glustolibs-gluster/glustolibs/gluster/glusterdir.py +++ b/glustolibs-gluster/glustolibs/gluster/glusterdir.py @@ -82,22 +82,29 @@ def rmdir(host, fqpath, force=False): return False -def get_dir_contents(host, path): +def get_dir_contents(host, path, recursive=False): """Get the files and directories present in a given directory. Args: host (str): The hostname/ip of the remote system. path (str): The path to the directory. + Kwargs: + recursive (bool): lists all entries recursively + Returns: file_dir_list (list): List of files and directories on path. None: In case of error or failure. """ - ret, out, _ = g.run(host, ' ls '+path) - if ret != 0: + if recursive: + cmd = "find {}".format(path) + else: + cmd = "ls " + path + ret, out, _ = g.run(host, cmd) + if ret: + g.log.error("No such file or directory {}".format(path)) return None - file_dir_list = list(filter(None, out.split("\n"))) - return file_dir_list + return(list(filter(None, out.split("\n")))) class GlusterDir(GlusterFile): diff --git a/glustolibs-gluster/glustolibs/gluster/glusterfile.py b/glustolibs-gluster/glustolibs/gluster/glusterfile.py index 4d712a5f3..ee9b6040d 100755 --- a/glustolibs-gluster/glustolibs/gluster/glusterfile.py +++ b/glustolibs-gluster/glustolibs/gluster/glusterfile.py @@ -97,40 +97,50 @@ def get_mountpoint(host, fqpath): return None -def get_fattr(host, fqpath, fattr): +def get_fattr(host, fqpath, fattr, encode="hex"): """getfattr for filepath on remote system Args: host (str): The hostname/ip of the remote system. fqpath (str): The fully-qualified path to the file. 
fattr (str): name of the fattr to retrieve - + Kwargs: + encode(str): The supported types of encoding are + [hex|text|base64] + Defaults to hex type of encoding Returns: getfattr result on success. None on fail. """ - command = ("getfattr --absolute-names --only-values -n '%s' %s" % - (fattr, fqpath)) + command = ("getfattr --absolute-names -e '%s' " + "-n '%s' %s" % + (encode, fattr, fqpath)) rcode, rout, rerr = g.run(host, command) - - if rcode == 0: - return rout.strip() + if not rcode: + return rout.strip().split('=')[1].replace('"', '') g.log.error('getfattr failed: %s' % rerr) return None -def get_fattr_list(host, fqpath): +def get_fattr_list(host, fqpath, encode_hex=False): """List of xattr for filepath on remote system. Args: host (str): The hostname/ip of the remote system. fqpath (str): The fully-qualified path to the file. + Kwargs: + encode_hex(bool): Fetch xattr in hex if True + (Default:False) + Returns: Dictionary of xattrs on success. None on fail. """ - command = "getfattr --absolute-names -d -m - %s" % fqpath - rcode, rout, rerr = g.run(host, command) + cmd = "getfattr --absolute-names -d -m - {}".format(fqpath) + if encode_hex: + cmd = ("getfattr --absolute-names -d -m - -e hex {}" + .format(fqpath)) + rcode, rout, rerr = g.run(host, cmd) if rcode == 0: xattr_list = {} @@ -237,7 +247,7 @@ def get_file_stat(host, fqpath): Returns: A dictionary of file stat data. None on fail. 
""" - statformat = '%F:%n:%i:%a:%s:%h:%u:%g:%U:%G' + statformat = '%F$%n$%i$%a$%s$%h$%u$%g$%U$%G$%x$%y$%z$%X$%Y$%Z' command = "stat -c '%s' %s" % (statformat, fqpath) rcode, rout, rerr = g.run(host, command) if rcode == 0: @@ -245,7 +255,9 @@ def get_file_stat(host, fqpath): stat_string = rout.strip() (filetype, filename, inode, access, size, links, - uid, gid, username, groupname) = stat_string.split(":") + uid, gid, username, groupname, + atime, mtime, ctime, epoch_atime, + epoch_mtime, epoch_ctime) = stat_string.split("$") stat_data['filetype'] = filetype stat_data['filename'] = filename @@ -257,6 +269,12 @@ def get_file_stat(host, fqpath): stat_data["groupname"] = groupname stat_data["uid"] = uid stat_data["gid"] = gid + stat_data["atime"] = atime + stat_data["mtime"] = mtime + stat_data["ctime"] = ctime + stat_data["epoch_atime"] = epoch_atime + stat_data["epoch_mtime"] = epoch_mtime + stat_data["epoch_ctime"] = epoch_ctime return stat_data @@ -382,7 +400,8 @@ def get_pathinfo(host, fqpath): A dictionary of pathinfo data for a remote file. None on fail. """ pathinfo = {} - pathinfo['raw'] = get_fattr(host, fqpath, 'trusted.glusterfs.pathinfo') + pathinfo['raw'] = get_fattr(host, fqpath, 'trusted.glusterfs.pathinfo', + encode="text") pathinfo['brickdir_paths'] = re.findall(r".*?POSIX.*?:(\S+)\>", pathinfo['raw']) @@ -405,17 +424,14 @@ def is_linkto_file(host, fqpath): """ command = 'file %s' % fqpath rcode, rout, _ = g.run(host, command) - if rcode == 0: - if 'sticky empty' in rout.strip(): + # An additional ',' is there for newer platforms + if 'sticky empty' or 'sticky, empty' in rout.strip(): stat = get_file_stat(host, fqpath) if int(stat['size']) == 0: - # xattr = get_fattr(host, fqpath, - # 'trusted.glusterfs.dht.linkto') xattr = get_dht_linkto_xattr(host, fqpath) if xattr is not None: return True - return False @@ -429,7 +445,8 @@ def get_dht_linkto_xattr(host, fqpath): Returns: Return value of get_fattr trusted.glusterfs.dht.linkto call. 
""" - linkto_xattr = get_fattr(host, fqpath, 'trusted.glusterfs.dht.linkto') + linkto_xattr = get_fattr(host, fqpath, 'trusted.glusterfs.dht.linkto', + encode="text") return linkto_xattr @@ -480,6 +497,154 @@ def check_if_pattern_in_file(host, pattern, fqpath): return 0 +def occurences_of_pattern_in_file(node, search_pattern, filename): + """ + Get the number of occurences of pattern in the file + + Args: + node (str): Host on which the command is executed. + search_pattern (str): Pattern to be found in the file. + filename (str): File in which the pattern is to be validated + + Returns: + (int): (-1), When the file doesn't exists. + (0), When pattern doesn't exists in the file. + (number), When pattern is found and the number of + occurences of pattern in the file. + + Example: + occurences_of_pattern_in_file(node, search_pattern, filename) + """ + + ret = file_exists(node, filename) + if not ret: + g.log.error("File %s is not present on the node " % filename) + return -1 + + cmd = ("grep -c '%s' %s" % (search_pattern, filename)) + ret, out, _ = g.run(node, cmd) + if ret: + g.log.error("No occurence of the pattern found in the file %s" % + filename) + return 0 + return int(out.strip('\n')) + + +def create_link_file(node, file, link, soft=False): + """ + Create hard or soft link for an exisiting file + + Args: + node(str): Host on which the command is executed. + file(str): Path to the source file. + link(str): Path to the link file. + + Kawrgs: + soft(bool): Create soft link if True else create + hard link. + + Returns: + (bool): True if command successful else False. 
+ + Example: + >>> create_link_file('10.20.30.40', '/mnt/mp/file.txt', + '/mnt/mp/link') + True + """ + cmd = "ln {} {}".format(file, link) + if soft: + cmd = "ln -s {} {}".format(file, link) + + ret, _, err = g.run(node, cmd) + if ret: + if soft: + g.log.error('Failed to create soft link on {} ' + 'for file {} with error {}' + .format(node, file, err)) + else: + g.log.error('Failed to create hard link on {} ' + 'for file {} with error {}' + .format(node, file, err)) + return False + return True + + +def set_acl(client, rule, fqpath): + """Set acl rule on a specific file + + Args: + client(str): Host on which the command is executed. + rule(str): The acl rule to be set on the file. + fqpath (str): The fully-qualified path to the file. + + Returns: + (bool): True if command successful else False. + """ + cmd = "setfacl -m {} {}".format(rule, fqpath) + ret, _, _ = g.run(client, cmd) + if ret: + g.log.error('Failed to set rule {} on file {}'.format(rule, fqpath)) + return False + return True + + +def get_acl(client, path, filename): + """Get all acl rules set to a file + + Args: + client(str): Host on which the command is executed. + path (str): The fully-qualified path to the dir where file is present. + filename(str): Name of the file for which rules have to be fetched. + + Returns: + (dict): A dictionary with the formatted output of the command. 
+ (None): In case of failures + + Example: + >>> get_acl('dhcp35-4.lab.eng.blr.redhat.com', '/root/', 'file') + {'owner': 'root', 'rules': ['user::rw-', 'user:root:rwx', 'group::r--', + 'mask::rwx', 'other::r--'], 'group': 'root', 'file': 'file'} + """ + cmd = "cd {};getfacl {}".format(path, filename) + ret, out, _ = g.run(client, cmd) + if ret: + return None + + # Generate a dict out of the output + output_dict = {} + data = out.strip().split('\n') + for key, index in (('file', 0), ('owner', 1), ('group', 2)): + output_dict[key] = data[index].split(' ')[2] + output_dict['rules'] = data[3:] + + return output_dict + + +def delete_acl(client, fqpath, rule=None): + """Delete a specific or all acl rules set on a file + + Args: + client(str): Host on which the command is executed. + fqpath (str): The fully-qualified path to the file. + + Kwargs: + rule(str): The acl rule to be removed from the file. + + Returns: + (bool): True if command successful else False. + """ + # Remove all acls set on a file + cmd = "setfacl -b {}".format(fqpath) + # Remove a specific acl of the file + if rule: + cmd = "setfacl -x {} {}".format(rule, fqpath) + + ret, _, _ = g.run(client, cmd) + if ret: + return False + return True + + class GlusterFile(object): """Class to handle files specific to Gluster (client and backend)""" def __init__(self, host, fqpath): diff --git a/glustolibs-gluster/glustolibs/gluster/heal_libs.py b/glustolibs-gluster/glustolibs/gluster/heal_libs.py index 504173ae7..4a551cd48 100755 --- a/glustolibs-gluster/glustolibs/gluster/heal_libs.py +++ b/glustolibs-gluster/glustolibs/gluster/heal_libs.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (C) 2016-2020 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2016-2021 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -135,7 +135,8 @@ def are_all_self_heal_daemons_are_online(mnode, volname): return False -def monitor_heal_completion(mnode, volname, timeout_period=1200): +def monitor_heal_completion(mnode, volname, timeout_period=1200, + bricks=None, interval_check=120): """Monitors heal completion by looking into .glusterfs/indices/xattrop directory of every brick for certain time. When there are no entries in all the brick directories then heal is successful. Otherwise heal is @@ -147,6 +148,12 @@ def monitor_heal_completion(mnode, volname, timeout_period=1200): heal_monitor_timeout : time until which the heal monitoring to be done. Default: 1200 i.e 20 minutes. + Kwargs: + bricks : list of bricks to monitor heal, if not provided + heal will be monitored on all bricks of volume + interval_check : Time in seconds, for every given interval checks + the heal info, defaults to 120. + Return: bool: True if heal is complete within timeout_period. False otherwise """ @@ -158,7 +165,7 @@ def monitor_heal_completion(mnode, volname, timeout_period=1200): # Get all bricks from glustolibs.gluster.brick_libs import get_all_bricks - bricks_list = get_all_bricks(mnode, volname) + bricks_list = bricks or get_all_bricks(mnode, volname) if bricks_list is None: g.log.error("Unable to get the bricks list. 
Hence unable to verify " "whether self-heal-daemon process is running or not " @@ -177,10 +184,15 @@ def monitor_heal_completion(mnode, volname, timeout_period=1200): if heal_complete: break else: - time.sleep(120) - time_counter = time_counter - 120 + time.sleep(interval_check) + time_counter = time_counter - interval_check + + if heal_complete and bricks: + # In EC volumes, check heal completion only on online bricks + # and `gluster volume heal info` fails for an offline brick + return True - if heal_complete: + if heal_complete and not bricks: heal_completion_status = is_heal_complete(mnode, volname) if heal_completion_status is True: g.log.info("Heal has successfully completed on volume %s" % @@ -509,3 +521,71 @@ def bring_self_heal_daemon_process_offline(nodes): _rc = False return _rc + + +def is_shd_daemon_running(mnode, node, volname): + """ + Verifies whether the shd daemon is up and running on a particular node by + checking the existence of shd pid and parsing the get volume status output. + + Args: + mnode (str): The first node in servers list + node (str): The node to be checked for whether the glustershd + process is up or not + volname (str): Name of the volume created + + Returns: + boolean: True if shd is running on the node, False, otherwise + """ + + # Get glustershd pid from node. + ret, glustershd_pids = get_self_heal_daemon_pid(node) + if not ret and glustershd_pids[node] != -1: + return False + # Verifying glustershd process is no longer running from get status. 
+ vol_status = get_volume_status(mnode, volname) + if vol_status is None: + return False + try: + _ = vol_status[volname][node]['Self-heal Daemon'] + return True + except KeyError: + return False + + +def enable_granular_heal(mnode, volname): + """Enable granular heal on a given volume + + Args: + mnode(str): Node on which command has to be exectued + volname(str): Name of the volume on which granular heal is to be enabled + + Returns: + bool: True if granular heal is enabled successfully else False + """ + cmd = "gluster volume heal {} granular-entry-heal enable".format(volname) + ret, _, _ = g.run(mnode, cmd) + if ret: + g.log.error('Unable to enable granular-entry-heal on volume %s', + volname) + return False + return True + + +def disable_granular_heal(mnode, volname): + """Diable granular heal on a given volume + + Args: + mnode(str): Node on which command will be exectued + volname(str): Name of the volume on which granular heal is to be disabled + + Returns: + bool: True if granular heal is disabled successfully else False + """ + cmd = "gluster volume heal {} granular-entry-heal disable".format(volname) + ret, _, _ = g.run(mnode, cmd) + if ret: + g.log.error('Unable to disable granular-entry-heal on volume %s', + volname) + return False + return True diff --git a/glustolibs-gluster/glustolibs/gluster/layout.py b/glustolibs-gluster/glustolibs/gluster/layout.py index 8d7ae2d6f..ea5a5bc8b 100644 --- a/glustolibs-gluster/glustolibs/gluster/layout.py +++ b/glustolibs-gluster/glustolibs/gluster/layout.py @@ -19,7 +19,6 @@ from glusto.core import Glusto as g from glustolibs.gluster.brickdir import BrickDir -from glustolibs.gluster.gluster_init import get_gluster_version class Layout(object): @@ -35,20 +34,19 @@ class Layout(object): self._brickdirs = [] for brickdir_path in self._pathinfo['brickdir_paths']: (host, _) = brickdir_path.split(':') - if get_gluster_version(host) >= 6.0: - ret = get_volume_type(brickdir_path) - if ret in ('Replicate', 'Disperse', 
'Arbiter'): - g.log.info("Cannot get layout as volume under test is" - " Replicate/Disperse/Arbiter and DHT" - " pass-through was enabled after Gluster 6.") + ret = get_volume_type(brickdir_path) + if ret in ('Replicate', 'Disperse', 'Arbiter'): + g.log.info("Cannot get layout as volume under test is" + " Replicate/Disperse/Arbiter and DHT" + " pass-through was enabled after Gluster 6.0") + else: + brickdir = BrickDir(brickdir_path) + if brickdir is None: + g.log.error("Failed to get the layout") else: - brickdir = BrickDir(brickdir_path) - if brickdir is None: - g.log.error("Failed to get the layout") - else: - g.log.debug("%s: %s" % (brickdir.path, - brickdir.hashrange)) - self._brickdirs.append(brickdir) + g.log.debug("%s: %s" % (brickdir.path, + brickdir.hashrange)) + self._brickdirs.append(brickdir) def __init__(self, pathinfo): """Init the layout class @@ -80,9 +78,8 @@ class Layout(object): for brickdir_path in self._pathinfo['brickdir_paths']: (host, _) = brickdir_path.split(':') - if (get_gluster_version(host) >= 6.0 and - get_volume_type(brickdir_path) in ('Replicate', 'Disperse', - 'Arbiter')): + if get_volume_type(brickdir_path) in ('Replicate', 'Disperse', + 'Arbiter'): g.log.info("Cannot check for layout completeness as volume" " under test is Replicate/Disperse/Arbiter and DHT" " pass-though was enabled after Gluster 6.") diff --git a/glustolibs-gluster/glustolibs/gluster/lib_utils.py b/glustolibs-gluster/glustolibs/gluster/lib_utils.py index 7299874d0..b04976b1c 100755 --- a/glustolibs-gluster/glustolibs/gluster/lib_utils.py +++ b/glustolibs-gluster/glustolibs/gluster/lib_utils.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2021 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -387,7 +387,8 @@ def get_servers_unused_bricks_dict(mnode, servers, servers_info): return servers_unused_bricks_dict -def form_bricks_list(mnode, volname, number_of_bricks, servers, servers_info): +def form_bricks_list(mnode, volname, number_of_bricks, servers, servers_info, + dirname=None): """Forms bricks list for create-volume/add-brick given the num_of_bricks servers and servers_info. @@ -400,6 +401,9 @@ def form_bricks_list(mnode, volname, number_of_bricks, servers, servers_info): needs to be selected for creating the brick list. servers_info (dict): dict of server info of each servers. + kwargs: + dirname (str): Name of the directory for glusterfs brick + Returns: list - List of bricks to use with volume-create/add-brick None - if number_of_bricks is greater than unused bricks. @@ -437,10 +441,18 @@ def form_bricks_list(mnode, volname, number_of_bricks, servers, servers_info): list(servers_unused_bricks_dict.values())[dict_index]) brick_path = '' if current_server_unused_bricks_list: - brick_path = ("%s:%s/%s_brick%s" % - (current_server, - current_server_unused_bricks_list[0], volname, num)) - bricks_list.append(brick_path) + if dirname and (" " not in dirname): + brick_path = ("%s:%s/%s_brick%s" % + (current_server, + current_server_unused_bricks_list[0], dirname, + num)) + bricks_list.append(brick_path) + else: + brick_path = ("%s:%s/%s_brick%s" % + (current_server, + current_server_unused_bricks_list[0], volname, + num)) + bricks_list.append(brick_path) # Remove the added brick from the current_server_unused_bricks_list list(servers_unused_bricks_dict.values())[dict_index].pop(0) @@ -1004,6 +1016,30 @@ def group_add(servers, groupname): return True +def group_del(servers, groupname): + """ + Deletes a group in all the servers. 
+ + Args: + servers(list|str): Nodes on which cmd is to be executed. + groupname(str): Name of the group to be removed. + + Return always True + """ + if not isinstance(servers, list): + servers = [servers] + + cmd = "groupdel %s" % groupname + results = g.run_parallel(servers, cmd) + + for server, ret_value in list(results.items()): + retcode, _, err = ret_value + if retcode != 0 and "does not exist" in err: + g.log.error("Group %s on server %s already removed", + groupname, server) + return True + + def ssh_keygen(mnode): """ Creates a pair of ssh private and public key if not present @@ -1199,3 +1235,25 @@ def collect_bricks_arequal(bricks_list): arequal_list.append(arequal) return (return_code, arequal_list) + + +def get_usable_size_per_disk(brickpath, min_free_limit=10): + """Get the usable size per disk + + Args: + brickpath(str): Brick path to be used to calculate usable size + + Kwargs: + min_free_limit(int): Min free disk limit to be used + + Returns: + (int): Usable size in GB. None in case of errors. 
+ """ + node, brick_path = brickpath.split(':') + size = get_size_of_mountpoint(node, brick_path) + if not size: + return None + size = int(size) + min_free_size = size * min_free_limit // 100 + usable_size = ((size - min_free_size) // 1048576) + 1 + return usable_size diff --git a/glustolibs-gluster/glustolibs/gluster/mount_ops.py b/glustolibs-gluster/glustolibs/gluster/mount_ops.py index 02dc0a253..c8fbddd05 100755 --- a/glustolibs-gluster/glustolibs/gluster/mount_ops.py +++ b/glustolibs-gluster/glustolibs/gluster/mount_ops.py @@ -336,10 +336,10 @@ def mount_volume(volname, mtype, mpoint, mserver, mclient, options='', if mtype == 'nfs': if not options: - options = "-o vers=3" + options = "-o vers=4.1" elif options and 'vers' not in options: - options = options + ",vers=3" + options = options + ",vers=4.1" if mserver: mcmd = ("mount -t %s %s %s:/%s %s" % diff --git a/glustolibs-gluster/glustolibs/gluster/nfs_ganesha_libs.py b/glustolibs-gluster/glustolibs/gluster/nfs_ganesha_libs.py index 19e98408e..5f69e68f6 100644..100755 --- a/glustolibs-gluster/glustolibs/gluster/nfs_ganesha_libs.py +++ b/glustolibs-gluster/glustolibs/gluster/nfs_ganesha_libs.py @@ -32,179 +32,123 @@ from glustolibs.gluster.nfs_ganesha_ops import ( create_nfs_ganesha_cluster, configure_ports_on_clients, ganesha_client_firewall_settings) -from glustolibs.gluster.gluster_base_class import GlusterBaseClass -from glustolibs.gluster.exceptions import ExecutionError, ConfigError from glustolibs.gluster.volume_libs import is_volume_exported +from glustolibs.gluster.lib_utils import is_rhel7 -class NfsGaneshaClusterSetupClass(GlusterBaseClass): - """Creates nfs ganesha cluster +def setup_nfs_ganesha(cls): """ - @classmethod - def setUpClass(cls): - """ - Setup variable for nfs-ganesha tests. 
- """ - # pylint: disable=too-many-statements, too-many-branches - super(NfsGaneshaClusterSetupClass, cls).setUpClass() - - # Check if enable_nfs_ganesha is set in config file - if not cls.enable_nfs_ganesha: - raise ConfigError("Please enable nfs ganesha in config") - - # Read num_of_nfs_ganesha_nodes from config file and create - # nfs ganesha cluster accordingly - cls.num_of_nfs_ganesha_nodes = int(cls.num_of_nfs_ganesha_nodes) - cls.servers_in_nfs_ganesha_cluster = ( - cls.servers[:cls.num_of_nfs_ganesha_nodes]) - cls.vips_in_nfs_ganesha_cluster = ( - cls.vips[:cls.num_of_nfs_ganesha_nodes]) - - # Obtain hostname of servers in ganesha cluster - cls.ganesha_servers_hostname = [] - for ganesha_server in cls.servers_in_nfs_ganesha_cluster: - ret, hostname, _ = g.run(ganesha_server, "hostname") - if ret: - raise ExecutionError("Failed to obtain hostname of %s" - % ganesha_server) - hostname = hostname.strip() - g.log.info("Obtained hostname: IP- %s, hostname- %s", - ganesha_server, hostname) - cls.ganesha_servers_hostname.append(hostname) - - @classmethod - def setup_nfs_ganesha(cls): - """ - Create nfs-ganesha cluster if not exists - Set client configurations for nfs-ganesha - - Returns: - True(bool): If setup is successful - False(bool): If setup is failure - """ - # pylint: disable = too-many-statements, too-many-branches - # pylint: disable = too-many-return-statements - cluster_exists = is_nfs_ganesha_cluster_exists( + Create nfs-ganesha cluster if not exists + Set client configurations for nfs-ganesha + + Returns: + True(bool): If setup is successful + False(bool): If setup is failure + """ + # pylint: disable = too-many-statements, too-many-branches + # pylint: disable = too-many-return-statements + cluster_exists = is_nfs_ganesha_cluster_exists( + cls.servers_in_nfs_ganesha_cluster[0]) + if cluster_exists: + is_healthy = is_nfs_ganesha_cluster_in_healthy_state( cls.servers_in_nfs_ganesha_cluster[0]) - if cluster_exists: - is_healthy = 
is_nfs_ganesha_cluster_in_healthy_state( - cls.servers_in_nfs_ganesha_cluster[0]) - - if is_healthy: - g.log.info("Nfs-ganesha Cluster exists and is in healthy " - "state. Skipping cluster creation...") - else: - g.log.info("Nfs-ganesha Cluster exists and is not in " - "healthy state.") - g.log.info("Tearing down existing cluster which is not in " - "healthy state") - ganesha_ha_file = ("/var/run/gluster/shared_storage/" - "nfs-ganesha/ganesha-ha.conf") - - g.log.info("Collecting server details of existing " - "nfs ganesha cluster") - conn = g.rpyc_get_connection( - cls.servers_in_nfs_ganesha_cluster[0], user="root") - if not conn: - tmp_node = cls.servers_in_nfs_ganesha_cluster[0] - g.log.error("Unable to get connection to 'root' of node" - " %s", tmp_node) - return False - - if not conn.modules.os.path.exists(ganesha_ha_file): - g.log.error("Unable to locate %s", ganesha_ha_file) - return False - with conn.builtin.open(ganesha_ha_file, "r") as fhand: - ganesha_ha_contents = fhand.read() - g.rpyc_close_connection( - host=cls.servers_in_nfs_ganesha_cluster[0], user="root") - servers_in_existing_cluster = re.findall(r'VIP_(.*)\=.*', - ganesha_ha_contents) - - ret = teardown_nfs_ganesha_cluster( - servers_in_existing_cluster, force=True) - if not ret: - g.log.error("Failed to teardown unhealthy ganesha " - "cluster") - return False - - g.log.info("Existing unhealthy cluster got teardown " - "successfully") - - if (not cluster_exists) or (not is_healthy): - g.log.info("Creating nfs-ganesha cluster of %s nodes" - % str(cls.num_of_nfs_ganesha_nodes)) - g.log.info("Nfs-ganesha cluster node info: %s" - % cls.servers_in_nfs_ganesha_cluster) - g.log.info("Nfs-ganesha cluster vip info: %s" - % cls.vips_in_nfs_ganesha_cluster) - - ret = create_nfs_ganesha_cluster( - cls.ganesha_servers_hostname, - cls.vips_in_nfs_ganesha_cluster) + + if is_healthy: + g.log.info("Nfs-ganesha Cluster exists and is in healthy " + "state. 
Skipping cluster creation...") + else: + g.log.info("Nfs-ganesha Cluster exists and is not in " + "healthy state.") + g.log.info("Tearing down existing cluster which is not in " + "healthy state") + ganesha_ha_file = ("/var/run/gluster/shared_storage/" + "nfs-ganesha/ganesha-ha.conf") + g_node = cls.servers_in_nfs_ganesha_cluster[0] + + g.log.info("Collecting server details of existing " + "nfs ganesha cluster") + + # Check whether ganesha ha file exists + cmd = "[ -f {} ]".format(ganesha_ha_file) + ret, _, _ = g.run(g_node, cmd) + if ret: + g.log.error("Unable to locate %s", ganesha_ha_file) + return False + + # Read contents of ganesha_ha_file + cmd = "cat {}".format(ganesha_ha_file) + ret, ganesha_ha_contents, _ = g.run(g_node, cmd) + if ret: + g.log.error("Failed to read %s", ganesha_ha_file) + return False + + servers_in_existing_cluster = re.findall(r'VIP_(.*)\=.*', + ganesha_ha_contents) + + ret = teardown_nfs_ganesha_cluster( + servers_in_existing_cluster, force=True) if not ret: - g.log.error("Creation of nfs-ganesha cluster failed") + g.log.error("Failed to teardown unhealthy ganesha " + "cluster") return False - if not is_nfs_ganesha_cluster_in_healthy_state( - cls.servers_in_nfs_ganesha_cluster[0]): - g.log.error("Nfs-ganesha cluster is not healthy") - return False - g.log.info("Nfs-ganesha Cluster exists is in healthy state") + g.log.info("Existing unhealthy cluster got teardown " + "successfully") - ret = configure_ports_on_clients(cls.clients) + if (not cluster_exists) or (not is_healthy): + g.log.info("Creating nfs-ganesha cluster of %s nodes" + % str(cls.num_of_nfs_ganesha_nodes)) + g.log.info("Nfs-ganesha cluster node info: %s" + % cls.servers_in_nfs_ganesha_cluster) + g.log.info("Nfs-ganesha cluster vip info: %s" + % cls.vips_in_nfs_ganesha_cluster) + + ret = create_nfs_ganesha_cluster( + cls.ganesha_servers_hostname, + cls.vips_in_nfs_ganesha_cluster) if not ret: - g.log.error("Failed to configure ports on clients") + g.log.error("Creation of 
nfs-ganesha cluster failed") return False - ret = ganesha_client_firewall_settings(cls.clients) + if not is_nfs_ganesha_cluster_in_healthy_state( + cls.servers_in_nfs_ganesha_cluster[0]): + g.log.error("Nfs-ganesha cluster is not healthy") + return False + g.log.info("Nfs-ganesha Cluster exists is in healthy state") + + if is_rhel7(cls.clients): + ret = configure_ports_on_clients(cls.clients) if not ret: - g.log.error("Failed to do firewall setting in clients") + g.log.error("Failed to configure ports on clients") return False - for server in cls.servers: - for client in cls.clients: - cmd = ("if [ -z \"$(grep -R \"%s\" /etc/hosts)\" ]; then " - "echo \"%s %s\" >> /etc/hosts; fi" - % (client, socket.gethostbyname(client), client)) - ret, _, _ = g.run(server, cmd) - if ret != 0: - g.log.error("Failed to add entry of client %s in " - "/etc/hosts of server %s" - % (client, server)) + ret = ganesha_client_firewall_settings(cls.clients) + if not ret: + g.log.error("Failed to do firewall setting in clients") + return False + for server in cls.servers: for client in cls.clients: - for server in cls.servers: - cmd = ("if [ -z \"$(grep -R \"%s\" /etc/hosts)\" ]; then " - "echo \"%s %s\" >> /etc/hosts; fi" - % (server, socket.gethostbyname(server), server)) - ret, _, _ = g.run(client, cmd) - if ret != 0: - g.log.error("Failed to add entry of server %s in " - "/etc/hosts of client %s" - % (server, client)) - return True - - @classmethod - def tearDownClass(cls, delete_nfs_ganesha_cluster=True): - """Teardown nfs ganesha cluster. - """ - super(NfsGaneshaClusterSetupClass, cls).tearDownClass() - - if delete_nfs_ganesha_cluster: - ret = teardown_nfs_ganesha_cluster( - cls.servers_in_nfs_ganesha_cluster) - if not ret: - g.log.error("Teardown got failed. 
Hence, cleaning up " - "nfs-ganesha cluster forcefully") - ret = teardown_nfs_ganesha_cluster( - cls.servers_in_nfs_ganesha_cluster, force=True) - if not ret: - raise ExecutionError("Force cleanup of nfs-ganesha " - "cluster failed") - g.log.info("Teardown nfs ganesha cluster succeeded") - else: - g.log.info("Skipping teardown nfs-ganesha cluster...") + cmd = ("if [ -z \"$(grep -R \"%s\" /etc/hosts)\" ]; then " + "echo \"%s %s\" >> /etc/hosts; fi" + % (client, socket.gethostbyname(client), client)) + ret, _, _ = g.run(server, cmd) + if ret != 0: + g.log.error("Failed to add entry of client %s in " + "/etc/hosts of server %s" + % (client, server)) + + for client in cls.clients: + for server in cls.servers: + cmd = ("if [ -z \"$(grep -R \"%s\" /etc/hosts)\" ]; then " + "echo \"%s %s\" >> /etc/hosts; fi" + % (server, socket.gethostbyname(server), server)) + ret, _, _ = g.run(client, cmd) + if ret != 0: + g.log.error("Failed to add entry of server %s in " + "/etc/hosts of client %s" + % (server, client)) + return True def wait_for_nfs_ganesha_volume_to_get_exported(mnode, volname, timeout=120): diff --git a/glustolibs-gluster/glustolibs/gluster/nfs_ganesha_ops.py b/glustolibs-gluster/glustolibs/gluster/nfs_ganesha_ops.py index 3e53ec29d..d8486c7d2 100644..100755 --- a/glustolibs-gluster/glustolibs/gluster/nfs_ganesha_ops.py +++ b/glustolibs-gluster/glustolibs/gluster/nfs_ganesha_ops.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (C) 2016-2017 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2016-2021 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -23,10 +23,10 @@ """ import os -import random from glusto.core import Glusto as g from glustolibs.gluster.glusterdir import mkdir -from glustolibs.gluster.lib_utils import add_services_to_firewall +from glustolibs.gluster.lib_utils import (add_services_to_firewall, + is_rhel7) from glustolibs.gluster.shared_storage_ops import enable_shared_storage from glustolibs.gluster.peer_ops import peer_probe_servers @@ -50,17 +50,33 @@ def teardown_nfs_ganesha_cluster(servers, force=False): Example: teardown_nfs_ganesha_cluster(servers) """ + # Copy ganesha.conf before proceeding to clean up + for server in servers: + cmd = "cp /etc/ganesha/ganesha.conf ganesha.conf" + ret, _, _ = g.run(server, cmd) + if ret: + g.log.error("Failed to copy ganesha.conf") + if force: g.log.info("Executing force cleanup...") + cleanup_ops = ['--teardown', '--cleanup'] for server in servers: - cmd = ("/usr/libexec/ganesha/ganesha-ha.sh --teardown " - "/var/run/gluster/shared_storage/nfs-ganesha") - _, _, _ = g.run(server, cmd) - cmd = ("/usr/libexec/ganesha/ganesha-ha.sh --cleanup /var/run/" - "gluster/shared_storage/nfs-ganesha") - _, _, _ = g.run(server, cmd) + # Perform teardown and cleanup + for op in cleanup_ops: + cmd = ("/usr/libexec/ganesha/ganesha-ha.sh {} /var/run/" + "gluster/shared_storage/nfs-ganesha".format(op)) + _, _, _ = g.run(server, cmd) + + # Stop nfs ganesha service _, _, _ = stop_nfs_ganesha_service(server) + + # Clean shared storage, ganesha.conf, and replace with backup + for cmd in ("rm -rf /var/run/gluster/shared_storage/*", + "rm -rf /etc/ganesha/ganesha.conf", + "cp ganesha.conf /etc/ganesha/ganesha.conf"): + _, _, _ = g.run(server, cmd) return True + ret, _, _ = disable_nfs_ganesha(servers[0]) if ret != 0: g.log.error("Nfs-ganesha disable failed") @@ -667,14 +683,17 @@ def 
create_nfs_ganesha_cluster(servers, vips): False(bool): If failed to configure ganesha cluster """ # pylint: disable=too-many-return-statements + # pylint: disable=too-many-branches + # pylint: disable=too-many-statements ganesha_mnode = servers[0] - # Configure ports in ganesha servers - g.log.info("Defining statd service ports") - ret = configure_ports_on_servers(servers) - if not ret: - g.log.error("Failed to set statd service ports on nodes.") - return False + # Configure ports in ganesha servers for RHEL7 + if is_rhel7(servers): + g.log.info("Defining statd service ports") + ret = configure_ports_on_servers(servers) + if not ret: + g.log.error("Failed to set statd service ports on nodes.") + return False # Firewall settings for nfs-ganesha ret = ganesha_server_firewall_settings(servers) @@ -752,6 +771,22 @@ def create_nfs_ganesha_cluster(servers, vips): # Create backup of ganesha-ha.conf file in ganesha_mnode g.upload(ganesha_mnode, tmp_ha_conf, '/etc/ganesha/') + # setsebool ganesha_use_fusefs on + cmd = "setsebool ganesha_use_fusefs on" + for server in servers: + ret, _, _ = g.run(server, cmd) + if ret: + g.log.error("Failed to 'setsebool ganesha_use_fusefs on' on %", + server) + return False + + # Verify ganesha_use_fusefs is on + _, out, _ = g.run(server, "getsebool ganesha_use_fusefs") + if "ganesha_use_fusefs --> on" not in out: + g.log.error("Failed to 'setsebool ganesha_use_fusefs on' on %", + server) + return False + # Enabling ganesha g.log.info("Enable nfs-ganesha") ret, _, _ = enable_nfs_ganesha(ganesha_mnode) @@ -765,6 +800,31 @@ def create_nfs_ganesha_cluster(servers, vips): # pcs status output _, _, _ = g.run(ganesha_mnode, "pcs status") + # pacemaker status output + _, _, _ = g.run(ganesha_mnode, "systemctl status pacemaker") + + return True + + +def enable_firewall(servers): + """Enables Firewall if not enabled already + Args: + servers(list): Hostname of ganesha nodes + Returns: + Status (bool) : True/False based on the status of firewall 
enable + """ + + cmd = "systemctl status firewalld | grep Active" + for server in servers: + ret, out, _ = g.run(server, cmd) + if 'inactive' in out: + g.log.info("Firewalld is not running. Enabling Firewalld") + for command in ("enable", "start"): + ret, out, _ = g.run(server, + "systemctl {} firewalld".format(command)) + if ret: + g.log.error("Failed to enable Firewalld on %s", server) + return False return True @@ -778,9 +838,11 @@ def ganesha_server_firewall_settings(servers): True(bool): If successfully set the firewall settings False(bool): If failed to do firewall settings """ + if not enable_firewall(servers): + return False + services = ['nfs', 'rpc-bind', 'high-availability', 'nlm', 'mountd', 'rquota'] - ret = add_services_to_firewall(servers, services, True) if not ret: g.log.error("Failed to set firewall zone permanently on ganesha nodes") @@ -852,47 +914,51 @@ def create_nfs_passwordless_ssh(mnode, gnodes, guser='root'): False(bool): On failure """ loc = "/var/lib/glusterd/nfs/" - mconn_inst = random.randint(20, 100) - mconn = g.rpyc_get_connection(host=mnode, instance=mconn_inst) - if not mconn.modules.os.path.isfile('/root/.ssh/id_rsa'): + # Check whether key is present + cmd = "[ -f /root/.ssh/id_rsa ]" + ret, _, _ = g.run(mnode, cmd) + if ret: # Generate key on mnode if not already present - if not mconn.modules.os.path.isfile('%s/secret.pem' % loc): + g.log.info("id_rsa not found") + cmd = "[ -f %s/secret.pem ]" % loc + ret, _, _ = g.run(mnode, cmd) + if ret: + g.log.info("Secret.pem file not found. 
Creating new") ret, _, _ = g.run( mnode, "ssh-keygen -f %s/secret.pem -q -N ''" % loc) - if ret != 0: + if ret: g.log.error("Failed to generate the secret pem file") return False g.log.info("Key generated on %s" % mnode) else: - mconn.modules.shutil.copyfile("/root/.ssh/id_rsa", - "%s/secret.pem" % loc) - g.log.info("Copying the id_rsa.pub to secret.pem.pub") - mconn.modules.shutil.copyfile("/root/.ssh/id_rsa.pub", - "%s/secret.pem.pub" % loc) + g.log.info("Found existing key") + # Copy the .pem and .pyb files + for file, to_file in (('id_rsa', 'secret.pem'), ('id_rsa.pub', + 'secret.pem.pub')): + cmd = "cp /root/.ssh/{} {}{}".format(file, loc, to_file) + ret, _, err = g.run(mnode, cmd) + if ret: + g.log.error("Failed to copy {} to {} file {}".format(file, + to_file, + err)) + return False # Create password less ssh from mnode to all ganesha nodes + cmd = "cat /root/.ssh/id_rsa.pub" + ret, id_rsa, _ = g.run(mnode, cmd, user=guser) + if ret: + g.log.info("Failed to read key from %s", mnode) + return False for gnode in gnodes: - gconn_inst = random.randint(20, 100) - gconn = g.rpyc_get_connection(gnode, user=guser, instance=gconn_inst) - try: - glocal = gconn.modules.os.path.expanduser('~') - gfhand = gconn.builtin.open("%s/.ssh/authorized_keys" % glocal, - "a") - with mconn.builtin.open("/root/.ssh/id_rsa.pub", 'r') as fhand: - for line in fhand: - gfhand.write(line) - gfhand.close() - except Exception as exep: - g.log.error("Exception occurred while trying to establish " - "password less ssh from %s@%s to %s@%s. 
Exception: %s" - % ('root', mnode, guser, gnode, exep)) + file = "~/.ssh/authorized_keys" + cmd = ("grep -q '{}' {} || echo '{}' >> {}" + .format(id_rsa.rstrip(), file, id_rsa.rstrip(), file)) + ret, _, _ = g.run(gnode, cmd, user=guser) + if ret: + g.log.info("Failed to add ssh key for %s", gnode) return False - finally: - g.rpyc_close_connection( - host=gnode, user=guser, instance=gconn_inst) - - g.rpyc_close_connection(host=mnode, instance=mconn_inst) + g.log.info("Successfully copied ssh key to all Ganesha nodes") # Copy the ssh key pair from mnode to all the nodes in the Ganesha-HA # cluster @@ -906,8 +972,8 @@ def create_nfs_passwordless_ssh(mnode, gnodes, guser='root'): % (loc, loc, guser, gnode, loc)) ret, _, _ = g.run(mnode, cmd) if ret != 0: - g.log.error("Failed to copy the ssh key pair from %s to %s", - mnode, gnode) + g.log.error("Failed to copy the ssh key pair from " + "%s to %s", mnode, gnode) return False return True @@ -923,7 +989,7 @@ def create_ganesha_ha_conf(hostnames, vips, temp_ha_file): """ hosts = ','.join(hostnames) - with open(temp_ha_file, 'wb') as fhand: + with open(temp_ha_file, 'w') as fhand: fhand.write('HA_NAME="ganesha-ha-360"\n') fhand.write('HA_CLUSTER_NODES="%s"\n' % hosts) for (hostname, vip) in zip(hostnames, vips): @@ -940,7 +1006,6 @@ def cluster_auth_setup(servers): True(bool): If configuration of cluster services is success False(bool): If failed to configure cluster services """ - result = True for node in servers: # Enable pacemaker.service ret, _, _ = g.run(node, "systemctl enable pacemaker.service") @@ -965,13 +1030,15 @@ def cluster_auth_setup(servers): return False # Perform cluster authentication between the nodes + auth_type = 'cluster' if is_rhel7(servers) else 'host' for node in servers: - ret, _, _ = g.run(node, "pcs cluster auth %s -u hacluster -p " - "hacluster" % ' '.join(servers)) - if ret != 0: - g.log.error("pcs cluster auth command failed on %s", node) - result = False - return result + ret, _, _ = 
g.run(node, "pcs %s auth %s -u hacluster -p hacluster" + % (auth_type, ' '.join(servers))) + if ret: + g.log.error("pcs %s auth command failed on %s", + auth_type, node) + return False + return True def configure_ports_on_servers(servers): diff --git a/glustolibs-gluster/glustolibs/gluster/rebalance_ops.py b/glustolibs-gluster/glustolibs/gluster/rebalance_ops.py index 1c8c10a4b..1011c89c6 100644 --- a/glustolibs-gluster/glustolibs/gluster/rebalance_ops.py +++ b/glustolibs-gluster/glustolibs/gluster/rebalance_ops.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (C) 2015-2016 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -401,3 +401,76 @@ def get_remove_brick_status(mnode, volname, bricks_list): else: remove_brick_status[element.tag] = element.text return remove_brick_status + + +def wait_for_remove_brick_to_complete(mnode, volname, bricks_list, + timeout=1200): + """Waits for the remove brick to complete + + Args: + mnode (str): Node on which command has to be executed. + volname (str): volume name + bricks_list (str): List of bricks participating in + remove-brick operation + + Kwargs: + timeout (int): timeout value in seconds to wait for remove brick + to complete + + Returns: + True on success, False otherwise + + Examples: + >>> wait_for_remove_brick_to_complete("abc.com", "testvol") + """ + + count = 0 + while count < timeout: + status_info = get_remove_brick_status(mnode, volname, bricks_list) + if status_info is None: + return False + status = status_info['aggregate']['statusStr'] + if status == 'completed': + g.log.info("Remove brick is successfully completed in %s sec", + count) + return True + elif status == 'failed': + g.log.error(" Remove brick failed on one or more nodes. 
" + "Check remove brick status for more details") + return False + else: + time.sleep(10) + count += 10 + g.log.error("Remove brick operation has not completed. " + "Wait timeout is %s" % count) + return False + + +def set_rebalance_throttle(mnode, volname, throttle_type='normal'): + """Sets rebalance throttle + + Args: + mnode (str): Node on which cmd has to be executed. + volname (str): volume name + + Kwargs: + throttle_type (str): throttling type (lazy|normal|aggressive) + Defaults to 'normal' + + Returns: + tuple: Tuple containing three elements (ret, out, err). + The first element 'ret' is of type 'int' and is the return value + of command execution. + + The second element 'out' is of type 'str' and is the stdout value + of the command execution. + + The third element 'err' is of type 'str' and is the stderr value + of the command execution. + + Example: + set_rebalance_throttle(mnode, testvol, throttle_type='aggressive') + """ + cmd = ("gluster volume set {} rebal-throttle {}".format + (volname, throttle_type)) + return g.run(mnode, cmd) diff --git a/glustolibs-gluster/glustolibs/gluster/snap_ops.py b/glustolibs-gluster/glustolibs/gluster/snap_ops.py index 1e792ada7..0fba7771b 100644 --- a/glustolibs-gluster/glustolibs/gluster/snap_ops.py +++ b/glustolibs-gluster/glustolibs/gluster/snap_ops.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (C) 2015-2016 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -859,3 +859,29 @@ def snap_deactivate(mnode, snapname): cmd = "gluster snapshot deactivate %s --mode=script" % snapname return g.run(mnode, cmd) + + +def terminate_snapd_on_node(mnode): + """Terminate snapd on the specified node + + Args: + mnode(str):node on which commands has to be executed + + Returns: + tuple: Tuple containing three elements (ret, out, err). + The first element 'ret' is of type 'int' and is the return value + of command execution. + + The second element 'out' is of type 'str' and is the stdout value + of the command execution. + + The third element 'err' is of type 'str' and is the stderr value + of the command execution. + """ + cmd = "ps aux| grep -m1 snapd | awk '{print $2}'" + _, out, _ = g.run(mnode, cmd) + if out is None: + g.log.error("Failed to get the snapd PID using command %s", cmd) + return None + cmd = "kill -9 %s" % out + return g.run(mnode, cmd) diff --git a/glustolibs-gluster/glustolibs/gluster/ssl_ops.py b/glustolibs-gluster/glustolibs/gluster/ssl_ops.py deleted file mode 100644 index f5d310d01..000000000 --- a/glustolibs-gluster/glustolibs/gluster/ssl_ops.py +++ /dev/null @@ -1,226 +0,0 @@ -#!/usr/bin/env python -# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com> -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -""" - Description: Module for creating ssl machines for - validating basic ssl cases -""" - -from io import StringIO - -from glusto.core import Glusto as g - - -def create_ssl_machine(servers, clients): - """Following are the steps to create ssl machines: - - Stop glusterd on all servers - - Run: openssl genrsa -out /etc/ssl/glusterfs.key 2048 - - Run: openssl req -new -x509 -key /etc/ssl/glusterfs.key - -subj "/CN=ip's" -days 365 -out /etc/ssl/glusterfs.pem - - copy glusterfs.pem files into glusterfs.ca from all - the nodes(servers+clients) to all the servers - - touch /var/lib/glusterd/secure-access - - Start glusterd on all servers - Args: - servers: List of servers - clients: List of clients - - Returns: - bool : True if successfully created ssl machine. False otherwise. 
- """ - # pylint: disable=too-many-statements, too-many-branches - # pylint: disable=too-many-return-statements - # Variable to collect all servers ca_file for servers - ca_file_server = StringIO() - - # Stop glusterd on all servers - ret = g.run_parallel(servers, "systemctl stop glusterd") - if not ret: - g.log.error("Failed to stop glusterd on all servers") - return False - - # Generate key file on all servers - cmd = "openssl genrsa -out /etc/ssl/glusterfs.key 2048" - ret = g.run_parallel(servers, cmd) - if not ret: - g.log.error("Failed to create /etc/ssl/glusterfs.key " - "file on all servers") - return False - - # Generate glusterfs.pem file on all servers - for server in servers: - _, hostname, _ = g.run(server, "hostname") - cmd = ("openssl req -new -x509 -key /etc/ssl/glusterfs.key -subj " - "/CN=%s -days 365 -out /etc/ssl/glusterfs.pem" % (hostname)) - ret = g.run(server, cmd) - if not ret: - g.log.error("Failed to create /etc/ssl/glusterfs.pem " - "file on server %s", server) - return False - - # Copy glusterfs.pem file of all servers into ca_file_server - for server in servers: - conn1 = g.rpyc_get_connection(server) - if conn1 == "None": - g.log.error("Failed to get rpyc connection on %s", server) - - with conn1.builtin.open('/etc/ssl/glusterfs.pem') as fin: - ca_file_server.write(fin.read()) - - # Copy all ca_file_server for clients use - ca_file_client = ca_file_server.getvalue() - - # Generate key file on all clients - for client in clients: - _, hostname, _ = g.run(client, "hostname -s") - cmd = "openssl genrsa -out /etc/ssl/glusterfs.key 2048" - ret = g.run(client, cmd) - if not ret: - g.log.error("Failed to create /etc/ssl/glusterfs.key " - "file on client %s", client) - return False - - # Generate glusterfs.pem file on all clients - cmd = ("openssl req -new -x509 -key /etc/ssl/glusterfs.key -subj " - "/CN=%s -days 365 -out /etc/ssl/glusterfs.pem" % (client)) - ret = g.run(client, cmd) - if not ret: - g.log.error("Failed to create 
/etc/ssl/glusterf.pem " - "file on client %s", client) - return False - - # Copy glusterfs.pem file of client to a ca_file_server - conn2 = g.rpyc_get_connection(client) - if conn2 == "None": - g.log.error("Failed to get rpyc connection on %s", server) - with conn2.builtin.open('/etc/ssl/glusterfs.pem') as fin: - ca_file_server.write(fin.read()) - - # Copy glusterfs.pem file to glusterfs.ca of client such that - # clients shouldn't share respectives ca file each other - cmd = "cp /etc/ssl/glusterfs.pem /etc/ssl/glusterfs.ca" - ret, _, _ = g.run(client, cmd) - if ret != 0: - g.log.error("Failed to copy the glusterfs.pem to " - "glusterfs.ca of client") - return False - - # Now copy the ca_file of all servers to client ca file - with conn2.builtin.open('/etc/ssl/glusterfs.ca', 'a') as fout: - fout.write(ca_file_client) - - # Create /var/lib/glusterd directory on clients - ret = g.run(client, "mkdir -p /var/lib/glusterd/") - if not ret: - g.log.error("Failed to create directory /var/lib/glusterd/" - " on clients") - - # Copy ca_file_server to all servers - for server in servers: - conn3 = g.rpyc_get_connection(server) - if conn3 == "None": - g.log.error("Failed to get rpyc connection on %s", server) - - with conn3.builtin.open('/etc/ssl/glusterfs.ca', 'w') as fout: - fout.write(ca_file_server.getvalue()) - - # Touch /var/lib/glusterd/secure-access on all servers - ret = g.run_parallel(servers, "touch /var/lib/glusterd/secure-access") - if not ret: - g.log.error("Failed to touch the file on servers") - return False - - # Touch /var/lib/glusterd/secure-access on all clients - ret = g.run_parallel(clients, "touch /var/lib/glusterd/secure-access") - if not ret: - g.log.error("Failed to touch the file on clients") - return False - - # Start glusterd on all servers - ret = g.run_parallel(servers, "systemctl start glusterd") - if not ret: - g.log.error("Failed to stop glusterd on servers") - return False - - return True - - -def cleanup_ssl_setup(servers, clients): - """ - 
Following are the steps to cleanup ssl setup: - - Stop glusterd on all servers - - Remove folder /etc/ssl/* - - Remove /var/lib/glusterd/* - - Start glusterd on all servers - - Args: - servers: List of servers - clients: List of clients - - Returns: - bool : True if successfully cleaned ssl machine. False otherwise. - """ - # pylint: disable=too-many-return-statements - _rc = True - - # Stop glusterd on all servers - ret = g.run_parallel(servers, "systemctl stop glusterd") - if not ret: - _rc = False - g.log.error("Failed to stop glusterd on all servers") - - # Remove glusterfs.key, glusterfs.pem and glusterfs.ca file - # from all servers - cmd = "rm -rf /etc/ssl/glusterfs*" - ret = g.run_parallel(servers, cmd) - if not ret: - _rc = False - g.log.error("Failed to remove folder /etc/ssl/glusterfs* " - "on all servers") - - # Remove folder /var/lib/glusterd/secure-access from servers - cmd = "rm -rf /var/lib/glusterd/secure-access" - ret = g.run_parallel(servers, cmd) - if not ret: - _rc = False - g.log.error("Failed to remove folder /var/lib/glusterd/secure-access " - "on all servers") - - # Remove glusterfs.key, glusterfs.pem and glusterfs.ca file - # from all clients - cmd = "rm -rf /etc/ssl/glusterfs*" - ret = g.run_parallel(clients, cmd) - if not ret: - _rc = False - g.log.error("Failed to remove folder /etc/ssl/glusterfs* " - "on all clients") - - # Remove folder /var/lib/glusterd/secure-access from clients - cmd = "rm -rf /var/lib/glusterd/secure-access" - ret = g.run_parallel(clients, cmd) - if not ret: - _rc = False - g.log.error("Failed to remove folder /var/lib/glusterd/secure-access " - "on all clients") - - # Start glusterd on all servers - ret = g.run_parallel(servers, "systemctl start glusterd") - if not ret: - _rc = False - g.log.error("Failed to stop glusterd on servers") - - return _rc diff --git a/glustolibs-gluster/glustolibs/gluster/tiering_ops.py b/glustolibs-gluster/glustolibs/gluster/tiering_ops.py deleted file mode 100644 index 
357b3d471..000000000 --- a/glustolibs-gluster/glustolibs/gluster/tiering_ops.py +++ /dev/null @@ -1,1023 +0,0 @@ -#!/usr/bin/env python -# Copyright (C) 2015-2016 Red Hat, Inc. <http://www.redhat.com> -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -""" - Description: Library for gluster tiering operations. -""" - -import re -import time -from glusto.core import Glusto as g -from glustolibs.gluster.peer_ops import peer_probe_servers -from glustolibs.gluster.gluster_init import start_glusterd -from glustolibs.gluster.lib_utils import list_files - -try: - import xml.etree.cElementTree as etree -except ImportError: - import xml.etree.ElementTree as etree - - -def add_extra_servers_to_cluster(mnode, extra_servers): - """Adds the given extra servers to cluster - - Args: - mnode (str): Node on which cmd has to be executed. 
- extra_servers (str|list) : A server|list of extra servers to be - attached to cluster - - Returns: - bool: True, if extra servers are attached to cluster - False, otherwise - - Example: - add_extra_servers_to_cluster("abc.com", ['peer_node1','peer_node2']) - """ - - if not isinstance(extra_servers, list): - extra_servers = [extra_servers] - - ret = start_glusterd(servers=extra_servers) - if not ret: - g.log.error("glusterd did not start in peer nodes") - return False - - ret = peer_probe_servers(mnode, servers=extra_servers) - if not ret: - g.log.error("Unable to do peer probe on extra server machines") - return False - - return True - - -def tier_attach(mnode, volname, num_bricks_to_add, extra_servers, - extra_servers_info, replica=1, force=False): - """Attaches tier to the volume - - Args: - mnode (str): Node on which cmd has to be executed. - volname (str): volume name - num_bricks_to_add (str): number of bricks to be added as hot tier - extra_servers (str|list): from this server|these servers, - hot tier will be added to volume - extra_servers_info (dict): dict of server info of each extra servers - - Kwargs: - replica (str): replica count of the hot tier - force (bool): If this option is set to True, then attach tier - will get executed with force option. If it is set to False, - then attach tier will get executed without force option - - Returns: - tuple: Tuple containing three elements (ret, out, err). - The first element 'ret' is of type 'int' and is the return value - of command execution. - - The second element 'out' is of type 'str' and is the stdout value - of the command execution. - - The third element 'err' is of type 'str' and is the stderr value - of the command execution. 
- - Example: - tier_attach("abc.com", testvol, '2', ['extra_server1','extra_server2'], - extra_server_info) - """ - if not isinstance(extra_servers, list): - extra_servers = [extra_servers] - - replica = int(replica) - repc = '' - if replica != 1: - repc = "replica %d" % replica - - frce = '' - if force: - frce = 'force' - - num_bricks_to_add = int(num_bricks_to_add) - - from glustolibs.gluster.lib_utils import form_bricks_list - bricks_list = form_bricks_list(mnode, volname, num_bricks_to_add, - extra_servers[:], extra_servers_info) - if bricks_list is None: - g.log.error("number of bricks required are greater than " - "unused bricks") - return (-1, '', '') - - bricks_path = ' '.join(bricks_list) - bricks_path = [re.sub(r"(.*\/\S+\_)brick(\d+)", r"\1tier\2", item) - for item in bricks_path.split() if item] - tier_bricks_path = " ".join(bricks_path) - cmd = ("gluster volume tier %s attach %s %s %s --mode=script" - % (volname, repc, tier_bricks_path, frce)) - - return g.run(mnode, cmd) - - -def tier_start(mnode, volname, force=False): - """Starts the tier volume - - Args: - mnode (str): Node on which cmd has to be executed. - volname (str): volume name - - Kwargs: - force (bool): If this option is set to True, then attach tier - will get executed with force option. If it is set to False, - then attach tier will get executed without force option - Returns: - tuple: Tuple containing three elements (ret, out, err). - The first element 'ret' is of type 'int' and is the return value - of command execution. - - The second element 'out' is of type 'str' and is the stdout value - of the command execution. - - The third element 'err' is of type 'str' and is the stderr value - of the command execution. 
- - Example: - tier_start("abc.com", testvol) - """ - - frce = '' - if force: - frce = 'force' - - cmd = ("gluster volume tier %s start %s --mode=script" - % (volname, frce)) - return g.run(mnode, cmd) - - -def tier_status(mnode, volname): - """executes tier status command - - Args: - mnode (str): Node on which cmd has to be executed. - volname (str): volume name - - Returns: - tuple: Tuple containing three elements (ret, out, err). - The first element 'ret' is of type 'int' and is the return value - of command execution. - - The second element 'out' is of type 'str' and is the stdout value - of the command execution. - - The third element 'err' is of type 'str' and is the stderr value - of the command execution. - - Example: - tier_status("abc.com", testvol) - """ - - cmd = "gluster volume tier %s status" % volname - ret = g.run(mnode, cmd) - - return ret - - -def get_tier_status(mnode, volname): - """Parse the output of 'gluster tier status' command. - - Args: - mnode (str): Node on which command has to be executed. - volname (str): volume name - - Returns: - NoneType: None if command execution fails, parse errors. - dict: dict on success. - - Examples: - >>> get_tier_status('abc.lab.eng.xyz.com', 'testvol') - {'node': [{'promotedFiles': '0', 'demotedFiles': '0', 'nodeName': - 'localhost', 'statusStr': 'in progress'}, {'promotedFiles': '0', - 'demotedFiles': '0', 'nodeName': '10.70.47.16', 'statusStr': - 'in progress'}], 'task-id': '2ed28cbd-4246-493a-87b8-1fdcce313b34', - 'nodeCount': '4', 'op': '7'} - """ - - cmd = "gluster volume tier %s status --xml" % volname - ret, out, _ = g.run(mnode, cmd) - if ret != 0: - g.log.error("Failed to execute 'tier status' on node %s. 
" - "Hence failed to get tier status.", mnode) - return None - - try: - root = etree.XML(out) - except etree.ParseError: - g.log.error("Failed to parse the gluster tier status xml output.") - return None - - tier_status = {} - tier_status["node"] = [] - for info in root.findall("volRebalance"): - for element in info.getchildren(): - if element.tag == "node": - status_info = {} - for elmt in element.getchildren(): - status_info[elmt.tag] = elmt.text - tier_status[element.tag].append(status_info) - else: - tier_status[element.tag] = element.text - return tier_status - - -def tier_detach_start(mnode, volname): - """starts detaching tier on given volume - - Args: - mnode (str): Node on which cmd has to be executed. - volname (str): volume name - - Returns: - tuple: Tuple containing three elements (ret, out, err). - The first element 'ret' is of type 'int' and is the return value - of command execution. - - The second element 'out' is of type 'str' and is the stdout value - of the command execution. - - The third element 'err' is of type 'str' and is the stderr value - of the command execution. - - Example: - tier_detach_start("abc.com", testvol) - - """ - - cmd = "gluster volume tier %s detach start --mode=script" % volname - return g.run(mnode, cmd) - - -def tier_detach_status(mnode, volname): - """executes detach tier status on given volume - - Args: - mnode (str): Node on which cmd has to be executed. - volname (str): volume name - - Returns: - tuple: Tuple containing three elements (ret, out, err). - The first element 'ret' is of type 'int' and is the return value - of command execution. - - The second element 'out' is of type 'str' and is the stdout value - of the command execution. - - The third element 'err' is of type 'str' and is the stderr value - of the command execution. 
- - Example: - tier_detach_status("abc.com", testvol) - - """ - - cmd = "gluster volume tier %s detach status --mode=script" % volname - return g.run(mnode, cmd) - - -def tier_detach_stop(mnode, volname): - """stops detaching tier on given volume - - Args: - mnode (str): Node on which cmd has to be executed. - volname (str): volume name - - Returns: - tuple: Tuple containing three elements (ret, out, err). - The first element 'ret' is of type 'int' and is the return value - of command execution. - - The second element 'out' is of type 'str' and is the stdout value - of the command execution. - - The third element 'err' is of type 'str' and is the stderr value - of the command execution. - - Example: - tier_detach_stop("abc.com", testvol) - - """ - - cmd = "gluster volume tier %s detach stop --mode=script" % volname - return g.run(mnode, cmd) - - -def tier_detach_commit(mnode, volname): - """commits detach tier on given volume - - Args: - mnode (str): Node on which cmd has to be executed. - volname (str): volume name - - Returns: - tuple: Tuple containing three elements (ret, out, err). - The first element 'ret' is of type 'int' and is the return value - of command execution. - - The second element 'out' is of type 'str' and is the stdout value - of the command execution. - - The third element 'err' is of type 'str' and is the stderr value - of the command execution. - - Example: - tier_detach_commit("abc.com", testvol) - - """ - - cmd = "gluster volume tier %s detach commit --mode=script" % volname - return g.run(mnode, cmd) - - -def tier_detach_force(mnode, volname): - """detaches tier forcefully on given volume - - Args: - mnode (str): Node on which cmd has to be executed. - volname (str): volume name - - Returns: - tuple: Tuple containing three elements (ret, out, err). - The first element 'ret' is of type 'int' and is the return value - of command execution. - - The second element 'out' is of type 'str' and is the stdout value - of the command execution. 
- - The third element 'err' is of type 'str' and is the stderr value - of the command execution. - - Example: - tier_detach_force("abc.com", testvol) - - """ - - cmd = "gluster volume tier %s detach force --mode=script" % volname - return g.run(mnode, cmd) - - -def get_detach_tier_status(mnode, volname): - """Parse the output of 'gluster volume tier detach status' command. - - Args: - mnode (str): Node on which command has to be executed. - volname (str): volume name - - Returns: - NoneType: None if command execution fails, parse errors. - dict: dict on success. - - Examples: - >>> get_detach_tier_status('abc.lab.eng.xyz.com', "testvol") - {'node': [{'files': '0', 'status': '3', 'lookups': '1', 'skipped': '0', - 'nodeName': 'localhost', 'failures': '0', 'runtime': '0.00', 'id': - '11336017-9561-4e88-9ac3-a94d4b403340', 'statusStr': 'completed', - 'size': '0'}, {'files': '0', 'status': '3', 'lookups': '0', 'skipped': - '0', 'nodeName': '10.70.47.16', 'failures': '0', 'runtime': '0.00', - 'id': 'a2b88b10-eba2-4f97-add2-8dc37df08b27', 'statusStr': 'completed', - 'size': '0'}], 'nodeCount': '4', 'aggregate': {'files': '0', 'status': - '3', 'lookups': '1', 'skipped': '0', 'failures': '0', 'runtime': '0.0', - 'statusStr': 'completed', 'size': '0'}} - """ - - cmd = "gluster volume tier %s detach status --xml" % volname - ret, out, _ = g.run(mnode, cmd) - if ret != 0: - g.log.error("Failed to execute 'detach tier status' on node %s. 
" - "Hence failed to get detach tier status.", mnode) - return None - - try: - root = etree.XML(out) - except etree.ParseError: - g.log.error("Failed to parse the detach tier status xml output.") - return None - - tier_status = {} - tier_status["node"] = [] - for info in root.findall("volDetachTier"): - for element in info.getchildren(): - if element.tag == "node": - status_info = {} - for elmt in element.getchildren(): - status_info[elmt.tag] = elmt.text - tier_status[element.tag].append(status_info) - elif element.tag == "aggregate": - status_info = {} - for elmt in element.getchildren(): - status_info[elmt.tag] = elmt.text - tier_status[element.tag] = status_info - else: - tier_status[element.tag] = element.text - return tier_status - - -def tier_detach_start_and_get_taskid(mnode, volname): - """Parse the output of 'gluster volume tier detach start' command. - - Args: - mnode (str): Node on which command has to be executed. - volname (str): volume name - - Returns: - NoneType: None if command execution fails, parse errors. - dict: dict on success. - - Examples: - >>> tier_detach_start_and_get_taskid('abc.lab.eng.xyz.com', - "testvol") - {'task-id': '8020835c-ff0d-4ea1-9f07-62dd067e92d4'} - """ - - cmd = "gluster volume tier %s detach start --xml" % volname - ret, out, _ = g.run(mnode, cmd) - if ret != 0: - g.log.error("Failed to execute 'detach tier start' on node %s. " - "Hence failed to parse the detach tier start.", mnode) - return None - - try: - root = etree.XML(out) - except etree.ParseError: - g.log.error("Failed to parse the gluster detach tier " - "start xml output.") - return None - - tier_status = {} - for info in root.findall("volDetachTier"): - for element in info.getchildren(): - tier_status[element.tag] = element.text - return tier_status - - -def tier_detach_stop_and_get_status(mnode, volname): - """Parse the output of 'gluster volume tier detach stop' command. - - Args: - mnode (str): Node on which command has to be executed. 
- volname (str): volume name - - Returns: - NoneType: None if command execution fails, parse errors. - dict: dict on success. - - Examples: - >>> tier_detach_stop_and_get_status('abc.lab.eng.xyz.com', - "testvol") - {'node': [{'files': '0', 'status': '3', 'lookups': '1', 'skipped': '0', - 'nodeName': 'localhost', 'failures': '0', 'runtime': '0.00', 'id': - '11336017-9561-4e88-9ac3-a94d4b403340', 'statusStr': 'completed', - 'size': '0'}, {'files': '0', 'status': '3', 'lookups': '0', 'skipped': - '0', 'nodeName': '10.70.47.16', 'failures': '0', 'runtime': '0.00', - 'id': 'a2b88b12-eba2-4f97-add2-8dc37df08b27', 'statusStr': 'completed', - 'size': '0'}], 'nodeCount': '4', 'aggregate': {'files': '0', 'status': - '3', 'lookups': '1', 'skipped': '0', 'failures': '0', 'runtime': '0.0', - 'statusStr': 'completed', 'size': '0'}} - """ - - cmd = "gluster volume tier %s detach stop --xml" % volname - ret, out, _ = g.run(mnode, cmd) - if ret != 0: - g.log.error("Failed to execute 'tier start' on node %s. " - "Hence failed to parse the tier start.", mnode) - return None - - try: - root = etree.XML(out) - except etree.ParseError: - g.log.error("Failed to parse the gluster detach tier stop" - " xml output.") - return None - - tier_status = {} - tier_status["node"] = [] - for info in root.findall("volDetachTier"): - for element in info.getchildren(): - if element.tag == "node": - status_info = {} - for elmt in element.getchildren(): - status_info[elmt.tag] = elmt.text - tier_status[element.tag].append(status_info) - elif element.tag == "aggregate": - status_info = {} - for elmt in element.getchildren(): - status_info[elmt.tag] = elmt.text - tier_status[element.tag] = status_info - else: - tier_status[element.tag] = element.text - return tier_status - - -def wait_for_detach_tier_to_complete(mnode, volname, timeout=300): - """Waits for the detach tier to complete - - Args: - mnode (str): Node on which command has to be executed. 
- volname (str): volume name - - Kwargs: - timeout (int): timeout value to wait for detach tier to complete - - Returns: - True on success, False otherwise - - Examples: - >>> wait_for_detach_tier_to_complete(mnode, "testvol") - """ - - count = 0 - flag = 0 - while (count < timeout): - status_info = get_detach_tier_status(mnode, volname) - if status_info is None: - return False - - status = status_info['aggregate']['statusStr'] - if status == 'completed': - flag = 1 - break - - time.sleep(10) - count = count + 10 - if not flag: - g.log.error("detach tier is not completed") - return False - else: - g.log.info("detach tier is successfully completed") - return True - - -def get_files_from_hot_tier(mnode, volname): - """Lists files from hot tier for the given volume - - Args: - mnode (str): Node on which command has to be executed. - volname (str): volume name - - Returns: - Emptylist: if there are no files in hot tier. - list: list of files in hot tier on success. - - Examples: - >>>get_files_from_hot_tier(mnode, "testvol") - """ - - files = [] - from glustolibs.gluster.volume_libs import get_subvols - subvols = get_subvols(mnode, volname) - for subvol in subvols['hot_tier_subvols']: - info = subvol[0].split(':') - file_list = list_files(info[0], info[1]) - for file in file_list: - if ".glusterfs" not in file: - files.append(file) - - return files - - -def get_files_from_cold_tier(mnode, volname): - """Lists files from cold tier for the given volume - - Args: - mnode (str): Node on which command has to be executed. - volname (str): volume name - - Returns: - Emptylist: if there are no files in cold tier. - list: list of files in cold tier on success. 
- - Examples: - >>>get_files_from_hot_tier("testvol") - """ - - files = [] - from glustolibs.gluster.volume_libs import get_subvols - subvols = get_subvols(mnode, volname) - for subvol in subvols['cold_tier_subvols']: - info = subvol[0].split(':') - file_list = list_files(info[0], info[1]) - for file in file_list: - if ".glusterfs" not in file: - files.append(file) - - return files - - -def get_tier_promote_frequency(mnode, volname): - """Gets tier promote frequency value for given volume. - - Args: - mnode (str): Node on which command has to be executed. - volname (str): volume name - - Returns: - NoneType: None if command execution fails, parse errors. - str: promote frequency value on success. - - Examples: - >>>get_tier_promote_frequency("abc.com", "testvol") - """ - - from glustolibs.gluster.volume_ops import get_volume_options - vol_options = get_volume_options(mnode, volname) - if vol_options is None: - g.log.error("Failed to get volume options") - return None - - return vol_options['cluster.tier-promote-frequency'] - - -def get_tier_demote_frequency(mnode, volname): - """Gets tier demote frequency value for given volume. - - Args: - mnode (str): Node on which command has to be executed. - volname (str): volume name - - Returns: - NoneType: None if command execution fails, parse errors. - str: demote frequency value on success. - - Examples: - >>>get_tier_demote_frequency("abc.com", "testvol") - """ - - from glustolibs.gluster.volume_ops import get_volume_options - vol_options = get_volume_options(mnode, volname) - if vol_options is None: - g.log.error("Failed to get volume options") - return None - - return vol_options['cluster.tier-demote-frequency'] - - -def get_tier_mode(mnode, volname): - """Gets tier mode for given volume. - - Args: - mnode (str): Node on which command has to be executed. - volname (str): volume name - - Returns: - NoneType: None if command execution fails, parse errors. - str: tier mode on success. 
- - Examples: - >>>get_tier_mode("testvol") - """ - - from glustolibs.gluster.volume_ops import get_volume_options - vol_options = get_volume_options(mnode, volname) - if vol_options is None: - g.log.error("Failed to get volume options") - return None - - return vol_options['cluster.tier-mode'] - - -def get_tier_max_mb(mnode, volname): - """Gets tier max mb for given volume. - - Args: - mnode (str): Node on which command has to be executed. - volname (str): volume name - - Returns: - NoneType: None if command execution fails, parse errors. - str: tier max mb on success. - - Examples: - >>>get_tier_max_mb("abc.com", "testvol") - """ - - from glustolibs.gluster.volume_ops import get_volume_options - vol_options = get_volume_options(mnode, volname) - if vol_options is None: - g.log.error("Failed to get volume options") - return None - - return vol_options['cluster.tier-max-mb'] - - -def get_tier_max_files(mnode, volname): - """Gets tier max files for given volume. - - Args: - mnode (str): Node on which command has to be executed. - volname (str): volume name - - Returns: - NoneType: None if command execution fails, parse errors. - str: tier max files on success. - - Examples: - >>>get_tier_max_files("abc.com", "testvol") - """ - - from glustolibs.gluster.volume_ops import get_volume_options - vol_options = get_volume_options(mnode, volname) - if vol_options is None: - g.log.error("Failed to get volume options") - return None - - return vol_options['cluster.tier-max-files'] - - -def get_tier_watermark_high_limit(mnode, volname): - """Gets tier watermark high limit for given volume. - - Args: - mnode (str): Node on which command has to be executed. - volname (str): volume name - - Returns: - NoneType: None if command execution fails, parse errors. - str: tier watermark high limit on success. 
- - Examples: - >>>get_tier_watermark_high_limit(mnode, "testvol") - """ - - from glustolibs.gluster.volume_ops import get_volume_options - vol_options = get_volume_options(mnode, volname) - if vol_options is None: - g.log.error("Failed to get volume options") - return None - - return vol_options['cluster.watermark-hi'] - - -def get_tier_watermark_low_limit(mnode, volname): - """Gets tier watermark low limit for given volume. - - Args: - mnode (str): Node on which command has to be executed. - volname (str): volume name - - Returns: - NoneType: None if command execution fails, parse errors. - str: tier watermark low limit on success. - - Examples: - >>>get_tier_watermark_low_limit("abc.com", "testvol") - """ - - from glustolibs.gluster.volume_ops import get_volume_options - vol_options = get_volume_options(mnode, volname) - if vol_options is None: - g.log.error("Failed to get volume options") - return None - - return vol_options['cluster.watermark-low'] - - -def set_tier_promote_frequency(mnode, volname, value): - """Sets tier promote frequency value for given volume. - - Args: - mnode (str): Node on which command has to be executed. - volname (str): volume name - value (str): promote frequency value - - Returns: - bool: True on success, False Otherwise - - Examples: - >>>set_tier_promote_frequency("abc.com", "testvol", '1000') - """ - - option = {'cluster.tier-promote-frequency': value} - - from glustolibs.gluster.volume_ops import set_volume_options - if not set_volume_options(mnode, volname, - options=option): - g.log.error("Failed to set promote frequency to %s" - % value) - return False - - return True - - -def set_tier_demote_frequency(mnode, volname, value): - """Sets tier demote frequency value for given volume. - - Args: - mnode (str): Node on which command has to be executed. 
- volname (str): volume name - value (str): demote frequency value - - Returns: - bool: True on success, False Otherwise - - Examples: - >>>set_tier_demote_frequency("abc.com", "testvol", "500") - """ - - option = {'cluster.tier-demote-frequency': value} - - from glustolibs.gluster.volume_ops import set_volume_options - if not set_volume_options(mnode, volname, - options=option): - g.log.error("Failed to set demote frequency to %s" - % value) - return False - - return True - - -def set_tier_mode(mnode, volname, value): - """Sets tier mode for given volume. - - Args: - mnode (str): Node on which command has to be executed. - volname (str): volume name - value (str): tier mode value - - Returns: - bool: True on success, False Otherwise - - Examples: - >>>set_tier_mode("abc.com", "testvol", "cache") - """ - - option = {'cluster.tier-mode': value} - - from glustolibs.gluster.volume_ops import set_volume_options - if not set_volume_options(mnode, volname, - options=option): - g.log.error("Failed to set tier mode to %s" - % value) - return False - - return True - - -def set_tier_max_mb(mnode, volname, value): - """Sets tier max mb for given volume. - - Args: - mnode (str): Node on which command has to be executed. - volname (str): volume name - value (str): tier mode value - - Returns: - bool: True on success, False Otherwise - - Examples: - >>>set_tier_max_mb("abc.com", "testvol", "50") - """ - - option = {'cluster.tier-max-mb': value} - - from glustolibs.gluster.volume_ops import set_volume_options - if not set_volume_options(mnode, volname, - options=option): - g.log.error("Failed to set tier max mb to %s" - % value) - return False - - return True - - -def set_tier_max_files(mnode, volname, value): - """Sets tier max files for given volume. - - Args: - mnode (str): Node on which command has to be executed. 
- volname (str): volume name - value (str): tier mode value - - Returns: - bool: True on success, False Otherwise - - Examples: - >>>set_tier_max_files("abc.com", "testvol", "10") - """ - - option = {'cluster.tier-max-files': value} - - from glustolibs.gluster.volume_ops import set_volume_options - if not set_volume_options(mnode, volname, - options=option): - g.log.error("Failed to set tier max files to %s" - % value) - return False - - return True - - -def set_tier_watermark_high_limit(mnode, volname, value): - """Sets tier watermark high limit for given volume. - - Args: - mnode (str): Node on which command has to be executed. - volname (str): volume name - value (str): tier mode value - - Returns: - bool: True on success, False Otherwise - - Examples: - >>>set_tier_watermark_high_limit("abc.com", "testvol", "95") - """ - - option = {'cluster.watermark-hi': value} - - from glustolibs.gluster.volume_ops import set_volume_options - if not set_volume_options(mnode, volname, - options=option): - g.log.error("Failed to set tier watermark high limit to %s" - % value) - return False - - return True - - -def set_tier_watermark_low_limit(mnode, volname, value): - """Sets tier watermark low limit for given volume. - - Args: - mnode (str): Node on which command has to be executed. - volname (str): volume name - value (str): tier mode value - - Returns: - bool: True on success, False Otherwise - - Examples: - >>>set_tier_watermark_low_limit("abc.com", "testvol", "40") - """ - - option = {'cluster.watermark-low': value} - - from glustolibs.gluster.volume_ops import set_volume_options - if not set_volume_options(mnode, volname, - options=option): - g.log.error("Failed to set tier watermark low limit to %s" - % value) - return False - - return True - - -def get_tier_pid(mnode, volname): - """Gets tier pid for given volume. - - Args: - mnode (str): Node on which command has to be executed. 
- volname (str): volume name - - Returns: - NoneType: None if command execution fails, parse errors. - str: pid of tier process on success. - - Examples: - >>>get_tier_pid("abc.xyz.com", "testvol") - """ - - cmd = ("ps -ef | grep -v grep | grep '/var/log/glusterfs/%s-tier.log' |" - "awk '{print $2}'" % volname) - ret, out, err = g.run(mnode, cmd) - if ret != 0: - g.log.error("Failed to execute 'ps' cmd") - return None - - return out.strip("\n") - - -def is_tier_process_running(mnode, volname): - """Checks whether tier process is running - - Args: - mnode (str): Node on which command has to be executed. - volname (str): volume name - - Returns: - True on success, False otherwise - - Examples: - >>>is_tier_process_running("abc.xyz.com", "testvol") - """ - - pid = get_tier_pid(mnode, volname) - if pid == '': - return False - return True diff --git a/glustolibs-gluster/glustolibs/gluster/volume_libs.py b/glustolibs-gluster/glustolibs/gluster/volume_libs.py index a5e54101e..87e70ca8c 100644 --- a/glustolibs-gluster/glustolibs/gluster/volume_libs.py +++ b/glustolibs-gluster/glustolibs/gluster/volume_libs.py @@ -31,9 +31,6 @@ from glustolibs.gluster.volume_ops import (volume_create, volume_start, volume_info, volume_status, get_volume_options, get_volume_list) -from glustolibs.gluster.tiering_ops import (add_extra_servers_to_cluster, - tier_attach, - is_tier_process_running) from glustolibs.gluster.quota_ops import (quota_enable, quota_limit_usage, is_quota_enabled) from glustolibs.gluster.uss_ops import enable_uss, is_uss_enabled @@ -67,7 +64,7 @@ def volume_exists(mnode, volname): def setup_volume(mnode, all_servers_info, volume_config, multi_vol=False, - force=False): + force=False, create_only=False): """Setup Volume with the configuration defined in volume_config Args: @@ -101,11 +98,6 @@ def setup_volume(mnode, all_servers_info, volume_config, multi_vol=False, 'size': '100GB'}, 'enable': False}, 'uss': {'enable': False}, - 'tier': {'create_tier': True, - 
'tier_type': {'type': 'distributed-replicated', - 'replica_count': 2, - 'dist_count': 2, - 'transport': 'tcp'}}, 'options': {'performance.readdir-ahead': True} } Kwargs: @@ -115,7 +107,11 @@ def setup_volume(mnode, all_servers_info, volume_config, multi_vol=False, force (bool): If this option is set to True, then volume creation command is executed with force option. False, without force option. - By default, value is set to False + By default, value is set to False. + create_only(bool): True, if only volume creation is needed. + False, will do volume create, start, set operation + if any provided in the volume_config. + By default, value is set to False. Returns: bool : True on successful setup. False Otherwise @@ -128,8 +124,8 @@ def setup_volume(mnode, all_servers_info, volume_config, multi_vol=False, return False # Check if the volume already exists - volinfo = get_volume_info(mnode=mnode) - if volinfo is not None and volname in volinfo.keys(): + vollist = get_volume_list(mnode=mnode) + if vollist is not None and volname in vollist: g.log.info("volume %s already exists. 
Returning...", volname) return True @@ -293,6 +289,25 @@ def setup_volume(mnode, all_servers_info, volume_config, multi_vol=False, g.log.error("Unable to create volume %s", volname) return False + if create_only and (ret == 0): + g.log.info("Volume creation of {} is done successfully".format( + volname)) + return True + + is_ganesha = False + if 'nfs_ganesha' in volume_config: + is_ganesha = bool(volume_config['nfs_ganesha']['enable']) + + if not is_ganesha: + # Set all the volume options: + if 'options' in volume_config: + volume_options = volume_config['options'] + ret = set_volume_options(mnode=mnode, volname=volname, + options=volume_options) + if not ret: + g.log.error("Unable to set few volume options") + return False + # Start Volume time.sleep(2) ret = volume_start(mnode, volname) @@ -300,68 +315,6 @@ def setup_volume(mnode, all_servers_info, volume_config, multi_vol=False, g.log.error("volume start %s failed", volname) return False - # Create Tier volume - if ('tier' in volume_config and 'create_tier' in volume_config['tier'] and - volume_config['tier']['create_tier']): - # get servers info for tier attach - if ('extra_servers' in volume_config and - volume_config['extra_servers']): - extra_servers = volume_config['extra_servers'] - ret = add_extra_servers_to_cluster(mnode, extra_servers) - if not ret: - return False - else: - extra_servers = volume_config['servers'] - - # get the tier volume type - if 'tier_type' in volume_config['tier']: - if 'type' in volume_config['tier']['tier_type']: - tier_volume_type = volume_config['tier']['tier_type']['type'] - dist = rep = 1 - if tier_volume_type == 'distributed': - if 'dist_count' in volume_config['tier']['tier_type']: - dist = (volume_config['tier']['tier_type'] - ['dist_count']) - - elif tier_volume_type == 'replicated': - if 'replica_count' in volume_config['tier']['tier_type']: - rep = (volume_config['tier']['tier_type'] - ['replica_count']) - - elif tier_volume_type == 'distributed-replicated': - if 
'dist_count' in volume_config['tier']['tier_type']: - dist = (volume_config['tier']['tier_type'] - ['dist_count']) - if 'replica_count' in volume_config['tier']['tier_type']: - rep = (volume_config['tier']['tier_type'] - ['replica_count']) - else: - tier_volume_type = 'distributed' - dist = 1 - rep = 1 - number_of_bricks = dist * rep - - # Attach Tier - ret, _, _ = tier_attach(mnode=mnode, volname=volname, - extra_servers=extra_servers, - extra_servers_info=all_servers_info, - num_bricks_to_add=number_of_bricks, - replica=rep) - if ret != 0: - g.log.error("Unable to attach tier") - return False - - time.sleep(30) - # Check if tier is running - _rc = True - for server in extra_servers: - ret = is_tier_process_running(server, volname) - if not ret: - g.log.error("Tier process not running on %s", server) - _rc = False - if not _rc: - return False - # Enable Quota if ('quota' in volume_config and 'enable' in volume_config['quota'] and volume_config['quota']['enable']): @@ -411,20 +364,22 @@ def setup_volume(mnode, all_servers_info, volume_config, multi_vol=False, g.log.error("USS is not enabled on the volume %s", volname) return False - # Set all the volume options: - if 'options' in volume_config: - volume_options = volume_config['options'] - ret = set_volume_options(mnode=mnode, volname=volname, - options=volume_options) - if not ret: - g.log.error("Unable to set few volume options") - return False + if is_ganesha: + # Set all the volume options for NFS Ganesha + if 'options' in volume_config: + volume_options = volume_config['options'] + ret = set_volume_options(mnode=mnode, volname=volname, + options=volume_options) + if not ret: + g.log.error("Unable to set few volume options") + return False + return True def bulk_volume_creation(mnode, number_of_volumes, servers_info, volume_config, vol_prefix="mult_vol_", - is_force=False): + is_force=False, is_create_only=False): """ Creates the number of volumes user has specified @@ -438,7 +393,11 @@ def 
bulk_volume_creation(mnode, number_of_volumes, servers_info, Kwargs: vol_prefix (str): Prefix to be added to the volume name. is_force (bool): True, If volume create command need to be executed - with force, False Otherwise. Defaults to False + with force, False Otherwise. Defaults to False. + create_only(bool): True, if only volume creation is needed. + False, will do volume create, start, set operation + if any provided in the volume_config. + By default, value is set to False. Returns: bool: True on successful bulk volume creation, False Otherwise. @@ -468,7 +427,7 @@ def bulk_volume_creation(mnode, number_of_volumes, servers_info, for volume in range(number_of_volumes): volume_config['name'] = vol_prefix + volume_name + str(volume) ret = setup_volume(mnode, servers_info, volume_config, multi_vol=True, - force=is_force) + force=is_force, create_only=is_create_only) if not ret: g.log.error("Volume creation failed for the volume %s" % volume_config['name']) @@ -617,77 +576,11 @@ def get_subvols(mnode, volname): get_subvols("abc.xyz.com", "testvol") """ - subvols = { - 'is_tier': False, - 'hot_tier_subvols': [], - 'cold_tier_subvols': [], - 'volume_subvols': [] - } + subvols = {'volume_subvols': []} + volinfo = get_volume_info(mnode, volname) if volinfo is not None: voltype = volinfo[volname]['typeStr'] - if voltype == 'Tier': - # Set is_tier to True - subvols['is_tier'] = True - - # Get hot tier subvols - hot_tier_type = (volinfo[volname]["bricks"] - ['hotBricks']['hotBrickType']) - tmp = volinfo[volname]["bricks"]['hotBricks']["brick"] - hot_tier_bricks = [x["name"] for x in tmp if "name" in x] - if hot_tier_type == 'Distribute': - for brick in hot_tier_bricks: - subvols['hot_tier_subvols'].append([brick]) - - elif (hot_tier_type == 'Replicate' or - hot_tier_type == 'Distributed-Replicate'): - rep_count = int( - (volinfo[volname]["bricks"]['hotBricks'] - ['numberOfBricks']).split("=", 1)[0].split("x")[1].strip() - ) - subvol_list = ( - [hot_tier_bricks[i:i + 
rep_count] - for i in range(0, len(hot_tier_bricks), rep_count)]) - subvols['hot_tier_subvols'] = subvol_list - - # Get cold tier subvols - cold_tier_type = (volinfo[volname]["bricks"]['coldBricks'] - ['coldBrickType']) - tmp = volinfo[volname]["bricks"]['coldBricks']["brick"] - cold_tier_bricks = [x["name"] for x in tmp if "name" in x] - - # Distribute volume - if cold_tier_type == 'Distribute': - for brick in cold_tier_bricks: - subvols['cold_tier_subvols'].append([brick]) - - # Replicate or Distribute-Replicate volume - elif (cold_tier_type == 'Replicate' or - cold_tier_type == 'Distributed-Replicate'): - rep_count = int( - (volinfo[volname]["bricks"]['coldBricks'] - ['numberOfBricks']).split("=", 1)[0].split("x")[1].strip() - ) - subvol_list = ( - [cold_tier_bricks[i:i + rep_count] - for i in range(0, len(cold_tier_bricks), rep_count)]) - subvols['cold_tier_subvols'] = subvol_list - - # Disperse or Distribute-Disperse volume - elif (cold_tier_type == 'Disperse' or - cold_tier_type == 'Distributed-Disperse'): - disp_count = sum( - [int(nums) for nums in ( - (volinfo[volname]["bricks"]['coldBricks'] - ['numberOfBricks']).split("x", 1)[1]. - strip().split("=")[0].strip().strip("()"). - split()) if nums.isdigit()]) - subvol_list = [cold_tier_bricks[i:i + disp_count] - for i in range(0, len(cold_tier_bricks), - disp_count)] - subvols['cold_tier_subvols'] = subvol_list - return subvols - tmp = volinfo[volname]["bricks"]["brick"] bricks = [x["name"] for x in tmp if "name" in x] if voltype == 'Replicate' or voltype == 'Distributed-Replicate': @@ -708,29 +601,6 @@ def get_subvols(mnode, volname): return subvols -def is_tiered_volume(mnode, volname): - """Check if volume is tiered volume. - - Args: - mnode (str): Node on which commands are executed. - volname (str): Name of the volume. - - Returns: - bool : True if the volume is tiered volume. False otherwise - NoneType: None if volume does not exist. 
- """ - volinfo = get_volume_info(mnode, volname) - if volinfo is None: - g.log.error("Unable to get the volume info for volume %s", volname) - return None - - voltype = volinfo[volname]['typeStr'] - if voltype == 'Tier': - return True - else: - return False - - def is_distribute_volume(mnode, volname): """Check if volume is a plain distributed volume @@ -747,20 +617,10 @@ def is_distribute_volume(mnode, volname): g.log.error("Unable to check if the volume %s is distribute", volname) return False - if volume_type_info['is_tier']: - hot_tier_type = (volume_type_info['hot_tier_type_info'] - ['hotBrickType']) - cold_tier_type = (volume_type_info['cold_tier_type_info'] - ['coldBrickType']) - if hot_tier_type == 'Distribute' and cold_tier_type == 'Distribute': - return True - else: - return False + if volume_type_info['volume_type_info']['typeStr'] == 'Distribute': + return True else: - if volume_type_info['volume_type_info']['typeStr'] == 'Distribute': - return True - else: - return False + return False def get_volume_type_info(mnode, volname): @@ -774,9 +634,6 @@ def get_volume_type_info(mnode, volname): dict : Dict containing the keys, values defining the volume type: Example: volume_type_info = { - 'is_tier': False, - 'hot_tier_type_info': {}, - 'cold_tier_type_info': {}, 'volume_type_info': { 'typeStr': 'Disperse', 'replicaCount': '1', @@ -788,18 +645,6 @@ def get_volume_type_info(mnode, volname): } volume_type_info = { - 'is_tier': True, - 'hot_tier_type_info': { - 'hotBrickType': 'Distribute', - 'hotreplicaCount': '1' - }, - 'cold_tier_type_info': { - 'coldBrickType': 'Disperse', - 'coldreplicaCount': '1', - 'coldarbiterCount': '0', - 'colddisperseCount': '3', - 'numberOfBricks':1 - }, 'volume_type_info': {} @@ -810,138 +655,26 @@ def get_volume_type_info(mnode, volname): g.log.error("Unable to get the volume info for volume %s", volname) return None - volume_type_info = { - 'is_tier': False, - 'hot_tier_type_info': {}, - 'cold_tier_type_info': {}, - 
'volume_type_info': {} - } - - voltype = volinfo[volname]['typeStr'] - if voltype == 'Tier': - volume_type_info['is_tier'] = True - - hot_tier_type_info = get_hot_tier_type_info(mnode, volname) - volume_type_info['hot_tier_type_info'] = hot_tier_type_info - - cold_tier_type_info = get_cold_tier_type_info(mnode, volname) - volume_type_info['cold_tier_type_info'] = cold_tier_type_info - - else: - non_tiered_volume_type_info = { - 'typeStr': '', - 'replicaCount': '', - 'arbiterCount': '', - 'stripeCount': '', - 'disperseCount': '', - 'redundancyCount': '' - } - for key in non_tiered_volume_type_info.keys(): - if key in volinfo[volname]: - non_tiered_volume_type_info[key] = volinfo[volname][key] - else: - g.log.error("Unable to find key '%s' in the volume info for " - "the volume %s", key, volname) - non_tiered_volume_type_info[key] = None - volume_type_info['volume_type_info'] = non_tiered_volume_type_info - - return volume_type_info - - -def get_cold_tier_type_info(mnode, volname): - """Returns cold tier type information for the specified volume. - - Args: - mnode (str): Node on which commands are executed. - volname (str): Name of the volume. - - Returns: - dict : Dict containing the keys, values defining the cold tier type: - Example: - cold_tier_type_info = { - 'coldBrickType': 'Disperse', - 'coldreplicaCount': '1', - 'coldarbiterCount': '0', - 'colddisperseCount': '3', - 'numberOfBricks': '3' - } - NoneType: None if volume does not exist or is not a tiered volume or - any other key errors. 
- """ - volinfo = get_volume_info(mnode, volname) - if volinfo is None: - g.log.error("Unable to get the volume info for volume %s", volname) - return None - - if not is_tiered_volume(mnode, volname): - g.log.error("Volume %s is not a tiered volume", volname) - return None - - cold_tier_type_info = { - 'coldBrickType': '', - 'coldreplicaCount': '', - 'coldarbiterCount': '', - 'colddisperseCount': '', - 'numberOfBricks': '' - } - for key in cold_tier_type_info.keys(): - if key in volinfo[volname]['bricks']['coldBricks']: - cold_tier_type_info[key] = (volinfo[volname]['bricks'] - ['coldBricks'][key]) - else: - g.log.error("Unable to find key '%s' in the volume info for the " - "volume %s", key, volname) - return None - - if 'Disperse' in cold_tier_type_info['coldBrickType']: - redundancy_count = (cold_tier_type_info['numberOfBricks']. - split("x", 1)[1].strip(). - split("=")[0].strip().strip("()").split()[2]) - cold_tier_type_info['coldredundancyCount'] = redundancy_count - - return cold_tier_type_info - - -def get_hot_tier_type_info(mnode, volname): - """Returns hot tier type information for the specified volume. - - Args: - mnode (str): Node on which commands are executed. - volname (str): Name of the volume. - - Returns: - dict : Dict containing the keys, values defining the hot tier type: - Example: - hot_tier_type_info = { - 'hotBrickType': 'Distribute', - 'hotreplicaCount': '1' - } - NoneType: None if volume does not exist or is not a tiered volume or - any other key errors. 
- """ - volinfo = get_volume_info(mnode, volname) - if volinfo is None: - g.log.error("Unable to get the volume info for volume %s", volname) - return None - - if not is_tiered_volume(mnode, volname): - g.log.error("Volume %s is not a tiered volume", volname) - return None - - hot_tier_type_info = { - 'hotBrickType': '', - 'hotreplicaCount': '' - } - for key in hot_tier_type_info.keys(): - if key in volinfo[volname]['bricks']['hotBricks']: - hot_tier_type_info[key] = (volinfo[volname]['bricks']['hotBricks'] - [key]) + volume_type_info = {'volume_type_info': {}} + + all_volume_type_info = { + 'typeStr': '', + 'replicaCount': '', + 'arbiterCount': '', + 'stripeCount': '', + 'disperseCount': '', + 'redundancyCount': '' + } + for key in all_volume_type_info.keys(): + if key in volinfo[volname]: + all_volume_type_info[key] = volinfo[volname][key] else: - g.log.error("Unable to find key '%s' in the volume info for the " - "volume %s", key, volname) - return None + g.log.error("Unable to find key '%s' in the volume info for " + "the volume %s", key, volname) + all_volume_type_info[key] = None + volume_type_info['volume_type_info'] = all_volume_type_info - return hot_tier_type_info + return volume_type_info def get_num_of_bricks_per_subvol(mnode, volname): @@ -956,86 +689,21 @@ def get_num_of_bricks_per_subvol(mnode, volname): number of bricks per subvol Example: num_of_bricks_per_subvol = { - 'is_tier': False, - 'hot_tier_num_of_bricks_per_subvol': None, - 'cold_tier_num_of_bricks_per_subvol': None, 'volume_num_of_bricks_per_subvol': 2 } - num_of_bricks_per_subvol = { - 'is_tier': True, - 'hot_tier_num_of_bricks_per_subvol': 3, - 'cold_tier_num_of_bricks_per_subvol': 2, - 'volume_num_of_bricks_per_subvol': None - } - - NoneType: None if volume does not exist or is a tiered volume. + NoneType: None if volume does not exist. 
""" - bricks_per_subvol_dict = { - 'is_tier': False, - 'hot_tier_num_of_bricks_per_subvol': None, - 'cold_tier_num_of_bricks_per_subvol': None, - 'volume_num_of_bricks_per_subvol': None - } + bricks_per_subvol_dict = {'volume_num_of_bricks_per_subvol': None} subvols_dict = get_subvols(mnode, volname) if subvols_dict['volume_subvols']: bricks_per_subvol_dict['volume_num_of_bricks_per_subvol'] = ( len(subvols_dict['volume_subvols'][0])) - else: - if (subvols_dict['hot_tier_subvols'] and - subvols_dict['cold_tier_subvols']): - bricks_per_subvol_dict['is_tier'] = True - bricks_per_subvol_dict['hot_tier_num_of_bricks_per_subvol'] = ( - len(subvols_dict['hot_tier_subvols'][0])) - bricks_per_subvol_dict['cold_tier_num_of_bricks_per_subvol'] = ( - len(subvols_dict['cold_tier_subvols'][0])) return bricks_per_subvol_dict -def get_cold_tier_num_of_bricks_per_subvol(mnode, volname): - """Returns number of bricks per subvol in cold tier - - Args: - mnode (str): Node on which commands are executed. - volname (str): Name of the volume. - - Returns: - int : Number of bricks per subvol on cold tier. - NoneType: None if volume does not exist or not a tiered volume. - """ - if not is_tiered_volume(mnode, volname): - g.log.error("Volume %s is not a tiered volume", volname) - return None - subvols_dict = get_subvols(mnode, volname) - if subvols_dict['cold_tier_subvols']: - return len(subvols_dict['cold_tier_subvols'][0]) - else: - return None - - -def get_hot_tier_num_of_bricks_per_subvol(mnode, volname): - """Returns number of bricks per subvol in hot tier - - Args: - mnode (str): Node on which commands are executed. - volname (str): Name of the volume. - - Returns: - int : Number of bricks per subvol on hot tier. - NoneType: None if volume does not exist or not a tiered volume. 
- """ - if not is_tiered_volume(mnode, volname): - g.log.error("Volume %s is not a tiered volume", volname) - return None - subvols_dict = get_subvols(mnode, volname) - if subvols_dict['hot_tier_subvols']: - return len(subvols_dict['hot_tier_subvols'][0]) - else: - return None - - def get_replica_count(mnode, volname): """Get the replica count of the volume @@ -1047,17 +715,8 @@ def get_replica_count(mnode, volname): dict : Dict contain keys, values defining Replica count of the volume. Example: replica_count_info = { - 'is_tier': False, - 'hot_tier_replica_count': None, - 'cold_tier_replica_count': None, 'volume_replica_count': 3 } - replica_count_info = { - 'is_tier': True, - 'hot_tier_replica_count': 2, - 'cold_tier_replica_count': 3, - 'volume_replica_count': None - } NoneType: None if it is parse failure. """ vol_type_info = get_volume_type_info(mnode, volname) @@ -1066,69 +725,14 @@ def get_replica_count(mnode, volname): volname) return None - replica_count_info = { - 'is_tier': False, - 'hot_tier_replica_count': None, - 'cold_tier_replica_count': None, - 'volume_replica_count': None - } + replica_count_info = {'volume_replica_count': None} - replica_count_info['is_tier'] = vol_type_info['is_tier'] - if replica_count_info['is_tier']: - replica_count_info['hot_tier_replica_count'] = ( - vol_type_info['hot_tier_type_info']['hotreplicaCount']) - replica_count_info['cold_tier_replica_count'] = ( - vol_type_info['cold_tier_type_info']['coldreplicaCount']) - - else: - replica_count_info['volume_replica_count'] = ( - vol_type_info['volume_type_info']['replicaCount']) + replica_count_info['volume_replica_count'] = ( + vol_type_info['volume_type_info']['replicaCount']) return replica_count_info -def get_cold_tier_replica_count(mnode, volname): - """Get the replica count of cold tier. - - Args: - mnode (str): Node on which commands are executed. - volname (str): Name of the volume. - - Returns: - int : Replica count of the cold tier. 
- NoneType: None if volume does not exist or not a tiered volume. - """ - is_tier = is_tiered_volume(mnode, volname) - if not is_tier: - return None - else: - volinfo = get_volume_info(mnode, volname) - cold_tier_replica_count = (volinfo[volname]["bricks"]['coldBricks'] - ['coldreplicaCount']) - return cold_tier_replica_count - - -def get_hot_tier_replica_count(mnode, volname): - """Get the replica count of hot tier. - - Args: - mnode (str): Node on which commands are executed. - volname (str): Name of the volume. - - Returns: - int : Replica count of the hot tier. - NoneType: None if volume does not exist or not a tiered volume. - """ - is_tier = is_tiered_volume(mnode, volname) - if not is_tier: - return None - else: - volinfo = get_volume_info(mnode, volname) - hot_tier_replica_count = (volinfo[volname]["bricks"]['hotBricks'] - ['hotreplicaCount']) - return hot_tier_replica_count - - def get_disperse_count(mnode, volname): """Get the disperse count of the volume @@ -1140,15 +744,8 @@ def get_disperse_count(mnode, volname): dict : Dict contain keys, values defining Disperse count of the volume. Example: disperse_count_info = { - 'is_tier': False, - 'cold_tier_disperse_count': None, 'volume_disperse_count': 3 } - disperse_count_info = { - 'is_tier': True, - 'cold_tier_disperse_count': 3, - 'volume_disperse_count': None - } None: If it is non dispersed volume. 
""" vol_type_info = get_volume_type_info(mnode, volname) @@ -1157,45 +754,14 @@ def get_disperse_count(mnode, volname): volname) return None - disperse_count_info = { - 'is_tier': False, - 'cold_tier_disperse_count': None, - 'volume_disperse_count': None - } - - disperse_count_info['is_tier'] = vol_type_info['is_tier'] - if disperse_count_info['is_tier']: - disperse_count_info['cold_tier_disperse_count'] = ( - vol_type_info['cold_tier_type_info']['colddisperseCount']) + disperse_count_info = {'volume_disperse_count': None} - else: - disperse_count_info['volume_disperse_count'] = ( + disperse_count_info['volume_disperse_count'] = ( vol_type_info['volume_type_info']['disperseCount']) return disperse_count_info -def get_cold_tier_disperse_count(mnode, volname): - """Get the disperse count of cold tier. - - Args: - mnode (str): Node on which commands are executed. - volname (str): Name of the volume. - - Returns: - int : disperse count of the cold tier. - NoneType: None if volume does not exist or not a tiered volume. - """ - is_tier = is_tiered_volume(mnode, volname) - if not is_tier: - return None - else: - volinfo = get_volume_info(mnode, volname) - cold_tier_disperse_count = (volinfo[volname]["bricks"]['coldBricks'] - ['colddisperseCount']) - return cold_tier_disperse_count - - def enable_and_validate_volume_options(mnode, volname, volume_options_list, time_delay=5): """Enable the volume option and validate whether the option has be @@ -1242,7 +808,6 @@ def enable_and_validate_volume_options(mnode, volname, volume_options_list, def form_bricks_list_to_add_brick(mnode, volname, servers, all_servers_info, - add_to_hot_tier=False, **kwargs): """Forms list of bricks to add-bricks to the volume. @@ -1265,9 +830,6 @@ def form_bricks_list_to_add_brick(mnode, volname, servers, all_servers_info, } } Kwargs: - add_to_hot_tier (bool): True If bricks are to be added to hot_tier. - False otherwise. Defaults to False. 
- The keys, values in kwargs are: - replica_count : (int)|None. Increase the current_replica_count by replica_count @@ -1306,19 +868,8 @@ def form_bricks_list_to_add_brick(mnode, volname, servers, all_servers_info, bricks_per_subvol_dict = get_num_of_bricks_per_subvol(mnode, volname) # Get number of bricks to add. - if bricks_per_subvol_dict['is_tier']: - if add_to_hot_tier: - num_of_bricks_per_subvol = ( - bricks_per_subvol_dict['hot_tier_num_of_bricks_per_subvol'] - ) - else: - num_of_bricks_per_subvol = ( - bricks_per_subvol_dict - ['cold_tier_num_of_bricks_per_subvol'] - ) - else: - num_of_bricks_per_subvol = ( - bricks_per_subvol_dict['volume_num_of_bricks_per_subvol']) + num_of_bricks_per_subvol = ( + bricks_per_subvol_dict['volume_num_of_bricks_per_subvol']) if num_of_bricks_per_subvol is None: g.log.error("Number of bricks per subvol is None. " @@ -1334,15 +885,7 @@ def form_bricks_list_to_add_brick(mnode, volname, servers, all_servers_info, if replica_count: # Get Subvols subvols_info = get_subvols(mnode, volname) - - # Calculate number of bricks to add - if subvols_info['is_tier']: - if add_to_hot_tier: - num_of_subvols = len(subvols_info['hot_tier_subvols']) - else: - num_of_subvols = len(subvols_info['cold_tier_subvols']) - else: - num_of_subvols = len(subvols_info['volume_subvols']) + num_of_subvols = len(subvols_info['volume_subvols']) if num_of_subvols == 0: g.log.error("No Sub-Volumes available for the volume %s." @@ -1380,7 +923,7 @@ def form_bricks_list_to_add_brick(mnode, volname, servers, all_servers_info, def expand_volume(mnode, volname, servers, all_servers_info, force=False, - add_to_hot_tier=False, **kwargs): + **kwargs): """Forms list of bricks to add and adds those bricks to the volume. Args: @@ -1406,9 +949,6 @@ def expand_volume(mnode, volname, servers, all_servers_info, force=False, will get executed with force option. 
If it is set to False, then add-brick command will get executed without force option - add_to_hot_tier (bool): True If bricks are to be added to hot_tier. - False otherwise. Defaults to False. - **kwargs The keys, values in kwargs are: - replica_count : (int)|None. @@ -1420,11 +960,9 @@ def expand_volume(mnode, volname, servers, all_servers_info, force=False, bool: True of expanding volumes is successful. False otherwise. - NOTE: adding bricks to hot tier is yet to be added in this function. """ bricks_list = form_bricks_list_to_add_brick(mnode, volname, servers, - all_servers_info, - add_to_hot_tier, **kwargs) + all_servers_info, **kwargs) if not bricks_list: g.log.info("Unable to get bricks list to add-bricks. " @@ -1436,17 +974,8 @@ def expand_volume(mnode, volname, servers, all_servers_info, force=False, # Get replica count info. replica_count_info = get_replica_count(mnode, volname) - - if is_tiered_volume(mnode, volname): - if add_to_hot_tier: - current_replica_count = ( - int(replica_count_info['hot_tier_replica_count'])) - else: - current_replica_count = ( - int(replica_count_info['cold_tier_replica_count'])) - else: - current_replica_count = ( - int(replica_count_info['volume_replica_count'])) + current_replica_count = ( + int(replica_count_info['volume_replica_count'])) kwargs['replica_count'] = current_replica_count + replica_count @@ -1462,8 +991,7 @@ def expand_volume(mnode, volname, servers, all_servers_info, force=False, def form_bricks_list_to_remove_brick(mnode, volname, subvol_num=None, - replica_num=None, - remove_from_hot_tier=False, **kwargs): + replica_num=None, **kwargs): """Form bricks list for removing the bricks. Args: @@ -1480,9 +1008,6 @@ def form_bricks_list_to_remove_brick(mnode, volname, subvol_num=None, If replica_num = 0, then 1st brick from each subvolume is removed. the replica_num starts from 0. - remove_from_hot_tier (bool): True If bricks are to be removed from - hot_tier. False otherwise. Defaults to False. 
- **kwargs The keys, values in kwargs are: - replica_count : (int)|None. Specify the number of replicas @@ -1525,27 +1050,13 @@ def form_bricks_list_to_remove_brick(mnode, volname, subvol_num=None, is_arbiter = False # Calculate bricks to remove - if subvols_info['is_tier']: - if remove_from_hot_tier: - current_replica_count = ( - int(replica_count_info['hot_tier_replica_count'])) - subvols_list = subvols_info['hot_tier_subvols'] - else: - current_replica_count = ( - int(replica_count_info['cold_tier_replica_count'])) - subvols_list = subvols_info['cold_tier_subvols'] - arbiter_count = int(volume_type_info['cold_tier_type_info'] - ['coldarbiterCount']) - if arbiter_count == 1: - is_arbiter = True - else: - current_replica_count = ( - int(replica_count_info['volume_replica_count'])) - subvols_list = subvols_info['volume_subvols'] - arbiter_count = int(volume_type_info['volume_type_info'] - ['arbiterCount']) - if arbiter_count == 1: - is_arbiter = True + current_replica_count = ( + int(replica_count_info['volume_replica_count'])) + subvols_list = subvols_info['volume_subvols'] + arbiter_count = int(volume_type_info['volume_type_info'] + ['arbiterCount']) + if arbiter_count == 1: + is_arbiter = True # If replica_num is specified select the bricks of that replica number # from all the subvolumes. 
@@ -1591,14 +1102,7 @@ def form_bricks_list_to_remove_brick(mnode, volname, subvol_num=None, # remove bricks from sub-volumes if subvol_num is not None or 'distribute_count' in kwargs: - if subvols_info['is_tier']: - if remove_from_hot_tier: - subvols_list = subvols_info['hot_tier_subvols'] - else: - subvols_list = subvols_info['cold_tier_subvols'] - else: - subvols_list = subvols_info['volume_subvols'] - + subvols_list = subvols_info['volume_subvols'] if not subvols_list: g.log.error("No Sub-Volumes available for the volume %s", volname) return None @@ -1634,7 +1138,7 @@ def form_bricks_list_to_remove_brick(mnode, volname, subvol_num=None, def shrink_volume(mnode, volname, subvol_num=None, replica_num=None, force=False, rebalance_timeout=300, delete_bricks=True, - remove_from_hot_tier=False, **kwargs): + **kwargs): """Remove bricks from the volume. Args: @@ -1661,9 +1165,6 @@ def shrink_volume(mnode, volname, subvol_num=None, replica_num=None, delete_bricks (bool): After remove-brick delete the removed bricks. - remove_from_hot_tier (bool): True If bricks are to be removed from - hot_tier. False otherwise. Defaults to False. - **kwargs The keys, values in kwargs are: - replica_count : (int)|None. Specify the replica count to @@ -1674,12 +1175,10 @@ def shrink_volume(mnode, volname, subvol_num=None, replica_num=None, bool: True if removing bricks from the volume is successful. False otherwise. - NOTE: remove-bricks from hot-tier is yet to be added in this function. """ # Form bricks list to remove-bricks bricks_list_to_remove = form_bricks_list_to_remove_brick( - mnode, volname, subvol_num, replica_num, remove_from_hot_tier, - **kwargs) + mnode, volname, subvol_num, replica_num, **kwargs) if not bricks_list_to_remove: g.log.error("Failed to form bricks list to remove-brick. " @@ -1698,16 +1197,8 @@ def shrink_volume(mnode, volname, subvol_num=None, replica_num=None, # Get replica count info. 
replica_count_info = get_replica_count(mnode, volname) - if is_tiered_volume(mnode, volname): - if remove_from_hot_tier: - current_replica_count = ( - int(replica_count_info['hot_tier_replica_count'])) - else: - current_replica_count = ( - int(replica_count_info['cold_tier_replica_count'])) - else: - current_replica_count = ( - int(replica_count_info['volume_replica_count'])) + current_replica_count = ( + int(replica_count_info['volume_replica_count'])) kwargs['replica_count'] = current_replica_count - replica_count @@ -1825,8 +1316,7 @@ def shrink_volume(mnode, volname, subvol_num=None, replica_num=None, def form_bricks_to_replace_brick(mnode, volname, servers, all_servers_info, - src_brick=None, dst_brick=None, - replace_brick_from_hot_tier=False): + src_brick=None, dst_brick=None): """Get src_brick, dst_brick to replace brick Args: @@ -1853,9 +1343,6 @@ def form_bricks_to_replace_brick(mnode, volname, servers, all_servers_info, dst_brick (str): New brick to replace the faulty brick - replace_brick_from_hot_tier (bool): True If brick are to be - replaced from hot_tier. False otherwise. Defaults to False. - Returns: Tuple: (src_brick, dst_brick) Nonetype: if volume doesn't exists or any other failure. @@ -1881,13 +1368,7 @@ def form_bricks_to_replace_brick(mnode, volname, servers, all_servers_info, if not src_brick: # Randomly pick up a brick to bring the brick down and replace. 
-            if subvols_info['is_tier']:
-                if replace_brick_from_hot_tier:
-                    subvols_list = subvols_info['hot_tier_subvols']
-                else:
-                    subvols_list = subvols_info['cold_tier_subvols']
-            else:
-                subvols_list = subvols_info['volume_subvols']
+            subvols_list = subvols_info['volume_subvols']
 
             src_brick = (random.choice(random.choice(subvols_list)))
 
@@ -1896,8 +1377,7 @@ def form_bricks_to_replace_brick(mnode, volname, servers, all_servers_info,
 
 def replace_brick_from_volume(mnode, volname, servers, all_servers_info,
                               src_brick=None, dst_brick=None,
-                              delete_brick=True,
-                              replace_brick_from_hot_tier=False):
+                              delete_brick=True, multi_vol=False):
     """Replace faulty brick from the volume.
 
     Args:
@@ -1926,8 +1406,9 @@ def replace_brick_from_volume(mnode, volname, servers, all_servers_info,
 
         delete_bricks (bool): After remove-brick delete the removed bricks.
 
-        replace_brick_from_hot_tier (bool): True If brick are to be
-            replaced from hot_tier. False otherwise. Defaults to False.
+        multi_vol (bool): True, if bricks need to be created for multiple
+            volumes (more than 5)
+            False, otherwise. By default, value is set to False.
 
     Returns:
         bool: True if replacing brick from the volume is successful.
@@ -1945,10 +1426,17 @@ def replace_brick_from_volume(mnode, volname, servers, all_servers_info, subvols_info = get_subvols(mnode, volname) if not dst_brick: - dst_brick = form_bricks_list(mnode=mnode, volname=volname, - number_of_bricks=1, - servers=servers, - servers_info=all_servers_info) + if multi_vol: + dst_brick = form_bricks_for_multivol(mnode=mnode, + volname=volname, + number_of_bricks=1, + servers=servers, + servers_info=all_servers_info) + else: + dst_brick = form_bricks_list(mnode=mnode, volname=volname, + number_of_bricks=1, + servers=servers, + servers_info=all_servers_info) if not dst_brick: g.log.error("Failed to get a new brick to replace the faulty " "brick") @@ -1957,13 +1445,7 @@ def replace_brick_from_volume(mnode, volname, servers, all_servers_info, if not src_brick: # Randomly pick up a brick to bring the brick down and replace. - if subvols_info['is_tier']: - if replace_brick_from_hot_tier: - subvols_list = subvols_info['hot_tier_subvols'] - else: - subvols_list = subvols_info['cold_tier_subvols'] - else: - subvols_list = subvols_info['volume_subvols'] + subvols_list = subvols_info['volume_subvols'] src_brick = (random.choice(random.choice(subvols_list))) @@ -2028,17 +1510,6 @@ def get_client_quorum_info(mnode, volname): Returns: dict: client quorum information for the volume. client_quorum_dict = { - 'is_tier': False, - 'hot_tier_quorum_info':{ - 'is_quorum_applicable': False, - 'quorum_type': None, - 'quorum_count': None - }, - 'cold_tier_quorum_info':{ - 'is_quorum_applicable': False, - 'quorum_type': None, - 'quorum_count': None - }, 'volume_quorum_info':{ 'is_quorum_applicable': False, 'quorum_type': None, @@ -2048,17 +1519,6 @@ def get_client_quorum_info(mnode, volname): NoneType: None if volume does not exist. 
""" client_quorum_dict = { - 'is_tier': False, - 'hot_tier_quorum_info': { - 'is_quorum_applicable': False, - 'quorum_type': None, - 'quorum_count': None - }, - 'cold_tier_quorum_info': { - 'is_quorum_applicable': False, - 'quorum_type': None, - 'quorum_count': None - }, 'volume_quorum_info': { 'is_quorum_applicable': False, 'quorum_type': None, @@ -2084,111 +1544,37 @@ def get_client_quorum_info(mnode, volname): # Set the quorum info volume_type_info = get_volume_type_info(mnode, volname) - if volume_type_info['is_tier'] is True: - client_quorum_dict['is_tier'] = True - - # Hot Tier quorum info - hot_tier_type = volume_type_info['hot_tier_type_info']['hotBrickType'] - if (hot_tier_type == 'Replicate' or - hot_tier_type == 'Distributed-Replicate'): - - (client_quorum_dict['hot_tier_quorum_info'] - ['is_quorum_applicable']) = True - replica_count = (volume_type_info['hot_tier_type_info'] - ['hotreplicaCount']) - - # Case1: Replica 2 - if int(replica_count) == 2: - if 'none' not in quorum_type: - (client_quorum_dict['hot_tier_quorum_info'] - ['quorum_type']) = quorum_type - - if quorum_type == 'fixed': - if not quorum_count == '(null)': - (client_quorum_dict['hot_tier_quorum_info'] - ['quorum_count']) = quorum_count - - # Case2: Replica > 2 - if int(replica_count) > 2: - if quorum_type == 'none': - (client_quorum_dict['hot_tier_quorum_info'] - ['quorum_type']) = 'auto' - quorum_type == 'auto' - else: - (client_quorum_dict['hot_tier_quorum_info'] - ['quorum_type']) = quorum_type - if quorum_type == 'fixed': - if not quorum_count == '(null)': - (client_quorum_dict['hot_tier_quorum_info'] - ['quorum_count']) = quorum_count - - # Cold Tier quorum info - cold_tier_type = (volume_type_info['cold_tier_type_info'] - ['coldBrickType']) - if (cold_tier_type == 'Replicate' or - cold_tier_type == 'Distributed-Replicate'): - (client_quorum_dict['cold_tier_quorum_info'] - ['is_quorum_applicable']) = True - replica_count = (volume_type_info['cold_tier_type_info'] - 
['coldreplicaCount']) - - # Case1: Replica 2 - if int(replica_count) == 2: - if 'none' not in quorum_type: - (client_quorum_dict['cold_tier_quorum_info'] - ['quorum_type']) = quorum_type - - if quorum_type == 'fixed': - if not quorum_count == '(null)': - (client_quorum_dict['cold_tier_quorum_info'] - ['quorum_count']) = quorum_count - - # Case2: Replica > 2 - if int(replica_count) > 2: - if quorum_type == 'none': - (client_quorum_dict['cold_tier_quorum_info'] - ['quorum_type']) = 'auto' - quorum_type == 'auto' - else: - (client_quorum_dict['cold_tier_quorum_info'] - ['quorum_type']) = quorum_type - if quorum_type == 'fixed': - if not quorum_count == '(null)': - (client_quorum_dict['cold_tier_quorum_info'] - ['quorum_count']) = quorum_count - else: - volume_type = (volume_type_info['volume_type_info']['typeStr']) - if (volume_type == 'Replicate' or - volume_type == 'Distributed-Replicate'): - (client_quorum_dict['volume_quorum_info'] - ['is_quorum_applicable']) = True - replica_count = (volume_type_info['volume_type_info'] - ['replicaCount']) - - # Case1: Replica 2 - if int(replica_count) == 2: - if 'none' not in quorum_type: - (client_quorum_dict['volume_quorum_info'] - ['quorum_type']) = quorum_type + volume_type = (volume_type_info['volume_type_info']['typeStr']) + if (volume_type == 'Replicate' or + volume_type == 'Distributed-Replicate'): + (client_quorum_dict['volume_quorum_info'] + ['is_quorum_applicable']) = True + replica_count = (volume_type_info['volume_type_info']['replicaCount']) + + # Case1: Replica 2 + if int(replica_count) == 2: + if 'none' not in quorum_type: + (client_quorum_dict['volume_quorum_info'] + ['quorum_type']) = quorum_type - if quorum_type == 'fixed': - if not quorum_count == '(null)': - (client_quorum_dict['volume_quorum_info'] - ['quorum_count']) = quorum_count - - # Case2: Replica > 2 - if int(replica_count) > 2: - if quorum_type == 'none': - (client_quorum_dict['volume_quorum_info'] - ['quorum_type']) = 'auto' - quorum_type == 'auto' 
- else: - (client_quorum_dict['volume_quorum_info'] - ['quorum_type']) = quorum_type if quorum_type == 'fixed': if not quorum_count == '(null)': (client_quorum_dict['volume_quorum_info'] - ['quorum_count']) = quorum_count + ['quorum_count']) = quorum_count + + # Case2: Replica > 2 + if int(replica_count) > 2: + if quorum_type == 'none': + (client_quorum_dict['volume_quorum_info'] + ['quorum_type']) = 'auto' + quorum_type == 'auto' + else: + (client_quorum_dict['volume_quorum_info'] + ['quorum_type']) = quorum_type + if quorum_type == 'fixed': + if not quorum_count == '(null)': + (client_quorum_dict['volume_quorum_info'] + ['quorum_count']) = quorum_count return client_quorum_dict @@ -2300,7 +1686,8 @@ def get_volume_type(brickdir_path): # Adding import here to avoid cyclic imports from glustolibs.gluster.brick_libs import get_all_bricks (host, brick_path_info) = brickdir_path.split(':') - path_info = brick_path_info[:-1] + path_info = (brick_path_info[:-2] if brick_path_info.endswith("//") + else brick_path_info[:-1]) for volume in get_volume_list(host): brick_paths = [brick.split(':')[1] for brick in get_all_bricks(host, volume)] @@ -2321,3 +1708,62 @@ def get_volume_type(brickdir_path): else: g.log.info("Failed to find brick-path %s for volume %s", brickdir_path, volume) + + +def parse_vol_file(mnode, vol_file): + """ Parses the .vol file and returns the content as a dict + Args: + mnode (str): Node on which commands will be executed. 
+ vol_file(str) : Path to the .vol file + Returns: + (dict): Content of the .vol file + None : if failure happens + Example: + >>> ret = parse_vol_file("abc@xyz.com", + "/var/lib/glusterd/vols/testvol_distributed/ + trusted-testvol_distributed.tcp-fuse.vol") + {'testvol_distributed-client-0': {'type': 'protocol/client', + 'option': {'send-gids': 'true','transport.socket.keepalive-count': '9', + 'transport.socket.keepalive-interval': '2', + 'transport.socket.keepalive-time': '20', + 'transport.tcp-user-timeout': '0', + 'transport.socket.ssl-enabled': 'off', 'password': + 'bcc934b3-9e76-47fd-930c-c31ad9f6e2f0', 'username': + '23bb8f1c-b373-4f85-8bab-aaa77b4918ce', 'transport.address-family': + 'inet', 'transport-type': 'tcp', 'remote-subvolume': + '/gluster/bricks/brick1/testvol_distributed_brick0', + 'remote-host': 'xx.xx.xx.xx', 'ping-timeout': '42'}}} + """ + vol_dict, data, key = {}, {}, None + + def _create_dict_from_list(cur_dict, keys, value): + """Creates dynamic dictionary from a given list of keys and values""" + if len(keys) == 1: + cur_dict[keys[0]] = value + return + if keys[0] not in cur_dict: + cur_dict[keys[0]] = {} + _create_dict_from_list(cur_dict[keys[0]], keys[1:], value) + + ret, file_contents, err = g.run(mnode, "cat {}".format(vol_file)) + if ret: + g.log.error("Failed to read the .vol file : %s", err) + return None + if not file_contents: + g.log.error("The given .vol file is empty") + return None + for line in file_contents.split("\n"): + if line: + line = line.strip() + if line.startswith('end-volume'): + vol_dict[key] = data + data = {} + elif line.startswith("volume "): + key = line.split(" ")[-1] + elif line.startswith("subvolumes "): + key_list = line.split(" ")[0] + _create_dict_from_list(data, [key_list], line.split(" ")[1:]) + else: + key_list = line.split(" ")[:-1] + _create_dict_from_list(data, key_list, line.split(" ")[-1]) + return vol_dict diff --git a/glustolibs-gluster/glustolibs/gluster/volume_ops.py 
b/glustolibs-gluster/glustolibs/gluster/volume_ops.py index 8445efa11..d25a9349b 100644 --- a/glustolibs-gluster/glustolibs/gluster/volume_ops.py +++ b/glustolibs-gluster/glustolibs/gluster/volume_ops.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (C) 2015-2016 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -20,6 +20,11 @@ import re import copy from glusto.core import Glusto as g from pprint import pformat +import io +try: + import ConfigParser as configparser # Python 2 +except ImportError: + import configparser as configparser # Python 3 try: import xml.etree.cElementTree as etree except ImportError: @@ -233,15 +238,8 @@ def volume_delete(mnode, volname, xfail=False): ) return False - if volinfo[volname]['typeStr'] == 'Tier': - tmp_hot_brick = volinfo[volname]["bricks"]["hotBricks"]["brick"] - hot_bricks = [x["name"] for x in tmp_hot_brick if "name" in x] - tmp_cold_brick = volinfo[volname]["bricks"]["coldBricks"]["brick"] - cold_bricks = [x["name"] for x in tmp_cold_brick if "name" in x] - bricks = hot_bricks + cold_bricks - else: - bricks = [x["name"] for x in volinfo[volname]["bricks"]["brick"] - if "name" in x] + bricks = [x["name"] for x in volinfo[volname]["bricks"]["brick"] if + "name" in x] ret, out, err = g.run(mnode, "gluster volume delete {} --mode=script" .format(volname)) if ret != 0: @@ -387,27 +385,34 @@ def get_volume_status(mnode, volname='all', service='', options=''): NoneType: on failure Example: - get_volume_status("10.70.47.89", volname="testvol") - >>>{'testvol': {'10.70.47.89': {'/bricks/brick1/a11': {'status': '1', - 'pid': '28963', 'bricktype': 'cold', 'port': '49163', 'peerid': - '7fc9015e-8134-4753-b837-54cbc6030c98', 'ports': {'rdma': 'N/A', - 'tcp': '49163'}}, '/bricks/brick2/a31': {'status': '1', 'pid': - '28982', 'bricktype': 
'cold', 'port': '49164', 'peerid': - '7fc9015e-8134-4753-b837-54cbc6030c98', 'ports': {'rdma': 'N/A', - 'tcp': '49164'}}, 'NFS Server': {'status': '1', 'pid': '30525', - 'port': '2049', 'peerid': '7fc9015e-8134-4753-b837-54cbc6030c98', - 'ports': {'rdma': 'N/A', 'tcp': '2049'}}, '/bricks/brick1/a12': - {'status': '1', 'pid': '30505', 'bricktype': 'hot', 'port': '49165', - 'peerid': '7fc9015e-8134-4753-b837-54cbc6030c98', 'ports': {'rdma': - 'N/A', 'tcp': '49165'}}}, '10.70.47.118': {'/bricks/brick1/a21': - {'status': '1', 'pid': '5427', 'bricktype': 'cold', 'port': '49162', - 'peerid': '5397d8f5-2986-453a-b0b5-5c40a9bb87ff', 'ports': {'rdma': - 'N/A', 'tcp': '49162'}}, '/bricks/brick2/a41': {'status': '1', 'pid': - '5446', 'bricktype': 'cold', 'port': '49163', 'peerid': - '5397d8f5-2986-453a-b0b5-5c40a9bb87ff', 'ports': {'rdma': 'N/A', - 'tcp': '49163'}}, 'NFS Server': {'status': '1', 'pid': '6397', 'port': - '2049', 'peerid': '5397d8f5-2986-453a-b0b5-5c40a9bb87ff', 'ports': - {'rdma': 'N/A', 'tcp': '2049'}}}}} + get_volume_status(host1, volname="testvol_replicated") + >>>{'testvol_replicated': {'host1': {'Self-heal Daemon': {'status': + '1', 'pid': '2479', 'port': 'N/A', 'peerid': + 'b7a02af9-eea4-4657-8b86-3b21ec302f48', 'ports': {'rdma': 'N/A', + 'tcp': 'N/A'}}, '/bricks/brick4/testvol_replicated_brick2': {'status': + '1', 'pid': '2468', 'bricktype': 'None', 'port': '49160', 'peerid': + 'b7a02af9-eea4-4657-8b86-3b21ec302f48', 'ports': {'rdma': 'N/A', + 'tcp': '49160'}}}, 'host2': {'Self-heal Daemon': {'status': '1', + 'pid': '2513', 'port': 'N/A', 'peerid': + '7f6fb9ed-3e0b-4f27-89b3-9e4f836c2332', 'ports': {'rdma': 'N/A', + 'tcp': 'N/A'}}, '/bricks/brick4/testvol_replicated_brick1': {'status': + '1', 'pid': '2456', 'bricktype': 'None', 'port': '49160', 'peerid': + '7f6fb9ed-3e0b-4f27-89b3-9e4f836c2332', 'ports': {'rdma': 'N/A', + 'tcp': '49160'}}}, 'host3': {'Self-heal Daemon': {'status': '1', 'pid' + : '2515', 'port': 'N/A', 'peerid': + 
'6172cfab-9d72-43b5-ba6f-612e5cfc020c', 'ports': {'rdma': 'N/A', + 'tcp': 'N/A'}}}, 'host4': {'Self-heal Daemon': {'status': '1', 'pid': + '2445', 'port': 'N/A', 'peerid': 'c16a1660-ee73-4e0f-b9c7-d2e830e39539 + ', 'ports': {'rdma': 'N/A', 'tcp': 'N/A'}}}, 'host5': + {'Self-heal Daemon': {'status': '1', 'pid': '2536', 'port': 'N/A', + 'peerid': '79ea9f52-88f0-4293-ae21-8ea13f44b58d', 'ports': + {'rdma': 'N/A', 'tcp': 'N/A'}}}, 'host6': {'Self-heal Daemon': + {'status': '1', 'pid': '2526', 'port': 'N/A', 'peerid': + 'c00a3c5e-668f-440b-860c-da43e999737b', 'ports': {'rdma': 'N/A', + 'tcp': 'N/A'}}, '/bricks/brick4/testvol_replicated_brick0': {'status': + '1', 'pid': '2503', 'bricktype': 'None', 'port': '49160', 'peerid': + 'c00a3c5e-668f-440b-860c-da43e999737b', 'ports': {'rdma': 'N/A', + 'tcp': '49160'}}}}} """ cmd = "gluster vol status %s %s %s --xml" % (volname, service, options) @@ -428,8 +433,6 @@ def get_volume_status(mnode, volname='all', service='', options=''): for volume in volume_list: tmp_dict1 = {} tmp_dict2 = {} - hot_bricks = [] - cold_bricks = [] vol_name = [vol.text for vol in volume if vol.tag == "volName"] # parsing volume status xml output @@ -449,24 +452,7 @@ def get_volume_status(mnode, volname='all', service='', options=''): elem_tag = [] for elem in volume.getchildren(): elem_tag.append(elem.tag) - if ('hotBricks' in elem_tag) or ('coldBricks' in elem_tag): - for elem in volume.getchildren(): - if (elem.tag == 'hotBricks'): - nodes = elem.findall("node") - hot_bricks = [node.find('path').text - for node in nodes - if ( - node.find('path').text.startswith('/'))] - if (elem.tag == 'coldBricks'): - for n in elem.findall("node"): - nodes.append(n) - cold_bricks = [node.find('path').text - for node in nodes - if ( - (node.find('path'). 
- text.startswith('/')))] - else: - nodes = volume.findall("node") + nodes = volume.findall("node") for each_node in nodes: if each_node.find('path').text.startswith('/'): @@ -479,12 +465,7 @@ def get_volume_status(mnode, volname='all', service='', options=''): tmp_dict3 = {} if "hostname" in node_dict.keys(): if node_dict['path'].startswith('/'): - if node_dict['path'] in hot_bricks: - node_dict["bricktype"] = 'hot' - elif node_dict['path'] in cold_bricks: - node_dict["bricktype"] = 'cold' - else: - node_dict["bricktype"] = 'None' + node_dict["bricktype"] = 'None' tmp = node_dict["path"] tmp_dict3[node_dict["path"]] = node_dict else: @@ -673,29 +654,42 @@ def get_volume_info(mnode, volname='all', xfail=False): dict: volume info in dict of dicts Example: - get_volume_info("abc.com", volname="testvol") - >>>{'testvol': {'status': '1', 'xlators': None, 'disperseCount': '0', - 'bricks': {'coldBricks': {'colddisperseCount': '0', - 'coldarbiterCount': '0', 'coldBrickType': 'Distribute', - 'coldbrickCount': '4', 'numberOfBricks': '4', 'brick': - [{'isArbiter': '0', 'name': '10.70.47.89:/bricks/brick1/a11', - 'hostUuid': '7fc9015e-8134-4753-b837-54cbc6030c98'}, {'isArbiter': - '0', 'name': '10.70.47.118:/bricks/brick1/a21', 'hostUuid': - '7fc9015e-8134-4753-b837-54cbc6030c98'}, {'isArbiter': '0', 'name': - '10.70.47.89:/bricks/brick2/a31', 'hostUuid': - '7fc9015e-8134-4753-b837-54cbc6030c98'}, {'isArbiter': '0', - 'name': '10.70.47.118:/bricks/brick2/a41', 'hostUuid': - '7fc9015e-8134-4753-b837-54cbc6030c98'}], 'coldreplicaCount': '1'}, - 'hotBricks': {'hotBrickType': 'Distribute', 'numberOfBricks': '1', - 'brick': [{'name': '10.70.47.89:/bricks/brick1/a12', 'hostUuid': - '7fc9015e-8134-4753-b837-54cbc6030c98'}], 'hotbrickCount': '1', - 'hotreplicaCount': '1'}}, 'type': '5', 'distCount': '1', - 'replicaCount': '1', 'brickCount': '5', 'options': - {'cluster.tier-mode': 'cache', 'performance.readdir-ahead': 'on', - 'features.ctr-enabled': 'on'}, 'redundancyCount': '0', 
'transport': - '0', 'typeStr': 'Tier', 'stripeCount': '1', 'arbiterCount': '0', - 'id': 'ffa8a8d1-546f-4ebf-8e82-fcc96c7e4e05', 'statusStr': 'Started', - 'optCount': '3'}} + get_volume_info("host1", volname="testvol") + >>>{'testvol': {'status': '1', 'disperseCount': '6', + 'bricks': {'brick': [{'isArbiter': '0', 'name': + 'host1:/bricks/brick6/testvol_brick0', 'hostUuid': + 'c00a3c5e-668f-440b-860c-da43e999737b'}, {'isArbiter': '0', 'name': + 'host2:/bricks/brick6/testvol_brick1', 'hostUuid': + '7f6fb9ed-3e0b-4f27-89b3-9e4f836c2332'}, {'isArbiter': '0', 'name': + 'host3:/bricks/brick6/testvol_brick2', 'hostUuid': + 'b7a02af9-eea4-4657-8b86-3b21ec302f48'}, {'isArbiter': '0', 'name': + 'host4:/bricks/brick4/testvol_brick3', 'hostUuid': + '79ea9f52-88f0-4293-ae21-8ea13f44b58d'}, {'isArbiter': '0', 'name': + 'host5:/bricks/brick2/testvol_brick4', 'hostUuid': + 'c16a1660-ee73-4e0f-b9c7-d2e830e39539'}, {'isArbiter': '0', 'name': + 'host6:/bricks/brick2/testvol_brick5', 'hostUuid': + '6172cfab-9d72-43b5-ba6f-612e5cfc020c'}, {'isArbiter': '0', 'name': + 'host1:/bricks/brick7/testvol_brick6', 'hostUuid': + 'c00a3c5e-668f-440b-860c-da43e999737b'}, {'isArbiter': '0', 'name': + 'host2:/bricks/brick7/testvol_brick7', 'hostUuid': + '7f6fb9ed-3e0b-4f27-89b3-9e4f836c2332'}, {'isArbiter': '0', 'name': + 'host3:/bricks/brick7/testvol_brick8', 'hostUuid': + 'b7a02af9-eea4-4657-8b86-3b21ec302f48'}, {'isArbiter': '0', 'name': + 'host4:/bricks/brick5/testvol_brick9', 'hostUuid': + '79ea9f52-88f0-4293-ae21-8ea13f44b58d'}, {'isArbiter': '0', 'name': + 'host5:/bricks/brick4/testvol_brick10', 'hostUuid': + 'c16a1660-ee73-4e0f-b9c7-d2e830e39539'}, {'isArbiter': '0', 'name': + 'host6:/bricks/brick4/testvol_brick11', 'hostUuid': + '6172cfab-9d72-43b5-ba6f-612e5cfc020c'}]}, + 'type': '9', 'distCount': '2', 'replicaCount': '1', 'brickCount': + '12', 'options': {'nfs.disable': 'on', 'cluster.server-quorum-ratio': + '90%', 'storage.fips-mode-rchecksum': 'on', + 'transport.address-family': 'inet', 
'cluster.brick-multiplex': + 'disable'}, 'redundancyCount': '2', 'snapshotCount': '0', + 'transport': '0', 'typeStr': 'Distributed-Disperse', 'stripeCount': + '1', 'arbiterCount': '0', + 'id': '8d217fa3-094b-4293-89b5-41d447c06d22', 'statusStr': 'Started', + 'optCount': '5'}} """ cmd = "gluster volume info %s --xml" % volname @@ -727,18 +721,6 @@ def get_volume_info(mnode, volname='all', xfail=False): (volinfo[volname]["bricks"]["brick"]. append(brick_info_dict)) - if el.tag == "hotBricks" or el.tag == "coldBricks": - volinfo[volname]["bricks"][el.tag] = {} - volinfo[volname]["bricks"][el.tag]["brick"] = [] - for elmt in el.getchildren(): - if elmt.tag == 'brick': - brick_info_dict = {} - for el_brk in elmt.getchildren(): - brick_info_dict[el_brk.tag] = el_brk.text - (volinfo[volname]["bricks"][el.tag]["brick"]. - append(brick_info_dict)) - else: - volinfo[volname]["bricks"][el.tag][elmt.tag] = elmt.text # noqa: E501 elif elem.tag == "options": volinfo[volname]["options"] = {} for option in elem.findall("option"): @@ -840,3 +822,76 @@ def get_volume_list(mnode): vol_list.append(elem.text) return vol_list + + +def get_gluster_state(mnode): + """Executes the 'gluster get-state' command on the specified node, checks + for the data dump, reads the glusterd state dump and returns it. 
+ + Args: + mnode (str): Node on which command has to be executed + + Returns: + dict: The output of gluster get-state command in dict format + + Example: + >>>get_gluster_state(self.mnode) + {'Global': {'myuuid': 'e92964c8-a7d2-4e59-81ac-feb0687df55e', + 'op-version': '70000'}, 'Global options': {}, 'Peers': + {'peer1.primary_hostname': 'dhcp43-167.lab.eng.blr.redhat.com', + 'peer1.uuid': 'd3a85b6a-134f-4df2-ba93-4bd0321b6d6a', 'peer1.state': + 'Peer in Cluster', 'peer1.connected': 'Connected', + 'peer1.othernames': '', 'peer2.primary_hostname': + 'dhcp43-68.lab.eng.blr.redhat.com', 'peer2.uuid': + 'f488aa35-bc56-4aea-9581-8db54e137937', 'peer2.state': + 'Peer in Cluster', 'peer2.connected': 'Connected', + 'peer2.othernames': '', 'peer3.primary_hostname': + 'dhcp43-64.lab.eng.blr.redhat.com', 'peer3.uuid': + 'dfe75b01-2988-4eac-879a-cf3d701e1382', 'peer3.state': + 'Peer in Cluster', 'peer3.connected': 'Connected', + 'peer3.othernames': '', 'peer4.primary_hostname': + 'dhcp42-147.lab.eng.blr.redhat.com', 'peer4.uuid': + '05e3858b-33bf-449a-b170-2d3dac9adc45', 'peer4.state': + 'Peer in Cluster', 'peer4.connected': 'Connected', + 'peer4.othernames': '', 'peer5.primary_hostname': + 'dhcp41-246.lab.eng.blr.redhat.com', 'peer5.uuid': + 'c2e3f833-98fa-42d9-ae63-2bc471515810', 'peer5.state': + 'Peer in Cluster', 'peer5.connected': 'Connected', + 'peer5.othernames': ''}, 'Volumes': {}, 'Services': {'svc1.name': + 'glustershd', 'svc1.online_status': 'Offline', 'svc2.name': 'nfs', + 'svc2.online_status': 'Offline', 'svc3.name': 'bitd', + 'svc3.online_status': 'Offline', 'svc4.name': 'scrub', + 'svc4.online_status': 'Offline', 'svc5.name': 'quotad', + 'svc5.online_status': 'Offline'}, 'Misc': {'base port': '49152', + 'last allocated port': '49154'}} + """ + + ret, out, _ = g.run(mnode, "gluster get-state") + if ret: + g.log.error("Failed to execute gluster get-state command!") + return None + # get-state should dump properly. 
+ # Checking whether a path is returned or not and then + # extracting path from the out data + + path = re.search(r"/.*?/.\S*", out).group() + if not path: + g.log.error("Failed to get the gluster state dump file path.") + return None + ret, out, _ = g.run(mnode, "cat {}".format(path)) + if ret: + g.log.error("Failed to read the gluster state dump.") + return None + g.log.info("Command Executed successfully and the data dump verified") + + # Converting the string to unicode for py2/3 compatibility + out = u"".join(out) + data_buf = io.StringIO(out) + config = configparser.ConfigParser() + try: + config.read_file(data_buf) # Python3 + except AttributeError: + config.readfp(data_buf) # Python2 + # Converts the config parser object to a dictionary and returns it + return {section: dict(config.items(section)) for section in + config.sections()} diff --git a/glustolibs-gluster/scripts/compute_hash.py b/glustolibs-gluster/scripts/compute_hash.py index b5ae2f83b..7cab7c494 100644 --- a/glustolibs-gluster/scripts/compute_hash.py +++ b/glustolibs-gluster/scripts/compute_hash.py @@ -20,7 +20,13 @@ import sys filename = sys.argv[1] glusterfs = ctypes.cdll.LoadLibrary("libglusterfs.so.0") -computed_hash = ctypes.c_uint32(glusterfs.gf_dm_hashfn(filename, - len(filename))) + +# In case of python3 encode string to ascii +if sys.version_info.major == 3: + computed_hash = ctypes.c_uint32(glusterfs.gf_dm_hashfn( + filename.encode('ascii'), len(filename))) +else: + computed_hash = ctypes.c_uint32(glusterfs.gf_dm_hashfn( + filename, len(filename))) print(computed_hash.value) diff --git a/glustolibs-io/glustolibs/io/memory_and_cpu_utils.py b/glustolibs-io/glustolibs/io/memory_and_cpu_utils.py new file mode 100644 index 000000000..4e1dadbd7 --- /dev/null +++ b/glustolibs-io/glustolibs/io/memory_and_cpu_utils.py @@ -0,0 +1,924 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g + +from glustolibs.gluster.volume_ops import get_volume_status +from glustolibs.gluster.glusterfile import file_exists +from glustolibs.misc.misc_libs import upload_scripts, kill_process + +import numpy as np +import pandas as pd +from statistics import mean, median + + +def check_upload_memory_and_cpu_logger_script(servers): + """Check and upload memory_and_cpu_logger.py to servers if not present + + Args: + servers(list): List of all servers where script has to be uploaded + + Returns: + bool: True if script is uploaded successfully else false + """ + script = "/usr/share/glustolibs/io/scripts/memory_and_cpu_logger.py" + is_present = [] + for server in servers: + if not file_exists(server, script): + if not upload_scripts(server, script): + g.log.error("Unable to upload memory_and_cpu_logger.py on %s", + server) + is_present.append(False) + else: + is_present.append(True) + return all(is_present) + + +def _start_logging_processes(process, servers, test_name, interval, count): + """Start logging processes on all nodes for a given process + + Args: + servers(list): Servers on which CPU and memory usage has to be logged + test_name(str): Name of testcase for which logs are to be collected + interval(int): Time 
interval after which logs are to be collected + count(int): Number of samples to be captured + + Returns: + list: A list of logging processes + """ + cmd = ("/usr/bin/env python " + "/usr/share/glustolibs/io/scripts/memory_and_cpu_logger.py" + " -p %s -t %s -i %d -c %d" % (process, test_name, + interval, count)) + logging_process = [] + for server in servers: + proc = g.run_async(server, cmd) + logging_process.append(proc) + return logging_process + + +def log_memory_and_cpu_usage_on_servers(servers, test_name, interval=60, + count=100): + """Log memory and CPU usage of gluster server processes + + Args: + servers(list): Servers on which CPU and memory usage has to be logged + test_name(str): Name of the testcase for which logs are to be collected + + Kwargs: + interval(int): Time interval after which logs are to be collected + (Default:60) + count(int): Number of samples to be captured (Default:100) + + Returns: + dict: Logging processes dict for all gluster server processes + """ + logging_process_dict = {} + for proc_name in ('glusterd', 'glusterfs', 'glusterfsd'): + logging_procs = _start_logging_processes( + proc_name, servers, test_name, interval, count) + logging_process_dict[proc_name] = logging_procs + return logging_process_dict + + +def log_memory_and_cpu_usage_on_clients(servers, test_name, interval=60, + count=100): + """Log memory and CPU usage of gluster client processes + + Args: + servers(list): Clients on which CPU and memory usage has to be logged + test_name(str): Name of testcase for which logs are to be collected + + Kwargs: + interval(int): Time interval after which logs are to be collected + (Defaults:60) + count(int): Number of samples to be captured (Default:100) + + Returns: + dict: Logging processes dict for all gluster client processes + """ + logging_process_dict = {} + logging_procs = _start_logging_processes( + 'glusterfs', servers, test_name, interval, count) + logging_process_dict['glusterfs'] = logging_procs + return 
logging_process_dict + + +def log_memory_and_cpu_usage_on_cluster(servers, clients, test_name, + interval=60, count=100): + """Log memory and CPU usage on gluster cluster + + Args: + servers(list): Servers on which memory and CPU usage is to be logged + clients(list): Clients on which memory and CPU usage is to be logged + test_name(str): Name of testcase for which logs are to be collected + + Kwargs: + interval(int): Time interval after which logs are to be collected + (Default:60) + count(int): Number of samples to be captured (Default:100) + + Returns: + dict: Logging processes dict for all servers and clients + """ + # Start logging on all servers + server_logging_processes = log_memory_and_cpu_usage_on_servers( + servers, test_name, interval, count) + if not server_logging_processes: + return {} + + # Starting logging on all clients + client_logging_processes = log_memory_and_cpu_usage_on_clients( + clients, test_name, interval, count) + if not client_logging_processes: + return {} + + # Combining dicts + logging_process_dict = {} + for node_type, proc_dict in (('server', server_logging_processes), + ('client', client_logging_processes)): + logging_process_dict[node_type] = {} + for proc in proc_dict: + logging_process_dict[node_type][proc] = ( + proc_dict[proc]) + return logging_process_dict + + +def _process_wait_flag_append(proc, flag): + """Run async communicate and adds true to flag list""" + # If the process is already completed async_communicate() + # throws a ValueError + try: + proc.async_communicate() + flag.append(True) + except ValueError: + flag.append(True) + + +def wait_for_logging_processes_to_stop(proc_dict, cluster=False): + """Wait for all given logging processes to stop + + Args: + proc_dict(dict): Dictionary of all the active logging processes + + Kwargs: + cluster(bool): True if proc_dict is for the entire cluster else False + (Default:False) + + Retruns: + bool: True if processes are completed else False + """ + flag = [] + if cluster: + 
for sub_dict in proc_dict: + for proc_name in proc_dict[sub_dict]: + for proc in proc_dict[sub_dict][proc_name]: + _process_wait_flag_append(proc, flag) + else: + for proc_name in proc_dict: + for proc in proc_dict[proc_name]: + _process_wait_flag_append(proc, flag) + return all(flag) + + +def kill_all_logging_processes(proc_dict, nodes, cluster=False): + """Kill logging processes on all given nodes + + Args: + proc_dict(dict): Dictonary of all active logging processes + nodes(list): List of nodes where logging has to be stopped + + Kwargs: + cluster(bool): True if proc_dict is for a full cluster else False + (Default:False) + + Retruns: + bool: True if processes are completed else False + """ + # Kill all logging processes + for server in nodes: + if not kill_process(server, process_names='memory_and_cpu_logger.py'): + g.log.error("Unable to kill some of the processes at %s.", server) + + # This will stop the async threads created by run_aysnc() as the proc is + # already killed. + ret = wait_for_logging_processes_to_stop(proc_dict, cluster) + if ret: + return True + return False + + +def create_dataframe_from_csv(node, proc_name, test_name): + """Creates a dataframe from a given process. 
+ + Args: + node(str): Node from which csv is to be picked + proc_name(str): Name of process for which csv is to picked + test_name(str): Name of the testcase for which CSV + + Returns: + dataframe: Pandas dataframe if CSV file exits else None + """ + # Read the csv file generated by memory_and_cpu_logger.py + ret, raw_data, _ = g.run(node, "cat /root/{}.csv" + .format(proc_name)) + if ret: + return None + + # Split the complete dump to individual lines + data = raw_data.split("\r\n") + rows, flag = [], False + for line in data: + values = line.split(',') + if test_name == values[0]: + # Reset rows if it's the second instance + if flag: + rows = [] + flag = True + continue + + # Pick and append values which have complete entry + if flag and len(values) == 4: + rows.append(values) + + # Create a panda dataframe and set the type for columns + dataframe = pd.DataFrame(rows[1:], columns=rows[0]) + conversion_dict = {'Process ID': int, + 'CPU Usage': float, + 'Memory Usage': float} + dataframe = dataframe.astype(conversion_dict) + return dataframe + + +def _get_min_max_mean_median(entrylist): + """Get the mix, max. 
mean and median of a list + + Args: + entrylist(list): List of values to be used + + Returns: + dict:Result dict generate from list + """ + result = {} + result['Min'] = min(entrylist) + result['Max'] = max(entrylist) + result['Mean'] = mean(entrylist) + result['Median'] = median(entrylist) + return result + + +def _compute_min_max_mean_median(dataframe, data_dict, process, node, + volume=None, brick=None): + """Compute min, max, mean and median for a given process + + Args: + dataframe(panda dataframe): Panda data frame of the csv file + data_dict(dict): data dict to which info is to be added + process(str): Name of process for which data is to be computed + node(str): Node for which min, max, mean and median has to be computed + + Kwargs: + volume(str): Volume name of the volume for which data is to be computed + brick(str): Brick path of the brick for which data is to be computed + """ + if volume and process == 'glusterfs': + # Create subdict inside dict + data_dict[node][process][volume] = {} + for usage in ('CPU Usage', 'Memory Usage'): + # Create usage subdict + data_dict[node][process][volume][usage] = {} + + # Clean data and compute values + cleaned_usage = list(dataframe[usage].dropna()) + out = _get_min_max_mean_median(cleaned_usage) + + # Add values to data_dict + for key in ('Min', 'Max', 'Mean', 'Median'): + data_dict[node][process][volume][usage][key] = out[key] + + if volume and brick and process == 'glusterfsd': + # Create subdict inside dict + data_dict[node][process][volume] = {} + data_dict[node][process][volume][brick] = {} + for usage in ('CPU Usage', 'Memory Usage'): + # Create usage subdict + data_dict[node][process][volume][brick][usage] = {} + + # Clean data and compute values + cleaned_usage = list(dataframe[usage].dropna()) + out = _get_min_max_mean_median(cleaned_usage) + + # Add values to data_dict + for key in ('Min', 'Max', 'Mean', 'Median'): + data_dict[node][process][volume][brick][usage][key] = out[key] + + # Compute CPU Uage and 
Memory Usage for glusterd + else: + for usage in ('CPU Usage', 'Memory Usage'): + # Create uage subdict + data_dict[node][process][usage] = {} + + # Clean data and compute value + cleaned_usage = list(dataframe[usage].dropna()) + out = _get_min_max_mean_median(cleaned_usage) + + # Add values to data_dict + for key in ('Min', 'Max', 'Mean', 'Median'): + data_dict[node][process][usage][key] = out[key] + + +def compute_data_usage_stats_on_servers(nodes, test_name): + """Compute min, max, mean and median for servers + + Args: + nodes(list): Servers from which data is to be used to compute min, max + , mean, mode and median + test_name(str): Name of testcase for which data has to be processed + + Returns: + dict: dict of min, max, mean and median for a given process + + NOTE: + This function has to be always run before cleanup. + """ + data_dict = {} + for node in nodes: + # Get the volume status on the node + volume_status = get_volume_status(node) + data_dict[node] = {} + for process in ('glusterd', 'glusterfs', 'glusterfsd'): + + # Generate a dataframe from the csv file + dataframe = create_dataframe_from_csv(node, process, test_name) + if dataframe.empty: + return {} + + data_dict[node][process] = {} + if process == 'glusterd': + # Checking if glusterd is restarted. 
+ if len(set(dataframe['Process ID'])) > 1: + data_dict[node][process]['is_restarted'] = True + else: + data_dict[node][process]['is_restarted'] = False + + # Call function to compute min, max, mean and median + _compute_min_max_mean_median(dataframe, data_dict, process, + node) + continue + + # Map volumes to volume process + for volume in volume_status.keys(): + for proc in volume_status[volume][node].keys(): + if (proc == 'Self-heal Daemon' and process == 'glusterfs'): + # Fetching pid from volume status output and create a + # dataframe with the entries of only that pid + pid = volume_status[volume][node][proc]['pid'] + proc_dataframe = dataframe[ + dataframe['Process ID'] == pid] + + # Call function to compute min, max, mean + # and median + _compute_min_max_mean_median( + proc_dataframe, data_dict, process, node, volume) + + if (proc.count('/') >= 2 and process == 'glusterfsd'): + # Fetching pid from volume status output and create a + # dataframe with the entries of only that pid + pid = volume_status[volume][node][proc]['pid'] + proc_dataframe = dataframe[ + dataframe['Process ID'] == pid] + + # Call function to compute min, max, mean and median + _compute_min_max_mean_median( + proc_dataframe, data_dict, process, node, volume, + proc) + + return data_dict + + +def compute_data_usage_stats_on_clients(nodes, test_name): + """Compute min, max, mean and median for clients + + Args: + nodes(list): Clients from which data is to be used to compute min, max + , mean, mode and median + test_name(str): Name of the testcase for which data has to be processed + + Returns: + dict: dict of min, max, mean and median for a given process + """ + data_dict = {} + for node in nodes: + data_dict[node] = {} + dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name) + if dataframe.empty: + return {} + + data_dict[node]['glusterfs'] = {} + # Call function to compute min, max, mean and median + _compute_min_max_mean_median(dataframe, data_dict, 'glusterfs', node) + + 
return data_dict + + +def _perform_three_point_check_for_memory_leak(dataframe, node, process, gain, + volume_status=None, + volume=None, + vol_name=None): + """Perform three point check + + Args: + dataframe(panda dataframe): Panda dataframe of a given process + node(str): Node on which memory leak has to be checked + process(str): Name of process for which check has to be done + gain(float): Accepted amount of leak for a given testcase in MB + + kwargs: + volume_status(dict): Volume status output on the give name + volumne(str):Name of volume for which 3 point check has to be done + vol_name(str): Name of volume process according to volume status + + Returns: + bool: True if memory leak instances are observed else False + """ + # Filter dataframe to be process wise if it's volume specific process + if process in ('glusterfs', 'glusterfsd'): + if process == 'glusterfs' and vol_name: + pid = int(volume_status[volume][node][vol_name]['pid']) + dataframe = dataframe[dataframe['Process ID'] == pid] + + # Compute usage gain throught the data frame + memory_increments = list(dataframe['Memory Usage'].diff().dropna()) + + # Check if usage is more than accepted amount of leak + memory_leak_decision_array = np.where( + dataframe['Memory Usage'].diff().dropna() > gain, True, False) + instances_of_leak = np.where(memory_leak_decision_array)[0] + + # If memory leak instances are present check if it's reduced + count_of_leak_instances = len(instances_of_leak) + if count_of_leak_instances > 0: + g.log.error('There are %s instances of memory leaks on node %s', + count_of_leak_instances, node) + for instance in instances_of_leak: + # In cases of last log file entry the below op could throw + # IndexError which is handled as below. 
+ try: + # Check if memory gain had decrease in the consecutive + # entries, after 2 entry and betwen current and last entry + if all([memory_increments[instance+1] > + memory_increments[instance], + memory_increments[instance+2] > + memory_increments[instance], + (memory_increments[len(memory_increments)-1] > + memory_increments[instance])]): + return True + + except IndexError: + # In case of last log file entry rerun the command + # and check for difference + g.log.info('Instance at last log entry.') + if process in ('glusterfs', 'glusterfsd'): + cmd = ("ps u -p %s | awk 'NR>1 && $11~/%s$/{print " + " $6/1024}'" % (pid, process)) + else: + cmd = ("ps u -p `pgrep glusterd` | awk 'NR>1 && $11~/" + "glusterd$/{print $6/1024}'") + ret, out, _ = g.run(node, cmd) + if ret: + g.log.error('Unable to run the command to fetch current ' + 'memory utilization.') + continue + usage_now = float(out.replace('\n', '')[2]) + last_entry = dataframe['Memory Usage'].iloc[-1] + + # Check if current memory usage is higher than last entry + fresh_diff = last_entry - usage_now + if fresh_diff > gain and last_entry > fresh_diff: + return True + return False + + +def check_for_memory_leaks_in_glusterd(nodes, test_name, gain=30.0): + """Check for memory leaks in glusterd + + Args: + nodes(list): Servers on which memory leaks have to be checked + test_name(str): Name of testcase for which memory leaks has to be checked + + Kwargs: + gain(float): Accepted amount of leak for a given testcase in MB + (Default:30) + + Returns: + bool: True if memory leak was obsevred else False + """ + is_there_a_leak = [] + for node in nodes: + dataframe = create_dataframe_from_csv(node, 'glusterd', test_name) + if dataframe.empty: + return False + + # Call 3 point check function + three_point_check = _perform_three_point_check_for_memory_leak( + dataframe, node, 'glusterd', gain) + if three_point_check: + g.log.error("Memory leak observed on node %s in glusterd", + node) + 
is_there_a_leak.append(three_point_check) + + return any(is_there_a_leak) + + +def check_for_memory_leaks_in_glusterfs(nodes, test_name, gain=30.0): + """Check for memory leaks in glusterfs + + Args: + nodes(list): Servers on which memory leaks have to be checked + test_name(str): Name of testcase for which memory leaks has to be checked + + Kwargs: + gain(float): Accepted amount of leak for a given testcase in MB + (Default:30) + + Returns: + bool: True if memory leak was obsevred else False + + NOTE: + This function should be executed with the volumes present on the cluster + """ + is_there_a_leak = [] + for node in nodes: + # Get the volume status on the node + volume_status = get_volume_status(node) + dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name) + if dataframe.empty: + return False + + for volume in volume_status.keys(): + for process in volume_status[volume][node].keys(): + # Skiping if process isn't Self-heal Deamon + if process != 'Self-heal Daemon': + continue + + # Call 3 point check function + three_point_check = _perform_three_point_check_for_memory_leak( + dataframe, node, 'glusterfs', gain, volume_status, volume, + 'Self-heal Daemon') + if three_point_check: + g.log.error("Memory leak observed on node %s in shd " + "on volume %s", node, volume) + is_there_a_leak.append(three_point_check) + + return any(is_there_a_leak) + + +def check_for_memory_leaks_in_glusterfsd(nodes, test_name, gain=30.0): + """Check for memory leaks in glusterfsd + + Args: + nodes(list): Servers on which memory leaks have to be checked + test_name(str): Name of testcase for which memory leaks has to be checked + + Kwargs: + gain(float): Accepted amount of leak for a given testcase in MB + (Default:30) + + Returns: + bool: True if memory leak was obsevred else False + + NOTE: + This function should be executed with the volumes present on the cluster. 
+ """ + is_there_a_leak = [] + for node in nodes: + # Get the volume status on the node + volume_status = get_volume_status(node) + dataframe = create_dataframe_from_csv(node, 'glusterfsd', test_name) + if dataframe.empty: + return False + + for volume in volume_status.keys(): + for process in volume_status[volume][node].keys(): + # Skiping if process isn't brick process + if not process.count('/'): + continue + + # Call 3 point check function + three_point_check = _perform_three_point_check_for_memory_leak( + dataframe, node, 'glusterfsd', gain, volume_status, volume, + process) + if three_point_check: + g.log.error("Memory leak observed on node %s in brick " + " process for brick %s on volume %s", node, + process, volume) + is_there_a_leak.append(three_point_check) + + return any(is_there_a_leak) + + +def check_for_memory_leaks_in_glusterfs_fuse(nodes, test_name, gain=30.0): + """Check for memory leaks in glusterfs fuse + + Args: + nodes(list): Servers on which memory leaks have to be checked + test_name(str): Name of testcase for which memory leaks has to be checked + + Kwargs: + gain(float): Accepted amount of leak for a given testcase in MB + (Default:30) + + Returns: + bool: True if memory leak was observed else False + + NOTE: + This function should be executed when the volume is still mounted. + """ + is_there_a_leak = [] + for node in nodes: + # Get the volume status on the node + dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name) + if dataframe.empty: + return False + + # Call 3 point check function + three_point_check = _perform_three_point_check_for_memory_leak( + dataframe, node, 'glusterfs', gain) + if three_point_check: + g.log.error("Memory leak observed on node %s for client", + node) + + # If I/O is constantly running on Clients the memory + # usage spikes up and stays at a point for long. 
+ last_entry = dataframe['Memory Usage'].iloc[-1] + cmd = ("ps u -p `pidof glusterfs` | " + "awk 'NR>1 && $11~/glusterfs$/{print" + " $6/1024}'") + ret, out, _ = g.run(node, cmd) + if ret: + g.log.error('Unable to run the command to fetch current ' + 'memory utilization.') + continue + + if float(out) != last_entry: + if float(out) > last_entry: + is_there_a_leak.append(True) + continue + + is_there_a_leak.append(False) + + return any(is_there_a_leak) + + +def _check_for_oom_killers(nodes, process, oom_killer_list): + """Checks for OOM killers for a specific process + + Args: + nodes(list): Nodes on which OOM killers have to be checked + process(str): Process for which OOM killers have to be checked + oom_killer_list(list): A list in which the presence of + OOM killer has to be noted + """ + cmd = ("grep -i 'killed process' /var/log/messages* " + "| grep -w '{}'".format(process)) + ret_codes = g.run_parallel(nodes, cmd) + for key in ret_codes.keys(): + ret, out, _ = ret_codes[key] + if not ret: + g.log.error('OOM killer observed on %s for %s', key, process) + g.log.error(out) + oom_killer_list.append(True) + else: + oom_killer_list.append(False) + + +def check_for_oom_killers_on_servers(nodes): + """Check for OOM killers on servers + + Args: + nodes(list): Servers on which OOM kills have to be checked + + Returns: + bool: True if OOM killers are present on any server else False + """ + oom_killer_list = [] + for process in ('glusterfs', 'glusterfsd', 'glusterd'): + _check_for_oom_killers(nodes, process, oom_killer_list) + return any(oom_killer_list) + + +def check_for_oom_killers_on_clients(nodes): + """Check for OOM killers on clients + + Args: + nodes(list): Clients on which OOM kills have to be checked + + Returns: + bool: True if OOM killers are present on any client else false + """ + oom_killer_list = [] + _check_for_oom_killers(nodes, 'glusterfs', oom_killer_list) + return any(oom_killer_list) + + +def _check_for_cpu_usage_spikes(dataframe, node, process, 
threshold, + volume_status=None, volume=None, + vol_name=None): + """Check for cpu spikes for a given process + + Args: + dataframe(panda dataframe): Panda dataframe of a given process + node(str): Node on which cpu spikes has to be checked + process(str): Name of process for which check has to be done + threshold(int): Accepted amount of 100% CPU usage instances + + kwargs: + volume_status(dict): Volume status output on the give name + volume(str):Name of volume for which check has to be done + vol_name(str): Name of volume process according to volume status + + Returns: + bool: True if number of instances more than threshold else False + """ + # Filter dataframe to be process wise if it's volume specific process + if process in ('glusterfs', 'glusterfsd'): + pid = int(volume_status[volume][node][vol_name]['pid']) + dataframe = dataframe[dataframe['Process ID'] == pid] + + # Check if usage is more than accepted amount of leak + cpu_spike_decision_array = np.where( + dataframe['CPU Usage'].dropna() == 100.0, True, False) + instances_of_spikes = np.where(cpu_spike_decision_array)[0] + + return bool(len(instances_of_spikes) > threshold) + + +def check_for_cpu_usage_spikes_on_glusterd(nodes, test_name, threshold=3): + """Check for CPU usage spikes on glusterd + + Args: + nodes(list): Servers on which memory leaks have to be checked + test_name(str): Name of testcase for which memory leaks has to be checked + + Kwargs: + threshold(int): Accepted amount of instances of 100% CPU usage + (Default:3) + + Returns: + bool: True if CPU spikes are more than threshold else False + """ + is_there_a_spike = [] + for node in nodes: + dataframe = create_dataframe_from_csv(node, 'glusterd', test_name) + if dataframe.empty: + return False + + # Call function to check for cpu spikes + cpu_spikes = _check_for_cpu_usage_spikes( + dataframe, node, 'glusterd', threshold) + if cpu_spikes: + g.log.error("CPU usage spikes observed more than " + "threshold %d on node %s for glusterd", + 
threshold, node) + is_there_a_spike.append(cpu_spikes) + + return any(is_there_a_spike) + + +def check_for_cpu_usage_spikes_on_glusterfs(nodes, test_name, threshold=3): + """Check for CPU usage spikes on glusterfs + + Args: + nodes(list): Servers on which memory leaks have to be checked + test_name(str): Name of testcase for which memory leaks has to be checked + + Kwargs: + threshold(int): Accepted amount of instances of 100% CPU usage + (Default:3) + + Returns: + bool: True if CPU spikes are more than threshold else False + + NOTE: + This function should be exuected with the volumes present on the cluster. + """ + is_there_a_spike = [] + for node in nodes: + # Get the volume status on the node + volume_status = get_volume_status(node) + dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name) + if dataframe.empty: + return False + + for volume in volume_status.keys(): + for process in volume_status[volume][node].keys(): + # Skiping if process isn't Self-heal Deamon + if process != 'Self-heal Daemon': + continue + + # Call function to check for cpu spikes + cpu_spikes = _check_for_cpu_usage_spikes( + dataframe, node, 'glusterfs', threshold, volume_status, + volume, 'Self-heal Daemon') + if cpu_spikes: + g.log.error("CPU usage spikes observed more than " + "threshold %d on node %s on volume %s for shd", + threshold, node, volume) + is_there_a_spike.append(cpu_spikes) + + return any(is_there_a_spike) + + +def check_for_cpu_usage_spikes_on_glusterfsd(nodes, test_name, threshold=3): + """Check for CPU usage spikes in glusterfsd + + Args: + nodes(list): Servers on which memory leaks have to be checked + test_name(str): Name of testcase for which memory leaks has to be checked + + Kwargs: + threshold(int): Accepted amount of instances of 100% CPU usage + (Default:3) + + Returns: + bool: True if CPU spikes are more than threshold else False + + NOTE: + This function should be exuected with the volumes present on the cluster. 
+ """ + is_there_a_spike = [] + for node in nodes: + # Get the volume status on the node + volume_status = get_volume_status(node) + dataframe = create_dataframe_from_csv(node, 'glusterfsd', test_name) + if dataframe.empty: + return False + + for volume in volume_status.keys(): + for process in volume_status[volume][node].keys(): + # Skiping if process isn't brick process + if process in ('Self-heal Daemon', 'Quota Daemon'): + continue + + # Call function to check for cpu spikes + cpu_spikes = _check_for_cpu_usage_spikes( + dataframe, node, 'glusterfsd', threshold, volume_status, + volume, process) + if cpu_spikes: + g.log.error("CPU usage spikes observed more than " + "threshold %d on node %s on volume %s for " + "brick process %s", + threshold, node, volume, process) + is_there_a_spike.append(cpu_spikes) + + return any(is_there_a_spike) + + +def check_for_cpu_usage_spikes_on_glusterfs_fuse(nodes, test_name, + threshold=3): + """Check for CPU usage spikes on glusterfs fuse + + Args: + nodes(list): Servers on which memory leaks have to be checked + test_name(str): Name of testcase for which memory leaks has to be checked + + Kwargs: + threshold(int): Accepted amount of instances of 100% CPU usage + (Default:3) + + Returns: + bool: True if CPU spikes are more than threshold else False + + NOTE: + This function should be executed when the volume is still mounted. 
+ """ + is_there_a_spike = [] + for node in nodes: + # Get the volume status on the node + dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name) + if dataframe.empty: + return False + + # Call function to check for cpu spikes + cpu_spikes = _check_for_cpu_usage_spikes( + dataframe, node, 'glusterfs', threshold) + if cpu_spikes: + g.log.error("CPU usage spikes observed more than " + "threshold %d on node %s for client", + threshold, node) + is_there_a_spike.append(cpu_spikes) + + return any(is_there_a_spike) diff --git a/glustolibs-io/glustolibs/io/utils.py b/glustolibs-io/glustolibs/io/utils.py index 67b3fe2d1..16ee93f21 100755 --- a/glustolibs-io/glustolibs/io/utils.py +++ b/glustolibs-io/glustolibs/io/utils.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2016 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -22,17 +22,21 @@ import os import subprocess from glusto.core import Glusto as g +from glustolibs.gluster.glusterfile import file_exists from glustolibs.gluster.mount_ops import GlusterMount from glustolibs.gluster.volume_libs import get_subvols from glustolibs.misc.misc_libs import upload_scripts -def collect_mounts_arequal(mounts): +def collect_mounts_arequal(mounts, path=''): """Collects arequal from all the mounts Args: mounts (list): List of all GlusterMount objs. + Kwargs: + path (str): Path whose arequal is to be calculated. 
+ Defaults to root of mountpoint Returns: tuple(bool, list): On success returns (True, list of arequal-checksums of each mount) @@ -47,9 +51,10 @@ def collect_mounts_arequal(mounts): g.log.info("Start collecting arequal-checksum from all mounts") all_mounts_procs = [] for mount_obj in mounts: + total_path = os.path.join(mount_obj.mountpoint, path) g.log.info("arequal-checksum of mount %s:%s", mount_obj.client_system, - mount_obj.mountpoint) - cmd = "arequal-checksum -p %s -i .trashcan" % mount_obj.mountpoint + total_path) + cmd = "arequal-checksum -p %s -i .trashcan" % total_path proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) all_mounts_procs.append(proc) all_mounts_arequal_checksums = [] @@ -983,3 +988,84 @@ def upload_file_dir_ops(clients): g.log.info("Successfully uploaded IO scripts to clients %s", clients) return True + + +def open_file_fd(mountpoint, time, client, start_range=0, + end_range=0): + """Open FD for a file and write to file. + + Args: + mountpoint(str): The mount point where the FD of file is to + be opened. + time(int): The time to wait after opening an FD. + client(str): The client from which FD is to be opened. + + Kwargs: + start_range(int): The start range of the open FD. + (Default: 0) + end_range(int): The end range of the open FD. + (Default: 0) + + Returns: + proc(object): Returns a process object + + NOTE: + Before opening FD, check the currently used fds on the + system as only a limited number of fds can be opened on + a system at a given time for each process. 
+ """ + if not (start_range and end_range): + cmd = ("cd {}; exec 30<> file_openfd ; sleep {};" + "echo 'xyz' >&30".format(mountpoint, time)) + else: + cmd = ('cd {}; for i in `seq {} {}`;' + ' do eval "exec $i<>file_openfd$i"; sleep {};' + ' echo "Write to open FD" >&$i; done'.format( + mountpoint, start_range, end_range, time)) + proc = g.run_async(client, cmd) + return proc + + +def run_linux_untar(clients, mountpoint, dirs=('.')): + """Run linux kernal untar on a given mount point + + Args: + clients(str|list): Client nodes on which I/O + has to be started. + mountpoint(str): Mount point where the volume is + mounted. + Kwagrs: + dirs(tuple): A tuple of dirs where untar has to + started. (Default:('.')) + Returns: + list: Returns a list of process object else None + """ + # Checking and convering clients to list. + if not isinstance(clients, list): + clients = [clients] + + list_of_procs = [] + for client in clients: + # Download linux untar to root, so that it can be + # utilized in subsequent run_linux_untar() calls. + cmd = ("wget https://cdn.kernel.org/pub/linux/kernel/" + "v5.x/linux-5.4.54.tar.xz") + if not file_exists(client, '/root/linux-5.4.54.tar.xz'): + ret, _, _ = g.run(client, cmd) + if ret: + return None + + for directory in dirs: + # copy linux tar to dir + cmd = ("cp /root/linux-5.4.54.tar.xz {}/{}" + .format(mountpoint, directory)) + ret, _, _ = g.run(client, cmd) + if ret: + return None + # Start linux untar + cmd = ("cd {}/{};tar -xvf linux-5.4.54.tar.xz" + .format(mountpoint, directory)) + proc = g.run_async(client, cmd) + list_of_procs.append(proc) + + return list_of_procs diff --git a/glustolibs-io/shared_files/scripts/file_lock.py b/glustolibs-io/shared_files/scripts/file_lock.py new file mode 100644 index 000000000..e29fd1b1d --- /dev/null +++ b/glustolibs-io/shared_files/scripts/file_lock.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from fcntl import flock, LOCK_EX, LOCK_NB, LOCK_UN +from time import sleep +from argparse import ArgumentParser + + +def get_file_lock(args): + """ + Gets the lock to a file and releases it after timeout + """ + file_name = args.f + timeout = args.t + f = open(file_name, 'w') + flock(f.fileno(), LOCK_EX | LOCK_NB) + sleep(int(timeout)) + flock(f.fileno(), LOCK_UN) + + +if __name__ == "__main__": + file_lock_parser = ArgumentParser( + prog="file_lock.py", description="Program to validate file lock ops") + + file_lock_req_args = file_lock_parser.add_argument_group( + 'required named arguments') + file_lock_req_args.add_argument( + '-f', type=str, required=True, + help="File on which lock has to be applied") + file_lock_req_args.add_argument( + '-t', help="time for which lock has to be retained", type=int, + required=True) + + file_lock_parser.set_defaults(func=get_file_lock) + + args = file_lock_parser.parse_args() + rc = args.func(args) diff --git a/glustolibs-io/shared_files/scripts/memory_and_cpu_logger.py b/glustolibs-io/shared_files/scripts/memory_and_cpu_logger.py new file mode 100644 index 000000000..d2ee80d6c --- /dev/null +++ b/glustolibs-io/shared_files/scripts/memory_and_cpu_logger.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python +# Copyright (C) 
2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +A tool to monitor and log memory consumption processes. +""" +from __future__ import print_function + +import argparse +import csv +from time import sleep +import subprocess + + +def run_command(cmd): + """ + Run command using Popen and return output + """ + ret = subprocess.Popen(cmd, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, shell=True) + output = ret.stdout.read().decode('utf8').split('\n')[:-1] + return output + + +def get_memory_and_cpu_consumption(proc_name): + """ + Get the memory and cpu consumed by a given process + """ + # The command gives an output as shown below: + # [2020-08-07 09:34:48] 16422 0.0 9.99609 + # + # Where, + # [2020-08-07 09:34:48] is UTC timestamp. + # 16422 is the process ID. + # 0.0 is the CPU usage. + # 9.99609 is memory consumption in MB. + cmd = ("ps u -p `pgrep " + proc_name + "` | " + "awk 'NR>1 && $11~/" + proc_name + "$/{print " + "strftime(\"[%Y-%d-%m %H:%M:%S]\", " + "systime(), 1), $2,$3,$6/1024}'") + memory_and_cpu_consumed = run_command(cmd) + return memory_and_cpu_consumed + + +def main(): + """ + Main function of the tool. 
+ """ + # Setting up command line arguments + parser = argparse.ArgumentParser( + description="A tool to log memory usage of a given process" + ) + parser.add_argument( + "-p", "--process_name", type=str, dest="process_name", required=True, + help="Name of process for which cpu and memory is to be logged") + parser.add_argument( + "-i", "--interval", type=int, dest="interval", default=60, + help="Time interval to wait between consecutive logs(Default:60)") + parser.add_argument( + "-c", "--count", type=int, dest="count", default=10, + help="Number of times memory and CPU has to be logged (Default:10)") + parser.add_argument( + '-t', '--testname', type=str, dest="testname", required=True, + help="Test name for which memory is logged") + args = parser.parse_args() + + # Declare all three parameters + process_name = args.process_name + count = args.count + interval = args.interval + + # Generating CSV file header + with open('{}.csv'.format(process_name), 'a') as file: + csv_writer_obj = csv.writer(file) + csv_writer_obj.writerow([args.testname, '', '', '']) + csv_writer_obj.writerow([ + 'Time stamp', 'Process ID', 'CPU Usage', 'Memory Usage']) + + # Taking memory output for a given + # number of times + for counter in range(0, count): + print("Iteration: {}".format(counter)) + data = get_memory_and_cpu_consumption(process_name) + + # Logging information to csv file + for line in data: + info = line.split(" ") + csv_writer_obj.writerow([" ".join(info[:2]), info[2], + info[3], info[4]]) + sleep(interval) + + +if __name__ == "__main__": + main() diff --git a/glustolibs-misc/glustolibs/misc/misc_libs.py b/glustolibs-misc/glustolibs/misc/misc_libs.py index fea86d125..9f9225929 100755 --- a/glustolibs-misc/glustolibs/misc/misc_libs.py +++ b/glustolibs-misc/glustolibs/misc/misc_libs.py @@ -21,7 +21,7 @@ import sys import time from glusto.core import Glusto as g -from glustolibs.gluster.lib_utils import is_rhel6 +from glustolibs.gluster.lib_utils import is_rhel6, is_rhel7 
def create_dirs(list_of_nodes, list_of_dir_paths): @@ -341,8 +341,8 @@ def install_arequal(list_of_nodes): list_of_nodes = [list_of_nodes] try: - arequal_repo = (g.config['dependencies']['testing_tools']['arequal'] - ['repo']) + arequal_repo = (g.config['dependencies']['testing_tools'] + ['arequal']['repo']) except KeyError: arequal_repo = ("https://copr.fedorainfracloud.org/coprs/nigelbabu/" "arequal/repo/epel-7/nigelbabu-arequal-epel-7.repo") @@ -619,3 +619,72 @@ def git_clone_and_compile(hosts, link, dir_name, compile_option='False'): else: g.log.info("Successfully cloned/compiled repo on %s" % host) return True + + +def kill_process(mnode, process_ids='', process_names=''): + """Kills the given set of process running in the specified node + + Args: + mnode (str): Node at which the command has to be executed + process_ids (list|str): List of pid's to be terminated + process_names(list|str): List of Process names to be terminated + + Returns: + bool : True on successful termination of all the processes + False, otherwise + Example: + >>> kill_process("10.70.43.68", process_ids=27664) + True/False + >>> kill_process("10.70.43.68", process_names=["glustershd", + "glusterd"]) + True/False + """ + if process_names: + process_ids = [] + if not isinstance(process_names, list): + process_names = [process_names] + + for process in process_names: + ret, pids, _ = g.run(mnode, + "ps -aef | grep -i '%s' | grep -v 'grep' | " + "awk '{ print $2 }'" % process) + pids = pids.split("\n")[:-1] + if not pids: + g.log.error("Getting pid for process %s failed" % process) + return False + for pid in pids: + if pid: + process_ids.append(pid) + + if process_ids and not isinstance(process_ids, list): + process_ids = [process_ids] + + # Kill process + for pid in process_ids: + ret, _, _ = g.run(mnode, "kill -9 %s" % str(pid)) + if ret: + g.log.error("Failed to kill process with pid %s" % str(pid)) + return False + return True + + +def bring_down_network_interface(mnode, timeout=150): + 
"""Brings the network interface down for a defined time + + Args: + mnode (str): Node at which the interface has to be bought down + timeout (int): Time duration (in secs) for which network has to + be down + + Returns: + network_status(object): Returns a process object + + Example: + >>> bring_down_network_interface("10.70.43.68", timout=100) + """ + interface = "eth0" if is_rhel7(mnode) else "ens3" + cmd = "ifconfig {0} down\nsleep {1}\nifconfig {0} up".format(interface, + timeout) + _, _, _ = g.run(mnode, "echo \"{}\"> 'test.sh'".format(cmd)) + network_status = g.run_async(mnode, "sh test.sh") + return network_status diff --git a/tests/functional/afr/heal/test_afr_self_heal_add_brick_rebalance.py b/tests/functional/afr/heal/test_afr_self_heal_add_brick_rebalance.py new file mode 100644 index 000000000..df05dd86c --- /dev/null +++ b/tests/functional/afr/heal/test_afr_self_heal_add_brick_rebalance.py @@ -0,0 +1,199 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from time import sleep +from random import sample + +from glusto.core import Glusto as g +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + are_bricks_offline) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.heal_libs import (is_volume_in_split_brain, + monitor_heal_completion) +from glustolibs.gluster.lib_utils import collect_bricks_arequal +from glustolibs.gluster.rebalance_ops import (rebalance_start, + wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_ops import volume_start +from glustolibs.gluster.volume_libs import ( + verify_all_process_of_volume_are_online, get_subvols, expand_volume, + wait_for_volume_process_to_be_online) +from glustolibs.io.utils import (validate_io_procs, + list_all_files_and_dirs_mounts, + wait_for_io_to_complete) +from glustolibs.misc.misc_libs import upload_scripts + + +@runs_on([['arbiter', 'distributed-arbiter', 'replicated', + 'distributed-replicated'], ['glusterfs']]) +class TestAfrSelfHealAddBrickRebalance(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" % + cls.clients) + g.log.info("Successfully uploaded IO scripts to clients % s", + cls.clients) + + def setUp(self): + + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + if not self.setup_volume_and_mount_volume(self.mounts): + raise ExecutionError("Unable to setup and mount volume") + + def tearDown(self): + + # Wait if any IOs are pending from the test + if self.all_mounts_procs: + ret = wait_for_io_to_complete(self.all_mounts_procs, 
self.mounts) + if ret: + raise ExecutionError( + "Wait for IO completion failed on some of the clients") + + # Unmount and cleanup the volume + if not self.unmount_volume_and_cleanup_volume(self.mounts): + raise ExecutionError("Unable to unmount and cleanup volume") + + # Calling GlusterBaseClass Teardown + self.get_super_method(self, 'tearDown')() + + def test_afr_self_heal_add_brick_rebalance(self): + """ + Test Steps: + 1. Create a replicated/distributed-replicate volume and mount it + 2. Start IO from the clients + 3. Bring down a brick from the subvol and validate it is offline + 4. Bring back the brick online and wait for heal to complete + 5. Once the heal is completed, expand the volume. + 6. Trigger rebalance and wait for rebalance to complete + 7. Validate IO, no errors during the steps performed from step 2 + 8. Check arequal of the subvol and all the brick in the same subvol + should have same checksum + """ + # Start IO from the clients + self.all_mounts_procs = [] + for count, mount_obj in enumerate(self.mounts): + g.log.info("Starting IO on %s:%s", mount_obj.client_system, + mount_obj.mountpoint) + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d --dir-depth 3 --dir-length 5 " + "--max-num-of-dirs 5 --num-of-files 30 %s" % ( + self.script_upload_path, count, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + self.all_mounts_procs.append(proc) + + # List a brick in each subvol and bring them offline + subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + brick_to_bring_offline = [] + for subvol in subvols: + self.assertTrue(subvol, "List is empty") + brick_to_bring_offline.extend(sample(subvol, 1)) + + ret = bring_bricks_offline(self.volname, brick_to_bring_offline) + self.assertTrue(ret, "Unable to bring brick: {} offline".format( + brick_to_bring_offline)) + + # Validate the brick is offline + ret = are_bricks_offline(self.mnode, self.volname, + 
brick_to_bring_offline) + self.assertTrue(ret, "Brick:{} is still online".format( + brick_to_bring_offline)) + + # Wait for 10 seconds for IO to be generated + sleep(10) + + # Start volume with force to bring all bricks online + ret, _, _ = volume_start(self.mnode, self.volname, force=True) + self.assertEqual(ret, 0, "Volume start with force failed") + g.log.info("Volume: %s started successfully", self.volname) + + # Verify volume's all process are online + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, ("Volume %s : All process are not online", + self.volname)) + + # Monitor heal completion + self.assertTrue(monitor_heal_completion(self.mnode, self.volname, + interval_check=10), + "Heal failed after 20 mins") + + # Check are there any files in split-brain and heal completion + self.assertFalse(is_volume_in_split_brain(self.mnode, self.volname), + "Some files are in split brain for " + "volume: {}".format(self.volname)) + + # Expanding volume by adding bricks to the volume when IO in progress + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, ("Failed to expand the volume when IO in " + "progress on volume %s", self.volname)) + + # Wait for volume processes to be online + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to wait for volume %s processes to " + "be online", self.volname)) + + # Start Rebalance + ret, _, _ = rebalance_start(self.mnode, self.volname) + self.assertEqual(ret, 0, ("Failed to start rebalance on the volume " + "%s", self.volname)) + g.log.info("Successfully started rebalance on the " + "volume %s", self.volname) + + # Without sleep the next step will fail with Glusterd Syncop locking. 
+ sleep(2) + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=1800) + self.assertTrue(ret, ("Rebalance is not yet complete on the volume " + "%s", self.volname)) + g.log.info("Rebalance is successfully complete on " + "the volume %s", self.volname) + + # Validate IO + ret = validate_io_procs(self.all_mounts_procs, self.mounts) + self.io_validation_complete = True + self.assertTrue(ret, "IO failed on some of the clients") + self.all_mounts_procs *= 0 + + # List all files and dirs created + ret = list_all_files_and_dirs_mounts(self.mounts) + self.assertTrue(ret, "Failed to list all files and dirs") + + # Check arequal checksum of all the bricks is same + for subvol in subvols: + ret, arequal_from_the_bricks = collect_bricks_arequal(subvol) + self.assertTrue(ret, "Arequal is collected successfully across " + "the bricks in the subvol {}".format(subvol)) + cmd = len(set(arequal_from_the_bricks)) + if (self.volume_type == "arbiter" or + self.volume_type == "distributed-arbiter"): + cmd = len(set(arequal_from_the_bricks[:2])) + self.assertEqual(cmd, 1, "Arequal" + " is same on all the bricks in the subvol") diff --git a/tests/functional/afr/heal/test_dir_time_stamp_restoration.py b/tests/functional/afr/heal/test_dir_time_stamp_restoration.py new file mode 100644 index 000000000..6a4ef2a19 --- /dev/null +++ b/tests/functional/afr/heal/test_dir_time_stamp_restoration.py @@ -0,0 +1,160 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-131 USA. + +""" +Description: + Check if parent directory timestamps are restored after an entry heal. +""" +from glusto.core import Glusto as g + +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import ( + bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + select_volume_bricks_to_bring_offline, + get_all_bricks) +from glustolibs.gluster.heal_ops import trigger_heal +from glustolibs.gluster.glusterdir import (mkdir, rmdir) +from glustolibs.gluster.glusterfile import (get_fattr, get_file_stat) +from glustolibs.gluster.volume_libs import set_volume_options +from glustolibs.gluster.heal_libs import monitor_heal_completion + + +@runs_on([['replicated'], + ['glusterfs']]) +class TestDirTimeStampRestore(GlusterBaseClass): + + def setUp(self): + self.get_super_method(self, 'setUp')() + + ret = self.setup_volume_and_mount_volume(mounts=self.mounts, + volume_create_force=False) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + self.bricks_list = get_all_bricks(self.mnode, self.volname) + + def tearDown(self): + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + self.get_super_method(self, 'tearDown')() + + def are_mdata_xattrs_equal(self): + """Check if atime/mtime/ctime in glusterfs.mdata xattr are identical""" + timestamps = [] + for brick_path in self.bricks_list: + server, brick = brick_path.split(':') + fattr = 
get_fattr(server, '%s/%s' % (brick, "dir1"), + 'trusted.glusterfs.mdata') + self.assertIsNotNone(fattr, 'Unable to get mdata xattr') + timestamps.append(fattr) + + g.log.debug("mdata list = %s", ''.join(map(str, timestamps))) + return timestamps.count(timestamps[0]) == len(timestamps) + + def are_stat_timestamps_equal(self): + """Check if atime/mtime/ctime in stat info are identical""" + timestamps = [] + for brick_path in self.bricks_list: + server, brick = brick_path.split(':') + stat_data = get_file_stat(server, "%s/dir1" % brick) + ts_string = "{}-{}-{}".format(stat_data['epoch_atime'], + stat_data['epoch_mtime'], + stat_data['epoch_ctime']) + timestamps.append(ts_string) + + g.log.debug("stat list = %s", ''.join(map(str, timestamps))) + return timestamps.count(timestamps[0]) == len(timestamps) + + def perform_test(self, ctime): + """ + Testcase steps: + 1. Enable/disable features,ctime based on function argument. + 2. Create a directory on the mount point. + 3. Kill a brick and create a file inside the directory. + 4. Bring the brick online. + 5. Trigger heal and wait for its completion. + 6. Verify that the atime, mtime and ctime of the directory are same on + all bricks of the replica. + """ + if ctime: + option = {'features.ctime': 'on'} + else: + option = {'features.ctime': 'off'} + ret = set_volume_options(self.mnode, self.volname, option) + self.assertTrue(ret, 'failed to set option %s on %s' + % (option, self.volume)) + + client, m_point = (self.mounts[0].client_system, + self.mounts[0].mountpoint) + + dirpath = '{}/dir1'.format(m_point) + ret = mkdir(client, dirpath) + self.assertTrue(ret, 'Unable to create a directory from mount point') + + bricks_to_bring_offline = select_volume_bricks_to_bring_offline( + self.mnode, self.volname) + self.assertIsNotNone(bricks_to_bring_offline, "List is empty") + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} offline'. 
+ format(bricks_to_bring_offline)) + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks {} are not offline'. + format(bricks_to_bring_offline)) + + cmd = 'touch {}/file1'.format(dirpath) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, 'Unable to create file from mount point') + + ret = bring_bricks_online( + self.mnode, self.volname, + bricks_to_bring_offline, + bring_bricks_online_methods=['volume_start_force']) + self.assertTrue(ret, 'Failed to bring bricks {} online'.format + (bricks_to_bring_offline)) + ret = trigger_heal(self.mnode, self.volname) + self.assertTrue(ret, 'Starting heal failed') + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + if ctime: + ret = self.are_mdata_xattrs_equal() + self.assertTrue(ret, "glusterfs.mdata mismatch for {}" + .format(dirpath)) + else: + ret = self.are_stat_timestamps_equal() + self.assertTrue(ret, "stat mismatch for {}".format(dirpath)) + + ret = rmdir(client, dirpath, force=True) + self.assertTrue(ret, 'Unable to delete directory from mount point') + + def test_dir_time_stamp_restoration(self): + """ + Create pending entry self-heal on a replica volume and verify that + after the heal is complete, the atime, mtime and ctime of the parent + directory are identical on all bricks of the replica. + + The test is run with features.ctime enabled as well as disabled. + """ + self.perform_test(ctime=True) + self.perform_test(ctime=False) diff --git a/tests/functional/afr/heal/test_heal_for_conservative_merge_with_two_bricks_blame.py b/tests/functional/afr/heal/test_heal_for_conservative_merge_with_two_bricks_blame.py new file mode 100644 index 000000000..163596bb7 --- /dev/null +++ b/tests/functional/afr/heal/test_heal_for_conservative_merge_with_two_bricks_blame.py @@ -0,0 +1,175 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from time import sleep + +from glusto.core import Glusto as g +from glustolibs.gluster.brick_libs import (get_all_bricks, are_bricks_offline, + bring_bricks_offline, + get_online_bricks_list, + are_bricks_online) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.gluster_init import restart_glusterd +from glustolibs.gluster.glusterfile import set_fattr, get_fattr +from glustolibs.gluster.heal_libs import (is_volume_in_split_brain, + monitor_heal_completion) +from glustolibs.gluster.lib_utils import collect_bricks_arequal + + +@runs_on([['replicated'], ['glusterfs']]) +class TestHealForConservativeMergeWithTwoBricksBlame(GlusterBaseClass): + + def setUp(self): + # calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup volume and mount it. 
+        if not self.setup_volume_and_mount_volume([self.mounts[0]]):
+            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+
+    def tearDown(self):
+        # Unmount and cleanup the volume
+        if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]):
+            raise ExecutionError("Unable to unmount and cleanup volume")
+
+        # Calling GlusterBaseClass tearDown
+        self.get_super_method(self, 'tearDown')()
+
+    def _bring_brick_offline_and_check(self, brick):
+        """Brings brick offline and checks if it is offline or not"""
+        ret = bring_bricks_offline(self.volname, [brick])
+        self.assertTrue(ret, "Unable to bring brick: {} offline".format(brick))
+
+        # Validate the brick is offline
+        ret = are_bricks_offline(self.mnode, self.volname, [brick])
+        self.assertTrue(ret, "Brick:{} is still online".format(brick))
+
+    def _get_fattr_for_the_brick(self, brick):
+        """Get xattr of trusted.afr.volname-client-0 for the given brick"""
+        host, fqpath = brick.split(":")
+        fqpath = fqpath + "/dir1"
+        fattr = "trusted.afr.{}-client-0".format(self.volname)
+        return get_fattr(host, fqpath, fattr, encode="hex")
+
+    def _check_peers_status(self):
+        """Validates peers are connected or not"""
+        count = 0
+        while count < 4:
+            if self.validate_peers_are_connected():
+                return
+            sleep(5)
+            count += 1
+        self.fail("Peers are not in connected state")
+
+    def test_heal_for_conservative_merge_with_two_bricks_blame(self):
+        """
+        1) Create 1x3 volume and fuse mount the volume
+        2) On mount created a dir dir1
+        3) Pkill glusterfsd on node n1 (b2 on node2 and b3 on node3 up)
+        4) touch f{1..10} on the mountpoint
+        5) b2 and b3 xattrs would be blaming b1 as files are created while
+        b1 is down
+        6) Reset the b3 xattrs to NOT blame b1 by using setattr
+        7) Now pkill glusterfsd of b2 on node2
+        8) Restart glusterd on node1 to bring up b1
+        9) Now bricks b1 online, b2 down, b3 online
+        10) touch x{1..10} under dir1 itself
+        11) Again reset xattr on node3 of b3 so that it doesn't blame b2,
+        as done for b1 in step
6 + 12) Do restart glusterd on node2 hosting b2 to bring all bricks online + 13) Check for heal info, split-brain and arequal for the bricks + """ + # pylint: disable=too-many-locals + # Create dir `dir1/` on mountpont + path = self.mounts[0].mountpoint + "/dir1" + ret = mkdir(self.mounts[0].client_system, path, parents=True) + self.assertTrue(ret, "Directory {} creation failed".format(path)) + + all_bricks = get_all_bricks(self.mnode, self.volname) + self.assertIsNotNone(all_bricks, "Unable to fetch bricks of volume") + brick1, brick2, brick3 = all_bricks + + # Bring first brick offline + self._bring_brick_offline_and_check(brick1) + + # touch f{1..10} files on the mountpoint + cmd = ("cd {mpt}; for i in `seq 1 10`; do touch f$i" + "; done".format(mpt=path)) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Unable to create files on mountpoint") + + # Check b2 and b3 xattrs are blaming b1 and are same + self.assertEqual(self._get_fattr_for_the_brick(brick2), + self._get_fattr_for_the_brick(brick3), + "Both the bricks xattrs are not blaming " + "brick: {}".format(brick1)) + + # Reset the xattrs of dir1 on b3 for brick b1 + first_xattr_to_reset = "trusted.afr.{}-client-0".format(self.volname) + xattr_value = "0x000000000000000000000000" + host, brick_path = brick3.split(":") + brick_path = brick_path + "/dir1" + ret = set_fattr(host, brick_path, first_xattr_to_reset, xattr_value) + self.assertTrue(ret, "Unable to set xattr for the directory") + + # Kill brick2 on the node2 + self._bring_brick_offline_and_check(brick2) + + # Restart glusterd on node1 to bring the brick1 online + self.assertTrue(restart_glusterd([brick1.split(":")[0]]), "Unable to " + "restart glusterd") + # checking for peer status post glusterd restart + self._check_peers_status() + + # Check if the brick b1 on node1 is online or not + online_bricks = get_online_bricks_list(self.mnode, self.volname) + self.assertIsNotNone(online_bricks, "Unable to fetch online 
bricks") + self.assertIn(brick1, online_bricks, "Brick:{} is still offline after " + "glusterd restart".format(brick1)) + + # Create 10 files under dir1 naming x{1..10} + cmd = ("cd {mpt}; for i in `seq 1 10`; do touch x$i" + "; done".format(mpt=path)) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Unable to create files on mountpoint") + + # Reset the xattrs from brick3 on to brick2 + second_xattr_to_reset = "trusted.afr.{}-client-1".format(self.volname) + ret = set_fattr(host, brick_path, second_xattr_to_reset, xattr_value) + self.assertTrue(ret, "Unable to set xattr for the directory") + + # Bring brick2 online + self.assertTrue(restart_glusterd([brick2.split(":")[0]]), "Unable to " + "restart glusterd") + self._check_peers_status() + + self.assertTrue(are_bricks_online(self.mnode, self.volname, [brick2])) + + # Check are there any files in split-brain and heal completion + self.assertFalse(is_volume_in_split_brain(self.mnode, self.volname), + "Some files are in split brain for " + "volume: {}".format(self.volname)) + self.assertTrue(monitor_heal_completion(self.mnode, self.volname), + "Conservative merge of files failed") + + # Check arequal checksum of all the bricks is same + ret, arequal_from_the_bricks = collect_bricks_arequal(all_bricks) + self.assertTrue(ret, "Arequal is collected successfully across the" + " bricks in the subvol {}".format(all_bricks)) + self.assertEqual(len(set(arequal_from_the_bricks)), 1, "Arequal is " + "same on all the bricks in the subvol") diff --git a/tests/functional/afr/heal/test_heal_info_no_hang.py b/tests/functional/afr/heal/test_heal_info_no_hang.py new file mode 100644 index 000000000..82f8b0598 --- /dev/null +++ b/tests/functional/afr/heal/test_heal_info_no_hang.py @@ -0,0 +1,162 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+"""
+Description:
+    heal info completes when there is ongoing I/O and a lot of pending heals.
+"""
+import random
+from glusto.core import Glusto as g
+
+from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on)
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.volume_libs import get_subvols
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+                                           bring_bricks_online,
+                                           are_bricks_offline,
+                                           get_all_bricks)
+from glustolibs.gluster.heal_ops import trigger_heal
+from glustolibs.io.utils import run_linux_untar
+from glustolibs.gluster.glusterdir import mkdir
+
+
+@runs_on([['distributed-replicated'],
+          ['glusterfs']])
+class TestHealInfoNoHang(GlusterBaseClass):
+
+    def setUp(self):
+        self.get_super_method(self, 'setUp')()
+
+        self.is_io_running = False
+
+        ret = self.setup_volume_and_mount_volume(mounts=self.mounts,
+                                                 volume_create_force=False)
+        if not ret:
+            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
+        g.log.info("Successful in Setup Volume and Mount Volume")
+
+        self.bricks_list = get_all_bricks(self.mnode, self.volname)
+        self.subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
+
+    def tearDown(self):
+        if self.is_io_running:
+            if not
self._wait_for_untar_completion(): + g.log.error("I/O failed to stop on clients") + + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + self.get_super_method(self, 'tearDown')() + + def _wait_for_untar_completion(self): + """Wait for the kernel untar to complete""" + has_process_stopped = [] + for proc in self.list_of_io_processes: + try: + ret, _, _ = proc.async_communicate() + if not ret: + has_process_stopped.append(False) + has_process_stopped.append(True) + except ValueError: + has_process_stopped.append(True) + return all(has_process_stopped) + + def _does_heal_info_complete_within_timeout(self): + """Check if heal info CLI completes within a specific timeout""" + # We are just assuming 1 entry takes one second to process, which is + # a very high number but some estimate is better than a random magic + # value for timeout. + timeout = self.num_entries * 1 + + # heal_info_data = get_heal_info(self.mnode, self.volname) + cmd = "timeout %s gluster volume heal %s info" % (timeout, + self.volname) + ret, _, _ = g.run(self.mnode, cmd) + if ret: + return False + return True + + def test_heal_info_no_hang(self): + """ + Testcase steps: + 1. Start kernel untar on the mount + 2. While untar is going on, kill a brick of the replica. + 3. Wait for the untar to be over, resulting in pending heals. + 4. Get the approx. number of pending heals and save it + 5. Bring the brick back online. + 6. Trigger heal + 7. Run more I/Os with dd command + 8. Run heal info command and check that it completes successfully under + a timeout that is based on the no. of heals in step 4. 
+ """ + self.list_of_io_processes = [] + self.linux_untar_dir = "{}/{}".format(self.mounts[0].mountpoint, + "linuxuntar") + ret = mkdir(self.clients[0], self.linux_untar_dir) + self.assertTrue(ret, "Failed to create dir linuxuntar for untar") + + # Start linux untar on dir linuxuntar + ret = run_linux_untar(self.clients[0], self.mounts[0].mountpoint, + dirs=tuple(['linuxuntar'])) + self.list_of_io_processes += ret + self.is_io_running = True + + # Kill brick resulting in heal backlog. + brick_to_bring_offline = random.choice(self.bricks_list) + ret = bring_bricks_offline(self.volname, brick_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' + % brick_to_bring_offline) + ret = are_bricks_offline(self.mnode, self.volname, + [brick_to_bring_offline]) + self.assertTrue(ret, 'Bricks %s are not offline' + % brick_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + brick_to_bring_offline) + + ret = self._wait_for_untar_completion() + self.assertFalse(ret, "IO didn't complete or failed on client") + self.is_io_running = False + + # Get approx. no. of entries to be healed. 
+ cmd = ("gluster volume heal %s statistics heal-count | grep Number " + "| awk '{sum+=$4} END {print sum/2}'" % self.volname) + ret, self.num_entries, _ = g.run(self.mnode, cmd) + self.assertEqual(ret, 0, "Failed to get heal-count statistics") + + # Restart the down bricks + ret = bring_bricks_online(self.mnode, self.volname, + brick_to_bring_offline) + self.assertTrue(ret, 'Failed to bring brick %s online' % + brick_to_bring_offline) + g.log.info('Bringing brick %s online is successful', + brick_to_bring_offline) + # Trigger heal + ret = trigger_heal(self.mnode, self.volname) + self.assertTrue(ret, 'Starting heal failed') + g.log.info('Index heal launched') + + # Run more I/O + cmd = ("for i in `seq 1 10`; do dd if=/dev/urandom of=%s/file_$i " + "bs=1M count=100; done" % self.mounts[0].mountpoint) + ret = g.run_async(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + + # Get heal info + ret = self._does_heal_info_complete_within_timeout() + self.assertTrue(ret, 'Heal info timed out') + g.log.info('Heal info completed succesfully') diff --git a/tests/functional/afr/heal/test_impact_of_replace_brick_on_glustershd.py b/tests/functional/afr/heal/test_impact_of_replace_brick_on_glustershd.py new file mode 100644 index 000000000..efd2f8745 --- /dev/null +++ b/tests/functional/afr/heal/test_impact_of_replace_brick_on_glustershd.py @@ -0,0 +1,186 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from random import choice +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass +from glustolibs.gluster.volume_libs import ( + log_volume_info_and_status, wait_for_volume_process_to_be_online, + setup_volume, cleanup_volume) +from glustolibs.gluster.lib_utils import get_servers_bricks_dict +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.brick_ops import replace_brick +from glustolibs.gluster.heal_libs import (get_self_heal_daemon_pid, + do_bricks_exist_in_shd_volfile, + is_shd_daemonized) +from glustolibs.gluster.volume_ops import get_volume_list + + +class SelfHealDaemonProcessTestsWithMultipleVolumes(GlusterBaseClass): + """ + SelfHealDaemonProcessTestsWithMultipleVolumes contains tests which + verifies the self-heal daemon process on multiple volumes running. 
+ """ + def setUp(self): + """ + setup volume and initialize necessary variables + which is used in tests + """ + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup Volume for all the volume types + self.volume_configs = [] + for volume_type in self.default_volume_type_config: + self.volume_configs.append( + {'name': 'testvol_%s' % volume_type, + 'servers': self.servers, + 'voltype': self.default_volume_type_config[volume_type]}) + + for volume_config in self.volume_configs[1:]: + ret = setup_volume(mnode=self.mnode, + all_servers_info=self.all_servers_info, + volume_config=volume_config, + multi_vol=True) + volname = volume_config['name'] + if not ret: + raise ExecutionError("Failed to setup Volume" + " %s" % volname) + g.log.info("Successful in setting volume %s", volname) + + # Verify volume's all process are online for 60 sec + ret = wait_for_volume_process_to_be_online(self.mnode, volname, 60) + if not ret: + raise ExecutionError("Volume %s : All process are not online" + % volname) + g.log.info("Successfully Verified volume %s processes are online", + volname) + + # Verfiy glustershd process releases its parent process + ret = is_shd_daemonized(self.servers) + if not ret: + raise ExecutionError("Self Heal Daemon process was still" + " holding parent process.") + g.log.info("Self Heal Daemon processes are online") + + self.glustershd = "/var/lib/glusterd/glustershd/glustershd-server.vol" + + def tearDown(self): + """ + Clean up the volume and umount volume from client + """ + + # Cleanup volume + volume_list = get_volume_list(self.mnode) + for volume in volume_list: + ret = cleanup_volume(self.mnode, volume) + if not ret: + raise ExecutionError("Failed to cleanup Volume %s" % volume) + g.log.info("Successfully Cleaned up all Volumes") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_impact_of_replace_brick_on_glustershd(self): + """ + Test Script to verify the glustershd server 
vol file + has only entries for replicate volumes + 1.Create multiple volumes and start all volumes + 2.Check the glustershd processes - Only 1 glustershd should be listed + 3.Do replace brick on the replicate volume + 4.Confirm that the brick is replaced + 5.Check the glustershd processes - Only 1 glustershd should be listed + and pid should be different + 6.glustershd server vol should be updated with new bricks + """ + # Check the self-heal daemon process + ret, glustershd_pids = get_self_heal_daemon_pid(self.servers) + self.assertTrue(ret, ("Either no self heal daemon process found or " + "more than one self heal daemon process " + "found : %s" % glustershd_pids)) + g.log.info("Successful in getting single self heal daemon process" + " on all nodes %s", self.servers) + + volume_list = get_volume_list(self.mnode) + for volume in volume_list: + + # Log Volume Info and Status before replacing brick + ret = log_volume_info_and_status(self.mnode, volume) + self.assertTrue(ret, ("Logging volume info and status " + "failed on volume %s", volume)) + g.log.info("Successful in logging volume info and status " + "of volume %s", volume) + + # Selecting a random source brick to replace + src_brick = choice(get_all_bricks(self.mnode, volume)) + src_node, original_brick = src_brick.split(":") + + # Creating a random destination brick in such a way + # that the brick is select from the same node but always + # picks a different from the original brick + list_of_bricks = [ + brick for brick in get_servers_bricks_dict( + src_node, self.all_servers_info)[src_node] + if brick not in original_brick] + dst_brick = ('{}:{}/{}_replaced'.format( + src_node, choice(list_of_bricks), + original_brick.split('/')[::-1][0])) + + # Replace brick for the volume + ret, _, _ = replace_brick(self.mnode, volume, + src_brick, dst_brick) + self.assertFalse(ret, "Failed to replace brick " + "from the volume %s" % volume) + g.log.info("Successfully replaced faulty brick from " + "the volume %s", 
volume) + + # Verify all volume process are online + ret = wait_for_volume_process_to_be_online(self.mnode, volume) + self.assertTrue(ret, "Volume %s : All process are not online" + % volume) + g.log.info("Volume %s : All process are online", volume) + + # Check the self-heal daemon process after replacing brick + ret, pid_after_replace = get_self_heal_daemon_pid(self.servers) + self.assertTrue(ret, "Either no self heal daemon process " + "found or more than one self heal " + "daemon process found : %s" % pid_after_replace) + g.log.info("Successful in getting Single self heal " + " daemon process on all nodes %s", self.servers) + + # Compare the glustershd pids + self.assertNotEqual(glustershd_pids, pid_after_replace, + "Self heal daemon process should be different " + "after replacing bricks in %s volume" + % volume) + g.log.info("EXPECTED: Self heal daemon process should be different" + " after replacing bricks in replicate volume") + + # Get the bricks for the volume + bricks_list = get_all_bricks(self.mnode, volume) + g.log.info("Brick List : %s", bricks_list) + + # Validate the bricks present in volume info with + # glustershd server volume file + ret = do_bricks_exist_in_shd_volfile(self.mnode, volume, + bricks_list) + self.assertTrue(ret, ("Brick List from volume info is " + "different from glustershd server " + "volume file. Please check log file " + "for details")) + g.log.info("Bricks in volume %s exists in glustershd server " + "volume file", volume) diff --git a/tests/functional/afr/heal/test_no_glustershd_with_distribute.py b/tests/functional/afr/heal/test_no_glustershd_with_distribute.py index d2b43bfe3..bbefe0cff 100644 --- a/tests/functional/afr/heal/test_no_glustershd_with_distribute.py +++ b/tests/functional/afr/heal/test_no_glustershd_with_distribute.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2021 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -58,7 +58,7 @@ class SelfHealDaemonProcessTestsWithMultipleVolumes(GlusterBaseClass): for volume_config in cls.volume_configs: ret = setup_volume(mnode=cls.mnode, all_servers_info=cls.all_servers_info, - volume_config=volume_config) + volume_config=volume_config, multi_vol=True) volname = volume_config['name'] if not ret: raise ExecutionError("Failed to setup Volume" diff --git a/tests/functional/afr/heal/test_self_heal.py b/tests/functional/afr/heal/test_self_heal.py index 6bbcccdfc..4fb6dea7e 100755 --- a/tests/functional/afr/heal/test_self_heal.py +++ b/tests/functional/afr/heal/test_self_heal.py @@ -16,14 +16,12 @@ # pylint: disable=too-many-lines from glusto.core import Glusto as g - from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) from glustolibs.gluster.exceptions import ExecutionError -from glustolibs.gluster.volume_ops import set_volume_options +from glustolibs.gluster.volume_ops import get_volume_options from glustolibs.gluster.volume_libs import ( verify_all_process_of_volume_are_online, wait_for_volume_process_to_be_online) -from glustolibs.gluster.volume_libs import expand_volume from glustolibs.gluster.brick_libs import (select_bricks_to_bring_offline, bring_bricks_offline, bring_bricks_online, @@ -34,8 +32,6 @@ from glustolibs.gluster.heal_libs import ( is_heal_complete, is_volume_in_split_brain, is_shd_daemonized) -from glustolibs.gluster.rebalance_ops import (rebalance_start, - wait_for_rebalance_to_complete) from glustolibs.gluster.heal_ops import trigger_heal from glustolibs.misc.misc_libs import upload_scripts from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs, @@ -43,12 +39,12 @@ from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs, @runs_on([['replicated', 'distributed-replicated'], - ['glusterfs', 
'cifs', 'nfs']]) + ['glusterfs', 'cifs']]) class TestSelfHeal(GlusterBaseClass): """ Description: - Arbiter Test cases related to - healing in default configuration of the volume + AFR Test cases related to healing in + default configuration of the volume """ @classmethod @@ -121,12 +117,15 @@ class TestSelfHeal(GlusterBaseClass): # Calling GlusterBaseClass teardown self.get_super_method(self, 'tearDown')() - def test_data_self_heal_daemon_off(self): + def test_data_self_heal_command(self): """ Test Data-Self-Heal (heal command) Description: - - set the volume option + - get the client side healing volume options and check + if they have already been disabled by default + NOTE: Client side healing has been disabled by default + since GlusterFS 6.0 "metadata-self-heal": "off" "entry-self-heal": "off" "data-self-heal": "off" @@ -135,7 +134,7 @@ class TestSelfHeal(GlusterBaseClass): - set the volume option "self-heal-daemon": "off" - bring down all bricks processes from selected set - - Get areeual after getting bricks offline and compare with + - Get arequal after getting bricks offline and compare with arequal before getting bricks offline - modify the data - bring bricks online @@ -144,8 +143,6 @@ class TestSelfHeal(GlusterBaseClass): - check daemons and start healing - check if heal is completed - check for split-brain - - add bricks - - do rebalance - create 5k files - while creating files - kill bricks and bring bricks online one by one in cycle @@ -153,15 +150,16 @@ class TestSelfHeal(GlusterBaseClass): """ # pylint: disable=too-many-statements - # Setting options - g.log.info('Setting options...') - options = {"metadata-self-heal": "off", - "entry-self-heal": "off", - "data-self-heal": "off"} - ret = set_volume_options(self.mnode, self.volname, options) - self.assertTrue(ret, 'Failed to set options %s' % options) - g.log.info("Successfully set %s for volume %s", - options, self.volname) + # Checking if Client side healing options are disabled by default + 
g.log.info('Checking Client side healing is disabled by default') + options = ('cluster.metadata-self-heal', 'cluster.data-self-heal', + 'cluster.entry-self-heal') + for option in options: + ret = get_volume_options(self.mnode, self.volname, option)[option] + self.assertTrue(bool(ret == 'off' or ret == 'off (DEFAULT)'), + "{} option is not disabled by default" + .format(option)) + g.log.info("Client side healing options are disabled by default") # Creating files on client side for mount_obj in self.mounts: @@ -193,20 +191,10 @@ class TestSelfHeal(GlusterBaseClass): g.log.info('Getting arequal before getting bricks offline ' 'is successful') - # Setting options - g.log.info('Setting options...') - options = {"self-heal-daemon": "off"} - ret = set_volume_options(self.mnode, self.volname, options) - self.assertTrue(ret, 'Failed to set options %s' % options) - g.log.info("Option 'self-heal-daemon' is set to 'off' successfully") - # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -269,13 +257,6 @@ class TestSelfHeal(GlusterBaseClass): g.log.info('Bringing bricks %s online is successful', bricks_to_bring_offline) - # Setting options - g.log.info('Setting options...') - options = {"self-heal-daemon": "on"} - ret = set_volume_options(self.mnode, self.volname, options) - self.assertTrue(ret, 'Failed to set options %s' % options) - g.log.info("Option 'self-heal-daemon' is set to 'on' successfully") - # Wait for volume processes to be online g.log.info("Wait for volume processes to be online") ret = 
wait_for_volume_process_to_be_online(self.mnode, self.volname) @@ -284,7 +265,7 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("Successful in waiting for volume %s processes to be " "online", self.volname) - # Verify volume's all process are online + # Verify volume's all processes are online g.log.info("Verifying volume's all process are online") ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) self.assertTrue(ret, ("Volume %s : All process are not online" @@ -316,23 +297,6 @@ class TestSelfHeal(GlusterBaseClass): self.assertFalse(ret, 'Volume is in split-brain state') g.log.info('Volume is not in split-brain state') - # Add bricks - g.log.info("Start adding bricks to volume...") - ret = expand_volume(self.mnode, self.volname, self.servers, - self.all_servers_info) - self.assertTrue(ret, ("Failed to expand the volume %s", self.volname)) - g.log.info("Expanding volume is successful on " - "volume %s", self.volname) - - # Do rebalance - ret, _, _ = rebalance_start(self.mnode, self.volname) - self.assertEqual(ret, 0, 'Failed to start rebalance') - g.log.info('Rebalance is started') - - ret = wait_for_rebalance_to_complete(self.mnode, self.volname) - self.assertTrue(ret, 'Rebalance is not completed') - g.log.info('Rebalance is completed successfully') - # Create 1k files self.all_mounts_procs = [] for mount_obj in self.mounts: @@ -405,50 +369,26 @@ class TestSelfHeal(GlusterBaseClass): ) self.io_validation_complete = True - def test_self_heal_50k_files_heal_command_by_add_brick(self): + def test_self_heal_50k_files_heal_default(self): """ - Test self-heal of 50k files (heal command + Test self-heal of 50k files by heal default Description: - - set the volume option - "metadata-self-heal": "off" - "entry-self-heal": "off" - "data-self-heal": "off" - "self-heal-daemon": "off" - bring down all bricks processes from selected set - create IO (50k files) - Get arequal before getting bricks online - - bring bricks online - - set the volume option 
- "self-heal-daemon": "on" - - check for daemons - - start healing + - check for daemons to come online + - heal daemon should pick up entries to heal automatically - check if heal is completed - check for split-brain - get arequal after getting bricks online and compare with arequal before getting bricks online - - add bricks - - do rebalance - - get arequal after adding bricks and compare with - arequal after getting bricks online """ # pylint: disable=too-many-locals,too-many-statements - # Setting options - g.log.info('Setting options...') - options = {"metadata-self-heal": "off", - "entry-self-heal": "off", - "data-self-heal": "off", - "self-heal-daemon": "off"} - ret = set_volume_options(self.mnode, self.volname, options) - self.assertTrue(ret, 'Failed to set options') - g.log.info("Successfully set %s for volume %s", options, self.volname) # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -500,13 +440,6 @@ class TestSelfHeal(GlusterBaseClass): g.log.info('Bringing bricks %s online is successful', bricks_to_bring_offline) - # Setting options - g.log.info('Setting options...') - options = {"self-heal-daemon": "on"} - ret = set_volume_options(self.mnode, self.volname, options) - self.assertTrue(ret, 'Failed to set options %s' % options) - g.log.info("Option 'self-heal-daemon' is set to 'on' successfully") - # Wait for volume processes to be online g.log.info("Wait for volume processes to be online") ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) @@ -528,11 +461,7 @@ class 
TestSelfHeal(GlusterBaseClass): self.assertTrue(ret, "Either No self heal daemon process found") g.log.info("All self-heal-daemons are online") - # Start healing - ret = trigger_heal(self.mnode, self.volname) - self.assertTrue(ret, 'Heal is not started') - g.log.info('Healing is started') - + # Default Heal testing, wait for shd to pick up healing # Monitor heal completion ret = monitor_heal_completion(self.mnode, self.volname, timeout_period=3600) @@ -557,40 +486,8 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks online # and after bringing bricks online - self.assertItemsEqual(result_before_online, result_after_online, - 'Checksums before and ' - 'after bringing bricks online are not equal') + self.assertEqual(result_before_online, result_after_online, + 'Checksums before and after bringing bricks online ' + 'are not equal') g.log.info('Checksums before and after bringing bricks online ' 'are equal') - - # Add bricks - g.log.info("Start adding bricks to volume...") - ret = expand_volume(self.mnode, self.volname, self.servers, - self.all_servers_info) - self.assertTrue(ret, ("Failed to expand the volume when IO in " - "progress on volume %s", self.volname)) - g.log.info("Expanding volume is successful on volume %s", self.volname) - - # Do rebalance - ret, _, _ = rebalance_start(self.mnode, self.volname) - self.assertEqual(ret, 0, 'Failed to start rebalance') - g.log.info('Rebalance is started') - - ret = wait_for_rebalance_to_complete(self.mnode, self.volname) - self.assertTrue(ret, 'Rebalance is not completed') - g.log.info('Rebalance is completed successfully') - - # Get arequal after adding bricks - g.log.info('Getting arequal after adding bricks...') - ret, result_after_adding_bricks = collect_mounts_arequal(self.mounts) - self.assertTrue(ret, 'Failed to get arequal') - g.log.info('Getting arequal after getting bricks ' - 'is successful') - - # Checking arequals after bringing bricks online - # and after adding bricks - 
self.assertItemsEqual(result_after_online, result_after_adding_bricks, - 'Checksums after bringing bricks online and ' - 'after adding bricks are not equal') - g.log.info('Checksums after bringing bricks online and ' - 'after adding bricks are equal') diff --git a/tests/functional/afr/heal/test_self_heal_daemon_process.py b/tests/functional/afr/heal/test_self_heal_daemon_process.py index 5c88460f6..ea598b1fc 100755 --- a/tests/functional/afr/heal/test_self_heal_daemon_process.py +++ b/tests/functional/afr/heal/test_self_heal_daemon_process.py @@ -449,10 +449,7 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): # select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # bring bricks offline g.log.info("Going to bring down the brick process " @@ -533,10 +530,7 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) diff --git a/tests/functional/afr/heal/test_self_heal_with_link_files.py b/tests/functional/afr/heal/test_self_heal_with_link_files.py new file mode 100644 index 000000000..d029c3d9e --- /dev/null +++ b/tests/functional/afr/heal/test_self_heal_with_link_files.py @@ -0,0 +1,405 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from random import choice + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + are_bricks_online, + get_all_bricks) +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_volume_in_split_brain, + is_heal_complete) +from glustolibs.gluster.lib_utils import collect_bricks_arequal +from glustolibs.gluster.volume_libs import (get_subvols, + replace_brick_from_volume) +from glustolibs.io.utils import collect_mounts_arequal + + +@runs_on([['distributed-replicated', 'replicated'], ['glusterfs']]) +class TestHealWithLinkFiles(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to setup and mount volume") + + self.first_client = self.mounts[0].client_system + self.mountpoint = self.mounts[0].mountpoint + + def tearDown(self): + + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise 
ExecutionError("Failed to cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _create_files_and_dirs_on_mount_point(self, second_attempt=False): + """A function to create files and dirs on mount point""" + # Create a parent directory test_link_self_heal on mount point + if not second_attempt: + ret = mkdir(self.first_client, + '{}/{}'.format(self.mountpoint, + 'test_link_self_heal')) + self.assertTrue(ret, "Failed to create dir test_link_self_heal") + + # Create dirctories and files inside directory test_link_self_heal + io_cmd = ("for i in `seq 1 5`; do mkdir dir.$i; " + "for j in `seq 1 10`; do dd if=/dev/random " + "of=dir.$i/file.$j bs=1k count=$j; done; done") + if second_attempt: + io_cmd = ("for i in `seq 1 5` ; do for j in `seq 1 10`; " + "do dd if=/dev/random of=sym_link_dir.$i/" + "new_file.$j bs=1k count=$j; done; done ") + cmd = ("cd {}/test_link_self_heal;{}".format(self.mountpoint, io_cmd)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to create dirs and files inside") + + def _create_soft_links_to_directories(self): + """Create soft links to directories""" + cmd = ("cd {}/test_link_self_heal; for i in `seq 1 5`; do ln -s " + "dir.$i sym_link_dir.$i; done".format(self.mountpoint)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to create soft links to dirs") + + def _verify_soft_links_to_dir(self, option=0): + """Verify soft links to dir""" + + cmd_list = [ + ("for i in `seq 1 5`; do stat -c %F sym_link_dir.$i | " + "grep -F 'symbolic link'; if [ $? -ne 0 ]; then exit 1;" + " fi ; done; for i in `seq 1 5` ; do readlink sym_link_dir.$i | " + "grep \"dir.$i\"; if [ $? -ne 0 ]; then exit 1; fi; done; "), + ("for i in `seq 1 5`; do for j in `seq 1 10`; do ls " + "dir.$i/new_file.$j; if [ $? 
-ne 0 ]; then exit 1; fi; done; " + "done")] + + # Generate command to check according to option + if option == 2: + verify_cmd = "".join(cmd_list) + else: + verify_cmd = cmd_list[option] + + cmd = ("cd {}/test_link_self_heal; {}".format(self.mountpoint, + verify_cmd)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Symlinks aren't proper") + + def _create_hard_links_to_files(self, second_attempt=False): + """Create hard links to files""" + io_cmd = ("for i in `seq 1 5`;do for j in `seq 1 10`;do ln " + "dir.$i/file.$j dir.$i/link_file.$j;done; done") + if second_attempt: + io_cmd = ("for i in `seq 1 5`; do mkdir new_dir.$i; for j in " + "`seq 1 10`; do ln dir.$i/file.$j new_dir.$i/new_file." + "$j;done; done;") + + cmd = ("cd {}/test_link_self_heal;{}".format(self.mountpoint, io_cmd)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to create hard links to files") + + def _verify_hard_links_to_files(self, second_set=False): + """Verify if hard links to files""" + file_to_compare = "dir.$i/link_file.$j" + if second_set: + file_to_compare = "new_dir.$i/new_file.$j" + + cmd = ("cd {}/test_link_self_heal;for i in `seq 1 5`; do for j in `seq" + " 1 10`;do if [ `stat -c %i dir.$i/file.$j` -ne `stat -c %i " + "{}` ];then exit 1; fi; done; done" + .format(self.mountpoint, file_to_compare)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to verify hard links to files") + + def _bring_bricks_offline(self): + """Brings bricks offline and confirms if they are offline""" + # Select bricks to bring offline from a replica set + subvols_dict = get_subvols(self.mnode, self.volname) + subvols = subvols_dict['volume_subvols'] + self.bricks_to_bring_offline = [] + for subvol in subvols: + self.bricks_to_bring_offline.append(subvol[0]) + + # Bring bricks offline + ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' % + 
self.bricks_to_bring_offline)
+
+        ret = are_bricks_offline(self.mnode, self.volname,
+                                 self.bricks_to_bring_offline)
+        self.assertTrue(ret, 'Bricks %s are not offline'
+                        % self.bricks_to_bring_offline)
+        g.log.info('Bringing bricks %s offline is successful',
+                   self.bricks_to_bring_offline)
+
+    def _restart_volume_and_bring_all_offline_bricks_online(self):
+        """Restart volume and bring all offline bricks online"""
+        ret = bring_bricks_online(self.mnode, self.volname,
+                                  self.bricks_to_bring_offline,
+                                  bring_bricks_online_methods=[
+                                      'volume_start_force'])
+        self.assertTrue(ret, 'Failed to bring bricks %s online' %
+                        self.bricks_to_bring_offline)
+
+        # Check if bricks are back online or not
+        ret = are_bricks_online(self.mnode, self.volname,
+                                self.bricks_to_bring_offline)
+        self.assertTrue(ret, 'Bricks not online %s even after restart' %
+                        self.bricks_to_bring_offline)
+
+        g.log.info('Bringing bricks %s online is successful',
+                   self.bricks_to_bring_offline)
+
+    def _check_arequal_on_bricks_with_a_specific_arequal(self, arequal,
+                                                         brick_list):
+        """
+        Compare an initial arequal checksum with bricks from a given brick list
+        """
+        init_val = arequal[0].splitlines()[-1].split(':')[-1]
+        ret, arequals = collect_bricks_arequal(brick_list)
+        self.assertTrue(ret, 'Failed to get arequal on bricks')
+        for brick_arequal in arequals:
+            brick_total = brick_arequal.splitlines()[-1].split(':')[-1]
+            self.assertEqual(init_val, brick_total, 'Arequals not matching')
+
+    def _check_arequal_checksum_for_the_volume(self):
+        """
+        Check if arequals of mount point and bricks
+        are the same. 
+ """ + if self.volume_type == "replicated": + # Check arequals for "replicated" + brick_list = get_all_bricks(self.mnode, self.volname) + + # Get arequal before getting bricks offline + ret, arequals = collect_mounts_arequal([self.mounts[0]]) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal before getting bricks offline ' + 'is successful') + + # Get arequal on bricks and compare with mount_point_total + self._check_arequal_on_bricks_with_a_specific_arequal( + arequals, brick_list) + + # Check arequals for "distributed-replicated" + if self.volume_type == "distributed-replicated": + # Get the subvolumes + subvols_dict = get_subvols(self.mnode, self.volname) + num_subvols = len(subvols_dict['volume_subvols']) + + # Get arequals and compare + for i in range(0, num_subvols): + # Get arequal for first brick + brick_list = subvols_dict['volume_subvols'][i] + ret, arequals = collect_bricks_arequal([brick_list[0]]) + self.assertTrue(ret, 'Failed to get arequal on first brick') + + # Get arequal for every brick and compare with first brick + self._check_arequal_on_bricks_with_a_specific_arequal( + arequals, brick_list) + + def _check_heal_is_completed_and_not_in_split_brain(self): + """Check if heal is completed and volume not in split brain""" + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') + + # Check if volume is in split brian or not + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + def _check_if_there_are_files_and_dirs_to_be_healed(self): + """Check if there are files and dirs to be healed""" + ret = is_heal_complete(self.mnode, self.volname) + self.assertFalse(ret, 'Heal is completed') + g.log.info('Heal is pending') + + def _wait_for_heal_is_completed(self): + """Check if heal is 
completed""" + ret = monitor_heal_completion(self.mnode, self.volname, + timeout_period=3600) + self.assertTrue(ret, 'Heal has not yet completed') + + def _replace_one_random_brick(self): + """Replace one random brick from the volume""" + brick = choice(get_all_bricks(self.mnode, self.volname)) + ret = replace_brick_from_volume(self.mnode, self.volname, + self.servers, self.all_servers_info, + src_brick=brick) + self.assertTrue(ret, "Failed to replace brick %s " % brick) + g.log.info("Successfully replaced brick %s", brick) + + def test_self_heal_of_hard_links(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create a directory and create files and directories inside it + on mount point. + 3. Collect and compare arequal-checksum according to the volume type + for bricks. + 4. Bring down brick processes accoding to the volume type. + 5. Create hard links for the files created in step 2. + 6. Check if heal info is showing all the files and dirs to be healed. + 7. Bring brack all brick processes which were killed. + 8. Wait for heal to complete on the volume. + 9. Check if heal is complete and check if volume is in split brain. + 10. Collect and compare arequal-checksum according to the volume type + for bricks. + 11. Verify if hard links are proper or not. + 12. Do a lookup on mount point. + 13. Bring down brick processes accoding to the volume type. + 14. Create a second set of hard links to the files. + 15. Check if heal info is showing all the files and dirs to be healed. + 16. Bring brack all brick processes which were killed. + 17. Wait for heal to complete on the volume. + 18. Check if heal is complete and check if volume is in split brain. + 19. Collect and compare arequal-checksum according to the volume type + for bricks. + 20. Verify both set of hard links are proper or not. + 21. Do a lookup on mount point. + 22. Pick a random brick and replace it. + 23. Wait for heal to complete on the volume. + 24. 
Check if heal is complete and check if volume is in split brain. + 25. Collect and compare arequal-checksum according to the volume type + for bricks. + 26. Verify both set of hard links are proper or not. + 27. Do a lookup on mount point. + """ + # Create a directory and create files and directories inside it + # on mount point + self._create_files_and_dirs_on_mount_point() + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + for attempt in (False, True): + + # Bring down brick processes accoding to the volume type + self._bring_bricks_offline() + + # Create hardlinks for the files created in step 2 + self._create_hard_links_to_files(second_attempt=attempt) + + # Check if heal info is showing all the files and dirs to + # be healed + self._check_if_there_are_files_and_dirs_to_be_healed() + + # Bring back all brick processes which were killed + self._restart_volume_and_bring_all_offline_bricks_online() + + # Wait for heal to complete on the volume + self._wait_for_heal_is_completed() + + # Check if heal is complete and check if volume is in split brain + self._check_heal_is_completed_and_not_in_split_brain() + + # Collect and compare arequal-checksum according to the volume + # type for bricks + self._check_arequal_checksum_for_the_volume() + + # Verify if hard links are proper or not + self._verify_hard_links_to_files() + if attempt: + self._verify_hard_links_to_files(second_set=attempt) + + # Pick a random brick and replace it + self._replace_one_random_brick() + + # Wait for heal to complete on the volume + self._wait_for_heal_is_completed() + + # Check if heal is complete and check if volume is in split brain + self._check_heal_is_completed_and_not_in_split_brain() + + # Collect and compare arequal-checksum according to the volume + # type for bricks + self._check_arequal_checksum_for_the_volume() + + # Verify if hard links are proper or not + self._verify_hard_links_to_files() 
+ self._verify_hard_links_to_files(second_set=True) + + def test_self_heal_of_soft_links(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create a directory and create files and directories inside it + on mount point. + 3. Collect and compare arequal-checksum according to the volume type + for bricks. + 4. Bring down brick processes accoding to the volume type. + 5. Create soft links for the dirs created in step 2. + 6. Verify if soft links are proper or not. + 7. Add files through the soft links. + 8. Verify if the soft links are proper or not. + 9. Check if heal info is showing all the files and dirs to be healed. + 10. Bring brack all brick processes which were killed. + 11. Wait for heal to complete on the volume. + 12. Check if heal is complete and check if volume is in split brain. + 13. Collect and compare arequal-checksum according to the volume type + for bricks. + 14. Verify if soft links are proper or not. + 15. Do a lookup on mount point. + """ + # Create a directory and create files and directories inside it + # on mount point + self._create_files_and_dirs_on_mount_point() + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + + # Bring down brick processes accoding to the volume type + self._bring_bricks_offline() + + # Create soft links for the dirs created in step 2 + self._create_soft_links_to_directories() + + # Verify if soft links are proper or not + self._verify_soft_links_to_dir() + + # Add files through the soft links + self._create_files_and_dirs_on_mount_point(second_attempt=True) + + # Verify if the soft links are proper or not + self._verify_soft_links_to_dir(option=1) + + # Check if heal info is showing all the files and dirs to + # be healed + self._check_if_there_are_files_and_dirs_to_be_healed() + + # Bring back all brick processes which were killed + self._restart_volume_and_bring_all_offline_bricks_online() + + # Wait for 
heal to complete on the volume
+        self._wait_for_heal_is_completed()
+
+        # Check if heal is complete and check if volume is in split brain
+        self._check_heal_is_completed_and_not_in_split_brain()
+
+        # Verify if soft links are proper or not
+        self._verify_soft_links_to_dir(option=2)
diff --git a/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py b/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py
new file mode 100644
index 000000000..37bd2ec52
--- /dev/null
+++ b/tests/functional/afr/heal/test_self_heal_with_meta_data_entry_and_files_removed.py
@@ -0,0 +1,600 @@
+# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + are_bricks_online, + get_all_bricks) +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_volume_in_split_brain, + is_heal_complete, + enable_granular_heal, + disable_granular_heal) +from glustolibs.gluster.lib_utils import (add_user, del_user, group_del, + group_add, collect_bricks_arequal) +from glustolibs.gluster.volume_ops import get_volume_options +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.io.utils import collect_mounts_arequal + + +@runs_on([['distributed-replicated', 'replicated'], ['glusterfs']]) +class TestHealWithLinkFiles(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + self.first_client = self.mounts[0].client_system + self.mountpoint = self.mounts[0].mountpoint + self.user_group_created = False + + # If test case running is test_self_heal_meta_data + # create user and group + test_name_splitted = self.id().split('.') + test_id = test_name_splitted[len(test_name_splitted) - 1] + if test_id == 'test_self_heal_meta_data': + + # Create non-root group + if not group_add(self.first_client, 'qa_all'): + raise ExecutionError("Failed to create group qa_all") + + # Create non-root users + self.users = ('qa_func', 'qa_system', 'qa_perf') + for user in self.users: + if not add_user(self.first_client, user, group='qa_all'): + raise ExecutionError("Failed to create user {}" + .format(user)) + + self.user_group_created = True + g.log.info("Successfully created all users.") + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to setup and mount volume") + + def tearDown(self): + + # Delete 
non-root users and group if created + if self.user_group_created: + + # Delete non-root users + for user in self.users: + del_user(self.first_client, user) + g.log.info("Successfully deleted all users") + + # Delete non-root group + group_del(self.first_client, 'qa_all') + + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _set_granular_heal_to_on_or_off(self, enabled=False): + """Set granular heal to ON or OFF""" + granular = get_volume_options(self.mnode, self.volname, + 'granular-entry-heal') + if enabled: + if granular['cluster.granular-entry-heal'] != 'on': + ret = enable_granular_heal(self.mnode, self.volname) + self.assertTrue(ret, + "Unable to set granular-entry-heal to on") + else: + if granular['cluster.granular-entry-heal'] == 'on': + ret = disable_granular_heal(self.mnode, self.volname) + self.assertTrue(ret, + "Unable to set granular-entry-heal to off") + + def _run_cmd(self, io_cmd, err_msg): + """Run cmd and show error message if it fails""" + cmd = ("cd {}/test_self_heal;{}".format(self.mountpoint, io_cmd)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, err_msg) + + def _create_files_and_dirs_on_mount_point(self, index, second_set=False): + """A function to create files and dirs on mount point""" + # Create a parent directory test_self_heal on mount point + if not second_set: + ret = mkdir(self.first_client, '{}/{}'.format( + self.mountpoint, 'test_self_heal')) + self.assertTrue(ret, "Failed to create dir test_self_heal") + + # Create dirctories and files inside directory test_self_heal + io_cmd = ("for i in `seq 1 50`; do mkdir dir.$i; dd if=/dev/random" + " of=file.$i count=1K bs=$i; done", + + "for i in `seq 1 100`; do mkdir dir.$i; for j in `seq 1 5`;" + " do dd if=/dev/random of=dir.$i/file.$j bs=1K count=$j" + ";done;done", + + "for i in `seq 1 10`; do mkdir 
l1_dir.$i; for j in `seq " + "1 5`; do mkdir l1_dir.$i/l2_dir.$j; for k in `seq 1 10`;" + " do dd if=/dev/random of=l1_dir.$i/l2_dir.$j/test.$k" + " bs=1k count=$k; done; done; done;", + + "for i in `seq 51 100`; do mkdir new_dir.$i; for j in `seq" + " 1 10`; do dd if=/dev/random of=new_dir.$i/new_file.$j " + "bs=1K count=$j; done; dd if=/dev/random of=new_file.$i" + " count=1K bs=$i; done ;") + self._run_cmd( + io_cmd[index], "Failed to create dirs and files inside") + + def _delete_files_and_dirs(self): + """Delete files and dirs from mount point""" + io_cmd = ("for i in `seq 1 50`; do rm -rf dir.$i; rm -f file.$i;done") + self._run_cmd(io_cmd, "Failed to delete dirs and files") + + def _rename_files_and_dirs(self): + """Rename files and dirs from mount point""" + io_cmd = ("for i in `seq 51 100`; do mv new_file.$i renamed_file.$i;" + " for j in `seq 1 10`; do mv new_dir.$i/new_file.$j " + "new_dir.$i/renamed_file.$j ; done ; mv new_dir.$i " + "renamed_dir.$i; done;") + self._run_cmd(io_cmd, "Failed to rename dirs and files") + + def _change_meta_deta_of_dirs_and_files(self): + """Change meta data of dirs and files""" + cmds = ( + # Change permission + "for i in `seq 1 100`; do chmod 555 dir.$i; done; " + "for i in `seq 1 50`; do for j in `seq 1 5`; do chmod 666 " + "dir.$i/file.$j; done; done; for i in `seq 51 100`; do for " + "j in `seq 1 5`;do chmod 444 dir.$i/file.$j; done; done;", + + # Change ownership + "for i in `seq 1 35`; do chown -R qa_func dir.$i; done; " + "for i in `seq 36 70`; do chown -R qa_system dir.$i; done; " + "for i in `seq 71 100`; do chown -R qa_perf dir.$i; done;", + + # Change group + "for i in `seq 1 100`; do chgrp -R qa_all dir.$i; done;") + + for io_cmd in cmds: + self._run_cmd(io_cmd, + "Failed to change meta data on dirs and files") + g.log.info("Successfully changed meta data on dirs and files") + + def _verify_meta_data_of_files_and_dirs(self): + """Verify meta data of files and dirs""" + cmds = ( + # Verify permissions + "for i 
in `seq 1 50`; do stat -c %a dir.$i | grep -F \"555\";" + " if [ $? -ne 0 ]; then exit 1; fi; for j in `seq 1 5` ; do " + "stat -c %a dir.$i/file.$j | grep -F \"666\"; if [ $? -ne 0 ]" + "; then exit 1; fi; done; done; for i in `seq 51 100`; do " + "stat -c %a dir.$i | grep -F \"555\";if [ $? -ne 0 ]; then " + "exit 1; fi; for j in `seq 1 5`; do stat -c %a dir.$i/file.$j" + " | grep -F \"444\"; if [ $? -ne 0 ]; then exit 1; fi; done;" + "done;", + + # Verify ownership + "for i in `seq 1 35`; do stat -c %U dir.$i | grep -F " + "\"qa_func\"; if [ $? -ne 0 ]; then exit 1; fi; for j in " + "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F " + "\"qa_func\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;" + " for i in `seq 36 70` ; do stat -c %U dir.$i | grep -F " + "\"qa_system\"; if [ $? -ne 0 ]; then exit 1; fi; for j in " + "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F " + "\"qa_system\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;" + " for i in `seq 71 100` ; do stat -c %U dir.$i | grep -F " + "\"qa_perf\"; if [ $? -ne 0 ]; then exit 1; fi; for j in " + "`seq 1 5`; do stat -c %U dir.$i/file.$j | grep -F " + "\"qa_perf\"; if [ $? -ne 0 ]; then exit 1; fi; done; done;", + + # Verify group + "for i in `seq 1 100`; do stat -c %G dir.$i | grep -F " + "\"qa_all\"; if [ $? -ne 0 ]; then exit 1; fi; for j in " + "`seq 1 5`; do stat -c %G dir.$i/file.$j | grep -F " + "\"qa_all\"; if [ $? 
-ne 0 ]; then exit 1; fi; done; done;") + + for io_cmd in cmds: + self._run_cmd(io_cmd, "Meta data of dirs and files not proper") + + def _set_and_remove_extended_attributes(self, remove=False): + """Set and remove extended attributes""" + # Command to set extended attribute to files and dirs + io_cmd = ("for i in `seq 1 100`; do setfattr -n trusted.name -v " + "testing_xattr_selfheal_on_dirs dir.$i; for j in `seq 1 " + "5`;do setfattr -n trusted.name -v " + "testing_xattr_selfheal_on_files dir.$i/file.$j; done; " + "done;") + err_msg = "Failed to set extended attributes to files and dirs" + if remove: + # Command to remove extended attribute set on files and dirs + io_cmd = ("for i in `seq 1 100`; do setfattr -x trusted.name " + "dir.$i; for j in `seq 1 5`; do setfattr -x " + "trusted.name dir.$i/file.$j ; done ; done ;") + err_msg = "Failed to remove extended attributes to files and dirs" + + self._run_cmd(io_cmd, err_msg) + + def _verify_if_extended_attributes_are_proper(self, remove=False): + """Verify if extended attributes are set or remove properly""" + io_cmd = ("for i in `seq 1 100`; do getfattr -n trusted.name -e text " + "dir.$i | grep -F 'testing_xattr_selfheal_on_dirs'; if [ $? " + "-ne 0 ]; then exit 1 ; fi ; for j in `seq 1 5` ; do " + "getfattr -n trusted.name -e text dir.$i/file.$j | grep -F " + "'testing_xattr_selfheal_on_files'; if [ $? -ne 0 ]; then " + "exit 1; fi; done; done;") + err_msg = "Extended attributes on files and dirs are not proper" + if remove: + io_cmd = ("for i in `seq 1 100`; do getfattr -n trusted.name -e " + "text dir.$i; if [ $? -eq 0 ]; then exit 1; fi; for j in" + " `seq 1 5`; do getfattr -n trusted.name -e text " + "dir.$i/file.$j; if [ $? 
-eq 0]; then exit 1; fi; done; " + "done;") + err_msg = "Extended attributes set to files and dirs not removed" + self._run_cmd(io_cmd, err_msg) + + def _remove_files_and_create_dirs_with_the_same_name(self): + """Remove files and create dirs with the same name""" + io_cmd = ("for i in `seq 1 10`; do for j in `seq 1 5`; do for k in " + "`seq 1 10`; do rm -f l1_dir.$i/l2_dir.$j/test.$k; mkdir " + "l1_dir.$i/l2_dir.$j/test.$k; done; done; done;") + self._run_cmd(io_cmd, + "Failed to remove files and create dirs with same name") + + def _verify_if_dirs_are_proper_or_not(self): + """Verify if dirs are proper or not""" + io_cmd = ("for i in `seq 1 10`; do for j in `seq 1 5`; do for k in " + "`seq 1 10`; do stat -c %F l1_dir.$i/l2_dir.$j/test.$k | " + "grep -F 'directory'; if [ $? -ne 0 ]; then exit 1; fi; " + "done; done; done;") + self._run_cmd(io_cmd, "Dirs created instead of files aren't proper") + + def _bring_bricks_offline(self): + """Brings bricks offline and confirms if they are offline""" + # Select bricks to bring offline from a replica set + subvols_dict = get_subvols(self.mnode, self.volname) + subvols = subvols_dict['volume_subvols'] + self.bricks_to_bring_offline = [] + for subvol in subvols: + self.bricks_to_bring_offline.append(subvol[0]) + + # Bring bricks offline + ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' % + self.bricks_to_bring_offline) + + ret = are_bricks_offline(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks %s are not offline' + % self.bricks_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + self.bricks_to_bring_offline) + + def _restart_volume_and_bring_all_offline_bricks_online(self): + """Restart volume and bring all offline bricks online""" + ret = bring_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline, + bring_bricks_online_methods=[ + 'volume_start_force']) + 
self.assertTrue(ret, 'Failed to bring bricks %s online' % + self.bricks_to_bring_offline) + + # Check if bricks are back online or not + ret = are_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks not online %s even after restart' % + self.bricks_to_bring_offline) + + g.log.info('Bringing bricks %s online is successful', + self.bricks_to_bring_offline) + + def _check_arequal_on_bricks_with_a_specific_arequal(self, arequal, + brick_list): + """ + Compare an inital arequal checksum with bricks from a given brick list + """ + init_val = arequal[0].splitlines()[-1].split(':')[-1] + ret, arequals = collect_bricks_arequal(brick_list) + self.assertTrue(ret, 'Failed to get arequal on bricks') + for brick_arequal in arequals: + brick_total = brick_arequal.splitlines()[-1].split(':')[-1] + self.assertEqual(init_val, brick_total, 'Arequals not matching') + + @staticmethod + def _add_dir_path_to_brick_list(brick_list): + """Add test_self_heal at the end of brick path""" + dir_brick_list = [] + for brick in brick_list: + dir_brick_list.append('{}/{}'.format(brick, 'test_self_heal')) + return dir_brick_list + + def _check_arequal_checksum_for_the_volume(self): + """ + Check if arequals of mount point and bricks are + are the same. 
+ """ + if self.volume_type == "replicated": + # Check arequals for "replicated" + brick_list = get_all_bricks(self.mnode, self.volname) + dir_brick_list = self._add_dir_path_to_brick_list(brick_list) + + # Get arequal before getting bricks offline + work_dir = '{}/test_self_heal'.format(self.mountpoint) + ret, arequals = collect_mounts_arequal([self.mounts[0]], + path=work_dir) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal before getting bricks offline ' + 'is successful') + + # Get arequal on bricks and compare with mount_point_total + self._check_arequal_on_bricks_with_a_specific_arequal( + arequals, dir_brick_list) + + # Check arequals for "distributed-replicated" + if self.volume_type == "distributed-replicated": + # Get the subvolumes + subvols_dict = get_subvols(self.mnode, self.volname) + num_subvols = len(subvols_dict['volume_subvols']) + + # Get arequals and compare + for i in range(0, num_subvols): + # Get arequal for first brick + brick_list = subvols_dict['volume_subvols'][i] + dir_brick_list = self._add_dir_path_to_brick_list(brick_list) + ret, arequals = collect_bricks_arequal([dir_brick_list[0]]) + self.assertTrue(ret, 'Failed to get arequal on first brick') + + # Get arequal for every brick and compare with first brick + self._check_arequal_on_bricks_with_a_specific_arequal( + arequals, dir_brick_list) + + def _check_heal_is_completed_and_not_in_split_brain(self): + """Check if heal is completed and volume not in split brain""" + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') + + # Check if volume is in split brian or not + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + def _check_if_there_are_files_and_dirs_to_be_healed(self): + """Check if there are files and dirs to be 
healed""" + ret = is_heal_complete(self.mnode, self.volname) + self.assertFalse(ret, 'Heal is completed') + g.log.info('Heal is pending') + + def _wait_for_heal_is_completed(self): + """Check if heal is completed""" + ret = monitor_heal_completion(self.mnode, self.volname, + timeout_period=3600) + self.assertTrue(ret, 'Heal has not yet completed') + + def _check_heal_status_restart_vol_wait_and_check_data(self): + """ + Perform repatative steps mentioned below: + 1 Check if heal info is showing all the files and dirs to be healed + 2 Bring back all brick processes which were killed + 3 Wait for heal to complete on the volume + 4 Check if heal is complete and check if volume is in split brain + 5 Collect and compare arequal-checksum according to the volume type + for bricks + """ + # Check if heal info is showing all the files and dirs to be healed + self._check_if_there_are_files_and_dirs_to_be_healed() + + # Bring back all brick processes which were killed + self._restart_volume_and_bring_all_offline_bricks_online() + + # Wait for heal to complete on the volume + self._wait_for_heal_is_completed() + + # Check if heal is complete and check if volume is in split brain + self._check_heal_is_completed_and_not_in_split_brain() + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + + def _run_test_self_heal_entry_heal(self): + """Run steps of test_self_heal_entry_heal""" + # Create a directory and create files and directories inside it on + # mount point + self._create_files_and_dirs_on_mount_point(0) + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + + # Bring down brick processes accoding to the volume type + self._bring_bricks_offline() + + # Create a new set of files and directories on mount point + self._create_files_and_dirs_on_mount_point(3, second_set=True) + + 
self._check_heal_status_restart_vol_wait_and_check_data() + + # Bring down brick processes accoding to the volume type + self._bring_bricks_offline() + + # Delete files and directories from mount point + self._delete_files_and_dirs() + + self._check_heal_status_restart_vol_wait_and_check_data() + + # Bring down brick processes accoding to the volume type + self._bring_bricks_offline() + + # Rename the existing files and dirs + self._rename_files_and_dirs() + + self._check_heal_status_restart_vol_wait_and_check_data() + + def test_self_heal_entry_heal(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create a directory and create files and directories inside it + on mount point. + 3. Collect and compare arequal-checksum according to the volume type + for bricks. + 4. Bring down brick processes accoding to the volume type. + 5. Create a new set of files and directories on mount point. + 6. Check if heal info is showing all the files and dirs to be healed. + 7. Bring back all brick processes which were killed. + 8. Wait for heal to complete on the volume. + 9. Check if heal is complete and check if volume is in split brain. + 10. Collect and compare arequal-checksum according to the volume type + for bricks. + 11. Bring down brick processes accoding to the volume type. + 12. Delete files and directories from mount point. + 13. Check if heal info is showing all the files and dirs to be healed. + 14. Bring back all brick processes which were killed. + 15. Wait for heal to complete on the volume. + 16. Check if heal is complete and check if volume is in split brain. + 17. Collect and compare arequal-checksum according to the volume type + for bricks. + 18. Bring down brick processes accoding to the volume type. + 19. Rename the existing files and dirs. + 20. Check if heal info is showing all the files and dirs to be healed. + 21. Bring back all brick processes which were killed. + 22. Wait for heal to complete on the volume. + 23. 
Check if heal is complete and check if volume is in split brain. + 24. Collect and compare arequal-checksum according to the volume type + for bricks. + + Note: + Do this test with both Granular-entry-heal set enable and disable. + """ + for value in (False, True): + if value: + # Cleanup old data from mount point + ret, _, _ = g.run(self.first_client, + 'rm -rf {}/*'.format(self.mountpoint)) + self.assertFalse(ret, 'Failed to cleanup mount point') + g.log.info("Testing with granular heal set to enabled") + self._set_granular_heal_to_on_or_off(enabled=value) + self._run_test_self_heal_entry_heal() + + def test_self_heal_meta_data(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create a directory and create files and directories inside it + on mount point. + 3. Collect and compare arequal-checksum according to the volume type + for bricks. + 4. Bring down brick processes accoding to the volume type. + 5. Change the meta data of files and dirs. + 6. Check if heal info is showing all the files and dirs to be healed. + 7. Bring back all brick processes which were killed. + 8. Wait for heal to complete on the volume. + 9. Check if heal is complete and check if volume is in split brain. + 10. Collect and compare arequal-checksum according to the volume type + for bricks. + 11. Verify if the meta data of files and dirs. + 12. Bring down brick processes accoding to the volume type. + 13. Set extended attributes on the files and dirs. + 14. Verify if the extended attributes are set properly or not. + 15. Check if heal info is showing all the files and dirs to be healed. + 16. Bring back all brick processes which were killed. + 17. Wait for heal to complete on the volume. + 18. Check if heal is complete and check if volume is in split brain. + 19. Collect and compare arequal-checksum according to the volume type + for bricks. + 20. Verify if extended attributes are consitent or not. + 21. 
Bring down brick processes accoding to the volume type + 22. Remove extended attributes on the files and dirs. + 23. Verify if extended attributes were removed properly. + 24. Check if heal info is showing all the files and dirs to be healed. + 25. Bring back all brick processes which were killed. + 26. Wait for heal to complete on the volume. + 27. Check if heal is complete and check if volume is in split brain. + 28. Collect and compare arequal-checksum according to the volume type + for bricks. + 29. Verify if extended attributes are removed or not. + """ + # Create a directory and create files and directories inside it + # on mount point + self._create_files_and_dirs_on_mount_point(1) + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + + # Bring down brick processes accoding to the volume type + self._bring_bricks_offline() + + # Change the meta data of files and dirs + self._change_meta_deta_of_dirs_and_files() + + self._check_heal_status_restart_vol_wait_and_check_data() + + # Verify if the meta data of files and dirs + self._verify_meta_data_of_files_and_dirs() + + for value in (False, True): + # Bring down brick processes accoding to the volume type + self._bring_bricks_offline() + + # Set or remove extended attributes on the files and dirs + self._set_and_remove_extended_attributes(remove=value) + + # Verify if the extended attributes are set properly or not + self._verify_if_extended_attributes_are_proper(remove=value) + + self._check_heal_status_restart_vol_wait_and_check_data() + + # Verify if extended attributes are consitent or not + self._verify_if_extended_attributes_are_proper(remove=value) + + def test_self_heal_of_dir_with_files_removed(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create a directory and create files and directories inside it + on mount point. + 3. 
Collect and compare arequal-checksum according to the volume type + for bricks. + 4. Bring down brick processes accoding to the volume type. + 5. Remove all files and create dir which have name of files. + 6. Check if heal info is showing all the files and dirs to be healed. + 7. Bring back all brick processes which were killed. + 8. Wait for heal to complete on the volume. + 9. Check if heal is complete and check if volume is in split brain. + 10. Collect and compare arequal-checksum according to the volume type + for bricks. + 11. Verify if dirs are healed properly or not. + """ + # Create a directory and create files and directories inside it + # on mount point + self._create_files_and_dirs_on_mount_point(2) + + # Collect and compare arequal-checksum according to the volume type + # for bricks + self._check_arequal_checksum_for_the_volume() + + # Bring down brick processes accoding to the volume type + self._bring_bricks_offline() + + # Remove all files and create dir which have name of files + self._remove_files_and_create_dirs_with_the_same_name() + + self._check_heal_status_restart_vol_wait_and_check_data() + + # Verify if dirs are healed properly or not + self._verify_if_dirs_are_proper_or_not() diff --git a/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py b/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py new file mode 100644 index 000000000..a449e396f --- /dev/null +++ b/tests/functional/afr/heal/test_server_side_healing_happens_only_when_glustershd_running.py @@ -0,0 +1,250 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" Description: + Test Cases in this module tests the self heal daemon process. +""" + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.brick_libs import ( + bring_bricks_offline, bring_bricks_online, + select_volume_bricks_to_bring_offline, get_online_bricks_list) +from glustolibs.gluster.heal_libs import ( + get_self_heal_daemon_pid, is_shd_daemonized, + monitor_heal_completion, bring_self_heal_daemon_process_offline, + disable_granular_heal) +from glustolibs.gluster.heal_ops import (get_heal_info_summary, + trigger_heal_full) +from glustolibs.io.utils import validate_io_procs +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.gluster.volume_ops import (set_volume_options, + get_volume_options) +from glustolibs.gluster.mount_ops import mount_volume, umount_volume + + +@runs_on([['replicated'], ['glusterfs']]) +class SelfHealDaemonProcessTestsWithSingleVolume(GlusterBaseClass): + """ + SelfHealDaemonProcessTestsWithSingleVolume contains tests which + verifies the self-heal daemon process on a single volume + """ + + def setUp(self): + + # Calling GlusterBaseClass setUpClass + self.get_super_method(self, 'setUp')() + + # Upload script + self.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(self.clients, [self.script_upload_path]) + if not ret: + raise ExecutionError("Failed to upload IO 
scripts to clients") + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + # Verify glustershd process releases its parent process + ret = is_shd_daemonized(self.servers) + if not ret: + raise ExecutionError("Self Heal Daemon process was still" + " holding parent process.") + g.log.info("Self Heal Daemon processes are online") + + def tearDown(self): + """ + Clean up the volume and umount volume from client + """ + # Stopping the volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") + g.log.info("Successful in Unmount Volume and Cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_server_side_healing_happens_only_when_glustershd_running(self): + """ + Test Script which verifies that the server side healing must happen + only if the heal daemon is running on the node where source brick + resides. 
+ + * Create and start the Replicate volume + * Check the glustershd processes - Only 1 glustershd should be listed + * Bring down the bricks without affecting the cluster + * Create files on volume + * kill the glustershd on node where bricks is running + * bring the bricks up which was killed in previous steps + * check the heal info - heal info must show pending heal info, heal + shouldn't happen since glustershd is down on source node + * issue heal + * trigger client side heal + * heal should complete successfully + """ + # pylint: disable=too-many-locals,too-many-statements,too-many-lines + + # Disable granular heal if not disabled already + granular = get_volume_options(self.mnode, self.volname, + 'granular-entry-heal') + if granular['cluster.granular-entry-heal'] == 'on': + ret = disable_granular_heal(self.mnode, self.volname) + self.assertTrue(ret, + "Unable to set granular-entry-heal to on") + + # Setting Volume options + options = {"metadata-self-heal": "on", + "entry-self-heal": "on", + "data-self-heal": "on"} + ret = set_volume_options(self.mnode, self.volname, options) + self.assertTrue(ret, 'Failed to set options %s' % options) + g.log.info("Successfully set %s for volume %s", + options, self.volname) + + # Check the self-heal daemon process + ret, pids = get_self_heal_daemon_pid(self.servers) + self.assertTrue(ret, ("Either No self heal daemon process found or " + "more than One self heal daemon process " + "found : %s" % pids)) + g.log.info("Successful in verifying self heal daemon process" + " on all nodes %s", self.servers) + + # Select the bricks to bring offline + bricks_to_bring_offline = (select_volume_bricks_to_bring_offline + (self.mnode, self.volname)) + g.log.info("Brick List to bring offline : %s", bricks_to_bring_offline) + + # Bring down the selected bricks + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, "Failed to bring down the bricks") + g.log.info("Brought down the brick process " + "for 
%s", bricks_to_bring_offline) + + # Write files on all mounts + all_mounts_procs, num_files_to_write = [], 100 + for mount_obj in self.mounts: + cmd = ("/usr/bin/env python %s create_files " + "-f %d --base-file-name file %s" % (self.script_upload_path, + num_files_to_write, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + all_mounts_procs.append(proc) + + # Validate IO + ret = validate_io_procs(all_mounts_procs, self.mounts) + self.assertTrue(ret, "IO failed on some of the clients") + g.log.info("IO is successful on all mounts") + + # Get online bricks list + online_bricks = get_online_bricks_list(self.mnode, self.volname) + g.log.info("Online Bricks for volume %s : %s", + self.volname, online_bricks) + + # Get the nodes where bricks are running + bring_offline_glustershd_nodes = [] + for brick in online_bricks: + bring_offline_glustershd_nodes.append(brick.split(":")[0]) + g.log.info("self heal deamon on nodes %s to be killed", + bring_offline_glustershd_nodes) + + # Kill the self heal daemon process on nodes + ret = bring_self_heal_daemon_process_offline( + bring_offline_glustershd_nodes) + self.assertTrue(ret, ("Unable to bring self heal daemon process" + " offline for nodes %s" + % bring_offline_glustershd_nodes)) + g.log.info("Sucessfully brought down self heal process for " + "nodes %s", bring_offline_glustershd_nodes) + + # Check the heal info + heal_info = get_heal_info_summary(self.mnode, self.volname) + g.log.info("Successfully got heal info %s for the volume %s", + heal_info, self.volname) + + # Bring bricks online + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline, 'glusterd_restart') + self.assertTrue(ret, ("Failed to bring bricks: %s online" + % bricks_to_bring_offline)) + + # Issue heal + ret = trigger_heal_full(self.mnode, self.volname) + self.assertFalse(ret, ("Able to trigger heal on volume %s where " + "self heal daemon is not running" + % self.volname)) + 
g.log.info("Expected : Unable to trigger heal on volume %s where " + "self heal daemon is not running", self.volname) + + # Wait for 130 sec to heal + ret = monitor_heal_completion(self.mnode, self.volname, 130) + self.assertFalse(ret, ("Heal Completed on volume %s" % self.volname)) + g.log.info("Expected : Heal pending on volume %s", self.volname) + + # Check the heal info + heal_info_after_triggering_heal = get_heal_info_summary(self.mnode, + self.volname) + g.log.info("Successfully got heal info for the volume %s", + self.volname) + + # Compare with heal pending with the files wrote + for node in online_bricks: + self.assertGreaterEqual( + int(heal_info_after_triggering_heal[node]['numberOfEntries']), + num_files_to_write, + ("Some of the files are healed from source bricks %s where " + "self heal daemon is not running" % node)) + g.log.info("EXPECTED: No files are healed from source bricks where " + "self heal daemon is not running") + + # Unmount and Mount volume again as volume options were set + # after mounting the volume + for mount_obj in self.mounts: + ret, _, _ = umount_volume(mount_obj.client_system, + mount_obj.mountpoint) + self.assertEqual(ret, 0, "Failed to unmount %s" + % mount_obj.client_system) + ret, _, _ = mount_volume(self.volname, + mtype='glusterfs', + mpoint=mount_obj.mountpoint, + mserver=self.mnode, + mclient=mount_obj.client_system) + self.assertEqual(ret, 0, "Failed to mount %s" + % mount_obj.client_system) + + all_mounts_procs = [] + for mount_obj in self.mounts: + cmd = ("cd %s;for i in `seq 1 5`; do ls -l;cat *; stat *; sleep 5;" + " done " % (mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + all_mounts_procs.append(proc) + + # Validate IO + ret = validate_io_procs(all_mounts_procs, self.mounts) + self.assertTrue(ret, "Reads failed on some of the clients") + g.log.info("Reads successful on all mounts") + + # Wait for heal to complete + ret = monitor_heal_completion(self.mnode, 
self.volname) + self.assertTrue(ret, "Unable to heal the pending entries") + g.log.info("Successfully healed the pending entries for volume %s", + self.volname) diff --git a/tests/functional/afr/test_add_brick_followed_by_remove_brick.py b/tests/functional/afr/test_add_brick_followed_by_remove_brick.py new file mode 100644 index 000000000..a653b792d --- /dev/null +++ b/tests/functional/afr/test_add_brick_followed_by_remove_brick.py @@ -0,0 +1,170 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.dht_test_utils import is_layout_complete +from glustolibs.gluster.glusterfile import (file_exists, + occurences_of_pattern_in_file) +from glustolibs.gluster.rebalance_ops import (rebalance_start, + wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_libs import expand_volume, shrink_volume +from glustolibs.io.utils import (validate_io_procs, wait_for_io_to_complete) +from glustolibs.misc.misc_libs import upload_scripts + + +@runs_on([['replicated'], ['glusterfs']]) +class TestAddBrickFollowedByRemoveBrick(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + cls.get_super_method(cls, 'setUpClass')() + + cls.first_client = cls.mounts[0].client_system + cls.mountpoint = cls.mounts[0].mountpoint + cls.is_io_running = False + + # Upload IO scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + if not file_exists(cls.first_client, cls.script_upload_path): + if not upload_scripts(cls.first_client, cls.script_upload_path): + raise ExecutionError( + "Failed to upload IO scripts to client %s" + % cls.first_client) + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to setup and mount volume") + + def tearDown(self): + + if self.is_io_running: + if not wait_for_io_to_complete(self.all_mounts_procs, + [self.mounts[0]]): + raise ExecutionError("IO failed on some of the clients") + + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _check_layout_of_bricks(self): + 
"""Check the layout of bricks""" + ret = is_layout_complete(self.mnode, self.volname, "/") + self.assertTrue(ret, ("Volume %s: Layout is not complete", + self.volname)) + g.log.info("Volume %s: Layout is complete", self.volname) + + def _add_brick_and_wait_for_rebalance_to_complete(self): + """Add brick and wait for rebalance to complete""" + + # Add brick to volume + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, "Failed to add brick on volume %s" + % self.volname) + + # Trigger rebalance and wait for it to complete + ret, _, _ = rebalance_start(self.mnode, self.volname, + force=True) + self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s" + % self.volname) + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=1200) + self.assertTrue(ret, "Rebalance is not yet complete on the volume " + "%s" % self.volname) + g.log.info("Rebalance successfully completed") + + self._check_layout_of_bricks() + + def _remove_brick_from_volume(self): + """Remove bricks from volume""" + # Remove bricks from the volume + ret = shrink_volume(self.mnode, self.volname, rebalance_timeout=2000) + self.assertTrue(ret, "Failed to remove-brick from volume") + g.log.info("Remove-brick rebalance successful") + + def test_add_brick_followed_by_remove_brick(self): + """ + Test case: + 1. Create a volume, start it and mount it to a client. + 2. Start I/O on volume. + 3. Add brick and trigger rebalance, wait for rebalance to complete. + (The volume which was 1x3 should now be 2x3) + 4. Add brick and trigger rebalance, wait for rebalance to complete. + (The volume which was 2x3 should now be 3x3) + 5. Remove brick from volume such that it becomes a 2x3. + 6. Remove brick from volume such that it becomes a 1x3. + 7. Wait for I/O to complete and check for any input/output errors in + both client and rebalance logs. 
+ """ + # Start I/O on mount point + self.all_mounts_procs = [] + cmd = ("/usr/bin/env python {} create_deep_dirs_with_files " + "--dirname-start-num {} --dir-depth 5 --dir-length 5 " + "--max-num-of-dirs 5 --num-of-files 5 {}" + .format(self.script_upload_path, 10, self.mountpoint)) + proc = g.run_async(self.first_client, cmd) + self.all_mounts_procs.append(proc) + self.is_io_running = True + + # Convert 1x3 to 2x3 and then convert 2x3 to 3x3 + for _ in range(0, 2): + self._add_brick_and_wait_for_rebalance_to_complete() + + # Convert 3x3 to 2x3 and then convert 2x3 to 1x3 + for _ in range(0, 2): + self._remove_brick_from_volume() + + # Validate I/O processes running on the nodes + ret = validate_io_procs(self.all_mounts_procs, [self.mounts[0]]) + self.is_io_running = False + self.assertTrue(ret, "IO failed on some of the clients") + g.log.info("IO on all mounts: Complete") + + # Check for Input/output errors in rebalance logs + particiapting_nodes = [] + for brick in get_all_bricks(self.mnode, self.volname): + node, _ = brick.split(':') + particiapting_nodes.append(node) + + for server in particiapting_nodes: + ret = occurences_of_pattern_in_file( + server, "Input/output error", + "/var/log/glusterfs/{}-rebalance.log".format(self.volname)) + self.assertEqual(ret, 0, + "[Input/output error] present in rebalance log" + " file") + + # Check for Input/output errors in client logs + ret = occurences_of_pattern_in_file( + self.first_client, "Input/output error", + "/var/log/glusterfs/mnt-{}_{}.log".format(self.volname, + self.mount_type)) + self.assertEqual(ret, 0, + "[Input/output error] present in client log file") + g.log.info("Expanding and shrinking volume successful and no I/O " + "errors see in rebalance and client logs") diff --git a/tests/functional/afr/test_afr_cli_no_splitbrain_resolution.py b/tests/functional/afr/test_afr_cli_no_splitbrain_resolution.py index ad6f336a5..1acd11faa 100644 --- a/tests/functional/afr/test_afr_cli_no_splitbrain_resolution.py +++ 
b/tests/functional/afr/test_afr_cli_no_splitbrain_resolution.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2020 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2021 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -112,17 +112,16 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("creating 5 files from mount point") all_mounts_procs = [] - for mount_obj in self.mounts: - cmd = ("/usr/bin/env python %s create_files -f 5 " - "--base-file-name test_file --fixed-file-size 1k %s" % ( - self.script_upload_path, - mount_obj.mountpoint)) - proc = g.run_async(mount_obj.client_system, cmd, - user=mount_obj.user) - all_mounts_procs.append(proc) + cmd = ("/usr/bin/env python %s create_files -f 5 " + "--base-file-name test_file --fixed-file-size 1k %s" % ( + self.script_upload_path, self.mounts[0].mountpoint)) + proc = g.run_async(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + all_mounts_procs.append(proc) + # Validate I/O g.log.info("Wait for IO to complete and validate IO.....") - ret = validate_io_procs(all_mounts_procs, self.mounts) + ret = validate_io_procs(all_mounts_procs, [self.mounts[0]]) self.assertTrue(ret, "IO failed on some of the clients") g.log.info("IO is successful on all mounts") g.log.info("Successfully created a file from mount point") @@ -149,17 +148,16 @@ class TestSelfHeal(GlusterBaseClass): g.log.info("creating 5 new files of same name from mount point") all_mounts_procs = [] - for mount_obj in self.mounts: - cmd = ("/usr/bin/env python %s create_files -f 5 " - "--base-file-name test_file --fixed-file-size 10k %s" % ( - self.script_upload_path, - mount_obj.mountpoint)) - proc = g.run_async(mount_obj.client_system, cmd, - user=mount_obj.user) - all_mounts_procs.append(proc) + cmd = ("/usr/bin/env python %s create_files -f 5 " + "--base-file-name test_file --fixed-file-size 10k %s" % ( + 
self.script_upload_path, self.mounts[0].mountpoint)) + proc = g.run_async(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + all_mounts_procs.append(proc) + # Validate I/O g.log.info("Wait for IO to complete and validate IO.....") - ret = validate_io_procs(all_mounts_procs, self.mounts) + ret = validate_io_procs(all_mounts_procs, [self.mounts[0]]) self.assertTrue(ret, "IO failed on some of the clients") g.log.info("IO is successful on all mounts") g.log.info("Successfully created a new file of same name " @@ -225,10 +223,11 @@ class TestSelfHeal(GlusterBaseClass): fpath = (self.mounts[0].mountpoint + '/test_file' + str(fcount) + '.txt') status = get_fattr(self.mounts[0].client_system, - fpath, 'replica.split-brain-status') + fpath, 'replica.split-brain-status', + encode="text") compare_string = ("The file is not under data or metadata " "split-brain") - self.assertEqual(status.rstrip('\x00'), compare_string, + self.assertEqual(status, compare_string, "file test_file%s is under" " split-brain" % str(fcount)) g.log.info("none of the files are under split-brain") diff --git a/tests/functional/afr/test_afr_dir_entry_creation_with_subvol_down.py b/tests/functional/afr/test_afr_dir_entry_creation_with_subvol_down.py new file mode 100644 index 000000000..9cc249e3c --- /dev/null +++ b/tests/functional/afr/test_afr_dir_entry_creation_with_subvol_down.py @@ -0,0 +1,194 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from time import sleep + +from glusto.core import Glusto as g +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + are_bricks_offline) +from glustolibs.gluster.dht_test_utils import (create_brickobjectlist, + find_specific_hashed) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.glusterfile import file_exists +from glustolibs.gluster.rebalance_ops import (rebalance_start, + wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_ops import volume_start +from glustolibs.gluster.volume_libs import ( + verify_all_process_of_volume_are_online, get_subvols) +from glustolibs.gluster.mount_ops import umount_volume, mount_volume + + +@runs_on([['distributed-arbiter', 'distributed-replicated'], ['glusterfs']]) +class TestAfrDirEntryCreationWithSubvolDown(GlusterBaseClass): + + def setUp(self): + # calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Changing the distrubte count to 3 as per the test. + self.volume['voltype']['dist_count'] = 3 + # Setup volume and mount it on three clients. 
+ if not self.setup_volume_and_mount_volume(mounts=[self.mounts[0]]): + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + def tearDown(self): + + # Unmount and cleanup the volume + if not self.unmount_volume_and_cleanup_volume(mounts=[self.mounts[0]]): + raise ExecutionError("Unable to unmount and cleanup volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _check_file_exists(self, subvol, directory, exists=True): + """ Validates given directory present on brick path of each subvol """ + for each_brick in subvol: + node, brick_path = each_brick.split(":") + path = brick_path + directory + ret = file_exists(node, path) + self.assertEqual(exists, ret, "Unexpected behaviour, existence " + "check of directory {} on brick returned" + " {}".format(directory, each_brick)) + + def _create_file(self, location, file_name): + """ Creates a file with file_name on the specified location""" + source_file = "{}/{}".format(location, file_name) + ret, _, err = g.run(self.mounts[0].client_system, + ("touch %s" % source_file)) + self.assertEqual(ret, 0, ("Failed to create {} on {}: err" + " {}".format(source_file, location, err))) + g.log.info("Successfully created %s on: %s", file_name, location) + + def _create_number_of_files_on_the_subvol(self, subvol_object, directory, + number_of_files, mountpath): + """Creates number of files specified on the given subvol""" + name = None + for _ in range(number_of_files): + hashed = find_specific_hashed(self.subvols, directory, + subvol_object, existing_names=name) + self.assertIsNotNone(hashed, "Couldn't find a subvol to " + "create a file.") + self._create_file(mountpath, hashed.newname) + name = hashed.newname + + def test_afr_dir_entry_creation_with_subvol_down(self): + """ + 1. Create a distributed-replicated(3X3)/distributed-arbiter(3X(2+1)) + and mount it on one client + 2. Kill 3 bricks corresponding to the 1st subvol + 3. 
Unmount and remount the volume on the same client + 4. Create deep dir from mount point + mkdir -p dir1/subdir1/deepdir1 + 5. Create files under dir1/subdir1/deepdir1; touch <filename> + 6. Now bring all sub-vols up by volume start force + 7. Validate backend bricks for dir creation, the subvol which is + offline will have no dirs created, whereas other subvols will have + dirs created from step 4 + 8. Trigger heal from client by "#find . | xargs stat" + 9. Verify that the directory entries are created on all back-end bricks + 10. Create new dir (dir2) on location dir1/subdir1/deepdir1 + 11. Trigger rebalance and wait for the completion + 12. Check backend bricks for all entries of dirs + 13. Check if files are getting created on the subvol which was offline + """ + # Bring down first subvol of bricks offline + self.subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + first_subvol = self.subvols[0] + ret = bring_bricks_offline(self.volname, first_subvol) + self.assertTrue(ret, "Unable to bring {} bricks offline". 
+ format(first_subvol)) + + # Check bricks are offline or not + ret = are_bricks_offline(self.mnode, self.volname, first_subvol) + self.assertTrue(ret, "Bricks {} are still online".format(first_subvol)) + + # Unmount and remount the volume + ret, _, _ = umount_volume( + self.mounts[0].client_system, self.mounts[0].mountpoint) + self.assertFalse(ret, "Failed to unmount volume.") + ret, _, _ = mount_volume(self.volname, self.mount_type, + self.mounts[0].mountpoint, self.mnode, + self.mounts[0].client_system) + self.assertFalse(ret, "Failed to remount volume.") + g.log.info('Successfully umounted and remounted volume.') + + # At this step, sleep is must otherwise file creation will fail + sleep(2) + + # Create dir `dir1/subdir1/deepdir1` on mountpont + directory1 = "dir1/subdir1/deepdir1" + path = self.mounts[0].mountpoint + "/" + directory1 + ret = mkdir(self.mounts[0].client_system, path, parents=True) + self.assertTrue(ret, "Directory {} creation failed".format(path)) + + # Create files on the 2nd and 3rd subvols which are online + brickobject = create_brickobjectlist(self.subvols, directory1) + self.assertIsNotNone(brickobject, "Failed to get brick object list") + self._create_number_of_files_on_the_subvol( + brickobject[1], directory1, 5, mountpath=path) + self._create_number_of_files_on_the_subvol( + brickobject[2], directory1, 5, mountpath=path) + + # Bring bricks online using volume start force + ret, _, err = volume_start(self.mnode, self.volname, force=True) + self.assertEqual(ret, 0, err) + g.log.info("Volume: %s started successfully", self.volname) + + # Check all bricks are online + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, "Few process after volume start are offline for " + "volume: {}".format(self.volname)) + + # Validate Directory is not created on the bricks of the subvol which + # is offline + for subvol in self.subvols: + self._check_file_exists(subvol, "/" + directory1, + exists=(subvol != 
first_subvol)) + + # Trigger heal from the client + cmd = "cd {}; find . | xargs stat".format(self.mounts[0].mountpoint) + ret, _, err = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, err) + + # Validate the directory1 is present on all the bricks + for subvol in self.subvols: + self._check_file_exists(subvol, "/" + directory1, exists=True) + + # Create new dir (dir2) on location dir1/subdir1/deepdir1 + directory2 = "/" + directory1 + '/dir2' + path = self.mounts[0].mountpoint + directory2 + ret = mkdir(self.mounts[0].client_system, path, parents=True) + self.assertTrue(ret, "Directory {} creation failed".format(path)) + + # Trigger rebalance and validate the completion + ret, _, err = rebalance_start(self.mnode, self.volname) + self.assertEqual(ret, 0, err) + g.log.info("Rebalance on volume %s started successfully", self.volname) + ret = wait_for_rebalance_to_complete(self.mnode, self.volname) + self.assertTrue(ret, "Rebalance didn't complete on the volume: " + "{}".format(self.volname)) + + # Validate all dirs are present on all bricks in each subvols + for subvol in self.subvols: + for each_dir in ("/" + directory1, directory2): + self._check_file_exists(subvol, each_dir, exists=True) + + # Validate if files are getting created on the subvol which was + # offline + self._create_number_of_files_on_the_subvol( + brickobject[0], directory1, 5, mountpath=path) diff --git a/tests/functional/afr/test_afr_reset_brick_heal_full.py b/tests/functional/afr/test_afr_reset_brick_heal_full.py new file mode 100644 index 000000000..bdc90ee62 --- /dev/null +++ b/tests/functional/afr/test_afr_reset_brick_heal_full.py @@ -0,0 +1,157 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from random import choice + +from glusto.core import Glusto as g +from glustolibs.gluster.brick_ops import reset_brick +from glustolibs.gluster.brick_libs import (get_all_bricks, are_bricks_offline) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.glusterdir import rmdir +from glustolibs.gluster.glusterfile import remove_file +from glustolibs.gluster.heal_ops import trigger_heal_full +from glustolibs.gluster.heal_libs import monitor_heal_completion +from glustolibs.gluster.lib_utils import collect_bricks_arequal +from glustolibs.gluster.volume_libs import ( + get_subvols, wait_for_volume_process_to_be_online) +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import (validate_io_procs, wait_for_io_to_complete) + + +@runs_on([['replicated', 'distributed-replicated'], + ['glusterfs', 'nfs']]) +class TestAfrResetBrickHeal(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload IO scripts for running IO on mounts + cls.script_upload_path = ( + "/usr/share/glustolibs/io/scripts/file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients {}". 
+ format(cls.clients)) + + def setUp(self): + # calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup volume and mount it. + if not self.setup_volume_and_mount_volume(self.mounts): + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + def tearDown(self): + # Wait if any IOs are pending from the test + if self.all_mounts_procs: + ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts) + if ret: + raise ExecutionError( + "Wait for IO completion failed on some of the clients") + + # Unmount and cleanup the volume + if not self.unmount_volume_and_cleanup_volume(self.mounts): + raise ExecutionError("Unable to unmount and cleanup volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + @classmethod + def tearDownClass(cls): + for each_client in cls.clients: + ret = remove_file(each_client, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to delete file {}". + format(cls.script_upload_path)) + + cls.get_super_method(cls, 'tearDownClass')() + + def test_afr_reset_brick_heal_full(self): + """ + 1. Create files/dirs from mount point + 2. With IO in progress execute reset-brick start + 3. Now format the disk from back-end, using rm -rf <brick path> + 4. Execute reset brick commit and check for the brick is online. + 5. Issue volume heal using "gluster vol heal <volname> full" + 6. 
Check arequal for all bricks to verify all backend bricks + including the resetted brick have same data + """ + self.all_mounts_procs = [] + for count, mount_obj in enumerate(self.mounts): + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d --dir-depth 3 --dir-length 5 " + "--max-num-of-dirs 5 --num-of-files 5 %s" % ( + self.script_upload_path, count, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + self.all_mounts_procs.append(proc) + + all_bricks = get_all_bricks(self.mnode, self.volname) + self.assertIsNotNone(all_bricks, "Unable to fetch bricks of volume") + brick_to_reset = choice(all_bricks) + + # Start reset brick + ret, _, err = reset_brick(self.mnode, self.volname, + src_brick=brick_to_reset, option="start") + self.assertEqual(ret, 0, err) + g.log.info("Reset brick: %s started", brick_to_reset) + + # Validate the brick is offline + ret = are_bricks_offline(self.mnode, self.volname, [brick_to_reset]) + self.assertTrue(ret, "Brick:{} is still online".format(brick_to_reset)) + + # rm -rf of the brick directory + node, brick_path = brick_to_reset.split(":") + ret = rmdir(node, brick_path, force=True) + self.assertTrue(ret, "Unable to delete the brick {} on " + "node {}".format(brick_path, node)) + + # Reset brick commit + ret, _, err = reset_brick(self.mnode, self.volname, + src_brick=brick_to_reset, option="commit") + self.assertEqual(ret, 0, err) + g.log.info("Reset brick committed successfully") + + # Check the brick is online + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, "Few volume processess are offline for the " + "volume: {}".format(self.volname)) + + # Trigger full heal + ret = trigger_heal_full(self.mnode, self.volname) + self.assertTrue(ret, "Unable to trigger the heal full command") + + # Wait for the heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, "Heal didn't 
complete in 20 mins time") + + # Validate io on the clients + ret = validate_io_procs(self.all_mounts_procs, self.mounts) + self.assertTrue(ret, "IO failed on the mounts") + self.all_mounts_procs *= 0 + + # Check arequal of the back-end bricks after heal completion + all_subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + for subvol in all_subvols: + ret, arequal_from_subvol = collect_bricks_arequal(subvol) + self.assertTrue(ret, "Arequal is collected successfully across the" + " bricks in the subvol {}".format(subvol)) + self.assertEqual(len(set(arequal_from_subvol)), 1, "Arequal is " + "same on all the bricks in the subvol") diff --git a/tests/functional/afr/test_arb_to_repl_conversion_with_io.py b/tests/functional/afr/test_arb_to_repl_conversion_with_io.py new file mode 100644 index 000000000..8e54fa6ee --- /dev/null +++ b/tests/functional/afr/test_arb_to_repl_conversion_with_io.py @@ -0,0 +1,221 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from datetime import datetime, timedelta +from time import sleep, time + +from glusto.core import Glusto as g + +from glustolibs.gluster.brick_ops import add_brick, remove_brick +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.heal_libs import monitor_heal_completion +from glustolibs.gluster.heal_ops import trigger_heal +from glustolibs.gluster.lib_utils import form_bricks_list +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.volume_ops import get_volume_info, set_volume_options +from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs, + wait_for_io_to_complete) +from glustolibs.misc.misc_libs import upload_scripts + + +# pylint: disable=too-many-locals,too-many-statements +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestArbiterToReplicatedConversion(GlusterBaseClass): + @classmethod + def setUpClass(cls): + cls.get_super_method(cls, 'setUpClass')() + cls.script_path = '/usr/share/glustolibs/io/scripts/file_dir_ops.py' + ret = upload_scripts(cls.clients, cls.script_path) + if not ret: + raise ExecutionError('Failed to upload IO scripts to clients') + + def setUp(self): + self.get_super_method(self, 'setUp')() + self.all_mounts_procs = [] + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError('Failed to setup and mount ' + '{}'.format(self.volname)) + + def tearDown(self): + if self.all_mounts_procs: + ret = wait_for_io_to_complete(self.all_mounts_procs, + [self.mounts[1]]) + if not ret: + raise ExecutionError('Wait for IO completion failed on client') + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError('Not able to unmount and cleanup ' + '{}'.format(self.volname)) + self.get_super_method(self, 'tearDown')() + + def _get_arbiter_bricks(self): + """ + Returns tuple of arbiter bricks 
from the volume + """ + + # Get all subvols + subvols = get_subvols(self.mnode, self.volname) + self.assertTrue(subvols, + 'Not able to get subvols of {}'.format(self.volname)) + + # Last brick in every subvol will be the arbiter + return tuple(zip(*subvols.get('volume_subvols')))[-1] + + def test_arb_to_repl_conversion_with_io(self): + """ + Description: To perform a volume conversion from Arbiter to Replicated + with background IOs + + Steps: + - Create, start and mount an arbiter volume in two clients + - Create two dir's, fill IO in first dir and take note of arequal + - Start a continuous IO from second directory + - Convert arbiter to x2 replicated volume (remove brick) + - Convert x2 replicated to x3 replicated volume (add brick) + - Wait for ~5 min for vol file to be updated on all clients + - Enable client side heal options and issue volume heal + - Validate heal completes with no errors and arequal of first dir + matches against initial checksum + """ + + client, m_point = (self.mounts[0].client_system, + self.mounts[0].mountpoint) + + # Fill IO in first directory + cmd = ('/usr/bin/env python {} ' + 'create_deep_dirs_with_files --dir-depth 10 ' + '--fixed-file-size 1M --num-of-files 100 ' + '--dirname-start-num 1 {}'.format(self.script_path, m_point)) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, 'Not able to fill directory with IO') + + # Take `arequal` checksum on first directory + ret, exp_arequal = collect_mounts_arequal(self.mounts[0], + m_point + '/user1') + self.assertTrue(ret, 'Failed to get arequal checksum on mount') + + # Start continuous IO from second directory + client = self.mounts[1].client_system + cmd = ('/usr/bin/env python {} ' + 'create_deep_dirs_with_files --dir-depth 10 ' + '--fixed-file-size 1M --num-of-files 250 ' + '--dirname-start-num 2 {}'.format(self.script_path, m_point)) + proc = g.run_async(client, cmd) + self.all_mounts_procs.append(proc) + + # Wait for IO to fill before volume conversion + sleep(30) + + # 
Remove arbiter bricks ( arbiter to x2 replicated ) + kwargs = {'replica_count': 2} + ret, _, _ = remove_brick(self.mnode, + self.volname, + self._get_arbiter_bricks(), + option='force', + **kwargs) + self.assertEqual(ret, 0, 'Not able convert arbiter to x2 replicated ' + 'volume') + # Wait for IO to fill after volume conversion + sleep(30) + + # Add bricks (x2 replicated to x3 replicated) + kwargs['replica_count'] = 3 + vol_info = get_volume_info(self.mnode, volname=self.volname) + self.assertIsNotNone(vol_info, 'Not able to get volume info') + dist_count = vol_info[self.volname]['distCount'] + bricks_list = form_bricks_list( + self.mnode, + self.volname, + number_of_bricks=int(dist_count) * 1, + servers=self.servers, + servers_info=self.all_servers_info, + ) + self.assertTrue(bricks_list, 'Not able to get unused list of bricks') + ret, _, _ = add_brick(self.mnode, + self.volname, + bricks_list, + force='True', + **kwargs) + self.assertEqual(ret, 0, 'Not able to add-brick to ' + '{}'.format(self.volname)) + # Wait for IO post x3 replicated volume conversion + sleep(30) + + # Validate volume info + vol_info = get_volume_info(self.mnode, volname=self.volname) + self.assertIsNotNone(vol_info, 'Not able to get volume info') + vol_info = vol_info[self.volname] + repl_count, brick_count = (vol_info['replicaCount'], + vol_info['brickCount']) + + # Wait for the volfile to sync up on clients + cmd = ('grep -ir connected {}/.meta/graphs/active/{}-client-*/private ' + '| wc -l') + wait_time = time() + 300 + in_sync = False + while time() <= wait_time: + ret, rout, _ = g.run(client, cmd.format(m_point, self.volname)) + self.assertEqual(ret, 0, + 'Not able to grep for volfile sync from client') + if int(rout) == int(brick_count): + in_sync = True + break + sleep(30) + self.assertTrue( + in_sync, 'Volfiles from clients are not synced even ' + 'after polling for ~5 min') + + self.assertEqual( + int(repl_count), kwargs['replica_count'], 'Not able ' + 'to validate x2 to x3 
replicated volume conversion') + + # Enable client side heal options, trigger and monitor heal + ret = set_volume_options( + self.mnode, self.volname, { + 'data-self-heal': 'on', + 'entry-self-heal': 'on', + 'metadata-self-heal': 'on' + }) + self.assertTrue(ret, 'Unable to set client side heal options') + ret = trigger_heal(self.mnode, self.volname) + self.assertTrue(ret, 'Unable to trigger heal on volume') + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, + 'Heal is not completed for {}'.format(self.volname)) + + # Validate IO + prev_time = datetime.now().replace(microsecond=0) + ret = validate_io_procs(self.all_mounts_procs, [self.mounts[1]]) + curr_time = datetime.now().replace(microsecond=0) + self.assertTrue(ret, 'Not able to validate completion of IO on mount') + self.all_mounts_procs *= 0 + + # To ascertain IO was happening during brick operations + self.assertGreater( + curr_time - prev_time, timedelta(seconds=10), 'Unable ' + 'to validate IO was happening during brick operations') + + # Take and validate `arequal` checksum on first directory + ret, act_areequal = collect_mounts_arequal(self.mounts[1], + m_point + '/user1') + self.assertTrue(ret, 'Failed to get arequal checksum from mount') + self.assertEqual( + exp_arequal, act_areequal, '`arequal` checksum did ' + 'not match post arbiter to x3 replicated volume conversion') + + g.log.info('PASS: Arbiter to x3 replicated volume conversion complete') diff --git a/tests/functional/afr/test_brick_process_not_started_on_read_only_node_disks.py b/tests/functional/afr/test_brick_process_not_started_on_read_only_node_disks.py index a30c53148..4a695c241 100644 --- a/tests/functional/afr/test_brick_process_not_started_on_read_only_node_disks.py +++ b/tests/functional/afr/test_brick_process_not_started_on_read_only_node_disks.py @@ -126,10 +126,7 @@ class SelfHealDaemonProcessTests(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = 
(select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) diff --git a/tests/functional/afr/test_client_side_quorum_with_fixed_for_cross3.py b/tests/functional/afr/test_client_side_quorum_with_fixed_for_cross3.py index 0d7241aef..0ead8b2fc 100755 --- a/tests/functional/afr/test_client_side_quorum_with_fixed_for_cross3.py +++ b/tests/functional/afr/test_client_side_quorum_with_fixed_for_cross3.py @@ -23,10 +23,10 @@ from glusto.core import Glusto as g from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on -from glustolibs.gluster.volume_libs import ( - set_volume_options, get_subvols) +from glustolibs.gluster.volume_libs import get_subvols from glustolibs.misc.misc_libs import upload_scripts -from glustolibs.gluster.volume_ops import reset_volume_option +from glustolibs.gluster.volume_ops import (set_volume_options, + reset_volume_option) from glustolibs.gluster.brick_libs import (bring_bricks_offline, bring_bricks_online) from glustolibs.io.utils import (validate_io_procs, diff --git a/tests/functional/afr/test_default_granular_entry_heal.py b/tests/functional/afr/test_default_granular_entry_heal.py new file mode 100644 index 000000000..91ca25907 --- /dev/null +++ b/tests/functional/afr/test_default_granular_entry_heal.py @@ -0,0 +1,235 @@ +# Copyright (C) 2021 Red Hat, Inc. 
<http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from random import choice
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+                                           bring_bricks_online,
+                                           are_bricks_offline,
+                                           are_bricks_online, get_all_bricks)
+from glustolibs.gluster.glusterfile import occurences_of_pattern_in_file
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+                                          is_heal_complete)
+from glustolibs.gluster.lib_utils import collect_bricks_arequal
+from glustolibs.gluster.volume_libs import get_subvols
+from glustolibs.gluster.volume_ops import get_volume_options
+from glustolibs.io.utils import collect_mounts_arequal
+
+
+@runs_on([['distributed-replicated', 'replicated',
+           'arbiter', 'distributed-arbiter'], ['glusterfs']])
+class TestDefaultGranularEntryHeal(GlusterBaseClass):
+
+    def setUp(self):
+
+        self.get_super_method(self, 'setUp')()
+
+        self.first_client = self.mounts[0].client_system
+        self.mountpoint = self.mounts[0].mountpoint
+
+        # Setup Volume
+        if not self.setup_volume_and_mount_volume([self.mounts[0]]):
+            raise ExecutionError("Failed to setup and mount volume")
+
+    def tearDown(self):
+
+        if not 
self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _bring_bricks_offline(self): + """Brings bricks offline and confirms if they are offline""" + # Select bricks to bring offline from a replica set + subvols_dict = get_subvols(self.mnode, self.volname) + subvols = subvols_dict['volume_subvols'] + self.bricks_to_bring_offline = [] + for subvol in subvols: + self.bricks_to_bring_offline.append(choice(subvol)) + + # Bring bricks offline + ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' % + self.bricks_to_bring_offline) + + ret = are_bricks_offline(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks %s are not offline' + % self.bricks_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + self.bricks_to_bring_offline) + + def _restart_volume_and_bring_all_offline_bricks_online(self): + """Restart volume and bring all offline bricks online""" + + ret = is_heal_complete(self.mnode, self.volname) + self.assertFalse(ret, 'Heal is completed') + g.log.info('Heal is pending') + + ret = bring_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline, + bring_bricks_online_methods=[ + 'volume_start_force']) + self.assertTrue(ret, 'Failed to bring bricks %s online' % + self.bricks_to_bring_offline) + + # Check if bricks are back online or not + ret = are_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks not online %s even after restart' % + self.bricks_to_bring_offline) + + g.log.info('Bringing bricks %s online is successful', + self.bricks_to_bring_offline) + + def _wait_for_heal_to_completed(self): + """Check if heal is completed""" + ret = monitor_heal_completion(self.mnode, self.volname, + timeout_period=3600) + 
self.assertTrue(ret, 'Heal has not yet completed')
+
+    def _check_arequal_on_bricks_with_a_specific_arequal(self, arequal,
+                                                         brick_list):
+        """
+        Compare an initial arequal checksum with bricks in the given list
+        """
+        init_val = arequal[0].splitlines()[-1].split(':')[-1]
+        ret, arequals = collect_bricks_arequal(brick_list)
+        self.assertTrue(ret, 'Failed to get arequal on bricks')
+        for brick_arequal in arequals:
+            brick_total = brick_arequal.splitlines()[-1].split(':')[-1]
+            self.assertEqual(init_val, brick_total, 'Arequals not matching')
+
+    @staticmethod
+    def _add_dir_path_to_brick_list(brick_list):
+        """Add the test dir 'mydir' at the end of each brick path"""
+        dir_brick_list = []
+        for brick in brick_list:
+            dir_brick_list.append('{}/{}'.format(brick, 'mydir'))
+        return dir_brick_list
+
+    def _check_arequal_checksum_for_the_volume(self):
+        """
+        Check if arequals of the mount point and the bricks
+        are the same.
+        """
+        if self.volume_type == "replicated":
+            # Check arequals for "replicated"
+            brick_list = get_all_bricks(self.mnode, self.volname)
+            dir_brick_list = self._add_dir_path_to_brick_list(brick_list)
+
+            # Get arequal before getting bricks offline
+            work_dir = '{}/mydir'.format(self.mountpoint)
+            ret, arequals = collect_mounts_arequal([self.mounts[0]],
+                                                   path=work_dir)
+            self.assertTrue(ret, 'Failed to get arequal')
+            g.log.info('Getting arequal before getting bricks offline '
+                       'is successful')
+
+            # Get arequal on bricks and compare with mount_point_total
+            self._check_arequal_on_bricks_with_a_specific_arequal(
+                arequals, dir_brick_list)
+
+        # Check arequals for "distributed-replicated"
+        if self.volume_type == "distributed-replicated":
+            # Get the subvolumes
+            subvols_dict = get_subvols(self.mnode, self.volname)
+            num_subvols = len(subvols_dict['volume_subvols'])
+
+            # Get arequals and compare
+            for i in range(0, num_subvols):
+                # Get arequal for first brick
+                brick_list = subvols_dict['volume_subvols'][i]
+                dir_brick_list = 
self._add_dir_path_to_brick_list(brick_list) + ret, arequals = collect_bricks_arequal([dir_brick_list[0]]) + self.assertTrue(ret, 'Failed to get arequal on first brick') + + # Get arequal for every brick and compare with first brick + self._check_arequal_on_bricks_with_a_specific_arequal( + arequals, dir_brick_list) + + def test_default_granular_entry_heal(self): + """ + Test case: + 1. Create a cluster. + 2. Create volume start it and mount it. + 3. Check if cluster.granular-entry-heal is ON by default or not. + 4. Check /var/lib/glusterd/<volname>/info for + cluster.granular-entry-heal=on. + 5. Check if option granular-entry-heal is present in the + volume graph or not. + 6. Kill one or two bricks of the volume depending on volume type. + 7. Create all types of files on the volume like text files, hidden + files, link files, dirs, char device, block device and so on. + 8. Bring back the killed brick by restarting the volume. + 9. Wait for heal to complete. + 10. Check arequal-checksum of all the bricks and see if it's proper or + not. 
+ """ + # Check if cluster.granular-entry-heal is ON by default or not + ret = get_volume_options(self.mnode, self.volname, + 'granular-entry-heal') + self.assertEqual(ret['cluster.granular-entry-heal'], 'on', + "Value of cluster.granular-entry-heal not on " + "by default") + + # Check var/lib/glusterd/<volname>/info for + # cluster.granular-entry-heal=on + ret = occurences_of_pattern_in_file(self.mnode, + 'cluster.granular-entry-heal=on', + '/var/lib/glusterd/vols/{}/info' + .format(self.volname)) + self.assertEqual(ret, 1, "Failed get cluster.granular-entry-heal=on in" + " info file") + + # Check if option granular-entry-heal is present in the + # volume graph or not + ret = occurences_of_pattern_in_file(self.first_client, + 'option granular-entry-heal on', + "/var/log/glusterfs/mnt-{}_{}.log" + .format(self.volname, + self.mount_type)) + self.assertTrue(ret > 0, + "Failed to find granular-entry-heal in volume graph") + g.log.info("granular-entry-heal properly set to ON by default") + + # Kill one or two bricks of the volume depending on volume type + self._bring_bricks_offline() + + # Create all types of files on the volume like text files, hidden + # files, link files, dirs, char device, block device and so on + cmd = ("cd {};mkdir mydir;cd mydir;mkdir dir;mkdir .hiddendir;" + "touch file;touch .hiddenfile;mknod blockfile b 1 5;" + "mknod charfile b 1 5; mkfifo pipefile;touch fileforhardlink;" + "touch fileforsoftlink;ln fileforhardlink hardlinkfile;" + "ln -s fileforsoftlink softlinkfile".format(self.mountpoint)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to create files of all types") + + # Bring back the killed brick by restarting the volume Bricks should + # be online again + self._restart_volume_and_bring_all_offline_bricks_online() + + # Wait for heal to complete + self._wait_for_heal_to_completed() + + # Check arequal-checksum of all the bricks and see if it's proper or + # not + 
self._check_arequal_checksum_for_the_volume() diff --git a/tests/functional/afr/test_dir_gfid_heal_on_all_subvols.py b/tests/functional/afr/test_dir_gfid_heal_on_all_subvols.py index b457442cf..dcce8b418 100644 --- a/tests/functional/afr/test_dir_gfid_heal_on_all_subvols.py +++ b/tests/functional/afr/test_dir_gfid_heal_on_all_subvols.py @@ -52,22 +52,26 @@ class AssignGfidsOnAllSubvols(GlusterBaseClass): g.log.info("Successfully uploaded IO scripts to clients %s", cls.clients) + def setUp(self): + + # Calling GlusterBaseClass setUpClass + self.get_super_method(self, 'setUp')() + # Setup Volume and Mount Volume - ret = cls.setup_volume_and_mount_volume(cls.mounts) + ret = self.setup_volume_and_mount_volume(self.mounts) if not ret: raise ExecutionError("Failed to Setup_Volume and Mount_Volume") g.log.info("Successful in Setup Volume and Mount Volume") - @classmethod - def tearDownClass(cls): + def tearDown(self): # Cleanup Volume - ret = cls.unmount_volume_and_cleanup_volume(cls.mounts) + ret = self.unmount_volume_and_cleanup_volume(self.mounts) if not ret: raise ExecutionError("Failed to create volume") - g.log.info("Successful in cleaning up Volume %s", cls.volname) + g.log.info("Successful in cleaning up Volume %s", self.volname) - cls.get_super_method(cls, 'tearDownClass')() + self.get_super_method(self, 'tearDown')() def verify_gfid_and_retun_gfid(self, dirname): dir_gfids = dict() diff --git a/tests/functional/afr/test_gfid_assignment_on_dist_rep_vol.py b/tests/functional/afr/test_gfid_assignment_on_dist_rep_vol.py index d49a95fec..2f2bdae88 100644 --- a/tests/functional/afr/test_gfid_assignment_on_dist_rep_vol.py +++ b/tests/functional/afr/test_gfid_assignment_on_dist_rep_vol.py @@ -55,24 +55,26 @@ class AssignGfidsOnAllSubvols(GlusterBaseClass): g.log.info("Successfully uploaded IO scripts to clients %s", cls.clients) + def setUp(self): + + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + # Setup Volume and Mount Volume - 
g.log.info("Starting to Setup Volume and Mount Volume") - ret = cls.setup_volume_and_mount_volume(cls.mounts) + ret = self.setup_volume_and_mount_volume(self.mounts) if not ret: raise ExecutionError("Failed to Setup_Volume and Mount_Volume") g.log.info("Successful in Setup Volume and Mount Volume") - @classmethod - def tearDownClass(cls): + def tearDown(self): # Cleanup Volume - g.log.info("Starting to clean up Volume %s", cls.volname) - ret = cls.unmount_volume_and_cleanup_volume(cls.mounts) + ret = self.unmount_volume_and_cleanup_volume(self.mounts) if not ret: raise ExecutionError("Failed to create volume") - g.log.info("Successful in cleaning up Volume %s", cls.volname) + g.log.info("Successful in cleaning up Volume %s", self.volname) - cls.get_super_method(cls, 'tearDownClass')() + self.get_super_method(self, 'tearDown')() def verify_gfid(self, dirname): dir_gfids = dict() diff --git a/tests/functional/afr/test_gfid_assignment_on_lookup.py b/tests/functional/afr/test_gfid_assignment_on_lookup.py index c84a9a19c..edd154fc9 100644 --- a/tests/functional/afr/test_gfid_assignment_on_lookup.py +++ b/tests/functional/afr/test_gfid_assignment_on_lookup.py @@ -63,24 +63,24 @@ class AssignGfidOnLookup(GlusterBaseClass): g.log.info("Successfully uploaded IO scripts to clients %s", cls.clients) + def setUp(self): + + self.get_super_method(self, 'setUp')() # Setup Volume and Mount Volume - g.log.info("Starting to Setup Volume and Mount Volume") - ret = cls.setup_volume_and_mount_volume(cls.mounts) + ret = self.setup_volume_and_mount_volume(self.mounts) if not ret: raise ExecutionError("Failed to Setup_Volume and Mount_Volume") g.log.info("Successful in Setup Volume and Mount Volume") - @classmethod - def tearDownClass(cls): + def tearDown(self): # Cleanup Volume - g.log.info("Starting to clean up Volume %s", cls.volname) - ret = cls.unmount_volume_and_cleanup_volume(cls.mounts) + ret = self.unmount_volume_and_cleanup_volume(self.mounts) if not ret: raise 
ExecutionError("Failed to create volume") - g.log.info("Successful in cleaning up Volume %s", cls.volname) + g.log.info("Successful in cleaning up Volume %s", self.volname) - cls.get_super_method(cls, 'tearDownClass')() + self.get_super_method(self, 'tearDown')() def verify_gfid(self, dirname): dir_gfids = dict() diff --git a/tests/functional/afr/test_gfid_split_brain_resolution.py b/tests/functional/afr/test_gfid_split_brain_resolution.py index 8d8317a01..6e74376fc 100644 --- a/tests/functional/afr/test_gfid_split_brain_resolution.py +++ b/tests/functional/afr/test_gfid_split_brain_resolution.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,260 +14,232 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+from random import choice + from glusto.core import Glusto as g -from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) -from glustolibs.gluster.exceptions import ExecutionError -from glustolibs.gluster.volume_libs import get_subvols + from glustolibs.gluster.brick_libs import (bring_bricks_offline, - bring_bricks_online, - are_bricks_offline, - wait_for_bricks_to_be_online, - get_all_bricks) -from glustolibs.gluster.volume_ops import set_volume_options + bring_bricks_online) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on from glustolibs.gluster.glusterdir import mkdir -from glustolibs.gluster.heal_ops import (enable_self_heal_daemon, - trigger_heal) from glustolibs.gluster.heal_libs import ( - is_volume_in_split_brain, - is_heal_complete, - wait_for_self_heal_daemons_to_be_online, - monitor_heal_completion) -from glustolibs.gluster.glusterfile import GlusterFile + is_volume_in_split_brain, monitor_heal_completion, + wait_for_self_heal_daemons_to_be_online) +from glustolibs.gluster.heal_ops import (enable_self_heal_daemon, trigger_heal, + trigger_heal_full) +from glustolibs.gluster.lib_utils import collect_bricks_arequal, list_files +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.volume_ops import set_volume_options -@runs_on([['replicated', 'distributed-replicated'], - ['glusterfs']]) +# pylint: disable=stop-iteration-return, too-many-locals, too-many-statements +@runs_on([[ + 'replicated', 'distributed-replicated', 'arbiter', 'distributed-arbiter' +], ['glusterfs']]) class TestSelfHeal(GlusterBaseClass): - """ - Description: - Test cases related to - healing in default configuration of the volume - """ - - @classmethod - def setUpClass(cls): - # Calling GlusterBaseClass setUpClass - cls.get_super_method(cls, 'setUpClass')() - - # Override replica count to be 3 - if cls.volume_type == "replicated": - cls.volume['voltype'] 
= { - 'type': 'replicated', - 'replica_count': 3, - 'transport': 'tcp'} - - if cls.volume_type == "distributed-replicated": - cls.volume['voltype'] = { - 'type': 'distributed-replicated', - 'dist_count': 2, - 'replica_count': 3, - 'transport': 'tcp'} - def setUp(self): - # Calling GlusterBaseClass setUp self.get_super_method(self, 'setUp')() - # Setup Volume and Mount Volume - g.log.info("Starting to Setup Volume and Mount Volume") - ret = self.setup_volume_and_mount_volume(mounts=self.mounts, - volume_create_force=False) - if not ret: - raise ExecutionError("Failed to Setup_Volume and Mount_Volume") - g.log.info("Successful in Setup Volume and Mount Volume") + # A single mount is enough for the test + self.mounts = self.mounts[0::-1] - self.bricks_list = get_all_bricks(self.mnode, self.volname) + if not self.setup_volume_and_mount_volume(mounts=self.mounts): + raise ExecutionError('Failed to setup and mount ' + '{}'.format(self.volname)) def tearDown(self): - """ - If test method failed before validating IO, tearDown waits for the - IO's to complete and checks for the IO exit status - - Cleanup and umount volume - """ - # Cleanup and umount volume - g.log.info("Starting to Unmount Volume and Cleanup Volume") - ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) - if not ret: - raise ExecutionError("Failed to umount the vol & cleanup Volume") - g.log.info("Successful in umounting the volume and Cleanup") - - # Calling GlusterBaseClass teardown + if not self.unmount_volume_and_cleanup_volume(mounts=self.mounts): + raise ExecutionError('Not able to unmount and cleanup ' + '{}'.format(self.volname)) self.get_super_method(self, 'tearDown')() - def toggle_bricks_and_perform_io(self, file_list, brick_list): + @staticmethod + def _get_two_bricks(subvols, arbiter): + """ + Yields two bricks from each subvol for dist/pure X arb/repl volumes + """ + # Get an iterator for py2/3 compatibility + brick_iter = iter(zip(*subvols)) + prev_brick = next(brick_iter) + 
first_brick = prev_brick + + for index, curr_brick in enumerate(brick_iter, 1): + # `yield` should contain arbiter brick for arbiter type vols + if not (index == 1 and arbiter): + yield prev_brick + curr_brick + prev_brick = curr_brick + # At the end yield first and last brick from a subvol + yield prev_brick + first_brick + + def _get_files_in_brick(self, brick_path, dir_path): """ - Kills bricks, does I/O and brings the brick back up. + Returns files in format of `dir_path/file_name` from the given brick + path """ - # Bring down bricks. - g.log.info("Going to bring down the brick process for %s", brick_list) - ret = bring_bricks_offline(self.volname, brick_list) - self.assertTrue(ret, ("Failed to bring down the bricks. Please " - "check the log file for more details.")) - g.log.info("Brought down the brick process " - "for %s successfully", brick_list) - ret = are_bricks_offline(self.mnode, self.volname, brick_list) - self.assertTrue(ret, 'Bricks %s are not offline' % brick_list) - - # Perform I/O - for filename in file_list: - fpath = self.mounts[0].mountpoint + "/test_gfid_split_brain/" + \ - filename - cmd = ("dd if=/dev/urandom of=%s bs=1024 count=1" % fpath) - ret, _, _ = g.run(self.clients[0], cmd) - self.assertEqual(ret, 0, "Creating %s failed" % fpath) - - # Bring up bricks - ret = bring_bricks_online(self.mnode, self.volname, brick_list) - self.assertTrue(ret, 'Failed to bring brick %s online' % brick_list) - g.log.info('Bringing brick %s online is successful', brick_list) - - # Waiting for bricks to come online - g.log.info("Waiting for brick process to come online") - timeout = 30 - ret = wait_for_bricks_to_be_online(self.mnode, self.volname, timeout) - self.assertTrue(ret, "bricks didn't come online after adding bricks") - g.log.info("Bricks are online") - - def resolve_gfid_split_brain(self, filename, source_brick): + node, path = brick_path.split(':') + files = list_files(node, path, dir_path) + self.assertIsNotNone( + files, 'Unable to get list of 
files from {}'.format(brick_path)) + + files = [file_name.rsplit('/', 1)[-1] for file_name in files] + return [ + each_file for each_file in files + if each_file in ('file1', 'file2', 'file3') + ] + + def _run_cmd_and_assert(self, cmd): """ - resolves gfid split-brain on files using source-brick option + Run `cmd` on `mnode` and assert for success """ - node, _ = source_brick.split(':') - command = ("gluster volume heal " + self.volname + " split-brain " - "source-brick " + source_brick + " " + filename) - ret, _, _ = g.run(node, command) - self.assertEqual(ret, 0, "command execution not successful") + ret, _, err = g.run(self.mnode, cmd) + self.assertEqual(ret, 0, '`{}` failed with {}'.format(cmd, err)) def test_gfid_split_brain_resolution(self): """ - - create gfid split-brain of files and resolves them using source-brick - option of the CLI. + Description: Simulates gfid split brain on multiple files in a dir and + resolve them via `bigger-file`, `mtime` and `source-brick` methods + + Steps: + - Create and mount a replicated volume, create a dir and ~10 data files + - Simulate gfid splits in 9 of the files + - Resolve each 3 set of files using `bigger-file`, `mtime` and + `source-bricks` split-brain resoultion methods + - Trigger and monitor for heal completion + - Validate all the files are healed and arequal matches for bricks in + subvols """ - - # pylint: disable=too-many-statements - # pylint: disable=too-many-locals - - # Disable all self-heals and client-quorum - options = {"self-heal-daemon": "off", - "data-self-heal": "off", - "metadata-self-heal": "off", - "entry-self-heal": "off", - "cluster.quorum-type": "none"} - g.log.info("setting volume options %s", options) - ret = set_volume_options(self.mnode, self.volname, options) - self.assertTrue(ret, ("Unable to set volume option %s for " - "volume %s" % (options, self.volname))) - g.log.info("Successfully set %s for volume %s", options, self.volname) - - # Create dir inside which I/O will be performed. 
- ret = mkdir(self.mounts[0].client_system, "%s/test_gfid_split_brain" - % self.mounts[0].mountpoint) - self.assertTrue(ret, "mkdir failed") - - # get the subvolumes - g.log.info("Starting to get sub-volumes for volume %s", self.volname) - subvols_dict = get_subvols(self.mnode, self.volname) - num_subvols = len(subvols_dict['volume_subvols']) - g.log.info("Number of subvolumes in volume %s:", num_subvols) - - # Toggle bricks and perform I/O - file_list = ["file1.txt", "file2.txt", "file3.txt", "file4.txt", - "file5.txt", "file6.txt", "file7.txt", "file8.txt", - "file9.txt", "file10.txt"] - brick_index = 0 - offline_bricks = [] - for _ in range(0, 3): - for i in range(0, num_subvols): - subvol_brick_list = subvols_dict['volume_subvols'][i] - offline_bricks.append(subvol_brick_list[brick_index % 3]) - offline_bricks.append(subvol_brick_list[(brick_index+1) % 3]) - self.toggle_bricks_and_perform_io(file_list, offline_bricks) - brick_index += 1 - offline_bricks[:] = [] - - # Enable shd - g.log.info("enabling the self heal daemon") + io_cmd = 'cat /dev/urandom | tr -dc [:space:][:print:] | head -c ' + client, m_point = (self.mounts[0].client_system, + self.mounts[0].mountpoint) + arbiter = self.volume_type.find('arbiter') >= 0 + + # Disable self-heal daemon and set `quorum-type` option to `none` + ret = set_volume_options(self.mnode, self.volname, { + 'self-heal-daemon': 'off', + 'cluster.quorum-type': 'none' + }) + self.assertTrue( + ret, 'Not able to disable `quorum-type` and ' + '`self-heal` daemon volume options') + + # Create required dir and files from the mount + split_dir = 'gfid_split_dir' + file_io = ('cd %s; for i in {1..10}; do ' + io_cmd + + ' 1M > %s/file$i; done;') + ret = mkdir(client, '{}/{}'.format(m_point, split_dir)) + self.assertTrue(ret, 'Unable to create a directory from mount point') + ret, _, _ = g.run(client, file_io % (m_point, split_dir)) + + # `file{4,5,6}` are re-created every time to be used in `bigger-file` + # resolution method + cmd = 
'rm -rf {0}/file{1} && {2} {3}M > {0}/file{1}' + split_cmds = { + 1: + ';'.join(cmd.format(split_dir, i, io_cmd, 2) for i in range(1, 7)), + 2: + ';'.join(cmd.format(split_dir, i, io_cmd, 3) for i in range(4, 7)), + 3: ';'.join( + cmd.format(split_dir, i, io_cmd, 1) for i in range(4, 10)), + 4: ';'.join( + cmd.format(split_dir, i, io_cmd, 1) for i in range(7, 10)), + } + + # Get subvols and simulate entry split brain + subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + self.assertTrue(subvols, 'Not able to get list of subvols') + msg = ('Unable to bring files under {} dir to entry split brain while ' + '{} are down') + for index, bricks in enumerate(self._get_two_bricks(subvols, arbiter), + 1): + # Bring down two bricks from each subvol + ret = bring_bricks_offline(self.volname, list(bricks)) + self.assertTrue(ret, 'Unable to bring {} offline'.format(bricks)) + + ret, _, _ = g.run(client, + 'cd {}; {}'.format(m_point, split_cmds[index])) + self.assertEqual(ret, 0, msg.format(split_dir, bricks)) + + # Bricks will be brought down only two times in case of arbiter and + # bringing remaining files into split brain for `latest-mtime` heal + if arbiter and index == 2: + ret, _, _ = g.run(client, + 'cd {}; {}'.format(m_point, split_cmds[4])) + self.assertEqual(ret, 0, msg.format(split_dir, bricks)) + + # Bring offline bricks online + ret = bring_bricks_online( + self.mnode, + self.volname, + bricks, + bring_bricks_online_methods='volume_start_force') + self.assertTrue(ret, 'Unable to bring {} online'.format(bricks)) + + # Enable self-heal daemon, trigger heal and assert volume is in split + # brain condition ret = enable_self_heal_daemon(self.mnode, self.volname) - self.assertTrue(ret, "failed to enable self heal daemon") - g.log.info("Successfully enabled the self heal daemon") - - # Wait for self heal processes to come online - g.log.info("Wait for selfheal process to come online") - timeout = 300 - ret = 
wait_for_self_heal_daemons_to_be_online(self.mnode, self.volname, - timeout) - self.assertTrue(ret, "Self-heal process are not online") - g.log.info("All self heal process are online") - - # Trigger heal + self.assertTrue(ret, 'Failed to enable self heal daemon') + + ret = wait_for_self_heal_daemons_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, 'Not all self heal daemons are online') + ret = trigger_heal(self.mnode, self.volname) - self.assertTrue(ret, 'Starting heal failed') - g.log.info('Index heal launched') + self.assertTrue(ret, 'Unable to trigger index heal on the volume') - # checking if file is in split-brain ret = is_volume_in_split_brain(self.mnode, self.volname) - self.assertTrue(ret, "Files are not in split-brain as expected.") - g.log.info("Files are still in split-brain") - - # First brick of each replica will be used as source-brick - first_brick_list = [] - for i in range(0, num_subvols): - subvol_brick_list = subvols_dict['volume_subvols'][i] - brick = subvol_brick_list[0] - first_brick_list.append(brick) - - # Find which dht subvols the 10 files are present in and trigger heal - for filename in file_list: - fpath = self.mounts[0].mountpoint + "/test_gfid_split_brain/" + \ - filename - gfile = GlusterFile(self.clients[0], fpath) - for brick in first_brick_list: - _, brick_path = brick.split(':') - match = [brick for item in gfile.hashed_bricks if brick_path - in item] - if match: - self.resolve_gfid_split_brain("/test_gfid_split_brain/" + - filename, brick) - - # Trigger heal to complete pending data/metadata heals - ret = trigger_heal(self.mnode, self.volname) - self.assertTrue(ret, 'Starting heal failed') - g.log.info('Index heal launched') + self.assertTrue(ret, 'Volume should be in split brain condition') + + # Select source brick and take note of files in source brick + stop = len(subvols[0]) - 1 if arbiter else len(subvols[0]) + source_bricks = [choice(subvol[0:stop]) for subvol in subvols] + files = [ + 
self._get_files_in_brick(path, split_dir) for path in source_bricks + ] + + # Resolve `file1, file2, file3` gfid split files using `source-brick` + cmd = ('gluster volume heal ' + self.volname + ' split-brain ' + 'source-brick {} /' + split_dir + '/{}') + for index, source_brick in enumerate(source_bricks): + for each_file in files[index]: + run_cmd = cmd.format(source_brick, each_file) + self._run_cmd_and_assert(run_cmd) + + # Resolve `file4, file5, file6` gfid split files using `bigger-file` + cmd = ('gluster volume heal ' + self.volname + + ' split-brain bigger-file /' + split_dir + '/{}') + for each_file in ('file4', 'file5', 'file6'): + run_cmd = cmd.format(each_file) + self._run_cmd_and_assert(run_cmd) + + # Resolve `file7, file8, file9` gfid split files using `latest-mtime` + cmd = ('gluster volume heal ' + self.volname + + ' split-brain latest-mtime /' + split_dir + '/{}') + for each_file in ('file7', 'file8', 'file9'): + run_cmd = cmd.format(each_file) + self._run_cmd_and_assert(run_cmd) + + # Unless `shd` is triggered manually/automatically files will still + # appear in `heal info` + ret = trigger_heal_full(self.mnode, self.volname) + self.assertTrue(ret, 'Unable to trigger full self heal') # Monitor heal completion ret = monitor_heal_completion(self.mnode, self.volname) - self.assertTrue(ret, 'Heal has not yet completed') - - # Check if heal is completed - ret = is_heal_complete(self.mnode, self.volname) - self.assertTrue(ret, 'Heal is not complete') - g.log.info('Heal is completed successfully') - - # Get arequals and compare - for i in range(0, num_subvols): - # Get arequal for first brick - subvol_brick_list = subvols_dict['volume_subvols'][i] - node, brick_path = subvol_brick_list[0].split(':') - command = ('arequal-checksum -p %s ' - '-i .glusterfs -i .landfill -i .trashcan' - % brick_path) - ret, arequal, _ = g.run(node, command) - first_brick_total = arequal.splitlines()[-1].split(':')[-1] - - # Get arequal for every brick and compare with first 
brick - for brick in subvol_brick_list[1:]: - node, brick_path = brick.split(':') - command = ('arequal-checksum -p %s ' - '-i .glusterfs -i .landfill -i .trashcan' - % brick_path) - ret, brick_arequal, _ = g.run(node, command) - self.assertFalse(ret, - 'Failed to get arequal on brick %s' - % brick) - g.log.info('Getting arequal for %s is successful', brick) - brick_total = brick_arequal.splitlines()[-1].split(':')[-1] - - self.assertEqual(first_brick_total, brick_total, - 'Arequals for subvol and %s are not equal' - % brick) - g.log.info('Arequals for subvol and %s are equal', brick) + self.assertTrue( + ret, 'All files in volume should be healed after healing files via' + ' `source-brick`, `bigger-file`, `latest-mtime` methods manually') + + # Validate normal file `file10` and healed files don't differ in + # subvols via an `arequal` + for subvol in subvols: + # Disregard last brick if volume is of arbiter type + ret, arequal = collect_bricks_arequal(subvol[0:stop]) + self.assertTrue( + ret, 'Unable to get `arequal` checksum on ' + '{}'.format(subvol[0:stop])) + self.assertEqual( + len(set(arequal)), 1, 'Mismatch of `arequal` ' + 'checksum among {} is identified'.format(subvol[0:stop])) + + g.log.info('Pass: Resolution of gfid split-brain via `source-brick`, ' + '`bigger-file` and `latest-mtime` methods is complete') diff --git a/tests/functional/afr/test_git_clone.py b/tests/functional/afr/test_git_clone.py new file mode 100644 index 000000000..02871cb8b --- /dev/null +++ b/tests/functional/afr/test_git_clone.py @@ -0,0 +1,80 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.misc.misc_libs import git_clone_and_compile +from glustolibs.gluster.volume_ops import set_volume_options + + +@runs_on([['replicated', 'distributed-replicated', 'dispersed', + 'distributed-dispersed', 'arbiter', 'distributed-arbiter'], + ['glusterfs']]) +class TestGitCloneOnGlusterVolume(GlusterBaseClass): + + def setUp(self): + self.get_super_method(self, 'setUp')() + + # Setup volume and mount it on one client + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + self.get_super_method(self, 'tearDown')() + + # Unmount from the one client and cleanup the volume + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Unable to unmount and cleanup volume") + g.log.info("Unmount and volume cleanup is successful") + + def _run_git_clone(self, options): + """Run git clone on the client""" + + repo = 'https://github.com/gluster/glusterfs.git' + cloned_repo_dir = (self.mounts[0].mountpoint + '/' + + repo.split('/')[-1].rstrip('.git')) + if options: + cloned_repo_dir = (self.mounts[0].mountpoint + '/' + "perf-" + + repo.split('/')[-1].rstrip('.git')) + ret = git_clone_and_compile(self.mounts[0].client_system, + repo, cloned_repo_dir, 
False) + self.assertTrue(ret, "Unable to clone {} repo on {}". + format(repo, cloned_repo_dir)) + g.log.info("Repo %s cloned successfully ", repo) + + def test_git_clone_on_gluster_volume(self): + """ + Test Steps: + 1. Create a volume and mount it on one client + 2. git clone the glusterfs repo on the glusterfs volume. + 3. Set the performance options to off + 4. Repeat step 2 on a different directory. + """ + self._run_git_clone(False) + + # Disable the performance cache options on the volume + self.options = {'performance.quick-read': 'off', + 'performance.stat-prefetch': 'off', + 'performance.open-behind': 'off', + 'performance.write-behind': 'off', + 'performance.client-io-threads': 'off'} + ret = set_volume_options(self.mnode, self.volname, self.options) + self.assertTrue(ret, "Unable to set the volume options") + g.log.info("Volume options set successfully") + + self._run_git_clone(True) diff --git a/tests/functional/afr/test_heal_info_should_have_fixed_fields.py b/tests/functional/afr/test_heal_info_should_have_fixed_fields.py index 92fc8868e..11a39f794 100644 --- a/tests/functional/afr/test_heal_info_should_have_fixed_fields.py +++ b/tests/functional/afr/test_heal_info_should_have_fixed_fields.py @@ -122,10 +122,7 @@ class VerifySelfHealTriggersHealCommand(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) diff --git a/tests/functional/afr/test_heal_split_brain_command.py b/tests/functional/afr/test_heal_split_brain_command.py new file mode 100644 index 000000000..c924e8910 --- /dev/null +++ 
b/tests/functional/afr/test_heal_split_brain_command.py @@ -0,0 +1,264 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g + +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, get_all_bricks) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.glusterfile import get_fattr +from glustolibs.gluster.heal_libs import is_volume_in_split_brain +from glustolibs.gluster.heal_ops import heal_info, heal_info_split_brain +from glustolibs.gluster.volume_ops import set_volume_options + + +# pylint: disable=too-many-locals, too-many-statements +@runs_on([['arbiter', 'replicated'], ['glusterfs']]) +class TestSplitBrain(GlusterBaseClass): + def setUp(self): + self.get_super_method(self, 'setUp')() + + # A single mount is enough for all the tests + self.mounts = [self.mounts[0]] + + if not self.setup_volume_and_mount_volume(mounts=self.mounts): + raise ExecutionError('Failed to setup and mount ' + '{}'.format(self.volname)) + + def tearDown(self): + if not self.unmount_volume_and_cleanup_volume(mounts=self.mounts): + raise ExecutionError('Not able to 
unmount and cleanup ' + '{}'.format(self.volname)) + self.get_super_method(self, 'tearDown')() + + def _run_cmd_and_validate(self, client, cmd, paths): + """ + Run `cmd` from `paths` on `client` + """ + for path in paths: + ret, _, _ = g.run(client, cmd % path) + self.assertEqual( + ret, 0, 'Unable to perform `{}` from `{}` on `{}`'.format( + cmd, path, client)) + + @staticmethod + def _transform_gfids(gfids): + """ + Returns list of `gfids` joined by `-` at required places + + Example of one elemnt: + Input: 0xd4653ea0289548eb81b35c91ffb73eff + Returns: d4653ea0-2895-48eb-81b3-5c91ffb73eff + """ + split_pos = [10, 14, 18, 22] + rout = [] + for gfid in gfids: + rout.append('-'.join( + gfid[start:stop] + for start, stop in zip([2] + split_pos, split_pos + [None]))) + return rout + + def test_split_brain_from_heal_command(self): + """ + Description: Simulate and validate data, metadata and entry split brain + + Steps: + - Create and mount a replicated volume and disable quorum, self-heal + deamon + - Create ~10 files from the mount point and simulate data, metadata + split-brain for 2 files each + - Create a dir with some files and simulate entry/gfid split brain + - Validate volume successfully recognizing split-brain + - Validate a lookup on split-brain files fails with EIO error on mount + - Validate `heal info` and `heal info split-brain` command shows only + the files that are in split-brain + - Validate new files and dir's can be created from the mount + """ + io_cmd = 'cat /dev/urandom | tr -dc [:space:][:print:] | head -c ' + client, m_point = (self.mounts[0].client_system, + self.mounts[0].mountpoint) + arbiter = self.volume_type.find('arbiter') >= 0 + + # Disable self-heal daemon and set `quorum-type` option to `none` + ret = set_volume_options(self.mnode, self.volname, { + 'self-heal-daemon': 'off', + 'cluster.quorum-type': 'none' + }) + self.assertTrue( + ret, 'Not able to disable `quorum-type` and ' + '`self-heal` daemon volume options') + + # Create 
required dir's from the mount + fqpath = '{}/dir'.format(m_point) + file_io = ('cd %s; for i in {1..6}; do ' + io_cmd + + ' 2M > file$i; done;') + file_cmd = 'cd %s; touch file{7..10}' + ret = mkdir(client, fqpath) + self.assertTrue(ret, 'Unable to create a directory from mount point') + + # Create empty files and data files + for cmd in (file_io, file_cmd): + self._run_cmd_and_validate(client, cmd, [m_point, fqpath]) + + all_bricks = get_all_bricks(self.mnode, self.volname) + self.assertIsNotNone( + all_bricks, 'Unable to get list of bricks ' + 'associated with the volume') + + # Data will be appended to the files `file1, file2` resulting in data + # split brain + data_split_cmd = ';'.join(io_cmd + '2M >> ' + each_file + for each_file in ('file1', 'file2')) + + # File permissions will be changed for `file4, file5` to result in + # metadata split brain + meta_split_cmd = ';'.join('chmod 0555 ' + each_file + for each_file in ('file4', 'file5')) + + # Files will be deleted and created with data to result in data, + # metadata split brain on files and entry(gfid) split brain on dir + entry_split_cmd = ';'.join('rm -f ' + each_file + ' && ' + io_cmd + + ' 2M > ' + each_file + for each_file in ('dir/file1', 'dir/file2')) + + # Need to always select arbiter(3rd) brick if volume is arbiter type or + # any two bricks for replicated volume + for bricks in zip(all_bricks, all_bricks[1:] + [all_bricks[0]]): + + # Skip iteration if volume type is arbiter and `bricks` doesn't + # contain arbiter brick + if arbiter and (all_bricks[-1] not in bricks): + continue + + # Bring bricks offline + ret = bring_bricks_offline(self.volname, list(bricks)) + self.assertTrue(ret, 'Unable to bring {} offline'.format(bricks)) + + # Run cmd to bring files into split brain + for cmd, msg in ((data_split_cmd, 'data'), + (meta_split_cmd, 'meta'), (entry_split_cmd, + 'entry')): + ret, _, _ = g.run(client, 'cd {}; {}'.format(m_point, cmd)) + self.assertEqual( + ret, 0, 'Unable to run cmd for bringing 
files ' + 'into {} split brain'.format(msg)) + + # Bring offline bricks online + ret = bring_bricks_online( + self.mnode, + self.volname, + bricks, + bring_bricks_online_methods='volume_start_force') + self.assertTrue(ret, 'Unable to bring {} online'.format(bricks)) + + # Validate volume is in split-brain + self.assertTrue(is_volume_in_split_brain(self.mnode, self.volname), + 'Volume should be in split-brain') + + # Validate `head` lookup on split brain files fails with EIO + for each_file in ('file1', 'file2', 'file4', 'file5', 'dir/file1', + 'dir/file2'): + ret, _, err = g.run(client, + 'cd {}; head {}'.format(m_point, each_file)) + self.assertNotEqual( + ret, 0, 'Lookup on split-brain file {} should ' + 'fail'.format(each_file)) + self.assertIn( + 'Input/output error', err, + 'File {} should result in EIO error'.format(each_file)) + + # Validate presence of split-brain files and absence of other files in + # `heal info` and `heal info split-brain` commands + ret, info, _ = heal_info(self.mnode, self.volname) + self.assertEqual(ret, 0, 'Unable to query for `heal info`') + ret, info_spb, _ = heal_info_split_brain(self.mnode, self.volname) + self.assertEqual(ret, 0, 'Unable to query for `heal info split-brain`') + + # Collect `gfid's` of files in data and metadata split-brain + common_gfids = [] + host, path = all_bricks[0].split(':') + for each_file in ('file1', 'file2', 'file4', 'file5', 'dir'): + fattr = get_fattr(host, path + '/{}'.format(each_file), + 'trusted.gfid') + self.assertIsNotNone( + fattr, 'Unable to get `gfid` for {}'.format(each_file)) + common_gfids.append(fattr) + + # GFID for files under an entry split brain dir differs from it's peers + uniq_gfids = [] + for brick in all_bricks[:-1] if arbiter else all_bricks: + host, path = brick.split(':') + for each_file in ('dir/file1', 'dir/file2'): + fattr = get_fattr(host, path + '/{}'.format(each_file), + 'trusted.gfid') + self.assertIsNotNone( + fattr, 'Unable to get `gfid` for {}'.format(each_file)) + 
uniq_gfids.append(fattr) + + # Transform GFIDs to match against o/p of `heal info` and `split-brain` + common_gfids[:] = self._transform_gfids(common_gfids) + uniq_gfids[:] = self._transform_gfids(uniq_gfids) + + # Just enough validation by counting occurences asserting success + common_files = ['/file1 -', '/file2 -', '/file4', '/file5', '/dir '] + uniq_files = ['/dir/file1', '/dir/file2'] + + # Common files should occur 3 times each in `heal info` and + # `heal info split-brain` or 2 times for arbiter + occur = 2 if arbiter else 3 + for each_file, gfid in zip(common_files, common_gfids): + + # Check against `heal info` cmd + self.assertEqual( + info.count(gfid) + info.count(each_file), occur, + 'File {} with gfid {} should exist in `heal info` ' + 'command'.format(each_file[:6], gfid)) + + # Check against `heal info split-brain` cmd + self.assertEqual( + info_spb.count(gfid) + info_spb.count(each_file[:6].rstrip()), + occur, 'File {} with gfid {} should exist in `heal info ' + 'split-brain` command'.format(each_file[:6], gfid)) + + # Entry split files will be listed only in `heal info` cmd + for index, each_file in enumerate(uniq_files): + + # Collect file and it's associated gfid's + entries = (uniq_files + uniq_gfids)[index::2] + count = sum(info.count(entry) for entry in entries) + self.assertEqual( + count, occur, 'Not able to find existence of ' + 'entry split brain file {} in `heal info`'.format(each_file)) + + # Assert no other file is counted as in split-brain + for cmd, rout, exp_str in (('heal info', info, 'entries: 7'), + ('heal info split-brain', info_spb, + 'split-brain: 5')): + self.assertEqual( + rout.count(exp_str), occur, 'Each node should ' + 'list only {} entries in {} command'.format(exp_str[-1], cmd)) + + # Validate new files and dir can be created from mount + fqpath = '{}/temp'.format(m_point) + ret = mkdir(client, fqpath) + self.assertTrue( + ret, 'Unable to create a dir from mount post split-brain of files') + for cmd in (file_io, 
file_cmd): + self._run_cmd_and_validate(client, cmd, [fqpath]) + + g.log.info('Pass: Validated data, metadata and entry split brain') diff --git a/tests/functional/afr/test_healed_and_heal_failed_command.py b/tests/functional/afr/test_healed_and_heal_failed_command.py new file mode 100644 index 000000000..c02ed6514 --- /dev/null +++ b/tests/functional/afr/test_healed_and_heal_failed_command.py @@ -0,0 +1,104 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 

from random import choice

from glusto.core import Glusto as g

from glustolibs.gluster.brick_libs import (bring_bricks_offline,
                                           get_online_bricks_list)
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.heal_ops import heal_info_heal_failed, heal_info_healed
from glustolibs.misc.misc_libs import upload_scripts


@runs_on([['replicated'], ['glusterfs', 'nfs']])
class TestHealedAndHealFailedCommand(GlusterBaseClass):
    """Validate the removed `healed`/`heal-failed` heal-info sub-commands."""

    @classmethod
    def setUpClass(cls):
        cls.get_super_method(cls, 'setUpClass')()

        # IO helper script used later to populate the mount point
        cls.script_path = '/usr/share/glustolibs/io/scripts/file_dir_ops.py'
        uploaded = upload_scripts(cls.clients, cls.script_path)
        if not uploaded:
            raise ExecutionError('Failed to upload IO scripts to client')

    def setUp(self):
        self.get_super_method(self, 'setUp')()

        # One mount is sufficient for this validation
        self.mounts = [self.mounts[0]]
        mounted = self.setup_volume_and_mount_volume(mounts=self.mounts)
        if not mounted:
            raise ExecutionError('Failed to setup and mount '
                                 '{}'.format(self.volname))

    def tearDown(self):
        cleaned = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
        if not cleaned:
            raise ExecutionError('Not able to unmount and cleanup '
                                 '{}'.format(self.volname))
        self.get_super_method(self, 'tearDown')()

    def test_healed_and_heal_failed_command(self):
        """
        Description: Validate absence of `healed` and `heal-failed` options

        Steps:
        - Create and mount a replicated volume
        - Kill one of the bricks and write IO from mount point
        - Verify `gluster volume heal <volname> info healed` and `gluster
          volume heal <volname> info heal-failed` command results in error
        - Validate `gluster volume help` doesn't list `healed` and
          `heal-failed` commands
        """
        mount = self.mounts[0]
        client, m_point = mount.client_system, mount.mountpoint

        # Take down a randomly chosen brick so heal records accumulate
        online_bricks = get_online_bricks_list(self.mnode, self.volname)
        self.assertIsNotNone(online_bricks, 'Unable to get online bricks list')
        killed = bring_bricks_offline(self.volname, choice(online_bricks))
        self.assertTrue(killed,
                        'Unable to kill one of the bricks in the volume')

        # Generate data from the client so there is IO pending heal
        io_cmd = ('/usr/bin/env python {} '
                  'create_deep_dirs_with_files --dir-depth 10 '
                  '--fixed-file-size 1M --num-of-files 50 '
                  '--dirname-start-num 1 {}'.format(self.script_path,
                                                    m_point))
        rcode = g.run(client, io_cmd)[0]
        self.assertEqual(rcode, 0, 'Not able to fill directory with IO')

        # The `healed` variant of heal-info must be rejected with usage text
        cmd = 'gluster volume heal <volname> info'
        ret, _, err = heal_info_healed(self.mnode, self.volname)
        self.assertNotEqual(ret, 0, '`%s healed` should result in error' % cmd)
        self.assertIn('Usage', err, '`%s healed` should list `Usage`' % cmd)

        # Likewise the `heal-failed` variant must error out
        ret, _, err = heal_info_heal_failed(self.mnode, self.volname)
        self.assertNotEqual(ret, 0,
                            '`%s heal-failed` should result in error' % cmd)
        self.assertIn('Usage', err,
                      '`%s heal-failed` should list `Usage`' % cmd)

        # Neither sub-command may be advertised by `gluster volume help`
        cmd = 'gluster volume help | grep -i heal'
        ret, help_out, _ = g.run(self.mnode, cmd)
        self.assertEqual(
            ret, 0, 'Unable to query help content from `gluster volume help`')
        self.assertNotIn(
            'healed', help_out, '`healed` string should not exist '
            'in `gluster volume help` command')
        self.assertNotIn(
            'heal-failed', help_out, '`heal-failed` string should '
            'not exist in `gluster volume help` command')
# pylint: disable=too-many-locals,too-many-statements,too-many-branches

from glusto.core import Glusto as g
from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on)
from glustolibs.gluster.glusterdir import get_dir_contents
from glustolibs.gluster.glusterfile import get_file_stat
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.volume_ops import set_volume_options
from glustolibs.gluster.heal_libs import (monitor_heal_completion,
                                          is_heal_complete,
                                          is_volume_in_split_brain)
from glustolibs.gluster.lib_utils import (add_user, del_user,
                                          collect_bricks_arequal)
from glustolibs.gluster.mount_ops import (umount_volume,
                                          mount_volume)
from glustolibs.gluster.volume_libs import get_subvols
from glustolibs.gluster.brick_libs import (bring_bricks_offline,
                                           bring_bricks_online,
                                           are_bricks_offline,
                                           get_all_bricks)
from glustolibs.misc.misc_libs import upload_scripts
from glustolibs.io.utils import (validate_io_procs,
                                 list_all_files_and_dirs_mounts,
                                 wait_for_io_to_complete,
                                 collect_mounts_arequal)


@runs_on([['distributed-replicated', 'replicated'],
          ['glusterfs']])
class TestAFRMetaDataSelfHealClientSideHeal(GlusterBaseClass):
    """Validate client-side (mount triggered) metadata self-heal in AFR."""

    @classmethod
    def setUpClass(cls):

        cls.get_super_method(cls, 'setUpClass')()

        # Upload io scripts for running IO on mounts
        cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
                                  "file_dir_ops.py")
        if not upload_scripts(cls.clients, [cls.script_upload_path]):
            raise ExecutionError("Failed to upload IO scripts to clients %s"
                                 % cls.clients)
        g.log.info("Successfully uploaded IO scripts to clients %s",
                   cls.clients)

    def setUp(self):

        self.get_super_method(self, 'setUp')()
        # Track async IO procs so tearDown can wait on them if a test fails
        # before validation.
        self.all_mounts_procs, self.io_validation_complete = [], False

        # Create users used later for the chown/chgrp metadata changes
        self.users = ['qa_func', 'qa_system', 'qa_perf', 'qa_all']
        for mount_object in self.mounts:
            for user in self.users:
                if not add_user(mount_object.client_system, user):
                    raise ExecutionError("Failed to create user "
                                         "{}".format(user))
        g.log.info("Successfully created all users.")

        # Setup Volume and Mount Volume
        ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
        if not ret:
            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
        g.log.info("Successful in Setup Volume and Mount Volume")

    def tearDown(self):
        """
        If test method failed before validating IO, tearDown waits for the
        IO's to complete and checks for the IO exit status.
        Cleanup and umount volume
        """
        if not self.io_validation_complete:
            ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts)
            if not ret:
                raise ExecutionError("IO failed on some of the clients")
            g.log.info("IO is successful on all mounts")

            # List all files and dirs created
            if not list_all_files_and_dirs_mounts(self.mounts):
                raise ExecutionError("Failed to list all files and dirs")
            g.log.info("Listing all files and directories is successful")

        # Delete user accounts created in setUp
        for mount_object in self.mounts:
            for user in self.users:
                if not del_user(mount_object.client_system, user):
                    raise ExecutionError("Failed to delete user: {}"
                                         .format(user))
        g.log.info("Successfully deleted all users")

        # Cleanup and umount volume
        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
        if not ret:
            raise ExecutionError("Failed to umount the vol & cleanup Volume")
        g.log.info("Successful in umounting the volume and Cleanup")

        self.get_super_method(self, 'tearDown')()

    def trigger_heal_from_mount_point(self):
        """
        Trigger heal from mount point using read.
        """
        # Unmounting and remounting volume to update the volume graph
        # in client.
        ret, _, _ = umount_volume(
            self.mounts[0].client_system, self.mounts[0].mountpoint)
        self.assertFalse(ret, "Failed to unmount volume.")

        ret, _, _ = mount_volume(
            self.volname, 'glusterfs', self.mounts[0].mountpoint,
            self.mnode, self.mounts[0].client_system)
        self.assertFalse(ret, "Failed to remount volume.")
        g.log.info('Successfully umounted and remounted volume.')

        # Trigger heal from client side: reading the tree makes the client
        # look up each entry, which starts client-side self-heal.
        cmd = ("/usr/bin/env python {0} read {1}/{2}".format(
            self.script_upload_path, self.mounts[0].mountpoint,
            self.test_meta_data_self_heal_folder))
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertFalse(ret, 'Failed to trigger heal on %s'
                         % self.mounts[0].client_system)
        g.log.info("Successfully triggered heal from mount point.")

    def validate_io_on_clients(self):
        """
        Validate I/O on client mount points.
        """
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        self.io_validation_complete = True
        g.log.info("IO is successful on all mounts")

    def check_arequal_from_mount_point_and_bricks(self):
        """
        Check if arequals of mount point and bricks are the same.
        """
        # Check arequals for "replicated"
        all_bricks = get_all_bricks(self.mnode, self.volname)
        if self.volume_type == "replicated":
            # Get arequal before getting bricks offline
            ret, arequals = collect_mounts_arequal(self.mounts)
            self.assertTrue(ret, 'Failed to get arequal')
            g.log.info('Getting arequal before getting bricks offline '
                       'is successful')
            # Last line of arequal output carries the total checksum
            mount_point_total = arequals[0].splitlines()[-1].split(':')[-1]

            # Get arequal on bricks and compare with mount_point_total
            ret, arequals = collect_bricks_arequal(all_bricks)
            self.assertTrue(ret, 'Failed to get arequal on bricks')
            for arequal in arequals:
                brick_total = arequal.splitlines()[-1].split(':')[-1]
                self.assertEqual(mount_point_total, brick_total,
                                 'Arequals for mountpoint and brick '
                                 'are not equal')
                g.log.info('Arequals for mountpoint and brick are equal')
            g.log.info('All arequals are equal for replicated')

        # Check arequals for "distributed-replicated"
        if self.volume_type == "distributed-replicated":
            # get the subvolumes
            subvols_dict = get_subvols(self.mnode, self.volname)
            num_subvols = len(subvols_dict['volume_subvols'])
            g.log.info("Number of subvolumes in volume %s:", num_subvols)

            # Get arequals and compare: within each replica subvol every
            # brick must match the first brick of that subvol.
            for i in range(0, num_subvols):
                # Get arequal for first brick
                subvol_brick_list = subvols_dict['volume_subvols'][i]
                ret, arequal = collect_bricks_arequal([subvol_brick_list[0]])
                self.assertTrue(ret, 'Failed to get arequal on first')

                # Get arequal for every brick and compare with first brick
                first_brick_total = arequal[0].splitlines()[-1].split(':')[-1]
                ret, arequals = collect_bricks_arequal(subvol_brick_list)
                self.assertTrue(ret, 'Failed to get arequal on bricks')
                for arequal in arequals:
                    brick_total = arequal.splitlines()[-1].split(':')[-1]
                    self.assertEqual(first_brick_total, brick_total,
                                     'Arequals for subvol and brick are '
                                     'not equal')
                    g.log.info('Arequals for subvol and brick are equal')
            g.log.info('All arequals are equal for distributed-replicated')

    def check_permssions_on_bricks(self, bricks_list):
        """
        Check permissions on a given set of bricks.

        Verifies that the chmod/chown/chgrp changes made from the mount
        (555 dirs, 666/444 files, uids 1000/1001/1002, gid 1003) are
        present on every brick in `bricks_list`.
        """
        for brick in bricks_list:
            node, brick_path = brick.split(':')
            dir_list = get_dir_contents(node, "{}/{}".format(
                brick_path, self.test_meta_data_self_heal_folder))
            self.assertIsNotNone(dir_list, "Dir list from "
                                 "brick is empty")
            g.log.info("Successfully got dir list from bick")

            # Verify changes for dirs
            for folder in dir_list:
                ret = get_file_stat(node, "{}/{}/{}".format(
                    brick_path, self.test_meta_data_self_heal_folder, folder))

                self.assertEqual('555', ret['access'],
                                 "Permissions mismatch on node {}"
                                 .format(node))

                # gid 1003 corresponds to the 'qa_all' group created in setUp
                # -- assumption based on user creation order; verify on host.
                self.assertEqual('1003', ret['gid'],
                                 "Group mismatch on node {}"
                                 .format(node))

                # Get list of files for each dir
                file_list = get_dir_contents(node, "{}/{}/{}".format(
                    brick_path, self.test_meta_data_self_heal_folder,
                    folder))
                self.assertIsNotNone(file_list, "File list from "
                                     "brick is empty.")
                g.log.info("Successfully got file list from bick.")

                # Verify for group for each file
                if file_list:
                    for file_name in file_list:
                        ret = get_file_stat(node, "{}/{}/{}/{}".format(
                            brick_path, self.test_meta_data_self_heal_folder,
                            folder, file_name))

                        self.assertEqual('1003', ret['gid'],
                                         "Group mismatch on node {}"
                                         .format(node))

            # Verify permissions for files in dirs 1..50
            for i in range(1, 51):

                file_list = get_dir_contents(node, "{}/{}/dir.{}".format(
                    brick_path, self.test_meta_data_self_heal_folder,
                    str(i)))
                self.assertIsNotNone(file_list, "File list from "
                                     "brick is empty.")
                g.log.info("Successfully got file list from bick.")

                if file_list:
                    for file_name in file_list:

                        ret = get_file_stat(node, "{}/{}/dir.{}/{}".format(
                            brick_path, self.test_meta_data_self_heal_folder,
                            str(i), file_name))
                        self.assertEqual('666', ret['access'],
                                         "Permissions mismatch on node {}"
                                         .format(node))

            # Verify permissions for files in dirs 51..100
            for i in range(51, 101):

                file_list = get_dir_contents(node, "{}/{}/dir.{}".format(
                    brick_path, self.test_meta_data_self_heal_folder,
                    str(i)))
                self.assertIsNotNone(file_list, "File list from "
                                     "brick is empty.")
                g.log.info("Successfully got file list from bick.")

                if file_list:
                    for file_name in file_list:

                        ret = get_file_stat(node, "{}/{}/dir.{}/{}".format(
                            brick_path, self.test_meta_data_self_heal_folder,
                            str(i), file_name))
                        self.assertEqual('444', ret['access'],
                                         "Permissions mismatch on node {}"
                                         .format(node))

            # Verify ownership for dirs 1..35 (chown'd to qa_func, uid 1000)
            for i in range(1, 36):

                ret = get_file_stat(node, "{}/{}/dir.{}".format(
                    brick_path, self.test_meta_data_self_heal_folder,
                    str(i)))
                self.assertEqual('1000', ret['uid'],
                                 "User id mismatch on node {}"
                                 .format(node))

                # Verify ownership for files in dirs
                file_list = get_dir_contents(node, "{}/{}/dir.{}".format(
                    brick_path, self.test_meta_data_self_heal_folder,
                    str(i)))
                self.assertIsNotNone(file_list, "File list from "
                                     "brick is empty.")
                g.log.info("Successfully got file list from bick.")

                if file_list:
                    for file_name in file_list:

                        ret = get_file_stat(node, "{}/{}/dir.{}/{}".format(
                            brick_path, self.test_meta_data_self_heal_folder,
                            str(i), file_name))
                        self.assertEqual('1000', ret['uid'],
                                         "User id mismatch on node {}"
                                         .format(node))

            # Verify ownership for dirs 36..70 (chown'd to qa_system,
            # uid 1001)
            for i in range(36, 71):

                ret = get_file_stat(node, "{}/{}/dir.{}".format(
                    brick_path, self.test_meta_data_self_heal_folder,
                    str(i)))
                self.assertEqual('1001', ret['uid'],
                                 "User id mismatch on node {}"
                                 .format(node))

                # Verify ownership for files in dirs
                file_list = get_dir_contents(node, "{}/{}/dir.{}".format(
                    brick_path, self.test_meta_data_self_heal_folder,
                    str(i)))
                self.assertIsNotNone(file_list, "File list from "
                                     "brick is empty.")
                g.log.info("Successfully got file list from bick.")

                if file_list:
                    for file_name in file_list:

                        ret = get_file_stat(node, "{}/{}/dir.{}/{}".format(
                            brick_path, self.test_meta_data_self_heal_folder,
                            str(i), file_name))
                        self.assertEqual('1001', ret['uid'],
                                         "User id mismatch on node {}"
                                         .format(node))

            # Verify ownership for dirs 71..100 (chown'd to qa_perf,
            # uid 1002)
            for i in range(71, 101):

                ret = get_file_stat(node, "{}/{}/dir.{}".format(
                    brick_path, self.test_meta_data_self_heal_folder,
                    str(i)))
                self.assertEqual('1002', ret['uid'],
                                 "User id mismatch on node {}"
                                 .format(node))

                # Verify ownership for files in dirs
                file_list = get_dir_contents(node, "{}/{}/dir.{}".format(
                    brick_path, self.test_meta_data_self_heal_folder,
                    str(i)))
                self.assertIsNotNone(file_list, "File list from "
                                     "brick is empty.")
                g.log.info("Successfully got file list from bick.")

                if file_list:
                    for file_name in file_list:

                        ret = get_file_stat(node, "{}/{}/dir.{}/{}".format(
                            brick_path, self.test_meta_data_self_heal_folder,
                            str(i), file_name))
                        self.assertEqual('1002', ret['uid'],
                                         "User id mismatch on node {}"
                                         .format(node))

    def test_metadata_self_heal_client_side_heal(self):
        """
        Testcase steps:
        1.Turn off the options self heal daemon
        2.Create IO
        3.Calculate arequal of the bricks and mount point
        4.Bring down "brick1" process
        5.Change the permissions of the directories and files
        6.Change the ownership of the directories and files
        7.Change the group of the directories and files
        8.Bring back the brick "brick1" process
        9.Execute "find . | xargs stat" from the mount point to trigger heal
        10.Verify the changes in permissions are not self healed on brick1
        11.Verify the changes in permissions on all bricks but brick1
        12.Verify the changes in ownership are not self healed on brick1
        13.Verify the changes in ownership on all the bricks but brick1
        14.Verify the changes in group are not successfully self-healed
          on brick1
        15.Verify the changes in group on all the bricks but brick1
        16.Turn on the option metadata-self-heal
        17.Execute "find . | xargs md5sum" from the mount point to trigger
          heal
        18.Wait for heal to complete
        19.Verify the changes in permissions are self-healed on brick1
        20.Verify the changes in ownership are successfully self-healed
          on brick1
        21.Verify the changes in group are successfully self-healed on brick1
        22.Calculate arequal check on all the bricks and mount point
        """
        # Setting options: disable the self-heal daemon so only client-side
        # (mount-triggered) heal can run.
        ret = set_volume_options(self.mnode, self.volname,
                                 {"self-heal-daemon": "off"})
        self.assertTrue(ret, 'Failed to set options self-heal-daemon '
                        'and metadata-self-heal to OFF')
        g.log.info("Options are set successfully")

        # Creating files on client side: 100 dirs with 5 files each
        self.test_meta_data_self_heal_folder = 'test_meta_data_self_heal'
        for mount_object in self.mounts:
            command = ("cd {0}/ ; mkdir {1} ; cd {1}/ ;"
                       "for i in `seq 1 100` ; "
                       "do mkdir dir.$i ; "
                       "for j in `seq 1 5` ; "
                       "do dd if=/dev/urandom of=dir.$i/file.$j "
                       "bs=1K count=$j ; done ; done ;".format
                       (mount_object.mountpoint,
                        self.test_meta_data_self_heal_folder))
            proc = g.run_async(mount_object.client_system, command,
                               user=mount_object.user)
            self.all_mounts_procs.append(proc)

        # Validate IO
        self.validate_io_on_clients()

        # Calculate and check arequal of the bricks and mount point
        self.check_arequal_from_mount_point_and_bricks()

        # Select bricks to bring offline from a replica set: first brick of
        # every subvol goes offline, the rest stay online.
        subvols_dict = get_subvols(self.mnode, self.volname)
        subvols = subvols_dict['volume_subvols']
        bricks_to_bring_offline = []
        bricks_to_be_online = []
        for subvol in subvols:
            bricks_to_bring_offline.append(subvol[0])
            for brick in subvol[1:]:
                bricks_to_be_online.append(brick)

        # Bring bricks offline
        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
        self.assertTrue(ret, 'Failed to bring bricks %s offline' %
                        bricks_to_bring_offline)

        ret = are_bricks_offline(self.mnode, self.volname,
                                 bricks_to_bring_offline)
        self.assertTrue(ret, 'Bricks %s are not offline'
                        % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s offline is successful',
                   bricks_to_bring_offline)

        # Change the permissions of the directories and files while part of
        # each replica is down, so the change is pending heal.
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            command = ('cd {}/{}; '
                       'for i in `seq 1 100` ; '
                       'do chmod 555 dir.$i ; done ; '
                       'for i in `seq 1 50` ; '
                       'do for j in `seq 1 5` ; '
                       'do chmod 666 dir.$i/file.$j ; done ; done ; '
                       'for i in `seq 51 100` ; '
                       'do for j in `seq 1 5` ; '
                       'do chmod 444 dir.$i/file.$j ; done ; done ;'
                       .format(mount_obj.mountpoint,
                               self.test_meta_data_self_heal_folder))

            proc = g.run_async(mount_obj.client_system, command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Validate IO
        self.validate_io_on_clients()

        # Change the ownership of the directories and files
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            command = ('cd {}/{} ; '
                       'for i in `seq 1 35` ; '
                       'do chown -R qa_func dir.$i ; done ; '
                       'for i in `seq 36 70` ; '
                       'do chown -R qa_system dir.$i ; done ; '
                       'for i in `seq 71 100` ; '
                       'do chown -R qa_perf dir.$i ; done ;'
                       .format(mount_obj.mountpoint,
                               self.test_meta_data_self_heal_folder))
            proc = g.run_async(mount_obj.client_system, command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Validate IO
        self.validate_io_on_clients()

        # Change the group of the directories and files
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            command = ('cd {}/{}; '
                       'for i in `seq 1 100` ; '
                       'do chgrp -R qa_all dir.$i ; done ;'
                       .format(mount_obj.mountpoint,
                               self.test_meta_data_self_heal_folder))

            proc = g.run_async(mount_obj.client_system, command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Validate IO
        self.validate_io_on_clients()

        # Bring brick online
        ret = bring_bricks_online(self.mnode, self.volname,
                                  bricks_to_bring_offline)
        self.assertTrue(ret, 'Failed to bring bricks %s online' %
                        bricks_to_bring_offline)
        g.log.info('Bringing bricks %s online is successful',
                   bricks_to_bring_offline)

        # Trigger heal from mount point (metadata-self-heal is still off, so
        # nothing should actually heal yet)
        self.trigger_heal_from_mount_point()

        # Verify the changes are not self healed on brick1 for each subvol:
        # the previously-offline bricks must still show the pre-change
        # metadata (755, root:root).
        for brick in bricks_to_bring_offline:
            node, brick_path = brick.split(':')

            dir_list = get_dir_contents(node, "{}/{}".format(
                brick_path, self.test_meta_data_self_heal_folder))
            self.assertIsNotNone(dir_list, "Dir list from "
                                 "brick is empty")
            g.log.info("Successfully got dir list from bick")

            # Verify changes for dirs
            for folder in dir_list:

                ret = get_file_stat(node, "{}/{}/{}".format(
                    brick_path, self.test_meta_data_self_heal_folder,
                    folder))

                self.assertEqual('755', ret['access'],
                                 "Permissions mismatch on node {}"
                                 .format(node))

                self.assertEqual('root', ret['username'],
                                 "User id mismatch on node {}"
                                 .format(node))

                self.assertEqual('root', ret['groupname'],
                                 "Group id mismatch on node {}"
                                 .format(node))

                # Get list of files for each dir
                file_list = get_dir_contents(node, "{}/{}/{}".format(
                    brick_path, self.test_meta_data_self_heal_folder,
                    folder))
                self.assertIsNotNone(file_list, "File list from "
                                     "brick is empty.")
                g.log.info("Successfully got file list from bick.")

                if file_list:
                    for file_name in file_list:

                        ret = get_file_stat(node, "{}/{}/{}/{}".format(
                            brick_path, self.test_meta_data_self_heal_folder,
                            folder, file_name))

                        self.assertEqual('644', ret['access'],
                                         "Permissions mismatch on node"
                                         " {} for file {}".format(node,
                                                                  file_name))

                        self.assertEqual('root', ret['username'],
                                         "User id mismatch on node"
                                         " {} for file {}".format(node,
                                                                  file_name))

                        self.assertEqual('root', ret['groupname'],
                                         "Group id mismatch on node"
                                         " {} for file {}".format(node,
                                                                  file_name))

        # Verify the changes are self healed on all bricks except brick1
        # for each subvol
        self.check_permssions_on_bricks(bricks_to_be_online)

        # Setting options: now enable client-side metadata self-heal
        ret = set_volume_options(self.mnode, self.volname,
                                 {"metadata-self-heal": "on"})
        self.assertTrue(ret, 'Failed to set options to ON.')
        g.log.info("Options are set successfully")

        # Trigger heal from mount point
        self.trigger_heal_from_mount_point()

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')

        # Verify the changes are self healed on brick1 for each subvol
        self.check_permssions_on_bricks(bricks_to_bring_offline)

        # Calculate and check arequal of the bricks and mount point
        self.check_arequal_from_mount_point_and_bricks()
glustolibs.gluster.heal_ops import trigger_heal from glustolibs.misc.misc_libs import upload_scripts from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs, wait_for_io_to_complete) +from glustolibs.gluster.volume_ops import (get_volume_info) -@runs_on([['replicated'], +@runs_on([['replicated', 'arbiter'], ['glusterfs', 'cifs', 'nfs']]) class VerifySelfHealTriggersHealCommand(GlusterBaseClass): """ @@ -55,14 +56,6 @@ class VerifySelfHealTriggersHealCommand(GlusterBaseClass): g.log.info("Successfully uploaded IO scripts to clients %s", cls.clients) - # Override Volumes - if cls.volume_type == "replicated": - # Define x2 replicated volume - cls.volume['voltype'] = { - 'type': 'replicated', - 'replica_count': 2, - 'transport': 'tcp'} - def setUp(self): # Calling GlusterBaseClass setUp self.get_super_method(self, 'setUp')() @@ -244,6 +237,18 @@ class VerifySelfHealTriggersHealCommand(GlusterBaseClass): # It should be the same g.log.info('Getting arequal on bricks...') arequals_after_heal = {} + + if self.volume_type == "arbiter": + vol_info = get_volume_info(self.mnode, self.volname) + self.assertIsNotNone(vol_info, 'Unable to get volume info') + data_brick_list = [] + for brick in bricks_list: + for brick_info in vol_info[self.volname]["bricks"]["brick"]: + if brick_info["name"] == brick: + if brick_info["isArbiter"] == "0": + data_brick_list.append(brick) + bricks_list = data_brick_list + for brick in bricks_list: g.log.info('Getting arequal on bricks %s...', brick) node, brick_path = brick.split(':') diff --git a/tests/functional/afr/test_repl_heal_with_io.py b/tests/functional/afr/test_repl_heal_with_io.py new file mode 100644 index 000000000..0cdff000c --- /dev/null +++ b/tests/functional/afr/test_repl_heal_with_io.py @@ -0,0 +1,306 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from random import choice +from time import sleep, time + +from glusto.core import Glusto as g + +from glustolibs.gluster.brick_libs import bring_bricks_offline +from glustolibs.gluster.dht_test_utils import find_hashed_subvol +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.heal_libs import monitor_heal_completion +from glustolibs.gluster.heal_ops import heal_info +from glustolibs.gluster.volume_libs import ( + get_subvols, wait_for_volume_process_to_be_online) +from glustolibs.gluster.volume_ops import volume_start +from glustolibs.io.utils import wait_for_io_to_complete + + +@runs_on([[ + 'arbiter', 'distributed-arbiter', 'replicated', 'distributed-replicated' +], ['glusterfs', 'nfs']]) +class TestHealWithIO(GlusterBaseClass): + def setUp(self): + self.get_super_method(self, 'setUp')() + + # A single mount is enough for all the tests + self.mounts = [self.mounts[0]] + + # For `test_heal_info_...` tests 6 replicas are needed + if ('test_heal_info' in self.id().split('.')[-1] + and self.volume_type.find('distributed') >= 0): + self.volume['voltype']['dist_count'] = 6 + + if not self.setup_volume_and_mount_volume(mounts=self.mounts): + raise 
ExecutionError('Failed to setup and mount ' + '{}'.format(self.volname)) + + self.client, self.m_point = (self.mounts[0].client_system, + self.mounts[0].mountpoint) + self.file_path = self.m_point + '/test_file' + self._io_cmd = ('cat /dev/urandom | tr -dc [:space:][:print:] | ' + 'head -c {} ') + # IO has to run for longer length for covering two scenarios in arbiter + # volume type + self.io_time = 600 if self.volume_type.find('arbiter') >= 0 else 300 + self.proc = '' + + def tearDown(self): + if self.proc: + ret = wait_for_io_to_complete([self.proc], [self.mounts[0]]) + if not ret: + raise ExecutionError('Wait for IO completion failed on client') + + if not self.unmount_volume_and_cleanup_volume(mounts=self.mounts): + raise ExecutionError('Not able to unmount and cleanup ' + '{}'.format(self.volname)) + self.get_super_method(self, 'tearDown')() + + def _validate_heal(self, timeout=8): + """ + Validates `heal info` command returns in less than `timeout` value + """ + start_time = time() + ret, _, _ = heal_info(self.mnode, self.volname) + end_time = time() + self.assertEqual(ret, 0, 'Not able to query heal info status') + self.assertLess( + end_time - start_time, timeout, + 'Query of heal info of volume took more than {} ' + 'seconds'.format(timeout)) + + def _validate_io(self, delay=5): + """ + Validates IO was happening during main test, measures by looking at + time delay between issue and return of `async_communicate` + """ + start_time = time() + ret, _, err = self.proc.async_communicate() + end_time = time() + self.assertEqual(ret, 0, 'IO failed to complete with error ' + '{}'.format(err)) + self.assertGreater( + end_time - start_time, delay, + 'Unable to validate IO was happening during main test') + self.proc = '' + + def _bring_brick_offline(self, bricks_list, arb_brick=False): + """ + Bring arbiter brick offline if `arb_brick` is true else one of data + bricks will be offline'd + """ + # Pick up only `data` brick + off_brick, b_type = bricks_list[:-1], 
'data' + if arb_brick: + # Pick only `arbiter` brick + off_brick, b_type = [bricks_list[-1]], 'arbiter' + elif not arb_brick and self.volume_type.find('replicated') >= 0: + # Should pick all bricks if voltype is `replicated` + off_brick = bricks_list + + ret = bring_bricks_offline(self.volname, choice(off_brick)) + self.assertTrue(ret, + 'Unable to bring `{}` brick offline'.format(b_type)) + + def _get_hashed_subvol_index(self, subvols): + """ + Return `index` of hashed_volume from list of subvols + """ + index = 0 + if self.volume_type.find('distributed') >= 0: + hashed_subvol, index = find_hashed_subvol( + subvols, '', + self.file_path.rsplit('/', 1)[1]) + self.assertIsNotNone(hashed_subvol, + 'Unable to find hashed subvolume') + return index + + def _validate_brick_down_scenario(self, + validate_heal=False, + monitor_heal=False): + """ + Refactor of common steps across volume type for validating brick down + scenario + """ + if validate_heal: + # Wait for ample amount of IO to be written to file + sleep(180) + + # Validate heal info shows o/p and exit in <8s + self._validate_heal() + + # Force start volume and verify all process are online + ret, _, _ = volume_start(self.mnode, self.volname, force=True) + self.assertEqual(ret, 0, 'Unable to force start volume') + + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue( + ret, 'Not able to confirm all process of volume are online') + + if monitor_heal: + # Wait for IO to be written to file + sleep(30) + + # Monitor heal and validate data was appended successfully to file + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, + 'Self heal is not completed post brick online') + + def _perform_heal_append_scenario(self): + """ + Refactor of common steps in `entry_heal` and `data_heal` tests + """ + # Find hashed subvol of the file with IO + subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + index = self._get_hashed_subvol_index(subvols) + + 
# Bring down one of the `data` bricks of hashed subvol + self._bring_brick_offline(bricks_list=subvols[index]) + + cmd = ('{} >> {}; '.format(self._io_cmd.format('1G'), self.file_path)) + ret, _, _ = g.run(self.client, cmd) + self.assertEqual( + ret, 0, 'Unable to append 1G of data to existing ' + 'file on mount post offline of a brick') + + # Start volume and verify all process are online + self._validate_brick_down_scenario() + + # Start conitnuous IO and monitor heal completion + cmd = ('count={}; while [ $count -gt 1 ]; do {} >> {}; sleep 1; ' + '((count--)); done;'.format(self.io_time, + self._io_cmd.format('1M'), + self.file_path)) + self.proc = g.run_async(self.client, cmd) + self._validate_brick_down_scenario(monitor_heal=True) + + # Bring down `arbiter` brick and perform validation + if self.volume_type.find('arbiter') >= 0: + self._bring_brick_offline(bricks_list=subvols[index], + arb_brick=True) + self._validate_brick_down_scenario(monitor_heal=True) + + self._validate_io() + + def test_heal_info_with_io(self): + """ + Description: Validate heal info command with IO + + Steps: + - Create and mount a 6x3 replicated volume + - Create a file and perform IO continuously on this file + - While IOs are happening issue `heal info` command and validate o/p + not taking much time + """ + cmd = ('count=90; while [ $count -gt 1 ]; do {} >> {}; sleep 1; ' + '((count--)); done;'.format(self._io_cmd.format('5M'), + self.file_path)) + self.proc = g.run_async(self.client, cmd) + + # Wait for IO to be written to file + sleep(30) + + # Validate heal info shows o/p and exit in <5s + self._validate_heal() + + # Validate IO was happening + self._validate_io() + + g.log.info('Pass: Test heal info with IO is complete') + + def test_heal_info_with_io_and_brick_down(self): + """ + Description: Validate heal info command with IO and brick down + + Steps: + - Create and mount a 6x3 replicated volume + - Create a file and perform IO continuously on this file + - While IOs are 
happening, bring down one of the brick where the file + is getting hashed to + - After about a period of ~5 min issue `heal info` command and + validate o/p not taking much time + - Repeat the steps for arbiter on bringing arbiter brick down + """ + cmd = ('count={}; while [ $count -gt 1 ]; do {} >> {}; sleep 1; ' + '((count--)); done;'.format(self.io_time, + self._io_cmd.format('5M'), + self.file_path)) + self.proc = g.run_async(self.client, cmd) + + # Wait for IO to be written to file + sleep(30) + + # Find hashed subvol of the file with IO + subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + index = self._get_hashed_subvol_index(subvols) + + # Bring down one of the `data` bricks of hashed subvol + self._bring_brick_offline(bricks_list=subvols[index]) + + # Validate heal and bring volume online + self._validate_brick_down_scenario(validate_heal=True) + + # Bring down `arbiter` brick and perform validation + if self.volume_type.find('arbiter') >= 0: + self._bring_brick_offline(bricks_list=subvols[index], + arb_brick=True) + + # Validate heal and bring volume online + self._validate_brick_down_scenario(validate_heal=True) + + self._validate_io() + + g.log.info('Pass: Test heal info with IO and brick down is complete') + + def test_data_heal_on_file_append(self): + """ + Description: Validate appends to a self healing file (data heal check) + + Steps: + - Create and mount a 1x2 replicated volume + - Create a file of ~ 1GB from the mount + - Bring down a brick and write more data to the file + - Bring up the offline brick and validate appending data to the file + succeeds while file self heals + - Repeat the steps for arbiter on bringing arbiter brick down + """ + cmd = ('{} >> {}; '.format(self._io_cmd.format('1G'), self.file_path)) + ret, _, _ = g.run(self.client, cmd) + self.assertEqual(ret, 0, 'Unable to create 1G of file on mount') + + # Perform `data_heal` test + self._perform_heal_append_scenario() + + g.log.info('Pass: Test data heal on file 
append is complete') + + def test_entry_heal_on_file_append(self): + """ + Description: Validate appends to a self healing file (entry heal check) + + Steps: + - Create and mount a 1x2 replicated volume + - Bring down a brick and write data to the file + - Bring up the offline brick and validate appending data to the file + succeeds while file self heals + - Repeat the steps for arbiter on bringing arbiter brick down + """ + + # Perform `entry_heal` test + self._perform_heal_append_scenario() + + g.log.info('Pass: Test entry heal on file append is complete') diff --git a/tests/functional/afr/test_replace_brick_self_heal_io_in_progress.py b/tests/functional/afr/test_replace_brick_self_heal_io_in_progress.py index 4a69d5700..198351589 100644 --- a/tests/functional/afr/test_replace_brick_self_heal_io_in_progress.py +++ b/tests/functional/afr/test_replace_brick_self_heal_io_in_progress.py @@ -140,10 +140,7 @@ class TestAFRSelfHeal(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) diff --git a/tests/functional/afr/test_self_heal_with_diff_algorithm.py b/tests/functional/afr/test_self_heal_with_diff_algorithm.py new file mode 100644 index 000000000..68472cc14 --- /dev/null +++ b/tests/functional/afr/test_self_heal_with_diff_algorithm.py @@ -0,0 +1,162 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +Description: + Test self heal when data-self-heal-algorithm option is set to diff. +""" + +from random import sample + +from glusto.core import Glusto as g +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + are_bricks_offline) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.heal_libs import (is_volume_in_split_brain, + monitor_heal_completion) +from glustolibs.gluster.lib_utils import collect_bricks_arequal +from glustolibs.gluster.volume_ops import (volume_start, + set_volume_options) +from glustolibs.gluster.volume_libs import ( + verify_all_process_of_volume_are_online, get_subvols) + + +@runs_on([['arbiter', 'distributed-arbiter', 'replicated', + 'distributed-replicated'], ['glusterfs']]) +class TestSelfHealWithDiffAlgorithm(GlusterBaseClass): + def setUp(self): + + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + if not self.setup_volume_and_mount_volume(self.mounts): + raise ExecutionError("Unable to setup and mount volume") + g.log.info("Volume created and mounted successfully") + + def tearDown(self): + + # Unmount and cleanup the volume + if not 
self.unmount_volume_and_cleanup_volume(self.mounts): + raise ExecutionError("Unable to unmount and cleanup volume") + g.log.info("Volume unmounted and deleted successfully") + + # Calling GlusterBaseClass Teardown + self.get_super_method(self, 'tearDown')() + + def test_self_heal_with_diff_algorithm(self): + """ + Test Steps: + 1. Create a replicated/distributed-replicate volume and mount it + 2. Set data/metadata/entry-self-heal to off and + data-self-heal-algorithm to diff + 3. Create few files inside a directory with some data + 4. Check arequal of the subvol and all the bricks in the subvol should + have same checksum + 5. Bring down a brick from the subvol and validate it is offline + 6. Modify the data of existing files under the directory + 7. Bring back the brick online and wait for heal to complete + 8. Check arequal of the subvol and all the brick in the same subvol + should have same checksum + """ + + # Setting options + for key, value in (("data-self-heal", "off"), + ("metadata-self-heal", "off"), + ("entry-self-heal", "off"), + ("data-self-heal-algorithm", "diff")): + ret = set_volume_options(self.mnode, self.volname, {key: value}) + self.assertTrue(ret, 'Failed to set %s to %s.' 
% (key, value)) + g.log.info("%s set to %s successfully", key, value) + + # Create few files under a directory with data + mountpoint = self.mounts[0].mountpoint + client = self.mounts[0].client_system + + cmd = ("mkdir %s/test_diff_self_heal ; cd %s/test_diff_self_heal ;" + "for i in `seq 1 100` ; do dd if=/dev/urandom of=file.$i " + " bs=1M count=1; done;" % (mountpoint, mountpoint)) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, "Failed to create file on mountpoint") + g.log.info("Successfully created files on mountpoint") + + # Check arequal checksum of all the bricks is same + subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + for subvol in subvols: + ret, arequal_from_the_bricks = collect_bricks_arequal(subvol) + self.assertTrue(ret, "Arequal is collected successfully across " + "the bricks in the subvol {}".format(subvol)) + cmd = len(set(arequal_from_the_bricks)) + if (self.volume_type == "arbiter" or + self.volume_type == "distributed-arbiter"): + cmd = len(set(arequal_from_the_bricks[:2])) + self.assertEqual(cmd, 1, "Arequal" + " is same on all the bricks in the subvol") + + # List a brick in each subvol and bring them offline + brick_to_bring_offline = [] + for subvol in subvols: + self.assertTrue(subvol, "List is empty") + brick_to_bring_offline.extend(sample(subvol, 1)) + + ret = bring_bricks_offline(self.volname, brick_to_bring_offline) + self.assertTrue(ret, "Unable to bring brick: {} offline".format( + brick_to_bring_offline)) + + # Validate the brick is offline + ret = are_bricks_offline(self.mnode, self.volname, + brick_to_bring_offline) + self.assertTrue(ret, "Brick:{} is still online".format( + brick_to_bring_offline)) + + # Modify files under test_diff_self_heal directory + cmd = ("for i in `seq 1 100` ; do truncate -s 0 file.$i ; " + "truncate -s 2M file.$i ; done;") + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, "Failed to modify the files") + g.log.info("Successfully modified files") + + # Start 
volume with force to bring all bricks online + ret, _, _ = volume_start(self.mnode, self.volname, force=True) + self.assertEqual(ret, 0, "Volume start with force failed") + g.log.info("Volume: %s started successfully", self.volname) + + # Verify volume's all process are online + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, ("Volume %s : All process are not online", + self.volname)) + + # Monitor heal completion + self.assertTrue(monitor_heal_completion(self.mnode, self.volname, + interval_check=10), + "Heal failed after 20 mins") + + # Check are there any files in split-brain + self.assertFalse(is_volume_in_split_brain(self.mnode, self.volname), + "Some files are in split brain for " + "volume: {}".format(self.volname)) + + # Check arequal checksum of all the bricks is same + for subvol in subvols: + ret, arequal_from_the_bricks = collect_bricks_arequal(subvol) + self.assertTrue(ret, "Arequal is collected successfully across " + "the bricks in the subvol {}".format(subvol)) + cmd = len(set(arequal_from_the_bricks)) + if (self.volume_type == "arbiter" or + self.volume_type == "distributed-arbiter"): + cmd = len(set(arequal_from_the_bricks[:2])) + self.assertEqual(cmd, 1, "Arequal" + " is same on all the bricks in the subvol") diff --git a/tests/functional/afr/test_self_heal_with_expand_volume.py b/tests/functional/afr/test_self_heal_with_expand_volume.py new file mode 100644 index 000000000..d5b6d5d43 --- /dev/null +++ b/tests/functional/afr/test_self_heal_with_expand_volume.py @@ -0,0 +1,221 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from random import choice + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + are_bricks_online, get_all_bricks) +from glustolibs.gluster.glusterfile import (set_file_permissions, + occurences_of_pattern_in_file) +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_heal_complete) +from glustolibs.gluster.rebalance_ops import ( + rebalance_start, wait_for_rebalance_to_complete) +from glustolibs.gluster.lib_utils import (add_user, del_user) +from glustolibs.gluster.volume_libs import (get_subvols, expand_volume) + + +@runs_on([['distributed-replicated'], ['glusterfs']]) +class TestHealWithExpandVolume(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + self.first_client = self.mounts[0].client_system + self.mountpoint = self.mounts[0].mountpoint + + # Create non-root users + self.users = ('qa_user', 'qa_admin') + for user in self.users: + if not add_user(self.first_client, user): + raise ExecutionError("Failed to create non-root user {}" + .format(user)) + g.log.info("Successfully created non-root users") + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to setup and mount volume") + + def tearDown(self): + + # Delete non-root users + for user in self.users: + 
del_user(self.first_client, user) + ret, _, _ = g.run(self.first_client, + "rm -rf /home/{}".format(user)) + if ret: + raise ExecutionError("Failed to remove home dir of " + "non-root user") + g.log.info("Successfully deleted all users") + + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _bring_bricks_offline(self): + """Brings bricks offline and confirms if they are offline""" + # Select bricks to bring offline from a replica set + subvols_dict = get_subvols(self.mnode, self.volname) + subvols = subvols_dict['volume_subvols'] + self.bricks_to_bring_offline = [] + self.bricks_to_bring_offline.append(choice(subvols[0])) + + # Bring bricks offline + ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' % + self.bricks_to_bring_offline) + + ret = are_bricks_offline(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks %s are not offline' + % self.bricks_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + self.bricks_to_bring_offline) + + def _restart_volume_and_bring_all_offline_bricks_online(self): + """Restart volume and bring all offline bricks online""" + ret = bring_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline, + bring_bricks_online_methods=[ + 'volume_start_force']) + self.assertTrue(ret, 'Failed to bring bricks %s online' % + self.bricks_to_bring_offline) + + # Check if bricks are back online or not + ret = are_bricks_online(self.mnode, self.volname, + self.bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks not online %s even after restart' % + self.bricks_to_bring_offline) + + g.log.info('Bringing bricks %s online is successful', + self.bricks_to_bring_offline) + + def _wait_for_heal_to_completed(self): + """Check if heal is completed""" 
+ ret = monitor_heal_completion(self.mnode, self.volname, + timeout_period=3600) + self.assertTrue(ret, 'Heal has not yet completed') + + def _check_if_there_are_files_to_be_healed(self): + """Check if there are files and dirs to be healed""" + ret = is_heal_complete(self.mnode, self.volname) + self.assertFalse(ret, 'Heal is completed') + g.log.info('Heal is pending') + + def _expand_volume_and_wait_for_rebalance_to_complete(self): + """Expand volume and wait for rebalance to complete""" + # Add brick to volume + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, "Failed to add brick on volume %s" + % self.volname) + + # Trigger rebalance and wait for it to complete + ret, _, _ = rebalance_start(self.mnode, self.volname, + force=True) + self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s" + % self.volname) + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=6000) + self.assertTrue(ret, "Rebalance is not yet complete on the volume " + "%s" % self.volname) + g.log.info("Rebalance successfully completed") + + def test_self_heal_and_add_brick_with_data_from_diff_users(self): + """ + Test case: + 1. Created a 2X3 volume. + 2. Mount the volume using FUSE and give 777 permissions to the mount. + 3. Added a new user. + 4. Login as new user and created 100 files from the new user: + for i in {1..100};do dd if=/dev/urandom of=$i bs=1024 count=1;done + 5. Kill a brick which is part of the volume. + 6. On the mount, login as root user and create 1000 files: + for i in {1..1000};do dd if=/dev/urandom of=f$i bs=10M count=1;done + 7. On the mount, login as new user, and copy existing data to + the mount. + 8. Start volume using force. + 9. While heal is in progress, add-brick and start rebalance. + 10. Wait for rebalance and heal to complete, + 11. Check for MSGID: 108008 errors in rebalance logs. 
+ """ + # Change permissions of mount point to 777 + ret = set_file_permissions(self.first_client, self.mountpoint, + '-R 777') + self.assertTrue(ret, "Unable to change mount point permissions") + g.log.info("Mount point permissions set to 777") + + # Create 100 files from non-root user + cmd = ("su -l %s -c 'cd %s; for i in {1..100};do dd if=/dev/urandom " + "of=nonrootfile$i bs=1024 count=1; done'" % (self.users[0], + self.mountpoint)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to create files from non-root user") + + # Kill one brick which is part of the volume + self._bring_bricks_offline() + + # Create 1000 files from root user + cmd = ("cd %s; for i in {1..1000};do dd if=/dev/urandom of=rootfile$i" + " bs=10M count=1;done" % self.mountpoint) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to creare files from root user") + + # On the mount, login as new user, and copy existing data to + # the mount + cmd = ("su -l %s -c 'wget https://cdn.kernel.org/pub/linux/kernel/" + "v5.x/linux-5.4.54.tar.xz; tar -xvf linux-5.4.54.tar.xz;" + "cd %s; cp -r ~/ .;'" % (self.users[1], self.mountpoint)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to copy files from non-root user") + + # Check if there are files to be healed + self._check_if_there_are_files_to_be_healed() + + # Start the vol using force + self._restart_volume_and_bring_all_offline_bricks_online() + + # Add bricks to volume and wait for heal to complete + self._expand_volume_and_wait_for_rebalance_to_complete() + + # Wait for heal to complete + self._wait_for_heal_to_completed() + + # Check for MSGID: 108008 errors in rebalance logs + particiapting_nodes = [] + for brick in get_all_bricks(self.mnode, self.volname): + node, _ = brick.split(':') + particiapting_nodes.append(node) + + for server in particiapting_nodes: + ret = occurences_of_pattern_in_file( + server, "MSGID: 108008", + 
"/var/log/glusterfs/{}-rebalance.log".format(self.volname)) + self.assertEqual(ret, 0, + "[Input/output error] present in rebalance log" + " file") + g.log.info("Expanding volume successful and no MSGID: 108008 " + "errors see in rebalance logs") diff --git a/tests/functional/afr/test_split_brain_with_hard_link_file.py b/tests/functional/afr/test_split_brain_with_hard_link_file.py new file mode 100644 index 000000000..a8248fb72 --- /dev/null +++ b/tests/functional/afr/test_split_brain_with_hard_link_file.py @@ -0,0 +1,175 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +# pylint: disable=too-many-statements, too-many-locals, unused-variable +from glusto.core import Glusto as g + +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.brick_libs import (get_all_bricks, + bring_bricks_offline, + bring_bricks_online, + are_bricks_offline) +from glustolibs.gluster.heal_ops import trigger_heal +from glustolibs.gluster.heal_libs import (is_volume_in_split_brain, + monitor_heal_completion, + is_heal_complete) + +from glustolibs.gluster.volume_ops import set_volume_options +from glustolibs.gluster.glusterfile import create_link_file + + +@runs_on([['distributed-replicated'], ['glusterfs']]) +class TestSelfHeal(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Override Volumes + if cls.volume_type == "distributed-replicated": + # Define x3 distributed-replicated volume + cls.volume['voltype'] = { + 'type': 'distributed-replicated', + 'dist_count': 2, + 'replica_count': 3, + 'transport': 'tcp'} + + # Setup Volume and Mount Volume + ret = cls.setup_volume_and_mount_volume(cls.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + @classmethod + def tearDownClass(cls): + + # Cleanup Volume + ret = cls.unmount_volume_and_cleanup_volume(cls.mounts) + if not ret: + raise ExecutionError("Failed to create volume") + g.log.info("Successful in cleaning up Volume %s", cls.volname) + + cls.get_super_method(cls, 'tearDownClass')() + + def _test_brick_down_with_file_rename(self, pfile, rfile, brick): + # Bring brick offline + g.log.info('Bringing brick %s offline', brick) + ret = bring_bricks_offline(self.volname, brick) + self.assertTrue(ret, 'Failed to bring brick %s offline' + % brick) + + ret = are_bricks_offline(self.mnode, self.volname, + [brick]) 
+ self.assertTrue(ret, 'Brick %s is not offline' + % brick) + g.log.info('Bringing brick %s offline is successful', + brick) + + # Rename file + cmd = ("mv %s/%s %s/%s" + % (self.mounts[0].mountpoint, pfile, + self.mounts[0].mountpoint, rfile)) + ret, _, _ = g.run(self.clients[0], cmd) + self.assertEqual(ret, 0, "rename of file failed") + + # Bring brick back online + g.log.info('Bringing brick %s online', brick) + ret = bring_bricks_online(self.mnode, self.volname, + brick) + self.assertTrue(ret, 'Failed to bring brick %s online' % + brick) + g.log.info('Bringing brick %s online is successful', brick) + + def test_afr_heal_with_brickdown_hardlink(self): + """ + Steps: + 1. Create 2 * 3 distribute replicate volume and disable all heals + 2. Create a file and 3 hardlinks to it from fuse mount. + 3. Kill brick4, rename HLINK1 to an appropriate name so that + it gets hashed to replicate-1 + 4. Likewise rename HLINK3 and HLINK7 as well, killing brick5 and brick6 + respectively each time. i.e. a different brick of the 2nd + replica is down each time. + 5. Now enable shd and let selfheals complete. + 6. Heal should complete without split-brains. 
+ """ + bricks_list = get_all_bricks(self.mnode, self.volname) + options = {"metadata-self-heal": "off", + "entry-self-heal": "off", + "data-self-heal": "off", + "self-heal-daemon": "off"} + g.log.info("setting options %s", options) + ret = set_volume_options(self.mnode, self.volname, options) + self.assertTrue(ret, ("Unable to set volume option %s for" + "volume %s" % (options, self.volname))) + g.log.info("Successfully set %s for volume %s", options, self.volname) + + cmd = ("touch %s/FILE" % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.clients[0], cmd) + self.assertEqual(ret, 0, "file creation failed") + + # Creating a hardlink for the file created + for i in range(1, 4): + ret = create_link_file(self.clients[0], + '{}/FILE'.format(self.mounts[0].mountpoint), + '{}/HLINK{}'.format + (self.mounts[0].mountpoint, i)) + self.assertTrue(ret, "Unable to create hard link file ") + + # Bring brick3 offline,Rename file HLINK1,and bring back brick3 online + self._test_brick_down_with_file_rename("HLINK1", "NEW-HLINK1", + bricks_list[3]) + + # Bring brick4 offline,Rename file HLINK2,and bring back brick4 online + self._test_brick_down_with_file_rename("HLINK2", "NEW-HLINK2", + bricks_list[4]) + + # Bring brick5 offline,Rename file HLINK3,and bring back brick5 online + self._test_brick_down_with_file_rename("HLINK3", "NEW-HLINK3", + bricks_list[5]) + + # Setting options + options = {"self-heal-daemon": "on"} + ret = set_volume_options(self.mnode, self.volname, options) + self.assertTrue(ret, 'Failed to set options %s' % options) + g.log.info("Option 'self-heal-daemon' is set to 'on' successfully") + + # Start healing + ret = trigger_heal(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not started') + g.log.info('Healing is started') + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + 
self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + # Check data on mount point + cmd = ("ls %s" % (self.mounts[0].mountpoint)) + ret, _, _ = g.run(self.clients[0], cmd) + self.assertEqual(ret, 0, "failed to fetch data from mount point") diff --git a/tests/functional/afr/test_split_brain_with_node_reboot.py b/tests/functional/afr/test_split_brain_with_node_reboot.py new file mode 100644 index 000000000..9b630ba75 --- /dev/null +++ b/tests/functional/afr/test_split_brain_with_node_reboot.py @@ -0,0 +1,149 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +# pylint: disable=too-many-statements, too-many-locals +from unittest import SkipTest +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_heal_complete) +from glustolibs.io.utils import (run_linux_untar, run_crefi, + wait_for_io_to_complete) + + +@runs_on([['replicated', 'distributed-replicated'], ['glusterfs']]) +class TestSelfHeal(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Check for availability of atleast 3 clients + if len(cls.clients) < 3: + raise SkipTest("This test requires atleast 3 clients") + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts " + "to clients %s" % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + # Setup Volume and Mount Volume + ret = cls.setup_volume_and_mount_volume(cls.mounts, True) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + cls.list_of_io_processes = [] + cls.is_io_running = False + + def tearDown(self): + + # If I/O processes are running wait from them to complete + if self.is_io_running: + if not wait_for_io_to_complete(self.list_of_io_processes, + self.mounts): + raise ExecutionError("Failed to wait for I/O to complete") + + # Unmounting and cleaning volume + ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Unable to delete volume %s" 
% self.volname) + + self.get_super_method(self, 'tearDown')() + + def test_afr_node_reboot_self_heal(self): + """ + Steps: + 1. Create *3 replica volume + 2. Mount the volume on 3 clients + 3. Run following workload from clients + Client 1: Linux Untars + Client 2: Lookups ls + Client 3: Lookups du + 4. Create a directory on mount point + 5. Create deep dirs and file in the directory created at step 4 + 6. Perform node reboot + 7. Check for heal status + 8. Reboot another node + 9. Check for heal status + """ + + # Create a dir to start untar + self.linux_untar_dir = "{}/{}".format(self.mounts[0].mountpoint, + "linuxuntar") + ret = mkdir(self.clients[0], self.linux_untar_dir) + self.assertTrue(ret, "Failed to create dir linuxuntar for untar") + + # Start linux untar on dir linuxuntar from client 1 + ret = run_linux_untar(self.clients[0], self.mounts[0].mountpoint, + dirs=tuple(['linuxuntar'])) + self.list_of_io_processes += ret + self.is_io_running = True + + # Run lookup operation ls from client 2 + cmd = ("cd {}; for i in `seq 1 1000000`;do du -sh; done" + .format(self.mounts[1].mountpoint)) + ret = g.run_async(self.mounts[1].client_system, cmd) + self.list_of_io_processes += [ret] + + # Run lookup operation du from client 3 + cmd = ("cd {}; for i in `seq 1 1000000`;do ls -laRt; done" + .format(self.mounts[2].mountpoint)) + ret = g.run_async(self.mounts[2].client_system, cmd) + self.list_of_io_processes += [ret] + + # Create a dir to start crefi tool + self.linux_untar_dir = "{}/{}".format(self.mounts[3].mountpoint, + "crefi") + ret = mkdir(self.clients[3], self.linux_untar_dir) + self.assertTrue(ret, "Failed to create dir for crefi") + + # Create deep dirs and files on mount point from client 4 + list_of_fops = ("create", "rename", "chmod", "chown", "chgrp", + "hardlink", "truncate", "setxattr") + for fops in list_of_fops: + ret = run_crefi(self.clients[3], + self.linux_untar_dir, 10, 3, 3, thread=4, + random_size=True, fop=fops, minfs=0, + maxfs=102400, 
multi=True, random_filename=True) + self.assertTrue(ret, "crefi failed during {}".format(fops)) + g.log.info("crefi PASSED FOR fop %s", fops) + g.log.info("IOs were successful using crefi") + + for server_num in (1, 2): + # Perform node reboot for servers + g.log.info("Rebooting %s", self.servers[server_num]) + ret = g.run_async(self.servers[server_num], "reboot") + self.assertTrue(ret, 'Failed to reboot node') + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + g.log.info('Heal is completed successfully') diff --git a/tests/functional/arbiter/brick_cases/test_replica3_to_arbiter.py b/tests/functional/arbiter/brick_cases/test_replica3_to_arbiter.py index 947c6eef7..24c014502 100755 --- a/tests/functional/arbiter/brick_cases/test_replica3_to_arbiter.py +++ b/tests/functional/arbiter/brick_cases/test_replica3_to_arbiter.py @@ -22,6 +22,12 @@ from glustolibs.gluster.volume_libs import ( expand_volume, wait_for_volume_process_to_be_online,
verify_all_process_of_volume_are_online, shrink_volume, get_subvols)
from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.io.utils import run_linux_untar
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+ is_heal_complete,
+ is_volume_in_split_brain)
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.gluster.heal_ops import trigger_heal
@runs_on([['replicated', 'distributed-replicated'],
@@ -37,22 +43,29 @@ class GlusterArbiterVolumeTypeClass(GlusterBaseClass): # Calling GlusterBaseClass setUp
self.get_super_method(self, 'setUp')()
+ # Set I/O flag to false
+ self.is_io_running = False
+
# Setup Volume
- g.log.info("Starting to Setup Volume")
- ret = self.setup_volume()
+ g.log.info("Starting to Setup and Mount Volume")
+ # Creating Volume and mounting the volume
+ ret = self.setup_volume_and_mount_volume([self.mounts[0]])
if not ret:
- raise ExecutionError("Failed to Setup_Volume")
- g.log.info("Successful in Setup Volume")
+ raise ExecutionError("Volume creation or mount failed: %s"
+ % self.volname)
self.subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
def tearDown(self):
- # Cleanup Volume
- g.log.info("Starting to Unmount Volume and Cleanup Volume")
- ret = self.cleanup_volume()
+ # Wait for I/O if not completed
+ if self.is_io_running:
+ if not self._wait_for_untar_completion():
+ g.log.error("I/O failed to stop on clients")
+
+ # Unmounting and cleaning volume
+ ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]])
if not ret:
- raise ExecutionError("Failed to Cleanup Volume")
- g.log.info("Successful Cleanup Volume")
+ raise ExecutionError("Unable to delete volume % s" % self.volname)
# Calling GlusterBaseClass tearDown
self.get_super_method(self, 'tearDown')()
@@ -67,10 +80,22 @@ class GlusterArbiterVolumeTypeClass(GlusterBaseClass): g.log.info('Clearing brick %s is successful', brick)
g.log.info('Clearing for all brick is successful')
- def test_replicated_to_arbiter_volume(self):
+ def _wait_for_untar_completion(self):
+ """Wait for untar to complete"""
+ has_process_stopped = []
+ for proc in self.io_process:
+ try:
+ ret, _, _ = proc.async_communicate()
+ if not ret:
+ has_process_stopped.append(False)
+ has_process_stopped.append(True)
+ except ValueError:
+ has_process_stopped.append(True)
+ return all(has_process_stopped)
+
+ def _convert_replicated_to_arbiter_volume(self):
"""
- Description:-
- Reduce the replica count from replica 3 to arbiter
+ Helper method to convert a replicated volume to an arbiter volume.
"""
# pylint: disable=too-many-statements
# Remove brick to reduce the replica count from replica 3
@@ -99,7 +124,7 @@ class GlusterArbiterVolumeTypeClass(GlusterBaseClass): g.log.info("Adding bricks to convert to Arbiter Volume")
replica_arbiter = {'replica_count': 1, 'arbiter_count': 1}
ret = expand_volume(self.mnode, self.volname, self.servers,
- self.all_servers_info, add_to_hot_tier=False,
+ self.all_servers_info, force=True,
**replica_arbiter)
self.assertTrue(ret, "Failed to expand the volume %s" % self.volname)
g.log.info("Changing volume to arbiter volume is successful %s",
@@ -119,3 +144,70 @@ class GlusterArbiterVolumeTypeClass(GlusterBaseClass): self.assertTrue(ret, "Volume %s : All process are not online"
% self.volname)
g.log.info("Volume %s : All process are online", self.volname)
+
+ def test_replicated_to_arbiter_volume(self):
+ """
+ Description:-
+ Reduce the replica count from replica 3 to arbiter
+ """
+ # pylint: disable=too-many-statements
+ self._convert_replicated_to_arbiter_volume()
+
+ def test_replica_to_arbiter_volume_with_io(self):
+ """
+ Description: Replica 3 to arbiter conversion with ongoing IO's
+
+ Steps :
+ 1) Create a replica 3 volume and start volume.
+ 2) Set client side self heal off.
+ 3) Fuse mount the volume.
+ 4) Create directory dir1 and write data.
+ Example: untar linux tar from the client into the dir1
+ 5) When IO's are running, execute remove-brick command,
+ and convert replica 3 to replica 2 volume
+ 6) Execute add-brick command and convert to arbiter volume,
+ provide the path of new arbiter brick.
+ 7) Issue gluster volume heal.
+ 8) Heal should be completed with no files in split-brain.
+ """
+
+ # pylint: disable=too-many-statements
+ # Create a dir to start untar
+ self.linux_untar_dir = "{}/{}".format(self.mounts[0].mountpoint,
+ "linuxuntar")
+ ret = mkdir(self.clients[0], self.linux_untar_dir)
+ self.assertTrue(ret, "Failed to create dir linuxuntar for untar")
+
+ # Start linux untar on dir linuxuntar
+ self.io_process = run_linux_untar(self.clients[0],
+ self.mounts[0].mountpoint,
+ dirs=tuple(['linuxuntar']))
+ self.is_io_running = True
+
+ # Convert replicated to arbiter volume
+ self._convert_replicated_to_arbiter_volume()
+
+ # Wait for IO to complete.
+ ret = self._wait_for_untar_completion()
+ self.assertFalse(ret, "IO didn't complete or failed on client")
+ self.is_io_running = False
+
+ # Start healing
+ ret = trigger_heal(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal is not started')
+ g.log.info('Healing is started')
+
+ # Monitor heal completion
+ ret = monitor_heal_completion(self.mnode, self.volname,
+ timeout_period=3600)
+ self.assertTrue(ret, 'Heal has not yet completed')
+
+ # Check if heal is completed
+ ret = is_heal_complete(self.mnode, self.volname)
+ self.assertTrue(ret, 'Heal is not complete')
+ g.log.info('Heal is completed successfully')
+
+ # Check for split-brain
+ ret = is_volume_in_split_brain(self.mnode, self.volname)
+ self.assertFalse(ret, 'Volume is in split-brain state')
+ g.log.info('Volume is not in split-brain state')
diff --git a/tests/functional/arbiter/brick_cases/test_rmvrf_files.py b/tests/functional/arbiter/brick_cases/test_rmvrf_files.py index 9dbaa74fc..8d7304b0b 100755 --- a/tests/functional/arbiter/brick_cases/test_rmvrf_files.py +++ b/tests/functional/arbiter/brick_cases/test_rmvrf_files.py @@ -145,10 +145,7 @@ class TestRmrfMount(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Killing one brick from the volume set g.log.info("Bringing bricks: %s offline", bricks_to_bring_offline) diff --git a/tests/functional/arbiter/test_afr_read_write.py b/tests/functional/arbiter/test_afr_read_write.py new file mode 100644 index 000000000..09e6a3a2a --- /dev/null +++ b/tests/functional/arbiter/test_afr_read_write.py @@ -0,0 +1,192 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from random import sample +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import ( + verify_all_process_of_volume_are_online, + wait_for_volume_process_to_be_online) +from glustolibs.gluster.brick_libs import ( + get_all_bricks, + bring_bricks_offline, + bring_bricks_online, + are_bricks_offline) +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_volume_in_split_brain) +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.io.utils import validate_io_procs + + +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestAfrReadWrite(GlusterBaseClass): + + """ + Description: + Arbiter test writes and reads from a file + """ + def setUp(self): + # Calling GlusterBaseClass + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def _bring_bricks_online_heal(self, mnode, volname, bricks_list): + """ + Bring bricks online and monitor heal completion + """ + # Bring bricks online + ret = bring_bricks_online( + mnode, volname, bricks_list, + bring_bricks_online_methods=['volume_start_force']) + self.assertTrue(ret, 'Failed to bring bricks online') + + # Wait for volume processes to be online + ret = wait_for_volume_process_to_be_online(mnode, volname) + self.assertTrue(ret, ("Failed to wait for volume {} processes to " + "be online".format(volname))) + + # Verify volume's all 
process are online + ret = verify_all_process_of_volume_are_online(mnode, volname) + self.assertTrue(ret, ("Volume {} : All process are not online".format + (volname))) + g.log.info("Volume %s : All process are online", volname) + + # Monitor heal completion + ret = monitor_heal_completion(mnode, volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check for split-brain + ret = is_volume_in_split_brain(mnode, volname) + self.assertFalse(ret, 'Volume is in split-brain state') + + def test_afr_read_write(self): + """ + Test read and write of file + Description: + - Get the bricks from the volume + - Creating directory test_write_and_read_file + - Write from 1st client + - Read from 2nd client + - Select brick to bring offline + - Bring brick offline + - Validating IO's on client1 + - Validating IO's on client2 + - Bring bricks online + - Wait for volume processes to be online + - Verify volume's all process are online + - Monitor heal completion + - Check for split-brain + - Bring 2nd brick offline + - Check if brick is offline + - Write from 1st client + - Read from 2nd client + - Bring bricks online + - Wait for volume processes to be online + - Verify volume's all process are online + - Monitor heal completion + - Check for split-brain + + - Get arequal after getting bricks online + """ + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + # Get the bricks from the volume + bricks_list = get_all_bricks(self.mnode, self.volname) + g.log.info("Brick List : %s", bricks_list) + + # Creating directory test_write_and_read_file + ret = mkdir(self.mounts[0].client_system, + "{}/test_write_and_read_file" + .format(self.mounts[0].mountpoint)) + self.assertTrue(ret, "Failed to create directory") + g.log.info("Directory 'test_write_and_read_file' on %s created " + "successfully", self.mounts[0]) + + # Write from 1st client + cmd_to_write = ( + 'cd %s/test_write_and_read_file ; for i in `seq 1 5000` ;' + 'do echo -e "Date:`date`\n" >> 
test_file ;echo -e "' + '`cal`\n" >> test_file ; done ; cd ..' + % self.mounts[0].mountpoint) + proc1 = g.run_async(self.mounts[0].client_system, + cmd_to_write) + + # Read from 2nd client + cmd = ('cd %s/ ;for i in {1..30};' + 'do cat test_write_and_read_file/test_file;done' + % self.mounts[1].mountpoint) + proc2 = g.run_async(self.mounts[1].client_system, cmd) + + # Bring brick offline + bricks_to_bring_offline = sample(bricks_list, 2) + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline[0]) + self.assertTrue(ret, 'Failed to bring bricks {} offline'. + format(bricks_to_bring_offline)) + + # Check brick is offline + ret = are_bricks_offline(self.mnode, self.volname, + [bricks_to_bring_offline[0]]) + self.assertTrue(ret, 'Bricks {} are not offline'. + format(bricks_to_bring_offline[0])) + + # Validating IO's + for proc, mount in zip([proc1, proc2], self.mounts): + ret = validate_io_procs([proc], mount) + self.assertTrue(ret, "IO failed on client") + g.log.info("Successfully validated all IO's") + + self._bring_bricks_online_heal(self.mnode, self.volname, bricks_list) + + # Bring down second brick + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline[1]) + self.assertTrue(ret, 'Failed to bring bricks {} offline'. + format(bricks_to_bring_offline[1])) + + # Check if brick is offline + ret = are_bricks_offline(self.mnode, self.volname, + [bricks_to_bring_offline[1]]) + self.assertTrue(ret, 'Bricks {} are not offline'. 
+ format(bricks_to_bring_offline[1])) + + # Write from 1st client + ret, _, _ = g.run(self.mounts[0].client_system, cmd_to_write) + self.assertEqual(ret, 0, "Failed to write to file") + g.log.info("Successfully written to file") + + # Read from 2nd client + cmd = ('cd %s/ ;cat test_write_and_read_file/test_file' + % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to read file on mountpoint") + g.log.info("Successfully read file on mountpoint") + + self._bring_bricks_online_heal(self.mnode, self.volname, bricks_list) diff --git a/tests/functional/arbiter/test_brick_down_cyclic.py b/tests/functional/arbiter/test_brick_down_cyclic.py new file mode 100644 index 000000000..8639a4dc5 --- /dev/null +++ b/tests/functional/arbiter/test_brick_down_cyclic.py @@ -0,0 +1,140 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +# pylint: disable=too-many-statements, too-many-locals +import time +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.heal_ops import trigger_heal +from glustolibs.gluster.heal_libs import (is_volume_in_split_brain, + is_heal_complete) +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + get_all_bricks, + are_bricks_online) +from glustolibs.gluster.heal_libs import ( + monitor_heal_completion, are_all_self_heal_daemons_are_online) + + +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestBrickDownHeal(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Setup Volume and Mount Volume + ret = cls.setup_volume_and_mount_volume(cls.mounts, True) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + @classmethod + def tearDownClass(cls): + """ + Cleanup Volume + """ + ret = cls.unmount_volume_and_cleanup_volume(cls.mounts) + if not ret: + raise ExecutionError("Failed to create volume") + + cls.get_super_method(cls, 'tearDownClass')() + + def test_brick_down_heal(self): + """ + - Run IO's from client on a single file + - Now bring down bricks in cyclic order + - kill brick 1, sleep for 5 seconds, bring brick 1 up, wait for 10s + - Now repeat step3 for brick2 and brick 3 + - Repeat the cycle a few times + - Trigger heal, check for split brain using command + """ + # Write IO's + self.all_mounts_procs = [] + cmd = ("for i in `seq 1 10`;" + "do dd if=/dev/urandom of=%s/file$i bs=1K count=1;" + "done" % self.mounts[0].mountpoint) + proc = g.run_async(self.mounts[0].client_system, cmd) + self.all_mounts_procs.append(proc) + + # Killing bricks in cyclic order + bricks_list = get_all_bricks(self.mnode, self.volname) + + # 
Total number of cyclic brick-down cycles to be executed + number_of_cycles = 0 + while number_of_cycles < 3: + number_of_cycles += 1 + for brick in bricks_list: + # Bring brick offline + g.log.info('Bringing bricks %s offline', brick) + ret = bring_bricks_offline(self.volname, [brick]) + self.assertTrue(ret, ("Failed to bring bricks %s offline" + % brick)) + + ret = are_bricks_offline(self.mnode, self.volname, [brick]) + self.assertTrue(ret, 'Bricks %s are not offline' % brick) + g.log.info('Bringing bricks %s offline is successful', brick) + + # Introducing 5 second sleep when brick is down + g.log.info("Waiting for 5 seconds, with ongoing IO while " + "brick %s is offline", brick) + ret = time.sleep(5) + + # Bring brick online + g.log.info('Bringing bricks %s online', brick) + ret = bring_bricks_online(self.mnode, self.volname, [brick]) + self.assertTrue(ret, ("Failed to bring bricks %s online " + % brick)) + g.log.info('Bricks %s are online', brick) + + # Introducing 10 second sleep when brick is up + g.log.info("Waiting for 10 seconds,when " + "brick %s is online", brick) + ret = time.sleep(10) + + # Check if bricks are online + ret = are_bricks_online(self.mnode, self.volname, bricks_list) + self.assertTrue(ret, 'Bricks %s are not online' % bricks_list) + g.log.info('Bricks %s are online', bricks_list) + + # Check daemons + g.log.info('Checking daemons...') + ret = are_all_self_heal_daemons_are_online(self.mnode, + self.volname) + self.assertTrue(ret, ("Some of the self-heal Daemons are " + "offline")) + g.log.info('All self-heal Daemons are online') + + # Trigger self heal + ret = trigger_heal(self.mnode, self.volname) + self.assertTrue(ret, 'Unable to trigger heal on volume') + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check if heal is completed + ret = is_heal_complete(self.mnode, self.volname) + self.assertTrue(ret, 'Heal is not complete') + 
g.log.info('Heal is completed successfully') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') diff --git a/tests/functional/arbiter/test_data_delete.py b/tests/functional/arbiter/test_data_delete.py new file mode 100644 index 000000000..4753efcbc --- /dev/null +++ b/tests/functional/arbiter/test_data_delete.py @@ -0,0 +1,110 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import get_all_bricks + + +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestDataDelete(GlusterBaseClass): + """ + Description: + Test data delete/rename on arbiter volume + """ + def setUp(self): + # Calling GlusterBaseClass + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_data_delete(self): + """ + Test steps: + - Get brick list + - Create files and rename + - Check if brick path contains old files + - Delete files from mountpoint + - Check .glusterfs/indices/xattrop is empty + - Check if brickpath is empty + """ + + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + # Get the bricks from the volume + bricks_list = get_all_bricks(self.mnode, self.volname) + g.log.info("Brick List : %s", bricks_list) + + # Create files and rename + cmd = ('cd %s ;for i in `seq 1 100` ;do mkdir -pv directory$i;' + 'cd directory$i;dd if=/dev/urandom of=file$i bs=1M count=5;' + 'mv file$i renamed$i;done;' % (self.mounts[0].mountpoint)) + ret, _, _ = g.run(self.clients[0], cmd) + self.assertEqual(ret, 0, "Fail: Not able to create files on " + "{}".format(self.mounts[0].mountpoint)) + g.log.info("Files 
created successfully and renamed") + + # Check if brickpath contains old files + for brick in bricks_list: + brick_node, brick_path = brick.split(":") + cmd = ("ls -1 %s |grep file |wc -l " % brick_path) + ret, out, _ = g.run(brick_node, cmd) + self.assertEqual(0, int(out.strip()), "Brick path {} contains old " + "file in node {}".format(brick_path, brick_node)) + g.log.info("Brick path contains renamed files") + + # Delete files from mountpoint + cmd = ('rm -rf -v %s/*' % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to delete files") + g.log.info("Files deleted successfully for %s", self.mounts[0]) + + # Check .glusterfs/indices/xattrop is empty + for brick in bricks_list: + brick_node, brick_path = brick.split(":") + cmd = ("ls -1 %s/.glusterfs/indices/xattrop/ | " + "grep -ve \"xattrop-\" | wc -l" % brick_path) + ret, out, _ = g.run(brick_node, cmd) + self.assertEqual(0, int(out.strip()), ".glusterfs/indices/" + "xattrop is not empty") + g.log.info("No pending heals on bricks") + + # Check if brickpath is empty + for brick in bricks_list: + brick_node, brick_path = brick.split(":") + cmd = ("ls -1 %s |wc -l " % brick_path) + ret, out, _ = g.run(brick_node, cmd) + self.assertEqual(0, int(out.strip()), "Brick path {} is not empty " + "in node {}".format(brick_path, brick_node)) + g.log.info("Brick path is empty on all nodes") diff --git a/tests/functional/arbiter/test_data_self_heal_algorithm_diff_default.py b/tests/functional/arbiter/test_data_self_heal_algorithm_diff_default.py index 17c2ba4d5..bbb30f271 100644 --- a/tests/functional/arbiter/test_data_self_heal_algorithm_diff_default.py +++ b/tests/functional/arbiter/test_data_self_heal_algorithm_diff_default.py @@ -133,10 +133,7 @@ class TestSelfHeal(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - 
bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -230,7 +227,8 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks online # and after bringing bricks online - self.assertItemsEqual(result_before_online, result_after_online, - 'Checksums are not equal') + self.assertEqual(sorted(result_before_online), + sorted(result_after_online), + 'Checksums are not equal') g.log.info('Checksums before bringing bricks online ' 'and after bringing bricks online are equal') diff --git a/tests/functional/arbiter/test_data_self_heal_algorithm_diff_heal_command.py b/tests/functional/arbiter/test_data_self_heal_algorithm_diff_heal_command.py index 132b9df8a..0aa440af1 100755 --- a/tests/functional/arbiter/test_data_self_heal_algorithm_diff_heal_command.py +++ b/tests/functional/arbiter/test_data_self_heal_algorithm_diff_heal_command.py @@ -161,10 +161,7 @@ class TestSelfHeal(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -272,6 +269,7 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks offline # and after bringing bricks online - self.assertItemsEqual(result_before_online, result_after_online, - 'Checksums are not equal') + self.assertEqual(sorted(result_before_online), + 
sorted(result_after_online), + 'Checksums are not equal') g.log.info('Checksums are equal') diff --git a/tests/functional/arbiter/test_data_self_heal_algorithm_full_default.py b/tests/functional/arbiter/test_data_self_heal_algorithm_full_default.py index 82538d42a..f4f13931a 100755 --- a/tests/functional/arbiter/test_data_self_heal_algorithm_full_default.py +++ b/tests/functional/arbiter/test_data_self_heal_algorithm_full_default.py @@ -132,10 +132,7 @@ class TestSelfHeal(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -229,7 +226,8 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks online # and after bringing bricks online - self.assertItemsEqual(result_before_online, result_after_online, - 'Checksums are not equal') + self.assertEqual(sorted(result_before_online), + sorted(result_after_online), + 'Checksums are not equal') g.log.info('Checksums before bringing bricks online ' 'and after bringing bricks online are equal') diff --git a/tests/functional/arbiter/test_data_self_heal_daemon_off.py b/tests/functional/arbiter/test_data_self_heal_daemon_off.py index df2e58aa6..9faae85ca 100644 --- a/tests/functional/arbiter/test_data_self_heal_daemon_off.py +++ b/tests/functional/arbiter/test_data_self_heal_daemon_off.py @@ -164,10 +164,7 @@ class TestSelfHeal(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - 
bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) diff --git a/tests/functional/arbiter/test_entry_self_heal_heal_command.py b/tests/functional/arbiter/test_entry_self_heal_heal_command.py index ced2bc19c..64c6c2339 100644 --- a/tests/functional/arbiter/test_entry_self_heal_heal_command.py +++ b/tests/functional/arbiter/test_entry_self_heal_heal_command.py @@ -177,10 +177,8 @@ class TestSelfHeal(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = ( + bricks_to_bring_offline_dict['volume_bricks']) # Bring brick offline g.log.info('Bringing bricks %s offline...', diff --git a/tests/functional/arbiter/test_gfid_self_heal.py b/tests/functional/arbiter/test_gfid_self_heal.py new file mode 100644 index 000000000..9ed4a8767 --- /dev/null +++ b/tests/functional/arbiter/test_gfid_self_heal.py @@ -0,0 +1,206 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import ( + verify_all_process_of_volume_are_online, + wait_for_volume_process_to_be_online) +from glustolibs.gluster.brick_libs import ( + select_volume_bricks_to_bring_offline, + bring_bricks_offline, + bring_bricks_online, + are_bricks_offline) +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_volume_in_split_brain) +from glustolibs.io.utils import (collect_mounts_arequal) +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.misc.misc_libs import upload_scripts + + +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestGFIDSelfHeal(GlusterBaseClass): + + """ + Description: + Arbiter Test cases related to GFID self heal + """ + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + # Calling GlusterBaseClass + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Cleanup and umount volume 
+ """ + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_gfid_self_heal(self): + """ + Test GFID self heal + Description: + - Creating directory test_compilation + - Write Deep directories and files + - Get arequal before getting bricks offline + - Select bricks to bring offline + - Bring brick offline + - Delete directory on mountpoint where data is writte + - Create the same directory and write same data + - Bring bricks online + - Wait for volume processes to be online + - Verify volume's all process are online + - Monitor heal completion + - Check for split-brain + - Get arequal after getting bricks online + """ + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + # Creating directory test_compilation + ret = mkdir(self.mounts[0].client_system, "{}/test_gfid_self_heal" + .format(self.mounts[0].mountpoint)) + self.assertTrue(ret, "Failed to create directory") + g.log.info("Directory 'test_gfid_self_heal' on %s created " + "successfully", self.mounts[0]) + + # Write Deep directories and files + count = 1 + for mount_obj in self.mounts: + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d --dir-depth 2 " + "--dir-length 10 --max-num-of-dirs 5 " + "--num-of-files 5 %s/dir1" % ( + self.script_upload_path, count, + mount_obj.mountpoint)) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to create files on mountpoint") + g.log.info("Successfully created files on mountpoint") + count += 10 + + # Get arequal before getting bricks offline + ret, result_before_offline = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Arequal after getting 
bricks offline ' + 'is %s', result_before_offline) + + # Select bricks to bring offline + bricks_to_bring_offline = select_volume_bricks_to_bring_offline( + self.mnode, self.volname) + self.assertIsNotNone(bricks_to_bring_offline, "List is empty") + + # Bring brick offline + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} offline'. + format(bricks_to_bring_offline)) + + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks {} are not offline'. + format(bricks_to_bring_offline)) + g.log.info('Bringing bricks %s offline is successful', + bricks_to_bring_offline) + + # Delete directory on mountpoint where data is written + cmd = ('rm -rf -v %s/test_gfid_self_heal' % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to delete directory") + g.log.info("Directory deleted successfully for %s", self.mounts[0]) + + # Create the same directory and write same data + ret = mkdir(self.mounts[0].client_system, "{}/test_gfid_self_heal" + .format(self.mounts[0].mountpoint)) + self.assertTrue(ret, "Failed to create directory") + g.log.info("Directory 'test_gfid_self_heal' on %s created " + "successfully", self.mounts[0]) + + # Write the same files again + count = 1 + for mount_obj in self.mounts: + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d --dir-depth 2 " + "--dir-length 10 --max-num-of-dirs 5 " + "--num-of-files 5 %s/dir1" % ( + self.script_upload_path, count, + mount_obj.mountpoint)) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to create files on mountpoint") + g.log.info("Successfully created files on mountpoint") + count += 10 + + # Bring bricks online + ret = bring_bricks_online( + self.mnode, self.volname, + bricks_to_bring_offline, + bring_bricks_online_methods=['volume_start_force']) + 
self.assertTrue(ret, 'Failed to bring bricks {} online'.format + (bricks_to_bring_offline)) + g.log.info('Bringing bricks %s online is successful', + bricks_to_bring_offline) + + # Wait for volume processes to be online + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to wait for volume {} processes to " + "be online".format(self.volname))) + g.log.info("Successful in waiting for volume %s processes to be " + "online", self.volname) + + # Verify volume's all process are online + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, ("Volume {} : All process are not online".format + (self.volname))) + g.log.info("Volume %s : All process are online", self.volname) + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + # Get arequal after getting bricks online + ret, result_after_online = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Arequal after getting bricks online ' + 'is %s', result_after_online) diff --git a/tests/functional/arbiter/test_gluster_clone_heal.py b/tests/functional/arbiter/test_gluster_clone_heal.py new file mode 100644 index 000000000..94603c701 --- /dev/null +++ b/tests/functional/arbiter/test_gluster_clone_heal.py @@ -0,0 +1,209 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import ( + verify_all_process_of_volume_are_online, + wait_for_volume_process_to_be_online) +from glustolibs.gluster.brick_libs import ( + select_volume_bricks_to_bring_offline, + bring_bricks_offline, + bring_bricks_online, + are_bricks_offline) +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_volume_in_split_brain) +from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs) +from glustolibs.gluster.glusterdir import mkdir + + +@runs_on([['arbiter', 'distributed-arbiter', + 'replicated', 'distributed-replicated'], ['glusterfs']]) +class TestGlusterCloneHeal(GlusterBaseClass): + """ + Description: + Arbiter Test cases related to self heal + of data and hardlink + """ + def setUp(self): + # Calling GlusterBaseClass + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in 
umounting the volume and Cleanup")
+
+        # Calling GlusterBaseClass teardown
+        self.get_super_method(self, 'tearDown')()
+
+    def test_gluster_clone_heal(self):
+        """
+        Test gluster compilation on mount point (Heal command)
+        - Creating directory test_compilation
+        - Compile gluster on mountpoint
+        - Select bricks to bring offline
+        - Bring brick offline
+        - Validate IO
+        - Bring bricks online
+        - Wait for volume processes to be online
+        - Verify volume's all process are online
+        - Monitor heal completion
+        - Check for split-brain
+        - Get arequal after getting bricks online
+        - Compile gluster on mountpoint again
+        - Select bricks to bring offline
+        - Bring brick offline
+        - Validate IO
+        - Bring bricks online
+        - Wait for volume processes to be online
+        - Verify volume's all process are online
+        - Monitor heal completion
+        - Check for split-brain
+        - Get arequal after getting bricks online
+        """
+        # pylint: disable=too-many-branches,too-many-statements,too-many-locals
+        # Creating directory test_compilation
+        ret = mkdir(self.mounts[0].client_system, "{}/test_compilation"
+                    .format(self.mounts[0].mountpoint))
+        self.assertTrue(ret, "Failed to create directory")
+        g.log.info("Directory 'test_compilation' on %s created "
+                   "successfully", self.mounts[0])
+
+        # Compile gluster on mountpoint
+        cmd = ("cd %s/test_compilation ; rm -rf glusterfs; git clone"
+               " https://github.com/gluster/glusterfs.git ; cd glusterfs ;"
+               " ./autogen.sh ;./configure CFLAGS='-g3 -O0 -DDEBUG'; make ;"
+               " cd ../..;" % self.mounts[0].mountpoint)
+        proc = g.run_async(self.mounts[0].client_system, cmd)
+
+        # Select bricks to bring offline
+        bricks_to_bring_offline = select_volume_bricks_to_bring_offline(
+            self.mnode, self.volname)
+        self.assertIsNotNone(bricks_to_bring_offline, "List is empty")
+
+        # Bring brick offline
+        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
+        self.assertTrue(ret, 'Failed to bring bricks {} offline'.
+ format(bricks_to_bring_offline)) + + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks {} are not offline'. + format(bricks_to_bring_offline)) + g.log.info('Bringing bricks %s offline is successful', + bricks_to_bring_offline) + + # Validate IO + self.assertTrue( + validate_io_procs([proc], self.mounts[0]), + "IO failed on some of the clients" + ) + + # Bring bricks online + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} online'.format + (bricks_to_bring_offline)) + + # Wait for volume processes to be online + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to wait for volume {} processes to " + "be online".format(self.volname))) + + # Verify volume's all process are online + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, ("Volume {} : All process are not online".format + (self.volname))) + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + # Get arequal after getting bricks online + ret, result_after_online = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info("Arequal of mountpoint %s", result_after_online) + + # Compile gluster on mountpoint again + proc1 = g.run_async(self.mounts[0].client_system, cmd) + + # Select bricks to bring offline + bricks_to_bring_offline = select_volume_bricks_to_bring_offline( + self.mnode, self.volname) + self.assertIsNotNone(bricks_to_bring_offline, "List is empty") + + # Bring brick offline + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + 
self.assertTrue(ret, 'Failed to bring bricks {} offline'. + format(bricks_to_bring_offline)) + + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks {} are not offline'. + format(bricks_to_bring_offline)) + + # Validate IO + self.assertTrue( + validate_io_procs([proc1], self.mounts[0]), + "IO failed on some of the clients" + ) + + # Bring bricks online + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} online'.format + (bricks_to_bring_offline)) + + # Wait for volume processes to be online + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to wait for volume {} processes to " + "be online".format(self.volname))) + + # Verify volume's all process are online + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, ("Volume {} : All process are not online".format + (self.volname))) + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + + # Get arequal after getting bricks online + ret, result_after_online = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info("Arequal of mountpoint %s", result_after_online) diff --git a/tests/functional/arbiter/test_heal_full_after_deleting_the_files_from_bricks.py b/tests/functional/arbiter/test_heal_full_after_deleting_the_files_from_bricks.py new file mode 100755 index 000000000..8e11af6e4 --- /dev/null +++ b/tests/functional/arbiter/test_heal_full_after_deleting_the_files_from_bricks.py @@ -0,0 +1,202 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_heal_complete, + is_volume_in_split_brain, + is_shd_daemon_running) +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import (collect_mounts_arequal, + validate_io_procs, + list_all_files_and_dirs_mounts, + wait_for_io_to_complete) +from glustolibs.gluster.gluster_init import (start_glusterd, + stop_glusterd) +from glustolibs.misc.misc_libs import kill_process + + +@runs_on([['arbiter', 'distributed-arbiter'], + ['glusterfs', 'nfs']]) +class TestArbiterSelfHeal(GlusterBaseClass): + """ + Description: + Arbiter Test cases related to + healing in default configuration of the volume + """ + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, [cls.script_upload_path]) + if not ret: + raise 
ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + self.all_mounts_procs = [] + self.io_validation_complete = False + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + If test method failed before validating IO, tearDown waits for the + IO's to complete and checks for the IO exit status + + Cleanup and umount volume + """ + if not self.io_validation_complete: + ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts) + if not ret: + raise ExecutionError("IO failed on some of the clients") + g.log.info("IO is successful on all mounts") + + # List all files and dirs created + ret = list_all_files_and_dirs_mounts(self.mounts) + if not ret: + raise ExecutionError("Failed to list all files and dirs") + + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_heal_full_after_deleting_files(self): + """ + - Create IO + - Calculate arequal from mount + - kill glusterd process and glustershd process on arbiter nodes + - Delete data from backend from the arbiter nodes + - Start glusterd process and force start the volume + to bring the processes online + - Check if heal is completed + - Check for split-brain + - Calculate arequal checksum and compare it + """ + # pylint: disable=too-many-locals,too-many-statements + # Creating files on client side + for mount_obj in 
self.mounts: + g.log.info("Generating data for %s:%s", + mount_obj.client_system, mount_obj.mountpoint) + # Create dirs with file + command = ("/usr/bin/env python %s create_deep_dirs_with_files " + "-d 2 -l 2 -n 2 -f 20 %s" + % (self.script_upload_path, mount_obj.mountpoint)) + + proc = g.run_async(mount_obj.client_system, command, + user=mount_obj.user) + self.all_mounts_procs.append(proc) + self.io_validation_complete = False + + # Validate IO + ret = validate_io_procs(self.all_mounts_procs, self.mounts) + self.assertTrue(ret, "IO failed on some of the clients") + self.io_validation_complete = True + g.log.info("IO is successful on all mounts") + + # Get arequal before killing gluster processes on arbiter node + ret, result_before_killing_procs = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal before getting bricks offline ' + 'is successful') + + # Kill glusterd process and glustershd process on arbiter node + subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + for subvol in subvols: + arbiter = subvol[-1] + node, brick_path = arbiter.split(':') + # Stop glusterd + ret = stop_glusterd(node) + self.assertTrue(ret, "Failed to stop the glusterd on arbiter node") + # Stop glustershd + ret = kill_process(node, "glustershd") + if not ret: + # Validate glustershd process is not running + self.assertFalse( + is_shd_daemon_running(self.mnode, node, self.volname), + "The glustershd process is still running.") + g.log.info('Killed glusterd and glustershd for all arbiter ' + 'brick successfully') + + # Delete data from backend from the arbiter node + for subvol in subvols: + arbiter = subvol[-1] + # Clearing the arbiter bricks + node, brick_path = arbiter.split(':') + ret, _, err = g.run(node, 'rm -rf %s/*' % brick_path) + self.assertFalse( + ret, err) + g.log.info('Clearing for all arbiter brick is successful') + + # Start glusterd process on each arbiter + for subvol in subvols: + arbiter = 
subvol[-1]
+            node, brick_path = arbiter.split(':')
+            ret = start_glusterd(node)
+            self.assertTrue(
+                ret, "Failed to start glusterd on the arbiter node")
+
+        # Monitor heal completion
+        ret = monitor_heal_completion(self.mnode, self.volname)
+        self.assertTrue(ret, 'Heal has not yet completed')
+
+        # Check if heal is completed
+        ret = is_heal_complete(self.mnode, self.volname)
+        self.assertTrue(ret, 'Heal is not complete')
+        g.log.info('Heal is completed successfully')
+
+        # Check for split-brain
+        ret = is_volume_in_split_brain(self.mnode, self.volname)
+        self.assertFalse(ret, 'Volume is in split-brain state')
+        g.log.info('Volume is not in split-brain state')
+
+        # Get arequal after healing
+        ret, result_after_healing = collect_mounts_arequal(self.mounts)
+        self.assertTrue(ret, 'Failed to get arequal')
+        g.log.info('Getting arequal after getting bricks online '
+                   'is successful')
+
+        # Comparing arequals before killing arbiter
+        # processes and after healing
+        self.assertEqual(
+            result_before_killing_procs, result_after_healing,
+            'Arequals before killing arbiter '
+            'processes and after healing are not equal')
+
+        g.log.info('Arequals before killing arbiter '
+                   'processes and after healing are equal')
diff --git a/tests/functional/arbiter/test_metadata_self_heal.py b/tests/functional/arbiter/test_metadata_self_heal.py
index 81a098fff..0b2708438 100755
--- a/tests/functional/arbiter/test_metadata_self_heal.py
+++ b/tests/functional/arbiter/test_metadata_self_heal.py
@@ -207,10 +207,7 @@ class TestMetadataSelfHeal(GlusterBaseClass):
         # Select bricks to bring offline
         bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
             self.mnode, self.volname))
-        bricks_to_bring_offline = list(filter(None, (
-            bricks_to_bring_offline_dict['hot_tier_bricks'] +
-            bricks_to_bring_offline_dict['cold_tier_bricks'] +
-            bricks_to_bring_offline_dict['volume_bricks'])))
+        bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks']
 
         # Bring brick 
offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -332,8 +329,9 @@ class TestMetadataSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks online # and after bringing bricks online - self.assertItemsEqual(result_before_online, result_after_online, - 'Checksums are not equal') + self.assertEqual(sorted(result_before_online), + sorted(result_after_online), + 'Checksums are not equal') g.log.info('Checksums before bringing bricks online ' 'and after bringing bricks online are equal') @@ -356,11 +354,6 @@ class TestMetadataSelfHeal(GlusterBaseClass): ret, out, err = g.run(node, command) file_list = out.split() - g.log.info('Checking for user and group on %s...', node) - conn = g.rpyc_get_connection(node) - if conn is None: - raise Exception("Unable to get connection on node %s" % node) - for file_name in file_list: file_to_check = '%s/%s/%s' % (nodes_to_check[node], test_meta_data_self_heal_folder, @@ -368,26 +361,30 @@ class TestMetadataSelfHeal(GlusterBaseClass): g.log.info('Checking for permissions, user and group for %s', file_name) + # Check for permissions - permissions = oct( - conn.modules.os.stat(file_to_check).st_mode)[-3:] - self.assertEqual(permissions, '444', + cmd = ("stat -c '%a %n' {} | awk '{{print $1}}'" + .format(file_to_check)) + ret, permissions, _ = g.run(node, cmd) + self.assertEqual(permissions.split('\n')[0], '444', 'Permissions %s is not equal to 444' % permissions) g.log.info("Permissions are '444' for %s", file_name) # Check for user - uid = conn.modules.os.stat(file_to_check).st_uid - username = conn.modules.pwd.getpwuid(uid).pw_name - self.assertEqual(username, 'qa', 'User %s is not equal qa' + cmd = ("ls -ld {} | awk '{{print $3}}'" + .format(file_to_check)) + ret, username, _ = g.run(node, cmd) + self.assertEqual(username.split('\n')[0], + 'qa', 'User %s is not equal qa' % username) g.log.info("User is 'qa' for %s", file_name) # Check for group - gid = 
conn.modules.os.stat(file_to_check).st_gid - groupname = conn.modules.grp.getgrgid(gid).gr_name - self.assertEqual(groupname, 'qa', 'Group %s is not equal qa' + cmd = ("ls -ld {} | awk '{{print $4}}'" + .format(file_to_check)) + ret, groupname, _ = g.run(node, cmd) + self.assertEqual(groupname.split('\n')[0], + 'qa', 'Group %s is not equal qa' % groupname) g.log.info("Group is 'qa' for %s", file_name) - - g.rpyc_close_connection(host=node) diff --git a/tests/functional/arbiter/test_metadata_self_heal_on_open_fd.py b/tests/functional/arbiter/test_metadata_self_heal_on_open_fd.py new file mode 100644 index 000000000..8e4df5e9f --- /dev/null +++ b/tests/functional/arbiter/test_metadata_self_heal_on_open_fd.py @@ -0,0 +1,244 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +import os +import copy +from socket import gethostbyname +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + get_all_bricks) +from glustolibs.gluster.heal_libs import is_volume_in_split_brain +from glustolibs.gluster.heal_ops import get_heal_info_summary +from glustolibs.gluster.glusterfile import get_file_stat +from glustolibs.gluster.volume_ops import get_volume_info +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.glusterfile import get_pathinfo +from glustolibs.gluster.lib_utils import (collect_bricks_arequal, + add_user, del_user) +from glustolibs.io.utils import collect_mounts_arequal + + +@runs_on([['replicated', 'distributed-replicated', 'arbiter', + 'distributed-arbiter'], + ['glusterfs']]) +class TestMetadataSelfHealOpenfd(GlusterBaseClass): + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + self.user = "qa" + self.nodes = [] + self.nodes = copy.deepcopy(self.servers) + self.nodes.append(self.clients[0]) + + # Create user for changing ownership + for node in self.nodes: + ret = add_user(node, self.user) + self.assertTrue(ret, "Failed to create user") + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup and Mount_Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + for node in self.nodes: + del_user(node, self.user) + + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and 
Cleanup") + + def _verify_stat_info(self, nodes_to_check, test_file): + """ + Helper method to verify stat on all bricks and client. + """ + for node in nodes_to_check: + filepath = nodes_to_check[node] + "/" + test_file + stat_dict = get_file_stat(node, filepath) + self.assertIsNotNone(stat_dict, "stat on {} failed" + .format(test_file)) + self.assertEqual(stat_dict['username'], self.user, + "Expected qa but found {}" + .format(stat_dict['username'])) + self.assertEqual(stat_dict['groupname'], self.user, + "Expected gid qa but found {}" + .format(stat_dict['groupname'])) + self.assertEqual(stat_dict['access'], '777', + "Expected permission 777 but found {}" + .format(stat_dict['access'])) + + def test_metadata_self_heal_on_open_fd(self): + """ + Description: Pro-active metadata self heal on open fd + + Steps : + 1) Create a volume. + 2) Mount the volume using FUSE. + 3) Create test executable on volume mount. + 4) While test execution is in progress, bring down brick1. + 5) From mount point, change ownership, permission, group id of + the test file. + 6) While test execution is in progress, bring back brick1 online. + 7) Do stat on the test file to check ownership, permission, + group id on mount point and on bricks + 8) Stop test execution. + 9) Do stat on the test file to check ownership, permission, + group id on mount point and on bricks. + 10) There should be no pending heals in the heal info command. + 11) There should be no split-brain. + 12) Calculate arequal of the bricks and mount point and it + should be same. 
+ """ + # pylint: disable=too-many-statements,too-many-locals + # pylint: disable=too-many-branches + bricks_list = get_all_bricks(self.mnode, self.volname) + self.assertIsNotNone(bricks_list, 'Brick list is None') + client = self.clients[0] + + # Create test executable file on mount point + m_point = self.mounts[0].mountpoint + test_file = "testfile.sh" + cmd = ("echo 'while true; do echo 'Press CTRL+C to stop execution';" + " done' >> {}/{}".format(m_point, test_file)) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, "Failed to create test file") + + # Execute the test file + cmd = "cd {}; sh {}".format(m_point, test_file) + g.run_async(client, cmd) + + # Get pid of the test file + _cmd = "ps -aux | grep -v grep | grep testfile.sh | awk '{print $2}'" + ret, out, _ = g.run(client, _cmd) + self.assertEqual(ret, 0, "Failed to get pid of test file execution") + + # Bring brick1 offline + ret = bring_bricks_offline(self.volname, [bricks_list[1]]) + self.assertTrue(ret, 'Failed to bring bricks {} ' + 'offline'.format(bricks_list[1])) + + ret = are_bricks_offline(self.mnode, self.volname, + [bricks_list[1]]) + self.assertTrue(ret, 'Bricks {} are not ' + 'offline'.format(bricks_list[1])) + + # change uid, gid and permission from client + cmd = "chown {} {}/{}".format(self.user, m_point, test_file) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, "chown failed") + + cmd = "chgrp {} {}/{}".format(self.user, m_point, test_file) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, "chgrp failed") + + cmd = "chmod 777 {}/{}".format(m_point, test_file) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, "chown failed") + + # Bring brick1 online + ret = bring_bricks_online(self.mnode, self.volname, + [bricks_list[1]]) + self.assertTrue(ret, 'Failed to bring bricks {} online' + .format(bricks_list[1])) + + ret = get_pathinfo(client, "{}/{}" + .format(m_point, test_file)) + self.assertIsNotNone(ret, "Unable to get " + 
"trusted.glusterfs.pathinfo of file") + nodes_to_check = {} + bricks_list = [] + for brick in ret['brickdir_paths']: + node, brick_path = brick.split(':') + if node[0:2].isdigit(): + nodes_to_check[node] = os.path.dirname(brick_path) + path = node + ":" + os.path.dirname(brick_path) + else: + nodes_to_check[gethostbyname(node)] = (os.path.dirname( + brick_path)) + path = gethostbyname(node) + ":" + os.path.dirname(brick_path) + bricks_list.append(path) + nodes_to_check[client] = m_point + + # Verify that the changes are successful on bricks and client + self._verify_stat_info(nodes_to_check, test_file) + + # Kill the test executable file + for pid in out.split('\n')[:-1]: + cmd = "kill -s 9 {}".format(pid) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, "Failed to kill test file execution") + + # Verify that the changes are successful on bricks and client + self._verify_stat_info(nodes_to_check, test_file) + + # Verify there are no pending heals + heal_info = get_heal_info_summary(self.mnode, self.volname) + self.assertIsNotNone(heal_info, 'Unable to get heal info') + for brick in bricks_list: + self.assertEqual(int(heal_info[brick]['numberOfEntries']), + 0, ("Pending heal on brick {} ".format(brick))) + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + # Get arequal for mount + ret, arequals = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + mount_point_total = arequals[0].splitlines()[-1].split(':')[-1] + + # Collecting data bricks + vol_info = get_volume_info(self.mnode, self.volname) + self.assertIsNotNone(vol_info, 'Unable to get volume info') + data_brick_list = [] + for brick in bricks_list: + for brick_info in vol_info[self.volname]["bricks"]["brick"]: + if brick_info["name"] == brick: + if brick_info["isArbiter"] == "0": + data_brick_list.append(brick) + 
bricks_list = data_brick_list + + # Get arequal on bricks and compare with mount_point_total + # It should be the same + arbiter = self.volume_type.find('arbiter') >= 0 + subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + stop = len(subvols[0]) - 1 if arbiter else len(subvols[0]) + for subvol in subvols: + subvol = [i for i in subvol if i in bricks_list] + if subvol: + ret, arequal = collect_bricks_arequal(subvol[0:stop]) + self.assertTrue(ret, 'Unable to get arequal checksum ' + 'on {}'.format(subvol[0:stop])) + self.assertEqual(len(set(arequal)), 1, 'Mismatch of arequal ' + 'checksum among {} is ' + 'identified'.format(subvol[0:stop])) + brick_total = arequal[-1].splitlines()[-1].split(':')[-1] + self.assertEqual(brick_total, mount_point_total, + "Arequals for mountpoint and {} " + "are not equal".format(subvol[0:stop])) diff --git a/tests/functional/arbiter/test_mount_point_while_deleting_files.py b/tests/functional/arbiter/test_mount_point_while_deleting_files.py index 6acb8e0c8..68f880663 100755 --- a/tests/functional/arbiter/test_mount_point_while_deleting_files.py +++ b/tests/functional/arbiter/test_mount_point_while_deleting_files.py @@ -1,4 +1,4 @@ -# Copyright (C) 2016-2020 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2016-2020 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -34,8 +34,7 @@ from glustolibs.gluster.mount_ops import (mount_volume, from glustolibs.misc.misc_libs import upload_scripts -@runs_on([['arbiter'], - ['glusterfs']]) +@runs_on([['arbiter'], ['glusterfs']]) class VolumeSetDataSelfHealTests(GlusterBaseClass): @classmethod def setUpClass(cls): @@ -57,6 +56,7 @@ class VolumeSetDataSelfHealTests(GlusterBaseClass): # Setup Volumes cls.volume_configs = [] cls.mounts_dict_list = [] + cls.client = cls.clients[0] # Define two replicated volumes for i in range(1, 3): @@ -67,24 +67,22 @@ class VolumeSetDataSelfHealTests(GlusterBaseClass): cls.volume_configs.append(volume_config) # Redefine mounts - for client in cls.all_clients_info.keys(): - mount = { - 'protocol': cls.mount_type, - 'server': cls.mnode, - 'volname': volume_config['name'], - 'client': cls.all_clients_info[client], - 'mountpoint': (os.path.join( - "/mnt", '_'.join([volume_config['name'], - cls.mount_type]))), - 'options': '' - } - cls.mounts_dict_list.append(mount) - - cls.mounts = create_mount_objs(cls.mounts_dict_list) + mount = { + 'protocol': cls.mount_type, + 'server': cls.mnode, + 'volname': volume_config['name'], + 'client': cls.all_clients_info[cls.client], + 'mountpoint': (os.path.join( + "/mnt", '_'.join([volume_config['name'], + cls.mount_type]))), + 'options': '' + } + cls.mounts_dict_list.append(mount) + + cls.mounts = create_mount_objs(cls.mounts_dict_list) # Create and mount volumes cls.mount_points = [] - cls.client = cls.clients[0] for volume_config in cls.volume_configs: # Setup volume @@ -146,39 +144,33 @@ class VolumeSetDataSelfHealTests(GlusterBaseClass): raise ExecutionError("Failed to list all files and dirs") g.log.info("Listing all files and directories is successful") - @classmethod - def tearDownClass(cls): - """ - Clean up the volume and umount volume from client - 
""" # umount all volumes - for mount_obj in cls.mounts: + for mount_point in self.mount_points: ret, _, _ = umount_volume( - mount_obj.client_system, mount_obj.mountpoint) + self.client, mount_point) if ret: raise ExecutionError( "Failed to umount on volume %s " - % cls.volname) + % self.volname) g.log.info("Successfully umounted %s on client %s", - cls.volname, mount_obj.client_system) - ret = rmdir(mount_obj.client_system, mount_obj.mountpoint) + self.volname, self.client) + ret = rmdir(self.client, mount_point) if not ret: raise ExecutionError( - ret, "Failed to remove directory mount directory.") + "Failed to remove directory mount directory.") g.log.info("Mount directory is removed successfully") # stopping all volumes - g.log.info("Starting to Cleanup all Volumes") - volume_list = get_volume_list(cls.mnode) + volume_list = get_volume_list(self.mnode) for volume in volume_list: - ret = cleanup_volume(cls.mnode, volume) + ret = cleanup_volume(self.mnode, volume) if not ret: raise ExecutionError("Failed to cleanup Volume %s" % volume) g.log.info("Volume: %s cleanup is done", volume) g.log.info("Successfully Cleanedup all Volumes") - # calling GlusterBaseClass tearDownClass - cls.get_super_method(cls, 'tearDownClass')() + # calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() def test_mount_point_not_go_to_rofs(self): """ @@ -218,10 +210,8 @@ class VolumeSetDataSelfHealTests(GlusterBaseClass): for volname in volume_list: bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = ( + bricks_to_bring_offline_dict['volume_bricks']) # bring bricks offline g.log.info("Going to bring down the brick process for %s", @@ -251,3 +241,4 @@ class VolumeSetDataSelfHealTests(GlusterBaseClass): 
self.assertTrue( validate_io_procs(self.all_mounts_procs, self.mounts), "IO failed on some of the clients") + self.io_validation_complete = True diff --git a/tests/functional/arbiter/test_self_heal_50k_files.py b/tests/functional/arbiter/test_self_heal_50k_files.py new file mode 100644 index 000000000..887959fa0 --- /dev/null +++ b/tests/functional/arbiter/test_self_heal_50k_files.py @@ -0,0 +1,140 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import ( + verify_all_process_of_volume_are_online, + wait_for_volume_process_to_be_online) +from glustolibs.gluster.brick_libs import (select_bricks_to_bring_offline, + bring_bricks_offline, + bring_bricks_online, + are_bricks_offline) +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_volume_in_split_brain) +from glustolibs.io.utils import validate_io_procs + + +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestSelfHeal50kFiles(GlusterBaseClass): + """ + Description: + Arbiter self heal of 50k files + """ + def setUp(self): + # Calling GlusterBaseClass + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + g.log.info("Starting to Setup Volume and Mount Volume") + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + + # Cleanup and umount volume + g.log.info("Starting to Unmount Volume and Cleanup Volume") + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_self_heal_50k_files(self): + """ + Description: + - Select bricks to bring offline + - Bring brick offline + - Create 50k files + - Validate IO + - Bring bricks online + - Monitor heal + - Check for split-brain + - Validate IO + """ + # pylint: disable=too-many-statements,too-many-locals + # Select bricks to bring offline + bricks_to_bring_offline_dict = 
select_bricks_to_bring_offline( + self.mnode, self.volname) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] + + # Bring brick offline + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s offline' % + bricks_to_bring_offline) + self.assertIsNotNone(bricks_to_bring_offline, "List is empty") + + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks %s are not offline' + % bricks_to_bring_offline) + g.log.info('Bringing bricks %s offline is successful', + bricks_to_bring_offline) + + # Create 50k files + command = ("cd %s ; " + "for i in `seq 1 50000` ; " + "do dd if=/dev/urandom of=test.$i " + "bs=100k count=1 ; " + "done ;" + % self.mounts[0].mountpoint) + proc = g.run_async(self.mounts[0].client_system, command, + user=self.mounts[0].user) + + # Validate IO + self.assertTrue( + validate_io_procs([proc], self.mounts[0]), + "IO failed on some of the clients" + ) + + # Bring brick online + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks %s online' % + bricks_to_bring_offline) + g.log.info('Bringing bricks %s online is successful', + bricks_to_bring_offline) + + # Wait for volume processes to be online + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to wait for volume %s processes to " + "be online", self.volname)) + g.log.info("Successful in waiting for volume %s processes to be " + "online", self.volname) + + # Verify volume's all process are online + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, ("Volume %s : All process are not online" + % self.volname)) + g.log.info("Volume %s : All process are online", self.volname) + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname, + timeout_period=3000) + 
self.assertTrue(ret, 'Heal has not yet completed') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') diff --git a/tests/functional/arbiter/test_self_heal_50k_files_heal_command_by_add_brick.py b/tests/functional/arbiter/test_self_heal_50k_files_heal_command_by_add_brick.py index 06f1f42c0..da98c4b7f 100644 --- a/tests/functional/arbiter/test_self_heal_50k_files_heal_command_by_add_brick.py +++ b/tests/functional/arbiter/test_self_heal_50k_files_heal_command_by_add_brick.py @@ -107,10 +107,7 @@ class TestSelfHeal(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) @@ -211,9 +208,10 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks online # and after bringing bricks online - self.assertItemsEqual(result_before_online, result_after_online, - 'Checksums before and ' - 'after bringing bricks online are not equal') + self.assertEqual(sorted(result_before_online), + sorted(result_after_online), + 'Checksums before and ' + 'after bringing bricks online are not equal') g.log.info('Checksums before and after bringing bricks online ' 'are equal') @@ -242,8 +240,9 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals after bringing bricks online # and after adding bricks - self.assertItemsEqual(result_after_online, result_after_adding_bricks, - 'Checksums after bringing bricks online and ' - 'after adding bricks are not equal') + 
self.assertEqual(sorted(result_after_online), + sorted(result_after_adding_bricks), + 'Checksums after bringing bricks online' + 'and after adding bricks are not equal') g.log.info('Checksums after bringing bricks online and ' 'after adding bricks are equal') diff --git a/tests/functional/arbiter/test_self_heal_daemon.py b/tests/functional/arbiter/test_self_heal_daemon.py new file mode 100644 index 000000000..37470e41c --- /dev/null +++ b/tests/functional/arbiter/test_self_heal_daemon.py @@ -0,0 +1,256 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import ( + verify_all_process_of_volume_are_online, + wait_for_volume_process_to_be_online) +from glustolibs.gluster.brick_libs import ( + select_volume_bricks_to_bring_offline, + bring_bricks_offline, + bring_bricks_online, + are_bricks_offline) +from glustolibs.gluster.heal_libs import (monitor_heal_completion, + is_volume_in_split_brain) +from glustolibs.io.utils import (collect_mounts_arequal) +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.glusterfile import get_file_stat + + +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestSelfHealDaemon(GlusterBaseClass): + """ + Description: + Arbiter Test cases related to self heal + of data and hardlink + """ + def setUp(self): + # Calling GlusterBaseClass + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_self_heal_daemon(self): + """ + Test Data-Self-Heal(heal command) + Description: + - Create directory test_hardlink_self_heal + - Create directory test_data_self_heal + - Creating files for hardlinks and data files + - Get arequal before getting bricks offline + - Select bricks to bring offline + - Bring brick offline + - Create hardlinks 
and append data to data files + - Bring brick online + - Wait for volume processes to be online + - Verify volume's all process are online + - Monitor heal completion + - Check for split-brain + - Get arequal after getting bricks online + - Select bricks to bring offline + - Bring brick offline + - Truncate data to data files and verify hardlinks + - Bring brick online + - Wait for volume processes to be online + - Verify volume's all process are online + - Monitor heal completion + - Check for split-brain + - Get arequal again + + """ + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + # Creating directory test_hardlink_self_heal + ret = mkdir(self.mounts[0].client_system, "{}/test_hardlink_self_heal" + .format(self.mounts[0].mountpoint)) + self.assertTrue(ret, "Failed to create directory") + g.log.info("Directory 'test_hardlink_self_heal' on %s created " + "successfully", self.mounts[0]) + + # Creating directory test_data_self_heal + ret = mkdir(self.mounts[0].client_system, "{}/test_data_self_heal" + .format(self.mounts[0].mountpoint)) + self.assertTrue(ret, "Failed to create directory") + g.log.info("Directory test_hardlink_self_heal on %s created " + "successfully", self.mounts[0]) + + # Creating files for hardlinks and data files + cmd = ('cd %s/test_hardlink_self_heal;for i in `seq 1 5`;' + 'do mkdir dir.$i ; for j in `seq 1 10` ; do dd if=' + '/dev/urandom of=dir.$i/file.$j bs=1k count=$j;done; done;' + 'cd ..' % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to create file on mountpoint") + g.log.info("Successfully created files on mountpoint") + + cmd = ('cd %s/test_data_self_heal;for i in `seq 1 100`;' + 'do dd if=/dev/urandom of=file.$i bs=128K count=$i;done;' + 'cd ..' 
% self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to create file on mountpoint") + g.log.info("Successfully created files on mountpoint") + + # Get arequal before getting bricks offline + ret, result_before_online = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Arequal before getting bricks online-%s', + result_before_online) + + # Select bricks to bring offline + bricks_to_bring_offline = select_volume_bricks_to_bring_offline( + self.mnode, self.volname) + self.assertIsNotNone(bricks_to_bring_offline, "List is empty") + + # Bring brick offline + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} offline'. + format(bricks_to_bring_offline)) + + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks {} are not offline'. + format(bricks_to_bring_offline)) + g.log.info('Bringing bricks %s offline is successful', + bricks_to_bring_offline) + + # Append data to data files and create hardlinks + cmd = ('cd %s/test_data_self_heal;for i in `seq 1 100`;' + 'do dd if=/dev/urandom of=file.$i bs=512K count=$i ; done ;' + 'cd .. ' % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to modify data files.") + g.log.info("Successfully modified data files") + + cmd = ('cd %s/test_hardlink_self_heal;for i in `seq 1 5` ;do ' + 'for j in `seq 1 10`;do ln dir.$i/file.$j dir.$i/link_file.$j;' + 'done ; done ; cd .. 
' % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Hardlinks creation failed") + g.log.info("Successfully created hardlinks of files") + + # Bring bricks online + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} online'.format + (bricks_to_bring_offline)) + g.log.info('Bringing bricks %s online is successful', + bricks_to_bring_offline) + + # Wait for volume processes to be online + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to wait for volume {} processes to " + "be online".format(self.volname))) + g.log.info("Successful in waiting for volume %s processes to be " + "online", self.volname) + + # Verify volume's all process are online + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, ("Volume {} : All process are not online".format + (self.volname))) + g.log.info("Volume %s : All process are online", self.volname) + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + + # Check for split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertFalse(ret, 'Volume is in split-brain state') + g.log.info('Volume is not in split-brain state') + + # Get arequal after getting bricks online + ret, result_after_online = collect_mounts_arequal(self.mounts) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Arequal after getting bricks online ' + 'is %s', result_after_online) + + # Select bricks to bring offline + bricks_to_bring_offline = select_volume_bricks_to_bring_offline( + self.mnode, self.volname) + self.assertIsNotNone(bricks_to_bring_offline, "List is empty") + + # Bring brick offline + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} 
offline'.format + (bricks_to_bring_offline)) + + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Bricks {} are not offline'.format + (bricks_to_bring_offline)) + g.log.info('Bringing bricks %s offline is successful', + bricks_to_bring_offline) + + # Truncate data to data files and verify hardlinks + cmd = ('cd %s/test_data_self_heal ; for i in `seq 1 100` ;' + 'do truncate -s $(( $i * 128)) file.$i ; done ; cd ..' + % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to truncate files") + g.log.info("Successfully truncated files on mountpoint") + + file_path = ('%s/test_hardlink_self_heal/dir{1..5}/file{1..10}' + % (self.mounts[0].mountpoint)) + link_path = ('%s/test_hardlink_self_heal/dir{1..5}/link_file{1..10}' + % (self.mounts[0].mountpoint)) + file_stat = get_file_stat(self.mounts[0], file_path) + link_stat = get_file_stat(self.mounts[0], link_path) + self.assertEqual(file_stat, link_stat, "Verification of hardlinks " + "failed") + g.log.info("Successfully verified hardlinks") + + # Bring brick online + ret = bring_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, 'Failed to bring bricks {} online'.format + (bricks_to_bring_offline)) + g.log.info('Bringing bricks %s online is successful', + bricks_to_bring_offline) + + # Wait for volume processes to be online + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to wait for volume {} processes to " + "be online".format(self.volname))) + g.log.info("Successful in waiting for volume %s processes to be " + "online", self.volname) + + # Verify volume's all process are online + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, ("Volume {} : All process are not online".format + (self.volname))) + g.log.info("Volume %s : All process are online", self.volname) + + # 
Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') diff --git a/tests/functional/arbiter/test_self_heal_differing_in_file_type.py b/tests/functional/arbiter/test_self_heal_differing_in_file_type.py index d8d93a9ee..0c49bcd8f 100755 --- a/tests/functional/arbiter/test_self_heal_differing_in_file_type.py +++ b/tests/functional/arbiter/test_self_heal_differing_in_file_type.py @@ -152,10 +152,7 @@ class TestSelfHeal(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -179,9 +176,10 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks offline # and after bringing bricks offline - self.assertItemsEqual(result_before_offline, result_after_offline, - 'Checksums before and after ' - 'bringing bricks offline are not equal') + self.assertEqual(sorted(result_before_offline), + sorted(result_after_offline), + 'Checksums before and after bringing bricks' + ' offline are not equal') g.log.info('Checksums before and after ' 'bringing bricks offline are equal') @@ -271,8 +269,9 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks online # and after bringing bricks online - self.assertItemsEqual(result_before_online, result_after_online, - 'Checksums before and ' - 'after bringing bricks online are not equal') + self.assertEqual(sorted(result_before_online), + sorted(result_after_online), + 'Checksums before and after bringing bricks' + ' online are not equal') g.log.info('Checksums 
before and after bringing bricks online ' 'are equal') diff --git a/tests/functional/arbiter/test_self_heal_symbolic_links.py b/tests/functional/arbiter/test_self_heal_symbolic_links.py index 6907f8805..655ea7564 100644 --- a/tests/functional/arbiter/test_self_heal_symbolic_links.py +++ b/tests/functional/arbiter/test_self_heal_symbolic_links.py @@ -169,10 +169,7 @@ class TestSelfHeal(GlusterBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring brick offline g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline) @@ -196,9 +193,10 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks offline # and after bringing bricks offline - self.assertItemsEqual(result_before_offline, result_after_offline, - 'Checksums before and ' - 'after bringing bricks online are not equal') + self.assertEqual(sorted(result_before_offline), + sorted(result_after_offline), + 'Checksums before and after bringing bricks ' + 'online are not equal') g.log.info('Checksums before and after bringing bricks online ' 'are equal') @@ -323,8 +321,9 @@ class TestSelfHeal(GlusterBaseClass): # Checking arequals before bringing bricks online # and after bringing bricks online - self.assertItemsEqual(result_before_online, result_after_online, - 'Checksums before and ' - 'after bringing bricks online are not equal') + self.assertEqual(sorted(result_before_online), + sorted(result_after_online), + 'Checksums before and after bringing bricks ' + 'online are not equal') g.log.info('Checksums before and after bringing bricks online ' 'are equal') diff --git a/tests/functional/arbiter/test_split_brain.py 
b/tests/functional/arbiter/test_split_brain.py new file mode 100644 index 000000000..e2684be49 --- /dev/null +++ b/tests/functional/arbiter/test_split_brain.py @@ -0,0 +1,165 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +# pylint: disable=too-many-statements, too-many-locals +from glusto.core import Glusto as g + +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, + wait_for_bricks_to_be_online) +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.gluster.volume_ops import set_volume_options +from glustolibs.io.utils import validate_io_procs +from glustolibs.gluster.heal_libs import is_volume_in_split_brain +from glustolibs.gluster.volume_libs import get_subvols + + +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestSplitBrain(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, 
cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts " + "to clients %s" % cls.clients) + + # Setup Volume and Mount Volume + ret = cls.setup_volume_and_mount_volume(cls.mounts, True) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + @classmethod + def tearDownClass(cls): + """ + Cleanup Volume + """ + ret = cls.unmount_volume_and_cleanup_volume(cls.mounts) + if not ret: + raise ExecutionError("Failed to create volume") + + cls.get_super_method(cls, 'tearDownClass')() + + def _bring_bricks_online(self): + """ + Bring bricks online and monitor heal completion + """ + # Bring bricks online + ret = bring_bricks_online( + self.mnode, + self.volname, + self.bricks_to_bring_offline, + bring_bricks_online_methods=['volume_start_force']) + self.assertTrue(ret, 'Failed to bring bricks online') + + # Wait for volume processes to be online + ret = wait_for_bricks_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to wait for volume {} processes to " + "be online".format(self.volname))) + + def test_split_brain(self): + + """ + Description: Create split-brain on files and check if IO's fail + - Disable self-heal and cluster-quorum-type + - Get the bricks from the volume + - Write IO and validate IO + - Bring 1st set of brick offline(1 Data brick and arbiter brick) + - Write IO and validate IO + - Bring 2nd set of bricks offline(1 Data brick and arbiter brick) + - Write IO and validate IO + - Check volume is in split-brain + - Write IO and validate IO - should fail + - Enable self-heal and cluster-quorum-type + - Write IO and validate IO - should fail + """ + # Disable self-heal and cluster-quorum-type + options = {"self-heal-daemon": "off", + "cluster.quorum-type": "none"} + ret = set_volume_options(self.mnode, self.volname, options) + self.assertTrue(ret, ("Unable to set volume option %s for " + "volume %s" % (options, self.volname))) + + # Get the bricks from the volume + sub_vols = 
get_subvols(self.mnode, self.volname) + self.bricks_to_bring_offline = list(sub_vols['volume_subvols'][0]) + + # Write IO's + write_cmd = ("/usr/bin/env python %s create_files -f 1 " + "--base-file-name test_file --fixed-file-size 1k %s" % + (self.script_upload_path, + self.mounts[0].mountpoint)) + ret, _, _ = g.run(self.mounts[0].client_system, write_cmd) + + # Bring 1st set of brick offline(1 Data brick and arbiter brick) + for bricks in ((0, -1), (1, -1)): + down_bricks = [] + for brick in bricks: + down_bricks.append(self.bricks_to_bring_offline[brick]) + ret = bring_bricks_offline(self.volname, down_bricks) + self.assertTrue(ret, 'Failed to bring bricks {} offline'. + format(down_bricks)) + proc = g.run_async(self.mounts[0].client_system, write_cmd) + + # Validate I/O + self.assertTrue( + validate_io_procs([proc], self.mounts), + "IO failed on some of the clients" + ) + + # Bring bricks online + self._bring_bricks_online() + + # Check volume is in split-brain + ret = is_volume_in_split_brain(self.mnode, self.volname) + self.assertTrue(ret, "unable to create split-brain scenario") + g.log.info("Successfully created split brain scenario") + + # Write IO's + proc2 = g.run_async(self.mounts[0].client_system, write_cmd) + + # Validate I/O + self.assertFalse( + validate_io_procs([proc2], self.mounts), + "IO passed on split-brain" + ) + g.log.info("Expected - IO's failed due to split-brain") + + # Enable self-heal and cluster-quorum-type + options = {"self-heal-daemon": "on", + "cluster.quorum-type": "auto"} + ret = set_volume_options(self.mnode, self.volname, options) + self.assertTrue(ret, ("Unable to set volume option %s for " + "volume %s" % (options, self.volname))) + + # Write IO's + proc3 = g.run_async(self.mounts[0].client_system, write_cmd) + + # Validate I/O + self.assertFalse( + validate_io_procs([proc3], self.mounts), + "IO passed on split-brain" + ) + g.log.info("Expected - IO's failed due to split-brain") diff --git 
a/tests/functional/arbiter/test_verify_metadata_and_data_heal.py b/tests/functional/arbiter/test_verify_metadata_and_data_heal.py new file mode 100644 index 000000000..d48e36e73 --- /dev/null +++ b/tests/functional/arbiter/test_verify_metadata_and_data_heal.py @@ -0,0 +1,297 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
from glusto.core import Glusto as g

from glustolibs.gluster.brick_libs import (bring_bricks_offline,
                                           bring_bricks_online,
                                           get_online_bricks_list)
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.glusterdir import mkdir
from glustolibs.gluster.heal_libs import (
    is_heal_complete, is_volume_in_split_brain, monitor_heal_completion,
    wait_for_self_heal_daemons_to_be_online)
from glustolibs.gluster.heal_ops import (disable_self_heal_daemon,
                                         enable_self_heal_daemon, trigger_heal)
from glustolibs.gluster.lib_utils import (add_user, collect_bricks_arequal,
                                          del_user, group_add, group_del)
from glustolibs.gluster.volume_libs import get_subvols
from glustolibs.io.utils import list_all_files_and_dirs_mounts


@runs_on([['arbiter', 'replicated'], ['glusterfs']])
class TestMetadataAndDataHeal(GlusterBaseClass):
    '''Description: Verify shd heals files after performing metadata and data
    operations while a brick was down'''

    def _dac_helper(self, host, option):
        '''Create or delete the users and groups used by the metadata tests.

        Args:
            host (str): Host on which users/groups should be managed.
            option (str): Either 'create' or 'delete'.

        Raises:
            ExecutionError: If a user or group operation fails.
        '''
        # Permission/Ownership changes are required only for `test_metadata*`
        # tests; every other test skips user/group management entirely
        if 'metadata' not in self.test_dir:
            return

        if option == 'create':
            # Groups
            for group in ('qa_func', 'qa_system'):
                if not group_add(host, group):
                    raise ExecutionError('Unable to {} group {} on '
                                         '{}'.format(option, group, host))

            # User 'qa_all' with primary group 'qa_func'
            if not add_user(host, 'qa_all', group='qa_func'):
                raise ExecutionError('Unable to {} user {} under {} on '
                                     '{}'.format(option, 'qa_all', 'qa_func',
                                                 host))
        elif option == 'delete':
            # Groups
            for group in ('qa_func', 'qa_system'):
                if not group_del(host, group):
                    raise ExecutionError('Unable to {} group {} on '
                                         '{}'.format(option, group, host))

            # User
            if not del_user(host, 'qa_all'):
                raise ExecutionError('Unable to {} user on {}'.format(
                    option, host))

    def setUp(self):
        '''Setup/mount the volume and create the users needed by the test.'''
        self.get_super_method(self, 'setUp')()

        # A single mount is enough for all the tests
        self.mounts = self.mounts[0:1]
        self.client = self.mounts[0].client_system

        # Use testcase name as test directory
        self.test_dir = self.id().split('.')[-1]
        self.fqpath = self.mounts[0].mountpoint + '/' + self.test_dir

        if not self.setup_volume_and_mount_volume(mounts=self.mounts):
            raise ExecutionError('Failed to setup and mount '
                                 '{}'.format(self.volname))

        # Create group and user names required for the test
        self._dac_helper(host=self.client, option='create')

    def tearDown(self):
        '''Delete users/groups created in setUp and cleanup the volume.'''
        # Delete group and user names created as part of setup
        self._dac_helper(host=self.client, option='delete')

        if not self.unmount_volume_and_cleanup_volume(mounts=self.mounts):
            raise ExecutionError('Not able to unmount and cleanup '
                                 '{}'.format(self.volname))

        self.get_super_method(self, 'tearDown')()

    def _perform_io_and_disable_self_heal(self):
        '''Refactor of steps common to all tests: Perform IO, disable heal'''
        ret = mkdir(self.client, self.fqpath)
        self.assertTrue(ret,
                        'Directory creation failed on {}'.format(self.client))
        # Shell pipeline that emits N bytes of printable random data; the
        # byte count is appended at each use site (e.g. `{1} 10K > file`)
        self.io_cmd = 'cat /dev/urandom | tr -dc [:space:][:print:] | head -c '
        # Create 6 dir's, 6 files and 6 files in each subdir with 10K data
        file_io = ('''cd {0}; for i in `seq 1 6`;
                   do mkdir dir.$i; {1} 10K > file.$i;
                   for j in `seq 1 6`;
                   do {1} 10K > dir.$i/file.$j; done;
                   done;'''.format(self.fqpath, self.io_cmd))
        ret, _, err = g.run(self.client, file_io)
        self.assertEqual(ret, 0, 'Unable to create directories and data files')
        self.assertFalse(err, '{0} failed with {1}'.format(file_io, err))

        # Disable self heal daemon so pending heals accumulate
        self.assertTrue(disable_self_heal_daemon(self.mnode, self.volname),
                        'Disabling self-heal-daemon failed')

    def _perform_brick_ops_and_enable_self_heal(self, op_type):
        '''Refactor of steps common to all tests: Brick down and perform
        metadata/data operations

        Args:
            op_type (str): 'metadata' or 'data', selecting the operation set.
        '''
        # First brick in the subvol will always be online and used for self
        # heal, so make keys match brick index
        self.op_cmd = {
            # Metadata Operations (owner and permission changes)
            'metadata': {
                2:
                '''cd {0}; for i in `seq 1 3`; do chown -R qa_all:qa_func \
                dir.$i file.$i; chmod -R 555 dir.$i file.$i; done;''',
                3:
                '''cd {0}; for i in `seq 1 3`; do chown -R :qa_system \
                dir.$i file.$i; chmod -R 777 dir.$i file.$i; done;''',
                # 4 - Will be used for final data consistency check
                4:
                '''cd {0}; for i in `seq 1 6`; do chown -R qa_all:qa_system \
                dir.$i file.$i; chmod -R 777 dir.$i file.$i; done;''',
            },
            # Data Operations (append data to the files)
            'data': {
                2:
                '''cd {0}; for i in `seq 1 3`;
                do {1} 2K >> file.$i;
                for j in `seq 1 3`;
                do {1} 2K >> dir.$i/file.$j; done;
                done;''',
                3:
                '''cd {0}; for i in `seq 1 3`;
                do {1} 3K >> file.$i;
                for j in `seq 1 3`;
                do {1} 3K >> dir.$i/file.$j; done;
                done;''',
                # 4 - Will be used for final data consistency check
                4:
                '''cd {0}; for i in `seq 1 6`;
                do {1} 4K >> file.$i;
                for j in `seq 1 6`;
                do {1} 4K >> dir.$i/file.$j; done;
                done;''',
            },
        }
        bricks = get_online_bricks_list(self.mnode, self.volname)
        self.assertIsNotNone(bricks,
                             'Not able to get list of bricks in the volume')

        # Make first brick always online and start operations from second
        # brick; `index` matches the keys of `self.op_cmd[op_type]`
        for index, brick in enumerate(bricks[1:], start=2):

            # Bring brick offline
            ret = bring_bricks_offline(self.volname, brick)
            # Fix: report the single brick, not the whole brick list
            self.assertTrue(ret, 'Unable to bring {} offline'.format(brick))

            # Perform metadata/data operation
            cmd = self.op_cmd[op_type][index].format(self.fqpath, self.io_cmd)
            ret, _, err = g.run(self.client, cmd)
            self.assertEqual(ret, 0, '{0} failed with {1}'.format(cmd, err))
            self.assertFalse(err, '{0} failed with {1}'.format(cmd, err))

            # Bring brick online
            ret = bring_bricks_online(
                self.mnode,
                self.volname,
                brick,
                bring_bricks_online_methods='volume_start_force')
            # Fix: the return value was previously ignored, which let a
            # failed brick-online pass silently
            self.assertTrue(ret, 'Unable to bring {} online'.format(brick))

        # Assert metadata/data operations resulted in pending heals
        self.assertFalse(is_heal_complete(self.mnode, self.volname),
                         'Expected pending heals after brick down operations')

        # Enable and wait self heal daemon to be online
        self.assertTrue(enable_self_heal_daemon(self.mnode, self.volname),
                        'Enabling self heal daemon failed')
        self.assertTrue(
            wait_for_self_heal_daemons_to_be_online(self.mnode, self.volname),
            'Not all self heal daemons are online')

    def _validate_heal_completion_and_arequal(self, op_type):
        '''Refactor of steps common to all tests: Validate heal from heal
        commands, verify arequal, perform IO and verify arequal after IO

        Args:
            op_type (str): 'metadata' or 'data', selecting the final
                consistency-check operation (key 4 of `self.op_cmd`).
        '''
        # Validate heal completion
        self.assertTrue(monitor_heal_completion(self.mnode, self.volname),
                        'Self heal is not completed within timeout')
        self.assertFalse(
            is_volume_in_split_brain(self.mnode, self.volname),
            'Volume is in split brain even after heal completion')

        subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
        self.assertTrue(subvols, 'Not able to get list of subvols')
        # On arbiter volumes the last brick of each subvol is the arbiter;
        # exclude it from the arequal comparison as it holds no file data
        arbiter = self.volume_type.find('arbiter') >= 0
        stop = len(subvols[0]) - 1 if arbiter else len(subvols[0])

        # Validate arequal
        self._validate_arequal_and_perform_lookup(subvols, stop)

        # Perform some additional metadata/data operations
        cmd = self.op_cmd[op_type][4].format(self.fqpath, self.io_cmd)
        ret, _, err = g.run(self.client, cmd)
        self.assertEqual(ret, 0, '{0} failed with {1}'.format(cmd, err))
        self.assertFalse(err, '{0} failed with {1}'.format(cmd, err))

        # Validate arequal after additional operations
        self._validate_arequal_and_perform_lookup(subvols, stop)

    def _validate_arequal_and_perform_lookup(self, subvols, stop):
        '''Refactor of steps common to all tests: Validate arequal from bricks
        backend and perform a lookup of all files from mount'''
        for subvol in subvols:
            ret, arequal = collect_bricks_arequal(subvol[0:stop])
            self.assertTrue(
                ret, 'Unable to get `arequal` checksum on '
                '{}'.format(subvol[0:stop]))
            # All data bricks of a subvol must report an identical checksum
            self.assertEqual(
                len(set(arequal)), 1, 'Mismatch of `arequal` '
                'checksum among {} is identified'.format(subvol[0:stop]))

        # Perform a lookup of all files and directories on mounts
        self.assertTrue(list_all_files_and_dirs_mounts(self.mounts),
                        'Failed to list all files and dirs from mount')

    def test_metadata_heal_from_shd(self):
        '''Description: Verify files heal after switching on `self-heal-daemon`
        when metadata operations are performed while a brick was down

        Steps:
        1. Create, mount and run IO on volume
        2. Set `self-heal-daemon` to `off`, cyclic brick down and perform
           metadata operations
        3. Set `self-heal-daemon` to `on` and wait for heal completion
        4. Validate arequal checksum on backend bricks
        '''
        op_type = 'metadata'
        self._perform_io_and_disable_self_heal()
        self._perform_brick_ops_and_enable_self_heal(op_type=op_type)
        self._validate_heal_completion_and_arequal(op_type=op_type)
        g.log.info('Pass: Verification of metadata heal after switching on '
                   '`self heal daemon` is complete')

    def test_metadata_heal_from_heal_cmd(self):
        '''Description: Verify files heal after triggering heal command when
        metadata operations are performed while a brick was down

        Steps:
        1. Create, mount and run IO on volume
        2. Set `self-heal-daemon` to `off`, cyclic brick down and perform
           metadata operations
        3. Set `self-heal-daemon` to `on`, invoke `gluster vol <vol> heal`
        4. Validate arequal checksum on backend bricks
        '''
        op_type = 'metadata'
        self._perform_io_and_disable_self_heal()
        self._perform_brick_ops_and_enable_self_heal(op_type=op_type)

        # Invoke `glfsheal`
        self.assertTrue(trigger_heal(self.mnode, self.volname),
                        'Unable to trigger index heal on the volume')

        self._validate_heal_completion_and_arequal(op_type=op_type)
        g.log.info(
            'Pass: Verification of metadata heal via `glfsheal` is complete')

    def test_data_heal_from_shd(self):
        '''Description: Verify files heal after switching on `self-heal-daemon`
        when data operations are performed while a brick was down

        Steps:
        1. Create, mount and run IO on volume
        2. Set `self-heal-daemon` to `off`, cyclic brick down and perform data
           operations
        3. Set `self-heal-daemon` to `on` and wait for heal completion
        4. Validate arequal checksum on backend bricks
        '''
        op_type = 'data'
        self._perform_io_and_disable_self_heal()
        self._perform_brick_ops_and_enable_self_heal(op_type=op_type)
        self._validate_heal_completion_and_arequal(op_type=op_type)
        g.log.info('Pass: Verification of data heal after switching on '
                   '`self heal daemon` is complete')
@@ -38,13 +37,10 @@ class FuseAuthAllow(GlusterBaseClass): """ cls.get_super_method(cls, 'setUpClass')() # Create and start volume - g.log.info("Starting volume setup process %s", cls.volname) ret = cls.setup_volume() if not ret: raise ExecutionError("Failed to setup " "and start volume %s" % cls.volname) - g.log.info("Successfully created and started the volume: %s", - cls.volname) def authenticated_mount(self, mount_obj): """ @@ -147,7 +143,6 @@ class FuseAuthAllow(GlusterBaseClass): auth_dict = {'all': [self.mounts[0].client_system]} ret = set_auth_allow(self.volname, self.mnode, auth_dict) self.assertTrue(ret, "Failed to set authentication") - g.log.info("Successfully set authentication on volume") # Mounting volume on client1 self.authenticated_mount(self.mounts[0]) @@ -179,7 +174,6 @@ class FuseAuthAllow(GlusterBaseClass): auth_dict = {'all': [hostname_client1.strip()]} ret = set_auth_allow(self.volname, self.mnode, auth_dict) self.assertTrue(ret, "Failed to set authentication") - g.log.info("Successfully set authentication on volume") # Mounting volume on client1 self.authenticated_mount(self.mounts[0]) @@ -204,8 +198,9 @@ class FuseAuthAllow(GlusterBaseClass): """ Cleanup volume """ - g.log.info("Cleaning up volume") ret = self.cleanup_volume() if not ret: raise ExecutionError("Failed to cleanup volume.") - g.log.info("Volume cleanup was successful.") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/authentication/test_auth_allow_with_brick_down.py b/tests/functional/authentication/test_auth_allow_with_brick_down.py new file mode 100644 index 000000000..8fe365aed --- /dev/null +++ b/tests/functional/authentication/test_auth_allow_with_brick_down.py @@ -0,0 +1,171 @@ +# Copyright (C) 2021 Red Hat, Inc. 
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

""" Description:
    Test cases in this module tests the authentication allow feature
    combined with a brick-down/heal cycle on the volume.
"""
from glusto.core import Glusto as g
from glustolibs.gluster.gluster_base_class import (GlusterBaseClass,
                                                   runs_on)
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.auth_ops import set_auth_allow
from glustolibs.gluster.brick_libs import (get_all_bricks,
                                           bring_bricks_offline,
                                           bring_bricks_online,
                                           are_bricks_offline)
from glustolibs.gluster.heal_ops import trigger_heal
from glustolibs.gluster.heal_libs import (monitor_heal_completion,
                                          is_heal_complete)


@runs_on([['distributed-replicated', 'distributed-dispersed'], ['glusterfs']])
class FuseAuthAllow(GlusterBaseClass):
    """
    Tests to verify auth.allow feature on fuse mount.
    """
    @classmethod
    def setUpClass(cls):
        """
        Create and start volume.

        Raises:
            ExecutionError: If volume setup fails.
        """
        cls.get_super_method(cls, 'setUpClass')()
        # Create and start volume
        ret = cls.setup_volume()
        if not ret:
            raise ExecutionError("Failed to setup "
                                 "and start volume %s" % cls.volname)

    def _authenticated_mount(self, mount_obj):
        """
        Mount volume on authenticated client and verify the mount succeeded.

        Args:
            mount_obj(obj): Object of GlusterMount class
        """
        # Mount volume
        ret = mount_obj.mount()
        self.assertTrue(ret, ("Failed to mount %s on client %s" %
                              (mount_obj.volname,
                               mount_obj.client_system)))
        g.log.info("Successfully mounted %s on client %s", mount_obj.volname,
                   mount_obj.client_system)

        # Verify mount
        ret = mount_obj.is_mounted()
        self.assertTrue(ret, ("%s is not mounted on client %s"
                              % (mount_obj.volname, mount_obj.client_system)))
        g.log.info("Verified: %s is mounted on client %s",
                   mount_obj.volname, mount_obj.client_system)

    def _brick_down_heal(self):
        """
        Create files on the mount, cycle one brick offline/online and verify
        that self-heal completes afterwards.

        Raises (via assertions): on IO failure, brick state mismatch, or
        incomplete heal.
        """
        # Create files on mount point using dd command
        cmd = ('cd %s;for i in {1..10};'
               'do dd if=/dev/urandom bs=1024 count=1 of=file$i;done;'
               % (self.mounts[0].mountpoint))
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to createfiles on mountpoint")
        g.log.info("Successfully created files on mountpoint")

        # Bring brick1 offline
        bricks_list = get_all_bricks(self.mnode, self.volname)
        ret = bring_bricks_offline(self.volname, bricks_list[1])
        self.assertTrue(ret, 'Failed to bring brick1 offline')
        ret = are_bricks_offline(self.mnode, self.volname,
                                 [bricks_list[1]])
        self.assertTrue(ret, 'Brick1 is not offline')
        g.log.info('Bringing brick1 offline is successful')

        # Bring brick1 back online
        # NOTE(review): the test docstring (step 6) says the brick is brought
        # up via "gluster volume start force", but no
        # bring_bricks_online_methods argument is passed here, so the
        # library's default method is used — confirm the default matches the
        # intended step.
        ret = bring_bricks_online(self.mnode, self.volname,
                                  [bricks_list[1]])
        self.assertTrue(ret, 'Failed to bring brick1 online')
        g.log.info('Bringing brick1 online is successful')

        # Start healing
        ret = trigger_heal(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not started')
        g.log.info('Healing is started')

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

    def test_auth_allow_with_heal(self):
        """
        Validating the FUSE authentication volume options with Heal.
        Steps:
        1. Setup and start volume
        2. Set auth.allow on volume for client1 using ip of client1
        3. Mount volume on client1.
        4. Create files on mount point using dd command
        5. Bring down one brick of the volume
        6. Bring the brick back up after few seconds using
           "gluster volume start force"
        7. Start volume heal by using gluster volume heal
        8. See the heal status using gluster volume heal info
        9. Set auth.allow on volume for client1 using hostname of client1.
        10. Repeat steps from 3 to 9
        """
        # Setting authentication on volume for client1 using ip
        auth_dict = {'all': [self.mounts[0].client_system]}
        ret = set_auth_allow(self.volname, self.mnode, auth_dict)
        self.assertTrue(ret, "Failed to set authentication")

        # Mounting volume on client1
        self._authenticated_mount(self.mounts[0])

        # Create files,bring brick down and check heal
        self._brick_down_heal()

        # Unmount volume from client1
        ret = self.mounts[0].unmount()
        self.assertTrue(ret, ("Failed to unmount volume %s from client %s"
                              % (self.volname, self.mounts[0].client_system)))

        # Obtain hostname of client1
        ret, hostname_client1, _ = g.run(self.mounts[0].client_system,
                                         "hostname")
        self.assertEqual(ret, 0, ("Failed to obtain hostname of client %s"
                                  % self.mounts[0].client_system))
        g.log.info("Obtained hostname of client. IP- %s, hostname- %s",
                   self.mounts[0].client_system, hostname_client1.strip())

        # Setting authentication on volume for client1 using hostname
        auth_dict = {'all': [hostname_client1.strip()]}
        ret = set_auth_allow(self.volname, self.mnode, auth_dict)
        self.assertTrue(ret, "Failed to set authentication")

        # Mounting volume on client1
        self._authenticated_mount(self.mounts[0])

        # Create files,bring brick down and check heal
        self._brick_down_heal()

    def tearDown(self):
        """
        Cleanup volume

        Raises:
            ExecutionError: If volume cleanup fails.
        """
        ret = self.cleanup_volume()
        if not ret:
            raise ExecutionError("Failed to cleanup volume.")

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()
AuthInvalidValues(GlusterBaseClass): """ Cleanup volume """ - g.log.info("Cleaning up volume") ret = self.cleanup_volume() if not ret: raise ExecutionError("Failed to cleanup volume.") - g.log.info("Volume cleanup was successful.") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/authentication/test_auth_reject_allow.py b/tests/functional/authentication/test_auth_reject_allow.py index 083b08e1b..6afdc5ae8 100644 --- a/tests/functional/authentication/test_auth_reject_allow.py +++ b/tests/functional/authentication/test_auth_reject_allow.py @@ -28,8 +28,7 @@ from glustolibs.gluster.auth_ops import set_auth_allow, set_auth_reject @runs_on([['replicated', 'distributed', 'distributed-replicated', - 'dispersed', 'distributed-dispersed'], - ['glusterfs']]) + 'dispersed', 'distributed-dispersed'], ['glusterfs']]) class FuseAuthRejectAllow(GlusterBaseClass): """ Tests to verify auth.reject and auth.allow volume options in volume and @@ -42,13 +41,10 @@ class FuseAuthRejectAllow(GlusterBaseClass): """ cls.get_super_method(cls, 'setUpClass')() # Create and start volume - g.log.info("Starting volume setup process %s", cls.volname) ret = cls.setup_volume() if not ret: raise ExecutionError("Failed to setup " "and start volume %s" % cls.volname) - g.log.info("Successfully created and started the volume: %s", - cls.volname) def authenticated_mount(self, mount_obj): """ @@ -167,13 +163,11 @@ class FuseAuthRejectAllow(GlusterBaseClass): auth_dict = {'all': [self.mounts[0].client_system]} ret = set_auth_reject(self.volname, self.mnode, auth_dict) self.assertTrue(ret, "Failed to set auth.reject volume option.") - g.log.info("Successfully set auth.reject option on volume") # Setting auth.allow on volume for client2 using ip auth_dict = {'all': [self.mounts[1].client_system]} ret = set_auth_allow(self.volname, self.mnode, auth_dict) self.assertTrue(ret, "Failed to set auth.allow volume option") - g.log.info("Successfully 
set auth.allow option on volume") # Trying to mount volume on client1 self.unauthenticated_mount(self.mounts[0]) @@ -213,13 +207,11 @@ class FuseAuthRejectAllow(GlusterBaseClass): auth_dict = {'all': [hostname_client1.strip()]} ret = set_auth_reject(self.volname, self.mnode, auth_dict) self.assertTrue(ret, "Failed to set auth.reject volume option.") - g.log.info("Successfully set auth.reject option on volume") # Setting auth.allow on volume for client2 using hostname auth_dict = {'all': [hostname_client2.strip()]} ret = set_auth_allow(self.volname, self.mnode, auth_dict) self.assertTrue(ret, "Failed to set auth.allow volume option") - g.log.info("Successfully set auth.allow option on volume") # Trying to mount volume on client1 self.unauthenticated_mount(self.mounts[0]) @@ -251,13 +243,11 @@ class FuseAuthRejectAllow(GlusterBaseClass): auth_dict = {'/d1': [self.mounts[0].client_system]} ret = set_auth_reject(self.volname, self.mnode, auth_dict) self.assertTrue(ret, "Failed to set auth.reject volume option.") - g.log.info("Successfully set auth.reject option.") # Setting auth.allow on d1 for client2 using ip auth_dict = {'/d1': [self.mounts[1].client_system]} ret = set_auth_allow(self.volname, self.mnode, auth_dict) self.assertTrue(ret, "Failed to set auth.allow volume option") - g.log.info("Successfully set auth.allow option.") # Creating mount object for sub-directory mount on client1 mount_obj_client1 = copy.deepcopy(self.mounts[0]) @@ -291,13 +281,11 @@ class FuseAuthRejectAllow(GlusterBaseClass): auth_dict = {'/d1': [hostname_client1.strip()]} ret = set_auth_reject(self.volname, self.mnode, auth_dict) self.assertTrue(ret, "Failed to set auth.reject volume option.") - g.log.info("Successfully set auth.reject option.") # Setting auth.allow on d1 for client2 using hostname auth_dict = {'/d1': [hostname_client2.strip()]} ret = set_auth_allow(self.volname, self.mnode, auth_dict) self.assertTrue(ret, "Failed to set auth.allow volume option") - 
g.log.info("Successfully set auth.allow option.") # Trying to mount d1 on client1 self.unauthenticated_mount(mount_obj_client1) @@ -322,8 +310,9 @@ class FuseAuthRejectAllow(GlusterBaseClass): """ Cleanup volume """ - g.log.info("Cleaning up volume") ret = self.cleanup_volume() if not ret: raise ExecutionError("Failed to cleanup volume.") - g.log.info("Volume cleanup was successful.") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/authentication/test_authentication_allow_blank.py b/tests/functional/authentication/test_authentication_allow_blank.py index dab0baab3..4bef00f31 100644 --- a/tests/functional/authentication/test_authentication_allow_blank.py +++ b/tests/functional/authentication/test_authentication_allow_blank.py @@ -28,8 +28,7 @@ from glustolibs.gluster.volume_libs import cleanup_volume @runs_on([['replicated', 'distributed-replicated', 'dispersed', - 'distributed-dispersed'], - ['glusterfs']]) + 'distributed-dispersed'], ['glusterfs']]) class AuthAllowEmptyString(GlusterBaseClass): """ Tests to verify auth.allow functionality on Volume and Fuse subdir @@ -38,13 +37,12 @@ class AuthAllowEmptyString(GlusterBaseClass): """ Setup Volume """ + # Calling GlusterBaseClass Setup + self.get_super_method(self, 'setUp')() + ret = self.setup_volume() if not ret: raise ExecutionError("Failed to setup volume") - g.log.info("Volume %s has been setup successfully", self.volname) - - # Calling GlusterBaseClass Setup - self.get_super_method(self, 'setUp')() def test_validate_authallow(self): """ @@ -76,5 +74,6 @@ class AuthAllowEmptyString(GlusterBaseClass): if not ret: raise ExecutionError("Failed to Cleanup the " "Volume %s" % self.volname) - g.log.info("Volume deleted successfully " - ": %s", self.volname) + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/authentication/test_fusereject.py 
b/tests/functional/authentication/test_fusereject.py index 19bafdff7..6600f4e49 100644 --- a/tests/functional/authentication/test_fusereject.py +++ b/tests/functional/authentication/test_fusereject.py @@ -28,8 +28,7 @@ from glustolibs.gluster.mount_ops import (mount_volume, is_mounted, from glustolibs.gluster.brick_libs import get_all_bricks, are_bricks_online -@runs_on([['replicated'], - ['glusterfs']]) +@runs_on([['replicated'], ['glusterfs']]) class AuthRejectVol(GlusterBaseClass): """ Create a replicated volume and start the volume and check @@ -40,18 +39,18 @@ class AuthRejectVol(GlusterBaseClass): """ Creating a replicated volume and checking if it is started """ + # Calling GlusterBaseClass Setup + self.get_super_method(self, 'setUp')() + ret = self.setup_volume() if not ret: raise ExecutionError("Failed to setup volume %s" % self.volname) - g.log.info("Volume %s has been setup successfully", self.volname) # Check if volume is started volinfo = get_volume_info(self.mnode, self.volname) if volinfo[self.volname]['statusStr'] != "Started": raise ExecutionError("Volume has not Started") g.log.info("Volume is started") - # Calling GlusterBaseClass Setup - self.get_super_method(self, 'setUp')() def tearDown(self): """ @@ -64,13 +63,10 @@ class AuthRejectVol(GlusterBaseClass): raise ExecutionError("Failed to unmount volume from client" " %s" % client) g.log.info("Unmounted Volume from client %s successfully", client) - g.log.info("Cleaning up volume") ret = self.cleanup_volume() if not ret: raise ExecutionError("Failed to Cleanup the " "Volume %s" % self.volname) - g.log.info("Volume deleted successfully " - ": %s", self.volname) # Calling GlusterBaseClass tearDown self.get_super_method(self, 'tearDown')() @@ -106,7 +102,6 @@ class AuthRejectVol(GlusterBaseClass): # Fetching all the bricks self.mountpoint = "/mnt/testvol" - g.log.info("Fetching bricks for the volume : %s", self.volname) bricks_list = get_all_bricks(self.mnode, self.volname) 
self.assertIsNotNone(bricks_list, "Brick list is empty") g.log.info("Brick List : %s", bricks_list) @@ -114,7 +109,6 @@ class AuthRejectVol(GlusterBaseClass): # Check are bricks online ret = are_bricks_online(self.mnode, self.volname, bricks_list) self.assertTrue(ret, "All bricks are not online") - g.log.info("All bricks are online") # Using this way to check because of bug 1586036 # Mounting volume @@ -144,10 +138,8 @@ class AuthRejectVol(GlusterBaseClass): # Mounting the vol on client2 # Check bricks are online - g.log.info("Brick List : %s", bricks_list) ret = are_bricks_online(self.mnode, self.volname, bricks_list) self.assertTrue(ret, "All bricks are not online") - g.log.info("All bricks are online") # Mounting Volume ret, _, _ = mount_volume(self.volname, self.mount_type, @@ -162,7 +154,6 @@ class AuthRejectVol(GlusterBaseClass): user='root') self.assertTrue(out, "Volume %s has failed to mount" % self.volname) - g.log.info("Volume is mounted successfully %s", self.volname) # Reset Volume ret, _, _ = volume_reset(mnode=self.mnode, volname=self.volname) @@ -170,10 +161,8 @@ class AuthRejectVol(GlusterBaseClass): g.log.info("Volume %s reset operation is successful", self.volname) # Checking if bricks are online - g.log.info("Brick List : %s", bricks_list) ret = are_bricks_online(self.mnode, self.volname, bricks_list) self.assertTrue(ret, "All bricks are not online") - g.log.info("All bricks are online") # Mounting Volume ret, _, _ = mount_volume(self.volname, self.mount_type, diff --git a/tests/functional/authentication/test_verify_auth_reject_precedence.py b/tests/functional/authentication/test_verify_auth_reject_precedence.py index d51e61443..ce8420690 100644 --- a/tests/functional/authentication/test_verify_auth_reject_precedence.py +++ b/tests/functional/authentication/test_verify_auth_reject_precedence.py @@ -28,8 +28,7 @@ from glustolibs.gluster.auth_ops import set_auth_allow, set_auth_reject @runs_on([['replicated', 'distributed', 
'distributed-replicated', - 'dispersed', 'distributed-dispersed'], - ['glusterfs']]) + 'dispersed', 'distributed-dispersed'], ['glusterfs']]) class VerifyAuthRejectPrecedence(GlusterBaseClass): """ Tests to verify auth.reject precedence over auth.allow option. @@ -41,13 +40,10 @@ class VerifyAuthRejectPrecedence(GlusterBaseClass): """ cls.get_super_method(cls, 'setUpClass')() # Create and start volume - g.log.info("Starting volume setup process %s", cls.volname) ret = cls.setup_volume() if not ret: raise ExecutionError("Failed to setup " "and start volume %s" % cls.volname) - g.log.info("Successfully created and started the volume: %s", - cls.volname) def authenticated_mount(self, mount_obj): """ @@ -182,14 +178,12 @@ class VerifyAuthRejectPrecedence(GlusterBaseClass): auth_dict = {'all': ['*']} ret = set_auth_reject(self.volname, self.mnode, auth_dict) self.assertTrue(ret, "Failed to set auth.reject volume option.") - g.log.info("Successfully set auth.reject option on volume") # Setting auth.allow on volume for client1 and client2 using ip auth_dict = {'all': [self.mounts[0].client_system, self.mounts[0].client_system]} ret = set_auth_allow(self.volname, self.mnode, auth_dict) self.assertTrue(ret, "Failed to set auth.allow volume option") - g.log.info("Successfully set auth.allow option on volume") # Trying to mount volume on client1 self.unauthenticated_mount(self.mounts[0]) @@ -230,7 +224,6 @@ class VerifyAuthRejectPrecedence(GlusterBaseClass): auth_dict = {'all': [hostname_client1, hostname_client2]} ret = set_auth_allow(self.volname, self.mnode, auth_dict) self.assertTrue(ret, "Failed to set auth.allow volume option") - g.log.info("Successfully set auth.allow option on volume") # Trying to mount volume on client1 self.unauthenticated_mount(self.mounts[0]) @@ -255,14 +248,12 @@ class VerifyAuthRejectPrecedence(GlusterBaseClass): auth_dict = {'/d1': ['*']} ret = set_auth_reject(self.volname, self.mnode, auth_dict) self.assertTrue(ret, "Failed to set auth.reject 
volume option.") - g.log.info("Successfully set auth.reject option.") # Setting auth.allow on d1 for client1 and client2 using ip auth_dict = {'/d1': [self.mounts[0].client_system, self.mounts[1].client_system]} ret = set_auth_allow(self.volname, self.mnode, auth_dict) self.assertTrue(ret, "Failed to set auth.allow volume option") - g.log.info("Successfully set auth.allow option.") # Creating mount object for sub-directory mount on client1 mount_obj_client1 = copy.deepcopy(self.mounts[0]) @@ -296,7 +287,6 @@ class VerifyAuthRejectPrecedence(GlusterBaseClass): auth_dict = {'/d1': [hostname_client1, hostname_client2]} ret = set_auth_allow(self.volname, self.mnode, auth_dict) self.assertTrue(ret, "Failed to set auth.allow volume option") - g.log.info("Successfully set auth.allow option.") # Trying to mount d1 on client1 self.unauthenticated_mount(mount_obj_client1) @@ -320,8 +310,9 @@ class VerifyAuthRejectPrecedence(GlusterBaseClass): """ Cleanup volume """ - g.log.info("Cleaning up volume") ret = self.cleanup_volume() if not ret: raise ExecutionError("Failed to cleanup volume.") - g.log.info("Volume cleanup was successful.") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/authentication/test_vol_auth.py b/tests/functional/authentication/test_vol_auth.py index 646ab3520..fa5e34a2f 100644 --- a/tests/functional/authentication/test_vol_auth.py +++ b/tests/functional/authentication/test_vol_auth.py @@ -30,27 +30,26 @@ from glustolibs.gluster.brick_libs import get_all_bricks, are_bricks_online from glustolibs.gluster.volume_libs import cleanup_volume -@runs_on([['replicated'], - ['glusterfs']]) +@runs_on([['replicated'], ['glusterfs']]) class AuthRejectVol(GlusterBaseClass): """ Create a replicated volume and start the volume and check if volume is started """ def setUp(self): + # Calling GlusterBaseClass Setup + self.get_super_method(self, 'setUp')() + # Setup Volume to create a replicated volume ret = 
self.setup_volume() if not ret: raise ExecutionError("Failed to setup volume %s" % self.volname) - g.log.info("Volume %s has been setup successfully", self.volname) # Check if volume is started volinfo = get_volume_info(self.mnode, self.volname) if volinfo[self.volname]['statusStr'] != "Started": raise ExecutionError("Volume has not Started") g.log.info("Volume is started.") - # Calling GlusterBaseClass Setup - self.get_super_method(self, 'setUp')() def tearDown(self): # tearDown for every test @@ -59,8 +58,6 @@ class AuthRejectVol(GlusterBaseClass): if not ret: raise ExecutionError("Failed to Cleanup the " "Volume %s" % self.volname) - g.log.info("Volume deleted successfully " - ": %s", self.volname) # Calling GlusterBaseClass tearDown self.get_super_method(self, 'tearDown')() @@ -90,7 +87,6 @@ class AuthRejectVol(GlusterBaseClass): for client in self.clients: # Fetching all the bricks self.mountpoint = '/mnt/testvol' - g.log.info("Fetching bricks for the volume : %s", self.volname) bricks_list = get_all_bricks(self.mnode, self.volname) self.assertIsNotNone(bricks_list, "Brick list is empty") g.log.info("Brick List : %s", bricks_list) @@ -98,7 +94,6 @@ class AuthRejectVol(GlusterBaseClass): # Check are bricks online ret = are_bricks_online(self.mnode, self.volname, bricks_list) self.assertTrue(ret, "All bricks are not online") - g.log.info("All bricks are online") # Creating directory to mount cmd = ("mkdir -p /mnt/testvol") @@ -138,7 +133,6 @@ class AuthRejectVol(GlusterBaseClass): # Check if bricks are online and Mounting the vol on client1 # Fetching bricks - g.log.info("Fetching bricks for the volume : %s", self.volname) bricks_list = get_all_bricks(self.mnode, self.volname) self.assertIsNotNone(bricks_list, "Brick list is empty") g.log.info("Brick List : %s", bricks_list) @@ -146,7 +140,6 @@ class AuthRejectVol(GlusterBaseClass): # Checking if bricks are online ret = are_bricks_online(self.mnode, self.volname, bricks_list) self.assertTrue(ret, "All bricks are 
not online") - g.log.info("All bricks are online") # Creating directory to mount cmd = ("mkdir -p /mnt/testvol") diff --git a/tests/functional/bvt/test_basic.py b/tests/functional/bvt/test_basic.py index a031850cf..bf6c94958 100644 --- a/tests/functional/bvt/test_basic.py +++ b/tests/functional/bvt/test_basic.py @@ -44,7 +44,6 @@ class TestGlusterdSanity(GlusterBaseClass): peers are in connected state after glusterd restarts. """ # restart glusterd on all servers - g.log.info("Restart glusterd on all servers %s", self.servers) ret = restart_glusterd(self.servers) self.assertTrue(ret, ("Failed to restart glusterd on all servers %s", self.servers)) @@ -52,15 +51,12 @@ class TestGlusterdSanity(GlusterBaseClass): self.servers) # Check if glusterd is running on all servers(expected: active) - g.log.info("Check if glusterd is running on all servers %s" - "(expected: active)", self.servers) ret = is_glusterd_running(self.servers) self.assertEqual(ret, 0, ("Glusterd is not running on all servers %s", self.servers)) g.log.info("Glusterd is running on all the servers %s", self.servers) # Stop glusterd on all servers - g.log.info("Stop glusterd on all servers %s", self.servers) ret = stop_glusterd(self.servers) self.assertTrue(ret, ("Failed to stop glusterd on all servers %s", self.servers)) @@ -68,8 +64,6 @@ class TestGlusterdSanity(GlusterBaseClass): self.servers) # Check if glusterd is running on all servers(expected: not running) - g.log.info("Check if glusterd is running on all servers %s" - "(expected: not running)", self.servers) ret = is_glusterd_running(self.servers) self.assertNotEqual(ret, 0, ("Glusterd is still running on some " "servers %s", self.servers)) @@ -77,7 +71,6 @@ class TestGlusterdSanity(GlusterBaseClass): self.servers) # Start glusterd on all servers - g.log.info("Start glusterd on all servers %s", self.servers) ret = start_glusterd(self.servers) self.assertTrue(ret, ("Failed to start glusterd on all servers %s", self.servers)) @@ -85,8 +78,6 @@ class 
TestGlusterdSanity(GlusterBaseClass): self.servers) # Check if glusterd is running on all servers(expected: active) - g.log.info("Check if glusterd is running on all servers %s" - "(expected: active)", self.servers) ret = is_glusterd_running(self.servers) self.assertEqual(ret, 0, ("Glusterd is not running on all servers %s", self.servers)) @@ -96,10 +87,8 @@ class TestGlusterdSanity(GlusterBaseClass): time.sleep(30) # Validate all the peers are in connected state - g.log.info("Validating all the peers are in Cluster and Connected") ret = self.validate_peers_are_connected() self.assertTrue(ret, "Validating Peers to be in Cluster Failed") - g.log.info("All peers are in connected state") self.test_method_complete = True @@ -108,7 +97,6 @@ class TestGlusterdSanity(GlusterBaseClass): """ if not self.test_method_complete: # restart glusterd on all servers - g.log.info("Restart glusterd on all servers %s", self.servers) ret = restart_glusterd(self.servers) if not ret: raise ExecutionError("Failed to restart glusterd on all " @@ -120,12 +108,10 @@ class TestGlusterdSanity(GlusterBaseClass): time.sleep(30) # Validate all the peers are in connected state - g.log.info("Validating all the peers are in Cluster and Connected") ret = self.validate_peers_are_connected() if not ret: raise ExecutionError("Validating Peers to be in Cluster " "Failed") - g.log.info("All peers are in connected state") # Calling GlusterBaseClass tearDown self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/bvt/test_cvt.py b/tests/functional/bvt/test_cvt.py index 9a2bcd9f7..f8cb4f2ba 100644 --- a/tests/functional/bvt/test_cvt.py +++ b/tests/functional/bvt/test_cvt.py @@ -74,8 +74,6 @@ class GlusterBasicFeaturesSanityBaseClass(GlusterBaseClass): cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts - g.log.info("Upload io scripts to clients %s for running IO on " - "mounts", cls.clients) cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" 
"file_dir_ops.py") ret = upload_scripts(cls.clients, cls.script_upload_path) @@ -86,6 +84,14 @@ class GlusterBasicFeaturesSanityBaseClass(GlusterBaseClass): cls.clients) cls.counter = 1 + + # Temporary code: + # Additional checks to gather information from all + # servers for Bug 1810901 and setting log level to debug. + ret = set_volume_options(cls.mnode, 'all', + {'cluster.daemon-log-level': 'DEBUG'}) + if not ret: + g.log.error('Failed to set cluster.daemon-log-level to DEBUG') # int: Value of counter is used for dirname-start-num argument for # file_dir_ops.py create_deep_dirs_with_files. @@ -109,11 +115,21 @@ class GlusterBasicFeaturesSanityBaseClass(GlusterBaseClass): self.get_super_method(self, 'setUp')() # Setup Volume and Mount Volume - g.log.info("Starting to Setup Volume and Mount Volume") ret = self.setup_volume_and_mount_volume(mounts=self.mounts) if not ret: raise ExecutionError("Failed to Setup_Volume and Mount_Volume") - g.log.info("Successful in Setup Volume and Mount Volume") + + # Temporary code: + # Additional checks to gather information from all + # servers for Bug 1810901 and setting log level to debug. 
+ for opt in ('diagnostics.brick-log-level', + 'diagnostics.client-log-level', + 'diagnostics.brick-sys-log-level', + 'diagnostics.client-sys-log-level'): + ret = set_volume_options(self.mnode, self.volname, + {opt: 'DEBUG'}) + if not ret: + g.log.error('Failed to set volume option %s', opt) # Start IO on mounts g.log.info("Starting IO on all mounts...") @@ -124,7 +140,7 @@ class GlusterBasicFeaturesSanityBaseClass(GlusterBaseClass): cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " "--dirname-start-num %d " "--dir-depth 2 " - "--dir-length 15 " + "--dir-length 10 " "--max-num-of-dirs 5 " "--num-of-files 5 %s" % ( self.script_upload_path, @@ -132,7 +148,7 @@ class GlusterBasicFeaturesSanityBaseClass(GlusterBaseClass): proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) self.all_mounts_procs.append(proc) - self.counter = self.counter + 10 + self.counter += 10 self.io_validation_complete = False # Adding a delay of 15 seconds before test method starts. This @@ -148,26 +164,19 @@ class GlusterBasicFeaturesSanityBaseClass(GlusterBaseClass): # Wait for IO to complete if io validation is not executed in the # test method if not self.io_validation_complete: - g.log.info("Wait for IO to complete as IO validation did not " - "succeed in test method") ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts) if not ret: raise ExecutionError("IO failed on some of the clients") - g.log.info("IO is successful on all mounts") # List all files and dirs created - g.log.info("List all files and directories:") ret = list_all_files_and_dirs_mounts(self.mounts) if not ret: raise ExecutionError("Failed to list all files and dirs") - g.log.info("Listing all files and directories is successful") # Unmount Volume and Cleanup Volume - g.log.info("Starting to Unmount Volume and Cleanup Volume") ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") - 
g.log.info("Successful in Unmount Volume and Cleanup Volume") # Calling GlusterBaseClass tearDown self.get_super_method(self, 'tearDown')() @@ -190,7 +199,6 @@ class TestGlusterExpandVolumeSanity(GlusterBasicFeaturesSanityBaseClass): - validate IO """ # Log Volume Info and Status before expanding the volume. - g.log.info("Logging volume info and Status before expanding volume") ret = log_volume_info_and_status(self.mnode, self.volname) self.assertTrue(ret, ("Logging volume info and status failed on " "volume %s", self.volname)) @@ -198,24 +206,17 @@ class TestGlusterExpandVolumeSanity(GlusterBasicFeaturesSanityBaseClass): self.volname) # Expanding volume by adding bricks to the volume when IO in progress - g.log.info("Start adding bricks to volume when IO in progress") ret = expand_volume(self.mnode, self.volname, self.servers, self.all_servers_info) self.assertTrue(ret, ("Failed to expand the volume when IO in " "progress on volume %s", self.volname)) - g.log.info("Expanding volume when IO in progress is successful on " - "volume %s", self.volname) # Wait for volume processes to be online - g.log.info("Wait for volume processes to be online") ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) self.assertTrue(ret, ("Failed to wait for volume %s processes to " "be online", self.volname)) - g.log.info("Successful in waiting for volume %s processes to be " - "online", self.volname) # Log Volume Info and Status after expanding the volume - g.log.info("Logging volume info and Status after expanding volume") ret = log_volume_info_and_status(self.mnode, self.volname) self.assertTrue(ret, ("Logging volume info and status failed on " "volume %s", self.volname)) @@ -223,14 +224,11 @@ class TestGlusterExpandVolumeSanity(GlusterBasicFeaturesSanityBaseClass): self.volname) # Verify volume's all process are online - g.log.info("Verifying volume's all process are online") ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) 
self.assertTrue(ret, ("Volume %s : All process are not online", self.volname)) - g.log.info("Volume %s : All process are online", self.volname) # Start Rebalance - g.log.info("Starting Rebalance on the volume") ret, _, _ = rebalance_start(self.mnode, self.volname) self.assertEqual(ret, 0, ("Failed to start rebalance on the volume " "%s", self.volname)) @@ -242,15 +240,14 @@ class TestGlusterExpandVolumeSanity(GlusterBasicFeaturesSanityBaseClass): _, _, _ = rebalance_status(self.mnode, self.volname) # Wait for rebalance to complete - g.log.info("Waiting for rebalance to complete") - ret = wait_for_rebalance_to_complete(self.mnode, self.volname) + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=1800) self.assertTrue(ret, ("Rebalance is not yet complete on the volume " "%s", self.volname)) g.log.info("Rebalance is successfully complete on the volume %s", self.volname) # Check Rebalance status after rebalance is complete - g.log.info("Checking Rebalance status") ret, _, _ = rebalance_status(self.mnode, self.volname) self.assertEqual(ret, 0, ("Failed to get rebalance status for the " "volume %s", self.volname)) @@ -263,10 +260,8 @@ class TestGlusterExpandVolumeSanity(GlusterBasicFeaturesSanityBaseClass): self.assertTrue(ret, "IO failed on some of the clients") # List all files and dirs created - g.log.info("List all files and directories:") ret = list_all_files_and_dirs_mounts(self.mounts) self.assertTrue(ret, "Failed to list all files and dirs") - g.log.info("Listing all files and directories is successful") @runs_on([['distributed', 'distributed-replicated', 'distributed-dispersed'], @@ -283,7 +278,6 @@ class TestGlusterShrinkVolumeSanity(GlusterBasicFeaturesSanityBaseClass): - validate IO """ # Log Volume Info and Status before shrinking the volume. 
- g.log.info("Logging volume info and Status before shrinking volume") ret = log_volume_info_and_status(self.mnode, self.volname) self.assertTrue(ret, ("Logging volume info and status failed on " "volume %s", self.volname)) @@ -291,23 +285,19 @@ class TestGlusterShrinkVolumeSanity(GlusterBasicFeaturesSanityBaseClass): self.volname) # Shrinking volume by removing bricks from volume when IO in progress - g.log.info("Start removing bricks from volume when IO in progress") ret = shrink_volume(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to shrink the volume when IO in " "progress on volume %s", self.volname)) g.log.info("Shrinking volume when IO in progress is successful on " "volume %s", self.volname) # Wait for volume processes to be online - g.log.info("Wait for volume processes to be online") ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) self.assertTrue(ret, ("Failed to wait for volume %s processes to " "be online", self.volname)) - g.log.info("Successful in waiting for volume %s processes to be " - "online", self.volname) # Log Volume Info and Status after shrinking the volume - g.log.info("Logging volume info and Status after shrinking volume") ret = log_volume_info_and_status(self.mnode, self.volname) self.assertTrue(ret, ("Logging volume info and status failed on " "volume %s", self.volname)) @@ -315,13 +305,9 @@ class TestGlusterShrinkVolumeSanity(GlusterBasicFeaturesSanityBaseClass): self.volname) # Verify volume's all process are online - g.log.info("Verifying volume's all process are online after " - "shrinking volume") ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) self.assertTrue(ret, ("Volume %s : All process are not online", self.volname)) - g.log.info("Volume %s : All process are online after shrinking volume", - self.volname) # Validate IO ret = validate_io_procs(self.all_mounts_procs, self.mounts) @@ -329,10 +315,8 @@ class TestGlusterShrinkVolumeSanity(GlusterBasicFeaturesSanityBaseClass): 
self.assertTrue(ret, "IO failed on some of the clients") # List all files and dirs created - g.log.info("List all files and directories:") ret = list_all_files_and_dirs_mounts(self.mounts) self.assertTrue(ret, "Failed to list all files and dirs") - g.log.info("Listing all files and directories is successful") @runs_on([['replicated', 'distributed', 'distributed-replicated', @@ -355,14 +339,11 @@ class TestGlusterVolumeSetSanity(GlusterBasicFeaturesSanityBaseClass): volume_options_list = ["features.uss", "features.shard"] # enable and validate the volume options - g.log.info("Setting the volume options: %s", volume_options_list) ret = enable_and_validate_volume_options(self.mnode, self.volname, volume_options_list, time_delay=30) self.assertTrue(ret, ("Unable to enable the volume options: %s", volume_options_list)) - g.log.info("Successfully enabled all the volume options: %s", - volume_options_list) # Validate IO ret = validate_io_procs(self.all_mounts_procs, self.mounts) @@ -370,10 +351,8 @@ class TestGlusterVolumeSetSanity(GlusterBasicFeaturesSanityBaseClass): self.assertTrue(ret, "IO failed on some of the clients") # List all files and dirs created - g.log.info("List all files and directories:") ret = list_all_files_and_dirs_mounts(self.mounts) self.assertTrue(ret, "Failed to list all files and dirs") - g.log.info("Listing all files and directories is successful") @runs_on([['replicated', 'distributed', 'distributed-replicated', @@ -388,14 +367,12 @@ class TestQuotaSanity(GlusterBasicFeaturesSanityBaseClass): in progress. 
""" # Enable Quota - g.log.info("Enabling quota on the volume %s", self.volname) ret, _, _ = quota_enable(self.mnode, self.volname) self.assertEqual(ret, 0, ("Failed to enable quota on the volume %s", self.volname)) g.log.info("Successfully enabled quota on the volume %s", self.volname) # Check if quota is enabled - g.log.info("Validate Quota is enabled on the volume %s", self.volname) ret = is_quota_enabled(self.mnode, self.volname) self.assertTrue(ret, ("Quota is not enabled on the volume %s", self.volname)) @@ -406,8 +383,6 @@ class TestQuotaSanity(GlusterBasicFeaturesSanityBaseClass): path = "/" # Set Quota limit on the root of the volume - g.log.info("Set Quota Limit on the path %s of the volume %s", - path, self.volname) ret, _, _ = quota_limit_usage(self.mnode, self.volname, path=path, limit="1GB") self.assertEqual(ret, 0, ("Failed to set quota limit on path %s of " @@ -416,8 +391,6 @@ class TestQuotaSanity(GlusterBasicFeaturesSanityBaseClass): path, self.volname) # quota_fetch_list - g.log.info("Get Quota list for path %s of the volume %s", - path, self.volname) quota_list = quota_fetch_list(self.mnode, self.volname, path=path) self.assertIsNotNone(quota_list, ("Failed to get the quota list for " "path %s of the volume %s", @@ -430,7 +403,6 @@ class TestQuotaSanity(GlusterBasicFeaturesSanityBaseClass): "volume %s", path, quota_list, self.volname) # Disable quota - g.log.info("Disable quota on the volume %s", self.volname) ret, _, _ = quota_disable(self.mnode, self.volname) self.assertEqual(ret, 0, ("Failed to disable quota on the volume %s", self.volname)) @@ -438,7 +410,6 @@ class TestQuotaSanity(GlusterBasicFeaturesSanityBaseClass): self.volname) # Check if quota is still enabled (expected : Disabled) - g.log.info("Validate Quota is enabled on the volume %s", self.volname) ret = is_quota_enabled(self.mnode, self.volname) self.assertFalse(ret, ("Quota is still enabled on the volume %s " "(expected: Disable) ", self.volname)) @@ -446,14 +417,12 @@ class 
TestQuotaSanity(GlusterBasicFeaturesSanityBaseClass): self.volname) # Enable Quota - g.log.info("Enabling quota on the volume %s", self.volname) ret, _, _ = quota_enable(self.mnode, self.volname) self.assertEqual(ret, 0, ("Failed to enable quota on the volume %s", self.volname)) g.log.info("Successfully enabled quota on the volume %s", self.volname) # Check if quota is enabled - g.log.info("Validate Quota is enabled on the volume %s", self.volname) ret = is_quota_enabled(self.mnode, self.volname) self.assertTrue(ret, ("Quota is not enabled on the volume %s", self.volname)) @@ -461,8 +430,6 @@ class TestQuotaSanity(GlusterBasicFeaturesSanityBaseClass): self.volname) # quota_fetch_list - g.log.info("Get Quota list for path %s of the volume %s", - path, self.volname) quota_list = quota_fetch_list(self.mnode, self.volname, path=path) self.assertIsNotNone(quota_list, ("Failed to get the quota list for " "path %s of the volume %s", @@ -480,10 +447,8 @@ class TestQuotaSanity(GlusterBasicFeaturesSanityBaseClass): self.assertTrue(ret, "IO failed on some of the clients") # List all files and dirs created - g.log.info("List all files and directories:") ret = list_all_files_and_dirs_mounts(self.mounts) self.assertTrue(ret, "Failed to list all files and dirs") - g.log.info("Listing all files and directories is successful") @runs_on([['replicated', 'distributed', 'distributed-replicated', @@ -500,8 +465,6 @@ class TestSnapshotSanity(GlusterBasicFeaturesSanityBaseClass): """ snap_name = "snap_cvt" # Create Snapshot - g.log.info("Creating snapshot %s of the volume %s", - snap_name, self.volname) ret, _, _ = snap_create(self.mnode, self.volname, snap_name) self.assertEqual(ret, 0, ("Failed to create snapshot with name %s " " of the volume %s", snap_name, @@ -510,8 +473,6 @@ class TestSnapshotSanity(GlusterBasicFeaturesSanityBaseClass): snap_name, self.volname) # List Snapshot - g.log.info("Listing the snapshot created for the volume %s", - self.volname) snap_list = 
get_snap_list(self.mnode) self.assertIsNotNone(snap_list, "Unable to get the Snapshot list") self.assertIn(snap_name, snap_list, @@ -520,8 +481,6 @@ class TestSnapshotSanity(GlusterBasicFeaturesSanityBaseClass): snap_name) # Activate the snapshot - g.log.info("Activating snapshot %s of the volume %s", - snap_name, self.volname) ret, _, _ = snap_activate(self.mnode, snap_name) self.assertEqual(ret, 0, ("Failed to activate snapshot with name %s " " of the volume %s", snap_name, @@ -533,8 +492,6 @@ class TestSnapshotSanity(GlusterBasicFeaturesSanityBaseClass): uss_options = ["features.uss"] if self.mount_type == "cifs": uss_options.append("features.show-snapshot-directory") - g.log.info("Enable uss options %s on the volume %s", uss_options, - self.volname) ret = enable_and_validate_volume_options(self.mnode, self.volname, uss_options, time_delay=30) @@ -544,14 +501,11 @@ class TestSnapshotSanity(GlusterBasicFeaturesSanityBaseClass): uss_options, self.volname) # Viewing snapshot from mount - g.log.info("Viewing Snapshot %s from mounts:", snap_name) ret = view_snaps_from_mount(self.mounts, snap_name) self.assertTrue(ret, ("Failed to View snap %s from mounts", snap_name)) g.log.info("Successfully viewed snap %s from mounts", snap_name) # De-Activate the snapshot - g.log.info("Deactivating snapshot %s of the volume %s", - snap_name, self.volname) ret, _, _ = snap_deactivate(self.mnode, snap_name) self.assertEqual(ret, 0, ("Failed to deactivate snapshot with name %s " " of the volume %s", snap_name, @@ -561,8 +515,6 @@ class TestSnapshotSanity(GlusterBasicFeaturesSanityBaseClass): # Viewing snapshot from mount (.snaps shouldn't be listed from mount) for mount_obj in self.mounts: - g.log.info("Viewing Snapshot %s from mount %s:%s", snap_name, - mount_obj.client_system, mount_obj.mountpoint) ret = view_snaps_from_mount(mount_obj, snap_name) self.assertFalse(ret, ("Still able to View snap %s from mount " "%s:%s", snap_name, @@ -571,8 +523,6 @@ class 
TestSnapshotSanity(GlusterBasicFeaturesSanityBaseClass): g.log.info("%s not listed under .snaps from mount %s:%s", snap_name, mount_obj.client_system, mount_obj.mountpoint) - g.log.info("%s not listed under .snaps from mounts after " - "deactivating ", snap_name) # Validate IO ret = validate_io_procs(self.all_mounts_procs, self.mounts) @@ -580,10 +530,8 @@ class TestSnapshotSanity(GlusterBasicFeaturesSanityBaseClass): self.assertTrue(ret, "IO failed on some of the clients") # List all files and dirs created - g.log.info("List all files and directories:") ret = list_all_files_and_dirs_mounts(self.mounts) self.assertTrue(ret, "Failed to list all files and dirs") - g.log.info("Listing all files and directories is successful") @runs_on([['replicated', 'distributed-replicated'], @@ -601,8 +549,6 @@ class TestGlusterReplaceBrickSanity(GlusterBasicFeaturesSanityBaseClass): - validate IO """ # Log Volume Info and Status before replacing brick from the volume. - g.log.info("Logging volume info and Status before replacing brick " - "from the volume %s", self.volname) ret = log_volume_info_and_status(self.mnode, self.volname) self.assertTrue(ret, ("Logging volume info and status failed on " "volume %s", self.volname)) @@ -610,23 +556,17 @@ class TestGlusterReplaceBrickSanity(GlusterBasicFeaturesSanityBaseClass): self.volname) # Replace brick from a sub-volume - g.log.info("Replace a faulty brick from the volume") ret = replace_brick_from_volume(self.mnode, self.volname, self.servers, self.all_servers_info) self.assertTrue(ret, "Failed to replace faulty brick from the volume") g.log.info("Successfully replaced faulty brick from the volume") # Wait for volume processes to be online - g.log.info("Wait for volume processes to be online") ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) self.assertTrue(ret, ("Failed to wait for volume %s processes to " "be online", self.volname)) - g.log.info("Successful in waiting for volume %s processes to be " - "online", 
self.volname) # Log Volume Info and Status after replacing the brick - g.log.info("Logging volume info and Status after replacing brick " - "from the volume %s", self.volname) ret = log_volume_info_and_status(self.mnode, self.volname) self.assertTrue(ret, ("Logging volume info and status failed on " "volume %s", self.volname)) @@ -634,19 +574,16 @@ class TestGlusterReplaceBrickSanity(GlusterBasicFeaturesSanityBaseClass): self.volname) # Verify volume's all process are online - g.log.info("Verifying volume's all process are online") ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) self.assertTrue(ret, ("Volume %s : All process are not online", self.volname)) - g.log.info("Volume %s : All process are online", self.volname) # Wait for self-heal to complete - g.log.info("Wait for self-heal to complete") - ret = monitor_heal_completion(self.mnode, self.volname) + ret = monitor_heal_completion(self.mnode, self.volname, + timeout_period=1800) self.assertTrue(ret, "Self heal didn't complete even after waiting " - "for 20 minutes. 20 minutes is too much a time for " + "for 30 minutes. 30 minutes is too much a time for " "current test workload") - g.log.info("self-heal is successful after replace-brick operation") # Validate IO ret = validate_io_procs(self.all_mounts_procs, self.mounts) @@ -654,10 +591,8 @@ class TestGlusterReplaceBrickSanity(GlusterBasicFeaturesSanityBaseClass): self.assertTrue(ret, "IO failed on some of the clients") # List all files and dirs created - g.log.info("List all files and directories:") ret = list_all_files_and_dirs_mounts(self.mounts) self.assertTrue(ret, "Failed to list all files and dirs") - g.log.info("Listing all files and directories is successful") # This test is disabled on nfs because of bug 1473668. 
A patch to apply the @@ -698,8 +633,6 @@ class TestGlusterHealSanity(GlusterBasicFeaturesSanityBaseClass): "'disperse.optimistic-change-log' to 'off'") # Log Volume Info and Status before simulating brick failure - g.log.info("Logging volume info and Status before bringing bricks " - "offlien from the volume %s", self.volname) ret = log_volume_info_and_status(self.mnode, self.volname) self.assertTrue(ret, ("Logging volume info and status failed on " "volume %s", self.volname)) @@ -709,22 +642,14 @@ class TestGlusterHealSanity(GlusterBasicFeaturesSanityBaseClass): # Select bricks to bring offline bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] # Bring bricks offline - g.log.info("Bringing bricks: %s offline", bricks_to_bring_offline) ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) self.assertTrue(ret, ("Failed to bring bricks: %s offline", bricks_to_bring_offline)) - g.log.info("Successful in bringing bricks: %s offline", - bricks_to_bring_offline) # Log Volume Info and Status - g.log.info("Logging volume info and Status after bringing bricks " - "offline from the volume %s", self.volname) ret = log_volume_info_and_status(self.mnode, self.volname) self.assertTrue(ret, ("Logging volume info and status failed on " "volume %s", self.volname)) @@ -732,20 +657,15 @@ class TestGlusterHealSanity(GlusterBasicFeaturesSanityBaseClass): self.volname) # Validate if bricks are offline - g.log.info("Validating if bricks: %s are offline", - bricks_to_bring_offline) ret = are_bricks_offline(self.mnode, self.volname, bricks_to_bring_offline) self.assertTrue(ret, ("Not all the bricks in list: %s are offline", bricks_to_bring_offline)) - 
g.log.info("Successfully validated that bricks: %s are all offline", - bricks_to_bring_offline) # Add delay before bringing bricks online time.sleep(40) # Bring bricks online - g.log.info("Bring bricks: %s online", bricks_to_bring_offline) ret = bring_bricks_online(self.mnode, self.volname, bricks_to_bring_offline) self.assertTrue(ret, ("Failed to bring bricks: %s online", @@ -754,16 +674,12 @@ class TestGlusterHealSanity(GlusterBasicFeaturesSanityBaseClass): bricks_to_bring_offline) # Wait for volume processes to be online - g.log.info("Wait for volume processes to be online") - ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname, + timeout=400) self.assertTrue(ret, ("Failed to wait for volume %s processes to " "be online", self.volname)) - g.log.info("Successful in waiting for volume %s processes to be " - "online", self.volname) # Log Volume Info and Status - g.log.info("Logging volume info and Status after bringing bricks " - "online from the volume %s", self.volname) ret = log_volume_info_and_status(self.mnode, self.volname) self.assertTrue(ret, ("Logging volume info and status failed on " "volume %s", self.volname)) @@ -771,7 +687,6 @@ class TestGlusterHealSanity(GlusterBasicFeaturesSanityBaseClass): self.volname) # Verify volume's all process are online - g.log.info("Verifying volume's all process are online") ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) self.assertTrue(ret, ("Volume %s : All process are not online", self.volname)) @@ -779,9 +694,10 @@ class TestGlusterHealSanity(GlusterBasicFeaturesSanityBaseClass): # Wait for self-heal to complete g.log.info("Wait for self-heal to complete") - ret = monitor_heal_completion(self.mnode, self.volname) + ret = monitor_heal_completion(self.mnode, self.volname, + timeout_period=1800) self.assertTrue(ret, "Self heal didn't complete even after waiting " - "for 20 minutes. 
20 minutes is too much a time for " + "for 30 minutes. 30 minutes is too much a time for " "current test workload") g.log.info("self-heal is successful after replace-brick operation") @@ -791,7 +707,5 @@ class TestGlusterHealSanity(GlusterBasicFeaturesSanityBaseClass): self.assertTrue(ret, "IO failed on some of the clients") # List all files and dirs created - g.log.info("List all files and directories:") ret = list_all_files_and_dirs_mounts(self.mounts) self.assertTrue(ret, "Failed to list all files and dirs") - g.log.info("Listing all files and directories is successful") diff --git a/tests/functional/bvt/test_sosreport_interoperability.py b/tests/functional/bvt/test_sosreport_interoperability.py new file mode 100644 index 000000000..3f1081a57 --- /dev/null +++ b/tests/functional/bvt/test_sosreport_interoperability.py @@ -0,0 +1,141 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +# pylint: disable=too-many-statements, too-many-locals + +from unittest import SkipTest +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.glusterdir import get_dir_contents + + +@runs_on([['arbiter', 'distributed-replicated', 'distributed-dispersed'], + ['glusterfs', 'cifs']]) +class ValidateSosreportBehavior(GlusterBaseClass): + """ + This testcase validates sosreport behavior with glusterfs + """ + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + self.all_mounts_procs = [] + self.io_validation_complete = False + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=[self.mounts[0]], + volume_create_force=False) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + def tearDown(self): + """tearDown""" + ret = self.unmount_volume_and_cleanup_volume(mounts=[self.mounts[0]]) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + self.get_super_method(self, 'tearDown')() + + def test_sosreport_behavior_for_glusterfs(self): + ''' + Test Steps: + 1) Download sos package if not installed + 2) Fetch Sos version for reference + 3) Note down all files in below locations before taking sosreport: + a) /var/run/gluster + b) /run/gluster + c) /var/lib/glusterd + d) /var/log/glusterfs + 4) Take the sosreport + 5) Again note down the list of all gluster file in locations mentioned + in step#3. 
The list of files in this step should match step#3 + 6) untar the sosreport to see if gluster files are packaged + ''' + + # Fetching sosreport version for information + ret, version, _ = g.run(self.servers[1], 'rpm -qa|grep sos') + if version[4:9] in ('3.8-6', '3.8-7', '3.8-8'): + raise SkipTest("Skipping testcase as bug is fixed in " + "sosreport version 3.8.9") + g.log.info("sos version is %s", version) + + # Noting down list of entries in gluster directories before sos + gluster_contents_before_sos = [] + gluster_dirs = ('/var/run/gluster*', '/run/gluster*', + '/var/lib/glusterd', '/var/log/glusterfs') + for gdir in gluster_dirs: + ret = get_dir_contents(self.servers[1], gdir, recursive=True) + gluster_contents_before_sos.append(ret) + + # Check for any existing sosreport + var_tmp_dircontents_before_sos = get_dir_contents(self.servers[1], + '/var/tmp/') + + # Collect sosreport + ret, _, err = g.run(self.servers[1], + 'sosreport --batch --name=$HOSTNAME') + self.assertEqual(ret, 0, "failed to fetch sosreport due to {}" + .format(err)) + + # Checking /var/tmp contents + var_tmp_dircontents_after_sos = get_dir_contents(self.servers[1], + '/var/tmp/') + + # Recheck if all gluster files still exist + gluster_contents_after_sos = [] + for gdir in gluster_dirs: + ret = get_dir_contents(self.servers[1], gdir, recursive=True) + gluster_contents_after_sos.append(ret) + + # Compare glusterfiles before and after taking sosreport + # There should be no difference in contents + # Ignoring /var/log/glusterfs ie last element of the list, to avoid + # false negatives as sosreport triggers heal which creates new logs + # and obvious difference in list of entries post sos + self.assertTrue((gluster_contents_before_sos[:-1] == + gluster_contents_after_sos[:-1]), + "Gluster files not matching before and after " + " sosreport generation {} and {}" + .format(gluster_contents_before_sos, + gluster_contents_after_sos)) + + # Untar sosreport to check if gluster files are captured + 
sosfile = list(set(var_tmp_dircontents_after_sos) - + set(var_tmp_dircontents_before_sos)) + sosfile.sort() + untar_sosfile_cmd = 'tar -xvf /var/tmp/' + sosfile[0] + ' -C /var/tmp/' + ret, _, err = g.run(self.servers[1], untar_sosfile_cmd) + self.assertEqual(ret, 0, "Untar failed due to {}".format(err)) + dirchecks = ('/var/lib/glusterd', '/var/log/glusterfs') + olddirs = [gluster_contents_after_sos[2], + gluster_contents_after_sos[3]] + ret = {} + for after, before in zip(dirchecks, olddirs): + untar_dirpath = '/var/tmp/' + sosfile[0][0:-7] + untardir = untar_dirpath + after + _ = get_dir_contents(self.servers[1], untardir, recursive=True) + ret[after] = list(x.split(untar_dirpath, 1)[-1] for x in _) + if before == gluster_contents_after_sos[2]: + self.assertTrue(bool(before == ret[after]), 'gluster ' + ' sosreport may be missing as they dont match ' + 'with actual contents') + else: + # Need this logic for var/log/glusterfs entries as rotated(.gz) + # logs are not collected by sos + self.assertTrue(all(entry in before for entry in ret[after]), + 'var-log-glusterfs entries in sosreport may be' + ' missing as they dont match with actual ' + 'contents') diff --git a/tests/functional/bvt/test_verify_volume_sanity.py b/tests/functional/bvt/test_verify_volume_sanity.py index 6f92a111b..2013d0b1d 100644 --- a/tests/functional/bvt/test_verify_volume_sanity.py +++ b/tests/functional/bvt/test_verify_volume_sanity.py @@ -32,11 +32,13 @@ from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) class VerifyVolumeSanity(GlusterBaseClass): def setUp(self): + + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + # Setup Volume and Mount Volume - g.log.info("Starting to Setup Volume and Mount Volume") ret = self.setup_volume_and_mount_volume(mounts=self.mounts) self.assertTrue(ret, ("Failed to Setup_Volume and Mount_Volume")) - g.log.info("Successful in Setup Volume and Mount Volume") def test_volume_sanity(self): """ @@ -78,7 +80,8 @@ 
class VerifyVolumeSanity(GlusterBaseClass): def tearDown(self): # Stopping the volume - g.log.info("Starting to Unmount Volume and Cleanup Volume") ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) self.assertTrue(ret, ("Failed to Unmount Volume and Cleanup Volume")) - g.log.info("Successful in Unmount Volume and Cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/bvt/test_vvt.py b/tests/functional/bvt/test_vvt.py index 4ad7bfc6a..13ffd9dde 100644 --- a/tests/functional/bvt/test_vvt.py +++ b/tests/functional/bvt/test_vvt.py @@ -49,8 +49,6 @@ class VolumeAccessibilityTests(GlusterBaseClass): cls.get_super_method(cls, 'setUpClass')() # Upload io scripts for running IO on mounts - g.log.info("Upload io scripts to clients %s for running IO on " - "mounts", cls.clients) cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" "file_dir_ops.py") ret = upload_scripts(cls.clients, cls.script_upload_path) @@ -67,21 +65,17 @@ class VolumeAccessibilityTests(GlusterBaseClass): self.get_super_method(self, 'setUp')() # Setup_Volume - g.log.info("Starting to Setup Volume %s", self.volname) ret = self.setup_volume() if not ret: raise ExecutionError("Failed to Setup Volume %s" % self.volname) - g.log.info("Successful in Setup Volume %s", self.volname) def tearDown(self): """Cleanup the volume """ # Cleanup Volume - g.log.info("Starting to Setup Volume %s", self.volname) ret = self.cleanup_volume() if not ret: raise ExecutionError("Failed to Setup_Volume %s" % self.volname) - g.log.info("Successful in Setup Volume %s", self.volname) # Calling GlusterBaseClass tearDown self.get_super_method(self, 'tearDown')() @@ -93,7 +87,6 @@ class VolumeAccessibilityTests(GlusterBaseClass): start of the volume. 
""" # Verify volume processes are online - g.log.info("Verify volume %s processes are online", self.volname) ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) self.assertTrue(ret, ("Volume %s : All process are not online" % self.volname)) @@ -101,27 +94,21 @@ class VolumeAccessibilityTests(GlusterBaseClass): self.volname) # Stop Volume - g.log.info("Stopping Volume %s", self.volname) ret, _, _ = volume_stop(self.mnode, self.volname, force=True) self.assertEqual(ret, 0, "Failed to stop volume %s" % self.volname) g.log.info("Successfully stopped volume %s", self.volname) # Start Volume - g.log.info("Starting Volume %s", self.volname) ret, _, _ = volume_start(self.mnode, self.volname) self.assertEqual(ret, 0, "Failed to start volume %s" % self.volname) g.log.info("Successfully started volume %s", self.volname) # Wait for volume processes to be online - g.log.info("Wait for volume processes to be online") ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) self.assertTrue(ret, ("Failed to wait for volume %s processes to " "be online", self.volname)) - g.log.info("Successful in waiting for volume %s processes to be " - "online", self.volname) # Log Volume Info and Status - g.log.info("Logging Volume %s Info and Status", self.volname) ret = log_volume_info_and_status(self.mnode, self.volname) self.assertTrue(ret, ("Failed to Log volume %s info and status", self.volname)) @@ -129,7 +116,6 @@ class VolumeAccessibilityTests(GlusterBaseClass): self.volname) # Verify volume's all process are online - g.log.info("Verify volume %s processes are online", self.volname) ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) self.assertTrue(ret, ("Volume %s : All process are not online" % self.volname)) @@ -137,7 +123,6 @@ class VolumeAccessibilityTests(GlusterBaseClass): self.volname) # Log Volume Info and Status - g.log.info("Logging Volume %s Info and Status", self.volname) ret = log_volume_info_and_status(self.mnode, 
self.volname) self.assertTrue(ret, ("Failed to Log volume %s info and status", self.volname)) @@ -145,8 +130,6 @@ class VolumeAccessibilityTests(GlusterBaseClass): self.volname) # Check if glusterd is running on all servers(expected: active) - g.log.info("Check if glusterd is running on all servers" - "(expected: active)") ret = is_glusterd_running(self.servers) self.assertEqual(ret, 0, "Glusterd is not running on all servers") g.log.info("Glusterd is running on all the servers") @@ -156,10 +139,8 @@ class VolumeAccessibilityTests(GlusterBaseClass): """Test File Directory Creation on the volume. """ # Mount Volume - g.log.info("Starting to Mount Volume %s", self.volname) ret = self.mount_volume(self.mounts) self.assertTrue(ret, ("Failed to Mount Volume %s", self.volname)) - g.log.info("Successful in Mounting Volume %s", self.volname) # Start IO on all mounts. all_mounts_procs = [] @@ -178,22 +159,16 @@ class VolumeAccessibilityTests(GlusterBaseClass): proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) all_mounts_procs.append(proc) - count = count + 10 + count += 10 # Validate IO - g.log.info("Validating IO's") ret = validate_io_procs(all_mounts_procs, self.mounts) self.assertTrue(ret, "IO failed on some of the clients") - g.log.info("Successfully validated all io's") # Get stat of all the files/dirs created. 
- g.log.info("Get stat of all the files/dirs created.") ret = get_mounts_stat(self.mounts) self.assertTrue(ret, "Stat failed on some of the clients") - g.log.info("Successfully got stat of all files/dirs created") # UnMount Volume - g.log.info("Starting to Unmount Volume %s", self.volname) ret = self.unmount_volume(self.mounts) self.assertTrue(ret, ("Failed to Unmount Volume %s" % self.volname)) - g.log.info("Successfully Unmounted Volume %s", self.volname) diff --git a/tests/functional/ctime_feature/__init__.py b/tests/functional/ctime_feature/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/tests/functional/ctime_feature/__init__.py diff --git a/tests/functional/ctime_feature/test_consistent_timestamps_feature.py b/tests/functional/ctime_feature/test_consistent_timestamps_feature.py new file mode 100644 index 000000000..a5e85e6db --- /dev/null +++ b/tests/functional/ctime_feature/test_consistent_timestamps_feature.py @@ -0,0 +1,205 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from re import sub +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.io.utils import run_crefi +from glustolibs.gluster.brick_libs import get_subvols +from glustolibs.gluster.glusterdir import (rmdir, get_dir_contents) +from glustolibs.gluster.lib_utils import get_extended_attributes_info +from glustolibs.gluster.volume_libs import get_volume_type_info +from glustolibs.gluster.volume_ops import set_volume_options + + +@runs_on([['distributed', 'replicated', 'distributed-replicated', 'dispersed', + 'distributed-dispersed', 'arbiter', 'distributed-arbiter'], + ['glusterfs']]) +class ValidateCtimeFeatures(GlusterBaseClass): + """ + This testcase validates ctime(consistent times) feature + """ + + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + self.all_mounts_procs = [] + self.io_validation_complete = False + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=[self.mounts[0]], + volume_create_force=False) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """tearDown""" + self.get_super_method(self, 'tearDown')() + ret = self.unmount_volume_and_cleanup_volume(mounts=[self.mounts[0]]) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Need to get list of host and resp brickpaths + # Get list of entries under any one path + # Get xattr values on each brick of same path + # Compare them to see if mdata exists and value is same + # For arbiter the value may not be same on arbiter brick + + def validate_xattr_values(self, dirname, ctime=True): + """Validate existence and consistency of a specific + xattr value across replica set + + Args: + 
dirname (str): parent directory name + Kwargs: + ctime(bool): ctime feature enablement + """ + # pylint: disable=too-many-branches + # Fetch all replica sets(subvols) in the volume + ret = get_subvols(self.mnode, self.volname) + # Iterating through each subvol(replicaset) + for subvol in ret['volume_subvols']: + brick_host_list = {} # Dict for storing host,brickpath pairs + for each in subvol: # Fetching each replica in replica set + # Splitting to brick,hostname pairs + host, brick_path = each.split(':') + brick_host_list[host] = brick_path + # Fetch Complete parent directory path + directory = brick_path + '/' + dirname + # Fetching all entries recursively in a replicaset + entry_list = get_dir_contents(host, directory, recursive=True) + for each in entry_list: + xattr_value = [] # list to store xattr value + # Logic to get xattr values + for host, brickpath in brick_host_list.items(): + # Remove the prefix brick_path from entry-name + each = sub(brick_path, '', each) + # Adding the right brickpath name for fetching xattrval + brick_entry_path = brickpath + each + ret = get_extended_attributes_info(host, + [brick_entry_path], + encoding='hex', + attr_name='trusted' + '.glusterfs.' 
+ 'mdata') + if ret: + ret = ret[brick_entry_path]['trusted.glusterfs.mdata'] + g.log.info("mdata xattr value of %s is %s", + brick_entry_path, ret) + else: + pass + if ctime: + self.assertIsNotNone(ret, "glusterfs.mdata not set on" + " {}" + .format(brick_entry_path)) + g.log.info("mdata xattr %s is set on the back-end" + " bricks", ret) + else: + self.assertIsNone(ret, "trusted.glusterfs.mdata seen " + " on {}" + .format(brick_entry_path)) + g.log.info("mdata xattr %s is not set on the back-end" + " bricks", ret) + xattr_value.append(ret) + voltype = get_volume_type_info(self.mnode, self.volname) + if voltype['volume_type_info']['arbiterCount'] == '0': + ret = bool(xattr_value.count(xattr_value[0]) == + len(xattr_value)) + elif voltype['volume_type_info']['arbiterCount'] == '1': + ret = bool(((xattr_value.count(xattr_value[0])) or + (xattr_value.count(xattr_value[1])) > 1)) + else: + g.log.error("Arbiter value is neither 0 nor 1") + if ctime: + self.assertTrue(ret, 'trusted.glusterfs.mdata' + + ' value not same across bricks for ' + 'entry ' + each) + else: + self.assertTrue(ret, 'trusted.glusterfs.mdata' + + ' seems to be set on some bricks for ' + + each) + + def data_create(self, dirname): + """Create different files and directories""" + dirname = self.mounts[0].mountpoint + '/' + dirname + list_of_fops = ["create", "rename", "chmod", "chown", "chgrp", + "hardlink", "truncate", "setxattr"] + for fops in list_of_fops: + ret = run_crefi(self.mounts[0].client_system, + dirname, 10, 3, 3, thread=4, + random_size=True, fop=fops, minfs=0, + maxfs=102400, multi=True, random_filename=True) + self.assertTrue(ret, "crefi failed during {}".format(fops)) + g.log.info("crefi PASSED FOR fop %s", fops) + g.log.info("IOs were successful using crefi") + + def data_delete(self, dirname): + """Delete created data""" + dirname = self.mounts[0].mountpoint + '/' + dirname + ret = rmdir(self.mounts[0].client_system, dirname, force=True) + self.assertTrue(ret, 'deletion of data 
failed') + + def test_consistent_timestamps_feature(self): + ''' + Test Steps: + 1. Create a volume, enable features.ctime, mount volume + 2. Create different files and directories + 3. For each entry trusted.glusterfs.mdata must be set + 4. For every entry, above xattr must match on each brick of replicaset + 5. Delete all data created + 6. turn off features.ctime + 7. Again create different files and directories + 8. "glusterfs.mdata xattr" must not be present for any entry + 9. Delete created data + ''' + # pylint: disable=too-many-statements + + # Enable features.ctime + ret = set_volume_options(self.mnode, self.volname, + {'features.ctime': 'on'}) + self.assertTrue(ret, 'failed to enable ctime feature on %s' + % self.volume) + g.log.info("Successfully enabled ctime feature on %s", self.volume) + + # Create different files and directories + self.data_create('ctime-on') + + # Check if mdata xattr has been set for all entries + # Check if the values are same across all replica copies + self.validate_xattr_values('ctime-on') + + # Delete all the existing data + self.data_delete('ctime-on') + + # Disable features.ctime + ret = set_volume_options(self.mnode, self.volname, + {'features.ctime': 'off'}) + self.assertTrue(ret, 'failed to disable features_ctime feature on %s' + % self.volume) + g.log.info("Successfully disabled ctime feature on %s", self.volume) + + # Create different files and directories + self.data_create('ctime-off') + + # Check that mdata xattr has not been set for any entries + self.validate_xattr_values('ctime-off', ctime=False) + + # Delete all the existing data + self.data_delete('ctime-off') diff --git a/tests/functional/ctime_feature/test_consistent_timestamps_on_new_entries.py b/tests/functional/ctime_feature/test_consistent_timestamps_on_new_entries.py new file mode 100644 index 000000000..9b1588bf6 --- /dev/null +++ b/tests/functional/ctime_feature/test_consistent_timestamps_on_new_entries.py @@ -0,0 +1,127 @@ +# Copyright (C) 2020 Red Hat, 
Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_ops import (get_volume_options, + set_volume_options) +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.glusterfile import get_file_stat + + +@runs_on([['replicated', 'distributed', 'distributed-replicated', 'dispersed', + 'distributed-dispersed', 'arbiter', 'distributed-arbiter'], + ['glusterfs']]) +class ConsistentValuesAcrossTimeStamps(GlusterBaseClass): + """ + This testcase tests for atime, ctime and mtime to be same when a + file or directory is created + """ + + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + self.all_mounts_procs = [] + self.io_validation_complete = False + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts, + volume_create_force=False) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + If test method failed before validating IO, tearDown waits for the + IO's to complete and checks for 
the IO exit status + Cleanup and umount volume + """ + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def validate_timestamp(self, objectpath, objectname): + ret = get_file_stat(self.mounts[0].client_system, objectpath) + self.assertTrue(bool(ret["atime"] == ret["ctime"] == ret["mtime"]), + "a|m|c timestamps on {} are not equal" + .format(objectname)) + g.log.info("a|m|c timestamps on %s are same", objectname) + + def test_time_stamps_on_create(self): + ''' + 1. Create a volume , enable features.ctime, mount volume + 2. Create a directory "dir1" and check the a|m|c times + 3. Create a file "file1" and check the a|m|c times + 4. Again create a new file "file2" as below + command>>> touch file2;stat file2;stat file2 + 5. Check the a|m|c times of "file2" + 6. 
The atime,ctime,mtime must be same within each object + ''' + # pylint: disable=too-many-statements + + # Check if ctime feature is disabled by default + ret = get_volume_options(self.mnode, self.volname, "features.ctime") + self.assertEqual(ret['features.ctime'], 'off', + 'features_ctime is not disabled by default') + g.log.info("ctime feature is disabled by default as expected") + + # Enable features.ctime + ret = set_volume_options(self.mnode, self.volname, + {'features.ctime': 'on'}) + self.assertTrue(ret, 'failed to enable features_ctime feature on %s' + % self.volume) + g.log.info("Successfully enabled ctime feature on %s", self.volume) + + # Create a directory and check if ctime, mtime, atime is same + objectname = 'dir1' + objectpath = ('%s/%s' % (self.mounts[0].mountpoint, objectname)) + ret = mkdir(self.mounts[0].client_system, objectpath) + self.assertTrue(ret, "{} creation failed".format(objectname)) + g.log.info("%s was successfully created on %s", objectname, + self.mounts[0]) + self.validate_timestamp(objectpath, objectname) + + # Create a file and check if ctime, mtime, atime is same + objectname = 'file1' + objectpath = ('%s/%s' % (self.mounts[0].mountpoint, objectname)) + cmd = ('touch %s' % objectpath) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "touch command to create {} has " + "failed".format(objectname)) + g.log.info("%s was successfully created on %s", objectname, + self.mounts[0]) + self.validate_timestamp(objectpath, objectname) + + # Create a file and issue stat immediately. 
This step helps in + # testing a corner case where issuing stat immediately was changing + # ctime before the touch was effected on the disk + objectname = 'file2' + objectpath = ('%s/%s' % (self.mounts[0].mountpoint, objectname)) + cmd = ("touch {obj};stat {obj};stat {obj}".format(obj=objectpath)) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "touch command to create {} has " + "failed".format(objectname)) + g.log.info("%s was successfully created on %s", objectname, + self.mounts[0]) + self.validate_timestamp(objectpath, objectname) diff --git a/tests/functional/dht/test_access_file.py b/tests/functional/dht/test_access_file.py index ac8074a81..bb56e40b7 100644 --- a/tests/functional/dht/test_access_file.py +++ b/tests/functional/dht/test_access_file.py @@ -52,6 +52,18 @@ class TestFileAccessSubvolDown(GlusterBaseClass): g.log.error("Failed to Setup and Mount Volume") raise ExecutionError("Failed to Setup and Mount Volume") + def tearDown(self): + + # Unmount and cleanup original volume + g.log.info("Starting to Unmount Volume and Cleanup Volume") + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + def test_file_access(self): """ Test file access. 
@@ -158,15 +170,3 @@ class TestFileAccessSubvolDown(GlusterBaseClass): ret, _, _ = g.run(self.clients[0], ("stat %s" % dstfile)) self.assertEqual(ret, 1, ('stat error on for file %s', dstfile)) g.log.info("dstfile access failed as expected") - - @classmethod - def tearDownClass(cls): - # Unmount Volume and Cleanup Volume - g.log.info("Starting to Unmount Volume and Cleanup Volume") - ret = cls.unmount_volume_and_cleanup_volume(mounts=cls.mounts) - if not ret: - raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") - g.log.info("Successful in Unmount Volume and Cleanup Volume") - - # Calling GlusterBaseClass tearDown - cls.get_super_method(cls, 'tearDownClass')() diff --git a/tests/functional/dht/test_access_file_with_stale_linkto_xattr.py b/tests/functional/dht/test_access_file_with_stale_linkto_xattr.py new file mode 100644 index 000000000..c40d33935 --- /dev/null +++ b/tests/functional/dht/test_access_file_with_stale_linkto_xattr.py @@ -0,0 +1,169 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.lib_utils import add_user, del_user, set_passwd +from glustolibs.gluster.volume_ops import (set_volume_options, + reset_volume_option) +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.dht_test_utils import find_new_hashed +from glustolibs.gluster.glusterfile import move_file, is_linkto_file +from glustolibs.gluster.glusterfile import set_file_permissions + + +@runs_on([['distributed', 'distributed-arbiter', + 'distributed-replicated', 'distributed-dispersed'], + ['glusterfs']]) +class TestAccessFileWithStaleLinktoXattr(GlusterBaseClass): + def setUp(self): + """ + Setup and mount volume or raise ExecutionError + """ + self.get_super_method(self, 'setUp')() + + # Setup Volume + ret = self.setup_volume_and_mount_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Failed to Setup and Mount Volume") + + # Add a new user to the clients + ret = add_user(self.clients[0], "test_user1") + if ret is not True: + raise ExecutionError("Failed to add user") + + # Set password for user "test_user1" + ret = set_passwd(self.clients[0], "test_user1", "red123") + if ret is not True: + raise ExecutionError("Failed to set password") + + # Geneate ssh key on local host + cmd = 'echo -e "n" | ssh-keygen -f ~/.ssh/id_rsa -q -N ""' + ret, out, _ = g.run_local(cmd) + if ret and "already exists" not in out: + raise ExecutionError("Failed to generate ssh-key") + g.log.info("Successfully generated ssh-key") + + # Perform ssh-copy-id + cmd = ('sshpass -p "red123" ssh-copy-id -o StrictHostKeyChecking=no' + ' test_user1@{}'.format(self.clients[0])) + ret, _, _ = g.run_local(cmd) + if ret: + raise ExecutionError("Failed to perform ssh-copy-id") + g.log.info("Successfully performed ssh-copy-id") + + def tearDown(self): + # Delete the added user + ret = 
del_user(self.clients[0], "test_user1") + if ret is not True: + raise ExecutionError("Failed to delete user") + + # Reset the volume options set inside the test + for opt in ('performance.parallel-readdir', + 'performance.readdir-ahead'): + ret, _, _ = reset_volume_option(self.mnode, self.volname, opt) + if ret: + raise ExecutionError("Failed to reset the volume option %s" + % opt) + g.log.info("Successfully reset the volume options") + + # Unmount and cleanup original volume + ret = self.unmount_volume_and_cleanup_volume(mounts=[self.mounts[0]]) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_access_file_with_stale_linkto_xattr(self): + """ + Description: Checks if the files are accessible as non-root user if + the files have stale linkto xattr. + Steps: + 1) Create a volume and start it. + 2) Mount the volume on client node using FUSE. + 3) Create a file. + 4) Enable performance.parallel-readdir and + performance.readdir-ahead on the volume. + 5) Rename the file in order to create + a linkto file. + 6) Force the linkto xattr values to become stale by changing the dht + subvols in the graph + 7) Login as an non-root user and access the file. 
+ """ + # pylint: disable=protected-access + + # Set permissions on the mount-point + m_point = self.mounts[0].mountpoint + ret = set_file_permissions(self.clients[0], m_point, "-R 777") + self.assertTrue(ret, "Failed to set file permissions") + g.log.info("Successfully set file permissions on mount-point") + + # Creating a file on the mount-point + cmd = 'dd if=/dev/urandom of={}/FILE-1 count=1 bs=16k'.format( + m_point) + ret, _, _ = g.run(self.clients[0], cmd) + self.assertEqual(ret, 0, "File to create file") + + # Enable performance.parallel-readdir and + # performance.readdir-ahead on the volume + options = {"performance.parallel-readdir": "enable", + "performance.readdir-ahead": "enable"} + ret = set_volume_options(self.mnode, self.volname, options) + self.assertTrue(ret, "Failed to set volume options") + g.log.info("Successfully set volume options") + + # Finding a file name such that renaming source file to it will form a + # linkto file + subvols = (get_subvols(self.mnode, self.volname))['volume_subvols'] + newhash = find_new_hashed(subvols, "/", "FILE-1") + new_name = str(newhash.newname) + new_host = str(newhash.hashedbrickobject._host) + new_name_path = str(newhash.hashedbrickobject._fqpath)[:-1] + + # Move file such that it hashes to some other subvol and forms linkto + # file + ret = move_file(self.clients[0], "{}/FILE-1".format(m_point), + "{}/{}".format(m_point, new_name)) + self.assertTrue(ret, "Rename failed") + g.log.info('Renamed file %s to %s', + "{}/FILE-1".format(m_point), + "{}/{}".format(m_point, new_name)) + + # Check if "dst_file" is linkto file + ret = is_linkto_file(new_host, + '{}{}'.format(new_name_path, new_name)) + self.assertTrue(ret, "File is not a linkto file") + g.log.info("File is linkto file") + + # Force the linkto xattr values to become stale by changing the dht + # subvols in the graph; for that: + # disable performance.parallel-readdir and + # performance.readdir-ahead on the volume + options = 
{"performance.parallel-readdir": "disable", + "performance.readdir-ahead": "disable"} + ret = set_volume_options(self.mnode, self.volname, options) + self.assertTrue(ret, "Failed to disable volume options") + g.log.info("Successfully disabled volume options") + + # Access the file as non-root user + cmd = "ls -lR {}".format(m_point) + ret, _, _ = g.run(self.mounts[0].client_system, cmd, + user="test_user1") + self.assertEqual(ret, 0, "Lookup failed ") + g.log.info("Lookup successful") diff --git a/tests/functional/dht/test_accessing_file_when_dht_layout_is_stale.py b/tests/functional/dht/test_accessing_file_when_dht_layout_is_stale.py new file mode 100644 index 000000000..e7f89d84e --- /dev/null +++ b/tests/functional/dht/test_accessing_file_when_dht_layout_is_stale.py @@ -0,0 +1,181 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from glusto.core import Glusto as g + +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.glusterfile import get_fattr, set_fattr +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.io.utils import collect_mounts_arequal + + +# pylint: disable=too-many-locals +@runs_on([['distributed'], ['glusterfs']]) +class TestAccessFileStaleLayout(GlusterBaseClass): + def setUp(self): + self.get_super_method(self, 'setUp')() + + self.volume['voltype']['dist_count'] = 2 + ret = self.setup_volume_and_mount_volume(self.mounts) + if not ret: + raise ExecutionError('Failed to setup and mount volume') + + def tearDown(self): + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError('Failed to umount and cleanup Volume') + + self.get_super_method(self, 'tearDown')() + + def _get_brick_node_and_path(self): + '''Yields list containing brick node and path from first brick of each + subvol + ''' + subvols = get_subvols(self.mnode, self.volname) + for subvol in subvols['volume_subvols']: + subvol[0] += self.dir_path + yield subvol[0].split(':') + + def _assert_file_lookup(self, node, fqpath, when, result): + '''Perform `stat` on `fqpath` from `node` and validate against `result` + ''' + cmd = ('stat {}'.format(fqpath)) + ret, _, _ = g.run(node, cmd) + assert_method = self.assertNotEqual + assert_msg = 'fail' + if result: + assert_method = self.assertEqual + assert_msg = 'pass' + assert_method( + ret, 0, 'Lookup on {} from {} should {} {} layout ' + 'change'.format(fqpath, node, assert_msg, when)) + + def test_accessing_file_when_dht_layout_is_stale(self): + ''' + Description : Checks if a file can be opened and accessed if the dht + layout has become stale. + + Steps: + 1. Create, start and mount a volume consisting 2 subvols on 2 clients + 2. 
Create a dir `dir` and file `dir/file` from client0 + 3. Take note of layouts of `brick1`/dir and `brick2`/dir of the volume + 4. Validate for success lookup from only one brick path + 5. Re-assign layouts ie., brick1/dir to brick2/dir and vice-versa + 6. Remove `dir/file` from client0 and recreate same file from client0 + and client1 + 7. Validate for success lookup from only one brick path (as layout is + changed file creation path will be changed) + 8. Validate checksum is matched from both the clients + ''' + + # Will be used in _get_brick_node_and_path + self.dir_path = '/dir' + + # Will be used in argument to _assert_file_lookup + file_name = '/file' + + dir_path = self.mounts[0].mountpoint + self.dir_path + file_path = dir_path + file_name + + client0, client1 = self.clients[0], self.clients[1] + fattr = 'trusted.glusterfs.dht' + io_cmd = ('cat /dev/urandom | tr -dc [:space:][:print:] | ' + 'head -c 1K > {}'.format(file_path)) + + # Create a dir from client0 + ret = mkdir(self.clients[0], dir_path) + self.assertTrue(ret, 'Unable to create a directory from mount point') + + # Touch a file with data from client0 + ret, _, _ = g.run(client0, io_cmd) + self.assertEqual(ret, 0, 'Failed to create a file on mount') + + # Yields `node` and `brick-path` from first brick of each subvol + gen = self._get_brick_node_and_path() + + # Take note of newly created directory's layout from org_subvol1 + node1, fqpath1 = next(gen) + layout1 = get_fattr(node1, fqpath1, fattr) + self.assertIsNotNone(layout1, + '{} is not present on {}'.format(fattr, fqpath1)) + + # Lookup on file from node1 should fail as `dir/file` will always get + # hashed to node2 in a 2-brick distribute volume by default + self._assert_file_lookup(node1, + fqpath1 + file_name, + when='before', + result=False) + + # Take note of newly created directory's layout from org_subvol2 + node2, fqpath2 = next(gen) + layout2 = get_fattr(node2, fqpath2, fattr) + self.assertIsNotNone(layout2, + '{} is not present on 
{}'.format(fattr, fqpath2)) + + # Lookup on file from node2 should pass + self._assert_file_lookup(node2, + fqpath2 + file_name, + when='before', + result=True) + + # Set org_subvol2 directory layout to org_subvol1 and vice-versa + for node, fqpath, layout, vol in ((node1, fqpath1, layout2, (2, 1)), + (node2, fqpath2, layout1, (1, 2))): + ret = set_fattr(node, fqpath, fattr, layout) + self.assertTrue( + ret, 'Failed to set layout of org_subvol{} on ' + 'brick {} of org_subvol{}'.format(vol[0], fqpath, vol[1])) + + # Remove file after layout change from client0 + cmd = 'rm -f {}'.format(file_path) + ret, _, _ = g.run(client0, cmd) + self.assertEqual(ret, 0, 'Failed to delete file after layout change') + + # Create file with same name as above after layout change from client0 + # and client1 + for client in (client0, client1): + ret, _, _ = g.run(client, io_cmd) + self.assertEqual( + ret, 0, 'Failed to create file from ' + '{} after layout change'.format(client)) + + # After layout change lookup on file from node1 should pass + self._assert_file_lookup(node1, + fqpath1 + file_name, + when='after', + result=True) + + # After layout change lookup on file from node2 should fail + self._assert_file_lookup(node2, + fqpath2 + file_name, + when='after', + result=False) + + # Take note of checksum from client0 and client1 + checksums = [None] * 2 + for index, mount in enumerate(self.mounts): + ret, checksums[index] = collect_mounts_arequal(mount, dir_path) + self.assertTrue( + ret, 'Failed to get arequal on client {}'.format( + mount.client_system)) + + # Validate no checksum mismatch + self.assertEqual(checksums[0], checksums[1], + 'Checksum mismatch between client0 and client1') + + g.log.info('Pass: Test accessing file on stale layout is complete.') diff --git a/tests/functional/dht/test_add_brick_rebalance_revised.py b/tests/functional/dht/test_add_brick_rebalance_revised.py new file mode 100644 index 000000000..cc749f47a --- /dev/null +++ 
b/tests/functional/dht/test_add_brick_rebalance_revised.py @@ -0,0 +1,171 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.rebalance_ops import ( + rebalance_start, wait_for_rebalance_to_complete, get_rebalance_status) +from glustolibs.gluster.volume_libs import expand_volume +from glustolibs.io.utils import collect_mounts_arequal + + +@runs_on([['distributed-replicated', 'distributed-arbiter', + 'distributed-dispersed', 'distributed'], ['glusterfs']]) +class TestAddBrickRebalanceRevised(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Creating Volume and mounting the volume + ret = self.setup_volume_and_mount_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Volume creation or mount failed: %s" + % self.volname) + self.first_client = self.mounts[0].client_system + + def tearDown(self): + + # Unmounting and cleaning volume + ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Unable to delete volume %s" % self.volname) + + self.get_super_method(self, 'tearDown')() + 
+ def _run_command_50_times(self, operation, msg): + """ + Run a command 50 times on the mount point and display msg if fails + """ + cmd = ("cd %s; for i in {1..50}; do %s;done" + % (self.mounts[0].mountpoint, operation)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, msg) + + def _add_bricks_to_volume(self): + """Add bricks to the volume""" + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, "Failed to add brick on volume %s" + % self.volname) + + def _trigger_rebalance_and_wait(self, rebal_force=False): + """Start rebalance with or without force and wait""" + # Trigger rebalance on volume + ret, _, _ = rebalance_start(self.mnode, self.volname, + force=rebal_force) + self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s" + % self.volname) + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=1200) + self.assertTrue(ret, "Rebalance is not yet complete on the volume " + "%s" % self.volname) + g.log.info("Rebalance successfully completed") + + def _check_if_files_are_skipped_or_not(self): + """Check if files are skipped or not""" + rebalance_status = get_rebalance_status(self.mnode, self.volname) + ret = int(rebalance_status['aggregate']['skipped']) + self.assertNotEqual(ret, 0, "Hardlink rebalance skipped") + + def _check_arequal_checksum_is_equal_before_and_after(self): + """Check if arequal checksum is equal or not""" + self.assertEqual( + self.arequal_checksum_before, self.arequal_checksum_after, + "arequal checksum is NOT MATCHNG") + g.log.info("arequal checksum is SAME") + + def test_add_brick_rebalance_with_hardlinks(self): + """ + Test case: + 1. Create a volume, start it and mount it using fuse. + 2. Create 50 files on the mount point and create 50 hardlinks for the + files. + 3. After the files and hard links creation is complete, add bricks to + the volume and trigger rebalance on the volume. + 4. 
Wait for rebalance to complete and check if files are skipped + or not. + 5. Trigger rebalance on the volume with force and repeat step 4. + """ + # Tuple of ops to be done + ops = (("dd if=/dev/urandom of=file_$i bs=1M count=1", + "Failed to create 50 files"), + ("ln file_$i hardfile_$i", + "Failed to create hard links for files")) + + # Create 50 files on the mount point and create 50 hard links + # for the files. + for operation, msg in ops: + self._run_command_50_times(operation, msg) + + # Collect arequal checksum before add brick op + self.arequal_checksum_before = collect_mounts_arequal(self.mounts[0]) + + # After the file creation is complete, add bricks to the volume + self._add_bricks_to_volume() + + # Trigger rebalance on the volume, wait for it to complete + self._trigger_rebalance_and_wait() + + # Check if hardlinks are skipped or not + self._check_if_files_are_skipped_or_not() + + # Trigger rebalance with force on the volume, wait for it to complete + self._trigger_rebalance_and_wait(rebal_force=True) + + # Check if hardlinks are skipped or not + self._check_if_files_are_skipped_or_not() + + # Compare arequals checksum before and after rebalance + self.arequal_checksum_after = collect_mounts_arequal(self.mounts[0]) + self._check_arequal_checksum_is_equal_before_and_after() + + def test_add_brick_rebalance_with_sticky_bit(self): + """ + Test case: + 1. Create a volume, start it and mount it using fuse. + 2. Create 50 files on the mount point and set sticky bit to the files. + 3. After the files creation and sticky bit addition is complete, + add bricks to the volume and trigger rebalance on the volume. + 4. Wait for rebalance to complete. + 5. Check for data corruption by comparing arequal before and after. 
+ """ + # Tuple of ops to be done + ops = (("dd if=/dev/urandom of=file_$i bs=1M count=1", + "Failed to create 50 files"), + ("chmod +t file_$i", + "Failed to enable sticky bit for files")) + + # Create 50 files on the mount point and enable sticky bit. + for operation, msg in ops: + self._run_command_50_times(operation, msg) + + # Collect arequal checksum before add brick op + self.arequal_checksum_before = collect_mounts_arequal(self.mounts[0]) + + # After the file creation and sticky bit addtion is complete, + # add bricks to the volume + self._add_bricks_to_volume() + + # Trigger rebalance on the volume, wait for it to complete + self._trigger_rebalance_and_wait() + + # Compare arequals checksum before and after rebalance + self.arequal_checksum_after = collect_mounts_arequal(self.mounts[0]) + self._check_arequal_checksum_is_equal_before_and_after() diff --git a/tests/functional/dht/test_add_brick_rebalance_with_rsync_in_progress.py b/tests/functional/dht/test_add_brick_rebalance_with_rsync_in_progress.py new file mode 100644 index 000000000..799ce1a60 --- /dev/null +++ b/tests/functional/dht/test_add_brick_rebalance_with_rsync_in_progress.py @@ -0,0 +1,151 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.rebalance_ops import ( + rebalance_start, wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_libs import expand_volume +from glustolibs.io.utils import collect_mounts_arequal, run_linux_untar + + +@runs_on([['distributed-replicated', 'distributed-arbiter', + 'distributed-dispersed', 'distributed'], ['glusterfs']]) +class TestAddBrickRebalanceWithRsyncInProgress(GlusterBaseClass): + + def setUp(self): + # calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Changing dist_count to 3 + self.volume['voltype']['dist_count'] = 3 + + # Set I/O flag to false + self.is_io_running = False + + # Creating Volume and mounting the volume + ret = self.setup_volume_and_mount_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Volume creation or mount failed: %s" + % self.volname) + + def tearDown(self): + + # Wait for I/O if not completed + if self.is_io_running: + if not self._wait_for_untar_and_rsync_completion(): + g.log.error("I/O failed to stop on clients") + + # Unmounting and cleaning volume + ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Unable to delete volume % s" % self.volname) + + self.get_super_method(self, 'tearDown')() + + def _wait_for_untar_and_rsync_completion(self): + """Wait for untar and rsync to complete""" + has_process_stopped = [] + for proc in self.list_of_io_processes: + try: + ret, _, _ = proc.async_communicate() + if not ret: + has_process_stopped.append(False) + has_process_stopped.append(True) + except ValueError: + has_process_stopped.append(True) + return all(has_process_stopped) + + def test_add_brick_rebalance_with_rsync_in_progress(self): + """ + Test case: + 1. Create, start and mount a volume. 
+ 2. Create a directory on the mount point and start linux utar. + 3. Create another directory on the mount point and start rsync of + linux untar directory. + 4. Add bricks to the volume + 5. Trigger rebalance on the volume. + 6. Wait for rebalance to complete on volume. + 7. Wait for I/O to complete. + 8. Validate if checksum of both the untar and rsync is same. + """ + # List of I/O processes + self.list_of_io_processes = [] + + # Create a dir to start untar + self.linux_untar_dir = "{}/{}".format(self.mounts[0].mountpoint, + "linuxuntar") + ret = mkdir(self.clients[0], self.linux_untar_dir) + self.assertTrue(ret, "Failed to create dir linuxuntar for untar") + + # Start linux untar on dir linuxuntar + ret = run_linux_untar(self.clients[0], self.mounts[0].mountpoint, + dirs=tuple(['linuxuntar'])) + self.list_of_io_processes += ret + self.is_io_running = True + + # Create a new directory and start rsync + self.rsync_dir = "{}/{}".format(self.mounts[0].mountpoint, + 'rsyncuntarlinux') + ret = mkdir(self.clients[0], self.rsync_dir) + self.assertTrue(ret, "Failed to create dir rsyncuntarlinux for rsync") + + # Start rsync for linux untar on mount point + cmd = ("for i in `seq 1 3`; do rsync -azr {} {};sleep 120;done" + .format(self.linux_untar_dir, self.rsync_dir)) + ret = g.run_async(self.clients[0], cmd) + self.list_of_io_processes.append(ret) + + # Add bricks to the volume + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, "Failed to add brick with rsync on volume %s" + % self.volname) + + # Trigger rebalance on the volume + ret, _, _ = rebalance_start(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s" + % self.volname) + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=6000) + self.assertTrue(ret, "Rebalance is not yet complete on the volume " + "%s" % self.volname) + + # Wait for IO to complete. 
+ ret = self._wait_for_untar_and_rsync_completion()
+ self.assertFalse(ret, "IO didn't complete or failed on client")
+ self.is_io_running = False
+
+ # As we are running rsync and untar together, there are situations
+ # when some of the new files created by linux untar is not synced
+ # through rsync which causes checksum to return different value,
+ # Hence to take care of this corner case we are rerunning rsync.
+ cmd = "rsync -azr {} {}".format(self.linux_untar_dir, self.rsync_dir)
+ ret, _, _ = g.run(self.clients[0], cmd)
+ self.assertEqual(ret, 0, "Failed to sync left behind files")
+
+ # Check data consistency on both the directories
+ rsync_checksum = collect_mounts_arequal(
+ self.mounts[0], path='rsyncuntarlinux/linuxuntar/')
+ untar_checksum = collect_mounts_arequal(self.mounts[0],
+ path='linuxuntar')
+ self.assertEqual(
+ rsync_checksum, untar_checksum,
+ "Checksum on untar dir and checksum on rsync dir didn't match")
diff --git a/tests/functional/dht/test_add_brick_rebalance_with_self_heal_in_progress.py b/tests/functional/dht/test_add_brick_rebalance_with_self_heal_in_progress.py
new file mode 100644
index 000000000..6fb7fe4f0
--- /dev/null
+++ b/tests/functional/dht/test_add_brick_rebalance_with_self_heal_in_progress.py
@@ -0,0 +1,136 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from random import choice +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import get_all_bricks, bring_bricks_online +from glustolibs.gluster.heal_libs import monitor_heal_completion +from glustolibs.gluster.rebalance_ops import ( + rebalance_start, wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_libs import expand_volume +from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs, + wait_for_io_to_complete) +from glustolibs.misc.misc_libs import kill_process + + +@runs_on([['distributed-replicated', 'distributed-arbiter'], ['glusterfs']]) +class TestAddBrickRebalanceWithSelfHeal(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to Setup and mount volume") + + self.is_io_running = False + + def tearDown(self): + + # If I/O processes are running wait for it to complete + if self.is_io_running: + if not wait_for_io_to_complete(self.list_of_io_processes, + [self.mounts[0]]): + raise ExecutionError("Failed to wait for I/O to complete") + + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to Cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_add_brick_rebalance_with_self_heal_in_progress(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Start creating a few files on mount point. + 3. While file creation is going on, kill one of the bricks + in the replica pair. + 4. 
After file creation is complete collect arequal checksum
+ on mount point.
+ 5. Bring back the brick online by starting volume with force.
+ 6. Check if all bricks are online and if heal is in progress.
+ 7. Add bricks to the volume and start rebalance.
+ 8. Wait for rebalance and heal to complete on volume.
+ 9. Collect arequal checksum on mount point and compare
+ it with the one taken in step 4.
+ """
+ # Start I/O from mount point and wait for it to complete
+ cmd = ("cd %s; for i in {1..1000} ; do "
+ "dd if=/dev/urandom of=file$i bs=10M count=1; done"
+ % self.mounts[0].mountpoint)
+ self.list_of_io_processes = [
+ g.run_async(self.mounts[0].client_system, cmd)]
+ self.is_io_running = True
+
+ # Get a list of all the bricks to kill brick
+ brick_list = get_all_bricks(self.mnode, self.volname)
+ self.assertIsNotNone(brick_list, "Empty present brick list")
+
+ # Kill brick process of a brick which is being removed
+ brick = choice(brick_list)
+ node, _ = brick.split(":")
+ ret = kill_process(node, process_names="glusterfsd")
+ self.assertTrue(ret, "Failed to kill brick process of brick %s"
+ % brick)
+
+ # Validate if I/O was successful or not. 
+ ret = validate_io_procs(self.list_of_io_processes, self.mounts)
+ self.assertTrue(ret, "IO failed on some of the clients")
+ self.is_io_running = False
+
+ # Collect arequal checksum before ops
+ arequal_checksum_before = collect_mounts_arequal(self.mounts[0])
+
+ # Bring back the brick online by starting volume with force
+ ret = bring_bricks_online(self.mnode, self.volname, brick_list,
+ bring_bricks_online_methods=[
+ 'volume_start_force'])
+ self.assertTrue(ret, "Error in bringing back brick online")
+ g.log.info('All bricks are online now')
+
+ # Add brick to volume
+ ret = expand_volume(self.mnode, self.volname, self.servers,
+ self.all_servers_info)
+ self.assertTrue(ret, "Failed to add brick on volume %s"
+ % self.volname)
+
+ # Trigger rebalance and wait for it to complete
+ ret, _, _ = rebalance_start(self.mnode, self.volname,
+ force=True)
+ self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s"
+ % self.volname)
+
+ # Wait for rebalance to complete
+ ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
+ timeout=1200)
+ self.assertTrue(ret, "Rebalance is not yet complete on the volume "
+ "%s" % self.volname)
+ g.log.info("Rebalance successfully completed")
+
+ # Wait for heal to complete
+ ret = monitor_heal_completion(self.mnode, self.volname)
+ self.assertTrue(ret, "heal has not yet completed")
+ g.log.info("Self heal completed")
+
+ # Check for data loss by comparing arequal before and after ops
+ arequal_checksum_after = collect_mounts_arequal(self.mounts[0])
+ self.assertEqual(arequal_checksum_before, arequal_checksum_after,
+ "arequal checksum is NOT MATCHING")
+ g.log.info("arequal checksum is SAME")
diff --git a/tests/functional/dht/test_add_brick_rebalance_with_symlink_pointing_out_of_gluster.py b/tests/functional/dht/test_add_brick_rebalance_with_symlink_pointing_out_of_gluster.py
new file mode 100644
index 000000000..92135b3b4
--- /dev/null
+++ 
b/tests/functional/dht/test_add_brick_rebalance_with_symlink_pointing_out_of_gluster.py @@ -0,0 +1,133 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.rebalance_ops import ( + rebalance_start, wait_for_rebalance_to_complete) +from glustolibs.gluster.glusterfile import get_md5sum +from glustolibs.gluster.volume_libs import expand_volume +from glustolibs.io.utils import (validate_io_procs, wait_for_io_to_complete) + + +@runs_on([['distributed-replicated', 'distributed-arbiter'], ['glusterfs']]) +class TestAddBrickRebalanceWithSymlinkPointingOutOfGluster(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to Setup and mount volume") + + self.is_io_running = False + + def tearDown(self): + + # Remove the temporary dir created for test + ret, _, _ = g.run(self.mounts[0].client_system, "rm -rf /mnt/tmp/") + if ret: + raise ExecutionError("Failed to remove /mnt/tmp create for test") + + # If I/O processes are running wait for it to 
complete
+ if self.is_io_running:
+ if not wait_for_io_to_complete(self.list_of_io_processes,
+ [self.mounts[0]]):
+ raise ExecutionError("Failed to wait for I/O to complete")
+
+ if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]):
+ raise ExecutionError("Failed to Cleanup Volume")
+
+ # Calling GlusterBaseClass tearDown
+ self.get_super_method(self, 'tearDown')()
+
+ def test_add_brick_rebalance_with_symlink_pointing_out_of_volume(self):
+ """
+ Test case:
+ 1. Create a volume, start it and mount it.
+ 2. Create symlinks on the volume such that the files for the symlink
+ are outside the volume.
+ 3. Once all the symlinks are created, create a data file using dd:
+ dd if=/dev/urandom of=FILE bs=1024 count=100
+ 4. Start copying the file's data to all the symlink.
+ 5. When data is getting copied to all files through symlink add brick
+ and start rebalance.
+ 6. Once rebalance is complete check the md5sum of each file through
+ symlink and compare if it's same as the original file.
+ """
+ # Create symlinks on volume pointing outside volume
+ cmd = ("cd %s; mkdir -p /mnt/tmp;for i in {1..100};do "
+ "touch /mnt/tmp/file$i; ln -sf /mnt/tmp/file$i link$i;done"
+ % self.mounts[0].mountpoint)
+ ret, _, _ = g.run(self.mounts[0].client_system, cmd)
+ self.assertFalse(
+ ret, "Failed to create symlinks pointing outside volume")
+
+ # Create a data file using dd inside mount point
+ cmd = ("cd %s; dd if=/dev/urandom of=FILE bs=1024 count=100"
+ % self.mounts[0].mountpoint)
+ ret, _, _ = g.run(self.mounts[0].client_system, cmd)
+ self.assertFalse(ret, "Failed to create data file on mount point")
+
+ # Start copying data from file to symlinks
+ cmd = ("cd %s;for i in {1..100};do cat FILE >> link$i;done"
+ % self.mounts[0].mountpoint)
+ self.list_of_io_processes = [
+ g.run_async(self.mounts[0].client_system, cmd)]
+ self.is_io_running = True
+
+ # Add brick to volume
+ ret = expand_volume(self.mnode, self.volname, self.servers,
+ self.all_servers_info)
+ 
self.assertTrue(ret, "Failed to add brick on volume %s"
+ % self.volname)
+
+ # Trigger rebalance and wait for it to complete
+ ret, _, _ = rebalance_start(self.mnode, self.volname,
+ force=True)
+ self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s"
+ % self.volname)
+
+ # Wait for rebalance to complete
+ ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
+ timeout=1200)
+ self.assertTrue(ret, "Rebalance is not yet complete on the volume "
+ "%s" % self.volname)
+ g.log.info("Rebalance successfully completed")
+
+ # Validate if I/O was successful or not.
+ ret = validate_io_procs(self.list_of_io_processes, self.mounts)
+ self.assertTrue(ret, "IO failed on some of the clients")
+ self.is_io_running = False
+
+ # Get md5sum of the original file and compare it with that of
+ # all files through the symlink
+ original_file_md5sum = get_md5sum(self.mounts[0].client_system,
+ "{}/FILE".format(
+ self.mounts[0].mountpoint))
+ self.assertIsNotNone(original_file_md5sum,
+ 'Failed to get md5sum of original file')
+ for number in range(1, 101):
+ symlink_md5sum = get_md5sum(self.mounts[0].client_system,
+ "{}/link{}".format(
+ self.mounts[0].mountpoint, number))
+ self.assertEqual(original_file_md5sum.split(' ')[0],
+ symlink_md5sum.split(' ')[0],
+ "Original file and symlink checksum not equal"
+ " for link%s" % number)
+ g.log.info("Symlink and original file checksum same on all symlinks")
diff --git a/tests/functional/dht/test_add_brick_remove_brick_with_lookups_and_kernal_untar.py b/tests/functional/dht/test_add_brick_remove_brick_with_lookups_and_kernal_untar.py
new file mode 100644
index 000000000..4e185733e
--- /dev/null
+++ b/tests/functional/dht/test_add_brick_remove_brick_with_lookups_and_kernal_untar.py
@@ -0,0 +1,162 @@
+# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from random import choice +from unittest import skip, SkipTest + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.volume_libs import expand_volume, shrink_volume +from glustolibs.gluster.brickmux_ops import enable_brick_mux, disable_brick_mux +from glustolibs.misc.misc_libs import upload_scripts, kill_process +from glustolibs.io.utils import (run_linux_untar, validate_io_procs, + wait_for_io_to_complete) + + +@runs_on([['distributed-replicated', 'distributed-dispersed'], ['glusterfs']]) +class TestAddBrickRemoveBrickWithlookupsAndKernaluntar(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Check for availability of atleast 4 clients + if len(cls.clients) < 4: + raise SkipTest("This test requires atleast 4 clients") + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, 
cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" % + cls.clients) + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Enable brickmux on cluster + if not enable_brick_mux(self.mnode): + raise ExecutionError("Failed to enable brickmux on cluster") + + # Changing dist_count to 3 + self.volume['voltype']['dist_count'] = 3 + + # Creating Volume and mounting the volume + ret = self.setup_volume_and_mount_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Volume creation or mount failed: %s" + % self.volname) + + self.list_of_io_processes = [] + self.is_io_running = False + + def tearDown(self): + + # Disable brickmux on cluster + if not disable_brick_mux(self.mnode): + raise ExecutionError("Failed to disable brickmux on cluster") + + # If I/O processes are running wait from them to complete + if self.is_io_running: + if not wait_for_io_to_complete(self.list_of_io_processes, + self.mounts): + raise ExecutionError("Failed to wait for I/O to complete") + + # Unmounting and cleaning volume + ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Unable to delete volume %s" % self.volname) + + self.get_super_method(self, 'tearDown')() + + @skip('Skipping due to Bug 1571317') + def test_add_brick_remove_brick_with_lookups_and_kernal_untar(self): + """ + Test case: + 1. Enable brickmux on cluster, create a volume, start it and mount it. + 2. Start the below I/O from 4 clients: + From client-1 : run script to create folders and files continuously + From client-2 : start linux kernel untar + From client-3 : while true;do find;done + From client-4 : while true;do ls -lRt;done + 3. Kill brick process on one of the nodes. + 4. Add brick to the volume. + 5. Remove bricks from the volume. + 6. Validate if I/O was successful or not. 
+ """ + # Fill few bricks till it is full + bricks = get_all_bricks(self.mnode, self.volname) + + # Create a dir to start untar + self.linux_untar_dir = "{}/{}".format(self.mounts[0].mountpoint, + "linuxuntar") + ret = mkdir(self.clients[0], self.linux_untar_dir) + self.assertTrue(ret, "Failed to create dir linuxuntar for untar") + + # Start linux untar on dir linuxuntar + ret = run_linux_untar(self.clients[0], self.mounts[0].mountpoint, + dirs=tuple(['linuxuntar'])) + self.list_of_io_processes += ret + self.is_io_running = True + + # Run script to create folders and files continuously + cmd = ("/usr/bin/env python {} create_deep_dirs_with_files " + "--dirname-start-num 758 --dir-depth 2 " + "--dir-length 100 --max-num-of-dirs 10 --num-of-files 105 {}" + .format(self.script_upload_path, self.mounts[1].mountpoint)) + ret = g.run_async(self.mounts[1].client_system, cmd) + self.list_of_io_processes += [ret] + + # Run lookup operations from 2 clients + cmd = ("cd {}; for i in `seq 1 1000000`;do find .; done" + .format(self.mounts[2].mountpoint)) + ret = g.run_async(self.mounts[2].client_system, cmd) + self.list_of_io_processes += [ret] + + cmd = ("cd {}; for i in `seq 1 1000000`;do ls -lRt; done" + .format(self.mounts[3].mountpoint)) + ret = g.run_async(self.mounts[3].client_system, cmd) + self.list_of_io_processes += [ret] + + # Kill brick process of one of the nodes. 
+ brick = choice(bricks) + node, _ = brick.split(":") + ret = kill_process(node, process_names="glusterfsd") + self.assertTrue(ret, "Failed to kill brick process of brick %s" + % brick) + + # Add brick to volume + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, "Failed to add brick on volume %s" + % self.volname) + g.log.info("Add brick to volume successful") + + # Remove bricks from the volume + ret = shrink_volume(self.mnode, self.volname, rebalance_timeout=2400) + self.assertTrue(ret, "Failed to remove-brick from volume") + g.log.info("Remove-brick rebalance successful") + + # Validate if I/O was successful or not. + ret = validate_io_procs(self.list_of_io_processes, self.mounts) + self.assertTrue(ret, "IO failed on some of the clients") + self.is_io_running = False diff --git a/tests/functional/dht/test_add_brick_replace_brick_fix_layout.py b/tests/functional/dht/test_add_brick_replace_brick_fix_layout.py new file mode 100644 index 000000000..783ca1800 --- /dev/null +++ b/tests/functional/dht/test_add_brick_replace_brick_fix_layout.py @@ -0,0 +1,124 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from random import choice + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_ops import add_brick +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.glusterfile import get_fattr +from glustolibs.gluster.rebalance_ops import (rebalance_start, + wait_for_fix_layout_to_complete) +from glustolibs.gluster.volume_libs import (form_bricks_list_to_add_brick, + replace_brick_from_volume) + + +@runs_on([['distributed-replicated', 'distributed-arbiter'], ['glusterfs']]) +class TestAddBrickReplaceBrickFixLayout(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Changing dist_count to 3 + self.volume['voltype']['dist_count'] = 3 + + # Creating Volume and mounting the volume + ret = self.setup_volume_and_mount_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Volume creation or mount failed: %s" + % self.volname) + + self.first_client = self.mounts[0].client_system + + def tearDown(self): + + # Unmounting and cleaning volume + ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Unable to delete volume %s" % self.volname) + + self.get_super_method(self, 'tearDown')() + + def _replace_a_old_added_brick(self, brick_to_be_replaced): + """Replace a old brick from the volume""" + ret = replace_brick_from_volume(self.mnode, self.volname, + self.servers, self.all_servers_info, + src_brick=brick_to_be_replaced) + self.assertTrue(ret, "Failed to replace brick %s " + % brick_to_be_replaced) + g.log.info("Successfully replaced brick %s", brick_to_be_replaced) + + def _check_trusted_glusterfs_dht_on_all_bricks(self): + """Check trusted.glusterfs.dht xattr on the backend bricks""" + bricks = get_all_bricks(self.mnode, self.volname) + fattr_value = [] + for brick_path in bricks: + node, path = 
brick_path.split(":") + ret = get_fattr(node, "{}".format(path), "trusted.glusterfs.dht") + fattr_value += [ret] + self.assertEqual(len(set(fattr_value)), 4, + "Value of trusted.glusterfs.dht is not as expected") + g.log.info("Successfully checked value of trusted.glusterfs.dht.") + + def test_add_brick_replace_brick_fix_layout(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create files and dirs on the mount point. + 3. Add bricks to the volume. + 4. Replace 2 old bricks to the volume. + 5. Trigger rebalance fix layout and wait for it to complete. + 6. Check layout on all the bricks through trusted.glusterfs.dht. + """ + # Create directories with some files on mount point + cmd = ("cd %s; for i in {1..10}; do mkdir dir$i; for j in {1..5};" + " do dd if=/dev/urandom of=dir$i/file$j bs=1M count=1; done;" + " done" % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to create dirs and files.") + + # Orginal brick list before add brick + brick_list = get_all_bricks(self.mnode, self.volname) + self.assertIsNotNone(brick_list, "Empty present brick list") + + # Add bricks to the volume + add_brick_list = form_bricks_list_to_add_brick( + self.mnode, self.volname, self.servers, self.all_servers_info) + self.assertIsNotNone(add_brick_list, "Empty add brick list") + + ret, _, _ = add_brick(self.mnode, self.volname, add_brick_list) + self.assertFalse(ret, "Failed to add bricks to the volume") + g.log.info("Successfully added bricks to the volume") + + # Replace 2 old bricks to the volume + for _ in range(0, 2): + brick = choice(brick_list) + self._replace_a_old_added_brick(brick) + brick_list.remove(brick) + + # Start rebalance and wait for it to complete + ret, _, _ = rebalance_start(self.mnode, self.volname, fix_layout=True) + self.assertFalse(ret, "Failed to start rebalance on volume") + + ret = wait_for_fix_layout_to_complete(self.mnode, self.volname, + timeout=800) + self.assertTrue(ret, 
"Rebalance failed on volume") + + # Check layout on all the bricks through trusted.glusterfs.dht + self._check_trusted_glusterfs_dht_on_all_bricks() diff --git a/tests/functional/dht/test_brick_full_add_brick_rebalance.py b/tests/functional/dht/test_brick_full_add_brick_rebalance.py new file mode 100644 index 000000000..e67115220 --- /dev/null +++ b/tests/functional/dht/test_brick_full_add_brick_rebalance.py @@ -0,0 +1,120 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +import string +from random import choice + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.dht_test_utils import find_hashed_subvol +from glustolibs.gluster.lib_utils import get_usable_size_per_disk +from glustolibs.gluster.rebalance_ops import (rebalance_start, + wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_libs import get_subvols, expand_volume +from glustolibs.gluster.volume_ops import set_volume_options + + +@runs_on([['distributed-replicated', 'distributed-arbiter'], ['glusterfs']]) +class TestBrickFullAddBrickRebalance(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Creating Volume and mounting the volume + ret = self.setup_volume_and_mount_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Volume creation or mount failed: %s" + % self.volname) + + def tearDown(self): + + # Unmounting and cleaning volume + ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Unable to delete volume %s" % self.volname) + + self.get_super_method(self, 'tearDown')() + + @staticmethod + def _get_random_string(): + letters = string.ascii_lowercase + return ''.join(choice(letters) for _ in range(5)) + + def test_brick_full_add_brick_rebalance(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create a data set on the client node such that all the available + space is used and "No space left on device" error is generated. + 3. Set cluster.min-free-disk to 30%. + 4. Add bricks to the volume, trigger rebalance and wait for rebalance + to complete. 
+ """ + # Create a data set on the client node such that all the available + # space is used and "No space left on device" error is generated + bricks = get_all_bricks(self.mnode, self.volname) + + # Calculate the usable size and fill till it reaches + # min free limit + usable_size = get_usable_size_per_disk(bricks[0]) + subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + filename = "abc" + for subvol in subvols: + while (subvols[find_hashed_subvol(subvols, "/", filename)[1]] == + subvol): + filename = self._get_random_string() + ret, _, _ = g.run(self.mounts[0].client_system, + "fallocate -l {}G {}/{}".format( + usable_size, self.mounts[0].mountpoint, + filename)) + self.assertFalse(ret, "Failed to fill disk to min free limit") + g.log.info("Disk filled up to min free limit") + + # Try to perfrom I/O from mount point(This should fail) + ret, _, _ = g.run(self.mounts[0].client_system, + "fallocate -l 5G {}/mfile".format( + self.mounts[0].mountpoint)) + self.assertTrue(ret, + "Unexpected: Able to do I/O even when disks are " + "filled to min free limit") + g.log.info("Expected: Unable to perfrom I/O as min free disk is hit") + + # Set cluster.min-free-disk to 30% + ret = set_volume_options(self.mnode, self.volname, + {'cluster.min-free-disk': '30%'}) + self.assertTrue(ret, "Failed to set cluster.min-free-disk to 30%") + + # Add brick to volume + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, "Failed to add brick on volume %s" + % self.volname) + + # Trigger rebalance and wait for it to complete + ret, _, _ = rebalance_start(self.mnode, self.volname, + force=True) + self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s" + % self.volname) + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=1200) + self.assertTrue(ret, "Rebalance is not yet complete on the volume " + "%s" % self.volname) + g.log.info("Rebalance 
successfully completed") diff --git a/tests/functional/dht/test_brick_full_add_brick_remove_brick.py b/tests/functional/dht/test_brick_full_add_brick_remove_brick.py new file mode 100644 index 000000000..eaf7dafb4 --- /dev/null +++ b/tests/functional/dht/test_brick_full_add_brick_remove_brick.py @@ -0,0 +1,111 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +import string +from random import choice + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.dht_test_utils import find_hashed_subvol +from glustolibs.gluster.lib_utils import get_usable_size_per_disk +from glustolibs.gluster.volume_libs import (get_subvols, expand_volume, + shrink_volume) +from glustolibs.gluster.volume_ops import set_volume_options +from glustolibs.io.utils import collect_mounts_arequal + + +@runs_on([['distributed-replicated', 'distributed-arbiter'], ['glusterfs']]) +class TestBrickFullAddBrickRemoveBrickRebalance(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Creating Volume and mounting the volume + ret = self.setup_volume_and_mount_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Volume creation or mount failed: %s" + % self.volname) + + def tearDown(self): + + # Unmounting and cleaning volume + ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Unable to delete volume %s" % self.volname) + + self.get_super_method(self, 'tearDown')() + + @staticmethod + def _get_random_string(): + letters = string.ascii_lowercase + return ''.join(choice(letters) for _ in range(5)) + + def test_brick_full_add_brick_remove_brick(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Fill few bricks till min-free-limit is reached. + 3. Add brick to the volume.(This should pass.) + 4. Set cluster.min-free-disk to 30%. + 5. Remove bricks from the volume.(This should pass.) + 6. Check for data loss by comparing arequal before and after. 
+ """ + # Fill few bricks till it is full + bricks = get_all_bricks(self.mnode, self.volname) + + # Calculate the usable size and fill till it reaches + # min free limit + usable_size = get_usable_size_per_disk(bricks[0]) + subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + filename = "abc" + for _ in range(0, usable_size): + while (subvols[find_hashed_subvol(subvols, "/", filename)[1]] + == subvols[0]): + filename = self._get_random_string() + ret, _, _ = g.run(self.mounts[0].client_system, + "fallocate -l 1G {}/{}".format( + self.mounts[0].mountpoint, filename)) + self.assertFalse(ret, "Failed to fill disk to min free limit") + filename = self._get_random_string() + g.log.info("Disk filled up to min free limit") + + # Collect arequal checksum before ops + arequal_checksum_before = collect_mounts_arequal(self.mounts[0]) + + # Add brick to volume + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, "Failed to add brick on volume %s" + % self.volname) + + # Set cluster.min-free-disk to 30% + ret = set_volume_options(self.mnode, self.volname, + {'cluster.min-free-disk': '30%'}) + self.assertTrue(ret, "Failed to set cluster.min-free-disk to 30%") + + # Remove bricks from the volume + ret = shrink_volume(self.mnode, self.volname, rebalance_timeout=1800) + self.assertTrue(ret, "Failed to remove-brick from volume") + g.log.info("Remove-brick rebalance successful") + + # Check for data loss by comparing arequal before and after ops + arequal_checksum_after = collect_mounts_arequal(self.mounts[0]) + self.assertEqual(arequal_checksum_before, arequal_checksum_after, + "arequal checksum is NOT MATCHNG") + g.log.info("arequal checksum is SAME") diff --git a/tests/functional/dht/test_copy_dir_subvol_down.py b/tests/functional/dht/test_copy_dir_subvol_down.py new file mode 100644 index 000000000..8835bcada --- /dev/null +++ b/tests/functional/dht/test_copy_dir_subvol_down.py @@ -0,0 +1,308 @@ +# Copyright 
(C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.io.utils import collect_mounts_arequal, validate_io_procs +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.gluster.dht_test_utils import (find_hashed_subvol, + find_new_hashed) +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.brick_libs import bring_bricks_offline + + +@runs_on([['distributed', 'distributed-replicated', + 'distributed-arbiter', 'distributed-dispersed'], + ['glusterfs']]) +class TestCopyDirSubvolDown(GlusterBaseClass): + @classmethod + def setUpClass(cls): + cls.get_super_method(cls, 'setUpClass')() + + # Check for the default dist_count value and override it if required + if cls.default_volume_type_config['distributed']['dist_count'] <= 2: + cls.default_volume_type_config['distributed']['dist_count'] = 4 + else: + cls.default_volume_type_config[cls.voltype]['dist_count'] = 3 + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = 
upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts " + "to clients %s" % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + + # Unmount and cleanup original volume + ret = self.unmount_volume_and_cleanup_volume(mounts=[self.mounts[0]]) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _create_src(self, m_point): + """ + Create the source directory and files under the + source directory. + """ + # Create source dir + ret = mkdir(self.mounts[0].client_system, "{}/src_dir".format(m_point)) + self.assertTrue(ret, "mkdir of src_dir failed") + + # Create files inside source dir + cmd = ("/usr/bin/env python %s create_files " + "-f 100 %s/src_dir/" % ( + self.script_upload_path, m_point)) + proc = g.run_async(self.mounts[0].client_system, + cmd, user=self.mounts[0].user) + g.log.info("IO on %s:%s is started successfully", + self.mounts[0].client_system, m_point) + + # Validate IO + self.assertTrue( + validate_io_procs([proc], self.mounts[0]), + "IO failed on some of the clients" + ) + + def _copy_files_check_contents(self, m_point, dest_dir): + """ + Copy files from source directory to destination + directory when it hashes to up-subvol and check + if all the files are copied properly. 
+ """ + # pylint: disable=protected-access + # collect arequal checksum on src dir + ret, src_checksum = collect_mounts_arequal( + self.mounts[0], '{}/src_dir'.format(m_point)) + self.assertTrue(ret, ("Failed to get arequal on client" + " {}".format(self.clients[0]))) + + # copy src_dir to dest_dir + command = "cd {}; cp -r src_dir {}".format(m_point, dest_dir) + ret, _, _ = g.run(self.mounts[0].client_system, command) + self.assertEqual(ret, 0, "Failed to copy of src dir to" + " dest dir") + g.log.info("Successfully copied src dir to dest dir.") + + # collect arequal checksum on destination dir + ret, dest_checksum = collect_mounts_arequal( + self.mounts[0], '{}/{}'.format(m_point, dest_dir)) + self.assertTrue(ret, ("Failed to get arequal on client" + " {}".format(self.mounts[0]))) + + # Check if the contents of src dir are copied to + # dest dir + self.assertEqual(src_checksum, + dest_checksum, + 'All the contents of src dir are not' + ' copied to dest dir') + g.log.info('Successfully copied the contents of src dir' + ' to dest dir') + + def _copy_when_dest_hash_down(self, m_point, dest_dir): + """ + Copy files from source directory to destination + directory when it hashes to down-subvol. + """ + # pylint: disable=protected-access + # copy src_dir to dest_dir (should fail as hash subvol for dest + # dir is down) + command = "cd {}; cp -r src_dir {}".format(m_point, dest_dir) + ret, _, _ = g.run(self.mounts[0].client_system, command) + self.assertEqual(ret, 1, "Unexpected : Copy of src dir to" + " dest dir passed") + g.log.info("Copy of src dir to dest dir failed as expected.") + + def test_copy_existing_dir_dest_subvol_down(self): + """ + Case 1: + - Create directory from mount point. + - Copy dir ---> Bring down dht sub-volume where destination + directory hashes to down sub-volume. 
+ - Copy directory and make sure destination dir does not exist + """ + # pylint: disable=protected-access + m_point = self.mounts[0].mountpoint + + # Create source dir + ret = mkdir(self.mounts[0].client_system, "{}/src_dir".format(m_point)) + self.assertTrue(ret, "mkdir of src_dir failed") + g.log.info("Directory src_dir created successfully") + + # Get subvol list + subvols = (get_subvols(self.mnode, self.volname))['volume_subvols'] + self.assertIsNotNone(subvols, "Failed to get subvols") + + # Find out the destination dir name such that it hashes to + # different subvol + newdir = find_new_hashed(subvols, "/", "src_dir") + dest_dir = str(newdir.newname) + dest_count = newdir.subvol_count + + # Kill the brick/subvol to which the destination dir hashes + ret = bring_bricks_offline( + self.volname, subvols[dest_count]) + self.assertTrue(ret, ('Error in bringing down subvolume %s', + subvols[dest_count])) + g.log.info('DHT subvol %s is offline', subvols[dest_count]) + + # Copy src_dir to dest_dir (should fail as hash subvol for dest + # dir is down) + self._copy_when_dest_hash_down(m_point, dest_dir) + + def test_copy_existing_dir_dest_subvol_up(self): + """ + Case 2: + - Create files and directories from mount point. 
+ - Copy dir ---> Bring down dht sub-volume where destination + directory should not hash to down sub-volume + - copy dir and make sure destination dir does not exist + """ + # pylint: disable=protected-access + m_point = self.mounts[0].mountpoint + + # Create source dir and create files inside it + self._create_src(m_point) + + # Get subvol list + subvols = (get_subvols(self.mnode, self.volname))['volume_subvols'] + self.assertIsNotNone(subvols, "Failed to get subvols") + + # Find out hashed brick/subvol for src dir + src_subvol, src_count = find_hashed_subvol(subvols, "/", "src_dir") + self.assertIsNotNone(src_subvol, "Could not find srchashed") + g.log.info("Hashed subvol for src_dir is %s", src_subvol._path) + + # Find out the destination dir name such that it hashes to + # different subvol + newdir = find_new_hashed(subvols, "/", "src_dir") + dest_dir = str(newdir.newname) + dest_count = newdir.subvol_count + + # Remove the hashed subvol for dest and src dir from the + # subvol list + for item in (subvols[src_count], subvols[dest_count]): + subvols.remove(item) + + # Bring down a DHT subvol + ret = bring_bricks_offline(self.volname, subvols[0]) + self.assertTrue(ret, ('Error in bringing down subvolume %s', + subvols[0])) + g.log.info('DHT subvol %s is offline', subvols[0]) + + # Create files on source dir and + # perform copy of src_dir to dest_dir + self._copy_files_check_contents(m_point, dest_dir) + + def test_copy_new_dir_dest_subvol_up(self): + """ + Case 3: + - Copy dir ---> Bring down dht sub-volume where destination + directory should not hash to down sub-volume + - Create files and directories from mount point. 
+ - copy dir and make sure destination dir does not exist + """ + # pylint: disable=protected-access + # pylint: disable=too-many-statements + m_point = self.mounts[0].mountpoint + + # Get subvols + subvols = (get_subvols(self.mnode, self.volname))['volume_subvols'] + self.assertIsNotNone(subvols, "Failed to get subvols") + + # Find out hashed brick/subvol for src dir + src_subvol, src_count = find_hashed_subvol( + subvols, "/", "src_dir") + self.assertIsNotNone(src_subvol, "Could not find srchashed") + g.log.info("Hashed subvol for src_dir is %s", src_subvol._path) + + # Find out the destination dir name such that it hashes to + # different subvol + newdir = find_new_hashed(subvols, "/", "src_dir") + dest_dir = str(newdir.newname) + dest_count = newdir.subvol_count + + # Remove the hashed subvol for dest and src dir from the + # subvol list + for item in (subvols[src_count], subvols[dest_count]): + subvols.remove(item) + + # Bring down a dht subvol + ret = bring_bricks_offline(self.volname, subvols[0]) + self.assertTrue(ret, ('Error in bringing down subvolume %s', + subvols[0])) + g.log.info('DHT subvol %s is offline', subvols[0]) + + # Create source dir and create files inside it + self._create_src(m_point) + + # Create files on source dir and + # perform copy of src_dir to dest_dir + self._copy_files_check_contents(m_point, dest_dir) + + def test_copy_new_dir_dest_subvol_down(self): + """ + Case 4: + - Copy dir ---> Bring down dht sub-volume where destination + directory hashes to down sub-volume + - Create directory from mount point. 
+ - Copy dir and make sure destination dir does not exist + """ + # pylint: disable=protected-access + m_point = self.mounts[0].mountpoint + + # Get subvol list + subvols = (get_subvols(self.mnode, self.volname))['volume_subvols'] + self.assertIsNotNone(subvols, "Failed to get subvols") + + # Find out the destination dir name such that it hashes to + # different subvol + newdir = find_new_hashed(subvols, "/", "src_dir") + dest_dir = str(newdir.newname) + dest_count = newdir.subvol_count + + # Bring down the hashed-subvol for dest dir + ret = bring_bricks_offline(self.volname, subvols[dest_count]) + self.assertTrue(ret, ('Error in bringing down subvolume %s', + subvols[dest_count])) + g.log.info('DHT subvol %s is offline', subvols[dest_count]) + + # Create source dir + ret = mkdir(self.mounts[0].client_system, "{}/src_dir".format(m_point)) + self.assertTrue(ret, "mkdir of src_dir failed") + g.log.info("Directory src_dir created successfully") + + # Copy src_dir to dest_dir (should fail as hash subvol for dest + # dir is down) + self._copy_when_dest_hash_down(m_point, dest_dir) diff --git a/tests/functional/dht/test_copy_file_subvol_down.py b/tests/functional/dht/test_copy_file_subvol_down.py new file mode 100644 index 000000000..afb06ac3c --- /dev/null +++ b/tests/functional/dht/test_copy_file_subvol_down.py @@ -0,0 +1,336 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +# pylint: disable=protected-access +# pylint: disable=too-many-statements +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brickdir import BrickDir +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.dht_test_utils import (find_hashed_subvol, + find_new_hashed, + find_specific_hashed) +from glustolibs.gluster.brick_libs import bring_bricks_offline +from glustolibs.gluster.glusterfile import move_file + + +@runs_on([['distributed', 'distributed-dispersed', + 'distributed-arbiter', 'distributed-replicated'], + ['glusterfs']]) +class TestCopyFileSubvolDown(GlusterBaseClass): + @classmethod + def setUpClass(cls): + cls.get_super_method(cls, 'setUpClass')() + + # Override the default dist_count value + cls.default_volume_type_config[cls.voltype]['dist_count'] = 4 + + def setUp(self): + + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + self.client, self.m_point = (self.mounts[0].client_system, + self.mounts[0].mountpoint) + + self.subvols = (get_subvols( + self.mnode, self.volname))['volume_subvols'] + self.assertIsNotNone(self.subvols, "Failed to get subvols") + + def tearDown(self): + + # Unmount and cleanup original volume + ret = self.unmount_volume_and_cleanup_volume(mounts=[self.mounts[0]]) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the 
volume and Cleanup") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _create_src_file(self): + """Create a srcfile""" + cmd = "touch {}/srcfile".format(self.m_point) + ret, _, _ = g.run(self.client, cmd) + self.assertEqual(ret, 0, "Failed to create srcfile") + g.log.info("Successfully created srcfile") + + def _find_hash_for_src_file(self): + """Find a new hashsubvol which is different from hash of srcfile""" + src_hash_subvol = find_new_hashed(self.subvols, "/", "srcfile") + new_src_name = str(src_hash_subvol.newname) + src_hash_subvol_count = src_hash_subvol.subvol_count + return new_src_name, src_hash_subvol_count + + def _find_cache_for_src_file(self): + """Find out hash subvol for srcfile which after rename will become + cache subvol""" + src_cache_subvol, src_cache_subvol_count = find_hashed_subvol( + self.subvols, "/", "srcfile") + self.assertIsNotNone(src_cache_subvol, "Could not find src cached") + g.log.info("Cached subvol for srcfile is %s", src_cache_subvol._path) + return src_cache_subvol_count + + def _rename_src(self, new_src_name): + """Rename the srcfile to a new name such that it hashes and + caches to different subvols""" + ret = move_file(self.client, "{}/srcfile".format(self.m_point), + ("{}/".format(self.m_point) + new_src_name)) + self.assertTrue(ret, ("Failed to move file srcfile and {}".format( + new_src_name))) + + def _create_dest_file_find_hash( + self, src_cache_subvol_count, src_hash_subvol_count): + """Find a name for dest file such that it hashed to a subvol different + from the src file's hash and cache subvol""" + # Get subvol list + subvol_list = (get_subvols(self.mnode, self.volname))['volume_subvols'] + self.assertIsNotNone(subvol_list, "Failed to get subvols") + for item in (subvol_list[src_hash_subvol_count], + subvol_list[src_cache_subvol_count]): + subvol_list.remove(item) + + # Find name for dest file + dest_subvol = BrickDir(subvol_list[0][0] + "/" + "/") + dest_file = 
find_specific_hashed(self.subvols, "/", dest_subvol) + self.assertIsNotNone(dest_file, "Could not find hashed for destfile") + + # Create dest file + cmd = "touch {}/{}".format(self.m_point, dest_file.newname) + ret, _, _ = g.run(self.client, cmd) + self.assertEqual(ret, 0, "Failed to create destfile") + g.log.info("Successfully created destfile") + return dest_file.newname, dest_file.subvol_count + + def _kill_subvol(self, subvol_count): + """Bring down the subvol as the subvol_count""" + ret = bring_bricks_offline( + self.volname, self.subvols[subvol_count]) + self.assertTrue(ret, ('Error in bringing down subvolume %s', + self.subvols[subvol_count])) + g.log.info('DHT subvol %s is offline', + self.subvols[subvol_count]) + + def _copy_src_file_to_dest_file( + self, src_file, dest_file, expected="pass"): + """ + Copy src file to dest dest, it will either pass or + fail; as per the scenario + """ + command = "cd {}; cp -r {} {}".format( + self.m_point, src_file, dest_file) + expected_ret = 0 if expected == "pass" else 1 + ret, _, _ = g.run(self.client, command) + self.assertEqual(ret, expected_ret, + "Unexpected, Copy of Src file to dest " + "file status : %s" % (expected)) + g.log.info("Copy of src file to dest file returned as expected") + + def test_copy_srchash_up_desthash_up(self): + """ + Case 1: + 1) Create a volume and start it + 2) Create a src file and a dest file + 3) All subvols are up + 4) Copy src file to dest file + """ + # Create a src file + self._create_src_file() + + # Find out cache subvol for src file + src_cache_count = self._find_cache_for_src_file() + + # Find new hash for src file + src_file_new, src_hash_count = self._find_hash_for_src_file() + + # Rename src file so it hash and cache to different subvol + self._rename_src(src_file_new) + + # Create dest file and find its hash subvol + dest_file, _ = self._create_dest_file_find_hash( + src_cache_count, src_hash_count) + + # Copy src file to dest file + 
self._copy_src_file_to_dest_file(src_file_new, dest_file) + + def test_copy_srccache_down_srchash_up_desthash_down(self): + """ + Case 2: + 1) Create a volume and start it + 2) Create a src file and a dest file + 3) Bring down the cache subvol for src file + 4) Bring down the hash subvol for dest file + 5) Copy src file to dest file + """ + # Create a src file + self._create_src_file() + + # Find out cache subvol for src file + src_cache_count = self._find_cache_for_src_file() + + # Find new hash for src file + src_file_new, src_hash_count = self._find_hash_for_src_file() + + # Rename src file so it hash and cache to different subvol + self._rename_src(src_file_new) + + # Create dest file and find its hash subvol + dest_file, dest_hash_count = self._create_dest_file_find_hash( + src_cache_count, src_hash_count) + + # kill src cache subvol + self._kill_subvol(src_cache_count) + + # Kill dest hash subvol + self._kill_subvol(dest_hash_count) + + # Copy src file to dest file + self._copy_src_file_to_dest_file(src_file_new, dest_file, + expected="fail") + + def test_copy_srccache_down_srchash_up_desthash_up(self): + """ + Case 3: + 1) Create a volume and start it + 2) Create a src file and a dest file + 3) Bring down the cache subvol for src file + 4) Copy src file to dest file + """ + # Create a src file + self._create_src_file() + + # Find out cache subvol for src file + src_cache_count = self._find_cache_for_src_file() + + # Find new hash for src file + src_file_new, src_hash_count = self._find_hash_for_src_file() + + # Rename src file so it hash and cache to different subvol + self._rename_src(src_file_new) + + # Create dest file and find its hash subvol + dest_file, _ = self._create_dest_file_find_hash( + src_cache_count, src_hash_count) + + # kill src cache subvol + self._kill_subvol(src_cache_count) + + # Copy src file to dest file + self._copy_src_file_to_dest_file(src_file_new, dest_file, + expected="fail") + + def test_copy_srchash_down_desthash_down(self): + 
""" + Case 4: + 1) Create a volume and start it + 2) Create a src file and a dest file + 3) Bring down the hash subvol for src file + 4) Bring down the hash subvol for dest file + 5) Copy src file to dest file + """ + # Create a src file + self._create_src_file() + + # Find out cache subvol for src file + src_cache_count = self._find_cache_for_src_file() + + # Find new hash for src file + src_file_new, src_hash_count = self._find_hash_for_src_file() + + # Rename src file so it hash and cache to different subvol + self._rename_src(src_file_new) + + # Create dest file and find its hash subvol + dest_file, dest_hash_count = self._create_dest_file_find_hash( + src_cache_count, src_hash_count) + + # Kill the hashed subvol for src file + self._kill_subvol(src_hash_count) + + # Kill the hashed subvol for dest file + self._kill_subvol(dest_hash_count) + + # Copy src file to dest file + self._copy_src_file_to_dest_file(src_file_new, dest_file, + expected="fail") + + def test_copy_srchash_down_desthash_up(self): + """ + Case 5: + 1) Create a volume and start it + 2) Create a src file and a dest file + 3) Bring down the hash subvol for src file + 4) Copy src file to dest file + """ + # Create a src file + self._create_src_file() + + # Find out cache subvol for src file + src_cache_count = self._find_cache_for_src_file() + + # Find new hash for src file + src_file_new, src_hash_count = self._find_hash_for_src_file() + + # Rename src file so it hash and cache to different subvol + self._rename_src(src_file_new) + + # Create dest file and find its hash subvol + dest_file, _ = self._create_dest_file_find_hash( + src_cache_count, src_hash_count) + + # Kill the hashed subvol for src file + self._kill_subvol(src_hash_count) + + # Copy src file to dest file + self._copy_src_file_to_dest_file(src_file_new, dest_file) + + def test_copy_srchash_up_desthash_down(self): + """ + Case 6: + 1) Create a volume and start it + 2) Create a src file and a dest file + 3) Bring down the hash subvol 
for dest file + 4) Copy src file to dest file + """ + # Create a src file + self._create_src_file() + + # Find out cache subvol for src file + src_cache_count = self._find_cache_for_src_file() + + # Find new hash for src file + src_file_new, src_hash_count = self._find_hash_for_src_file() + + # Rename src file so it hash and cache to different subvol + self._rename_src(src_file_new) + + # Create dest file and find its hash subvol + dest_file, dest_hash_count = self._create_dest_file_find_hash( + src_cache_count, src_hash_count) + + # Kill the hashed subvol for dest file + self._kill_subvol(dest_hash_count) + + # Copy src file to dest file + self._copy_src_file_to_dest_file(src_file_new, dest_file, + expected="fail") diff --git a/tests/functional/dht/test_copy_huge_file_with_remove_brick_in_progress.py b/tests/functional/dht/test_copy_huge_file_with_remove_brick_in_progress.py new file mode 100644 index 000000000..f142637f2 --- /dev/null +++ b/tests/functional/dht/test_copy_huge_file_with_remove_brick_in_progress.py @@ -0,0 +1,111 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.glusterfile import get_md5sum +from glustolibs.gluster.volume_libs import shrink_volume +from glustolibs.io.utils import validate_io_procs, wait_for_io_to_complete + + +@runs_on([['distributed-replicated', 'distributed-arbiter', + 'distributed-dispersed', 'distributed'], ['glusterfs']]) +class TestCopyHugeFileWithRemoveBrickInProgress(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Creating Volume and mounting the volume + ret = self.setup_volume_and_mount_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Volume creation or mount failed: %s" + % self.volname) + + self.first_client = self.mounts[0].client_system + + def tearDown(self): + + # If cp is running then wait for it to complete + if self.cp_running: + if not wait_for_io_to_complete(self.io_proc, [self.mounts[0]]): + g.log.error("I/O failed to stop on clients") + ret, _, _ = g.run(self.first_client, "rm -rf /mnt/huge_file.txt") + if ret: + g.log.error("Failed to remove huge file from /mnt.") + + # Unmounting and cleaning volume + ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Unable to delete volume %s" % self.volname) + + self.get_super_method(self, 'tearDown')() + + def test_copy_huge_file_with_remove_brick_in_progress(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create files and dirs on the mount point. + 3. Start remove-brick and copy huge file when remove-brick is + in progress. + 4. Commit remove-brick and check checksum of orginal and copied file. 
+ """ + # Create a directory with some files inside + cmd = ("cd %s; for i in {1..10}; do mkdir dir$i; for j in {1..5};" + " do dd if=/dev/urandom of=dir$i/file$j bs=1M count=1; done;" + " done" % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, + "Failed to create dirs and files.") + + # Create a hug file under /mnt dir + ret, _, _ = g.run(self.first_client, + "fallocate -l 10G /mnt/huge_file.txt") + self.assertFalse(ret, "Failed to create hug file at /mnt") + + # Copy a huge file when remove-brick is in progress + self.cp_running = False + cmd = ("sleep 60; cd %s;cp ../huge_file.txt ." + % self.mounts[0].mountpoint) + self.io_proc = [g.run_async(self.first_client, cmd)] + self.rename_running = True + + # Start remove-brick on volume and wait for it to complete + ret = shrink_volume(self.mnode, self.volname, rebalance_timeout=1000) + self.assertTrue(ret, "Failed to remove-brick from volume") + g.log.info("Remove-brick rebalance successful") + + # Validate if copy was successful or not + ret = validate_io_procs(self.io_proc, [self.mounts[0]]) + self.assertTrue(ret, "dir rename failed on mount point") + self.cp_running = False + + # Check checksum of orginal and copied file + original_file_checksum = get_md5sum(self.first_client, + "/mnt/huge_file.txt") + copied_file_checksum = get_md5sum(self.first_client, + "{}/huge_file.txt" + .format(self.mounts[0].mountpoint)) + self.assertEqual(original_file_checksum.split(" ")[0], + copied_file_checksum.split(" ")[0], + "md5 checksum of original and copied file are" + " different") + g.log.info("md5 checksum of original and copied file are same.") + + # Remove original huge file + ret, _, _ = g.run(self.first_client, "rm -rf /mnt/huge_file.txt") + self.assertFalse(ret, "Failed to remove huge_file from mount point") diff --git a/tests/functional/dht/test_custom_xattr_healing_for_dir.py b/tests/functional/dht/test_custom_xattr_healing_for_dir.py new file mode 100644 index 
000000000..d5bca0fb3
--- /dev/null
+++ b/tests/functional/dht/test_custom_xattr_healing_for_dir.py
@@ -0,0 +1,332 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+# pylint: disable=protected-access
+# pylint: disable=too-many-statements
+
+from time import sleep
+from glusto.core import Glusto as g
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.glusterfile import (get_fattr, set_fattr,
+                                            delete_fattr)
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.gluster.volume_libs import get_subvols
+from glustolibs.gluster.dht_test_utils import (find_hashed_subvol,
+                                               find_new_hashed)
+from glustolibs.gluster.brick_libs import (get_online_bricks_list,
+                                           bring_bricks_offline)
+from glustolibs.gluster.volume_ops import volume_start
+
+
+@runs_on([['distributed', 'distributed-dispersed',
+           'distributed-arbiter', 'distributed-replicated'],
+          ['glusterfs']])
+class TestCustomXattrHealingForDir(GlusterBaseClass):
+    def setUp(self):
+
+        # Calling GlusterBaseClass setUp
+        self.get_super_method(self, 'setUp')()
+
+        # Setup Volume and Mount Volume
+        ret = self.setup_volume_and_mount_volume([self.mounts[0]])
+        if not ret:
+            raise ExecutionError("Failed to 
Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + self.client, self.m_point = (self.mounts[0].client_system, + self.mounts[0].mountpoint) + + def tearDown(self): + + # Unmount and cleanup original volume + ret = self.unmount_volume_and_cleanup_volume(mounts=[self.mounts[0]]) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _set_xattr_value(self, fattr_value="bar2"): + """Set the xattr 'user.foo' as per the value on dir1""" + # Set the xattr on the dir1 + ret = set_fattr(self.client, '{}/dir1'.format(self.m_point), + 'user.foo', fattr_value) + self.assertTrue(ret, "Failed to set the xattr on dir1") + g.log.info("Successfully set the xattr user.foo with value:" + " %s on dir1", fattr_value) + + def _check_xattr_value_on_mnt(self, expected_value=None): + """Check if the expected value for 'user.foo' + is present for dir1 on mountpoint""" + ret = get_fattr(self.client, '{}/dir1'.format(self.m_point), + 'user.foo', encode="text") + self.assertEqual(ret, expected_value, "Failed to get the xattr" + " on:{}".format(self.client)) + g.log.info( + "The xattr user.foo for dir1 is displayed on mointpoint" + " and has value:%s", expected_value) + + def _check_xattr_value_on_bricks(self, online_bricks, expected_value=None): + """Check if the expected value for 'user.foo'is present + for dir1 on backend bricks""" + for brick in online_bricks: + host, brick_path = brick.split(':') + ret = get_fattr(host, '{}/dir1'.format(brick_path), + 'user.foo', encode="text") + self.assertEqual(ret, expected_value, "Failed to get the xattr" + " on:{}".format(brick_path)) + g.log.info("The xattr user.foo is displayed for dir1 on " + "brick:%s and has value:%s", + brick_path, expected_value) + + def _create_dir(self, dir_name=None): + """Create a directory on the 
mountpoint""" + ret = mkdir(self.client, "{}/{}".format(self.m_point, dir_name)) + self.assertTrue(ret, "mkdir of {} failed".format(dir_name)) + + def _perform_lookup(self): + """Perform lookup on mountpoint""" + cmd = ("ls -lR {}/dir1".format(self.m_point)) + ret, _, _ = g.run(self.client, cmd) + self.assertEqual(ret, 0, "Failed to lookup") + g.log.info("Lookup successful") + sleep(5) + + def _create_xattr_check_self_heal(self): + """Create custom xattr and check if its healed""" + # Set the xattr on the dir1 + self._set_xattr_value(fattr_value="bar2") + + # Get online brick list + online_bricks = get_online_bricks_list(self.mnode, self.volname) + self.assertIsNotNone(online_bricks, "Failed to get online bricks") + + # Check if the custom xattr is being displayed on the + # mount-point for dir1 + self._check_xattr_value_on_mnt(expected_value="bar2") + + # Check if the xattr is being displayed on the online-bricks + # for dir1 + self._check_xattr_value_on_bricks(online_bricks, expected_value="bar2") + + # Modify custom xattr value on dir1 + self._set_xattr_value(fattr_value="ABC") + + # Lookup on moint-point to refresh the value of xattr + self._perform_lookup() + + # Check if the modified custom xattr is being displayed + # on the mount-point for dir1 + self._check_xattr_value_on_mnt(expected_value="ABC") + + # Check if the modified custom xattr is being + # displayed on the bricks for dir1 + self._check_xattr_value_on_bricks(online_bricks, expected_value="ABC") + + # Remove the custom xattr from the mount point for dir1 + ret = delete_fattr(self.client, + '{}/dir1'.format(self.m_point), 'user.foo') + self.assertTrue(ret, "Failed to delete the xattr for " + "dir1 on mountpoint") + g.log.info( + "Successfully deleted the xattr for dir1 from mountpoint") + + # Lookup on moint-point to refresh the value of xattr + self._perform_lookup() + + # Check that the custom xattr is not displayed on the + # for dir1 on mountpoint + ret = get_fattr(self.client, 
'{}/dir1'.format(self.m_point), + 'user.foo', encode="text") + self.assertEqual(ret, None, "Xattr for dir1 is not removed" + " on:{}".format(self.client)) + g.log.info("Success: xattr is removed for dir1 on mointpoint") + + # Check that the custom xattr is not displayed on the + # for dir1 on the backend bricks + for brick in online_bricks: + host, brick_path = brick.split(':') + ret = get_fattr(host, '{}/dir1'.format(brick_path), + 'user.foo', encode="text") + self.assertEqual(ret, None, "Xattr for dir1 is not removed" + " on:{}".format(brick_path)) + g.log.info("Xattr for dir1 is removed from " + "brick:%s", brick_path) + + # Check if the trusted.glusterfs.pathinfo is displayed + # for dir1 on mointpoint + ret = get_fattr(self.client, '{}/dir1'.format(self.m_point), + 'trusted.glusterfs.pathinfo') + self.assertIsNotNone(ret, "Failed to get the xattr" + " on:{}".format(self.client)) + g.log.info("The xattr trusted.glusterfs.pathinfo" + " is displayed on mointpoint for dir1") + + # Set the xattr on the dir1 + self._set_xattr_value(fattr_value="star1") + + # Bring back the bricks online + ret, _, _ = volume_start(self.mnode, self.volname, force=True) + self.assertFalse(ret, 'Failed to start volume %s with "force" option' + % self.volname) + g.log.info('Successfully started volume %s with "force" option', + self.volname) + + # Execute lookup on the mointpoint + self._perform_lookup() + + # Get online brick list + online_bricks = get_online_bricks_list(self.mnode, self.volname) + self.assertIsNotNone(online_bricks, "Failed to get online bricks") + + # Check if the custom xattr is being displayed + # on the mount-point for dir1 + self._check_xattr_value_on_mnt(expected_value="star1") + + # Check if the custom xattr is displayed on all the bricks + self._check_xattr_value_on_bricks(online_bricks, + expected_value="star1") + + def test_custom_xattr_with_subvol_down_dir_exists(self): + """ + Description: + Steps: + 1) Create directories from mount point. 
+ 2) Bring one or more(not all) dht sub-volume(s) down by killing + processes on that server + 3) Create a custom xattr for dir hashed to down sub-volume and also for + another dir not hashing to down sub-volumes + # setfattr -n user.foo -v bar2 <dir> + 4) Verify that custom xattr for directory is displayed on mount point + and bricks for both directories + # getfattr -n user.foo <dir> + # getfattr -n user.foo <brick_path>/<dir> + 5) Modify custom xattr value and verify that custom xattr for directory + is displayed on mount point and all up bricks + # setfattr -n user.foo -v ABC <dir> + 6) Verify that custom xattr is not displayed once you remove it on + mount point and all up bricks + 7) Verify that mount point shows pathinfo xattr for dir hashed to down + sub-volume and also for dir not hashed to down sub-volumes + # getfattr -n trusted.glusterfs.pathinfo <dir> + 8) Again create a custom xattr for dir not hashing to down sub-volumes + # setfattr -n user.foo -v star1 <dir> + 9) Bring up the sub-volumes + 10) Execute lookup on parent directory of both <dir> from mount point + 11) Verify Custom extended attributes for dir1 on all bricks + """ + # pylint: disable=protected-access + # Create dir1 on client0 + self._create_dir(dir_name="dir1") + + # Get subvol list + subvols = (get_subvols(self.mnode, self.volname))['volume_subvols'] + self.assertIsNotNone(subvols, "Failed to get subvols") + + # Finding a dir name such that it hashes to a different subvol + newhash = find_new_hashed(subvols, "/", "dir1") + new_name = str(newhash.newname) + new_subvol_count = newhash.subvol_count + + # Create a dir with the new name + self._create_dir(dir_name=new_name) + + # Kill the brick/subvol to which the new dir hashes + ret = bring_bricks_offline( + self.volname, subvols[new_subvol_count]) + self.assertTrue(ret, ('Error in bringing down subvolume %s', + subvols[new_subvol_count])) + g.log.info('DHT subvol %s is offline', subvols[new_subvol_count]) + + # Set the xattr on dir 
hashing to down subvol + ret = set_fattr(self.client, '{}/{}'.format(self.m_point, new_name), + 'user.foo', 'bar2') + self.assertFalse(ret, "Unexpected: custom xattr set successfully" + " for dir hashing to down subvol") + g.log.info("Expected: Failed to set xattr on dir:%s" + " which hashes to down subvol due to error: Transport" + " endpoint not connected", new_name) + + # Check if the trusted.glusterfs.pathinfo is displayed + # for dir hashing to down subvol on mointpoint + ret = get_fattr(self.client, '{}/{}'.format( + self.m_point, new_name), 'trusted.glusterfs.pathinfo') + self.assertIsNotNone(ret, "Failed to get the xattr" + " on:{}".format(self.client)) + g.log.info("The xattr trusted.glusterfs.pathinfo" + " is displayed on mointpoint for %s", new_name) + + # Set the xattr on dir hashing to down subvol + ret = set_fattr(self.client, '{}/{}'.format(self.m_point, new_name), + 'user.foo', 'star1') + self.assertFalse(ret, "Unexpected: custom xattr set successfully" + " for dir hashing to down subvol") + g.log.info("Expected: Tansport endpoint not connected") + + # Calling the local function + self._create_xattr_check_self_heal() + + def test_custom_xattr_with_subvol_down_dir_doesnt_exists(self): + """ + Description: + Steps: + 1) Bring one or more(not all) dht sub-volume(s) down by killing + processes on that server + 2) Create a directory from mount point such that it + hashes to up subvol. 
+ 3) Create a custom xattr for dir + # setfattr -n user.foo -v bar2 <dir> + 4) Verify that custom xattr for directory is displayed on mount point + and bricks for directory + # getfattr -n user.foo <dir> + # getfattr -n user.foo <brick_path>/<dir> + 5) Modify custom xattr value and verify that custom xattr for directory + is displayed on mount point and all up bricks + # setfattr -n user.foo -v ABC <dir> + 6) Verify that custom xattr is not displayed once you remove it on + mount point and all up bricks + 7) Verify that mount point shows pathinfo xattr for dir + 8) Again create a custom xattr for dir + # setfattr -n user.foo -v star1 <dir> + 9) Bring up the sub-volumes + 10) Execute lookup on parent directory of both <dir> from mount point + 11) Verify Custom extended attributes for dir1 on all bricks + """ + # Get subvol list + subvols = (get_subvols(self.mnode, self.volname))['volume_subvols'] + self.assertIsNotNone(subvols, "Failed to get subvols") + + # Find out the hashed subvol for dir1 + hashed_subvol, subvol_count = find_hashed_subvol(subvols, "/", "dir1") + self.assertIsNotNone(hashed_subvol, "Could not find srchashed") + g.log.info("Hashed subvol for dir1 is %s", hashed_subvol._path) + + # Remove the hashed_subvol from subvol list + subvols.remove(subvols[subvol_count]) + + # Bring down a dht subvol + ret = bring_bricks_offline(self.volname, subvols[0]) + self.assertTrue(ret, ('Error in bringing down subvolume %s', + subvols[0])) + g.log.info('DHT subvol %s is offline', subvols[0]) + + # Create the dir1 + self._create_dir(dir_name="dir1") + + # Calling the local function + self._create_xattr_check_self_heal() diff --git a/tests/functional/dht/test_delete_dir_with_self_pointing_linkto_files.py b/tests/functional/dht/test_delete_dir_with_self_pointing_linkto_files.py new file mode 100644 index 000000000..f4541e2e8 --- /dev/null +++ b/tests/functional/dht/test_delete_dir_with_self_pointing_linkto_files.py @@ -0,0 +1,140 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_ops import remove_brick +from glustolibs.gluster.glusterdir import mkdir, get_dir_contents +from glustolibs.gluster.glusterfile import set_fattr, get_dht_linkto_xattr +from glustolibs.gluster.rebalance_ops import wait_for_remove_brick_to_complete +from glustolibs.gluster.volume_libs import form_bricks_list_to_remove_brick + + +@runs_on([['distributed'], ['glusterfs']]) +class TestDeletDirWithSelfPointingLinktofiles(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Changing dist_count to 2 + self.volume['voltype']['dist_count'] = 2 + + # Creating Volume and mounting the volume + ret = self.setup_volume_and_mount_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Volume creation or mount failed: %s" + % self.volname) + + # Assign a variable for the first_client + self.first_client = self.mounts[0].client_system + + def tearDown(self): + + # Unmounting and cleaning volume + ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Unable to delete volume %s" % 
self.volname) + + self.get_super_method(self, 'tearDown')() + + def test_delete_dir_with_self_pointing_linkto_files(self): + """ + Test case: + 1. Create a pure distribute volume with 2 bricks, start and mount it. + 2. Create dir dir0/dir1/dir2 inside which create 1000 files and rename + all the files. + 3. Start remove-brick operation on the volume. + 4. Check remove-brick status till status is completed. + 5. When remove-brick status is completed stop it. + 6. Go to brick used for remove brick and perform lookup on the files. + 8. Change the linkto xattr value for every file in brick used for + remove brick to point to itself. + 9. Perfrom rm -rf * from mount point. + """ + # Create dir /dir0/dir1/dir2 + self.dir_path = "{}/dir0/dir1/dir2/".format(self.mounts[0].mountpoint) + ret = mkdir(self.first_client, self.dir_path, parents=True) + self.assertTrue(ret, "Failed to create /dir0/dir1/dir2/ dir") + + # Create 1000 files inside /dir0/dir1/dir2 + ret, _, _ = g.run(self.first_client, + 'cd %s;for i in {1..1000}; do echo "Test file" ' + '> tfile-$i; done' % self.dir_path) + self.assertFalse(ret, + "Failed to create 1000 files inside /dir0/dir1/dir2") + + # Rename 1000 files present inside /dir0/dir1/dir2 + ret, _, _ = g.run(self.first_client, + "cd %s;for i in {1..1000};do mv tfile-$i " + "ntfile-$i;done" % self.dir_path) + self.assertFalse(ret, + "Failed to rename 1000 files inside /dir0/dir1/dir2") + g.log.info("I/O successful on mount point.") + + # Start remove-brick operation on the volume + brick = form_bricks_list_to_remove_brick(self.mnode, self.volname, + subvol_num=1) + self.assertIsNotNone(brick, "Brick_list is empty") + ret, _, _ = remove_brick(self.mnode, self.volname, brick, 'start') + self.assertFalse(ret, "Failed to start remov-brick on volume") + + # Check remove-brick status till status is completed + ret = wait_for_remove_brick_to_complete(self.mnode, self.volname, + brick) + self.assertTrue(ret, "Remove-brick didn't complete on volume") + + # 
When remove-brick status is completed stop it + ret, _, _ = remove_brick(self.mnode, self.volname, brick, 'stop') + self.assertFalse(ret, "Failed to start remov-brick on volume") + g.log.info("Successfully started and stopped remove-brick") + + # Go to brick used for remove brick and perform lookup on the files + node, path = brick[0].split(":") + path = "{}/dir0/dir1/dir2/".format(path) + ret, _, _ = g.run(node, 'ls {}*'.format(path)) + self.assertFalse(ret, "Failed to do lookup on %s" % brick[0]) + + # Change the linkto xattr value for every file in brick used for + # remove brick to point to itself + ret = get_dir_contents(node, path) + self.assertIsNotNone(ret, + "Unable to get files present in dir0/dir1/dir2") + + ret = get_dht_linkto_xattr(node, "{}{}".format(path, ret[0])) + self.assertIsNotNone(ret, "Unable to fetch dht linkto xattr") + + # Change trusted.glusterfs.dht.linkto from dist-client-0 to + # dist-client-1 or visa versa according to initial value + dht_linkto_xattr = ret.split("-") + if int(dht_linkto_xattr[2]): + dht_linkto_xattr[2] = "0" + else: + dht_linkto_xattr[2] = "1" + linkto_value = "-".join(dht_linkto_xattr) + + # Set xattr trusted.glusterfs.dht.linkto on all the linkto files + ret = set_fattr(node, '{}*'.format(path), + 'trusted.glusterfs.dht.linkto', linkto_value) + self.assertTrue(ret, + "Failed to change linkto file to point to itself") + + # Perfrom rm -rf * from mount point + ret, _, _ = g.run(self.first_client, + "rm -rf {}/*".format(self.mounts[0].mountpoint)) + self.assertFalse(ret, "Failed to run rm -rf * on mount point") + g.log.info("rm -rf * successful on mount point") diff --git a/tests/functional/dht/test_delete_file_picked_for_migration.py b/tests/functional/dht/test_delete_file_picked_for_migration.py new file mode 100644 index 000000000..2d66ec63b --- /dev/null +++ b/tests/functional/dht/test_delete_file_picked_for_migration.py @@ -0,0 +1,165 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.rebalance_ops import ( + get_rebalance_status, rebalance_start) +from glustolibs.gluster.volume_libs import (get_subvols, + form_bricks_list_to_add_brick, + log_volume_info_and_status) +from glustolibs.gluster.dht_test_utils import find_new_hashed +from glustolibs.gluster.glusterfile import move_file, is_linkto_file +from glustolibs.gluster.brick_ops import add_brick +from glustolibs.gluster.brick_libs import get_all_bricks + + +@runs_on([['distributed', 'distributed-replicated', + 'distributed-dispersed', 'distributed-arbiter'], + ['glusterfs']]) +class DeleteFileInMigration(GlusterBaseClass): + def setUp(self): + """ + Setup and mount volume or raise ExecutionError + """ + self.get_super_method(self, 'setUp')() + + # Setup Volume + ret = self.setup_volume_and_mount_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Failed to Setup and Mount Volume") + + # Form brick list for add-brick operation + self.add_brick_list = form_bricks_list_to_add_brick( + self.mnode, self.volname, self.servers, self.all_servers_info, + distribute_count=1) + if not 
self.add_brick_list: + raise ExecutionError("Volume %s: Failed to form bricks list for" + " add-brick" % self.volname) + g.log.info("Volume %s: Formed bricks list for add-brick operation", + (self.add_brick_list, self.volname)) + + def tearDown(self): + + # Unmount Volume and Cleanup Volume + ret = self.unmount_volume_and_cleanup_volume(mounts=[self.mounts[0]]) + if not ret: + raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") + g.log.info("Successful in Unmount Volume and Cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_delete_file_in_migration(self): + """ + Verify that if a file is picked for migration and then deleted, the + file should be removed successfully. + * First create a big data file of 10GB. + * Rename that file, such that after rename a linkto file is created + (we are doing this to make sure that file is picked for migration.) + * Add bricks to the volume and trigger rebalance using force option. + * When the file has been picked for migration, delete that file from + the mount point. + * Check whether the file has been deleted or not on the mount-point + as well as the back-end bricks. 
+ """ + + # pylint: disable=too-many-statements + # pylint: disable=too-many-locals + # pylint: disable=protected-access + + mountpoint = self.mounts[0].mountpoint + + # Location of source file + src_file = mountpoint + '/file1' + + # Finding a file name such that renaming source file to it will form a + # linkto file + subvols = (get_subvols(self.mnode, self.volname))['volume_subvols'] + newhash = find_new_hashed(subvols, "/", "file1") + new_name = str(newhash.newname) + new_host = str(newhash.hashedbrickobject._host) + new_name_path = str(newhash.hashedbrickobject._fqpath)[:-2] + + # Location of destination file to which source file will be renamed + dst_file = '{}/{}'.format(mountpoint, new_name) + # Create a 10GB file source file + cmd = ("dd if=/dev/urandom of={} bs=1024K count=10000".format( + src_file)) + ret, _, _ = g.run(self.clients[0], cmd) + self.assertEqual(ret, 0, ("File {} creation failed".format(src_file))) + + # Move file such that it hashes to some other subvol and forms linkto + # file + ret = move_file(self.clients[0], src_file, dst_file) + self.assertTrue(ret, "Rename failed") + g.log.info('Renamed file %s to %s', src_file, dst_file) + + # Check if "file_two" is linkto file + ret = is_linkto_file(new_host, + '{}/{}'.format(new_name_path, new_name)) + self.assertTrue(ret, "File is not a linkto file") + g.log.info("File is linkto file") + + # Expanding volume by adding bricks to the volume + ret, _, _ = add_brick(self.mnode, self.volname, + self.add_brick_list, force=True) + self.assertEqual(ret, 0, ("Volume {}: Add-brick failed".format + (self.volname))) + g.log.info("Volume %s: add-brick successful", self.volname) + + # Log Volume Info and Status after expanding the volume + log_volume_info_and_status(self.mnode, self.volname) + + # Start Rebalance + ret, _, _ = rebalance_start(self.mnode, self.volname, force=True) + self.assertEqual(ret, 0, ("Volume {}: Failed to start rebalance".format + (self.volname))) + g.log.info("Volume %s : Rebalance 
started ", self.volname) + + # Check if rebalance is running and delete the file + status_info = get_rebalance_status(self.mnode, self.volname) + status = status_info['aggregate']['statusStr'] + self.assertEqual(status, 'in progress', "Rebalance is not running") + ret, _, _ = g.run(self.clients[0], (" rm -rf {}".format(dst_file))) + self.assertEqual(ret, 0, ("Cannot delete file {}".format + (dst_file))) + g.log.info("File is deleted") + + # Check if the file is present on the mount point + ret, _, _ = g.run(self.clients[0], ("ls -l {}".format(dst_file))) + self.assertEqual(ret, 2, ("Failed to delete file {}".format + (dst_file))) + + # Check if the file is present on the backend bricks + bricks = get_all_bricks(self.mnode, self.volname) + for brick in bricks: + node, brick_path = brick.split(':') + ret, _, _ = g.run(node, "ls -l {}/{}".format + (brick_path, new_name)) + self.assertEqual(ret, 2, "File is still present on" + " back-end brick: {}".format( + brick_path)) + g.log.info("File is deleted from back-end brick: %s", brick_path) + + # Check if rebalance process is still running + for server in self.servers: + ret, _, _ = g.run(server, "pgrep rebalance") + self.assertEqual(ret, 1, ("Rebalance process is still" + " running on server {}".format + (server))) + g.log.info("Rebalance process is not running") diff --git a/tests/functional/dht/test_dht_copy_dir.py b/tests/functional/dht/test_dht_copy_dir.py index 60ff4406a..a2308b5c2 100644 --- a/tests/functional/dht/test_dht_copy_dir.py +++ b/tests/functional/dht/test_dht_copy_dir.py @@ -202,17 +202,17 @@ class DhtCopyTest(GlusterBaseClass): raise ExecutionError("Failed to Setup_Volume and Mount_Volume") g.log.info("Successful in Setup Volume and Mount Volume") - @classmethod - def tearDownClass(cls): + def tearDown(self): - # Cleanup Volume - g.log.info("Starting to clean up Volume %s", cls.volname) - ret = cls.unmount_volume_and_cleanup_volume(cls.mounts) + # Unmount and cleanup original volume + 
g.log.info("Starting to Unmount Volume and Cleanup Volume") + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: - raise ExecutionError("Failed to create volume") - g.log.info("Successful in cleaning up Volume %s", cls.volname) + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") - cls.get_super_method(cls, 'tearDownClass')() + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() def test_copy_directory(self): diff --git a/tests/functional/dht/test_dht_create_dir.py b/tests/functional/dht/test_dht_create_dir.py index 4b3cbbde6..d3604dcbc 100644 --- a/tests/functional/dht/test_dht_create_dir.py +++ b/tests/functional/dht/test_dht_create_dir.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2019 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,6 +17,7 @@ # pylint: disable=too-many-statements, undefined-loop-variable # pylint: disable=too-many-branches,too-many-locals,pointless-string-statement +from re import search from glusto.core import Glusto as g from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on @@ -33,10 +34,9 @@ Description: tests to check the dht layouts of files and directories, """ -@runs_on([['replicated', - 'distributed', - 'distributed-replicated', - 'dispersed', 'distributed-dispersed'], +@runs_on([['distributed', 'distributed-replicated', + 'dispersed', 'distributed-dispersed', 'replicated', + 'arbiter', 'distributed-arbiter'], ['glusterfs']]) class TestDhtClass(GlusterBaseClass): @@ -44,34 +44,30 @@ class TestDhtClass(GlusterBaseClass): Description: tests to check the dht layouts of files and directories, along with their symlinks. 
""" - @classmethod - def setUpClass(cls): + def setUp(self): - # Calling GlusterBaseClass setUpClass - cls.get_super_method(cls, 'setUpClass')() + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() # Setup Volume and Mount Volume - g.log.info("Starting to Setup Volume and Mount Volume") - ret = cls.setup_volume_and_mount_volume(cls.mounts) + ret = self.setup_volume_and_mount_volume(self.mounts) if not ret: raise ExecutionError("Failed to Setup_Volume and Mount_Volume") g.log.info("Successful in Setup Volume and Mount Volume") - @classmethod - def tearDownClass(cls): + def tearDown(self): - # Cleanup Volume - g.log.info("Starting to clean up Volume %s", cls.volname) - ret = cls.unmount_volume_and_cleanup_volume(cls.mounts) + # Unmount and cleanup original volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: - raise ExecutionError("Failed to clean-up volume") - g.log.info("Successful in cleaning up Volume %s", cls.volname) + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") - cls.get_super_method(cls, 'tearDownClass')() + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() def test_create_directory(self): - g.log.info("creating multiple,multilevel directories") m_point = self.mounts[0].mountpoint command = 'mkdir -p ' + m_point + '/root_dir/test_dir{1..3}' ret, _, _ = g.run(self.mounts[0].client_system, command) @@ -82,8 +78,6 @@ class TestDhtClass(GlusterBaseClass): self.assertEqual(ret, 0, "ls failed on parent directory:root_dir") g.log.info("ls on parent directory: successful") - g.log.info("creating files at different directory levels inside %s", - self.mounts[0].mountpoint) command = 'touch ' + m_point + \ '/root_dir/test_file{1..5} ' + m_point + \ '/root_dir/test_dir{1..3}/test_file{1..5}' @@ -91,7 +85,7 @@ class TestDhtClass(GlusterBaseClass): self.assertEqual(ret, 0, "File creation: failed") command 
= 'ls ' + m_point + '/root_dir' ret, out, _ = g.run(self.mounts[0].client_system, command) - self.assertEqual(ret, 0, "can't list the created directories") + self.assertEqual(ret, 0, "Failed to list the created directories") list_of_files_and_dirs = out.split('\n') flag = True for x_count in range(3): @@ -104,25 +98,20 @@ class TestDhtClass(GlusterBaseClass): flag = False self.assertTrue(flag, "ls command didn't list all the " "directories and files") - g.log.info("creation of files at multiple levels successful") + g.log.info("Creation of files at multiple levels successful") - g.log.info("creating a list of all directories") command = 'cd ' + m_point + ';find root_dir -type d -print' ret, out, _ = g.run(self.mounts[0].client_system, command) - self.assertEqual(ret, 0, "creation of directory list failed") + self.assertEqual(ret, 0, "Creation of directory list failed") list_of_all_dirs = out.split('\n') del list_of_all_dirs[-1] - g.log.info("verifying that all the directories are present on " - "every brick and the layout ranges are correct") flag = validate_files_in_dir(self.clients[0], m_point + '/root_dir', test_type=k.TEST_LAYOUT_IS_COMPLETE) self.assertTrue(flag, "Layout has some holes or overlaps") g.log.info("Layout is completely set") - g.log.info("Checking if gfid xattr of directories is displayed and" - "is same on all the bricks on the server node") brick_list = get_all_bricks(self.mnode, self.volname) for direc in list_of_all_dirs: list_of_gfid = [] @@ -139,13 +128,11 @@ class TestDhtClass(GlusterBaseClass): for x_count in range(len(list_of_gfid) - 1): if list_of_gfid[x_count] != list_of_gfid[x_count + 1]: flag = False - self.assertTrue(flag, ("the gfid for the directory %s is not " + self.assertTrue(flag, ("The gfid for the directory %s is not " "same on all the bricks", direc)) - g.log.info("the gfid for each directory is the same on all the " + g.log.info("The gfid for each directory is the same on all the " "bricks") - g.log.info("Verify that for 
all directories mount point " - "should not display xattr") for direc in list_of_all_dirs: list_of_xattrs = get_fattr_list(self.mounts[0].client_system, self.mounts[0].mountpoint @@ -157,13 +144,11 @@ class TestDhtClass(GlusterBaseClass): g.log.info("Verified : mount point not displaying important " "xattrs") - g.log.info("Verifying that for all directories only mount point " - "shows pathinfo xattr") for direc in list_of_all_dirs: fattr = get_fattr(self.mounts[0].client_system, self.mounts[0].mountpoint+'/'+direc, 'trusted.glusterfs.pathinfo') - self.assertTrue(fattr, ("pathinfo not displayed for the " + self.assertTrue(fattr, ("Pathinfo not displayed for the " "directory %s on mount point", direc)) brick_list = get_all_bricks(self.mnode, self.volname) for direc in list_of_all_dirs: @@ -178,118 +163,97 @@ class TestDhtClass(GlusterBaseClass): def test_create_link_for_directory(self): - g.log.info("creating a directory at mount point") m_point = self.mounts[0].mountpoint - test_dir_path = 'test_dir' - fqpath = m_point + '/' + test_dir_path + fqpath_for_test_dir = m_point + '/test_dir' + flag = mkdir(self.clients[0], fqpath_for_test_dir, True) + self.assertTrue(flag, "Failed to create a directory") + fqpath = m_point + '/test_dir/dir{1..3}' flag = mkdir(self.clients[0], fqpath, True) - self.assertTrue(flag, "failed to create a directory") - fqpath = m_point + '/' + test_dir_path + '/dir{1..3}' - flag = mkdir(self.clients[0], fqpath, True) - self.assertTrue(flag, "failed to create sub directories") + self.assertTrue(flag, "Failed to create sub directories") flag = validate_files_in_dir(self.clients[0], - m_point + '/test_dir', + fqpath_for_test_dir, test_type=k.TEST_LAYOUT_IS_COMPLETE) - self.assertTrue(flag, "layout of test directory is complete") - g.log.info("directory created successfully") + self.assertTrue(flag, "Layout of test directory is not complete") + g.log.info("Layout for directory is complete") - g.log.info("creating a symlink for test_dir") 
sym_link_path = m_point + '/' + 'test_sym_link' - command = 'ln -s ' + m_point + '/test_dir ' + sym_link_path + command = 'ln -s ' + fqpath_for_test_dir + ' ' + sym_link_path ret, _, _ = g.run(self.mounts[0].client_system, command) - self.assertEqual(ret, 0, "failed to create symlink for test_dir") + self.assertEqual(ret, 0, "Failed to create symlink for test_dir") command = 'stat ' + sym_link_path ret, out, _ = g.run(self.mounts[0].client_system, command) - self.assertEqual(ret, 0, "stat command didn't return the details " + self.assertEqual(ret, 0, "Stat command didn't return the details " "correctly") flag = False - g.log.info("checking if the link is symbolic") if 'symbolic link' in out: flag = True - self.assertTrue(flag, "the type of the link is not symbolic") - g.log.info("the link is symbolic") - g.log.info("checking if the sym link points to right directory") - index_start = out.find('->') + 6 - index_end = out.find("\n") - 3 - dir_pointed = out[index_start:index_end] + self.assertTrue(flag, "The type of the link is not symbolic") + g.log.info("The link is symbolic") flag = False - if dir_pointed == m_point + '/' + test_dir_path: + if search(fqpath_for_test_dir, out): flag = True self.assertTrue(flag, "sym link does not point to correct " "location") g.log.info("sym link points to right directory") g.log.info("The details of the symlink are correct") - g.log.info("verifying that inode number of the test_dir " - "and its sym link are different") - command = 'ls -id ' + m_point + '/' + \ - test_dir_path + ' ' + sym_link_path + command = 'ls -id ' + fqpath_for_test_dir + ' ' + sym_link_path ret, out, _ = g.run(self.mounts[0].client_system, command) - self.assertEqual(ret, 0, "inode numbers not retrieved by the " + self.assertEqual(ret, 0, "Inode numbers not retrieved by the " "ls command") list_of_inode_numbers = out.split('\n') - flag = True if (list_of_inode_numbers[0].split(' ')[0] == list_of_inode_numbers[1].split(' ')[0]): flag = False - 
self.assertTrue(flag, "the inode numbers of the dir and sym link " + self.assertTrue(flag, "The inode numbers of the dir and sym link " "are same") - g.log.info("verified: inode numbers of the test_dir " + g.log.info("Verified: inode numbers of the test_dir " "and its sym link are different") - g.log.info("listing the contents of the test_dir from its sym " - "link") command = 'ls ' + sym_link_path ret, out1, _ = g.run(self.mounts[0].client_system, command) - self.assertEqual(ret, 0, "failed to list the contents using the " + self.assertEqual(ret, 0, "Failed to list the contents using the " "sym link") - command = 'ls ' + m_point + '/' + test_dir_path + command = 'ls ' + fqpath_for_test_dir ret, out2, _ = g.run(self.mounts[0].client_system, command) - self.assertEqual(ret, 0, "failed to list the contents of the " + self.assertEqual(ret, 0, "Failed to list the contents of the " "test_dir using ls command") flag = False if out1 == out2: flag = True - self.assertTrue(flag, "the contents listed using the sym link " + self.assertTrue(flag, "The contents listed using the sym link " "are not the same") - g.log.info("the contents listed using the symlink are" + g.log.info("The contents listed using the symlink are" " the same as that of the test_dir") - g.log.info("verifying that mount point doesn't display important " - "xattrs using the symlink") command = 'getfattr -d -m . 
-e hex ' + sym_link_path ret, out, _ = g.run(self.mounts[0].client_system, command) self.assertEqual(ret, 0, "failed to retrieve xattrs") list_xattrs = ['trusted.gfid', 'trusted.glusterfs.dht'] - flag = True for xattr in list_xattrs: if xattr in out: flag = False - self.assertTrue(flag, "important xattrs are being compromised" + self.assertTrue(flag, "Important xattrs are being compromised" " using the symlink at the mount point") - g.log.info("verified: mount point doesn't display important " + g.log.info("Verified: mount point doesn't display important " "xattrs using the symlink") - g.log.info("verifying that mount point shows path info xattr for the" - " test_dir and sym link and is same for both") path_info_1 = get_pathinfo(self.mounts[0].client_system, - m_point + '/' + test_dir_path) + fqpath_for_test_dir) path_info_2 = get_pathinfo(self.mounts[0].client_system, sym_link_path) if path_info_1 == path_info_2: flag = True - self.assertTrue(flag, "pathinfos for test_dir and its sym link " + self.assertTrue(flag, "Pathinfos for test_dir and its sym link " "are not same") - g.log.info("pathinfos for test_dir and its sym link are same") + g.log.info("Pathinfos for test_dir and its sym link are same") - g.log.info("verifying readlink on sym link at mount point returns " - "the name of the directory") command = 'readlink ' + sym_link_path ret, out, _ = g.run(self.mounts[0].client_system, command) self.assertEqual(ret, 0, "readlink command returned an error") flag = False - if out.rstrip() == m_point + '/' + test_dir_path: + if out.rstrip() == fqpath_for_test_dir: flag = True self.assertTrue(flag, "readlink did not return the path of the " "test_dir") diff --git a/tests/functional/dht/test_dht_custom_xattr.py b/tests/functional/dht/test_dht_custom_xattr.py new file mode 100644 index 000000000..fa2ad8cdb --- /dev/null +++ b/tests/functional/dht/test_dht_custom_xattr.py @@ -0,0 +1,257 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +# pylint: disable=too-many-locals +# pylint: disable=too-many-branches,too-many-statements,too-many-function-args + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, + runs_on) +from glustolibs.gluster.glusterfile import (get_fattr, set_fattr, + create_link_file, + delete_fattr) +from glustolibs.gluster.glusterdir import get_dir_contents +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.lib_utils import (append_string_to_file) +from glustolibs.gluster.dht_test_utils import validate_files_in_dir +import glustolibs.gluster.constants as k + + +@runs_on([['distributed', 'distributed-replicated', + 'distributed-dispersed', 'distributed-arbiter'], + ['glusterfs']]) +class TestDhtCustomXattrClass(GlusterBaseClass): + + def check_custom_xattr_visible(self, xattr_val): + """ + Check custom xttar from mount point and on bricks. 
+ """ + # Check custom xattr from mount point + for mount_object in self.mounts: + for fname in self.files_and_soft_links: + attr_val = get_fattr(mount_object.client_system, + fname, 'user.foo', + encode='text') + self.assertEqual(attr_val, xattr_val, + "Custom xattr not found from mount.") + g.log.info("Custom xattr found on mount point.") + + # Check custom xattr on bricks + for brick in get_all_bricks(self.mnode, self.volname): + node, brick_path = brick.split(':') + files_on_bricks = get_dir_contents(node, brick_path) + files = [ + fname.split('/')[3] for fname in self.list_of_files + if fname.split('/')[3] in files_on_bricks] + for fname in files: + attr_val = get_fattr(node, + "{}/{}".format(brick_path, fname), + 'user.foo', encode='text') + self.assertEqual(attr_val, xattr_val, + "Custom xattr not visible on bricks") + g.log.info("Custom xattr found on bricks.") + + def delete_xattr_user_foo(self, list_of_files): + """ + Removes xattr user.foo from all the files. + """ + for fname in list_of_files: + ret = delete_fattr(self.client_node, fname, 'user.foo') + self.assertTrue(ret, "Unable to remove custom xattr for " + "file {}".format(fname)) + g.log.info("Successfully removed custom xattr for each file.") + + def set_xattr_user_foo(self, list_of_files, xattr_val): + """ + sets xattr user.foo on all the files. + """ + for fname in list_of_files: + ret = set_fattr(self.client_node, fname, 'user.foo', + xattr_val) + self.assertTrue(ret, "Unable to create custom xattr " + "for file {}".format(fname)) + g.log.info("Successfully created a custom xattr for all files.") + + def check_for_trusted_glusterfs_pathinfo(self, list_of_files): + """ + Check if trusted.glusterfs.pathinfo is visible. 
+ """ + for fname in list_of_files: + ret = get_fattr(self.client_node, fname, + 'trusted.glusterfs.pathinfo') + self.assertIsNotNone(ret, "pathinfo not visible") + g.log.info("Mount point shows pathinfo xattr for " + "all files") + + def check_mount_point_and_bricks_for_xattr(self, list_of_all_files): + """ + Check xattr on mount point and bricks. + """ + # Check if xattr is visable from mount point + for mount_object in self.mounts: + for fname in list_of_all_files: + ret = get_fattr(mount_object.client_system, + fname, 'user.foo', encode='text') + self.assertIsNone(ret, + "Custom attribute visible at mount " + "point even after deletion") + + # Check if xattr is visable from bricks + for brick in get_all_bricks(self.mnode, self.volname): + node, brick_path = brick.split(':') + files_on_bricks = get_dir_contents(node, brick_path) + files = [ + fname.split('/')[3] for fname in self.list_of_files + if fname.split('/')[3] in files_on_bricks] + for fname in files: + ret = get_fattr(node, "{}/{}".format(brick_path, fname), + 'user.foo', encode='text') + self.assertIsNone(ret, + "Custom attribute visible on " + "brick even after deletion") + + g.log.info("Custom xattr for file is not visible on " + "mount point and bricks") + + def setUp(self): + + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + + # Cleanup Volume + ret = self.unmount_volume_and_cleanup_volume(self.mounts) + if not ret: + raise ExecutionError("volume clean up failed") + g.log.info("Successful in cleaning up Volume %s", self.volname) + + self.get_super_method(self, 'tearDown')() + + def test_dht_custom_xattr(self): + """ + Test case: + 1.Create a gluster volume and start it. + 2.Create file and link files. 
+ 3.Create a custom xattr for file. + 4.Verify that xattr for file is displayed on + mount point and bricks + 5.Modify custom xattr value and verify that xattr + for file is displayed on mount point and bricks + 6.Verify that custom xattr is not displayed + once you remove it + 7.Create a custom xattr for symbolic link. + 8.Verify that xattr for symbolic link + is displayed on mount point and sub-volume + 9.Modify custom xattr value and verify that + xattr for symbolic link is displayed on + mount point and bricks + 10.Verify that custom xattr is not + displayed once you remove it. + """ + # Initializing variables + mount_point = self.mounts[0].mountpoint + self.client_node = self.mounts[0].client_system + self.list_of_files, list_of_softlinks = [], [] + list_of_hardlinks = [] + + for number in range(1, 3): + + # Create regular files + fname = '{0}/regular_file_{1}'.format(mount_point, + str(number)) + ret = append_string_to_file(self.client_node, fname, + 'Sample content for file.') + self.assertTrue(ret, "Unable to create regular file " + "{}".format(fname)) + self.list_of_files.append(fname) + + # Create hard link for file + hardlink = '{0}/link_file_{1}'.format(mount_point, + str(number)) + ret = create_link_file(self.client_node, fname, hardlink) + self.assertTrue(ret, "Unable to create hard link file " + "{}".format(hardlink)) + list_of_hardlinks.append(hardlink) + + # Create soft link for file + softlink = '{0}/symlink_file_{1}'.format(mount_point, + str(number)) + ret = create_link_file(self.client_node, fname, softlink, + soft=True) + self.assertTrue(ret, "Unable to create symlink file " + "{}".format(softlink)) + list_of_softlinks.append(softlink) + + self.files_and_soft_links = self.list_of_files + list_of_softlinks + + # Check if files are created on the right subvol + ret = validate_files_in_dir( + self.client_node, mount_point, file_type=k.FILETYPE_FILES, + test_type=k.TEST_FILE_EXISTS_ON_HASHED_BRICKS) + self.assertTrue(ret, "Files not created on 
correct sub-vols") + g.log.info("Files are on correct sub-vols according to " + "the hash value") + + # Set custom xattr on all the regular files + self.set_xattr_user_foo(self.list_of_files, 'bar2') + + # Check if custom xattr is set to all the regular files + self.check_custom_xattr_visible("bar2") + + # Change the custom xattr on all the regular files + self.set_xattr_user_foo(self.list_of_files, 'ABC') + + # Check if xattr is set to all the regular files + self.check_custom_xattr_visible("ABC") + + # Delete Custom xattr from all regular files + self.delete_xattr_user_foo(self.list_of_files) + + # Check mount point and brick for the xattr + list_of_all_files = list_of_hardlinks + self.files_and_soft_links + self.check_mount_point_and_bricks_for_xattr(list_of_all_files) + + # Check if pathinfo xattr is visible + self.check_for_trusted_glusterfs_pathinfo(self.list_of_files) + + # Set custom xattr on all the regular files + self.set_xattr_user_foo(list_of_softlinks, 'bar2') + + # Check if custom xattr is set to all the regular files + self.check_custom_xattr_visible("bar2") + + # Change the custom xattr on all the regular files + self.set_xattr_user_foo(list_of_softlinks, 'ABC') + + # Check if xattr is set to all the regular files + self.check_custom_xattr_visible("ABC") + + # Delete Custom xattr from all regular files + self.delete_xattr_user_foo(list_of_softlinks) + + # Check mount point and brick for the xattr + self.check_mount_point_and_bricks_for_xattr(list_of_all_files) + + # Check if pathinfo xattr is visible + self.check_for_trusted_glusterfs_pathinfo(list_of_softlinks) diff --git a/tests/functional/dht/test_dht_file_rename_when_dest_is_hashed_or_cached_to_diff_subvol_combinations.py b/tests/functional/dht/test_dht_file_rename_when_dest_is_hashed_or_cached_to_diff_subvol_combinations.py new file mode 100644 index 000000000..884b55a2a --- /dev/null +++ b/tests/functional/dht/test_dht_file_rename_when_dest_is_hashed_or_cached_to_diff_subvol_combinations.py 
@@ -0,0 +1,919 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +import re +from glusto.core import Glusto as g +from glustolibs.gluster.glusterfile import get_file_stat +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.dht_test_utils import (find_hashed_subvol, + create_brickobjectlist, + find_new_hashed, + find_specific_hashed) +from glustolibs.gluster.volume_libs import get_subvols, parse_vol_file +from glustolibs.gluster.glusterfile import (move_file, + is_linkto_file, + get_dht_linkto_xattr) + + +@runs_on([['distributed-arbiter', 'distributed', + 'distributed-replicated', + 'distributed-dispersed'], + ['glusterfs']]) +class DhtFileRenameWithDestFile(GlusterBaseClass): + + def setUp(self): + """ + Setup Volume and Mount Volume + """ + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Change the dist count to 4 in case of 'distributed-replicated' , + # 'distributed-dispersed' and 'distributed-arbiter' + if self.volume_type in ("distributed-replicated", + "distributed-dispersed", + "distributed-arbiter"): + self.volume['voltype']['dist_count'] = 4 + + # Setup Volume and Mount Volume + ret = 
self.setup_volume_and_mount_volume(mounts=[self.mounts[0]]) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + self.mount_point = self.mounts[0].mountpoint + + self.subvols = (get_subvols( + self.mnode, self.volname))['volume_subvols'] + self.assertIsNotNone(self.subvols, "failed to get subvols") + + def tearDown(self): + """ + Unmount Volume and Cleanup Volume + """ + # Unmount Volume and Cleanup Volume + ret = self.unmount_volume_and_cleanup_volume(mounts=[self.mounts[0]]) + if not ret: + raise ExecutionError("Unmount Volume and Cleanup Volume: Fail") + g.log.info("Unmount Volume and Cleanup Volume: Success") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _create_file_and_get_hashed_subvol(self, file_name): + """ Creates a file and return its hashed subvol + + Args: + file_name(str): name of the file to be created + Returns: + hashed_subvol object: An object of type BrickDir type + representing the hashed subvolume + + subvol_count: The subvol index in the subvol list + + source_file: Path to the file created + + """ + # pylint: disable=unsubscriptable-object + + # Create Source File + source_file = "{}/{}".format(self.mount_point, file_name) + ret, _, err = g.run(self.mounts[0].client_system, + ("touch %s" % source_file)) + self.assertEqual(ret, 0, ("Failed to create {} : err {}" + .format(source_file, err))) + g.log.info("Successfully created the source file") + + # Find the hashed subvol for source file + source_hashed_subvol, count = find_hashed_subvol(self.subvols, + "/", + file_name) + self.assertIsNotNone(source_hashed_subvol, + "Couldn't find hashed subvol for the source file") + return source_hashed_subvol, count, source_file + + @staticmethod + def _verify_link_file_exists(brickdir, file_name): + """ Verifies whether a file link is present in given subvol + Args: + brickdir(Class Object): BrickDir object containing data about + bricks under a specific subvol + Returns: + 
(bool): True if link file exists else false + """ + # pylint: disable=protected-access + # pylint: disable=unsubscriptable-object + file_path = brickdir._fqpath + file_name + file_stat = get_file_stat(brickdir._host, file_path) + if file_stat is None: + g.log.error("Failed to get File stat for %s", file_path) + return False + if not file_stat['access'] == "1000": + g.log.error("Access value not 1000 for %s", file_path) + return False + + # Check for file type to be'sticky empty', have size of 0 and + # have the glusterfs.dht.linkto xattr set. + ret = is_linkto_file(brickdir._host, file_path) + if not ret: + g.log.error("%s is not a linkto file", file_path) + return False + return True + + @staticmethod + def _verify_file_exists(brick_dir, file_name): + """ Verifies whether a file is present in given subvol or not + Args: + brick_dir(Class Object): BrickDir object containing data about + bricks under a specific subvol + file_name(str): Name of the file to be searched + Returns: + (bool): True if link file exists else false + """ + # pylint: disable=protected-access + + cmd = "[ -f {} ]".format(brick_dir._fqpath + (str(file_name))) + ret, _, _ = g.run(brick_dir._host, cmd) + if ret: + return False + return True + + @staticmethod + def _get_remote_subvolume(vol_file_data, brick_name): + """ Verifies whether a file is present in given subvol or not + Args: + vol_file_data(dict): Dictionary containing data of .vol file + brick_name(str): Brick path + Returns: + (str): Remote subvol name + (None): If error occurred + """ + try: + brick_name = re.search(r'[a-z0-9\-\_]*', brick_name).group() + remote_subvol = (vol_file_data[ + brick_name]['option']['remote-subvolume']) + except KeyError: + return None + return remote_subvol + + def _verify_file_links_to_specified_destination(self, host, file_path, + dest_file): + """ Verifies whether a file link points to the specified destination + Args: + host(str): Host at which commands are to be executed + file_path(str): path to the 
link file + dest_file(str): path to the dest file to be pointed at + Returns: + (bool) : Based on whether the given file points to dest or not + """ + link_to_xattr = get_dht_linkto_xattr(host, file_path) + # Remove unexpected chars in the value, if any + link_to_xattr = re.search(r'[a-z0-9\-\_]*', link_to_xattr).group() + if link_to_xattr is None: + g.log.error("Failed to get trusted.glusterfs.dht.linkto") + return False + + # Get the remote-subvolume for the corresponding linkto xattr + path = ("/var/lib/glusterd/vols/{}/{}.tcp-fuse.vol" + .format(self.volname, self.volname)) + vol_data = parse_vol_file(self.mnode, path) + if not vol_data: + g.log.error("Failed to parse the file %s", path) + return False + + remote_subvol = self._get_remote_subvolume(vol_data, link_to_xattr) + if remote_subvol is None: + # In case, failed to find the remote subvol, get all the + # subvolumes and then check whether the file is present in + # any of those sunbol + subvolumes = vol_data[link_to_xattr]['subvolumes'] + for subvol in subvolumes: + remote_subvol = self._get_remote_subvolume(vol_data, + subvol) + if remote_subvol: + subvol = re.search(r'[a-z0-9\-\_]*', subvol).group() + remote_host = ( + vol_data[subvol]['option']['remote-host']) + # Verify the new file is in the remote-subvol identified + cmd = "[ -f {}/{} ]".format(remote_subvol, dest_file) + ret, _, _ = g.run(remote_host, cmd) + if not ret: + return True + g.log.error("The given link file doesn't point to any of " + "the subvolumes") + return False + else: + remote_host = vol_data[link_to_xattr]['option']['remote-host'] + # Verify the new file is in the remote-subvol identified + cmd = "[ -f {}/{} ]".format(remote_subvol, dest_file) + ret, _, _ = g.run(remote_host, cmd) + if not ret: + return True + return False + + def test_file_rename_when_dest_doesnt_hash_src_cached_or_hashed(self): + """ + - Destination file should exist + - Source file is hashed on sub volume(s1) and cached on + another subvolume(s2) + - 
Destination file should be hased to subvolume(s3) other + than above two subvolumes + - Destination file hased on subvolume(s3) but destination file + should be cached on same subvolume(s2) where source file is stored + mv <source_file> <destination_file> + - Destination file is removed. + - Source file should be renamed as destination file + - Destination file hashed on subvolume and should link + to new destination file + - source link file should be removed + """ + # pylint: disable=protected-access + # pylint: disable=too-many-locals + + # Create source file and Get hashed subvol (s2) + src_subvol, src_count, source_file = ( + self._create_file_and_get_hashed_subvol("test_source_file")) + + # Find a new file name for destination file, which hashes + # to another subvol (s1) + new_hashed = find_new_hashed(self.subvols, "/", "test_source_file") + self.assertIsNotNone(new_hashed, + "couldn't find new hashed for destination file") + + # Rename the source file to the new file name + dest_file = "{}/{}".format(self.mount_point, str(new_hashed.newname)) + ret = move_file(self.mounts[0].client_system, source_file, dest_file) + self.assertTrue(ret, ("Failed to move file {} and {}" + .format(source_file, dest_file))) + + # Verify the Source link file is stored on hashed sub volume(s1) + src_link_subvol = new_hashed.hashedbrickobject + ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed.newname)) + self.assertTrue(ret, ("The hashed subvol {} doesn't have the " + "expected linkto file: {}" + .format(src_link_subvol._fqpath, + str(new_hashed.newname)))) + + # Identify a file name for dest to get stored in S2 + dest_cached_subvol = find_specific_hashed(self.subvols, + "/", + src_subvol) + # Create the file with identified name + _, _, dst_file = ( + self._create_file_and_get_hashed_subvol( + str(dest_cached_subvol.newname))) + # Verify its in S2 itself + self.assertEqual(dest_cached_subvol.subvol_count, src_count, + ("The subvol found for destination is not 
same as " + "that of the source file cached subvol")) + + # Find a subvol (s3) for dest file to linkto, other than S1 and S2 + brickobject = create_brickobjectlist(self.subvols, "/") + self.assertIsNotNone(brickobject, "Failed to get brick object list") + br_count = -1 + subvol_new = None + for brickdir in brickobject: + br_count += 1 + if br_count not in (src_count, new_hashed.subvol_count): + subvol_new = brickdir + break + + new_hashed2 = find_specific_hashed(self.subvols, + "/", + subvol_new) + self.assertIsNotNone(new_hashed2, + "could not find new hashed for dstfile") + + # Verify the subvol is not same as S1(src_count) and S2(dest_count) + self.assertNotEqual(new_hashed2.subvol_count, src_count, + ("The subvol found for destination is same as that" + " of the source file cached subvol")) + self.assertNotEqual(new_hashed2.subvol_count, new_hashed.subvol_count, + ("The subvol found for destination is same as that" + " of the source file hashed subvol")) + + # Rename the dest file to the new file name + dst_file_ln = "{}/{}".format(self.mount_point, + str(new_hashed2.newname)) + ret = move_file(self.mounts[0].client_system, dst_file, dst_file_ln) + self.assertTrue(ret, ("Failed to move file {} and {}" + .format(dst_file, dst_file_ln))) + + # Verify the Dest link file is stored on hashed sub volume(s3) + dest_link_subvol = new_hashed2.hashedbrickobject + ret = self._verify_link_file_exists(dest_link_subvol, + str(new_hashed2.newname)) + self.assertTrue(ret, ("The hashed subvol {} doesn't have the " + "expected linkto file: {}" + .format(dest_link_subvol._fqpath, + str(new_hashed2.newname)))) + + # Move/Rename Source File to Dest + src_file = "{}/{}".format(self.mount_point, str(new_hashed.newname)) + ret = move_file(self.mounts[0].client_system, src_file, dst_file) + self.assertTrue(ret, ("Failed to move file {} and {}" + .format(src_file, dst_file))) + + # Verify Source file is removed + ret = self._verify_file_exists(src_subvol, "test_source_file") + 
self.assertFalse(ret, "The source file is still present in {}" + .format(src_subvol._fqpath)) + + # Verify Source link is removed + ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed.newname)) + self.assertFalse(ret, "The source link file is still present in {}" + .format(src_link_subvol._fqpath)) + + # Verify the Destination link is on hashed subvolume + ret = self._verify_link_file_exists(dest_link_subvol, + str(new_hashed2.newname)) + self.assertTrue(ret, ("The hashed subvol {} doesn't have the " + "expected linkto file: {}" + .format(dest_link_subvol._fqpath, + str(new_hashed2.newname)))) + + # Verify the dest link file points to new destination file + file_path = dest_link_subvol._fqpath + str(new_hashed2.newname) + ret = (self._verify_file_links_to_specified_destination( + dest_link_subvol._host, file_path, + str(dest_cached_subvol.newname))) + self.assertTrue(ret, "The dest link file not pointing towards " + "the desired file") + g.log.info("The Destination link file is pointing to new file" + " as expected") + + def test_file_rename_when_dest_hash_src_cached(self): + """ + - Destination file should exist + - Source file hashed sub volume(s1) and cached on another subvolume(s2) + - Destination file should be hased to subvolume where source file is + stored(s2) + - Destination file hased on subvolume(s2) but should be cached on + some other subvolume(s3) than this two subvolume + mv <source_file> <destination_file> + - Destination file is removed. 
+ - Source file should be renamed as destination file + - Destination link file should be removed + - source link file should be removed + """ + # pylint: disable=protected-access + # pylint: disable=too-many-locals + + # Create source file and Get hashed subvol (s2) + src_subvol, src_count, source_file = ( + self._create_file_and_get_hashed_subvol("test_source_file")) + + # Find a new file name for destination file, which hashes + # to another subvol (s2) + new_hashed = find_new_hashed(self.subvols, "/", "test_source_file") + self.assertIsNotNone(new_hashed, + "couldn't find new hashed for destination file") + + # Rename the source file to the new file name + src_hashed = "{}/{}".format(self.mount_point, str(new_hashed.newname)) + ret = move_file(self.mounts[0].client_system, source_file, src_hashed) + self.assertTrue(ret, ("Failed to move file {} and {}" + .format(source_file, src_hashed))) + + # Verify the Source link file is stored on hashed sub volume(s1) + src_link_subvol = new_hashed.hashedbrickobject + ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed.newname)) + self.assertTrue(ret, ("The hashed subvol {} doesn't have the " + "expected linkto file: {}" + .format(src_link_subvol._fqpath, + str(new_hashed.newname)))) + + # Find a subvol (s3) for dest file to linkto, other than S1 and S2 + brickobject = create_brickobjectlist(self.subvols, "/") + self.assertIsNotNone(brickobject, "Failed to get brick object list") + br_count = -1 + subvol_new = None + for brickdir in brickobject: + br_count += 1 + if br_count not in (src_count, new_hashed.subvol_count): + subvol_new = brickdir + break + + new_hashed2 = find_specific_hashed(self.subvols, + "/", + subvol_new) + self.assertIsNotNone(new_hashed2, + "could not find new hashed for dstfile") + + # Create a file in the subvol S3 + dest_subvol, count, dest_file = ( + self._create_file_and_get_hashed_subvol(str(new_hashed2.newname))) + + # Verify the subvol is not same as S1 and S2 + 
self.assertNotEqual(count, src_count, + ("The subvol found for destination is same as that" + " of the source file cached subvol")) + self.assertNotEqual(count, new_hashed.subvol_count, + ("The subvol found for destination is same as that" + " of the source file hashed subvol")) + + # Find a file name that hashes to S2 + dest_hashed = find_specific_hashed(self.subvols, + "/", + src_subvol) + self.assertIsNotNone(dest_hashed, + "could not find new hashed for dstfile") + + # Rename destination to hash to S2 and verify + dest = "{}/{}".format(self.mount_point, str(dest_hashed.newname)) + ret = move_file(self.mounts[0].client_system, dest_file, dest) + self.assertTrue(ret, ("Failed to move file {} and {}" + .format(dest_file, dest))) + + # Rename Source File to Dest + ret = move_file(self.mounts[0].client_system, src_hashed, dest) + self.assertTrue(ret, ("Failed to move file {} and {}" + .format(src_hashed, dest))) + + # Verify Destination File is removed + ret = self._verify_file_exists(new_hashed2.hashedbrickobject, + str(new_hashed2.newname)) + self.assertFalse(ret, "The Destination file is still present in {}" + .format(dest_subvol._fqpath)) + + # Verify Source link is removed + ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed.newname)) + self.assertFalse(ret, "The source link file is still present in {}" + .format(src_link_subvol._fqpath)) + + # Verify Destination Link is removed + ret = self._verify_link_file_exists(dest_hashed.hashedbrickobject, + str(dest_hashed.newname)) + self.assertFalse(ret, "The Dest link file is still present in {}" + .format(dest_hashed.hashedbrickobject._fqpath)) + + def test_file_rename_when_src_linked_and_dest_hash_other(self): + """ + - Destination file should exist + - Source link file hashed on sub volume(s1) and cached on another + subvolume(s2) + - Destination file should be hased to some other + subvolume(s3)(neither s1 nor s2) + - Destination file hased on subvolume(s3) but cached on + subvolume(s1) where 
source file is hashed + mv <source_file> <destination_file> + - Destination file is removed. + - Source file should be renamed as destination file + - Destination link file should be there on hashed subvolume + and should link to new destination file + - source link file should be removed + """ + # pylint: disable=protected-access + # pylint: disable=too-many-locals + + # Create source file and Get hashed subvol (s2) + _, src_count, source_file = ( + self._create_file_and_get_hashed_subvol("test_source_file")) + + # Find a new file name for destination file, which hashes + # to another subvol (s2) + new_hashed = find_new_hashed(self.subvols, "/", "test_source_file") + self.assertIsNotNone(new_hashed, + "couldn't find new hashed for destination file") + + # Rename the source file to the new file name + src_hashed = "{}/{}".format(self.mount_point, str(new_hashed.newname)) + ret = move_file(self.mounts[0].client_system, source_file, src_hashed) + self.assertTrue(ret, ("Failed to move file {} and {}" + .format(source_file, src_hashed))) + + # Verify the Source link file is stored on hashed sub volume(s1) + src_link_subvol = new_hashed.hashedbrickobject + ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed.newname)) + self.assertTrue(ret, ("The hashed subvol {} doesn't have the " + "expected linkto file: {}" + .format(src_link_subvol._fqpath, + str(new_hashed.newname)))) + + # Find a file name that hashes to S1 + dest_hashed = find_specific_hashed(self.subvols, + "/", + new_hashed.hashedbrickobject, + new_hashed.newname) + self.assertIsNotNone(dest_hashed, + "could not find new hashed for dstfile") + + # Create a file in the subvol S1 + dest_subvol, count, _ = self._create_file_and_get_hashed_subvol( + str(dest_hashed.newname)) + + # Verify the subvol is S1 + self.assertEqual(count, new_hashed.subvol_count, + ("The subvol found for destination is not same as" + " that of the source file hashed subvol")) + + # Find a subvol (s3) for dest file to 
linkto, other than S1 and S2 + brickobject = create_brickobjectlist(self.subvols, "/") + self.assertIsNotNone(brickobject, "Failed to get brick object list") + br_count = -1 + subvol_new = None + for brickdir in brickobject: + br_count += 1 + if br_count not in (src_count, new_hashed.subvol_count): + subvol_new = brickdir + break + + new_hashed2 = find_specific_hashed(self.subvols, + "/", + subvol_new) + self.assertIsNotNone(new_hashed2, + "could not find new hashed for dstfile") + + # Rename destination to hash to S3 and verify + dest_src = "{}/{}".format(self.mount_point, str(dest_hashed.newname)) + dest = "{}/{}".format(self.mount_point, str(new_hashed2.newname)) + ret = move_file(self.mounts[0].client_system, dest_src, dest) + self.assertTrue(ret, ("Failed to move file {} and {}" + .format(dest_src, dest))) + + # Rename Source File to Dest + ret = move_file(self.mounts[0].client_system, src_hashed, dest) + self.assertTrue(ret, ("Failed to move file {} and {}" + .format(src_hashed, dest))) + + # Verify Destination File is removed + ret = self._verify_file_exists(dest_hashed.hashedbrickobject, + str(dest_hashed.newname)) + self.assertFalse(ret, "The Destination file is still present in {}" + .format(dest_subvol._fqpath)) + + # Verify Source link is removed + ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed.newname)) + self.assertFalse(ret, "The source link file is still present in {}" + .format(src_link_subvol._fqpath)) + + # Verify Destination Link is present and points to new dest file + ret = self._verify_link_file_exists(new_hashed2.hashedbrickobject, + str(new_hashed2.newname)) + self.assertTrue(ret, "The Dest link file is not present in {}" + .format(new_hashed2.hashedbrickobject._fqpath)) + + file_path = new_hashed2.hashedbrickobject._fqpath + str( + new_hashed2.newname) + ret = (self._verify_file_links_to_specified_destination( + new_hashed2.hashedbrickobject._host, file_path, + str(new_hashed2.newname))) + self.assertTrue(ret, "The 
dest link file not pointing towards " + "the desired file") + g.log.info("The Destination link file is pointing to new file" + " as expected") + + def test_file_rename_when_dest_hash_src_cached_but_hash_other(self): + """ + - Destination file should exist + - Source file hashed on sub volume(s1) and cached + on another subvolume(s2) + - Destination file should be hased to same subvolume(s1) + where source file is hashed + - Destination hased on subvolume(s1) but cached on some other + subvolume(s3)(neither s1 nor s2) + mv <source_file> <destination_file> + - Destination file is removed. + - Source file should be renamed as destination file + - Destination link file should be there on hashed subvolume + and should link to new destination file + - source link file should be removed + """ + # pylint: disable=protected-access + # pylint: disable=too-many-locals + + # Create source file and Get hashed subvol (s2) + _, src_count, source_file = ( + self._create_file_and_get_hashed_subvol("test_source_file")) + + # Find a new file name for destination file, which hashes + # to another subvol (s2) + new_hashed = find_new_hashed(self.subvols, "/", "test_source_file") + self.assertIsNotNone(new_hashed, + "couldn't find new hashed for destination file") + + # Rename the source file to the new file name + src_hashed = "{}/{}".format(self.mount_point, str(new_hashed.newname)) + ret = move_file(self.mounts[0].client_system, source_file, src_hashed) + self.assertTrue(ret, ("Failed to move file {} and {}" + .format(source_file, src_hashed))) + + # Verify the Source link file is stored on hashed sub volume(s1) + src_link_subvol = new_hashed.hashedbrickobject + ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed.newname)) + self.assertTrue(ret, ("The hashed subvol {} doesn't have the " + "expected linkto file: {}" + .format(src_link_subvol._fqpath, + str(new_hashed.newname)))) + + # Destination file cached on S3. 
+ # Find a subvol (s3) for dest file to linkto, other than S1 and S2 + brickobject = create_brickobjectlist(self.subvols, "/") + self.assertIsNotNone(brickobject, "Failed to get brick object list") + br_count = -1 + subvol_new = None + for brickdir in brickobject: + br_count += 1 + if br_count not in (src_count, new_hashed.subvol_count): + subvol_new = brickdir + break + + dest_cached = find_specific_hashed(self.subvols, + "/", + subvol_new) + self.assertIsNotNone(dest_cached, + "could not find new hashed for dstfile") + + # Create a file in S3 + _, count, dest_src = self._create_file_and_get_hashed_subvol( + str(dest_cached.newname)) + + # Verify the subvol is not S2 and S1 + self.assertNotEqual(count, new_hashed.subvol_count, + ("The subvol found for destination is same as " + "that of the source file hashed subvol")) + self.assertNotEqual(count, src_count, + ("The subvol found for destination is same as " + "that of the source file cached subvol")) + + # Rename Destination file such that it hashes to S1 + dest_hashed = find_specific_hashed(self.subvols, + "/", + new_hashed.hashedbrickobject, + new_hashed.newname) + # Verify its S1 + self.assertEqual(dest_hashed.subvol_count, new_hashed.subvol_count, + ("The subvol found for destination is not same as " + "that of the source file hashed subvol")) + + # Move dest to new name + dest = "{}/{}".format(self.mount_point, str(dest_hashed.newname)) + ret = move_file(self.mounts[0].client_system, dest_src, dest) + self.assertTrue(ret, ("Failed to move file {} and {}" + .format(dest_src, dest))) + + # Move Source file to Dest + src = "{}/{}".format(self.mount_point, str(new_hashed.newname)) + ret = move_file(self.mounts[0].client_system, src, dest) + self.assertTrue(ret, ("Failed to move file {} and {}" + .format(src, dest))) + + # Verify Destination File is removed + ret = self._verify_file_exists(dest_cached.hashedbrickobject, + str(dest_cached.newname)) + self.assertFalse(ret, "The Dest file is still present in {}" + 
.format(dest_cached.hashedbrickobject._fqpath)) + + # Verify Source link is removed + ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed.newname)) + self.assertFalse(ret, "The source link file is still present in {}" + .format(src_link_subvol._fqpath)) + + # Verify Destination Link is present and points to new dest file + ret = self._verify_link_file_exists(dest_hashed.hashedbrickobject, + str(dest_hashed.newname)) + self.assertTrue(ret, "The Dest link file is not present in {}" + .format(dest_hashed.hashedbrickobject._fqpath)) + + file_path = dest_hashed.hashedbrickobject._fqpath + str( + dest_hashed.newname) + ret = (self._verify_file_links_to_specified_destination( + dest_hashed.hashedbrickobject._host, file_path, + str(dest_hashed.newname))) + self.assertTrue(ret, "The dest link file not pointing towards " + "the desired file") + g.log.info("The Destination link file is pointing to new file" + " as expected") + + def test_file_rename_when_dest_neither_hash_cache_to_src_subvols(self): + """ + - Destination file should exist + - Source file hashed on sub volume(s1) and cached on + another subvolume(s2) + - Destination file should be hased to some other subvolume(s3) + (neither s1 nor s2) + - Destination file hased on subvolume(s3) but cached on + remaining subvolume(s4) + mv <source_file> <destination_file> + - Destination file is removed. 
+ - Source file should be renamed as destination file + - Destination link file should be there on hashed subvolume + and should link to new destination file + - source link file should be removed + """ + # pylint: disable=protected-access + # pylint: disable=too-many-locals + + # Create source file and Get hashed subvol (s2) + _, src_count, source_file = ( + self._create_file_and_get_hashed_subvol("test_source_file")) + + # Find a new file name for destination file, which hashes + # to another subvol (s2) + new_hashed = find_new_hashed(self.subvols, "/", "test_source_file") + self.assertIsNotNone(new_hashed, + "couldn't find new hashed for destination file") + + # Rename the source file to the new file name + src_hashed = "{}/{}".format(self.mount_point, str(new_hashed.newname)) + ret = move_file(self.mounts[0].client_system, source_file, src_hashed) + self.assertTrue(ret, ("Failed to move file {} and {}" + .format(source_file, src_hashed))) + + # Verify the Source link file is stored on hashed sub volume(s1) + src_link_subvol = new_hashed.hashedbrickobject + ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed.newname)) + self.assertTrue(ret, ("The hashed subvol {} doesn't have the " + "expected linkto file: {}" + .format(src_link_subvol._fqpath, + str(new_hashed.newname)))) + + # Destination file cached on S4. 
+ # Find a subvol (s4) for dest file to linkto, other than S1 and S2 + brickobject = create_brickobjectlist(self.subvols, "/") + self.assertIsNotNone(brickobject, "Failed to get brick object list") + br_count = -1 + subvol_new = None + for brickdir in brickobject: + br_count += 1 + if br_count not in (src_count, new_hashed.subvol_count): + subvol_new = brickdir + break + + dest_cached = find_specific_hashed(self.subvols, + "/", + subvol_new) + self.assertIsNotNone(dest_cached, + "could not find new hashed for dstfile") + # Create a file in S3 + _, _, dest_src = self._create_file_and_get_hashed_subvol( + str(dest_cached.newname)) + + # Verify the subvol is not S2 and S1 + self.assertNotEqual(dest_cached.subvol_count, new_hashed.subvol_count, + ("The subvol found for destination is same as " + "that of the source file hashed subvol")) + self.assertNotEqual(dest_cached.subvol_count, src_count, + ("The subvol found for destination is same as " + "that of the source file cached subvol")) + + # Identify a name for dest that hashes to another subvol S3 + # Find a subvol (s3) for dest file to linkto, other than S1 and S2 and + # S4 + brickobject = create_brickobjectlist(self.subvols, "/") + self.assertIsNotNone(brickobject, "Failed to get brick object list") + br_count = -1 + subvol_new = None + for brickdir in brickobject: + br_count += 1 + if br_count not in (src_count, new_hashed.subvol_count, + dest_cached.subvol_count): + subvol_new = brickdir + break + + dest_hashed = find_specific_hashed(self.subvols, + "/", + subvol_new) + + # Move dest to new name + dest = "{}/{}".format(self.mount_point, str(dest_hashed.newname)) + ret = move_file(self.mounts[0].client_system, dest_src, dest) + self.assertTrue(ret, ("Failed to move file {} and {}" + .format(dest_src, dest))) + + # Move Source file to Dest + src = "{}/{}".format(self.mount_point, str(new_hashed.newname)) + ret = move_file(self.mounts[0].client_system, src, dest) + self.assertTrue(ret, ("Failed to move file {} and 
{}" + .format(src, dest))) + + # Verify Destination File is removed + ret = self._verify_file_exists(dest_cached.hashedbrickobject, + str(dest_cached.newname)) + self.assertFalse(ret, "The Source file is still present in {}" + .format(dest_cached.hashedbrickobject._fqpath)) + + # Verify Source link is removed + ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed.newname)) + self.assertFalse(ret, "The source link file is still present in {}" + .format(src_link_subvol._fqpath)) + + # Verify Destination Link is present and points to new dest file + ret = self._verify_link_file_exists(dest_hashed.hashedbrickobject, + str(dest_hashed.newname)) + self.assertTrue(ret, "The Dest link file is not present in {}" + .format(dest_hashed.hashedbrickobject._fqpath)) + + file_path = dest_hashed.hashedbrickobject._fqpath + str( + dest_hashed.newname) + ret = (self._verify_file_links_to_specified_destination( + dest_hashed.hashedbrickobject._host, file_path, + str(dest_hashed.newname))) + self.assertTrue(ret, "The dest link file not pointing towards " + "the desired file") + g.log.info("The Destination link file is pointing to new file" + " as expected") + + def test_file_rename_when_dest_hash_src_hashed_but_cache_diff(self): + """ + - Destination file should exist + - Source file is stored on hashed subvolume it self + - Destination file should be hased to some other subvolume(s2) + - Destination file hased on subvolume(s2) but cached on some other + subvolume(s3)(neither s1 nor s2) + mv <source_file> <destination_file> + - Destination file is removed. 
+ - Source file should be renamed as destination file + - Destination link file should be there on hashed subvolume and + should link to new destination file + """ + # pylint: disable=protected-access + # pylint: disable=too-many-locals + + # Create source file and Get hashed subvol (s1) + _, src_count, source_file = ( + self._create_file_and_get_hashed_subvol("test_source_file")) + + # Find a new file name for destination to hash to some subvol S3 + new_hashed = find_new_hashed(self.subvols, "/", "test_source_file") + self.assertIsNotNone(new_hashed, + "couldn't find new hashed for destination file") + + # Create Dest file in S3 + dest_cached, dest_count, dest_file = ( + self._create_file_and_get_hashed_subvol(str(new_hashed.newname))) + + # Verify S1 and S3 are not same + self.assertNotEqual(src_count, dest_count, + ("The destination file is cached to the source " + "cached subvol")) + + # Find new name for dest file, that it hashes to some other subvol S2 + brickobject = create_brickobjectlist(self.subvols, "/") + self.assertIsNotNone(brickobject, "Failed to get brick object list") + br_count = -1 + subvol_new = None + for brickdir in brickobject: + br_count += 1 + if br_count not in (src_count, dest_count): + subvol_new = brickdir + break + + dest_hashed = find_specific_hashed(self.subvols, + "/", + subvol_new) + # Move dest to new name + dest = "{}/{}".format(self.mount_point, str(dest_hashed.newname)) + ret = move_file(self.mounts[0].client_system, dest_file, dest) + self.assertTrue(ret, ("Failed to move file {} and {}" + .format(dest_file, dest))) + + # Move Source file to Dest + ret = move_file(self.mounts[0].client_system, source_file, dest) + self.assertTrue(ret, ("Failed to move file {} and {}" + .format(source_file, dest))) + + # Verify Destination File is removed + ret = self._verify_file_exists(dest_cached, + str(new_hashed.newname)) + self.assertFalse(ret, "The Source file is still present in {}" + .format(dest_cached._fqpath)) + + # Verify 
Destination Link is present and points to new dest file + ret = self._verify_link_file_exists(dest_hashed.hashedbrickobject, + str(dest_hashed.newname)) + self.assertTrue(ret, "The Dest link file is not present in {}" + .format(dest_hashed.hashedbrickobject._fqpath)) + + file_path = dest_hashed.hashedbrickobject._fqpath + str( + dest_hashed.newname) + ret = (self._verify_file_links_to_specified_destination( + dest_hashed.hashedbrickobject._host, file_path, + str(dest_hashed.newname))) + self.assertTrue(ret, "The dest link file not pointing towards " + "the desired file") + g.log.info("The Destination link file is pointing to new file" + " as expected") diff --git a/tests/functional/dht/test_dht_file_rename_when_destination_file_exists.py b/tests/functional/dht/test_dht_file_rename_when_destination_file_exists.py new file mode 100644 index 000000000..9673710e0 --- /dev/null +++ b/tests/functional/dht/test_dht_file_rename_when_destination_file_exists.py @@ -0,0 +1,540 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from glusto.core import Glusto as g +from glustolibs.gluster.glusterfile import get_file_stat +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.dht_test_utils import (find_hashed_subvol, + create_brickobjectlist, + find_new_hashed, + find_specific_hashed) +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.glusterfile import move_file, is_linkto_file + + +@runs_on([['distributed', 'distributed-replicated', + 'distributed-dispersed', 'distributed-arbiter'], + ['glusterfs']]) +class DhtFileRenameWithDestFile(GlusterBaseClass): + + def setUp(self): + """ + Setup Volume and Mount Volume + """ + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Change the dist count to 4 in case of 'distributed-replicated' , + # 'distributed-dispersed' and 'distributed-arbiter' + if self.volume_type in ("distributed-replicated", + "distributed-dispersed", + "distributed-arbiter"): + self.volume['voltype']['dist_count'] = 4 + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=[self.mounts[0]]) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + self.mount_point = self.mounts[0].mountpoint + + self.subvols = (get_subvols( + self.mnode, self.volname))['volume_subvols'] + self.assertIsNotNone(self.subvols, "failed to get subvols") + + def tearDown(self): + """ + Unmount Volume and Cleanup Volume + """ + # Unmount Volume and Cleanup Volume + ret = self.unmount_volume_and_cleanup_volume(mounts=[self.mounts[0]]) + if not ret: + raise ExecutionError("Unmount Volume and Cleanup Volume: Fail") + g.log.info("Unmount Volume and Cleanup Volume: Success") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _create_file_and_get_hashed_subvol(self, file_name): + """ Creates a file and return its hashed subvol + + Args: + 
file_name(str): name of the file to be created + Returns: + hashed_subvol object: An object of type BrickDir type + representing the hashed subvolume + + subvol_count: The subvol index in the subvol list + + source_file: Path to the file created + + """ + # pylint: disable=unsubscriptable-object + + # Create Source File + source_file = "{}/{}".format(self.mount_point, file_name) + ret, _, err = g.run(self.mounts[0].client_system, + ("touch %s" % source_file)) + self.assertEqual(ret, 0, ("Failed to create {} : err {}" + .format(source_file, err))) + g.log.info("Successfully created the source file") + + # Find the hashed subvol for source file + source_hashed_subvol, count = find_hashed_subvol(self.subvols, + "/", + file_name) + self.assertIsNotNone(source_hashed_subvol, + "Couldn't find hashed subvol for the source file") + return source_hashed_subvol, count, source_file + + @staticmethod + def _verify_link_file_exists(brickdir, file_name): + """ Verifies whether a file link is present in given subvol + Args: + brickdir(Class Object): BrickDir object containing data about + bricks under a specific subvol + Returns: + True/False(bool): Based on existance of file link + """ + # pylint: disable=protected-access + # pylint: disable=unsubscriptable-object + file_path = brickdir._fqpath + file_name + file_stat = get_file_stat(brickdir._host, file_path) + if file_stat is None: + g.log.error("Failed to get File stat for %s", file_path) + return False + if not file_stat['access'] == "1000": + g.log.error("Access value not 1000 for %s", file_path) + return False + + # Check for file type to be'sticky empty', have size of 0 and + # have the glusterfs.dht.linkto xattr set. 
+ ret = is_linkto_file(brickdir._host, file_path) + if not ret: + g.log.error("%s is not a linkto file", file_path) + return False + return True + + @staticmethod + def _verify_file_exists(brick_dir, file_name): + """ Verifies whether a file is present in given subvol or not + Args: + brickdir(Class Object): BrickDir object containing data about + bricks under a specific subvol + file_name(str): Name of the file to be searched + Returns: + True/False(bool): Based on existance of file + """ + # pylint: disable=protected-access + + cmd = "[ -f {} ]".format(brick_dir._fqpath + (str(file_name))) + ret, _, _ = g.run(brick_dir._host, cmd) + if ret != 0: + return False + return True + + def test_dht_file_rename_dest_exists_src_and_dest_hash_diff(self): + """ + case 6 : + - Destination file should exist + - Source file is stored on hashed subvolume(s1) it self + - Destination file should be hashed to some other subvolume(s2) + - Destination file is stored on hashed subvolume + mv <source_file> <destination_file> + - Destination file is removed. 
+ - Source file should be renamed as destination file + - Destination hashed file should be created on its hashed + subvolume(s2) + """ + # pylint: disable=protected-access + # pylint: disable=unsubscriptable-object + + # Create source file and Get hashed subvol (s1) + _, src_count, source_file = ( + self._create_file_and_get_hashed_subvol("test_source_file")) + + # Find a new file name for destination file, which hashes + # to another subvol (s2) + new_hashed = find_new_hashed(self.subvols, "/", "test_source_file") + self.assertIsNotNone(new_hashed, + "could'nt find new hashed for destination file") + + # create destination_file and get its hashed subvol (s2) + dest_hashed_subvol, dest_count, dest_file = ( + self._create_file_and_get_hashed_subvol(str(new_hashed.newname))) + + # Verify the subvols are not same for source and destination files + self.assertNotEqual(src_count, + dest_count, + "The subvols for src and dest are same.") + + # Rename the source file to the destination file + ret = move_file(self.mounts[0].client_system, source_file, dest_file) + self.assertTrue(ret, "Failed to move files {} and {}".format( + source_file, dest_file)) + + # Verify destination file is removed + ret = self._verify_file_exists(dest_hashed_subvol, + str(new_hashed.newname)) + self.assertTrue(ret, ("Destination file : {} is not removed in subvol" + " : {}".format(str(new_hashed.newname), + dest_hashed_subvol._fqpath))) + g.log.info("The destination file is removed as expected") + + # Verify the Destination link is found in new subvol (s2) + ret = self._verify_link_file_exists(dest_hashed_subvol, + str(new_hashed.newname)) + self.assertTrue(ret, ("The New hashed volume {} doesn't have the " + "expected linkto file {}" + .format(str(new_hashed.newname), + dest_hashed_subvol._fqpath))) + g.log.info("New hashed volume has the expected linkto file") + + def test_dht_file_rename_dest_exists_src_and_dest_hash_same(self): + """ + Case 7: + - Destination file should exist + - Source 
file is stored on hashed subvolume(s1) it self + - Destination file should be hashed to same subvolume(s1) + - Destination file is stored on hashed subvolume + mv <source_file> <destination_file> + - Destination file is removed. + - Source file should be renamed to destination file + """ + # pylint: disable=protected-access + # pylint: disable=unsubscriptable-object + + # Create soruce file and Get hashed subvol (s1) + source_hashed_subvol, src_count, source_file = ( + self._create_file_and_get_hashed_subvol("test_source_file")) + + # Find a new file name for destination file that hashes + # to same subvol (s1) + new_hashed = find_specific_hashed(self.subvols, + "/", + source_hashed_subvol) + self.assertIsNotNone(new_hashed, "Couldn't find a new hashed subvol " + "for destination file") + + # Create destination_file and get its hashed subvol (should be s1) + dest_hashed_subvol, dest_count, dest_file = ( + self._create_file_and_get_hashed_subvol(str(new_hashed.newname))) + + # Verify the subvols are not same for source and destination files + self.assertEqual(src_count, dest_count, + "The subvols for src and dest are not same.") + + # Rename the source file to the destination file + ret = move_file(self.mounts[0].client_system, source_file, dest_file) + self.assertTrue(ret, ("Failed to move files {} and {}" + .format(source_file, dest_file))) + + # Verify the file move and the destination file is hashed to + # same subvol or not + _, rename_count = find_hashed_subvol(self.subvols, + "/", + str(new_hashed.newname)) + self.assertEqual(dest_count, + rename_count, + ("The subvols for source : {} and dest : {} are " + "not same.".format(source_hashed_subvol._fqpath, + dest_hashed_subvol._fqpath))) + + # Verify destination file is removed + ret = self._verify_file_exists(dest_hashed_subvol, + str(new_hashed.newname)) + self.assertTrue(ret, ("Destination file : {} is not removed in subvol" + " : {}".format(str(new_hashed.newname), + dest_hashed_subvol._fqpath))) + 
g.log.info("The destination file is removed as expected") + + def test_file_rename_dest_exist_and_not_hash_src_srclink_subvol(self): + """ + Case 8: + - Destination file should exist + - Source file is hashed sub volume(s1) and + cached on another subvolume(s2) + - Destination file should be hashed to some other subvolume(s3) + (should not be same subvolumes mentioned in above condition) + mv <source_file> <destination_file> + - Destination file is removed. + - Source file should be renamed as destination file + - Souce hashed file should be removed + - Destination hashed file should be created on its hashed subvolume(s3) + """ + # pylint: disable=protected-access + # pylint: disable=too-many-locals + # pylint: disable=unsubscriptable-object + + # Find a non hashed subvolume(or brick) + # Create soruce file and Get hashed subvol (s2) + _, count, source_file = ( + self._create_file_and_get_hashed_subvol("test_source_file")) + + # Rename the file to create link in hashed subvol -(s1) + new_hashed = find_new_hashed(self.subvols, "/", "test_source_file") + self.assertIsNotNone(new_hashed, + "could not find new hashed for dstfile") + count2 = new_hashed.subvol_count + # Rename the source file to the new file name + dest_file = "{}/{}".format(self.mount_point, str(new_hashed.newname)) + ret = move_file(self.mounts[0].client_system, source_file, dest_file) + self.assertTrue(ret, ("Failed to move file {} and {}" + .format(source_file, dest_file))) + + # Verify the Source link file is stored on hashed sub volume(s1) + src_link_subvol = new_hashed.hashedbrickobject + ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed.newname)) + self.assertTrue(ret, ("The hashed subvol {} doesn't have the " + "expected linkto file: {}" + .format(src_link_subvol._fqpath, + str(new_hashed.newname)))) + + # Find a subvol (s3) other than S1 and S2 + brickobject = create_brickobjectlist(self.subvols, "/") + self.assertIsNotNone(brickobject, "Failed to get brick object list") + 
br_count = -1 + subvol_new = None + for brickdir in brickobject: + br_count += 1 + if br_count not in (count, count2): + subvol_new = brickdir + break + + new_hashed2 = find_specific_hashed(self.subvols, + "/", + subvol_new) + self.assertIsNotNone(new_hashed2, + "could not find new hashed for dstfile") + + # Create destination file in a new subvol (s3) + dest_hashed_subvol, dest_count, dest_file = ( + self._create_file_and_get_hashed_subvol(str(new_hashed2.newname))) + + # Verify the subvol is not same as S1 or S2 + self.assertNotEqual(count2, dest_count, + ("The subvols for src :{} and dest : {} are same." + .format(count2, dest_count))) + # Verify the subvol is not same as S1 or S2 + self.assertNotEqual(count, dest_count, + ("The subvols for src :{} and dest : {} are same." + .format(count, dest_count))) + + # Rename the source file to the destination file + source_file = "{}/{}".format(self.mount_point, str(new_hashed.newname)) + ret = move_file(self.mounts[0].client_system, source_file, dest_file) + self.assertTrue(ret, ("Failed to move file {} and {}" + .format(source_file, dest_file))) + + # Verify destination file is removed + ret = self._verify_file_exists(dest_hashed_subvol, + str(new_hashed2.newname)) + self.assertTrue(ret, ("Destination file : {} is not removed in subvol" + " : {}".format(str(new_hashed.newname), + dest_hashed_subvol._fqpath))) + g.log.info("The destination file is removed as expected") + + # Check that the source link file is removed. 
+ ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed.newname)) + self.assertFalse(ret, ("The New hashed volume {} still have the " + "expected linkto file {}" + .format(src_link_subvol._fqpath, + str(new_hashed.newname)))) + g.log.info("The source link file is removed") + + # Check Destination link file is created on its hashed sub-volume(s3) + ret = self._verify_link_file_exists(dest_hashed_subvol, + str(new_hashed2.newname)) + self.assertTrue(ret, ("The New hashed volume {} doesn't have the " + "expected linkto file {}" + .format(dest_hashed_subvol._fqpath, + str(new_hashed2.newname)))) + g.log.info("Destinaion link is created in desired subvol") + + def test_file_rename_dest_exist_and_hash_to_src_subvol(self): + """ + Case 9: + - Destination file should exist + - Source file is hashed sub volume(s1) and + cached on another subvolume(s2) + - Destination file should be hashed to subvolume where source file + is cached(s2) + mv <source_file> <destination_file> + - Destination file is removed. 
+ - Source file should be renamed as destination file + - Souce hashed file should be removed + """ + # pylint: disable=protected-access + # pylint: disable=unsubscriptable-object + + # Get hashed subvol (S2) + source_hashed_subvol, src_count, source_file = ( + self._create_file_and_get_hashed_subvol("test_source_file")) + + # Rename the file to create link in hashed subvol -(s1) + new_hashed = find_new_hashed(self.subvols, "/", "test_source_file") + self.assertIsNotNone(new_hashed, ("could not find new hashed for {}" + .format(source_file))) + + # Rename the source file to the new file name + dest_file = "{}/{}".format(self.mount_point, str(new_hashed.newname)) + ret = move_file(self.mounts[0].client_system, source_file, dest_file) + self.assertTrue(ret, ("Failed to move file {} and {}" + .format(source_file, dest_file))) + + # Verify the Source link file is stored on hashed sub volume(s1) + src_link_subvol = new_hashed.hashedbrickobject + ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed.newname)) + self.assertTrue(ret, ("The New hashed volume {} doesn't have the " + "expected linkto file {}" + .format(src_link_subvol._fqpath, + str(new_hashed.newname)))) + + # Get a file name for dest file to hash to the subvol s2 + new_hashed2 = find_specific_hashed(self.subvols, + "/", + source_hashed_subvol) + self.assertIsNotNone(new_hashed2, "Could not find a name hashed" + "to the given subvol") + + # Create destination file in the subvol (s2) + dest_hashed_subvol, dest_count, dest_file = ( + self._create_file_and_get_hashed_subvol(str(new_hashed2.newname))) + + # Verify the subvol is same as S2 + self.assertEqual(src_count, dest_count, + "The subvols for src and dest are not same.") + + # Move the source file to the new file name + source_file = "{}/{}".format(self.mount_point, str(new_hashed.newname)) + ret = move_file(self.mounts[0].client_system, source_file, dest_file) + self.assertTrue(ret, ("Failed to move file {} and {}" + .format(source_file, 
dest_file))) + + # Verify destination file is removed + ret = self._verify_file_exists(dest_hashed_subvol, + str(new_hashed2.newname)) + self.assertTrue(ret, ("Destination file : {} is not removed in subvol" + " : {}".format(str(new_hashed.newname), + dest_hashed_subvol._fqpath))) + g.log.info("The destination file is removed as expected") + + # Check that the source link file is removed. + ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed.newname)) + self.assertFalse(ret, ("The New hashed volume {} still have the " + "expected linkto file {}" + .format(src_link_subvol._fqpath, + str(new_hashed.newname)))) + g.log.info("The source link file is removed") + + def test_file_rename_dest_exist_and_hash_to_srclink_subvol(self): + """ + Case 10: + - Destination file should exist + - Source file is hashed sub volume(s1) and + cached on another subvolume(s2) + - Destination file should be hashed to same subvolume(s1) where source + file is hashed. + mv <source_file> <destination_file> + - Destination file is removed. + - Source file(cached) should be renamed to destination file + - Source file(hashed) should be removed. 
+ - Destination hahshed file should be created on its + hashed subvolume(s1) + """ + # pylint: disable=protected-access + # pylint: disable=unsubscriptable-object + + # Get hashed subvol s2) + _, src_count, source_file = ( + self._create_file_and_get_hashed_subvol("test_source_file")) + + # Rename the file to create link in another subvol - (s1) + new_hashed = find_new_hashed(self.subvols, "/", "test_source_file") + self.assertIsNotNone(new_hashed, ("could not find new hashed subvol " + "for {}".format(source_file))) + + self.assertNotEqual(src_count, + new_hashed.subvol_count, + "New file should hash to different sub-volume") + + # Rename the source file to the new file name + dest_file = "{}/{}".format(self.mount_point, str(new_hashed.newname)) + ret = move_file(self.mounts[0].client_system, source_file, dest_file) + self.assertTrue(ret, ("Failed to move file {} and {}" + .format(source_file, dest_file))) + + # Verify the Source link file is stored on hashed sub volume(s1) + src_link_subvol = new_hashed.hashedbrickobject + ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed.newname)) + self.assertTrue(ret, ("The New hashed volume {} doesn't have the " + "expected linkto file {}" + .format(src_link_subvol._fqpath, + str(new_hashed.newname)))) + + # Get a file name for dest to hash to the subvol s1 + new_hashed2 = find_specific_hashed(self.subvols, + "/", + src_link_subvol, + new_hashed.newname) + self.assertIsNotNone(new_hashed2, ("Couldn't find a name hashed to the" + " given subvol {}" + .format(src_link_subvol))) + # Create destination file in the subvol (s2) + dest_hashed_subvol, dest_count, dest_file = ( + self._create_file_and_get_hashed_subvol(str(new_hashed2.newname))) + + # Verify the subvol is same as S1 + self.assertEqual(new_hashed.subvol_count, dest_count, + "The subvols for src and dest are not same.") + + # Move the source file to the new file name + source_file = "{}/{}".format(self.mount_point, str(new_hashed.newname)) + 
dest_file = "{}/{}".format(self.mount_point, str(new_hashed2.newname)) + ret = move_file(self.mounts[0].client_system, source_file, dest_file) + self.assertTrue(ret, "Failed to move file") + + # Verify destination file is removed + ret = self._verify_file_exists(dest_hashed_subvol, + str(new_hashed2.newname)) + self.assertTrue(ret, ("Destination file : {} is not removed in subvol" + " : {}".format(str(new_hashed.newname), + dest_hashed_subvol._fqpath))) + g.log.info("The destination file is removed as expected") + + # Check that the source link file is removed. + ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed.newname)) + self.assertFalse(ret, ("The hashed volume {} still have the " + "expected linkto file {}" + .format(src_link_subvol._fqpath, + str(new_hashed.newname)))) + g.log.info("The source link file is removed") + + # Check Destination link file is created on its hashed sub-volume(s1) + ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed2.newname)) + self.assertTrue(ret, ("The New hashed volume {} doesn't have the " + "expected linkto file {}" + .format(src_link_subvol._fqpath, + str(new_hashed2.newname)))) + g.log.info("Destinaion link is created in desired subvol") diff --git a/tests/functional/dht/test_directory_custom_extended_attributes.py b/tests/functional/dht/test_directory_custom_extended_attributes.py index b391593b1..fd1493622 100644 --- a/tests/functional/dht/test_directory_custom_extended_attributes.py +++ b/tests/functional/dht/test_directory_custom_extended_attributes.py @@ -117,7 +117,8 @@ class TestDirectoryCustomExtendedAttributes(GlusterBaseClass): mount_point, folder_name) ret = get_fattr(mount_point.client_system, mount_point.mountpoint, - 'trusted.glusterfs.pathinfo') + 'trusted.glusterfs.pathinfo', + encode="text") self.assertIsNotNone(ret, "trusted.glusterfs.pathinfo is not " "presented on %s:%s" % @@ -139,7 +140,7 @@ class TestDirectoryCustomExtendedAttributes(GlusterBaseClass): 
g.log.debug('Check xarttr user.foo on %s:%s', mount_point.client_system, folder_name) ret = get_fattr(mount_point.client_system, folder_name, - 'user.foo') + 'user.foo', encode="text") self.assertEqual(ret, 'bar2', "Xattr attribute user.foo is not presented on " "mount point %s and directory %s" % @@ -153,7 +154,8 @@ class TestDirectoryCustomExtendedAttributes(GlusterBaseClass): brick_path = dir_prefix.format(root=brick_dir, client_index=mount_index) - ret = get_fattr(brick_server, brick_path, 'user.foo') + ret = get_fattr(brick_server, brick_path, 'user.foo', + encode="text") g.log.debug('Check custom xattr for directory on brick %s:%s', brick_server, brick_path) @@ -177,7 +179,8 @@ class TestDirectoryCustomExtendedAttributes(GlusterBaseClass): g.log.debug('Looking if custom extra attribute user.foo is ' 'presented on mount or on bricks after deletion') self.assertIsNone(get_fattr(mount_point.client_system, - folder_name, 'user.foo'), + folder_name, 'user.foo', + encode="text"), "Xattr user.foo is presented on mount point" " %s:%s after deletion" % (mount_point.mountpoint, folder_name)) @@ -277,7 +280,7 @@ class TestDirectoryCustomExtendedAttributes(GlusterBaseClass): g.log.debug('Check mountpoint and bricks for custom xattribute') self.assertEqual('bar2', get_fattr(mount_point.client_system, linked_folder_name, - 'user.foo'), + 'user.foo', encode="text"), 'Custom xattribute is not presented on ' 'mount point %s:%s' % (mount_point.client_system, linked_folder_name)) @@ -297,7 +300,8 @@ class TestDirectoryCustomExtendedAttributes(GlusterBaseClass): continue self.assertEqual(get_fattr(brick_server, brick_path, - 'user.foo'), 'bar2', + 'user.foo', encode="text"), + 'bar2', "Actual: custom attribute not " "found on brick %s:%s" % ( brick_server, brick_path)) @@ -319,7 +323,8 @@ class TestDirectoryCustomExtendedAttributes(GlusterBaseClass): "after deletion", mount_point.client_system, linked_folder_name) self.assertIsNone(get_fattr(mount_point.client_system, - 
linked_folder_name, 'user.foo'), + linked_folder_name, 'user.foo', + encode="text"), "Expected: xattr user.foo to be not presented on" " %s:%s" % (mount_point.client_system, linked_folder_name)) @@ -339,7 +344,7 @@ class TestDirectoryCustomExtendedAttributes(GlusterBaseClass): continue self.assertIsNone(get_fattr(brick_server, brick_path, - 'user.foo'), + 'user.foo', encode="text"), "Extended custom attribute is presented on " "%s:%s after deletion" % (brick_server, brick_path)) diff --git a/tests/functional/dht/test_directory_healing.py b/tests/functional/dht/test_directory_healing.py index dd1586711..e01a0240f 100644 --- a/tests/functional/dht/test_directory_healing.py +++ b/tests/functional/dht/test_directory_healing.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018-2019 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2018-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -19,17 +19,16 @@ Description: """ from glusto.core import Glusto as g - from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on -from glustolibs.gluster.brick_libs import bring_bricks_offline -from glustolibs.gluster.brick_libs import bring_bricks_online -from glustolibs.gluster.brickdir import BrickDir -from glustolibs.gluster.volume_libs import get_subvols -from glustolibs.gluster.dht_test_utils import find_nonhashed_subvol +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + bring_bricks_online, get_all_bricks) +from glustolibs.gluster.brickdir import check_hashrange +from glustolibs.gluster.glusterfile import get_file_stat, calculate_hash -@runs_on([['distributed-replicated', 'distributed', 'distributed-dispersed'], +@runs_on([['distributed', 'distributed-replicated', + 'distributed-dispersed', 'distributed-arbiter'], ['glusterfs', 'nfs']]) class 
TestDirHeal(GlusterBaseClass): ''' @@ -75,18 +74,25 @@ class TestDirHeal(GlusterBaseClass): g.log.info("mkdir of parent successful") # find non-hashed subvol for child - subvols = (get_subvols(self.mnode, self.volname))['volume_subvols'] - - non_hashed, count = find_nonhashed_subvol(subvols, "parent", "child") - self.assertIsNotNone(non_hashed, "could not find non_hashed subvol") - - g.log.info("non_hashed subvol %s", non_hashed._host) + hashed, non_hashed = [], [] + hash_num = calculate_hash(self.mnode, "child") + bricklist = get_all_bricks(self.mnode, self.volname) + for brick in bricklist: + ret = check_hashrange(brick + "/parent") + hash_range_low = ret[0] + hash_range_high = ret[1] + if hash_range_low <= hash_num <= hash_range_high: + hashed.append(brick) + + non_hashed = [brick for brick in bricklist if brick not in hashed] + g.log.info("Non-hashed bricks are: %s", non_hashed) # bring non_hashed offline - ret = bring_bricks_offline(self.volname, subvols[count]) - self.assertTrue(ret, ('Error in bringing down subvolume %s', - subvols[count])) - g.log.info('target subvol %s is offline', subvols[count]) + for brick in non_hashed: + ret = bring_bricks_offline(self.volname, brick) + self.assertTrue(ret, ('Error in bringing down brick %s', + brick)) + g.log.info('Non-hashed brick %s is offline', brick) # create child directory runc = ("mkdir %s" % target_dir) @@ -95,45 +101,53 @@ class TestDirHeal(GlusterBaseClass): g.log.info('mkdir successful %s', target_dir) # Check that the dir is not created on the down brick - brickpath = ("%s/child" % non_hashed._path) - - ret, _, _ = g.run(non_hashed._host, ("stat %s" % brickpath)) - self.assertEqual(ret, 1, ("Expected %s to be not present on %s" % - (brickpath, non_hashed._host))) - g.log.info("stat of %s failed as expected", brickpath) + for brick in non_hashed: + non_hashed_host, dir_path = brick.split(":") + brickpath = ("%s/parent/child" % dir_path) + ret, _, _ = g.run(non_hashed_host, ("stat %s" % brickpath)) + 
self.assertEqual(ret, 1, ("Expected %s to be not present on %s" % + (brickpath, non_hashed_host))) + g.log.info("Stat of %s failed as expected", brickpath) # bring up the subvol - ret = bring_bricks_online(self.mnode, self.volname, subvols[count], - bring_bricks_online_methods=None) + ret = bring_bricks_online( + self.mnode, self.volname, non_hashed, + bring_bricks_online_methods='volume_start_force') self.assertTrue(ret, "Error in bringing back subvol online") - g.log.info('Subvol is back online') + g.log.info("Subvol is back online") runc = ("ls %s" % target_dir) ret, _, _ = g.run(self.clients[0], runc) - self.assertEqual(ret, 0, ("lookup on %s failed", target_dir)) - g.log.info("lookup is successful on %s", target_dir) + self.assertEqual(ret, 0, ("Lookup on %s failed", target_dir)) + g.log.info("Lookup is successful on %s", target_dir) # check if the directory is created on non_hashed - absolutedirpath = ("%s/child" % non_hashed._path) + for brick in non_hashed: + non_hashed_host, dir_path = brick.split(":") + absolutedirpath = ("%s/parent/child" % dir_path) + ret = get_file_stat(non_hashed_host, absolutedirpath) + self.assertIsNotNone(ret, "Directory is not present on non_hashed") + g.log.info("Directory is created on non_hashed subvol") # check if directory is healed => i.e. 
layout is zeroed out - temp = BrickDir(absolutedirpath) - - if temp is None: - self.assertIsNot(temp, None, 'temp is None') - - ret = temp.has_zero_hashrange() - self.assertTrue(ret, ("hash range is not there %s", ret)) - g.log.info("directory healing successful") - - @classmethod - def tearDownClass(cls): - # Unmount Volume and Cleanup Volume + for brick in non_hashed: + brick_path = ("%s/parent/child" % brick) + ret = check_hashrange(brick_path) + hash_range_low = ret[0] + hash_range_high = ret[1] + if not hash_range_low and not hash_range_high: + g.log.info("Directory healing successful") + else: + g.log.error("Directory is not healed") + + def tearDown(self): + + # Unmount and cleanup original volume g.log.info("Starting to Unmount Volume and Cleanup Volume") - ret = cls.unmount_volume_and_cleanup_volume(mounts=cls.mounts) + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: - raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") - g.log.info("Successful in Unmount Volume and Cleanup Volume") + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass tearDown - cls.get_super_method(cls, 'tearDownClass')() + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/dht/test_disable_readdirp_data_loss.py b/tests/functional/dht/test_disable_readdirp_data_loss.py new file mode 100644 index 000000000..47be667cc --- /dev/null +++ b/tests/functional/dht/test_disable_readdirp_data_loss.py @@ -0,0 +1,103 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_ops import set_volume_options +from glustolibs.gluster.glusterdir import get_dir_contents + + +@runs_on([['distributed-dispersed'], ['glusterfs']]) +class TestDisableReaddirpDataLoss(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Setup Volume + if not self.setup_volume(): + raise ExecutionError("Failed to Setup_Volume %s" % self.volname) + + def tearDown(self): + + # Unmount volume if mounted + if self.currently_mounted_clients: + if not self.unmount_volume(self.currently_mounted_clients): + raise ExecutionError("Failed to unmount Volume") + + # Cleanup volume + if not self.cleanup_volume(): + raise ExecutionError("Failed to Cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _mount_on_a_client(self, mountobj): + """Mount volume on one client and update list""" + ret = self.mount_volume([mountobj]) + self.assertTrue(ret, "Failed to mount volume on client") + self.currently_mounted_clients.append(mountobj) + + def _perfrom_lookups_on_mount_point(self, node, mountpoint): + """Perform lookups on a given mount point""" + ret = get_dir_contents(node, mountpoint) + self.assertEqual(len(ret), 8, + "8 dirs not present on mount point %s on %s" + % (node, mountpoint)) + g.log.info("Lookup successful on node %s and mount 
point %s", + node, mountpoint) + + def test_disable_readdirp_data_loss(self): + """ + Test case: + 1. Create a 2 x (4+2) disperse volume and start it. + 2. Disable performance.force-readdirp and dht.force-readdirp. + 3. Mount the volume on one client and create 8 directories. + 4. Do a lookup on the mount using the same mount point, + number of directories should be 8. + 5. Mount the volume again on a different client and check + if number of directories is the same or not. + """ + # List to determine if volume is mounted or not + self.currently_mounted_clients = [] + + # Disable performance.force-readdirp and dht.force-readdirp + for option, val in (("performance.force-readdirp", "disable"), + ("dht.force-readdirp", "off")): + ret = set_volume_options(self.mnode, self.volname, {option: val}) + self.assertTrue(ret, "Failed to set volume option %s to %s" + % (option, val)) + g.log.info("Successfully disabled performance.force-readdirp and " + "dht.force-readdirp") + + # Mount the volume on one client and create 8 directories + self._mount_on_a_client(self.mounts[0]) + ret, _, _ = g.run(self.mounts[0].client_system, + "mkdir %s/dir{1..8}" % self.mounts[0].mountpoint) + self.assertFalse(ret, "Failed to create 8 directories on mount point") + g.log.info("Successfully mounted and create 8 dirs on mount point") + + # Do a lookup on the mount using the same mount point, + # number of directories should be 8 + self._perfrom_lookups_on_mount_point(self.mounts[0].client_system, + self.mounts[0].mountpoint) + + # Mount the volume again on a different client and check + # if number of directories is the same or not + self._mount_on_a_client(self.mounts[1]) + self._perfrom_lookups_on_mount_point(self.mounts[1].client_system, + self.mounts[1].mountpoint) diff --git a/tests/functional/dht/test_file_creation.py b/tests/functional/dht/test_file_creation.py new file mode 100644 index 000000000..5671cb84b --- /dev/null +++ b/tests/functional/dht/test_file_creation.py @@ -0,0 +1,494 
# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

from glusto.core import Glusto as g
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.glusterfile import (get_file_stat, get_pathinfo,
                                            file_exists, create_link_file,
                                            get_md5sum, get_fattr)
from glustolibs.gluster.lib_utils import append_string_to_file


@runs_on([['distributed', 'distributed-arbiter',
           'distributed-replicated', 'distributed-dispersed'],
          ['glusterfs']])
class TestFileCreation(GlusterBaseClass):
    """Validate creation of special files, hard links and symlinks on
    distributed volumes, both on the mount point and on backend bricks."""

    def setUp(self):

        # Calling GlusterBaseClass setUp
        self.get_super_method(self, 'setUp')()

        # Setup Volume and Mount Volume
        ret = self.setup_volume_and_mount_volume([self.mounts[0]])
        if not ret:
            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
        g.log.info("Successful in Setup Volume and Mount Volume")
        self.client, self.m_point = (self.mounts[0].client_system,
                                     self.mounts[0].mountpoint)

    def tearDown(self):

        # Unmount and cleanup original volume
        ret = self.unmount_volume_and_cleanup_volume(mounts=[self.mounts[0]])
        if not ret:
            raise ExecutionError("Failed to umount the vol & cleanup Volume")
        g.log.info("Successful in umounting the volume and Cleanup")

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()

    def _create_file_using_touch(self, file_name):
        """Creates a regular empty file"""
        cmd = "touch {}/{}".format(self.m_point, file_name)
        ret, _, _ = g.run(self.client, cmd)
        self.assertEqual(ret, 0, "Failed to create file {}".format(file_name))
        g.log.info("Successfully created file %s", file_name)

    def _check_file_stat_on_mountpoint(self, file_name, file_type):
        """Check the file-type on mountpoint"""
        file_stat = (get_file_stat(self.client, "{}/{}".format(
            self.m_point, file_name
        )))['filetype']
        self.assertEqual(file_stat, file_type,
                         "File is not a {}".format(file_type))
        g.log.info("File is %s", file_type)

    def _is_file_present_on_brick(self, file_name):
        """Check if file is created on the backend-bricks as per
        the value of trusted.glusterfs.pathinfo xattr"""
        brick_list = get_pathinfo(self.client, "{}/{}".format(
            self.m_point, file_name))
        self.assertNotEqual(
            brick_list, 0, "Failed to get bricklist for {}".format(file_name))

        for brick in brick_list['brickdir_paths']:
            host, path = brick.split(':')
            ret = file_exists(host, path)
            self.assertTrue(ret, "File {} is not present on {}".format(
                file_name, brick
            ))
            g.log.info("File %s is present on %s", file_name, brick)

    def _compare_file_permissions(self, file_name,
                                  file_info_mnt=None, file_info_brick=None):
        """Check if the file's permission are same on mountpoint and
        backend-bricks.

        When both optional args are None the access permissions are
        collected here; otherwise the caller supplies a mountpoint value
        and a per-brick list (also used by tests to compare ctimes).
        """
        if (file_info_mnt is None and file_info_brick is None):
            file_info_mnt = (get_file_stat(self.client, "{}/{}".format(
                self.m_point, file_name
            )))['access']
            self.assertIsNotNone(
                file_info_mnt, "Failed to get access time for {}".format(
                    file_name))
            brick_list = get_pathinfo(self.client, "{}/{}".format(
                self.m_point, file_name))
            self.assertNotEqual(
                brick_list, 0, "Failed to get bricklist for {}".format(
                    file_name))
            file_info_brick = []
            for brick in brick_list['brickdir_paths']:
                host, path = brick.split(':')
                info_brick = (get_file_stat(host, path))['access']
                file_info_brick.append(info_brick)

        for info in file_info_brick:
            self.assertEqual(info, file_info_mnt,
                             "File details for {} are diffrent on"
                             " backend-brick".format(file_name))
        g.log.info("Details for file %s is correct"
                   " on backend-bricks", file_name)

    def _check_change_time_mnt(self, file_name):
        """Find out the change time (epoch ctime) for file on mountpoint"""
        file_ctime_mnt = (get_file_stat(self.client, "{}/{}".format(
            self.m_point, file_name
        )))['epoch_ctime']
        return file_ctime_mnt

    def _check_change_time_brick(self, file_name):
        """Find out the change time (epoch ctime) for file on each
        backend-brick holding it; returns the list of per-brick ctimes."""
        brick_list = get_pathinfo(self.client, "{}/{}".format(
            self.m_point, file_name))
        self.assertNotEqual(brick_list, 0,
                            "Failed to get bricklist for {}".format(file_name))

        brick_mtime = []
        for brick in brick_list['brickdir_paths']:
            host, path = brick.split(':')
            # Lookup on the brick first so the backend stat is fresh
            cmd = "ls -lR {}".format(path)
            ret, _, _ = g.run(host, cmd)
            self.assertEqual(ret, 0, "Lookup failed on"
                             " brick:{}".format(path))
            file_ctime_brick = (get_file_stat(host, path))['epoch_ctime']
            brick_mtime.append(file_ctime_brick)
        return brick_mtime

    def _compare_file_perm_mnt(self, mtime_before, mtime_after,
                               file_name):
        """Assert that the ctime collected before and after appending
        data differs, i.e. the change time was updated."""
        self.assertNotEqual(mtime_before, mtime_after, "Unexpected:"
                            "The ctime has not been changed")
        g.log.info("The modification time for %s has been"
                   " changed as expected", file_name)

    def _collect_and_compare_file_info_on_mnt(
            self, link_file_name, values, expected=True):
        """Collect the files's permissions on mountpoint and compare

        Args:
            link_file_name (str): link file to compare against test_file
            values (list): stat keys (e.g. inode, access, size) to check
            expected (bool): True asserts the values match, False asserts
                they differ
        """
        stat_test_file = get_file_stat(
            self.client, "{}/test_file".format(self.m_point))
        self.assertIsNotNone(stat_test_file, "Failed to get stat of test_file")
        stat_link_file = get_file_stat(
            self.client, "{}/{}".format(self.m_point, link_file_name))
        self.assertIsNotNone(stat_link_file, "Failed to get stat of {}".format(
            link_file_name))

        for key in values:
            if expected is True:
                self.assertEqual(stat_test_file[key], stat_link_file[key],
                                 "The {} is not same for test_file"
                                 " and {}".format(key, link_file_name))
                g.log.info("The %s for test_file and %s is same on mountpoint",
                           key, link_file_name)
            else:
                self.assertNotEqual(stat_test_file[key], stat_link_file[key],
                                    "Unexpected : The {} is same for test_file"
                                    " and {}".format(key, link_file_name))
                g.log.info("The %s for test_file and %s is different"
                           " on mountpoint", key, link_file_name)

    def _compare_file_md5sum_on_mnt(self, link_file_name):
        """Collect and compare the md5sum for file on mountpoint"""
        md5sum_test_file, _ = (get_md5sum(
            self.client, "{}/test_file".format(self.m_point))).split()
        self.assertIsNotNone(
            md5sum_test_file, "Failed to get md5sum for test_file")

        md5sum_link_file, _ = get_md5sum(
            self.client, "{}/{}".format(self.m_point, link_file_name)).split()
        self.assertIsNotNone(md5sum_link_file, "Failed to get"
                             " md5sum for {}".format(link_file_name))
        self.assertEqual(md5sum_test_file, md5sum_link_file,
                         "The md5sum for test_file and {} is"
                         " not same".format(link_file_name))
        g.log.info("The md5sum is same for test_file and %s"
                   " on mountpoint", link_file_name)

    def _compare_file_md5sum_on_bricks(self, link_file_name):
        """Collect and compare md5sum for file on backend-bricks"""
        brick_list_test_file = get_pathinfo(self.client, "{}/test_file".format(
            self.m_point))
        md5sum_list_test_file = []
        for brick in brick_list_test_file['brickdir_paths']:
            host, path = brick.split(':')
            md5sum_test_file, _ = (get_md5sum(host, path)).split()
            md5sum_list_test_file.append(md5sum_test_file)

        brick_list_link_file = get_pathinfo(self.client, "{}/{}".format(
            self.m_point, link_file_name))
        md5sum_list_link_file = []
        for brick in brick_list_link_file['brickdir_paths']:
            # Fix: the original loop never split the link-file brick path,
            # so it kept re-hashing the stale host/path left over from the
            # previous loop.
            host, path = brick.split(':')
            md5sum_link_file, _ = (get_md5sum(host, path)).split()
            md5sum_list_link_file.append(md5sum_link_file)

        # Fix: compare the full per-brick lists instead of only the last
        # value of each loop (the original assert sat outside the loop).
        self.assertEqual(md5sum_list_test_file, md5sum_list_link_file,
                         "The md5sum for test_file and {} is"
                         " not same on backend bricks".format(link_file_name))
        g.log.info("The md5sum for test_file and %s is same"
                   " on backend bricks", link_file_name)

    def _compare_gfid_xattr_on_files(self, link_file_name, expected=True):
        """Collect and compare the value of trusted.gfid xattr for file
        on backend-bricks"""
        brick_list_test_file = get_pathinfo(self.client, "{}/test_file".format(
            self.m_point))
        xattr_list_test_file = []
        for brick in brick_list_test_file['brickdir_paths']:
            host, path = brick.split(':')
            xattr_test_file = get_fattr(host, path, "trusted.gfid")
            xattr_list_test_file.append(xattr_test_file)

        brick_list_link_file = get_pathinfo(self.client, "{}/{}".format(
            self.m_point, link_file_name))
        xattr_list_link_file = []
        for brick in brick_list_link_file['brickdir_paths']:
            host, path = brick.split(':')
            xattr_link_file = get_fattr(host, path, "trusted.gfid")
            xattr_list_link_file.append(xattr_link_file)

        if expected is True:
            self.assertEqual(xattr_list_test_file, xattr_list_link_file,
                             "Unexpected: The xattr trusted.gfid is not same "
                             "for test_file and {}".format(link_file_name))
            g.log.info("The xattr trusted.gfid is same for test_file"
                       " and %s", link_file_name)
        else:
            self.assertNotEqual(xattr_list_test_file, xattr_list_link_file,
                                "Unexpected: The xattr trusted.gfid is same "
                                "for test_file and {}".format(link_file_name))
            g.log.info("The xattr trusted.gfid is not same for test_file"
                       " and %s", link_file_name)

    def test_special_file_creation(self):
        """
        Description : check creation of different types of files.

        Steps:
        1) From mount point, create a regular file, e.g. "touch f1",
           and character, block device and pipe files:
           mknod c
           mknod b
           mkfifo
        2) Stat on the files created in Step-1 from mount point
        3) Verify that file is stored on only one brick which is mentioned in
           trusted.glusterfs.pathinfo xattr
           On mount point -
           " getfattr -n trusted.glusterfs.pathinfo "
           On all bricks
           " ls / "
        4) Verify that file permissions are same on mount point and sub-volumes
           " stat "
        5) Append some data to the file.
        6) List content of file to verify that data has been appended.
           " cat "
        7) Verify that file change time and size has been updated
           accordingly (from mount point and sub-volume)
           " stat / "
        """
        # pylint: disable=too-many-statements
        # pylint: disable=too-many-locals
        # Create a regular file
        self._create_file_using_touch("regfile")

        # Create a character and block file
        for (file_name, parameter) in [
                ("blockfile", "b"), ("charfile", "c")]:
            cmd = "mknod {}/{} {} 1 5".format(self.m_point, file_name,
                                              parameter)
            ret, _, _ = g.run(self.client, cmd)
            self.assertEqual(
                ret, 0, "Failed to create {} file".format(file_name))
            g.log.info("%s file created successfully", file_name)

        # Create a pipe file
        cmd = "mkfifo {}/pipefile".format(self.m_point)
        ret, _, _ = g.run(self.client, cmd)
        self.assertEqual(ret, 0, "Failed to create pipe file")
        g.log.info("Pipe file is created successfully")

        # Stat all the files created on mount-point
        for (file_name, check_string) in [
                ("regfile", "regular empty file"),
                ("charfile", "character special file"),
                ("blockfile", "block special file"),
                ("pipefile", "fifo")]:
            self._check_file_stat_on_mountpoint(file_name, check_string)

        # Verify files are stored on backend bricks as per
        # the trusted.glusterfs.pathinfo
        file_types = ["regfile", "charfile", "blockfile", "pipefile"]

        for file_name in file_types:
            self._is_file_present_on_brick(file_name)

        # Verify that the file permissions are same on
        # mount-point and bricks
        for file_name in file_types:
            self._compare_file_permissions(file_name)

        # Note the change time on mount and bricks for all files.
        # Also it should be same on mnt and bricks
        reg_mnt_ctime_1 = self._check_change_time_mnt("regfile")
        char_mnt_ctime_1 = self._check_change_time_mnt("charfile")
        block_mnt_ctime_1 = self._check_change_time_mnt("blockfile")
        fifo_mnt_ctime_1 = self._check_change_time_mnt("pipefile")

        reg_brick_ctime_1 = self._check_change_time_brick("regfile")
        char_brick_ctime_1 = self._check_change_time_brick("charfile")
        block_brick_ctime_1 = self._check_change_time_brick("blockfile")
        fifo_brick_ctime_1 = self._check_change_time_brick("pipefile")

        for (file_name, mnt_ctime, brick_ctime) in [
                ("regfile", reg_mnt_ctime_1, reg_brick_ctime_1),
                ("charfile", char_mnt_ctime_1, char_brick_ctime_1),
                ("blockfile", block_mnt_ctime_1, block_brick_ctime_1),
                ("pipefile", fifo_mnt_ctime_1, fifo_brick_ctime_1)]:
            self._compare_file_permissions(
                file_name, mnt_ctime, brick_ctime)

        # Append some data to the files
        for (file_name, data_str) in [
                ("regfile", "regular"),
                ("charfile", "character special"),
                ("blockfile", "block special")]:
            ret = append_string_to_file(
                self.client, "{}/{}".format(self.m_point, file_name),
                "Welcome! This is a {} file".format(data_str))
            self.assertTrue(
                ret, "Failed to append data to {}".format(file_name))
            g.log.info(
                "Successfully appended data to %s", file_name)

        # Check if the data has been appended
        check = "Welcome! This is a regular file"
        cmd = "cat {}/{}".format(self.m_point, "regfile")
        ret, out, _ = g.run(self.client, cmd)
        # Robustness fix: also verify the read itself succeeded
        self.assertEqual(ret, 0, "Failed to read regfile")
        self.assertEqual(out.strip(), check, "No data present at regfile")

        # Append data to pipefile and check if it has been appended
        g.run_async(self.client, "echo 'Hello' > {}/{} ".format(
            self.m_point, "pipefile"))
        ret, out, _ = g.run(
            self.client, "cat < {}/{}".format(self.m_point, "pipefile"))
        self.assertEqual(
            ret, 0, "Unable to fetch datat on other terimnal")
        self.assertEqual(
            "Hello", out.split('\n')[0],
            "Hello not recieved on the second terimnal")

        # Lookup on mount-point
        cmd = "ls -lR {}".format(self.m_point)
        ret, _, _ = g.run(self.client, cmd)
        self.assertEqual(ret, 0, "Lookup on mountpoint failed")

        # Collect ctime on mount point after appending data
        reg_mnt_ctime_2 = self._check_change_time_mnt("regfile")

        # After appending data the ctime for file should change
        # (consistency: use the existing helper instead of an inline
        # duplicate of the same assertion)
        self._compare_file_perm_mnt(reg_mnt_ctime_1, reg_mnt_ctime_2,
                                    "regfile")

        # Collect the ctime on bricks
        reg_brick_ctime_2 = self._check_change_time_brick("regfile")

        # Check if the ctime has changed on bricks as per mount
        self._compare_file_permissions(
            "regfile", reg_mnt_ctime_2, reg_brick_ctime_2)

    def test_hard_link_file(self):
        """
        Description: link file create, validate and access file
                     using it

        Steps:
        1) From mount point, create a regular file
        2) Verify that file is stored on only on bricks which is
           mentioned in trusted.glusterfs.pathinfo xattr
        3) From mount point create hard-link file for the created file
        4) From mount point stat on the hard-link file and original file;
           file inode, permission, size should be same
        5) From mount point, verify that file contents are same
           "md5sum"
        6) Verify "trusted.gfid" extended attribute of the file
           on sub-vol
        7) From sub-volume stat on the hard-link file and original file;
           file inode, permission, size should be same
        8) From sub-volume verify that content of file are same
        """
        # Create a regular file
        self._create_file_using_touch("test_file")

        # Check file is create on bricks as per trusted.glusterfs.pathinfo
        self._is_file_present_on_brick("test_file")

        # Create a hard-link file for the test_file
        ret = create_link_file(
            self.client, "{}/test_file".format(self.m_point),
            "{}/hardlink_file".format(self.m_point))
        self.assertTrue(ret, "Failed to create hard link file for"
                        " test_file")
        g.log.info("Successfully created hardlink_file")

        # On mountpoint perform stat on original and hard-link file
        values = ["inode", "access", "size"]
        self._collect_and_compare_file_info_on_mnt(
            "hardlink_file", values, expected=True)

        # Check the md5sum on original and hard-link file on mountpoint
        self._compare_file_md5sum_on_mnt("hardlink_file")

        # Compare the value of trusted.gfid for test_file and hard-link file
        # on backend-bricks
        self._compare_gfid_xattr_on_files("hardlink_file")

        # On backend bricks perform stat on original and hard-link file
        values = ["inode", "access", "size"]
        self._collect_and_compare_file_info_on_mnt("hardlink_file", values)

        # On backend bricks check the md5sum
        self._compare_file_md5sum_on_bricks("hardlink_file")

    def test_symlink_file(self):
        """
        Description: Create symbolic link file, validate and access file
                     using it

        Steps:
        1) From mount point, create a regular file
        2) Verify that file is stored on only on bricks which is
           mentioned in trusted.glusterfs.pathinfo xattr
        3) From mount point create symbolic link file for the created file
        4) From mount point stat on the symbolic link file and original file;
           file inode should be different
        5) From mount point, verify that file contents are same
           "md5sum"
        6) Verify "trusted.gfid" extended attribute of the file
           on sub-vol
        7) Verify readlink on symbolic link from mount point
           "readlink "
        8) From sub-volume verify that content of file are same
        """
        # Create a regular file on mountpoint
        self._create_file_using_touch("test_file")

        # Check file is create on bricks as per trusted.glusterfs.pathinfo
        self._is_file_present_on_brick("test_file")

        # Create a symbolic-link file for the test_file
        ret = create_link_file(
            self.client, "{}/test_file".format(self.m_point),
            "{}/softlink_file".format(self.m_point), soft=True)
        self.assertTrue(ret, "Failed to create symbolic link file for"
                        " test_file")
        g.log.info("Successfully created softlink_file")

        # On mountpoint perform stat on original and symbolic-link file
        # The value of inode should be different
        values = ["inode"]
        self._collect_and_compare_file_info_on_mnt(
            "softlink_file", values, expected=False)

        # Check the md5sum on original and symbolic-link file on mountpoint
        self._compare_file_md5sum_on_mnt("softlink_file")

        # Compare the value of trusted.gfid for test_file and
        # symbolic-link file on backend-bricks
        self._compare_gfid_xattr_on_files("softlink_file")

        # Verify readlink on symbolic-link from mount point
        cmd = "readlink {}/softlink_file".format(self.m_point)
        ret, out, _ = g.run(self.client, cmd)
        self.assertEqual(
            out.strip(), "{}/test_file".format(self.m_point),
            "Symbolic link points to incorrect file")
        g.log.info("Symbolic link points to correct file")

        # Check the md5sum on original and symbolic-link file on backend bricks
        self._compare_file_md5sum_on_bricks("softlink_file")
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.glusterfile import get_file_stat +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.dht_test_utils import (find_hashed_subvol, + create_brickobjectlist, + find_new_hashed, + find_specific_hashed) +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.glusterfile import move_file, is_linkto_file + + +@runs_on([['distributed', 'distributed-replicated', + 'distributed-dispersed', 'distributed-arbiter'], + ['glusterfs']]) +class DhtFileRenameVerification(GlusterBaseClass): + + def setUp(self): + """ + Setup Volume and Mount Volume + """ + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Change the dist count to 4 in case of 'distributed-replicated' , + # 'distributed-dispersed' and 'distributed-arbiter' + if self.volume_type in ("distributed-replicated", + "distributed-dispersed", + "distributed-arbiter"): + self.volume['voltype']['dist_count'] = 4 + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + 
mount_obj = self.mounts[0] + self.mount_point = mount_obj.mountpoint + + self.subvols = (get_subvols( + self.mnode, self.volname))['volume_subvols'] + self.assertIsNotNone(self.subvols, "failed to get subvols") + + def tearDown(self): + """ + Unmount Volume and Cleanup Volume + """ + # Unmount Volume and Cleanup Volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Unmount Volume and Cleanup Volume: Fail") + g.log.info("Unmount Volume and Cleanup Volume: Success") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _create_file_and_get_hashed_subvol(self, file_name): + """ Creates a file and return its hashed subvol + + Args: + file_name(str): name of the file to be created + Returns: + hashed_subvol object: An object of type BrickDir type + representing the hashed subvolume + + subvol_count: The subvol index in the subvol list + + source_file: Path to the file created + + """ + # pylint: disable=unsubscriptable-object + + # Create Source File + source_file = "{}/{}".format(self.mount_point, file_name) + ret, _, err = g.run(self.clients[0], ("touch %s" % source_file)) + self.assertEqual(ret, 0, ("Failed to create {} : err {}" + .format(source_file, err))) + g.log.info("Successfully created the source file") + + # Find the hashed subvol for source file + source_hashed_subvol, count = find_hashed_subvol(self.subvols, + "/", + file_name) + self.assertIsNotNone(source_hashed_subvol, ("Couldn't find hashed " + "subvol for the {}" + .format(source_file))) + return source_hashed_subvol, count, source_file + + @staticmethod + def _verify_link_file_exists(brickdir, file_name): + """ Verifies whether a file link is present in given subvol + Args: + brickdir(Class Object): BrickDir object containing data about + bricks under a specific subvol + Returns: + True/False(bool): Based on existance of file link + """ + # pylint: disable=protected-access + # pylint: 
disable=unsubscriptable-object + file_path = brickdir._fqpath + file_name + file_stat = get_file_stat(brickdir._host, file_path) + if file_stat is None: + g.log.error("Failed to get File stat for %s", file_path) + return False + if not file_stat['access'] == "1000": + g.log.error("Access value not 1000 for %s", file_path) + return False + + # Check for file type to be'sticky empty', have size of 0 and + # have the glusterfs.dht.linkto xattr set. + ret = is_linkto_file(brickdir._host, file_path) + if not ret: + g.log.error("%s is not a linkto file", file_path) + return False + return True + + def test_file_rename_when_source_and_dest_hash_diff_subvol(self): + """ + case 1 : + - Destination file does not exist + - Source file is stored on hashed subvolume(s1) it self + - Destination file should be hashed to some other subvolume(s2) + mv <source_file> <destination_file> + - Source file should be renamed to to Destination file. + - Destination link file should be created on its hashed + subvolume(s2) + """ + # pylint: disable=protected-access + # pylint: disable=unsubscriptable-object + + # Create soruce file and Get hashed subvol (s2) + _, count, source_file = ( + self._create_file_and_get_hashed_subvol("test_source_file")) + + # Rename the file such that the new name hashes to a new subvol (S1) + new_hashed = find_new_hashed(self.subvols, "/", "test_source_file") + self.assertIsNotNone(new_hashed, ("could'nt find new hashed for {}" + .format(source_file))) + src_link_subvol = new_hashed.hashedbrickobject + + # Verify the subvols are not same for source and destination files + self.assertNotEqual(count, + new_hashed.subvol_count, + "The subvols for src and dest are same.") + + # Rename the source file to the destination file + dest_file = "{}/{}".format(self.mount_point, str(new_hashed.newname)) + ret = move_file(self.clients[0], source_file, dest_file) + self.assertTrue(ret, ("Failed to move files {} and {}" + .format(source_file, dest_file))) + + # Verify the link 
file is found in new subvol + ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed.newname)) + self.assertTrue(ret, ("The hashed subvol {} doesn't have the " + "expected linkto file: {}" + .format(src_link_subvol._fqpath, + str(new_hashed.newname)))) + g.log.info("New hashed volume has the expected linkto file") + + def test_file_rename_when_source_and_dest_hash_same_subvol(self): + """ + Case 2: + - Destination file does not exist + - Source file is stored on hashed subvolume(s1) it self + - Destination file should be hashed to same subvolume(s1) + mv <source_file> <destination_file> + - Source file should be renamed to destination file + """ + # pylint: disable=protected-access + # pylint: disable=unsubscriptable-object + + # Create soruce file and Get hashed subvol (s1) + source_hashed_subvol, count, source_file = ( + self._create_file_and_get_hashed_subvol("test_source_file")) + + # Rename the file such that the new name hashes to a new subvol + new_hashed = find_specific_hashed(self.subvols, + "/", + source_hashed_subvol) + self.assertIsNotNone(new_hashed, + "could not find new hashed for destination file") + + # Rename the source file to the destination file + dest_file = "{}/{}".format(self.mount_point, str(new_hashed.newname)) + ret = move_file(self.clients[0], source_file, dest_file) + self.assertTrue(ret, "Failed to move files {} and {}".format( + source_file, dest_file)) + + _, rename_count = find_hashed_subvol(self.subvols, + "/", + str(new_hashed.newname)) + self.assertEqual(count, rename_count, + "The hashed subvols for src and dest are not same.") + + def test_file_rename_when_dest_not_hash_to_src_or_src_link_subvol(self): + """ + Case 3: + - Destination file does not exist + - Source link file is stored on hashed sub volume(s1) and Source + file is stored on another subvolume(s2) + - Destination file should be hashed to some other subvolume(s3) + (should not be same subvolumes mentioned in above condition) + mv <source_file> 
<destination_file> + - Source file should be ranamed to destination file + - source link file should be removed. + - Destination link file should be created on its hashed + subvolume(s3) + """ + # pylint: disable=protected-access + # pylint: disable=too-many-locals + # pylint: disable=unsubscriptable-object + + # Find a non hashed subvolume(or brick) + # Create soruce file and Get hashed subvol (s2) + _, count, source_file = ( + self._create_file_and_get_hashed_subvol("test_source_file")) + + # Rename the file to create link in hashed subvol -(s1) + new_hashed = find_new_hashed(self.subvols, "/", "test_source_file") + self.assertIsNotNone(new_hashed, + "could not find new hashed for dstfile") + count2 = new_hashed.subvol_count + # Rename the source file to the new file name + dest_file = "{}/{}".format(self.mount_point, str(new_hashed.newname)) + ret = move_file(self.clients[0], source_file, dest_file) + self.assertTrue(ret, ("Failed to move file {} and {}" + .format(source_file, dest_file))) + + # Verify the Source link file is stored on hashed sub volume(s1) + src_link_subvol = new_hashed.hashedbrickobject + ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed.newname)) + self.assertTrue(ret, ("The hashed subvol {} doesn't have the " + "expected linkto file: {}" + .format(src_link_subvol._fqpath, + str(new_hashed.newname)))) + + # find a subvol (s3) other than S1 and S2 + brickobject = create_brickobjectlist(self.subvols, "/") + self.assertIsNotNone(brickobject, "Failed to get brick object list") + br_count = -1 + subvol_new = None + for brickdir in brickobject: + br_count += 1 + if br_count not in (count, count2): + subvol_new = brickdir + break + + new_hashed2 = find_specific_hashed(self.subvols, + "/", + subvol_new) + self.assertIsNotNone(new_hashed2, + "could not find new hashed for dstfile") + + # Rename the source file to the destination file + source_file = "{}/{}".format(self.mount_point, str(new_hashed.newname)) + dest_file = 
"{}/{}".format(self.mount_point, str(new_hashed2.newname)) + ret = move_file(self.clients[0], source_file, dest_file) + self.assertTrue(ret, ("Failed to move file {} and {}" + .format(source_file, dest_file))) + + hashed_subvol_after_rename, rename_count = ( + find_hashed_subvol(self.subvols, + "/", + str(new_hashed2.newname))) + self.assertNotEqual(count2, rename_count, + "The subvols for src and dest are same.") + + # check that the source link file is removed. + ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed.newname)) + self.assertFalse(ret, ("The New hashed volume {} still have the " + "expected linkto file {}" + .format(src_link_subvol._fqpath, + str(new_hashed.newname)))) + g.log.info("The source link file is removed") + + # Check Destination link file is created on its hashed sub-volume(s3) + ret = self._verify_link_file_exists(hashed_subvol_after_rename, + str(new_hashed2.newname)) + self.assertTrue(ret, ("The New hashed volume {} doesn't have the " + "expected linkto file {}" + .format(hashed_subvol_after_rename._fqpath, + str(new_hashed2.newname)))) + g.log.info("Destinaion link is created in desired subvol") + + def test_file_rename_when_src_file_and_dest_file_hash_same_subvol(self): + """ + Case 4: + - Destination file does not exist + - Source link file is stored on hashed sub volume(s1) and Source + file is stored on another subvolume(s2) + - Destination file should be hashed to same subvolume(s2) + mv <source_file> <destination_file> + - Source file should be ranamed to destination file + - source link file should be removed. 
+ """ + # pylint: disable=protected-access + # pylint: disable=unsubscriptable-object + + # Get hashed subvol (S2) + source_hashed_subvol, count, source_file = ( + self._create_file_and_get_hashed_subvol("test_source_file")) + + # Rename the file to create link in hashed subvol -(s1) + new_hashed = find_new_hashed(self.subvols, "/", "test_source_file") + self.assertIsNotNone(new_hashed, ("could not find new hashed for {}" + .format(source_file))) + + # Rename the source file to the new file name + dest_file = "{}/{}".format(self.mount_point, str(new_hashed.newname)) + ret = move_file(self.clients[0], source_file, dest_file) + self.assertTrue(ret, ("Failed to move file {} and {}" + .format(source_file, dest_file))) + + # Verify the Source link file is stored on hashed sub volume(s1) + src_link_subvol = new_hashed.hashedbrickobject + ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed.newname)) + self.assertTrue(ret, ("The New hashed volume {} doesn't have the " + "expected linkto file {}" + .format(src_link_subvol._fqpath, + str(new_hashed.newname)))) + + # Get a file name to hash to the subvol s2 + new_hashed2 = find_specific_hashed(self.subvols, + "/", + source_hashed_subvol) + self.assertIsNotNone(new_hashed2, "Could not find a name hashed" + "to the given subvol") + + _, rename_count = ( + find_hashed_subvol(self.subvols, "/", str(new_hashed2.newname))) + self.assertEqual(count, rename_count, + "The subvols for src and dest are not same.") + + # Move the source file to the new file name + source_file = "{}/{}".format(self.mount_point, str(new_hashed.newname)) + dest_file = "{}/{}".format(self.mount_point, str(new_hashed2.newname)) + ret = move_file(self.clients[0], source_file, dest_file) + self.assertTrue(ret, ("Failed to move file {} and {}" + .format(source_file, dest_file))) + + # check that the source link file is removed. 
+ ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed.newname)) + self.assertFalse(ret, ("The New hashed volume {} still have the " + "expected linkto file {}" + .format(src_link_subvol._fqpath, + str(new_hashed.newname)))) + g.log.info("The source link file is removed") + + def test_file_rename_when_src_link_and_dest_file_hash_same_subvol(self): + """ + Case 5: + - Destination file does not exist + - Source link file is stored on hashed sub volume(s1) and Source + file is stored on another subvolume(s2) + - Destination file should be hashed to same subvolume(s1) + mv <source_file> <destination_file> + - Source file should be renamed to destination file + - Source link file should be removed. + - Destination link file should be created on its + hashed subvolume(s1) + """ + # pylint: disable=protected-access + # pylint: disable=unsubscriptable-object + + # Get hashed subvol s2) + _, count, source_file = ( + self._create_file_and_get_hashed_subvol("test_source_file")) + + # Rename the file to create link in another subvol - (s1) + new_hashed = find_new_hashed(self.subvols, "/", "test_source_file") + self.assertIsNotNone(new_hashed, ("could not find new hashed subvol " + "for {}".format(source_file))) + + self.assertNotEqual(count, + new_hashed.subvol_count, + "New file should hash to different sub-volume") + + # Rename the source file to the new file name + dest_file = "{}/{}".format(self.mount_point, str(new_hashed.newname)) + ret = move_file(self.clients[0], source_file, dest_file) + self.assertTrue(ret, ("Failed to move file {} and {}" + .format(source_file, dest_file))) + + # Verify the Source link file is stored on hashed sub volume(s1) + src_link_subvol = new_hashed.hashedbrickobject + ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed.newname)) + self.assertTrue(ret, ("The New hashed volume {} doesn't have the " + "expected linkto file {}" + .format(src_link_subvol._fqpath, + str(new_hashed.newname)))) + + # Get a file name 
to hash to the subvol s1 + new_hashed2 = find_specific_hashed(self.subvols, + "/", + src_link_subvol, + new_hashed.newname) + self.assertIsNotNone(new_hashed2, ("Couldn't find a name hashed to the" + " given subvol {}" + .format(src_link_subvol))) + + _, rename_count = ( + find_hashed_subvol(self.subvols, "/", str(new_hashed2.newname))) + self.assertEqual(new_hashed.subvol_count, rename_count, + "The subvols for src and dest are not same.") + + # Move the source file to the new file name + source_file = "{}/{}".format(self.mount_point, str(new_hashed.newname)) + dest_file = "{}/{}".format(self.mount_point, str(new_hashed2.newname)) + ret = move_file(self.clients[0], source_file, dest_file) + self.assertTrue(ret, "Failed to move file") + + # check that the source link file is removed. + ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed.newname)) + self.assertFalse(ret, ("The hashed volume {} still have the " + "expected linkto file {}" + .format(src_link_subvol._fqpath, + str(new_hashed.newname)))) + g.log.info("The source link file is removed") + + # Check Destination link file is created on its hashed sub-volume(s1) + ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed2.newname)) + self.assertTrue(ret, ("The New hashed volume {} doesn't have the " + "expected linkto file {}" + .format(src_link_subvol._fqpath, + str(new_hashed2.newname)))) + g.log.info("Destinaion link is created in desired subvol") diff --git a/tests/functional/dht/test_file_rename_when_destination_file_stored_on_source_file_hashed_subvol.py b/tests/functional/dht/test_file_rename_when_destination_file_stored_on_source_file_hashed_subvol.py new file mode 100644 index 000000000..9efe2a891 --- /dev/null +++ b/tests/functional/dht/test_file_rename_when_destination_file_stored_on_source_file_hashed_subvol.py @@ -0,0 +1,639 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +import re +from glusto.core import Glusto as g +from glustolibs.gluster.glusterfile import get_file_stat +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.dht_test_utils import (find_hashed_subvol, + find_new_hashed, + find_specific_hashed) +from glustolibs.gluster.volume_libs import get_subvols, parse_vol_file +from glustolibs.gluster.glusterfile import (move_file, + is_linkto_file, + get_dht_linkto_xattr) + + +@runs_on([['distributed-replicated', 'distributed', + 'distributed-dispersed', 'distributed-arbiter'], + ['glusterfs']]) +class DhtFileRenameWithDestFileHashed(GlusterBaseClass): + + def setUp(self): + """ + Setup Volume and Mount Volume + """ + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Change the dist count to 4 in case of 'distributed-replicated' , + # 'distributed-dispersed' and 'distributed-arbiter' + if self.volume_type in ("distributed-replicated", + "distributed-dispersed", + "distributed-arbiter"): + self.volume['voltype']['dist_count'] = 4 + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=[self.mounts[0]]) + if not ret: + raise ExecutionError("Failed to 
Setup_Volume and Mount_Volume") + + self.mount_point = self.mounts[0].mountpoint + + self.subvols = (get_subvols( + self.mnode, self.volname))['volume_subvols'] + self.assertIsNotNone(self.subvols, "failed to get subvols") + + def tearDown(self): + """ + Unmount Volume and Cleanup Volume + """ + # Unmount Volume and Cleanup Volume + ret = self.unmount_volume_and_cleanup_volume(mounts=[self.mounts[0]]) + if not ret: + raise ExecutionError("Unmount Volume and Cleanup Volume: Fail") + g.log.info("Unmount Volume and Cleanup Volume: Success") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _create_file_and_get_hashed_subvol(self, file_name): + """ Creates a file and return its hashed subvol + + Args: + file_name(str): name of the file to be created + Returns: + hashed_subvol object: An object of type BrickDir type + representing the hashed subvolume + + subvol_count: The subvol index in the subvol list + + source_file: Path to the file created + + """ + # pylint: disable=unsubscriptable-object + + # Create Source File + source_file = "{}/{}".format(self.mount_point, file_name) + ret, _, err = g.run(self.mounts[0].client_system, + ("touch %s" % source_file)) + self.assertEqual(ret, 0, + ("Failed to create %s : err %s", source_file, err)) + g.log.info("Successfully created the source file") + + # Find the hashed subvol for source file + source_hashed_subvol, count = find_hashed_subvol(self.subvols, + "/", + file_name) + self.assertIsNotNone(source_hashed_subvol, + "Couldn't find hashed subvol for the source file") + return source_hashed_subvol, count, source_file + + @staticmethod + def _verify_link_file_exists(brickdir, file_name): + """ Verifies whether a file link is present in given subvol + Args: + brickdir(Class Object): BrickDir object containing data about + bricks under a specific subvol + Returns: + (bool): Based on existance of file link + """ + # pylint: disable=protected-access + # pylint: 
disable=unsubscriptable-object + file_path = brickdir._fqpath + file_name + file_stat = get_file_stat(brickdir._host, file_path) + if file_stat is None: + g.log.error("Failed to get File stat for %s", file_path) + return False + if not file_stat['access'] == "1000": + g.log.error("Access value not 1000 for %s", file_path) + return False + + # Check for file type to be'sticky empty', have size of 0 and + # have the glusterfs.dht.linkto xattr set. + ret = is_linkto_file(brickdir._host, file_path) + if not ret: + g.log.error("%s is not a linkto file", file_path) + return False + return True + + @staticmethod + def _verify_file_exists(brick_dir, file_name): + """ Verifies whether a file is present in given subvol or not + Args: + brickdir(Class Object): BrickDir object containing data about + bricks under a specific subvol + file_name(str): Name of the file to be searched + Returns: + (bool): Based on existance of file + """ + # pylint: disable=protected-access + + cmd = "[ -f {} ]".format(brick_dir._fqpath + + (str(file_name))) + ret, _, _ = g.run(brick_dir._host, cmd) + if ret: + return False + return True + + @staticmethod + def _get_remote_subvolume(vol_file_data, brick_name): + """ Verifies whether a file is present in given subvol or not + Args: + vol_file_data(dict): Dictionary containing data of .vol file + brick_name(str): Brick path + Returns: + (str): Remote subvol name + (None): If error occurred + """ + try: + brick_name = re.search(r'[a-z0-9\-\_]*', brick_name).group() + remote_subvol = (vol_file_data[ + brick_name]['option']['remote-subvolume']) + except KeyError: + return None + return remote_subvol + + def _verify_file_links_to_specified_destination(self, host, file_path, + dest_file): + """ Verifies whether a file link points to the specified destination + Args: + host(str): Host at which commands are to be executed + file_path(str): path to the link file + dest_file(str): path to the dest file to be pointed at + Returns: + (bool) : Based on whether 
the given file points to dest or not + """ + link_to_xattr = get_dht_linkto_xattr(host, file_path) + # Remove unexpected chars in the value, if any + link_to_xattr = re.search(r'[a-z0-9\-\_]*', link_to_xattr).group() + if link_to_xattr is None: + g.log.error("Failed to get trusted.glusterfs.dht.linkto") + return False + + # Get the remote-subvolume for the corresponding linkto xattr + path = ("/var/lib/glusterd/vols/{}/{}.tcp-fuse.vol" + .format(self.volname, self.volname)) + vol_data = parse_vol_file(self.mnode, path) + if not vol_data: + g.log.error("Failed to parse the file %s", path) + return False + + remote_subvol = self._get_remote_subvolume(vol_data, link_to_xattr) + if remote_subvol is None: + # In case, failed to find the remote subvol, get all the + # subvolumes and then check whether the file is present in + # any of those sunbol + subvolumes = vol_data[link_to_xattr]['subvolumes'] + for subvol in subvolumes: + remote_subvol = self._get_remote_subvolume(vol_data, + subvol) + if remote_subvol: + subvol = re.search(r'[a-z0-9\-\_]*', subvol).group() + remote_host = ( + vol_data[subvol]['option']['remote-host']) + # Verify the new file is in the remote-subvol identified + cmd = "[ -f {}/{} ]".format(remote_subvol, dest_file) + ret, _, _ = g.run(remote_host, cmd) + if ret == 0: + return True + g.log.error("The given link file doesn't point to any of " + "the subvolumes") + return False + else: + remote_host = vol_data[link_to_xattr]['option']['remote-host'] + # Verify the new file is in the remote-subvol identified + cmd = "[ -f {}/{} ]".format(remote_subvol, dest_file) + ret, _, _ = g.run(remote_host, cmd) + if ret == 0: + return True + return False + + def test_file_rename_when_source_and_dest_hash_diff_subvol(self): + """ + - Destination file should exist + - Source file is stored on hashed sub volume(s1) and cached on + another subvolume(s2) + - Destination file should be hased to subvolume where source file is + stored(s2) + - Destination file should 
hased subvolume(s2) but cached same + subvolume(s1) where source file is hashed + mv <source_file> <destination_file> + - Destination file is removed. + - Source file should be renamed as destination file + - Destination link file should be removed + - source link file should be removed + """ + # pylint: disable=protected-access + + # Create soruce file and Get hashed subvol (s2) + source_hashed_subvol, src_count, source_file = ( + self._create_file_and_get_hashed_subvol("test_source_file")) + + # Rename the file such that the new name hashes to a new subvol (S1) + new_hashed = find_new_hashed(self.subvols, "/", "test_source_file") + self.assertIsNotNone(new_hashed, ("could'nt find new hashed for {}" + .format(source_file))) + + # Verify the subvols are not same for source and destination files + self.assertNotEqual(src_count, + new_hashed.subvol_count, + "The subvols for src and dest are same.") + + # Rename/Move the file + dest_file = "{}/{}".format(self.mount_point, str(new_hashed.newname)) + ret = move_file(self.mounts[0].client_system, source_file, dest_file) + self.assertTrue(ret, "Failed to move files {} and {}".format( + source_file, dest_file)) + + # Verify the Source link file is stored on hashed sub volume(s1) + src_link_subvol = new_hashed.hashedbrickobject + ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed.newname)) + self.assertTrue(ret, ("The hashed subvol {} doesn't have the " + "expected linkto file: {}" + .format(src_link_subvol._fqpath, + str(new_hashed.newname)))) + + # Get a file name that stores to S1 for destination + new_hashed2 = find_specific_hashed(self.subvols, + "/", + src_link_subvol, + new_hashed.newname) + self.assertIsNotNone(new_hashed2, + "could not find new hashed for dstfile") + + # Create destination file in subvol S1 + dest_hashed_subvol, dest_count, dest_file = ( + self._create_file_and_get_hashed_subvol(str(new_hashed2.newname))) + + # Verify the subvol is S1 itself + 
self.assertEqual(new_hashed.subvol_count, dest_count, + "The destination file is not stored to desired " + "subvol :{}, instead to subvol : {}" + .format(new_hashed2.subvol_count, dest_count)) + + # Create a linkfile to dest by renaming it to hash to S2 + dest_hashed = find_specific_hashed(self.subvols, + "/", + source_hashed_subvol) + # Verify the subvol is S2 + self.assertEqual(dest_hashed.subvol_count, src_count, + "The destination file is not stored to desired " + "subvol :{}, instead to subvol : {}" + .format(dest_hashed.subvol_count, src_count)) + + # Rename the source file to the new file name + dest_file_2 = "{}/{}".format(self.mount_point, + str(dest_hashed.newname)) + ret = move_file(self.mounts[0].client_system, dest_file, dest_file_2) + self.assertTrue(ret, "Failed to move files {} and {}".format( + source_file, dest_file_2)) + + # Verify the Dest link file is stored on sub volume(s2) + ret = self._verify_link_file_exists(source_hashed_subvol, + str(dest_hashed.newname)) + self.assertTrue(ret, ("The hashed subvol {} doesn't have the " + "expected linkto file: {}" + .format(source_hashed_subvol._fqpath, + str(dest_hashed.newname)))) + + # Rename source to destination + src = "{}/{}".format(self.mount_point, str(new_hashed.newname)) + dest_file = "{}/{}".format(self.mount_point, + str(dest_hashed.newname)) + ret = move_file(self.mounts[0].client_system, src, dest_file) + self.assertTrue(ret, "Failed to move files {} and {}" + .format(src, dest_file)) + + # Verify destination file is removed + ret = self._verify_file_exists(dest_hashed_subvol, + str(new_hashed2.newname)) + self.assertFalse(ret, ("Destination file : {} is not removed in subvol" + " : {}".format(str(new_hashed2.newname), + dest_hashed_subvol._fqpath))) + g.log.info("The destination file is removed as expected") + + # Verify the source link is removed + ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed.newname)) + self.assertFalse(ret, ("The hashed subvol {} still have the 
" + "expected linkto file: {}" + .format(src_link_subvol._fqpath, + str(new_hashed.newname)))) + + g.log.info("The source link file is removed as expected") + + def test_file_rename_when_source_and_dest_hash_same_subvol(self): + """ + - Destination file should exist + - Source file is hashed sub volume(s1) and cached on another + subvolume(s2) + - Destination file should be hased to same subvolume(s1) where + source file is hased + - Destination hashed on subvolume(s1) but should be cached on + subvolume(s2) where source file is stored + mv <source_file> <destination_file> + - Destination file is removed. + - Source file should be renamed as destination file + - Destination link file should be there on hashed subvolume and + should link to new destination file + - source link file should be removed + """ + # pylint: disable=protected-access + # pylint: disable=too-many-locals + + # Create soruce file and Get hashed subvol (s2) + source_hashed_subvol, src_count, source_file = ( + self._create_file_and_get_hashed_subvol("test_source_file")) + + # Rename the file such that the new name hashes to a new subvol (S1) + new_hashed = find_new_hashed(self.subvols, "/", "test_source_file") + self.assertIsNotNone(new_hashed, ("could'nt find new hashed for {}" + .format(source_file))) + + # Verify the subvols are not same for source and destination files + self.assertNotEqual(src_count, + new_hashed.subvol_count, + "The subvols for src and dest are same.") + + # Rename/Move the file + dest_file = "{}/{}".format(self.mount_point, str(new_hashed.newname)) + ret = move_file(self.mounts[0].client_system, source_file, dest_file) + self.assertTrue(ret, "Failed to move files {} and {}".format( + source_file, dest_file)) + + # Verify the Source link file is stored on hashed sub volume(s1) + src_link_subvol = new_hashed.hashedbrickobject + ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed.newname)) + self.assertTrue(ret, ("The hashed subvol {} doesn't have the " + 
"expected linkto file: {}" + .format(src_link_subvol._fqpath, + str(new_hashed.newname)))) + + # Get a file name that stores to S2 for destination + new_hashed2 = find_specific_hashed(self.subvols, + "/", + source_hashed_subvol) + self.assertIsNotNone(new_hashed2, + "could not find new hashed for dstfile") + + # Create destination file in subvol S2 + dest_hashed_subvol, dest_count, dest_file = ( + self._create_file_and_get_hashed_subvol(str(new_hashed2.newname))) + + # Verify the subvol is S2 itself + self.assertEqual(dest_count, src_count, + "The destination file is not stored to desired " + "subvol :{}" + .format(dest_count)) + + # Create a linkfile to dest by renaming it to hash to S1 + dest_hashed = find_specific_hashed(self.subvols, + "/", + src_link_subvol, + new_hashed.newname) + # Verify the subvol is S1 + self.assertEqual(dest_hashed.subvol_count, new_hashed.subvol_count, + "The destination file is not stored to desired " + "subvol :{}, instead to subvol : {}" + .format(dest_hashed.subvol_count, new_hashed)) + + # Rename the dest file to the new file name + dest_file_2 = "{}/{}".format(self.mount_point, + str(dest_hashed.newname)) + ret = move_file(self.mounts[0].client_system, dest_file, dest_file_2) + self.assertTrue(ret, "Failed to move files {} and {}".format( + source_file, dest_file_2)) + + # Rename source to destination + src = "{}/{}".format(self.mount_point, str(new_hashed.newname)) + dest_file = "{}/{}".format(self.mount_point, + str(dest_hashed.newname)) + ret = move_file(self.mounts[0].client_system, src, dest_file) + self.assertTrue(ret, "Failed to move files {} and {}" + .format(src, dest_file)) + + # Verify destination file is removed + ret = self._verify_file_exists(dest_hashed_subvol, + str(new_hashed2.newname)) + self.assertFalse(ret, ("Destination file : {} is not removed in subvol" + " : {}".format(str(new_hashed2.newname), + dest_hashed_subvol._fqpath))) + g.log.info("The destination file is removed as expected") + + # Verify the 
source link is removed + ret = self._verify_link_file_exists(src_link_subvol, + str(new_hashed.newname)) + self.assertFalse(ret, ("The hashed subvol {} still have the " + "expected linkto file: {}" + .format(src_link_subvol._fqpath, + str(new_hashed.newname)))) + + g.log.info("The source link file is removed as expected") + + # Verify the Destination link is on hashed subvolume + ret = self._verify_link_file_exists(src_link_subvol, + str(dest_hashed.newname)) + self.assertTrue(ret, ("The hashed subvol {} doesn't have the " + "expected linkto file: {}" + .format(dest_hashed_subvol._fqpath, + str(dest_hashed.newname)))) + + # Verify the dest link file points to new destination file + file_path = src_link_subvol._fqpath + str(dest_hashed.newname) + ret = (self._verify_file_links_to_specified_destination( + src_link_subvol._host, file_path, str(dest_hashed.newname))) + self.assertTrue(ret, "The dest link file not pointing towards " + "the desired file") + g.log.info("The Destination link file is pointing to new file" + " as expected") + + def test_file_rename_when_dest_hash_to_src_subvol(self): + """ + - Destination file should exist + - Source file is stored on hashed subvolume it self + - Destination file should be hased to same subvolume(s1) + where source file is + - Destination file hased subvolume(s1) but cached onsubvolume(s2) + mv <source_file> <destination_file> + - Destination file is removed. 
+ - Source file should be renamed as destination file + - Destination link file should be removed + """ + # pylint: disable=protected-access + + # Create soruce file and Get hashed subvol (s1) + source_hashed_subvol, src_count, source_file = ( + self._create_file_and_get_hashed_subvol("test_source_file")) + + # Find a file name that hashes to another subvol (s2) + new_hashed = find_new_hashed(self.subvols, "/", "test_source_file") + self.assertIsNotNone(new_hashed, ("could'nt find new hashed for {}" + .format(source_file))) + + # Create destination file in subvol S2 + _, dest_count, dest_file = ( + self._create_file_and_get_hashed_subvol(str(new_hashed.newname))) + + # Rename dest file such that it hashes to S1 + new_hashed2 = find_specific_hashed(self.subvols, + "/", + source_hashed_subvol) + self.assertIsNotNone(new_hashed2, + "could not find new hashed for dstfile") + + # Verify the subvol is S1 itself + self.assertEqual(new_hashed2.subvol_count, src_count, + "The destination file is not stored to desired " + "subvol :{}".format(dest_count)) + + # Rename/Move the file + dest_file2 = "{}/{}".format(self.mount_point, str(new_hashed2.newname)) + ret = move_file(self.mounts[0].client_system, dest_file, dest_file2) + self.assertTrue(ret, "Failed to move files {} and {}" + .format(source_file, dest_file)) + + # Verify the Dest link file is stored on hashed sub volume(s1) + dest_link_subvol = new_hashed2.hashedbrickobject + ret = self._verify_link_file_exists(dest_link_subvol, + str(new_hashed2.newname)) + self.assertTrue(ret, ("The hashed subvol {} doesn't have the " + "expected linkto file: {}" + .format(dest_link_subvol._fqpath, + str(new_hashed2.newname)))) + + # Rename Source to Dest + src = "{}/{}".format(self.mount_point, "test_source_file") + dest_file = "{}/{}".format(self.mount_point, str(new_hashed2.newname)) + ret = move_file(self.mounts[0].client_system, src, dest_file) + self.assertTrue(ret, "Failed to move files {} and {}" + .format(src, dest_file)) + + 
# Verify destination file is removed + ret = self._verify_file_exists(new_hashed.hashedbrickobject, + str(new_hashed.newname)) + self.assertFalse(ret, ("Destination file : {} is not removed in subvol" + " : {}".format(str(new_hashed.newname), + new_hashed.hashedbrickobject + ._fqpath))) + g.log.info("The destination file is removed as expected") + + # Verify the Destination link is removed + ret = self._verify_link_file_exists(new_hashed2.hashedbrickobject, + str(new_hashed2.newname)) + self.assertFalse(ret, ("The hashed subvol {} still have the " + "expected linkto file: {}" + .format(new_hashed2.hashedbrickobject._fqpath, + str(new_hashed2.newname)))) + + g.log.info("The Destination link file is removed as expected") + + def test_file_rename_when_dest_cache_to_src_subvol(self): + """ + - Destination file should exist + - Source file is stored on hashed subvolume it self + - Destination file should be hased to some other subvolume(s2) + - Destination file hashed on subvolume(s2) but cached on the + subvolume(s1) where souce file is present + mv <source_file> <destination_file> + - Destination file is removed. 
+ - Source file should be renamed as destination file + - Destination link file should be there on hashed subvolume and + should link to new destination file + """ + # pylint: disable=protected-access + + # Create soruce file and Get hashed subvol (s1) + source_hashed_subvol, src_count, _ = ( + self._create_file_and_get_hashed_subvol("test_source_file")) + + # Find name for dest file to cache to S1 + dest_subvol = find_specific_hashed(self.subvols, + "/", + source_hashed_subvol) + dest_name = str(dest_subvol.newname) + + # Create destination file in subvol S1 + _, dest_count, _ = self._create_file_and_get_hashed_subvol(dest_name) + + # Verify its subvol (s1) + self.assertEqual(src_count, dest_count, + ("The newly created file falls under subvol {} " + "rather than {}".format(dest_count, src_count))) + + # Rename dest file such that it hashes to some other subvol S2 + dest_hashed_subvol = find_new_hashed(self.subvols, + "/", + dest_name) + self.assertIsNotNone(dest_hashed_subvol, + "could not find new hashed for dstfile") + + # Rename/Move the file + dest_file = "{}/{}".format(self.mount_point, + dest_hashed_subvol.newname) + src_file = "{}/{}".format(self.mount_point, dest_name) + ret = move_file(self.mounts[0].client_system, src_file, dest_file) + self.assertTrue(ret, "Failed to move files {} and {}" + .format(src_file, dest_file)) + + # Verify the Dest link file is stored on hashed sub volume(s2) + dest_link_subvol = dest_hashed_subvol.hashedbrickobject + ret = self._verify_link_file_exists(dest_link_subvol, + str(dest_hashed_subvol.newname)) + self.assertTrue(ret, ("The hashed subvol {} doesn't have the " + "expected linkto file: {}" + .format(dest_link_subvol._fqpath, + str(dest_hashed_subvol.newname)))) + + # Rename Source to Dest + src = "{}/{}".format(self.mount_point, "test_source_file") + dest_file = "{}/{}".format(self.mount_point, + dest_hashed_subvol.newname) + ret = move_file(self.mounts[0].client_system, src, dest_file) + self.assertTrue(ret, "Failed 
to move files {} and {}" + .format(src, dest_file)) + + # Verify destination file is removed + ret = self._verify_file_exists(dest_subvol.hashedbrickobject, + dest_name) + self.assertFalse(ret, ("Destination file : {} is not removed in subvol" + " : {}" + .format(str(dest_hashed_subvol.newname), + dest_link_subvol._fqpath))) + g.log.info("The destination file is removed as expected") + + # Verify the Destination link is present + ret = self._verify_link_file_exists(dest_link_subvol, + str(dest_hashed_subvol.newname)) + self.assertTrue(ret, ("The hashed subvol {} still have the " + "expected linkto file: {}" + .format(dest_link_subvol._fqpath, + str(dest_hashed_subvol.newname)))) + + g.log.info("The Destination link file is present as expected") + + # Verify the dest link file points to new destination file + file_path = dest_link_subvol._fqpath + str(dest_hashed_subvol.newname) + ret = (self._verify_file_links_to_specified_destination( + dest_link_subvol._host, file_path, + str(dest_hashed_subvol.newname))) + self.assertTrue(ret, "The dest link file not pointing towards " + "the desired file") + g.log.info("The Destination link file is pointing to new file" + " as expected") diff --git a/tests/functional/dht/test_healing_of_custom_xattrs_on_newly_added_bricks.py b/tests/functional/dht/test_healing_of_custom_xattrs_on_newly_added_bricks.py index 4f7f77ec0..d396702a0 100644 --- a/tests/functional/dht/test_healing_of_custom_xattrs_on_newly_added_bricks.py +++ b/tests/functional/dht/test_healing_of_custom_xattrs_on_newly_added_bricks.py @@ -50,17 +50,17 @@ class TestCustomxattrsOnNewBricks(GlusterBaseClass): raise ExecutionError("Failed to Setup_Volume and Mount_Volume") g.log.info("Successful in Setup Volume and Mount Volume") - @classmethod - def tearDownClass(cls): - # Unmount Volume and Cleanup Volume + def tearDown(self): + + # Unmount and cleanup original volume g.log.info("Starting to Unmount Volume and Cleanup Volume") - ret = 
cls.unmount_volume_and_cleanup_volume(mounts=cls.mounts) + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: - raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") - g.log.info("Successful in Unmount Volume and Cleanup Volume") + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass tearDown - cls.get_super_method(cls, 'tearDownClass')() + self.get_super_method(self, 'tearDown')() def check_xattr(self, list_of_all_dirs): """ diff --git a/tests/functional/dht/test_induce_holes_in_layout_by_removebrick_force_then_lookup.py b/tests/functional/dht/test_induce_holes_in_layout_by_removebrick_force_then_lookup.py index c4acb67c3..04b72725a 100644 --- a/tests/functional/dht/test_induce_holes_in_layout_by_removebrick_force_then_lookup.py +++ b/tests/functional/dht/test_induce_holes_in_layout_by_removebrick_force_then_lookup.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018 Red Hat, Inc. http://www.redhat.com> +# Copyright (C) 2018-2020 Red Hat, Inc. http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,9 +14,7 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
-import tempfile from glusto.core import Glusto as g - from glustolibs.gluster.brick_ops import remove_brick from glustolibs.gluster.constants import \ TEST_LAYOUT_IS_COMPLETE as LAYOUT_IS_COMPLETE @@ -30,46 +28,72 @@ from glustolibs.gluster.dht_test_utils import is_layout_complete from glustolibs.gluster.mount_ops import mount_volume -@runs_on([['distributed', 'distributed-replicated', 'distributed-dispersed'], +@runs_on([['distributed', 'distributed-replicated', 'distributed-arbiter', + 'distributed-dispersed'], ['glusterfs']]) class RebalanceValidation(GlusterBaseClass): - def setUp(self): - - # Calling GlusterBaseClass setUp - self.get_super_method(self, 'setUp')() + @classmethod + def setUpClass(cls): + + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Check for the default dist_count value and override it if required + if cls.default_volume_type_config['distributed']['dist_count'] <= 2: + cls.default_volume_type_config['distributed']['dist_count'] = 4 + if (cls.default_volume_type_config['distributed-replicated'] + ['dist_count']) <= 2: + (cls.default_volume_type_config['distributed-replicated'] + ['dist_count']) = 4 + if (cls.default_volume_type_config['distributed-dispersed'] + ['dist_count']) <= 2: + (cls.default_volume_type_config['distributed-dispersed'] + ['dist_count']) = 4 + if (cls.default_volume_type_config['distributed-arbiter'] + ['dist_count']) <= 2: + (cls.default_volume_type_config['distributed-arbiter'] + ['dist_count']) = 4 # Setup Volume and Mount Volume + g.log.info("Starting to Setup Volume and Mount Volume") - ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + ret = cls.setup_volume_and_mount_volume(mounts=cls.mounts) if not ret: raise ExecutionError("Failed to Setup_Volume and Mount_Volume") g.log.info("Successful in Setup Volume and Mount Volume") - # Form bricks list for Shrinking volume - self.remove_brick_list = form_bricks_list_to_remove_brick(self.mnode, - self.volname, - 
subvol_num=1) - if not self.remove_brick_list: - g.log.error("Volume %s: Failed to form bricks list " - "for volume shrink", self.volname) - raise ExecutionError("Volume %s: Failed to form bricks list " - "for volume shrink" % self.volname) - g.log.info("Volume %s: Formed bricks list for volume shrink", - self.volname) - def test_induce_holes_then_lookup(self): + """ + Test Script to induce holes in layout by using remove-brick force + and then performing lookup in order to fix the layout. + + Steps : + 1) Create a volume and mount it using FUSE. + 2) Create a directory "testdir" on mount point. + 3) Check if the layout is complete. + 4) Log volume info and status before remove-brick operation. + 5) Form a list of bricks to be removed. + 6) Start remove-brick operation using 'force'. + 7) Let remove-brick complete and check layout. + 8) Mount the volume on a new mount. + 9) Send a lookup on mount point. + 10) Check if the layout is complete. + + """ # pylint: disable=too-many-statements + # Create a directory on mount point m_point = self.mounts[0].mountpoint - command = 'mkdir -p ' + m_point + '/testdir' + dirpath = '/testdir' + command = 'mkdir -p ' + m_point + dirpath ret, _, _ = g.run(self.clients[0], command) self.assertEqual(ret, 0, "mkdir failed") g.log.info("mkdir is successful") # DHT Layout validation g.log.debug("Verifying hash layout values %s:%s", - self.clients[0], self.mounts[0].mountpoint) - ret = validate_files_in_dir(self.clients[0], self.mounts[0].mountpoint, + self.clients[0], m_point) + ret = validate_files_in_dir(self.clients[0], m_point, test_type=LAYOUT_IS_COMPLETE, file_type=FILETYPE_DIRS) self.assertTrue(ret, "LAYOUT_IS_COMPLETE: FAILED") @@ -79,6 +103,16 @@ class RebalanceValidation(GlusterBaseClass): g.log.info("Logging volume info and Status before shrinking volume") log_volume_info_and_status(self.mnode, self.volname) + # Form bricks list for Shrinking volume + self.remove_brick_list = form_bricks_list_to_remove_brick(self.mnode, + 
self.volname, + subvol_num=1) + self.assertNotEqual(self.remove_brick_list, None, + ("Volume %s: Failed to form bricks list for volume" + " shrink", self.volname)) + g.log.info("Volume %s: Formed bricks list for volume shrink", + self.volname) + # Shrinking volume by removing bricks g.log.info("Start removing bricks from volume") ret, _, _ = remove_brick(self.mnode, self.volname, @@ -87,32 +121,33 @@ class RebalanceValidation(GlusterBaseClass): g.log.info("Remove-brick with force: PASS") # Check the layout - dirpath = '/testdir' ret = is_layout_complete(self.mnode, self.volname, dirpath) - self.assertFalse(ret, "Volume %s: Layout is complete") - g.log.info("Volume %s: Layout has some holes") + self.assertFalse(ret, ("Volume %s: Layout is complete", self.volname)) + g.log.info("Volume %s: Layout has some holes", self.volname) # Mount the volume on a new mount point - mount_point = tempfile.mkdtemp() ret, _, _ = mount_volume(self.volname, mtype='glusterfs', - mpoint=mount_point, + mpoint=m_point, mserver=self.mnode, - mclient=self.mnode) - self.assertEqual(ret, 0, ("Failed to do gluster mount on volume %s", - self.volname)) - g.log.info("Volume %s: mount success", self.mnode) + mclient=self.clients[1]) + self.assertEqual(ret, 0, ("Failed to do gluster mount of volume %s" + " on client node %s", + self.volname, self.clients[1])) + g.log.info("Volume %s mounted successfullly on %s", self.volname, + self.clients[1]) # Send a look up on the directory - cmd = 'ls %s%s' % (mount_point, dirpath) - ret, _, err = g.run(self.mnode, cmd) + cmd = 'ls %s%s' % (m_point, dirpath) + ret, _, err = g.run(self.clients[1], cmd) self.assertEqual(ret, 0, ("Lookup failed on %s with error %s", (dirpath, err))) - g.log.info("Lookup sent successfully on %s", dirpath) + g.log.info("Lookup sent successfully on %s", m_point + dirpath) # DHT Layout validation + g.log.info("Checking layout after new mount") g.log.debug("Verifying hash layout values %s:%s", - self.mnode, mount_point + dirpath) - 
ret = validate_files_in_dir(self.mnode, mount_point + dirpath, + self.clients[1], m_point + dirpath) + ret = validate_files_in_dir(self.clients[1], m_point + dirpath, test_type=LAYOUT_IS_COMPLETE, file_type=FILETYPE_DIRS) self.assertTrue(ret, "LAYOUT_IS_COMPLETE: FAILED") @@ -128,9 +163,13 @@ class RebalanceValidation(GlusterBaseClass): if ret: raise ExecutionError("Failed to delete removed brick dir " "%s:%s" % (brick_node, brick_path)) + # Unmount Volume and Cleanup Volume g.log.info("Starting to Unmount Volume and Cleanup Volume") ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") g.log.info("Successful in Unmount Volume and Cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/dht/test_invalid_memory_read_after_freed.py b/tests/functional/dht/test_invalid_memory_read_after_freed.py new file mode 100644 index 000000000..fb4e3719c --- /dev/null +++ b/tests/functional/dht/test_invalid_memory_read_after_freed.py @@ -0,0 +1,102 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.glusterdir import get_dir_contents +from glustolibs.gluster.lib_utils import is_core_file_created + + +@runs_on([['distributed-replicated', 'distributed-arbiter', + 'distributed-dispersed', 'distributed'], ['glusterfs']]) +class TestInvalidMemoryReadAfterFreed(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Creating Volume and mounting the volume + ret = self.setup_volume_and_mount_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Volume creation or mount failed: %s" + % self.volname) + + # Assign a variable for the first_client + self.first_client = self.mounts[0].client_system + + def tearDown(self): + + # Unmounting and cleaning volume + ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Unable to delete volume %s" % self.volname) + + self.get_super_method(self, 'tearDown')() + + def test_invalid_memory_read_after_freed(self): + """ + Test case: + 1. Create a volume and start it. + 2. Mount the volume using FUSE. + 3. Create multiple level of dirs and files inside every dir. + 4. Rename files such that linkto files are created. + 5. From the mount point do an rm -rf * and check if all files + are delete or not from mount point as well as backend bricks. 
+ """ + # Fetch timestamp to check for core files + ret, test_timestamp, _ = g.run(self.mnode, "date +%s") + self.assertEqual(ret, 0, "date command failed") + test_timestamp = test_timestamp.strip() + + # Create multiple level of dirs and files inside every dir + cmd = ("cd %s; for i in {1..100}; do mkdir dir$i; cd dir$i; " + "for i in {1..200}; do dd if=/dev/urandom of=file$i bs=1K" + " count=1; done; done" % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to create dirs and files") + + # Rename files such that linkto files are created + cmd = ("cd %s; for i in {1..100}; do cd dir$i; for i in {1..200}; do " + "mv file$i ntfile$i; done; done" % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to rename files") + g.log.info("Files created and renamed successfully") + + # From the mount point do an rm -rf * and check if all files + # are delete or not from mount point as well as backend bricks. 
+ ret, _, _ = g.run(self.first_client, + "rm -rf {}/*".format(self.mounts[0].mountpoint)) + self.assertFalse(ret, "rn -rf * failed on mount point") + + ret = get_dir_contents(self.first_client, + "{}/".format(self.mounts[0].mountpoint)) + self.assertEqual(ret, [], "Unexpected: Files and directories still " + "seen from mount point") + + for brick in get_all_bricks(self.mnode, self.volname): + node, brick_path = brick.split(":") + ret = get_dir_contents(node, "{}/".format(brick_path)) + self.assertEqual(ret, [], "Unexpected: Files and dirs still seen " + "on brick %s on node %s" % (brick_path, node)) + g.log.info("rm -rf * on mount point successful") + + # Check for core file on servers and clients + servers = self.servers + [self.first_client] + ret = is_core_file_created(servers, test_timestamp) + self.assertTrue(ret, "Core files found on servers used for test") + g.log.info("No cores found on all participating servers") diff --git a/tests/functional/dht/test_kill_brick_with_remove_brick.py b/tests/functional/dht/test_kill_brick_with_remove_brick.py new file mode 100644 index 000000000..0257b3d86 --- /dev/null +++ b/tests/functional/dht/test_kill_brick_with_remove_brick.py @@ -0,0 +1,128 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from random import choice + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_ops import remove_brick +from glustolibs.gluster.rebalance_ops import ( + wait_for_remove_brick_to_complete, get_remove_brick_status) +from glustolibs.gluster.volume_libs import form_bricks_list_to_remove_brick +from glustolibs.misc.misc_libs import upload_scripts, kill_process +from glustolibs.io.utils import collect_mounts_arequal + + +@runs_on([['distributed-replicated', 'distributed-arbiter'], ['glusterfs']]) +class TestKillBrickWithRemoveBrick(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" % + cls.clients) + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Changing dist_count to 3 + self.volume['voltype']['dist_count'] = 3 + + # Creating Volume and mounting the volume + ret = self.setup_volume_and_mount_volume(self.mounts) + if not ret: + raise ExecutionError("Volume creation or mount failed: %s" + % self.volname) + + def tearDown(self): + + # Unmounting and cleaning volume + ret = self.unmount_volume_and_cleanup_volume(self.mounts) + if not ret: + raise ExecutionError("Unable to delete volume %s" % self.volname) + + self.get_super_method(self, 'tearDown')() + + def test_kill_brick_with_remove_brick(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create some data on the volume. + 3. Start remove-brick on the volume. + 4. When remove-brick is in progress kill brick process of a brick + which is being remove. + 5. 
Remove-brick should complete without any failures. + """ + # Start I/O from clients on the volume + counter = 1 + for mount_obj in self.mounts: + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d --dir-depth 2 " + "--dir-length 10 --max-num-of-dirs 5 " + "--num-of-files 5 %s" % ( + self.script_upload_path, + counter, mount_obj.mountpoint)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Failed to create datat on volume") + counter += 10 + + # Collect arequal checksum before ops + arequal_checksum_before = collect_mounts_arequal(self.mounts[0]) + + # Start remove-brick on the volume + brick_list = form_bricks_list_to_remove_brick(self.mnode, self.volname) + self.assertIsNotNone(brick_list, "Brick list is empty") + + ret, _, _ = remove_brick(self.mnode, self.volname, brick_list, 'start') + self.assertFalse(ret, "Failed to start remove-brick on volume") + g.log.info("Successfully started remove-brick on volume") + + # Check rebalance is in progress + ret = get_remove_brick_status(self.mnode, self.volname, brick_list) + ret = ret['aggregate']['statusStr'] + self.assertEqual(ret, "in progress", ("Rebalance is not in " + "'in progress' state, either " + "rebalance is in completed state" + " or failed to get rebalance " + "status")) + + # kill brick process of a brick which is being removed + brick = choice(brick_list) + node, _ = brick.split(":") + ret = kill_process(node, process_names="glusterfsd") + self.assertTrue(ret, "Failed to kill brick process of brick %s" + % brick) + + # Wait for remove-brick to complete on the volume + ret = wait_for_remove_brick_to_complete(self.mnode, self.volname, + brick_list, timeout=1200) + self.assertTrue(ret, "Remove-brick didn't complete") + g.log.info("Remove brick completed successfully") + + # Check for data loss by comparing arequal before and after ops + arequal_checksum_after = collect_mounts_arequal(self.mounts[0]) + self.assertEqual(arequal_checksum_before, 
arequal_checksum_after, + "arequal checksum is NOT MATCHNG") + g.log.info("arequal checksum is SAME") diff --git a/tests/functional/dht/test_lookup_dir.py b/tests/functional/dht/test_lookup_dir.py index 40a2d12f2..1e78ab7c0 100644 --- a/tests/functional/dht/test_lookup_dir.py +++ b/tests/functional/dht/test_lookup_dir.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018-2019 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2018-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -267,14 +267,14 @@ class TestLookupDir(GlusterBaseClass): ret = self.mkdir_nonhashed_down(subvols, parent_dir) self.assertTrue(ret, 'mkdir_nonhashed_down failed') - @classmethod - def tearDownClass(cls): - # Unmount Volume and Cleanup Volume + def tearDown(self): + + # Unmount and cleanup original volume g.log.info("Starting to Unmount Volume and Cleanup Volume") - ret = cls.unmount_volume_and_cleanup_volume(mounts=cls.mounts) + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: - raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") - g.log.info("Successful in Unmount Volume and Cleanup Volume") + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass tearDown - cls.get_super_method(cls, 'tearDownClass')() + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/dht/test_mkdir_hashdown.py b/tests/functional/dht/test_mkdir_hashdown.py index 827dc32a9..c64751ec3 100644 --- a/tests/functional/dht/test_mkdir_hashdown.py +++ b/tests/functional/dht/test_mkdir_hashdown.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018-2019 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2018-2020 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -142,14 +142,14 @@ class TestMkdirHashdown(GlusterBaseClass): g.log.info('dir %s does not exist on mount as expected', child_dir) - @classmethod - def tearDownClass(cls): - # Unmount Volume and Cleanup Volume + def tearDown(self): + + # Unmount and cleanup original volume g.log.info("Starting to Unmount Volume and Cleanup Volume") - ret = cls.unmount_volume_and_cleanup_volume(mounts=cls.mounts) + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: - raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") - g.log.info("Successful in Unmount Volume and Cleanup Volume") + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") - # Calling GlusterBaseClass tearDownClass - cls.get_super_method(cls, 'tearDownClass')() + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/dht/test_nuke_happy_path.py b/tests/functional/dht/test_nuke_happy_path.py new file mode 100644 index 000000000..e2e040e60 --- /dev/null +++ b/tests/functional/dht/test_nuke_happy_path.py @@ -0,0 +1,95 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.glusterdir import mkdir, get_dir_contents +from glustolibs.gluster.glusterfile import set_fattr +from glustolibs.gluster.brick_libs import get_all_bricks + + +@runs_on([['distributed-replicated', 'distributed-arbiter', + 'distributed-dispersed', 'distributed'], ['glusterfs']]) +class TestNukeHappyPath(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Creating Volume and mounting the volume + ret = self.setup_volume_and_mount_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Volume creation or mount failed: %s" + % self.volname) + + # Assign a variable for the first_client + self.first_client = self.mounts[0].client_system + + def tearDown(self): + + # Unmounting and cleaning volume + ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Unable to delete volume %s" % self.volname) + + self.get_super_method(self, 'tearDown')() + + def test_nuke_happy_path(self): + """ + Test case: + 1. Create a distributed volume, start and mount it + 2. Create 1000 dirs and 1000 files under a directory say 'dir1' + 3. Set xattr glusterfs.dht.nuke to "test" for dir1 + 4. Validate dir-1 is not seen from mount point + 5. Validate if the entry is moved to '/brickpath/.glusterfs/landfill' + and deleted eventually. 
+ """ + # Create 1000 dirs and 1000 files under a directory say 'dir1' + self.dir_1_path = "{}/dir1/".format(self.mounts[0].mountpoint) + ret = mkdir(self.first_client, self.dir_1_path) + self.assertTrue(ret, "Failed to create dir1 on mount point") + cmd = ("cd {};for i in `seq 1 1000`;do mkdir dir$i;touch file$i;done" + .format(self.dir_1_path)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "I/O failed at dir1 on mount point") + + # Set xattr glusterfs.dht.nuke to "test" for dir1 + ret = set_fattr(self.first_client, self.dir_1_path, + 'glusterfs.dht.nuke', 'test') + self.assertTrue(ret, "Failed to set xattr glusterfs.dht.nuke") + + # Validate dir-1 is not seen from mount point + ret = get_dir_contents(self.first_client, self.mounts[0].mountpoint) + self.assertEqual([], ret, + "UNEXPECTED: Mount point has files ideally it should " + "be empty.") + + # Validate if the entry is moved to '/brickpath/.glusterfs/landfill' + # and deleted eventually + for brick_path in get_all_bricks(self.mnode, self.volname): + node, path = brick_path.split(":") + path = "{}/.glusterfs/landfill/*/".format(path) + ret = get_dir_contents(node, path) + # In case if landfile is already cleaned before checking + # stop execution of the loop. + if ret is None: + g.log.info("Bricks have been already cleaned up.") + break + self.assertIsNotNone(ret, + "Files not present in /.glusterfs/landfill" + " dir") + g.log.info("Successully nuked dir1.") diff --git a/tests/functional/dht/test_one_brick_full_add_brick_rebalance.py b/tests/functional/dht/test_one_brick_full_add_brick_rebalance.py new file mode 100644 index 000000000..1ef5d1e90 --- /dev/null +++ b/tests/functional/dht/test_one_brick_full_add_brick_rebalance.py @@ -0,0 +1,139 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +import string +from random import choice + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.dht_test_utils import find_hashed_subvol +from glustolibs.gluster.lib_utils import get_usable_size_per_disk +from glustolibs.gluster.glusterdir import get_dir_contents, mkdir +from glustolibs.gluster.glusterfile import get_dht_linkto_xattr +from glustolibs.gluster.rebalance_ops import (rebalance_start, + wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_libs import (get_subvols, expand_volume) +from glustolibs.io.utils import collect_mounts_arequal + + +@runs_on([['distributed'], ['glusterfs']]) +class TestOneBrickFullAddBrickRebalance(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Changing dist_count to 3 + self.volume['voltype']['dist_count'] = 3 + + # Creating Volume and mounting the volume + ret = self.setup_volume_and_mount_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Volume creation or mount failed: %s" + % self.volname) + + def tearDown(self): + + # Unmounting and cleaning 
volume + ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Unable to delete volume %s" % self.volname) + + self.get_super_method(self, 'tearDown')() + + @staticmethod + def _get_random_string(): + letters = string.ascii_lowercase + return ''.join(choice(letters) for _ in range(10)) + + def test_one_brick_full_add_brick_rebalance(self): + """ + Test case: + 1. Create a pure distribute volume with 3 bricks. + 2. Start it and mount it on client. + 3. Fill one disk of the volume till it's full + 4. Add brick to volume, start rebalance and wait for it to complete. + 5. Check arequal checksum before and after add brick should be same. + 6. Check if link files are present on bricks or not. + """ + # Fill few bricks till it is full + bricks = get_all_bricks(self.mnode, self.volname) + + # Calculate the usable size and fill till it reaches + # min free limit + usable_size = get_usable_size_per_disk(bricks[0]) + subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + fname = "abc" + + # Create directories in hierarchy + dirp = "/dir1/dir2/" + path = "{}{}".format(self.mounts[0].mountpoint, dirp) + ret = mkdir(self.mounts[0].client_system, path, parents=True) + self.assertTrue(ret, "Failed to create dir hierarchy") + + for _ in range(0, usable_size): + + # Create files inside directories + while (subvols[find_hashed_subvol(subvols, dirp, fname)[1]][0] != + subvols[0][0]): + fname = self._get_random_string() + ret, _, _ = g.run(self.mounts[0].client_system, + "fallocate -l 1G {}{}".format(path, fname)) + self.assertFalse(ret, "Failed to fill disk to min free limit") + fname = self._get_random_string() + g.log.info("Disk filled up to min free limit") + + # Collect arequal checksum before ops + arequal_checksum_before = collect_mounts_arequal(self.mounts[0]) + + # Add brick to volume + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, "Failed to add brick on 
volume %s" + % self.volname) + + # Trigger rebalance and wait for it to complete + ret, _, _ = rebalance_start(self.mnode, self.volname, + force=True) + self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s" + % self.volname) + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=1800) + self.assertTrue(ret, "Rebalance is not yet complete on the volume " + "%s" % self.volname) + g.log.info("Rebalance successfully completed") + + # Check for data loss by comparing arequal before and after ops + arequal_checksum_after = collect_mounts_arequal(self.mounts[0]) + self.assertEqual(arequal_checksum_before, arequal_checksum_after, + "arequal checksum is NOT MATCHNG") + g.log.info("arequal checksum is SAME") + + # Check if linkto files exist or not as rebalance is already + # completed we shouldn't be seeing any linkto files + for brick in bricks: + node, path = brick.split(":") + path += dirp + list_of_files = get_dir_contents(node, path) + self.assertIsNotNone(list_of_files, "Unable to get files") + for filename in list_of_files: + ret = get_dht_linkto_xattr(node, "{}{}".format(path, + filename)) + self.assertIsNone(ret, "Unable to fetch dht linkto xattr") diff --git a/tests/functional/dht/test_open_file_migration.py b/tests/functional/dht/test_open_file_migration.py new file mode 100644 index 000000000..55709cdb7 --- /dev/null +++ b/tests/functional/dht/test_open_file_migration.py @@ -0,0 +1,131 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.io.utils import open_file_fd +from glustolibs.gluster.volume_libs import expand_volume +from glustolibs.gluster.rebalance_ops import (rebalance_start, + wait_for_rebalance_to_complete, + get_rebalance_status) + + +@runs_on([['distributed', 'replicated', 'arbiter', + 'dispersed'], + ['glusterfs']]) +class TestOpenFileMigration(GlusterBaseClass): + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + def test_open_file_migration(self): + """ + Description: Checks that files with open fd are migrated successfully. + + Steps : + 1) Create a volume. + 2) Mount the volume using FUSE. + 3) Create files on volume mount. + 4) Open fd for the files and keep on doing read write operations on + these files. + 5) While fds are open, add bricks to the volume and trigger rebalance. + 6) Wait for rebalance to complete. + 7) Wait for write on open fd to complete. + 8) Check for any data loss during rebalance. 
+ 9) Check if rebalance has any failures. + """ + # Create files and open fd for the files on mount point + m_point = self.mounts[0].mountpoint + cmd = ('cd {}; for i in `seq 261 1261`;do touch testfile$i;' + 'done'.format(m_point)) + ret, _, _ = g.run(self.clients[0], cmd) + self.assertEqual(ret, 0, "Failed to create files") + g.log.info("Successfully created files") + proc = open_file_fd(m_point, 2, self.clients[0], + start_range=301, end_range=400) + + # Calculate file count for the mount-point + cmd = ("ls -lR {}/testfile* | wc -l".format(m_point)) + ret, count_before, _ = g.run(self.clients[0], cmd) + self.assertEqual(ret, 0, "Failed to get file count") + g.log.info("File count before rebalance is:%s", count_before) + + # Add bricks to the volume + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, ("Failed to expand the volume %s", + self.volname)) + g.log.info("Expanding volume is successful on " + "volume %s", self.volname) + + # Trigger rebalance + ret, _, _ = rebalance_start(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to start rebalance") + g.log.info("Rebalance is started") + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=300) + self.assertTrue(ret, ("Rebalance failed on volume %s", + self.volname)) + g.log.info("Rebalance is successful on " + "volume %s", self.volname) + + # Close connection and check if write on open fd has completed + ret, _, _ = proc.async_communicate() + self.assertEqual(ret, 0, "Write on open fd" + " has not completed yet") + g.log.info("Write completed on open fd") + + # Calculate file count for the mount-point + cmd = ("ls -lR {}/testfile* | wc -l".format(m_point)) + ret, count_after, _ = g.run(self.clients[0], cmd) + self.assertEqual(ret, 0, "Failed to get file count") + g.log.info("File count after rebalance is:%s", count_after) + + # Check if there is any data loss + 
self.assertEqual(int(count_before), int(count_after), + "The file count before and after" + " rebalance is not same." + " There is data loss.") + g.log.info("The file count before and after rebalance is same." + " No data loss occurred.") + + # Check if rebalance has any failures + ret = get_rebalance_status(self.mnode, self.volname) + no_of_failures = ret['aggregate']['failures'] + self.assertEqual(int(no_of_failures), 0, + "Failures in rebalance") + g.log.info("No failures in rebalance") diff --git a/tests/functional/dht/test_pipe_character_and_block_device_files.py b/tests/functional/dht/test_pipe_character_and_block_device_files.py new file mode 100644 index 000000000..8a3739b83 --- /dev/null +++ b/tests/functional/dht/test_pipe_character_and_block_device_files.py @@ -0,0 +1,328 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from socket import gethostbyname + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.glusterdir import get_dir_contents +from glustolibs.gluster.glusterfile import ( + get_file_stat, get_fattr, set_fattr, delete_fattr, get_pathinfo, + file_exists) + + +@runs_on([['distributed-replicated', 'distributed-arbiter', 'distributed'], + ['glusterfs']]) +class TestPipeCharacterAndBlockDeviceFiles(GlusterBaseClass): + + def setUp(self): + # calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Changing dist_count to 5 + self.volume['voltype']['dist_count'] = 5 + + # Creating Volume and mounting the volume + ret = self.setup_volume_and_mount_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Volume creation or mount failed: %s" + % self.volname) + + def tearDown(self): + + # Unmounting and cleaning volume + ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Unable to delete volume % s" % self.volname) + + self.get_super_method(self, 'tearDown')() + + def _create_character_and_block_device_files(self): + """Create character and block device files""" + self.list_of_device_files, self.file_names = [], [] + for ftype, filename in (('b', 'blockfile'), ('c', 'Characterfile')): + + # Create files using mknod + cmd = ("cd {}; mknod {} {} 1 5".format( + self.mounts[0].mountpoint, filename, ftype)) + ret, _, _ = g.run(self.clients[0], cmd) + self.assertEqual( + ret, 0, 'Failed to create %s file' % filename) + + # Add file names and file path to lists + self.file_names.append(filename) + self.list_of_device_files.append('{}/{}'.format( + self.mounts[0].mountpoint, filename)) + + # Create file type list for the I/O + self.filetype_list = ["block special file", "character special file"] + + def 
_create_pipe_file(self): + """Create pipe files""" + + # Create pipe files using mkfifo + cmd = "cd {}; mkfifo {}".format(self.mounts[0].mountpoint, 'fifo') + ret, _, _ = g.run(self.clients[0], cmd) + self.assertEqual(ret, 0, 'Failed to create %s file' % 'fifo') + + # Populate variables with fifo file details + self.list_of_device_files = [ + '{}/{}'.format(self.mounts[0].mountpoint, 'fifo')] + self.file_names = ['fifo'] + self.filetype_list = ['fifo'] + + def _set_xattr_trusted_foo(self, xattr_val): + """Sets xattr trusted.foo on all the files""" + for fname in self.list_of_device_files: + ret = set_fattr(self.clients[0], fname, 'trusted.foo', + xattr_val) + self.assertTrue(ret, "Unable to create custom xattr " + "for file {}".format(fname)) + + def _delete_xattr_trusted_foo(self): + """Removes xattr trusted.foo from all the files.""" + for fname in self.list_of_device_files: + ret = delete_fattr(self.clients[0], fname, 'trusted.foo') + self.assertTrue(ret, "Unable to remove custom xattr for " + "file {}".format(fname)) + + def _check_custom_xattr_trusted_foo(self, xattr_val, visible=True): + """Check custom xttar from mount point and on bricks.""" + # Check custom xattr from mount point + for fname in self.list_of_device_files: + ret = get_fattr(self.clients[0], fname, 'trusted.foo', + encode='text') + if visible: + self.assertEqual(ret, xattr_val, + "Custom xattr not found from mount.") + else: + self.assertIsNone(ret, "Custom attribute visible at mount " + "point even after deletion") + + # Check custom xattr on bricks + for brick in get_all_bricks(self.mnode, self.volname): + node, brick_path = brick.split(':') + files_on_bricks = get_dir_contents(node, brick_path) + files = [ + fname for fname in self.file_names + if fname in files_on_bricks] + for fname in files: + ret = get_fattr(node, "{}/{}".format(brick_path, fname), + 'trusted.foo', encode='text') + if visible: + self.assertEqual(ret, xattr_val, + "Custom xattr not visible on bricks") + else: + 
self.assertIsNone(ret, "Custom attribute visible on " + "brick even after deletion") + + def _check_if_files_are_stored_only_on_expected_bricks(self): + """Check if files are stored only on expected bricks""" + for fname in self.list_of_device_files: + # Fetch trusted.glusterfs.pathinfo and check if file is present on + # brick or not + ret = get_pathinfo(self.clients[0], fname) + self.assertIsNotNone(ret, "Unable to get " + "trusted.glusterfs.pathinfo of file %s" + % fname) + present_brick_list = [] + for brick_path in ret['brickdir_paths']: + node, path = brick_path.split(":") + ret = file_exists(node, path) + self.assertTrue(ret, "Unable to find file {} on brick {}" + .format(fname, path)) + brick_text = brick_path.split('/')[:-1] + if brick_text[0][0:2].isdigit(): + brick_text[0] = gethostbyname(brick_text[0][:-1]) + ":" + present_brick_list.append('/'.join(brick_text)) + + # Check on other bricks where file doesn't exist + brick_list = get_all_bricks(self.mnode, self.volname) + other_bricks = [ + brk for brk in brick_list if brk not in present_brick_list] + for brick in other_bricks: + node, path = brick.split(':') + ret = file_exists(node, "{}/{}".format(path, + fname.split('/')[-1])) + self.assertFalse(ret, "Unexpected: Able to find file {} on " + "brick {}".format(fname, path)) + + def _check_filetype_of_files_from_mountpoint(self): + """Check filetype of files from mountpoint""" + for filetype in self.filetype_list: + # Check if filetype is as expected + ret = get_file_stat(self.clients[0], self.list_of_device_files[ + self.filetype_list.index(filetype)]) + self.assertEqual(ret['filetype'], filetype, + "File type not reflecting properly for %s" + % filetype) + + def _compare_stat_output_from_mout_point_and_bricks(self): + """Compare stat output from mountpoint and bricks""" + for fname in self.list_of_device_files: + # Fetch stat output from mount point + mountpoint_stat = get_file_stat(self.clients[0], fname) + bricks = get_pathinfo(self.clients[0], 
fname) + + # Fetch stat output from bricks + for brick_path in bricks['brickdir_paths']: + node, path = brick_path.split(":") + brick_stat = get_file_stat(node, path) + for key in ("filetype", "access", "size", "username", + "groupname", "uid", "gid", "epoch_atime", + "epoch_mtime", "epoch_ctime"): + self.assertEqual(mountpoint_stat[key], brick_stat[key], + "Difference observed between stat output " + "of mountpoint and bricks for file %s" + % fname) + + def test_character_and_block_device_file_creation(self): + """ + Test case: + 1. Create distributed volume with 5 sub-volumes, start amd mount it. + 2. Create character and block device files. + 3. Check filetype of files from mount point. + 4. Verify that the files are stored on only the bricks which is + mentioned in trusted.glusterfs.pathinfo xattr. + 5. Verify stat output from mount point and bricks. + """ + # Create Character and block device files + self._create_character_and_block_device_files() + + # Check filetype of files from mount point + self._check_filetype_of_files_from_mountpoint() + + # Verify that the files are stored on only the bricks which is + # mentioned in trusted.glusterfs.pathinfo xattr + self._check_if_files_are_stored_only_on_expected_bricks() + + # Verify stat output from mount point and bricks + self._compare_stat_output_from_mout_point_and_bricks() + + def test_character_and_block_device_file_removal_using_rm(self): + """ + Test case: + 1. Create distributed volume with 5 sub-volumes, start and mount it. + 2. Create character and block device files. + 3. Check filetype of files from mount point. + 4. Verify that the files are stored on only one bricks which is + mentioned in trusted.glusterfs.pathinfo xattr. + 5. Delete the files. + 6. 
Verify if the files are delete from all the bricks + """ + # Create Character and block device files + self._create_character_and_block_device_files() + + # Check filetype of files from mount point + self._check_filetype_of_files_from_mountpoint() + + # Verify that the files are stored on only the bricks which is + # mentioned in trusted.glusterfs.pathinfo xattr + self._check_if_files_are_stored_only_on_expected_bricks() + + # Delete both the character and block device files + for fname in self.list_of_device_files: + ret, _, _ = g.run(self.clients[0], 'rm -rf {}'.format(fname)) + self.assertEqual( + ret, 0, 'Failed to remove {} file'.format(fname)) + + # Verify if the files are deleted from all bricks or not + for brick in get_all_bricks(self.mnode, self.volname): + node, path = brick.split(':') + for fname in self.file_names: + ret = file_exists(node, "{}/{}".format(path, fname)) + self.assertFalse(ret, "Unexpected: Able to find file {} on " + " brick {} even after deleting".format(fname, + path)) + + def test_character_and_block_device_file_with_custom_xattrs(self): + """ + Test case: + 1. Create distributed volume with 5 sub-volumes, start and mount it. + 2. Create character and block device files. + 3. Check filetype of files from mount point. + 4. Set a custom xattr for files. + 5. Verify that xattr for files is displayed on mount point and bricks. + 6. Modify custom xattr value and verify that xattr for files + is displayed on mount point and bricks. + 7. Remove the xattr and verify that custom xattr is not displayed. + 8. Verify that mount point and brick shows pathinfo xattr properly. 
+ """ + # Create Character and block device files + self._create_character_and_block_device_files() + + # Check filetype of files from mount point + self._check_filetype_of_files_from_mountpoint() + + # Set a custom xattr for files + self._set_xattr_trusted_foo("bar1") + + # Verify that xattr for files is displayed on mount point and bricks + self._check_custom_xattr_trusted_foo("bar1") + + # Modify custom xattr value + self._set_xattr_trusted_foo("bar2") + + # Verify that xattr for files is displayed on mount point and bricks + self._check_custom_xattr_trusted_foo("bar2") + + # Remove the xattr + self._delete_xattr_trusted_foo() + + # Verify that custom xattr is not displayed + self._check_custom_xattr_trusted_foo("bar2", visible=False) + + # Verify that mount point shows pathinfo xattr properly + self._check_if_files_are_stored_only_on_expected_bricks() + + def test_pipe_file_create(self): + """ + Test case: + 1. Create distributed volume with 5 sub-volumes, start and mount it. + 2. Create a pipe file. + 3. Check filetype of files from mount point. + 4. Verify that the files are stored on only the bricks which is + mentioned in trusted.glusterfs.pathinfo xattr. + 5. Verify stat output from mount point and bricks. + 6. Write data to fifo file and read data from fifo file + from the other instance of the same client. + """ + # Create a pipe file + self._create_pipe_file() + + # Check filetype of files from mount point + self._check_filetype_of_files_from_mountpoint() + + # Verify that the files are stored on only the bricks which is + # mentioned in trusted.glusterfs.pathinfo xattr + self._check_if_files_are_stored_only_on_expected_bricks() + + # Verify stat output from mount point and bricks + self._compare_stat_output_from_mout_point_and_bricks() + + # Write data to fifo file and read data from fifo file + # from the other instance of the same client. 
+
+        g.run_async(self.clients[0], "echo 'Hello' > {} ".format(
+            self.list_of_device_files[0]))
+        ret, out, _ = g.run(
+            self.clients[0], "cat < {}".format(self.list_of_device_files[0]))
+        self.assertEqual(
+            ret, 0, "Unable to fetch data on other terminal")
+        self.assertEqual(
+            "Hello", out.split('\n')[0],
+            "Hello not received on the second terminal")
diff --git a/tests/functional/dht/test_readdirp_with_rebalance.py b/tests/functional/dht/test_readdirp_with_rebalance.py
new file mode 100644
index 000000000..6845e0fe3
--- /dev/null
+++ b/tests/functional/dht/test_readdirp_with_rebalance.py
@@ -0,0 +1,173 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.rebalance_ops import (set_rebalance_throttle, + rebalance_start, + get_rebalance_status) +from glustolibs.gluster.volume_libs import form_bricks_list_to_add_brick +from glustolibs.gluster.brick_ops import add_brick +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import validate_io_procs + + +@runs_on([['distributed', 'replicated', 'dispersed', + 'arbiter', 'distributed-dispersed', + 'distributed-replicated', 'distributed-arbiter'], + ['glusterfs']]) +class TestReaddirpWithRebalance(GlusterBaseClass): + def setUp(self): + """ + Setup and mount volume or raise ExecutionError + """ + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + self.all_mounts_procs, self.io_validation_complete = [], False + + # Setup Volume + ret = self.setup_volume_and_mount_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Failed to Setup and Mount Volume") + + # Upload io scripts for running IO on mounts + self.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(self.clients[0], self.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients") + + # Form brick list for expanding volume + self.add_brick_list = form_bricks_list_to_add_brick( + self.mnode, self.volname, self.servers, self.all_servers_info, + distribute_count=3) + if not self.add_brick_list: + raise ExecutionError("Volume %s: Failed to form bricks list for" + " expand" % self.volname) + g.log.info("Volume %s: Formed bricks list for expand", self.volname) + + def tearDown(self): + # Unmount and cleanup original volume + ret = self.unmount_volume_and_cleanup_volume(mounts=[self.mounts[0]]) + if not ret: + raise 
ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_readdirp_with_rebalance(self): + """ + Description: Tests to check that all directories are read + and listed while rebalance is still in progress. + + Steps : + 1) Create a volume. + 2) Mount the volume using FUSE. + 3) Create a dir "master" on mount-point. + 4) Create 8000 empty dirs (dir1 to dir8000) inside dir "master". + 5) Now inside a few dirs (e.g. dir1 to dir10), create deep dirs + and inside every dir, create 50 files. + 6) Collect the number of dirs present on /mnt/<volname>/master + 7) Change the rebalance throttle to lazy. + 8) Add-brick to the volume (at least 3 replica sets.) + 9) Start rebalance using "force" option on the volume. + 10) List the directories on dir "master". + """ + # pylint: disable=too-many-statements + # Start IO on mounts + m_point = self.mounts[0].mountpoint + ret = mkdir(self.mounts[0].client_system, + "{}/master".format(m_point)) + self.assertTrue(ret, "mkdir of dir master failed") + + # Create 8000 empty dirs + cmd = ("ulimit -n 64000; /usr/bin/env python {} create_deep_dir" + " --dir-length 8000 --dir-depth 0" + " {}/master/".format(self.script_upload_path, m_point)) + proc = g.run_async(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + self.all_mounts_procs.append(proc) + g.log.info("IO on %s:%s is started successfully", + self.mounts[0].client_system, m_point) + + # Validate 8000 empty dirs are created successfully + ret = validate_io_procs(self.all_mounts_procs, self.mounts[0]) + self.assertTrue(ret, "IO failed on some of the clients") + g.log.info("IO is successful on all mounts") + + # Create deep dirs and files + self.all_mounts_procs = [] + cmd = ("/usr/bin/env python {} create_deep_dirs_with_files" + " --dir-length 10 --dir-depth 1 --max-num-of-dirs 50 " + " --num-of-files 50 
--file-type empty-file" + " {}/master/".format(self.script_upload_path, m_point)) + proc = g.run_async(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + self.all_mounts_procs.append(proc) + g.log.info("IO on %s:%s is started successfully", + self.mounts[0].client_system, m_point) + + # Validate deep dirs and files are created successfully + ret = validate_io_procs(self.all_mounts_procs, self.mounts[0]) + self.assertTrue(ret, "IO failed on some of the clients") + g.log.info("IO is successful on all mounts") + + # Check the dir count before rebalance + cmd = ('cd {}/master; ls -l | wc -l'.format(m_point)) + ret, dir_count_before, _ = g.run(self.clients[0], cmd) + self.assertEqual(ret, 0, "Failed to " + "get directory count") + g.log.info("Dir count before %s", dir_count_before) + + # Change the rebalance throttle to lazy + ret, _, _ = set_rebalance_throttle(self.mnode, self.volname, + throttle_type='lazy') + self.assertEqual(ret, 0, "Failed to set rebal-throttle to lazy") + g.log.info("Rebal-throttle set to 'lazy' successfully") + + # Add-bricks to the volume + ret, _, _ = add_brick(self.mnode, self.volname, self.add_brick_list) + self.assertEqual(ret, 0, "Failed to add-brick to the volume") + g.log.info("Added bricks to the volume successfully") + + # Start rebalance using force + ret, _, _ = rebalance_start(self.mnode, self.volname, force=True) + self.assertEqual(ret, 0, "Failed to start rebalance") + g.log.info("Rebalance started successfully") + + # Check if rebalance is in progress + rebalance_status = get_rebalance_status(self.mnode, self.volname) + status = rebalance_status['aggregate']['statusStr'] + self.assertEqual(status, "in progress", + ("Rebalance is not in 'in progress' state," + " either rebalance is in compeleted state" + " or failed to get rebalance status")) + + # Check the dir count after rebalance + cmd = ('cd {}/master; ls -l | wc -l'.format(m_point)) + ret, dir_count_after, _ = g.run(self.clients[0], cmd) + self.assertEqual(ret, 
0, "Failed to do lookup and" + " get directory count") + g.log.info("Dir count after %s", dir_count_after) + + # Check if there is any data loss + self.assertEqual(set(dir_count_before), set(dir_count_after), + ("There is data loss")) + g.log.info("The checksum before and after rebalance is same." + " There is no data loss.") diff --git a/tests/functional/dht/test_rebalance_add_brick_and_lookup.py b/tests/functional/dht/test_rebalance_add_brick_and_lookup.py new file mode 100644 index 000000000..b02fe5eea --- /dev/null +++ b/tests/functional/dht/test_rebalance_add_brick_and_lookup.py @@ -0,0 +1,113 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-131 USA. 
+ +""" +Description: + Rebalance with add brick and log time taken for lookup +""" + +from time import time +from glusto.core import Glusto as g + +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.rebalance_ops import (rebalance_start, + wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_libs import expand_volume + + +@runs_on([['distributed', 'distributed-replicated', 'distributed-arbiter', + 'distributed-dispersed'], ['glusterfs']]) +class TestRebalanceWithAddBrickAndLookup(GlusterBaseClass): + """ Rebalance with add brick and do lookup """ + + def setUp(self): + """Setup Volume""" + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + self.all_mounts_procs = [] + + # Setup and mount the volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup Volume and Mount it") + + def test_rebalance_with_add_brick_and_lookup(self): + """ + Rebalance with add brick and then lookup on mount + - Create a Distributed-Replicated volume. + - Create deep dirs(200) and 100 files on the deepest directory. + - Expand volume. + - Initiate rebalance + - Once rebalance is completed, do a lookup on mount and time it. + """ + # Create Deep dirs. + cmd = ( + "cd %s/; for i in {1..200};do mkdir dir${i}; cd dir${i};" + " if [ ${i} -eq 100 ]; then for j in {1..100}; do touch file${j};" + " done; fi; done;" % (self.mounts[0].mountpoint)) + ret, _, _ = g.run(self.clients[0], cmd) + self.assertEqual(ret, 0, "Failed to create the deep dirs and files") + g.log.info("Deep dirs and files created.") + + # Expand the volume. + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, ("Failed to expand the volume %s", self.volname)) + g.log.info("Expanding volume is successful on " + "volume %s", self.volname) + + # Start Rebalance. 
+ ret, _, _ = rebalance_start(self.mnode, self.volname) + self.assertEqual(ret, 0, ("Failed to start rebalance on the volume " + "%s", self.volname)) + g.log.info("Successfully started rebalance on the volume %s", + self.volname) + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=500) + self.assertTrue(ret, ("Rebalance is not yet complete on the volume " + "%s", self.volname)) + g.log.info("Rebalance is successfully complete on the volume %s", + self.volname) + + # Do a lookup on the mountpoint and note the time taken to run. + # The time used for comparison is taken as a benchmark on using a + # RHGS 3.5.2 for this TC. For 3.5.2, the time takes came out to be + # 4 seconds. Now the condition for subtest to pass is for the lookup + # should not be more than 10% of this value, i.e. 4.4 seconds. + cmd = ("ls -R %s/" % (self.mounts[0].mountpoint)) + start_time = time() + ret, _, _ = g.run(self.clients[0], cmd) + end_time = time() + self.assertEqual(ret, 0, "Failed to do a lookup") + time_taken = end_time - start_time + self.assertTrue(time_taken <= 4.4, "Lookup takes more time " + "than the previously benchmarked value.") + g.log.info("Lookup took : %d seconds", time_taken) + + def tearDown(self): + """tear Down callback""" + # Unmount Volume and cleanup. 
+        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+        if not ret:
+            raise ExecutionError("Failed to Unmount Volume and "
+                                 "Cleanup Volume")
+        g.log.info("Successful in Unmount Volume and cleanup.")
+
+        # Calling GlusterBaseClass tearDown
+        self.get_super_method(self, 'tearDown')()
diff --git a/tests/functional/dht/test_rebalance_dir_file_from_multiple_clients.py b/tests/functional/dht/test_rebalance_dir_file_from_multiple_clients.py
index dd80479bb..add72aec5 100644
--- a/tests/functional/dht/test_rebalance_dir_file_from_multiple_clients.py
+++ b/tests/functional/dht/test_rebalance_dir_file_from_multiple_clients.py
@@ -201,7 +201,7 @@ class RebalanceValidation(GlusterBaseClass):
         # Wait for rebalance to complete
         g.log.info("Waiting for rebalance to complete")
         ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
-                                             timeout=600)
+                                             timeout=1800)
         self.assertTrue(ret, ("Rebalance is not yet complete on the volume "
                               "%s", self.volname))
         g.log.info("Rebalance status on volume %s: Complete",
diff --git a/tests/functional/dht/test_rebalance_files_with_holes.py b/tests/functional/dht/test_rebalance_files_with_holes.py
new file mode 100644
index 000000000..ba01eadcb
--- /dev/null
+++ b/tests/functional/dht/test_rebalance_files_with_holes.py
@@ -0,0 +1,128 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.rebalance_ops import ( + rebalance_start, wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_libs import expand_volume, shrink_volume + + +@runs_on([['distributed-replicated', 'distributed-arbiter', + 'distributed-dispersed', 'distributed', 'replicated', + 'arbiter', 'dispersed'], ['glusterfs']]) +class TestAddBrickRebalanceFilesWithHoles(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Creating Volume and mounting the volume + ret = self.setup_volume_and_mount_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Volume creation or mount failed: %s" + % self.volname) + self.first_client = self.mounts[0].client_system + + def tearDown(self): + + # Unmounting and cleaning volume + ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Unable to delete volume %s" % self.volname) + + self.get_super_method(self, 'tearDown')() + + def test_add_brick_rebalance_files_with_holes(self): + """ + Test case: + 1. Create a volume, start it and mount it using fuse. + 2. On the volume root, create files with holes. + 3. After the file creation is complete, add bricks to the volume. + 4. Trigger rebalance on the volume. + 5. Wait for rebalance to complete. 
+ """ + # On the volume root, create files with holes + cmd = ("cd %s;for i in {1..5000}; do dd if=/dev/urandom" + " of=file_with_holes$i bs=1M count=1 seek=100M; done" + % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to create files with holes") + + # After the file creation is complete, add bricks to the volume + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, "Failed to add brick on volume %s" + % self.volname) + + # Trigger rebalance on the volume + ret, _, _ = rebalance_start(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s" + % self.volname) + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=9000) + self.assertTrue(ret, "Rebalance is not yet complete on the volume " + "%s" % self.volname) + g.log.info("Rebalance successfully completed") + + +@runs_on([['distributed-replicated', 'distributed-arbiter', + 'distributed-dispersed', 'distributed'], ['glusterfs']]) +class TestRemoveBrickRebalanceFilesWithHoles(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Creating Volume and mounting the volume + ret = self.setup_volume_and_mount_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Volume creation or mount failed: %s" + % self.volname) + self.first_client = self.mounts[0].client_system + + def tearDown(self): + + # Unmounting and cleaning volume + ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Unable to delete volume %s" % self.volname) + + self.get_super_method(self, 'tearDown')() + + def test_remove_brick_rebalance_files_with_holes(self): + """ + Test case: + 1. Create a volume, start it and mount it using fuse. + 2. On the volume root, create files with holes. + 3. After the file creation is complete, remove-brick from volume. + 4. 
Wait for remove-brick to complete. + """ + # On the volume root, create files with holes + cmd = ("cd %s;for i in {1..2000}; do dd if=/dev/urandom" + " of=file_with_holes$i bs=1M count=1 seek=100M; done" + % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to create files with holes") + + # After the file creation is complete, remove-brick from volume + # Wait for remove-brick to complete + ret = shrink_volume(self.mnode, self.volname, rebalance_timeout=16000) + self.assertTrue(ret, "Failed to remove-brick from volume") + g.log.info("Remove-brick rebalance successful") diff --git a/tests/functional/dht/test_rebalance_multiple_expansions.py b/tests/functional/dht/test_rebalance_multiple_expansions.py new file mode 100644 index 000000000..e96d88d56 --- /dev/null +++ b/tests/functional/dht/test_rebalance_multiple_expansions.py @@ -0,0 +1,100 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.rebalance_ops import ( + rebalance_start, wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_libs import expand_volume +from glustolibs.io.utils import collect_mounts_arequal + + +@runs_on([['distributed', 'distributed-replicated'], + ['glusterfs']]) +class TestRebalanceMultipleExpansions(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to Setup and mount volume") + + self.first_client = self.mounts[0].client_system + + def tearDown(self): + + # Unmount and clean volume + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to Cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_rebalance_multiple_expansions(self): + """ + Test case: + 1. Create a volume, start it and mount it + 2. Create some file on mountpoint + 3. Collect arequal checksum on mount point pre-rebalance + 4. Do the following 3 times: + 5. Expand the volume + 6. Start rebalance and wait for it to finish + 7. 
Collect arequal checksum on mount point post-rebalance + and compare with value from step 3 + """ + + # Create some file on mountpoint + cmd = ("cd %s; for i in {1..500} ; do " + "dd if=/dev/urandom of=file$i bs=10M count=1; done" + % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.first_client, cmd) + self.assertEqual(ret, 0, "IO failed on volume %s" + % self.volname) + + # Collect arequal checksum before rebalance + arequal_checksum_before = collect_mounts_arequal(self.mounts[0]) + + for _ in range(3): + # Add brick to volume + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, "Failed to add brick on volume %s" + % self.volname) + + # Trigger rebalance and wait for it to complete + ret, _, _ = rebalance_start(self.mnode, self.volname, + force=True) + self.assertEqual(ret, 0, "Failed to start rebalance on " + "volume %s" % self.volname) + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=1200) + self.assertTrue(ret, "Rebalance is not yet complete on the volume " + "%s" % self.volname) + g.log.info("Rebalance successfully completed") + + # Collect arequal checksum after rebalance + arequal_checksum_after = collect_mounts_arequal(self.mounts[0]) + + # Check for data loss by comparing arequal before and after + # rebalance + self.assertEqual(arequal_checksum_before, arequal_checksum_after, + "arequal checksum is NOT MATCHNG") + g.log.info("arequal checksum is SAME") diff --git a/tests/functional/dht/test_rebalance_multiple_shrinks.py b/tests/functional/dht/test_rebalance_multiple_shrinks.py new file mode 100644 index 000000000..a95cdf141 --- /dev/null +++ b/tests/functional/dht/test_rebalance_multiple_shrinks.py @@ -0,0 +1,87 @@ +# Copyright (C) 2021 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import shrink_volume +from glustolibs.io.utils import collect_mounts_arequal + + +@runs_on([['distributed'], ['glusterfs']]) +class TestRebalanceMultipleShrinks(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Changing dist_count to 6 + self.volume['voltype']['dist_count'] = 6 + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to Setup and mount volume") + + self.first_client = self.mounts[0].client_system + + def tearDown(self): + + # Unmount and clean volume + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to Cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_rebalance_multiple_shrinks(self): + """ + Test case: + 1. Modify the distribution count of a volume + 2. Create a volume, start it and mount it + 3. Create some file on mountpoint + 4. Collect arequal checksum on mount point pre-rebalance + 5. Do the following 3 times: + 6. 
Shrink the volume + 7. Collect arequal checksum on mount point post-rebalance + and compare with value from step 4 + """ + + # Create some file on mountpoint + cmd = ("cd %s; for i in {1..500} ; do " + "dd if=/dev/urandom of=file$i bs=10M count=1; done" + % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.first_client, cmd) + self.assertEqual(ret, 0, "IO failed on volume %s" + % self.volname) + + # Collect arequal checksum before rebalance + arequal_checksum_before = collect_mounts_arequal(self.mounts[0]) + + for _ in range(3): + # Shrink volume + ret = shrink_volume(self.mnode, self.volname, + rebalance_timeout=16000) + self.assertTrue(ret, "Failed to remove-brick from volume") + g.log.info("Remove-brick rebalance successful") + + # Collect arequal checksum after rebalance + arequal_checksum_after = collect_mounts_arequal(self.mounts[0]) + + # Check for data loss by comparing arequal before and after + # rebalance + self.assertEqual(arequal_checksum_before, arequal_checksum_after, + "arequal checksum is NOT MATCHNG") + g.log.info("arequal checksum is SAME") diff --git a/tests/functional/dht/test_rebalance_nested_dir.py b/tests/functional/dht/test_rebalance_nested_dir.py new file mode 100644 index 000000000..77f099ad3 --- /dev/null +++ b/tests/functional/dht/test_rebalance_nested_dir.py @@ -0,0 +1,99 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.rebalance_ops import ( + rebalance_start, wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_libs import expand_volume +from glustolibs.io.utils import collect_mounts_arequal + + +@runs_on([['distributed', 'distributed-replicated'], + ['glusterfs']]) +class TestRebalanceNestedDir(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to Setup and mount volume") + + self.first_client = self.mounts[0].client_system + + def tearDown(self): + + # Unmount and clean volume + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to Cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_rebalance_nested_dir(self): + """ + Test case: + 1. Create a volume, start it and mount it + 2. On mount point, create a large nested dir structure with + files in the inner-most dir + 3. Collect arequal checksum on mount point pre-rebalance + 4. Expand the volume + 5. Start rebalance and wait for it to finish + 6. 
Collect arequal checksum on mount point post-rebalance + and compare wth value from step 3 + """ + + # create a large nested dir structure with files in the inner-most dir + cmd = ("cd %s; for i in {1..100} ; do mkdir $i; cd $i; done;" + "for j in {1..100} ; do " + "dd if=/dev/urandom of=file$j bs=10M count=1; done" + % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.first_client, cmd) + self.assertEqual(ret, 0, "IO failed on volume %s" + % self.volname) + + # Collect arequal checksum before rebalance + arequal_checksum_before = collect_mounts_arequal(self.mounts[0]) + + # Add brick to volume + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, "Failed to add brick on volume %s" + % self.volname) + + # Trigger rebalance and wait for it to complete + ret, _, _ = rebalance_start(self.mnode, self.volname, + force=True) + self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s" + % self.volname) + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=1200) + self.assertTrue(ret, "Rebalance is not yet complete on the volume " + "%s" % self.volname) + g.log.info("Rebalance successfully completed") + + # Collect arequal checksum after rebalance + arequal_checksum_after = collect_mounts_arequal(self.mounts[0]) + + # Check for data loss by comparing arequal before and after rebalance + self.assertEqual(arequal_checksum_before, arequal_checksum_after, + "arequal checksum is NOT MATCHNG") + g.log.info("arequal checksum is SAME") diff --git a/tests/functional/dht/test_rebalance_peer_probe.py b/tests/functional/dht/test_rebalance_peer_probe.py new file mode 100644 index 000000000..7ffc9ca63 --- /dev/null +++ b/tests/functional/dht/test_rebalance_peer_probe.py @@ -0,0 +1,130 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from time import sleep + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.rebalance_ops import ( + rebalance_start, wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_libs import expand_volume +from glustolibs.io.utils import collect_mounts_arequal +from glustolibs.gluster.peer_ops import (peer_probe_servers, peer_detach) + + +@runs_on([['distributed'], ['glusterfs']]) +class TestRebalancePeerProbe(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to Setup and mount volume") + + self.first_client = self.mounts[0].client_system + self.is_peer_detached = False + + def tearDown(self): + + # Unmount and clean volume + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to Cleanup Volume") + + # Probe detached node in case it's still detached + if self.is_peer_detached: + if not peer_probe_servers(self.mnode, self.servers[5]): + raise ExecutionError("Failed to probe detached " + "servers %s" % self.servers) + 
g.log.info("Peer probe success for detached " + "servers %s", self.servers) + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_rebalance_peer_probe(self): + """ + Test case: + 1. Detach a peer + 2. Create a volume, start it and mount it + 3. Start creating a few files on mount point + 4. Collect arequal checksum on mount point pre-rebalance + 5. Expand the volume + 6. Start rebalance + 7. While rebalance is going, probe a peer and check if + the peer was probed successfully + 7. Collect arequal checksum on mount point post-rebalance + and compare wth value from step 4 + """ + + # Detach a peer + ret, _, _ = peer_detach(self.mnode, self.servers[5]) + self.assertEqual(ret, 0, "Failed to detach peer %s" + % self.servers[5]) + + self.is_peer_detached = True + + # Start I/O from mount point and wait for it to complete + cmd = ("cd %s; for i in {1..1000} ; do " + "dd if=/dev/urandom of=file$i bs=10M count=1; done" + % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.first_client, cmd) + self.assertEqual(ret, 0, "IO failed on volume %s" + % self.volname) + + # Collect arequal checksum before rebalance + arequal_checksum_before = collect_mounts_arequal(self.mounts[0]) + + # Add brick to volume + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, "Failed to add brick on volume %s" + % self.volname) + + # Trigger rebalance and wait for it to complete + ret, _, _ = rebalance_start(self.mnode, self.volname, + force=True) + self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s" + % self.volname) + + # Let rebalance run for a while + sleep(5) + + # Add new node to the cluster + ret = peer_probe_servers(self.mnode, self.servers[5]) + self.assertTrue(ret, "Failed to peer probe server : %s" + % self.servers[5]) + g.log.info("Peer probe success for %s and all peers are in " + "connected state", self.servers[5]) + + self.is_peer_detached = False + + # Wait for 
rebalance to complete
+        ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
+                                             timeout=1200)
+        self.assertTrue(ret, "Rebalance is not yet complete on the volume "
+                        "%s" % self.volname)
+        g.log.info("Rebalance successfully completed")
+
+        # Collect arequal checksum after rebalance
+        arequal_checksum_after = collect_mounts_arequal(self.mounts[0])
+
+        # Check for data loss by comparing arequal before and after rebalance
+        self.assertEqual(arequal_checksum_before, arequal_checksum_after,
+                         "arequal checksum is NOT MATCHING")
+        g.log.info("arequal checksum is SAME")
diff --git a/tests/functional/dht/test_rebalance_preserve_user_permissions.py b/tests/functional/dht/test_rebalance_preserve_user_permissions.py
new file mode 100644
index 000000000..59327f329
--- /dev/null
+++ b/tests/functional/dht/test_rebalance_preserve_user_permissions.py
@@ -0,0 +1,194 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ +""" +Description: + Rebalance: permissions check as non root user +""" + +from glusto.core import Glusto as g + +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.rebalance_ops import ( + rebalance_start, + wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_libs import ( + expand_volume, + log_volume_info_and_status) +from glustolibs.io.utils import (collect_mounts_arequal) +from glustolibs.gluster.lib_utils import (add_user, del_user) +from glustolibs.gluster.glusterfile import ( + get_file_stat, + set_file_permissions) + + +@runs_on([['distributed', 'distributed-replicated'], + ['glusterfs']]) +class TestRebalancePreserveUserPermissions(GlusterBaseClass): + def setUp(self): + self.get_super_method(self, 'setUp')() + + # Creating Volume and mounting the volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + self.user = "glusto_user" + self.client = self.mounts[0].client_system + self.mountpoint = self.mounts[0].mountpoint + # Add new user on the client node + ret = add_user(self.client, self.user) + if not ret: + raise ExecutionError("Failed to add user") + + def tearDown(self): + ret = del_user(self.client, self.user) + if not ret: + raise ExecutionError("Failed to delete user") + # Unmount Volume and cleanup. 
+ ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Unmount Volume and " + "Cleanup Volume") + g.log.info("Successful in Unmount Volume and cleanup.") + + self.get_super_method(self, 'tearDown')() + + def _start_rebalance_and_wait(self): + """Start rebalance and wait""" + # Start Rebalance + ret, _, _ = rebalance_start(self.mnode, self.volname) + self.assertEqual(ret, 0, ("Failed to start rebalance on the volume " + "%s", self.volname)) + g.log.info("Successfully started rebalance on the volume %s", + self.volname) + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname) + self.assertTrue(ret, ("Rebalance is not yet complete on the volume " + "%s", self.volname)) + g.log.info("Rebalance is successfully complete on the volume %s", + self.volname) + + def _get_arequal_and_check_if_equal_to_before(self): + """Check if arequal checksum is equal or not""" + self.arequal_checksum_after = collect_mounts_arequal(self.mounts[0]) + self.assertEqual( + self.arequal_checksum_before, self.arequal_checksum_after, + "arequal checksum is NOT MATCHNG") + g.log.info("arequal checksum is SAME") + + def _logged_vol_info(self): + """Log volume info and status""" + ret = log_volume_info_and_status(self.mnode, self.volname) + self.assertTrue(ret, ("Logging volume info and status failed on " + "volume %s", self.volname)) + + def _check_user_permission(self): + """ + Verify permissions on MP and file + """ + stat_mp_dict = get_file_stat(self.client, self.mountpoint) + self.assertIsNotNone(stat_mp_dict, "stat on %s failed" + % self.mountpoint) + self.assertEqual(stat_mp_dict['access'], '777', + "Expected 777 " + "but found %s" % stat_mp_dict['access']) + g.log.info("File permissions for mountpoint is 777 as expected") + + # check owner and group of random file + fpath = self.mountpoint + "/d1/f.1" + stat_dict = get_file_stat(self.client, fpath) + self.assertIsNotNone(stat_dict, "stat 
on %s failed" % fpath) + self.assertEqual(stat_dict['username'], self.user, + "Expected %s but found %s" + % (self.user, stat_dict['username'])) + self.assertEqual(stat_dict['groupname'], self.user, + "Expected %s but found %s" + % (self.user, stat_dict['groupname'])) + g.log.info("User and Group are %s as expected", self.user) + + def _testcase(self, number_of_expands=1): + """ + Test case: + 1. Create a volume start it and mount on the client. + 2. Set full permission on the mount point. + 3. Add new user to the client. + 4. As the new user create dirs/files. + 5. Compute arequal checksum and check permission on / and subdir. + 6. expand cluster according to number_of_expands and start rebalance. + 7. After rebalance is completed: + 7.1 check arequal checksum + 7.2 verfiy no change in / and sub dir permissions. + 7.3 As the new user create and delete file/dir. + """ + # Set full permissions on the mount point. + ret = set_file_permissions(self.clients[0], self.mountpoint, "-R 777") + self.assertTrue(ret, "Failed to set permissions on the mount point") + g.log.info("Set full permissions on the mount point") + + # Create dirs/files as self.test_user + cmd = (r'su -l %s -c "cd %s;' + r'for i in {0..9}; do mkdir d\$i; done;' + r'for i in {0..99}; do let x=\$i%%10;' + r'dd if=/dev/urandom of=d\$x/f.\$i bs=1024 count=1; done"' + % (self.user, self.mountpoint)) + ret, _, _ = g.run(self.client, cmd) + self.assertEqual(ret, 0, ("Failed to create files as %s", self.user)) + g.log.info("IO as %s is successful", self.user) + + # check permission on / and subdir + self._check_user_permission() + + # get arequal checksum before expand + self.arequal_checksum_before = collect_mounts_arequal(self.mounts[0]) + + self._logged_vol_info() + + # expand the volume + for i in range(number_of_expands): + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, ("Failed to expand iter %d volume %s", + i, self.volname)) + + 
self._logged_vol_info() + # Start Rebalance and wait for completion + self._start_rebalance_and_wait() + + # compare arequals checksum before and after rebalance + self._get_arequal_and_check_if_equal_to_before() + + # permissions check on / and sub dir + self._check_user_permission() + + # Create/Delete file as self.test_user + cmd = ('su -l %s -c ' + '"cd %s; touch file.test;' + 'find . -mindepth 1 -maxdepth 1 -type d | xargs rm -rf"' + % (self.user, self.mountpoint)) + ret, _, _ = g.run(self.client, cmd) + + self.assertEqual(ret, 0, ("User %s failed to create files", self.user)) + g.log.info("IO as %s is successful", self.user) + + def test_rebalance_preserve_user_permissions(self): + self._testcase() + + def test_rebalance_preserve_user_permissions_multi_expands(self): + self._testcase(2) diff --git a/tests/functional/dht/test_rebalance_remove_brick_with_quota.py b/tests/functional/dht/test_rebalance_remove_brick_with_quota.py new file mode 100644 index 000000000..b9da72b47 --- /dev/null +++ b/tests/functional/dht/test_rebalance_remove_brick_with_quota.py @@ -0,0 +1,160 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +Description: + Removing brick from volume after enabling quota. 
+""" + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.rebalance_ops import (rebalance_start, + wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_libs import ( + log_volume_info_and_status, + shrink_volume) +from glustolibs.gluster.quota_ops import ( + quota_enable, + quota_set_hard_timeout, + quota_set_soft_timeout, + quota_limit_usage) +from glustolibs.gluster.quota_libs import quota_validate +from glustolibs.io.utils import wait_for_io_to_complete +from glustolibs.misc.misc_libs import upload_scripts + + +@runs_on([['distributed'], ['glusterfs']]) +class TestRemoveBrickWithQuota(GlusterBaseClass): + """ Remove Brick With Quota Enabled""" + + def setUp(self): + """Setup Volume""" + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + self.all_mounts_procs = [] + + # Setup and Mount the volume + g.log.info("Starting to Setup volume and mount it.") + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup Volume and mount it") + + # Upload IO script for running IO on mounts + self.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(self.mounts[0].client_system, + self.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to client") + + def test_brick_removal_with_quota(self): + """ + Test Brick removal with quota in place + 1. Create Volume of type distribute + 2. Set Quota limit on the directory + 3. Do some IO to reach the Hard limit + 4. After IO ends, remove bricks + 5. Quota validation should succeed. 
+ """ + # Enable Quota + ret, _, _ = quota_enable(self.mnode, self.volname) + self.assertEqual( + ret, 0, ("Failed to enable quota on the volume 5s", self.volname)) + g.log.info("Successfully enabled quota on volume %s", self.volname) + + # Set the Quota timeouts to 0 for strict accounting + ret, _, _ = quota_set_hard_timeout(self.mnode, self.volname, 0) + self.assertEqual( + ret, 0, ("Failed to set hard-timeout to 0 for %s", self.volname)) + ret, _, _ = quota_set_soft_timeout(self.mnode, self.volname, 0) + self.assertEqual( + ret, 0, ("Failed to set soft-timeout to 0 for %s", self.volname)) + g.log.info( + "Quota soft and hard timeout has been set to 0 for %s", + self.volname) + + # Set the quota limit of 100 MB on root dir of the volume + ret, _, _ = quota_limit_usage(self.mnode, self.volname, "/", "100MB") + self.assertEqual(ret, 0, "Failed to set Quota for dir root") + g.log.info("Successfully set quota limit for dir root") + + # Do some IO until hard limit is reached. + cmd = ( + "/usr/bin/env python %s create_files " + "-f 100 --fixed-file-size 1M --base-file-name file %s" + % (self.script_upload_path, self.mounts[0].mountpoint)) + proc = g.run_async( + self.mounts[0].client_system, cmd, user=self.mounts[0].user) + self.all_mounts_procs.append(proc) + + # Wait for IO to complete and validate IO + self.assertTrue(wait_for_io_to_complete(self.all_mounts_procs, + self.mounts[0]), + "IO failed on some of the clients") + g.log.info("IO completed on the clients") + + # Validate quota + ret = quota_validate(self.mnode, self.volname, + path='/', hard_limit=104857600, + sl_exceeded=True, hl_exceeded=True) + self.assertTrue(ret, "Quota validate Failed for '/'") + g.log.info("Quota Validated for path '/'") + + # Log Volume info and status before shrinking volume. + log_volume_info_and_status(self.mnode, self.volname) + + # Shrink the volume. 
+        ret = shrink_volume(self.mnode, self.volname)
+        self.assertTrue(ret, ("Failed to shrink volume on "
+                              "volume %s", self.volname))
+        g.log.info("Shrinking volume is successful on "
+                   "volume %s", self.volname)
+
+        # Log volume info and status after shrinking volume.
+        log_volume_info_and_status(self.mnode, self.volname)
+
+        # Perform rebalance start operation.
+        ret, _, _ = rebalance_start(self.mnode, self.volname)
+        self.assertEqual(ret, 0, ("Failed to start rebalance on the volume "
+                                  "%s", self.volname))
+        g.log.info("Rebalance started.")
+
+        # Wait till rebalance ends.
+        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
+        self.assertTrue(ret, ("Rebalance is not yet complete on the volume "
+                              "%s", self.volname))
+        g.log.info("Rebalance is successfully complete on the volume %s",
+                   self.volname)
+
+        # Validate quota
+        ret = quota_validate(self.mnode, self.volname,
+                             path='/', hard_limit=104857600,
+                             sl_exceeded=True, hl_exceeded=True)
+        self.assertTrue(ret, "Quota validate Failed for '/'")
+        g.log.info("Quota Validated for path '/'")
+
+    def tearDown(self):
+        """tear Down Callback"""
+        # Unmount volume and do cleanup
+        g.log.info("Starting to Unmount volume and cleanup")
+        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
+        if not ret:
+            raise ExecutionError("Failed to Unmount and cleanup volume")
+        g.log.info("Successful in Unmount and cleanup of volumes")
+
+        # Calling GlusterBaseClass tearDown
+        self.get_super_method(self, 'tearDown')()
diff --git a/tests/functional/dht/test_rebalance_rename.py b/tests/functional/dht/test_rebalance_rename.py
new file mode 100644
index 000000000..c449d2945
--- /dev/null
+++ b/tests/functional/dht/test_rebalance_rename.py
@@ -0,0 +1,181 @@
+# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" + +Description: + Renaming of directories and files while rebalance is running +""" + +from unittest import skip +from glusto.core import Glusto as g + +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.rebalance_ops import (get_rebalance_status, + rebalance_start, + wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_libs import ( + expand_volume, + log_volume_info_and_status) +from glustolibs.io.utils import ( + collect_mounts_arequal, + wait_for_io_to_complete) +from glustolibs.misc.misc_libs import upload_scripts + + +@runs_on([['arbiter', 'distributed-arbiter', 'dispersed', 'replicated', + 'distributed-dispersed', 'distributed-replicated', 'distributed'], + ['glusterfs']]) +class TestRenameDuringRebalance(GlusterBaseClass): + """Renaming Files during rebalance""" + + def setUp(self): + """Setup Volume""" + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + self.all_mounts_procs = [] + + # Setup Volume and Mount Volume + g.log.info("Starting to Setup Volume and mount it") + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + 
raise ExecutionError("Failed to Setup Volume and Mount it") + + # Upload io script for running IO on mounts + self.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(self.mounts[0].client_system, + self.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients") + + @skip('Skipping due to Bug 1755834') + def test_rename_file_rebalance(self): + """ + Test file renames during rebalance + - Create a volume + - Create directories or files + - Calculate the checksum using arequal + - Add brick and start rebalance + - While rebalance is running, rename files or directories. + - After rebalancing calculate checksum. + """ + # Taking the instance of mount point. + mount_point = self.mounts[0].mountpoint + + # Creating main directory. + ret = mkdir(self.mounts[0].client_system, + "{}/main".format(mount_point)) + self.assertTrue(ret, "mkdir of dir main failed") + + # Creating Files. + self.all_mounts_procs = [] + command = ("/usr/bin/env python {} create_files" + " {}/main/ -f 4000" + " --fixed-file-size 1k".format(self.script_upload_path, + mount_point)) + proc = g.run_async(self.mounts[0].client_system, command, + user=self.mounts[0].user) + self.all_mounts_procs.append(proc) + g.log.info("IO on %s:%s is started successfully", + self.mounts[0].client_system, mount_point) + + # Wait for IO completion. + self.assertTrue(wait_for_io_to_complete(self.all_mounts_procs, + self.mounts[0]), + "IO failed on some of the clients") + g.log.info("IO completed on the clients") + + # Getting the arequal checksum. + arequal_checksum_before_rebalance = collect_mounts_arequal(self.mounts) + + # Log Volume Info and Status before expanding the volume. 
+ log_volume_info_and_status(self.mnode, self.volname) + + # Expanding volume by adding bricks to the volume + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, ("Failed to expand the volume %s", self.volname)) + g.log.info("Expanding volume is successful on " + "volume %s", self.volname) + + # Log Volume Info and Status after expanding the volume. + log_volume_info_and_status(self.mnode, self.volname) + + # Start Rebalance + ret, _, _ = rebalance_start(self.mnode, self.volname) + self.assertEqual(ret, 0, ("Failed to start rebalance on the volume " + "%s", self.volname)) + g.log.info("Successfully started rebalance on the volume %s", + self.volname) + + # Check that rebalance status is "in progress" + rebalance_status = get_rebalance_status(self.mnode, self.volname) + ret = rebalance_status['aggregate']['statusStr'] + self.assertEqual(ret, "in progress", ("Rebalance is not in " + "'in progress' state, either " + "rebalance is in completed state" + " or failed to get rebalance " + " status")) + g.log.info("Rebalance is in 'in progress' state") + + # Renaming the files during rebalance. + self.all_mounts_procs = [] + command = ("/usr/bin/env python {} mv" + " {}/main/ --postfix re ".format( + self.script_upload_path, + mount_point)) + proc = g.run_async(self.mounts[0].client_system, command, + user=self.mounts[0].user) + g.log.info("IO on %s:%s is started successfully", + self.mounts[0].client_system, mount_point) + self.all_mounts_procs.append(proc) + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname) + self.assertTrue(ret, ("Rebalace is not yet complete on the volume " + "%s", self.volname)) + g.log.info("Rebalance is successfully complete on the volume %s", + self.volname) + + # Wait for IO completion. 
+ self.assertTrue(wait_for_io_to_complete(self.all_mounts_procs, + self.mounts[0]), + "IO failed on some of the clients") + g.log.info("IO is successful on all mounts") + + # Getting arequal checksum after rebalance + arequal_checksum_after_rebalance = collect_mounts_arequal(self.mounts) + + # Comparing arequals checksum before and after rebalance. + self.assertEqual(arequal_checksum_before_rebalance, + arequal_checksum_after_rebalance, + "arequal checksum is NOT MATCHING") + g.log.info("arequal checksum is SAME") + + def tearDown(self): + """tear Down Callback""" + # Unmount Volume and Cleanup volume. + g.log.info("Starting to Umount Volume and Cleanup Volume") + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Unmount Volume and Cleanup volume") + g.log.info("Successful in Unmount Volume and Cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/dht/test_rebalance_start_when_glusterd_down.py b/tests/functional/dht/test_rebalance_start_when_glusterd_down.py index 885deb2b0..bca96d2cd 100644 --- a/tests/functional/dht/test_rebalance_start_when_glusterd_down.py +++ b/tests/functional/dht/test_rebalance_start_when_glusterd_down.py @@ -193,14 +193,12 @@ class RebalanceValidation(GlusterBaseClass): "Failed") g.log.info("All peers are in connected state") - @classmethod - def tearDownClass(cls): - # Unmount Volume and Cleanup Volume + # Unmount and cleanup original volume g.log.info("Starting to Unmount Volume and Cleanup Volume") - ret = cls.unmount_volume_and_cleanup_volume(mounts=cls.mounts) + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: - raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") - g.log.info("Successful in Unmount Volume and Cleanup Volume") + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") 
# Calling GlusterBaseClass tearDown - cls.get_super_method(cls, 'tearDownClass')() + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/dht/test_rebalance_stop_mid_migration.py b/tests/functional/dht/test_rebalance_stop_mid_migration.py new file mode 100644 index 000000000..07f78431b --- /dev/null +++ b/tests/functional/dht/test_rebalance_stop_mid_migration.py @@ -0,0 +1,167 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +""" +Description: + This test case creates a large file at mount point, + adds extra brick and initiates rebalance. While + migration is in progress, it stops rebalance process + and checks if it has stopped. 
+""" + +from glusto.core import Glusto as g +from glustolibs.gluster import constants as k +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, + runs_on) +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.brick_ops import add_brick +from glustolibs.gluster.volume_libs import form_bricks_list_to_add_brick +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.glusterfile import get_pathinfo +from glustolibs.gluster.rebalance_ops import (rebalance_start, rebalance_stop) +from glustolibs.gluster.dht_test_utils import validate_files_in_dir +from glustolibs.gluster.exceptions import ExecutionError + + +@runs_on([['replicated', 'distributed', 'distributed-replicated', + 'arbiter', 'distributed-arbiter', 'disperse', + 'distributed-dispersed'], + ['glusterfs']]) +class TestDhtClass(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Setup volume + ret = self.setup_volume_and_mount_volume(self.mounts) + if not ret: + raise ExecutionError("Failed to create volume") + g.log.info("Successful in cleaning up Volume %s", self.volname) + + def tearDown(self): + + # Cleanup Volume + ret = self.unmount_volume_and_cleanup_volume(self.mounts) + if not ret: + raise ExecutionError("Failed to cleanup volume") + g.log.info("Successful in cleaning up Volume %s", self.volname) + + self.get_super_method(self, 'tearDown')() + + def test_rebalance_stop_with_large_file(self): + """ + Testcase Steps: + 1. Create and start a volume. + 2. Mount volume on client and create a large file. + 3. Add bricks to the volume and check layout + 4. Rename the file such that it hashs to different + subvol. + 5. Start rebalance on volume. + 6. Stop rebalance on volume. + """ + # Create file BIG1. 
+ command = ("dd if=/dev/urandom of={}/BIG1 bs=1024K count=10000" + .format(self.mounts[0].mountpoint)) + ret, _, _ = g.run(self.mounts[0].client_system, command) + self.assertEqual(ret, 0, "Unable to create file I/O failed") + g.log.info('Successfully created file BIG1.') + + # Checking if file created on correct subvol or not. + ret = validate_files_in_dir( + self.mounts[0].client_system, + self.mounts[0].mountpoint, + file_type=k.FILETYPE_FILES, + test_type=k.TEST_FILE_EXISTS_ON_HASHED_BRICKS) + self.assertTrue(ret, "Files not created on correct subvol.") + g.log.info("File BIG1 is on correct subvol according to " + "the hash value") + + # Adding brick to volume + add_brick_list = form_bricks_list_to_add_brick(self.mnode, + self.volname, + self.servers, + self.all_servers_info) + ret, _, _ = add_brick(self.mnode, self.volname, add_brick_list) + self.assertEqual(ret, 0, "Unable to add bricks to volume") + g.log.info("Successfully added bricks to volume.") + + # Check if brick is added successfully or not. + current_bricks = get_all_bricks(self.mnode, self.volname) + self.assertIsNotNone(current_bricks, "Unable to get " + "current active bricks of volume") + g.log.info("Successfully got active bricks of volume.") + for brick in add_brick_list: + self.assertIn(brick, current_bricks, + ("Brick %s is not added to volume" % brick)) + + # Create directory testdir. 
+        ret = mkdir(self.mounts[0].client_system,
+                    self.mounts[0].mountpoint + '/testdir')
+        self.assertTrue(ret, "Failed to create testdir directory")
+        g.log.info("Successfully created testdir directory.")
+
+        # Layout should be set on the new brick and should be
+        # continuous and complete
+        ret = validate_files_in_dir(self.mounts[0].client_system,
+                                    self.mounts[0].mountpoint + '/testdir',
+                                    test_type=k.TEST_LAYOUT_IS_COMPLETE)
+        self.assertTrue(ret, "Layout not set for the new subvol")
+        g.log.info("New subvol has been added successfully")
+
+        # Rename file so that it gets hashed to different subvol
+        file_index = 0
+        path_info_dict = get_pathinfo(self.mounts[0].client_system,
+                                      self.mounts[0].mountpoint + '/BIG1')
+        initial_brick_set = path_info_dict['brickdir_paths']
+
+        while True:
+            # Calculate old_filename and new_filename and rename.
+            file_index += 1
+            old_filename = "{}/BIG{}".format(self.mounts[0].mountpoint,
+                                             file_index)
+            new_filename = "{}/BIG{}".format(self.mounts[0].mountpoint,
+                                             (file_index+1))
+            ret, _, _ = g.run(self.mounts[0].client_system,
+                              "mv {} {}".format(old_filename, new_filename))
+            self.assertEqual(ret, 0, "Rename not successful")
+
+            # Checking if it was moved to new subvol or not.
+            path_info_dict = get_pathinfo(self.mounts[0].client_system,
+                                          self.mounts[0].mountpoint
+                                          + '/BIG%d' % (file_index+1))
+            if path_info_dict['brickdir_paths'] != initial_brick_set:
+                break
+        g.log.info("file renamed successfully")
+
+        # Start rebalance on volume
+        ret, _, _ = rebalance_start(self.mnode, self.volname, fix_layout=False)
+        self.assertEqual(ret, 0, "Rebalance did not start")
+        g.log.info("Rebalance started successfully on volume %s", self.volname)
+
+        # Stop rebalance on volume
+        ret, _, _ = rebalance_stop(self.mnode, self.volname)
+        self.assertEqual(ret, 0, "Rebalance stop command did not execute.")
+        g.log.info("Rebalance stopped successfully on volume %s",
+                   self.volname)
+
+        # Get rebalance status in xml
+        command = ("gluster volume rebalance {} status --xml"
+                   .format(self.volname))
+        ret, _, _ = g.run(self.mnode, command)
+        self.assertEqual(ret, 1,
+                         "Unexpected: Rebalance still running "
+                         "even after stop.")
+        g.log.info("Rebalance is not running after stop.")
diff --git a/tests/functional/dht/test_rebalance_two_volumes.py b/tests/functional/dht/test_rebalance_two_volumes.py
new file mode 100644
index 000000000..c96f75586
--- /dev/null
+++ b/tests/functional/dht/test_rebalance_two_volumes.py
@@ -0,0 +1,163 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.rebalance_ops import ( + rebalance_start, wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_libs import expand_volume +from glustolibs.io.utils import collect_mounts_arequal +from glustolibs.gluster.mount_ops import mount_volume +from glustolibs.gluster.volume_ops import (volume_create, volume_start, + volume_stop, volume_delete) +from glustolibs.gluster.lib_utils import form_bricks_list + + +@runs_on([['distributed', 'distributed-replicated'], ['glusterfs']]) +class TestRebalanceTwoVolumes(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Setup Volume + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to Setup and mount volume") + + self.first_client = self.mounts[0].client_system + + self.second_vol_name = "second_volume" + self.second_mountpoint = "/mnt/{}".format(self.second_vol_name) + self.is_second_volume_created = False + + def tearDown(self): + + # Unmount and clean volume + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to Cleanup Volume") + + if self.is_second_volume_created: + # Stop the 2nd volume + ret, _, _ = volume_stop(self.mnode, self.second_vol_name) + self.assertEqual(ret, 0, ("volume stop failed for %s" + % self.second_vol_name)) + g.log.info("Volume %s stopped", self.second_vol_name) + + # Delete the 2nd volume + ret = volume_delete(self.mnode, self.second_vol_name) + self.assertTrue(ret, ("Failed to cleanup the Volume " + "%s", self.second_vol_name)) + g.log.info("Volume deleted successfully 
: %s", + self.second_vol_name) + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_rebalance_two_volumes(self): + """ + Test case: + 1. Create a volume, start it and mount it + 2. Create a 2nd volume, start it and mount it + 3. Create files on mount points + 4. Collect arequal checksum on mount point pre-rebalance + 5. Expand the volumes + 6. Start rebalance simultaneously on the 2 volumes + 7. Wait for rebalance to complete + 8. Collect arequal checksum on mount point post-rebalance + and compare with value from step 4 + """ + + # Get brick list + bricks_list = form_bricks_list(self.mnode, self.volname, 3, + self.servers, self.all_servers_info) + self.assertIsNotNone(bricks_list, "Bricks list is None") + + # Create 2nd volume + ret, _, _ = volume_create(self.mnode, self.second_vol_name, + bricks_list) + self.assertEqual(ret, 0, ("Failed to create volume %s") % ( + self.second_vol_name)) + g.log.info("Volume %s created successfully", self.second_vol_name) + + # Start 2nd volume + ret, _, _ = volume_start(self.mnode, self.second_vol_name) + self.assertEqual(ret, 0, ("Failed to start volume %s") % ( + self.second_vol_name)) + g.log.info("Started volume %s", self.second_vol_name) + + self.is_second_volume_created = True + + # Mount 2nd volume + for mount_obj in self.mounts: + ret, _, _ = mount_volume(self.second_vol_name, + mtype=self.mount_type, + mpoint=self.second_mountpoint, + mserver=self.mnode, + mclient=mount_obj.client_system) + self.assertEqual(ret, 0, ("Failed to mount volume %s") % ( + self.second_vol_name)) + g.log.info("Volume mounted successfully : %s", + self.second_vol_name) + + # Start I/O from mount point for volume 1 and wait for it to complete + cmd = ("cd %s; for i in {1..1000} ; do " + "dd if=/dev/urandom of=file$i bs=10M count=1; done" + % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.first_client, cmd) + self.assertEqual(ret, 0, "IO failed on volume %s" + % self.volname) + + # Start I/O from 
mount point for volume 2 and wait for it to complete + cmd = ("cd %s; for i in {1..1000} ; do " + "dd if=/dev/urandom of=file$i bs=10M count=1; done" + % self.second_mountpoint) + ret, _, _ = g.run(self.first_client, cmd) + self.assertEqual(ret, 0, "IO failed on volume %s" + % self.second_vol_name) + + # Collect arequal checksum before rebalance + arequal_checksum_before = collect_mounts_arequal(self.mounts[0]) + + # Add bricks to volumes + for volume in (self.volname, self.second_vol_name): + ret = expand_volume(self.mnode, volume, self.servers, + self.all_servers_info) + self.assertTrue(ret, "Failed to add brick on volume %s" + % volume) + + # Trigger rebalance + for volume in (self.volname, self.second_vol_name): + ret, _, _ = rebalance_start(self.mnode, volume, + force=True) + self.assertEqual(ret, 0, "Failed to start rebalance on the" + " volume %s" % volume) + + # Wait for rebalance to complete + for volume in (self.volname, self.second_vol_name): + ret = wait_for_rebalance_to_complete(self.mnode, volume, + timeout=1200) + self.assertTrue(ret, "Rebalance is not yet complete on the volume" + " %s" % volume) + g.log.info("Rebalance successfully completed") + + # Collect arequal checksum after rebalance + arequal_checksum_after = collect_mounts_arequal(self.mounts[0]) + + # Check for data loss by comparing arequal before and after rebalance + self.assertEqual(arequal_checksum_before, arequal_checksum_after, + "arequal checksum is NOT MATCHNG") + g.log.info("arequal checksum is SAME") diff --git a/tests/functional/dht/test_rebalance_with_acl_set_to_files.py b/tests/functional/dht/test_rebalance_with_acl_set_to_files.py new file mode 100644 index 000000000..d290ae56a --- /dev/null +++ b/tests/functional/dht/test_rebalance_with_acl_set_to_files.py @@ -0,0 +1,129 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.glusterfile import set_acl, get_acl +from glustolibs.gluster.lib_utils import add_user, del_user +from glustolibs.gluster.mount_ops import mount_volume +from glustolibs.gluster.rebalance_ops import ( + rebalance_start, wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_libs import expand_volume +from glustolibs.io.utils import collect_mounts_arequal + + +@runs_on([['distributed-replicated', 'distributed-arbiter', 'distributed', + 'replicated', 'arbiter', 'distributed-dispersed', + 'dispersed'], ['glusterfs']]) +class TestRebalanceWithAclSetToFiles(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Setup Volume + if not self.setup_volume(): + raise ExecutionError("Failed to Setup volume") + + self.first_client = self.mounts[0].client_system + self.mount_point = self.mounts[0].mountpoint + + # Mount volume with -o acl option + ret, _, _ = mount_volume(self.volname, self.mount_type, + self.mount_point, self.mnode, + self.first_client, options='acl') + if ret: + raise ExecutionError("Failed to mount volume") + 
+ # Create a non-root user + if not add_user(self.first_client, 'joker'): + raise ExecutionError("Failed to create user joker") + + def tearDown(self): + + # Remove non-root user created for test + if not del_user(self.first_client, 'joker'): + raise ExecutionError("Failed to remove user joker") + + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Failed to Cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _check_acl_set_to_files(self): + """Check acl values set to files""" + for number in range(1, 11): + ret = get_acl(self.first_client, self.mount_point, + 'file{}'.format(str(number))) + self.assertIn('user:joker:rwx', ret['rules'], + "Rule not present in getfacl output") + + def test_add_brick_rebalance_with_acl_set_to_files(self): + """ + Test case: + 1. Create a volume, start it and mount it to a client. + 2. Create 10 files on the mount point and set acls on the files. + 3. Check the acl value and collect arequal-checksum. + 4. Add bricks to the volume and start rebalance. + 5. Check the value of acl(it should be same as step 3), + collect and compare arequal-checksum with the one collected + in step 3 + """ + # Create 10 files on the mount point. 
+ cmd = ("cd {}; for i in `seq 1 10`;do touch file$i;done" + .format(self.mount_point)) + ret, _, _ = g.run(self.first_client, cmd) + self.assertFalse(ret, "Failed to create files on mount point") + + for number in range(1, 11): + ret = set_acl(self.first_client, 'u:joker:rwx', '{}/file{}' + .format(self.mount_point, str(number))) + self.assertTrue(ret, "Failed to set acl on files") + + # Collect arequal on mount point and check acl value + arequal_checksum_before = collect_mounts_arequal(self.mounts[0]) + self._check_acl_set_to_files() + g.log.info("Files created and acl set to files properly") + + # Add brick to volume + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, "Failed to add brick on volume %s" + % self.volname) + + # Trigger rebalance and wait for it to complete + ret, _, _ = rebalance_start(self.mnode, self.volname, + force=True) + self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s" + % self.volname) + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=1200) + self.assertTrue(ret, "Rebalance is not yet complete on the volume " + "%s" % self.volname) + g.log.info("Rebalance successfully completed") + + # Check acl value if it's same as before rebalance + self._check_acl_set_to_files() + + # Check for data loss by comparing arequal before and after ops + arequal_checksum_after = collect_mounts_arequal(self.mounts[0]) + self.assertEqual(arequal_checksum_before, arequal_checksum_after, + "arequal checksum is NOT MATCHNG") + g.log.info("arequal checksum and acl value are SAME") diff --git a/tests/functional/dht/test_rebalance_with_brick_down.py b/tests/functional/dht/test_rebalance_with_brick_down.py new file mode 100644 index 000000000..fd0a0ffe2 --- /dev/null +++ b/tests/functional/dht/test_rebalance_with_brick_down.py @@ -0,0 +1,171 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-131 USA. + +""" +Description: + Rebalance with one brick down in replica +""" + +from random import choice + +from glusto.core import Glusto as g + +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.rebalance_ops import (rebalance_start, + wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_libs import ( + expand_volume, + log_volume_info_and_status, + volume_start) +from glustolibs.gluster.brick_libs import ( + get_all_bricks, + bring_bricks_offline) +from glustolibs.gluster.heal_libs import monitor_heal_completion +from glustolibs.io.utils import ( + wait_for_io_to_complete, + collect_mounts_arequal) +from glustolibs.misc.misc_libs import upload_scripts + + +@runs_on([['distributed-arbiter', 'distributed-replicated', + 'distributed-dispersed'], ['glusterfs']]) +class TestRebalanceWithBrickDown(GlusterBaseClass): + """ Rebalance with brick down in replica""" + + def setUp(self): + """Setup Volume""" + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + self.all_mounts_procs = [] + + # Setup and mount the volume + g.log.info("Starting to setup and mount the volume") + ret = 
self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup Volume and Mount it") + + # Upload IO script for running IO on mounts + self.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(self.mounts[0].client_system, + self.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients") + + def test_rebalance_with_brick_down(self): + """ + Rebalance with brick down in replica + - Create a Replica volume. + - Bring down one of the brick down in the replica pair + - Do some IO and create files on the mount point + - Add a pair of bricks to the volume + - Initiate rebalance + - Bring back the brick which was down. + - After self heal happens, all the files should be present. + """ + # Log the volume info and status before brick is down. + log_volume_info_and_status(self.mnode, self.volname) + + # Bring one fo the bricks offline + brick_list = get_all_bricks(self.mnode, self.volname) + ret = bring_bricks_offline(self.volname, choice(brick_list)) + + # Log the volume info and status after brick is down. + log_volume_info_and_status(self.mnode, self.volname) + + # Create files at mountpoint. + cmd = ( + "/usr/bin/env python %s create_files " + "-f 2000 --fixed-file-size 1k --base-file-name file %s" + % (self.script_upload_path, self.mounts[0].mountpoint)) + proc = g.run_async( + self.mounts[0].client_system, cmd, user=self.mounts[0].user) + self.all_mounts_procs.append(proc) + + # Wait for IO to complete. + self.assertTrue(wait_for_io_to_complete(self.all_mounts_procs, + self.mounts[0]), + "IO failed on some of the clients") + g.log.info("IO completed on the clients") + + # Compute the arequal checksum before bringing all bricks online + arequal_before_all_bricks_online = collect_mounts_arequal(self.mounts) + + # Log the volume info and status before expanding volume. 
+        log_volume_info_and_status(self.mnode, self.volname)
+
+        # Expand the volume.
+        ret = expand_volume(self.mnode, self.volname, self.servers,
+                            self.all_servers_info)
+        self.assertTrue(ret, ("Failed to expand the volume %s", self.volname))
+        g.log.info("Expanding volume is successful on "
+                   "volume %s", self.volname)
+
+        # Log the volume info after expanding volume.
+        log_volume_info_and_status(self.mnode, self.volname)
+
+        # Start Rebalance.
+        ret, _, _ = rebalance_start(self.mnode, self.volname)
+        self.assertEqual(ret, 0, ("Failed to start rebalance on the volume "
+                                  "%s", self.volname))
+        g.log.info("Successfully started rebalance on the volume %s",
+                   self.volname)
+
+        # Wait for rebalance to complete
+        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
+        self.assertTrue(ret, ("Rebalance is not yet complete on the volume "
+                              "%s", self.volname))
+        g.log.info("Rebalance is successfully complete on the volume %s",
+                   self.volname)
+
+        # Log the volume info and status before bringing all bricks online
+        log_volume_info_and_status(self.mnode, self.volname)
+
+        # Bring all bricks online.
+        ret, _, _ = volume_start(self.mnode, self.volname, force=True)
+        self.assertEqual(ret, 0, "Not able to start volume with force option")
+        g.log.info("Volume start with force option successful.")
+
+        # Log the volume info and status after bringing all bricks online
+        log_volume_info_and_status(self.mnode, self.volname)
+
+        # Monitor heal completion.
+        ret = monitor_heal_completion(self.mnode, self.volname)
+        self.assertTrue(ret, "heal has not yet completed")
+        g.log.info("Self heal completed")
+
+        # Compute the arequal checksum after all bricks online.
+        arequal_after_all_bricks_online = collect_mounts_arequal(self.mounts)
+
+        # Comparing arequal checksum before and after the operations.
+ self.assertEqual(arequal_before_all_bricks_online, + arequal_after_all_bricks_online, + "arequal checksum is NOT MATCHING") + g.log.info("arequal checksum is SAME") + + def tearDown(self): + """tear Down callback""" + # Unmount Volume and cleanup. + g.log.info("Starting to Unmount Volume and Cleanup") + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Filed to Unmount Volume and " + "Cleanup Volume") + g.log.info("Successful in Unmount Volume and cleanup.") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/dht/test_rebalance_with_hidden_files.py b/tests/functional/dht/test_rebalance_with_hidden_files.py index b0cb9b340..40fe7b976 100644 --- a/tests/functional/dht/test_rebalance_with_hidden_files.py +++ b/tests/functional/dht/test_rebalance_with_hidden_files.py @@ -91,16 +91,19 @@ class RebalanceValidation(GlusterBaseClass): # Start IO on mounts g.log.info("Starting IO on all mounts...") self.all_mounts_procs = [] + counter = 1 for mount_obj in self.mounts: g.log.info("Starting IO on %s:%s", mount_obj.client_system, mount_obj.mountpoint) cmd = ("/usr/bin/env python %s create_files " - "--base-file-name . 
-f 99 %s" % ( + "--base-file-name .file%d -f 99 %s" % ( self.script_upload_path, + counter, mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) self.all_mounts_procs.append(proc) + counter += 100 # validate IO self.assertTrue( @@ -173,7 +176,8 @@ class RebalanceValidation(GlusterBaseClass): # Wait for rebalance to complete g.log.info("Waiting for rebalance to complete") - ret = wait_for_rebalance_to_complete(self.mnode, self.volname) + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=1800) self.assertTrue(ret, ("Rebalance is not yet complete on the volume " "%s", self.volname)) g.log.info("Rebalance is successfully complete on the volume %s", diff --git a/tests/functional/dht/test_rebalance_with_quota.py b/tests/functional/dht/test_rebalance_with_quota.py new file mode 100644 index 000000000..5abb2ca1a --- /dev/null +++ b/tests/functional/dht/test_rebalance_with_quota.py @@ -0,0 +1,188 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +Description: + Rebalance with quota on mountpoint. 
+""" + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.rebalance_ops import (rebalance_start, + get_rebalance_status, + wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_libs import ( + log_volume_info_and_status, + expand_volume) +from glustolibs.gluster.quota_ops import ( + quota_enable, + quota_set_hard_timeout, + quota_set_soft_timeout, + quota_limit_usage) +from glustolibs.gluster.quota_libs import quota_validate +from glustolibs.io.utils import ( + wait_for_io_to_complete, + collect_mounts_arequal) +from glustolibs.misc.misc_libs import upload_scripts + + +@runs_on([['distributed', 'distributed-replicated', 'distributed-dispersed'], + ['glusterfs']]) +class TestRebalanceWithQuotOnRoot(GlusterBaseClass): + """ Rebalance with quota enabled on mountpoint """ + + def setUp(self): + """Setup Volume""" + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + self.all_mounts_procs = [] + + # Setup and Mount the volume + g.log.info("Starting to Setup volume and mount it.") + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup Volume and mount it") + + # Upload IO script for running IO on mounts + self.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(self.mounts[0].client_system, + self.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to client") + + def test_rebalance_with_quota_enabled(self): + """ + Test rebalance with quota enabled on root. + 1. Create Volume of type distribute + 2. Set Quota limit on the root directory + 3. Do some IO to reach the Hard limit + 4. After IO ends, compute arequal checksum + 5. Add bricks to the volume. + 6. Start rebalance + 7. 
After rebalance is completed, check arequal checksum + """ + # Enable Quota + ret, _, _ = quota_enable(self.mnode, self.volname) + self.assertEqual( + ret, 0, ("Failed to enable quota on the volume %s", self.volname)) + g.log.info("Successfully enabled quota on volume %s", self.volname) + + # Set the Quota timeouts to 0 for strict accounting + ret, _, _ = quota_set_hard_timeout(self.mnode, self.volname, 0) + self.assertEqual( + ret, 0, ("Failed to set hard-timeout to 0 for %s", self.volname)) + ret, _, _ = quota_set_soft_timeout(self.mnode, self.volname, 0) + self.assertEqual( + ret, 0, ("Failed to set soft-timeout to 0 for %s", self.volname)) + g.log.info( + "Quota soft and hard timeout has been set to 0 for %s", + self.volname) + + # Set the quota limit of 1 GB on root dir of the volume + ret, _, _ = quota_limit_usage(self.mnode, self.volname, "/", "1GB") + self.assertEqual(ret, 0, "Failed to set Quota for dir root") + g.log.info("Successfully set quota limit for dir root") + + # Do some IO until hard limit is reached. + cmd = ( + "/usr/bin/env python %s create_files " + "-f 1024 --fixed-file-size 1M --base-file-name file %s" + % (self.script_upload_path, self.mounts[0].mountpoint)) + proc = g.run_async( + self.mounts[0].client_system, cmd, user=self.mounts[0].user) + self.all_mounts_procs.append(proc) + + # Wait for IO to complete and validate IO + self.assertTrue(wait_for_io_to_complete(self.all_mounts_procs, + self.mounts[0]), + "IO failed on some of the clients") + g.log.info("IO completed on the clients") + + # Validate quota + ret = quota_validate(self.mnode, self.volname, + path='/', hard_limit=1073741824, + sl_exceeded=True, hl_exceeded=True) + self.assertTrue(ret, "Quota validate Failed for '/'") + g.log.info("Quota Validated for path '/'") + + # Compute arequal checksum. + arequal_checksum_before_rebalance = collect_mounts_arequal(self.mounts) + + # Log Volume info and status before expanding volume. 
+ log_volume_info_and_status(self.mnode, self.volname) + + # Expand the volume. + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, ("Failed to expand the volume %s", self.volname)) + g.log.info("Expanding volume is successful on " + "volume %s", self.volname) + + # Log volume info and status after expanding volume. + log_volume_info_and_status(self.mnode, self.volname) + + # Perform rebalance start operation. + ret, _, _ = rebalance_start(self.mnode, self.volname) + self.assertEqual(ret, 0, ("Failed to start rebalance on the volume " + "%s", self.volname)) + g.log.info("Rebalance started.") + + # Check rebalance is in progress + rebalance_status = get_rebalance_status(self.mnode, self.volname) + ret = rebalance_status['aggregate']['statusStr'] + self.assertEqual(ret, "in progress", ("Rebalance is not in " + "'in progress' state, either " + "rebalance is in completed state" + " or failed to get rebalance " + "status")) + g.log.info("Rebalance is 'in progress' state") + + # Wait till rebalance ends. + ret = wait_for_rebalance_to_complete(self.mnode, self.volname) + self.assertTrue(ret, ("Rebalance is not yet complete on the volume " + "%s", self.volname)) + g.log.info("Rebalance is successfully complete on the volume %s", + self.volname) + + # Validate quota + ret = quota_validate(self.mnode, self.volname, + path='/', hard_limit=1073741824, + sl_exceeded=True, hl_exceeded=True) + self.assertTrue(ret, "Quota validate Failed for '/'") + g.log.info("Quota Validated for path '/'") + + # Compute arequal checksum. + arequal_checksum_after_rebalance = collect_mounts_arequal(self.mounts) + + # Comparing arequals checksum before and after rebalance. 
+ self.assertEqual(arequal_checksum_before_rebalance, + arequal_checksum_after_rebalance, + "arequal checksum is NOT MATCHING") + g.log.info("arequal checksum is SAME") + + def tearDown(self): + """tear Down Callback""" + # Unmount volume and do cleanup + g.log.info("Starting to Unmount volume and cleanup") + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Faile to Unmount and cleanup volume") + g.log.info("Successful in Unmount and cleanup of volumes") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/dht/test_rebalance_with_quota_on_subdirectory.py b/tests/functional/dht/test_rebalance_with_quota_on_subdirectory.py new file mode 100644 index 000000000..bddb9aeb6 --- /dev/null +++ b/tests/functional/dht/test_rebalance_with_quota_on_subdirectory.py @@ -0,0 +1,195 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +Description: + Rebalance with quota on subdirectory. 
+""" + +from glusto.core import Glusto as g + +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.rebalance_ops import (rebalance_start, + get_rebalance_status, + wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_libs import ( + log_volume_info_and_status, + expand_volume) +from glustolibs.gluster.quota_ops import ( + quota_enable, + quota_set_hard_timeout, + quota_set_soft_timeout, + quota_limit_usage) +from glustolibs.gluster.quota_libs import quota_validate +from glustolibs.io.utils import ( + wait_for_io_to_complete, + collect_mounts_arequal) +from glustolibs.misc.misc_libs import upload_scripts + + +@runs_on([['distributed', 'distributed-replicated', 'distributed-dispersed'], + ['glusterfs']]) +class TestRebalanceWithQuotaOnSubDir(GlusterBaseClass): + """ Rebalance with quota enabled on subdirectory """ + + def setUp(self): + """Setup Volume""" + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + self.all_mounts_procs = [] + + # Setup and Mount the volume + g.log.info("Starting to Setup volume and mount it.") + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup Volume and mount it") + + # Upload IO script for running IO on mounts + self.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(self.mounts[0].client_system, + self.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to client") + + def test_rebalance_with_quota_enabled_on_subdirectory(self): + """ + Test rebalance with quota enabled on subdirectory. + 1. Create Volume of type distribute + 2. Set Quota limit on subdirectory + 3. Do some IO to reach the Hard limit + 4. After IO ends, compute arequal checksum + 5. Add bricks to the volume. + 6. Start rebalance + 7. 
After rebalance is completed, check arequal checksum + """ + # Creating main directory. + ret = mkdir(self.mounts[0].client_system, + "{}/main".format(self.mounts[0].mountpoint)) + self.assertTrue(ret, "mkdir of dir main failed") + + # Enable Quota + ret, _, _ = quota_enable(self.mnode, self.volname) + self.assertEqual( + ret, 0, ("Failed to enable quota on the volume %s", self.volname)) + g.log.info("Successfully enabled quota on volume %s", self.volname) + + # Set the Quota timeouts to 0 for strict accounting + ret, _, _ = quota_set_hard_timeout(self.mnode, self.volname, 0) + self.assertEqual( + ret, 0, ("Failed to set hard-timeout to 0 for %s", self.volname)) + ret, _, _ = quota_set_soft_timeout(self.mnode, self.volname, 0) + self.assertEqual( + ret, 0, ("Failed to set soft-timeout to 0 for %s", self.volname)) + g.log.info( + "Quota soft and hard timeout has been set to 0 for %s", + self.volname) + + # Set the quota limit of 1 GB on /main dir of the volume + ret, _, _ = quota_limit_usage(self.mnode, self.volname, "/main", + "1GB") + self.assertEqual(ret, 0, "Failed to set Quota for dir /main") + g.log.info("Successfully set quota limit for dir /main") + + # Do some IO until hard limit is reached. 
+ cmd = ( + "/usr/bin/env python %s create_files " + "-f 1024 --fixed-file-size 1M --base-file-name file %s/main/" + % (self.script_upload_path, self.mounts[0].mountpoint)) + proc = g.run_async( + self.mounts[0].client_system, cmd, user=self.mounts[0].user) + self.all_mounts_procs.append(proc) + + # Wait for IO to complete and validate IO + self.assertTrue(wait_for_io_to_complete(self.all_mounts_procs, + self.mounts[0]), + "IO failed on some of the clients") + g.log.info("IO completed on the clients") + + # Validate quota + ret = quota_validate(self.mnode, self.volname, + path='/main', hard_limit=1073741824, + sl_exceeded=True, hl_exceeded=True) + self.assertTrue(ret, "Quota validate Failed for '/main'") + g.log.info("Quota Validated for path '/main'") + + # Compute arequal checksum. + arequal_checksum_before_rebalance = collect_mounts_arequal(self.mounts) + + # Log Volume info and status before expanding volume. + log_volume_info_and_status(self.mnode, self.volname) + + # Expand the volume. + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, ("Failed to expand the volume %s", self.volname)) + g.log.info("Expanding volume is successful on " + "volume %s", self.volname) + + # Log volume info and status after expanding volume. + log_volume_info_and_status(self.mnode, self.volname) + + # Perform rebalance start operation. + ret, _, _ = rebalance_start(self.mnode, self.volname) + self.assertEqual(ret, 0, ("Failed to start rebalance on the volume " + "%s", self.volname)) + g.log.info("Rebalance started.") + + # Check rebalance is in progress + rebalance_status = get_rebalance_status(self.mnode, self.volname) + ret = rebalance_status['aggregate']['statusStr'] + self.assertEqual(ret, "in progress", ("Rebalance is not in " + "'in progress' state, either " + "rebalance is in completed state" + " or failed to get rebalance " + "status")) + + # Wait till rebalance ends. 
+ ret = wait_for_rebalance_to_complete(self.mnode, self.volname) + self.assertTrue(ret, ("Rebalance is not yet complete on the volume " + "%s", self.volname)) + g.log.info("Rebalance is successfully complete on the volume %s", + self.volname) + + # Validate quota + ret = quota_validate(self.mnode, self.volname, + path='/main', hard_limit=1073741824, + sl_exceeded=True, hl_exceeded=True) + self.assertTrue(ret, "Quota validate Failed for '/main'") + g.log.info("Quota Validated for path '/main'") + + # Compute arequal checksum. + arequal_checksum_after_rebalance = collect_mounts_arequal(self.mounts) + + # Comparing arequals checksum before and after rebalance. + self.assertEqual(arequal_checksum_before_rebalance, + arequal_checksum_after_rebalance, + "arequal checksum is NOT MATCHING") + g.log.info("arequal checksum is SAME") + + def tearDown(self): + """tear Down Callback""" + # Unmount volume and do cleanup + g.log.info("Starting to Unmount volume and cleanup") + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Faile to Unmount and cleanup volume") + g.log.info("Successful in Unmount and cleanup of volumes") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/dht/test_rebalance_with_special_files.py b/tests/functional/dht/test_rebalance_with_special_files.py new file mode 100644 index 000000000..c3cb33cca --- /dev/null +++ b/tests/functional/dht/test_rebalance_with_special_files.py @@ -0,0 +1,158 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. 
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+"""
+Description:
+    Rebalance with special files
+"""
+
+from glusto.core import Glusto as g
+
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.rebalance_ops import (
+    rebalance_start,
+    get_rebalance_status,
+    wait_for_rebalance_to_complete)
+from glustolibs.gluster.volume_libs import (
+    expand_volume,
+    log_volume_info_and_status)
+from glustolibs.io.utils import wait_for_io_to_complete
+from glustolibs.misc.misc_libs import upload_scripts
+
+
+@runs_on([['distributed', 'distributed-arbiter', 'distributed-replicated',
+           'distributed-dispersed'], ['glusterfs']])
+class TestRebalanceWithSpecialFiles(GlusterBaseClass):
+    """ Rebalance with special files"""
+
+    def setUp(self):
+        """Setup Volume"""
+        # Calling GlusterBaseClass setUp
+        self.get_super_method(self, 'setUp')()
+        self.all_mounts_procs = []
+
+        # Setup and mount the volume
+        g.log.info("Starting to setup and mount the volume")
+        ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
+        if not ret:
+            raise ExecutionError("Failed to Setup Volume and Mount it")
+
+        # Upload IO script for running IO on mounts
+        self.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
+                                   "file_dir_ops.py")
+        ret = upload_scripts(self.mounts[0].client_system,
+                             self.script_upload_path)
+        if not ret:
+            raise ExecutionError("Failed to upload IO scripts to clients")
+
+    def test_rebalance_with_special_files(self):
+        """
+        Rebalance with special files
+        - Create Volume and start it.
+        - Create some special files on mount point.
+        - Once it is complete, start some IO.
+        - Add brick into the volume and start rebalance
+        - All IO should be successful.
+        """
+        # Create pipe files at mountpoint.
+        cmd = (
+            "for i in {1..500};do mkfifo %s/fifo${i}; done"
+            % (self.mounts[0].mountpoint))
+        ret, _, _ = g.run(self.clients[0], cmd)
+        self.assertEqual(ret, 0, "Failed to create pipe files")
+        g.log.info("Pipe files created successfully")
+
+        # Create block device files at mountpoint.
+        cmd = (
+            "for i in {1..500};do mknod %s/blk${i} b 1 5;done"
+            % (self.mounts[0].mountpoint))
+        ret, _, _ = g.run(self.clients[0], cmd)
+        self.assertEqual(ret, 0, "Failed to create block files")
+        g.log.info("Block files created successfully")
+
+        # Create character device files at mountpoint.
+        cmd = (
+            "for i in {1..500};do mknod %s/charc${i} c 1 5;done"
+            % (self.mounts[0].mountpoint))
+        ret, _, _ = g.run(self.clients[0], cmd)
+        self.assertEqual(ret, 0, "Failed to create character files")
+        g.log.info("Character files created successfully")
+
+        # Create files at mountpoint.
+        cmd = (
+            "/usr/bin/env python %s create_files "
+            "-f 1000 --fixed-file-size 1M --base-file-name file %s"
+            % (self.script_upload_path, self.mounts[0].mountpoint))
+        proc = g.run_async(
+            self.mounts[0].client_system, cmd, user=self.mounts[0].user)
+        self.all_mounts_procs.append(proc)
+
+        # Log the volume info and status before expanding volume.
+        log_volume_info_and_status(self.mnode, self.volname)
+
+        # Expand the volume.
+        ret = expand_volume(self.mnode, self.volname, self.servers,
+                            self.all_servers_info)
+        self.assertTrue(ret, ("Failed to expand the volume %s", self.volname))
+        g.log.info("Expanding volume is successful on "
+                   "volume %s", self.volname)
+
+        # Log the volume info after expanding volume.
+        log_volume_info_and_status(self.mnode, self.volname)
+
+        # Start Rebalance.
+ ret, _, _ = rebalance_start(self.mnode, self.volname) + self.assertEqual(ret, 0, ("Failed to start rebalance on the volume " + "%s", self.volname)) + g.log.info("Successfully started rebalance on the volume %s", + self.volname) + + # Check rebalance is in progress + rebalance_status = get_rebalance_status(self.mnode, self.volname) + ret = rebalance_status['aggregate']['statusStr'] + self.assertEqual(ret, "in progress", ("Rebalance is not in " + "'in progress' state, either " + "rebalance is in completed state" + " or failed to get rebalance " + "status")) + g.log.info("Rebalance is in 'in progress' state") + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname) + self.assertTrue(ret, ("Rebalance is not yet complete on the volume " + "%s", self.volname)) + g.log.info("Rebalance is successfully complete on the volume %s", + self.volname) + + # Wait for IO to complete. + self.assertTrue(wait_for_io_to_complete(self.all_mounts_procs, + self.mounts[0]), + "IO failed on some of the clients") + g.log.info("IO completed on the clients") + + def tearDown(self): + """tear Down callback""" + # Unmount Volume and cleanup. + g.log.info("Starting to Unmount Volume and Cleanup") + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Unmount Volume and " + "Cleanup Volume") + g.log.info("Successful in Unmount Volume and cleanup.") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/dht/test_remove_brick_command_opitons.py b/tests/functional/dht/test_remove_brick_command_opitons.py new file mode 100644 index 000000000..2e5b0c81a --- /dev/null +++ b/tests/functional/dht/test_remove_brick_command_opitons.py @@ -0,0 +1,113 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.brick_libs import get_all_bricks
+from glustolibs.gluster.brick_ops import remove_brick
+from glustolibs.gluster.volume_libs import shrink_volume
+from glustolibs.gluster.volume_libs import form_bricks_list_to_remove_brick
+from glustolibs.io.utils import collect_mounts_arequal
+
+
+@runs_on([['distributed-replicated', 'distributed-dispersed',
+           'distributed-arbiter', 'distributed'], ['glusterfs']])
+class TestRemoveBrickCommandOptions(GlusterBaseClass):
+
+    def setUp(self):
+
+        self.get_super_method(self, 'setUp')()
+
+        # Creating Volume and mounting the volume
+        ret = self.setup_volume_and_mount_volume([self.mounts[0]])
+        if not ret:
+            raise ExecutionError("Volume creation or mount failed: %s"
+                                 % self.volname)
+
+    def tearDown(self):
+
+        # Unmounting and cleaning volume
+        ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]])
+        if not ret:
+            raise ExecutionError("Unable to delete volume %s" % self.volname)
+
+        self.get_super_method(self, 'tearDown')()
+
+    def _run_io_on_mount_point(self, fname="file"):
+        """Create a few files on mount point"""
+ cmd = ("cd {};for i in `seq 1 5`; do mkdir dir$i;" + "for j in `seq 1 10`;do touch {}$j;done;done" + .format(self.mounts[0].mountpoint, fname)) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertFalse(ret, "Failed to do I/O on mount point") + + def test_remove_brick_command_basic(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create some data on the volume. + 3. Run remove-brick start, status and finally commit. + 4. Check if there is any data loss or not. + """ + # Create some data on the volume + self._run_io_on_mount_point() + + # Collect arequal checksum before ops + arequal_checksum_before = collect_mounts_arequal(self.mounts[0]) + + # Run remove-brick start, status and finally commit + ret = shrink_volume(self.mnode, self.volname) + self.assertTrue(ret, "Failed to remove-brick from volume") + g.log.info("Remove-brick rebalance successful") + + # Check for data loss by comparing arequal before and after ops + arequal_checksum_after = collect_mounts_arequal(self.mounts[0]) + self.assertEqual(arequal_checksum_before, arequal_checksum_after, + "arequal checksum is NOT MATCHNG") + g.log.info("arequal checksum is SAME") + + def test_remove_brick_command_force(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create some data on the volume. + 3. Run remove-brick with force. + 4. 
Check if bricks are still seen on volume or not + """ + # Create some data on the volume + self._run_io_on_mount_point() + + # Remove-brick on the volume with force option + brick_list_to_remove = form_bricks_list_to_remove_brick(self.mnode, + self.volname) + self.assertIsNotNone(brick_list_to_remove, "Brick list is empty") + + ret, _, _ = remove_brick(self.mnode, self.volname, + brick_list_to_remove, option="force") + self.assertFalse(ret, "Failed to run remove-brick with force") + g.log.info("Successfully run remove-brick with force") + + # Get a list of all bricks + brick_list = get_all_bricks(self.mnode, self.volname) + self.assertIsNotNone(brick_list, "Brick list is empty") + + # Check if bricks removed brick are present or not in brick list + for brick in brick_list_to_remove: + self.assertNotIn(brick, brick_list, + "Brick still present in brick list even " + "after removing") diff --git a/tests/functional/dht/test_remove_brick_no_commit_followed_by_rebalance.py b/tests/functional/dht/test_remove_brick_no_commit_followed_by_rebalance.py new file mode 100644 index 000000000..dc80a3544 --- /dev/null +++ b/tests/functional/dht/test_remove_brick_no_commit_followed_by_rebalance.py @@ -0,0 +1,169 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from time import sleep +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.io.utils import collect_mounts_arequal, validate_io_procs +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.gluster.volume_libs import (form_bricks_list_to_remove_brick, + expand_volume) +from glustolibs.gluster.brick_ops import remove_brick +from glustolibs.gluster.rebalance_ops import (rebalance_start, + wait_for_rebalance_to_complete) + + +@runs_on([['distributed', 'distributed-replicated', + 'distributed-dispersed', 'distributed-arbiter'], + ['glusterfs']]) +class TestRemoveBrickNoCommitFollowedByRebalance(GlusterBaseClass): + @classmethod + def setUpClass(cls): + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts " + "to clients %s" % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + """ + Setup and mount volume or raise ExecutionError + """ + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup Volume + ret = self.setup_volume_and_mount_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Failed to Setup and Mount Volume") + + def tearDown(self): + + # Unmount and cleanup original volume + ret = self.unmount_volume_and_cleanup_volume(mounts=[self.mounts[0]]) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_remove_brick_no_commit_followed_by_rebalance(self): + """ + Description: Tests to 
check that there is no data loss when + remove-brick operation is stopped and then new bricks + are added to the volume. + Steps : + 1) Create a volume. + 2) Mount the volume using FUSE. + 3) Create files and dirs on the mount-point. + 4) Calculate the arequal-checksum on the mount-point + 5) Start remove-brick operation on the volume. + 6) While migration is in progress, stop the remove-brick + operation. + 7) Add-bricks to the volume and trigger rebalance. + 8) Wait for rebalance to complete. + 9) Calculate the arequal-checksum on the mount-point. + """ + # Start IO on mounts + m_point = self.mounts[0].mountpoint + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dir-length 10 --dir-depth 2 --max-num-of-dirs 1 " + "--num-of-files 50 --file-type empty-file %s" % ( + self.script_upload_path, m_point)) + proc = g.run_async(self.mounts[0].client_system, + cmd, user=self.mounts[0].user) + g.log.info("IO on %s:%s is started successfully", + self.mounts[0].client_system, m_point) + + # Validate IO + self.assertTrue( + validate_io_procs([proc], self.mounts[0]), + "IO failed on some of the clients" + ) + + # Calculate arequal-checksum before starting remove-brick + ret, arequal_before = collect_mounts_arequal(self.mounts[0]) + self.assertTrue(ret, "Collecting arequal-checksum failed") + + # Form bricks list for volume shrink + remove_brick_list = form_bricks_list_to_remove_brick( + self.mnode, self.volname, subvol_name=1) + self.assertIsNotNone(remove_brick_list, ("Volume %s: Failed to " + "form bricks list for " + "shrink", self.volname)) + g.log.info("Volume %s: Formed bricks list for shrink", self.volname) + + # Shrink volume by removing bricks + ret, _, _ = remove_brick(self.mnode, self.volname, + remove_brick_list, "start") + self.assertEqual(ret, 0, ("Volume %s shrink failed ", + self.volname)) + g.log.info("Volume %s shrink started ", self.volname) + + # Log remove-brick status + ret, out, _ = remove_brick(self.mnode, self.volname, + 
remove_brick_list, "status") + self.assertEqual(ret, 0, ("Remove-brick status failed on %s ", + self.volname)) + + # Check if migration is in progress + if r'in progress' in out: + # Stop remove-brick process + g.log.info("Stop removing bricks from volume") + ret, out, _ = remove_brick(self.mnode, self.volname, + remove_brick_list, "stop") + self.assertEqual(ret, 0, "Failed to stop remove-brick process") + g.log.info("Stopped remove-brick process successfully") + else: + g.log.error("Migration for remove-brick is complete") + + # Sleep for 30 secs so that any running remove-brick process stops + sleep(30) + + # Add bricks to the volume + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, ("Volume %s: Add-brick failed", self.volname)) + g.log.info("Volume %s: Add-brick successful", self.volname) + + # Tigger rebalance + ret, _, _ = rebalance_start(self.mnode, self.volname) + self.assertEqual(ret, 0, ("Volume %s: Failed to start rebalance", + self.volname)) + g.log.info("Volume %s: Rebalance started ", self.volname) + + # Wait for rebalance to complete + ret = wait_for_rebalance_to_complete(self.mnode, self.volname) + self.assertTrue(ret, "Rebalance has not completed") + g.log.info("Rebalance has completed successfully") + + # Calculate arequal-checksum on mount-point + ret, arequal_after = collect_mounts_arequal(self.mounts[0]) + self.assertTrue(ret, "Collecting arequal-checksum failed") + + # Check if there is any data loss + self.assertEqual(set(arequal_before), set(arequal_after), + ("There is data loss")) + g.log.info("The checksum before and after rebalance is same." + " There is no data loss.") diff --git a/tests/functional/dht/test_remove_brick_with_open_fd.py b/tests/functional/dht/test_remove_brick_with_open_fd.py new file mode 100644 index 000000000..053114295 --- /dev/null +++ b/tests/functional/dht/test_remove_brick_with_open_fd.py @@ -0,0 +1,107 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.glusterfile import get_md5sum
+from glustolibs.gluster.volume_libs import get_subvols, shrink_volume
+from glustolibs.gluster.dht_test_utils import find_hashed_subvol
+from glustolibs.io.utils import validate_io_procs, wait_for_io_to_complete
+
+
+@runs_on([['distributed-replicated', 'distributed-dispersed',
+           'distributed-arbiter', 'distributed'], ['glusterfs']])
+class TestRemoveBrickWithOpenFD(GlusterBaseClass):
+
+    def setUp(self):
+
+        self.get_super_method(self, 'setUp')()
+
+        # Creating Volume and mounting the volume
+        ret = self.setup_volume_and_mount_volume([self.mounts[0]])
+        if not ret:
+            raise ExecutionError("Volume creation or mount failed: %s"
+                                 % self.volname)
+        self.is_copy_running = False
+
+    def tearDown(self):
+
+        # If I/O processes are running, wait for them to complete
+        if self.is_copy_running:
+            if not wait_for_io_to_complete(self.list_of_io_processes,
+                                           self.mounts):
+                raise ExecutionError("Failed to wait for I/O to complete")
+
+        # Unmounting and cleaning volume
+        ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]])
+        if not ret:
+            raise ExecutionError("Unable to delete volume %s" % self.volname)
+
+        self.get_super_method(self, 'tearDown')()
+
+    def test_remove_brick_with_open_fd(self):
+        """
+        Test case:
+        1. Create volume, start it and mount it.
+        2. Open file datafile on mount point and start copying /etc/passwd
+           line by line (Make sure that the copy is slow).
+        3. Start remove-brick of the subvol to which datafile is hashed.
+        4. Once remove-brick is complete compare the checksum of /etc/passwd
+           and datafile.
+        """
+        # Open file datafile on mount point and start copying /etc/passwd
+        # line by line
+        ret, out, _ = g.run(self.mounts[0].client_system,
+                            "cat /etc/passwd | wc -l")
+        self.assertFalse(ret, "Failed to get number of lines of /etc/passwd")
+        cmd = ("cd {}; exec 30<> datafile ;for i in `seq 1 {}`; do "
+               "head -n $i /etc/passwd | tail -n 1 >> datafile; sleep 10; done"
+               .format(self.mounts[0].mountpoint, out.strip()))
+
+        self.list_of_io_processes = [
+            g.run_async(self.mounts[0].client_system, cmd)]
+        self.is_copy_running = True
+
+        # Start remove-brick of the subvol to which datafile is hashed
+        subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
+        number = find_hashed_subvol(subvols, "/", 'datafile')[1]
+
+        ret = shrink_volume(self.mnode, self.volname, subvol_num=number)
+        self.assertTrue(ret, "Failed to remove-brick from volume")
+        g.log.info("Remove-brick rebalance successful")
+
+        # Validate if I/O was successful or not.
+ ret = validate_io_procs(self.list_of_io_processes, self.mounts) + self.assertTrue(ret, "IO failed on some of the clients") + self.is_copy_running = False + + # Compare md5checksum of /etc/passwd and datafile + md5_of_orginal_file = get_md5sum(self.mounts[0].client_system, + '/etc/passwd') + self.assertIsNotNone(md5_of_orginal_file, + 'Unable to get md5 checksum of orignial file') + md5_of_copied_file = get_md5sum( + self.mounts[0].client_system, '{}/datafile'.format( + self.mounts[0].mountpoint)) + self.assertIsNotNone(md5_of_copied_file, + 'Unable to get md5 checksum of copied file') + self.assertEqual(md5_of_orginal_file.split(" ")[0], + md5_of_copied_file.split(" ")[0], + "md5 checksum of original and copied file didn't" + " match") + g.log.info("md5 checksum of original and copied files are same") diff --git a/tests/functional/dht/test_rename_directory.py b/tests/functional/dht/test_rename_directory.py index ef2eae258..3486bbf8b 100644 --- a/tests/functional/dht/test_rename_directory.py +++ b/tests/functional/dht/test_rename_directory.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2020 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -34,13 +34,14 @@ from glustolibs.gluster.glusterfile import file_exists, move_file @runs_on([['distributed-replicated', 'replicated', 'distributed', - 'dispersed', 'distributed-dispersed'], - ['glusterfs', 'nfs']]) + 'dispersed', 'distributed-dispersed', 'arbiter', + 'distributed-arbiter'], + ['glusterfs']]) class TestDHTRenameDirectory(GlusterBaseClass): """DHT Tests - rename directory Scenarios: - 1 - Rename directory when destination is not presented - 2 - Rename directory when destination is presented + 1 - Rename directory when destination is not present + 2 - Rename directory when destination is present """ def setUp(self): @@ -97,8 +98,7 @@ class TestDHTRenameDirectory(GlusterBaseClass): return True def test_rename_directory_no_destination_folder(self): - """Test rename directory with no destination folder - """ + """Test rename directory with no destination folder""" dirs = { 'initial': '{root}/folder_{client_index}', 'new_folder': '{root}/folder_renamed{client_index}' @@ -107,7 +107,6 @@ class TestDHTRenameDirectory(GlusterBaseClass): for mount_index, mount_obj in enumerate(self.mounts): client_host = mount_obj.client_system mountpoint = mount_obj.mountpoint - initial_folder = dirs['initial'].format( root=mount_obj.mountpoint, client_index=mount_index @@ -125,6 +124,7 @@ class TestDHTRenameDirectory(GlusterBaseClass): self.assertTrue(file_exists(client_host, initial_folder)) g.log.info('Created source directory %s on mount point %s', initial_folder, mountpoint) + # Create files and directories ret = self.create_files(client_host, initial_folder, self.files, content='Textual content') @@ -135,7 +135,7 @@ class TestDHTRenameDirectory(GlusterBaseClass): ret = validate_files_in_dir(client_host, mountpoint, test_type=FILE_ON_HASHED_BRICKS) self.assertTrue(ret, "Expected - Files and dirs are 
stored " - "on hashed bricks") + "on hashed bricks") g.log.info('Files and dirs are stored on hashed bricks') new_folder_name = dirs['new_folder'].format( @@ -237,8 +237,9 @@ class TestDHTRenameDirectory(GlusterBaseClass): ret = validate_files_in_dir(client_host, mountpoint, test_type=FILE_ON_HASHED_BRICKS) self.assertTrue(ret, "Expected - Files and dirs are stored " - "on hashed bricks") + "on hashed bricks") g.log.info('Files and dirs are stored on hashed bricks') + # Rename source folder to destination ret = move_file(client_host, initial_folder, new_folder_name) diff --git a/tests/functional/dht/test_rename_files_with_brick_down.py b/tests/functional/dht/test_rename_files_with_brick_down.py new file mode 100644 index 000000000..9a13bbe5d --- /dev/null +++ b/tests/functional/dht/test_rename_files_with_brick_down.py @@ -0,0 +1,172 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
from random import choice
from time import sleep

from glusto.core import Glusto as g

from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.brick_libs import get_all_bricks, bring_bricks_offline
from glustolibs.gluster.volume_ops import volume_start
from glustolibs.gluster.volume_libs import get_volume_type
from glustolibs.gluster.glusterfile import create_link_file


@runs_on([['replicated', 'arbiter',
           'distributed', 'distributed-arbiter',
           'distributed-replicated'],
          ['glusterfs']])
class TestRenameFilesBrickDown(GlusterBaseClass):
    """Verify there is no data loss when renames are performed while a
    brick of the volume is offline and later brought back online."""

    # pylint: disable=too-many-statements
    def setUp(self):
        """
        Setup and mount volume or raise ExecutionError
        """
        self.get_super_method(self, 'setUp')()

        # Setup Volume
        ret = self.setup_volume_and_mount_volume(self.mounts)
        if not ret:
            g.log.error("Failed to Setup and Mount Volume")
            raise ExecutionError("Failed to Setup and Mount Volume")

    def tearDown(self):
        """Unmount and cleanup the volume created in setUp."""
        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
        if not ret:
            raise ExecutionError("Failed to umount the vol & cleanup Volume")
        g.log.info("Successful in umounting the volume and Cleanup")

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()

    def test_rename_files_with_brick_down(self):
        """
        Description: Tests to check that there is no data loss when rename is
                     performed with a brick of volume down.
         Steps :
         1) Create a volume.
         2) Mount the volume using FUSE.
         3) Create 1000 files on the mount point.
         4) Create the soft-link for file{1..100}
         5) Create the hard-link for file{101..200}
         6) Check for the file count on the mount point.
         7) Begin renaming the files, in multiple iterations.
         8) Let few iterations of the rename complete successfully.
         9) Then while rename is still in progress, kill a brick part of the
            volume.
         10) Let the brick be down for sometime, such that a couple
             of rename iterations are completed.
         11) Bring the brick back online.
         12) Wait for the IO to complete.
         13) Check if there is any data loss.
         14) Check if all the files are renamed properly.
        """
        # Creating 1000 files on volume root
        m_point = self.mounts[0].mountpoint
        command = 'touch ' + m_point + '/file{1..1000}_0'
        ret, _, _ = g.run(self.clients[0], command)
        self.assertEqual(ret, 0, "File creation failed on %s"
                         % m_point)
        g.log.info("Files successfully created on the mount point")

        # Create soft links for file{1..100}.
        # range() upper bound is exclusive, hence 101 to cover all 100 files.
        for i in range(1, 101):
            ret = create_link_file(self.clients[0],
                                   '{}/file{}_0'.format(m_point, i),
                                   '{}/soft_link_file{}_0'.format(m_point, i),
                                   soft=True)
            self.assertTrue(ret, "Failed to create soft links for files")
        g.log.info("Created soft links for files successfully")

        # Create hard links for file{101..200}.
        # range() upper bound is exclusive, hence 201 to cover all 100 files.
        for i in range(101, 201):
            ret = create_link_file(self.clients[0],
                                   '{}/file{}_0'.format(m_point, i),
                                   '{}/hard_link_file{}_0'.format(m_point, i),
                                   soft=False)
            self.assertTrue(ret, "Failed to create hard links for files")
        g.log.info("Created hard links for files successfully")

        # Calculate file count for the mount-point
        cmd = ("ls -lR %s/ | wc -l" % m_point)
        ret, count_before, _ = g.run(self.clients[0], cmd)
        self.assertEqual(ret, 0, "Failed to get file count")
        g.log.info("File count before rename is:%s", count_before)

        # Start renaming the files in multiple iterations:
        # file$i_0 -> file$i_1 -> ... -> file$i_6
        g.log.info("Starting to rename the files")
        cmd = ('for i in `seq 1 1000`; do for j in `seq 0 5`;do mv -f '
               '%s/file$i\\_$j %s/file$i\\_$(expr $j + 1); done; done'
               % (m_point, m_point))
        proc = g.run_async(self.mounts[0].client_system, cmd,
                           user=self.mounts[0].user)

        # Waiting for some time for a few iterations of rename to complete
        g.log.info("Waiting for few rename iterations to complete")
        sleep(120)

        # Get the information about the bricks part of the volume
        brick_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(brick_list, "Failed to get the brick list")

        # Kill a randomly chosen brick part of the volume
        ret = bring_bricks_offline(self.volname, choice(brick_list))
        self.assertTrue(ret, "Failed to bring brick offline")
        g.log.info("Successfully brought brick offline")

        # Let the brick be down for some time so that more rename
        # iterations run with the brick missing
        g.log.info("Keeping brick down for few minutes")
        sleep(60)

        # Bring the brick online using gluster v start force
        ret, _, _ = volume_start(self.mnode, self.volname, force=True)
        self.assertEqual(ret, 0, "Volume start with force failed")
        g.log.info("Volume start with force successful")

        # Close connection and check if rename has completed
        ret, _, _ = proc.async_communicate()
        self.assertEqual(ret, 0, "Rename is not completed")
        g.log.info("Rename is completed")

        # Do lookup on the files
        # Calculate file count from mount
        cmd = ("ls -lR %s/ | wc -l" % m_point)
        ret, count_after, _ = g.run(self.clients[0], cmd)
        self.assertEqual(ret, 0, "Failed to do lookup and"
                         "get file count")
        g.log.info("Lookup successful. File count after"
                   " rename is:%s", count_after)

        # Check if there is any data loss
        self.assertEqual(int(count_before), int(count_after),
                         "The file count before and after"
                         " rename is not same. There is data loss.")
        g.log.info("The file count before and after rename is same."
                   " No data loss occurred.")

        # Checking if all files were renamed successfully (only meaningful
        # on volume types that keep a full copy per subvolume)
        ret = get_volume_type(brick_list[0] + "/")
        # NOTE(review): the strings below mix "Distributed-" and "Distribute-"
        # prefixes; confirm against get_volume_type()'s actual return values.
        if ret in ("Replicate", "Disperse", "Arbiter", "Distributed-Replicate",
                   "Distribute-Disperse", "Distribute-Arbiter"):
            cmd = ("ls -lR %s/file*_6 | wc -l" % m_point)
            ret, out, _ = g.run(self.clients[0], cmd)
            self.assertEqual(int(out), 1000, "Rename failed on some files")
            g.log.info("All the files are renamed successfully")
from glusto.core import Glusto as g
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.lib_utils import get_usable_size_per_disk
from glustolibs.gluster.brick_libs import get_all_bricks


@runs_on([['distributed'], ['glusterfs']])
class TestRenameWithBricksMinFreeLimitCrossed(GlusterBaseClass):
    """Verify rename still works once the brick is filled up to the
    min-free-disk limit, while new writes are rejected."""

    def setUp(self):
        """Create a single-brick distributed volume and mount it."""
        self.get_super_method(self, 'setUp')()

        # Changing dist_count to 1
        self.volume['voltype']['dist_count'] = 1

        # Creating Volume and mounting the volume
        ret = self.setup_volume_and_mount_volume([self.mounts[0]])
        if not ret:
            raise ExecutionError("Volume creation or mount failed: %s"
                                 % self.volname)

        self.first_client = self.mounts[0].client_system
        self.mount_point = self.mounts[0].mountpoint

    def tearDown(self):
        """Unmount and cleanup the volume created in setUp."""
        ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]])
        if not ret:
            raise ExecutionError("Unable to delete volume %s" % self.volname)

        self.get_super_method(self, 'tearDown')()

    def test_rename_with_brick_min_free_limit_crossed(self):
        """
        Test case:
        1. Create a volume, start it and mount it.
        2. Calculate the usable size and fill till it reaches min free limit
        3. Rename the file
        4. Try to perform I/O from mount point.(This should fail)
        """
        bricks = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks, "Failed to get the brick list")

        # Calculate the usable size and fill till it reaches
        # min free limit
        usable_size = get_usable_size_per_disk(bricks[0])
        self.assertIsNotNone(usable_size, "Failed to get usable size of brick")
        ret, _, _ = g.run(self.first_client, "fallocate -l {}G {}/file"
                          .format(usable_size, self.mount_point))
        self.assertFalse(ret, "Failed to fill disk to min free limit")
        g.log.info("Disk filled up to min free limit")

        # Rename the file; rename must succeed even with the disk full
        ret, _, _ = g.run(self.first_client, "mv {}/file {}/Renamedfile"
                          .format(self.mount_point, self.mount_point))
        self.assertFalse(ret, "Rename failed on file to Renamedfile")
        g.log.info("File renamed successfully")

        # Try to perform I/O from mount point(This should fail)
        ret, _, _ = g.run(self.first_client,
                          "fallocate -l 5G {}/mfile".format(self.mount_point))
        self.assertTrue(ret,
                        "Unexpected: Able to do I/O even when disks are "
                        "filled to min free limit")
        g.log.info("Expected: Unable to perform I/O as min free disk is hit")
and Cleanup Volume + def tearDown(self): + + # Unmount and cleanup original volume g.log.info("Starting to Unmount Volume and Cleanup Volume") - ret = cls.unmount_volume_and_cleanup_volume(mounts=cls.mounts) + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: - raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") - g.log.info("Successful in Unmount Volume and Cleanup Volume") + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") # Calling GlusterBaseClass tearDown - cls.get_super_method(cls, 'tearDownClass')() + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/dht/test_rmdir_subvol_down.py b/tests/functional/dht/test_rmdir_subvol_down.py new file mode 100755 index 000000000..d029bfc99 --- /dev/null +++ b/tests/functional/dht/test_rmdir_subvol_down.py @@ -0,0 +1,362 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+""" +Description: + Test cases in this module tests directory rmdir with subvol down +""" + +from time import sleep +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.brick_libs import bring_bricks_offline +from glustolibs.gluster.volume_libs import get_subvols, volume_start +from glustolibs.gluster.glusterfile import file_exists +from glustolibs.gluster.dht_test_utils import (find_hashed_subvol, + find_nonhashed_subvol, + create_brickobjectlist) +from glustolibs.gluster.dht_test_utils import validate_files_in_dir +from glustolibs.gluster.constants import FILETYPE_DIRS +from glustolibs.gluster.constants import (TEST_LAYOUT_IS_COMPLETE as + LAYOUT_IS_COMPLETE) +from glustolibs.gluster.glusterdir import mkdir, rmdir +from glustolibs.gluster.mount_ops import umount_volume, mount_volume + + +@runs_on([['distributed-replicated', 'distributed', + 'distributed-dispersed', 'distributed-arbiter'], + ['glusterfs']]) +class TestLookupDir(GlusterBaseClass): + # Create Volume and mount according to config file + def setUp(self): + """ + Setup and mount volume or raise ExecutionError + """ + self.get_super_method(self, 'setUp')() + # Change the dist count to 4 in case of 'distributed-replicated' , + # 'distributed-dispersed' and 'distributed-arbiter' + if self.volume_type in ("distributed-replicated", + "distributed-dispersed", + "distributed-arbiter"): + self.volume['voltype']['dist_count'] = 4 + + # Setup Volume + ret = self.setup_volume_and_mount_volume(self.mounts) + if not ret: + raise ExecutionError("Failed to Setup and Mount Volume") + mount_obj = self.mounts[0] + self.mountpoint = mount_obj.mountpoint + + # Collect subvols + self.subvols = (get_subvols + (self.mnode, self.volname))['volume_subvols'] + + def test_rmdir_child_when_nonhash_vol_down(self): + """ + case -1: + - create parent + - bring down a non-hashed subvolume 
for directory child + - create parent/child + - rmdir /mnt/parent will fail with ENOTCONN + """ + # pylint: disable=protected-access + # pylint: disable=too-many-statements + # pylint: disable=unsubscriptable-object + # Find a non hashed subvolume(or brick) + + # Create parent dir + parent_dir = self.mountpoint + '/parent' + child_dir = parent_dir + '/child' + ret = mkdir(self.clients[0], parent_dir) + self.assertTrue(ret, "mkdir failed") + g.log.info("mkdir of parent directory %s successful", parent_dir) + + # Find a non hashed subvolume(or brick) + nonhashed_subvol, count = find_nonhashed_subvol(self.subvols, + "parent", "child") + self.assertIsNotNone(nonhashed_subvol, + "Error in finding nonhashed value") + g.log.info("nonhashed_subvol %s", nonhashed_subvol._host) + + # Bring nonhashed_subbvol offline + ret = bring_bricks_offline(self.volname, self.subvols[count]) + self.assertTrue(ret, ("Error in bringing down subvolume %s" + % self.subvols[count])) + g.log.info('target subvol %s is offline', self.subvols[count]) + + # Create child-dir + ret = mkdir(self.clients[0], child_dir) + self.assertTrue(ret, ('mkdir failed for %s ' % child_dir)) + g.log.info("mkdir of child directory %s successful", child_dir) + + # 'rmdir' on parent should fail with ENOTCONN + ret = rmdir(self.clients[0], parent_dir) + self.assertFalse(ret, ('Expected rmdir to fail for %s' % parent_dir)) + g.log.info("rmdir of parent directory %s failed as expected", + parent_dir) + + # Cleanup + # Bring up the subvol - restart volume + ret = volume_start(self.mnode, self.volname, force=True) + self.assertTrue(ret, "Error in force start the volume") + g.log.info('Volume restart success') + sleep(10) + + # Delete parent_dir + ret = rmdir(self.clients[0], parent_dir, force=True) + self.assertTrue(ret, ('rmdir failed for %s ' % parent_dir)) + g.log.info("rmdir of directory %s successful", parent_dir) + + def test_rmdir_dir_when_hash_nonhash_vol_down(self): + """ + case -2: + - create dir1 and dir2 + - 
bring down hashed subvol for dir1 + - bring down a non-hashed subvol for dir2 + - rmdir dir1 should fail with ENOTCONN + - rmdir dir2 should fail with ENOTCONN + """ + # pylint: disable=protected-access + # pylint: disable=too-many-statements + # pylint: disable=unsubscriptable-object + + # Create dir1 and dir2 + directory_list = [] + for number in range(1, 3): + directory_list.append('{}/dir{}'.format(self.mountpoint, number)) + ret = mkdir(self.clients[0], directory_list[-1]) + self.assertTrue(ret, ('mkdir failed for %s ' + % directory_list[-1])) + g.log.info("mkdir of directory %s successful", + directory_list[-1]) + + # Find a non hashed subvolume(or brick) + nonhashed_subvol, count = find_nonhashed_subvol(self.subvols, "/", + "dir1") + self.assertIsNotNone(nonhashed_subvol, + "Error in finding nonhashed value") + g.log.info("nonhashed_subvol %s", nonhashed_subvol._host) + + # Bring nonhashed_subbvol offline + ret = bring_bricks_offline(self.volname, self.subvols[count]) + self.assertTrue(ret, ('Error in bringing down subvolume %s' + % self.subvols[count])) + g.log.info('target subvol %s is offline', self.subvols[count]) + + # 'rmdir' on dir1 should fail with ENOTCONN + ret = rmdir(self.clients[0], directory_list[0]) + self.assertFalse(ret, ('Expected rmdir to fail for %s' + % directory_list[0])) + g.log.info("rmdir of directory %s failed as expected", + directory_list[0]) + + # Bring up the subvol - restart volume + ret = volume_start(self.mnode, self.volname, force=True) + self.assertTrue(ret, "Error in force start the volume") + g.log.info('Volume restart success') + sleep(10) + + # Unmounting and Mounting the volume back to Heal + ret, _, err = umount_volume(self.clients[1], self.mountpoint) + self.assertFalse(ret, "Error in creating temp mount %s" % err) + + ret, _, err = mount_volume(self.volname, + mtype='glusterfs', + mpoint=self.mountpoint, + mserver=self.servers[0], + mclient=self.clients[1]) + self.assertFalse(ret, "Error in creating temp mount") + + 
ret, _, _ = g.run(self.clients[1], ("ls %s/dir1" % self.mountpoint)) + self.assertEqual(ret, 0, "Error in lookup for dir1") + g.log.info("lookup successful for dir1") + + # This confirms that healing is done on dir1 + ret = validate_files_in_dir(self.clients[0], + directory_list[0], + test_type=LAYOUT_IS_COMPLETE, + file_type=FILETYPE_DIRS) + self.assertTrue(ret, "validate_files_in_dir for dir1 failed") + g.log.info("healing successful for dir1") + + # Bring down the hashed subvol + # Find a hashed subvolume(or brick) + hashed_subvol, count = find_hashed_subvol(self.subvols, "/", "dir2") + self.assertIsNotNone(hashed_subvol, + "Error in finding nonhashed value") + g.log.info("hashed_subvol %s", hashed_subvol._host) + + # Bring hashed_subbvol offline + ret = bring_bricks_offline(self.volname, self.subvols[count]) + self.assertTrue(ret, ('Error in bringing down subvolume %s', + self.subvols[count])) + g.log.info('target subvol %s is offline', self.subvols[count]) + + # 'rmdir' on dir2 should fail with ENOTCONN + ret = rmdir(self.clients[0], directory_list[1]) + self.assertFalse(ret, ('Expected rmdir to fail for %s' + % directory_list[1])) + g.log.info("rmdir of dir2 directory %s failed as expected", + directory_list[1]) + + # Cleanup + # Bring up the subvol - restart the volume + ret = volume_start(self.mnode, self.volname, force=True) + self.assertTrue(ret, "Error in force start the volume") + g.log.info('Volume restart success') + sleep(10) + + # Delete dirs + for directory in directory_list: + ret = rmdir(self.clients[0], directory) + self.assertTrue(ret, ('rmdir failed for %s ' % directory)) + g.log.info("rmdir of directory %s successful", directory) + + def test_rm_file_when_nonhash_vol_down(self): + """ + case -3: + - create parent + - mkdir parent/child + - touch parent/child/file + - bringdown a subvol where file is not present + - rm -rf parent + - Only file should be deleted + - rm -rf of parent should fail with ENOTCONN + """ + # pylint: 
disable=protected-access + # pylint: disable=too-many-statements + # pylint: disable=unsubscriptable-object + + # Find a non hashed subvolume(or brick) + # Create parent dir + parent_dir = self.mountpoint + '/parent' + child_dir = parent_dir + '/child' + ret = mkdir(self.clients[0], parent_dir) + self.assertTrue(ret, ('mkdir failed for %s ' % parent_dir)) + g.log.info("mkdir of parent directory %s successful", parent_dir) + + # Create child dir + ret = mkdir(self.clients[0], child_dir) + self.assertTrue(ret, ('mkdir failed for %s ' % child_dir)) + g.log.info("mkdir of child directory %s successful", child_dir) + + # Create a file under child_dir + file_one = child_dir + '/file_one' + ret, _, err = g.run(self.clients[0], ("touch %s" % file_one)) + self.assertFalse(ret, ('touch failed for %s err: %s' % + (file_one, err))) + + # Find a non hashed subvolume(or brick) + nonhashed_subvol, count = find_nonhashed_subvol(self.subvols, + "parent/child", + "file_one") + self.assertIsNotNone(nonhashed_subvol, + "Error in finding nonhashed value") + g.log.info("nonhashed_subvol %s", nonhashed_subvol._host) + + # Bring nonhashed_subbvol offline + ret = bring_bricks_offline(self.volname, self.subvols[count]) + self.assertTrue(ret, ('Error in bringing down subvolume %s' + % self.subvols[count])) + g.log.info('target subvol %s is offline', self.subvols[count]) + + # 'rm -rf' on parent should fail with ENOTCONN + ret = rmdir(self.clients[0], parent_dir) + self.assertFalse(ret, ('Expected rmdir to fail for %s' % parent_dir)) + g.log.info("rmdir of parent directory %s failed as expected" + " with err %s", parent_dir, err) + + brickobject = create_brickobjectlist(self.subvols, "parent/child") + self.assertIsNotNone(brickobject, + "could not create brickobject list") + # Make sure file_one is deleted + for brickdir in brickobject: + dir_path = "%s/parent/child/file_one" % brickdir.path + brick_path = dir_path.split(":") + self.assertTrue((file_exists(brickdir._host, brick_path[1])) == 
0, + ('Expected file %s not to exist on servers' + % parent_dir)) + g.log.info("file is deleted as expected") + + # Cleanup + # Bring up the subvol - restart volume + ret = volume_start(self.mnode, self.volname, force=True) + self.assertTrue(ret, "Error in force start the volume") + g.log.info('Volume restart success.') + sleep(10) + + # Delete parent_dir + ret = rmdir(self.clients[0], parent_dir, force=True) + self.assertTrue(ret, ('rmdir failed for %s ' % parent_dir)) + g.log.info("rmdir of directory %s successful", parent_dir) + + def test_rmdir_parent_pre_nonhash_vol_down(self): + """ + case -4: + - Bring down a non-hashed subvol for parent_dir + - mkdir parent + - rmdir parent should fails with ENOTCONN + """ + # pylint: disable=protected-access + # pylint: disable=too-many-statements + # pylint: disable=unsubscriptable-object + + nonhashed_subvol, count = find_nonhashed_subvol(self.subvols, + "/", "parent") + self.assertIsNotNone(nonhashed_subvol, + 'Error in finding nonhashed subvol') + g.log.info("nonhashed subvol %s", nonhashed_subvol._host) + + # Bring nonhashed_subbvol offline + ret = bring_bricks_offline(self.volname, self.subvols[count]) + self.assertTrue(ret, ('Error in bringing down subvolume %s' + % self.subvols[count])) + g.log.info('target subvol %s is offline', self.subvols[count]) + + parent_dir = self.mountpoint + '/parent' + ret = mkdir(self.clients[0], parent_dir) + self.assertTrue(ret, ('mkdir failed for %s ' % parent_dir)) + g.log.info("mkdir of parent directory %s successful", parent_dir) + + # 'rmdir' on parent should fail with ENOTCONN + ret = rmdir(self.clients[0], parent_dir) + self.assertFalse(ret, ('Expected rmdir to fail for %s' % parent_dir)) + g.log.info("rmdir of parent directory %s failed as expected", + parent_dir) + + # Cleanup + # Bring up the subvol - restart volume + ret = volume_start(self.mnode, self.volname, force=True) + self.assertTrue(ret, "Error in force start the volume") + g.log.info('Volume restart success.') + 
sleep(10) + + # Delete parent_dir + ret = rmdir(self.clients[0], parent_dir, force=True) + self.assertTrue(ret, ('rmdir failed for %s ' % parent_dir)) + g.log.info("rmdir of directory %s successful", parent_dir) + + def tearDown(self): + """ + Unmount Volume and Cleanup Volume + """ + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") + g.log.info("Successful in Unmount Volume and Cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/dht/test_sparse_file_creation_and_deletion.py b/tests/functional/dht/test_sparse_file_creation_and_deletion.py new file mode 100644 index 000000000..7404ece90 --- /dev/null +++ b/tests/functional/dht/test_sparse_file_creation_and_deletion.py @@ -0,0 +1,156 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
from time import sleep

from glusto.core import Glusto as g
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.lib_utils import get_size_of_mountpoint


@runs_on([['distributed-replicated', 'distributed-arbiter',
           'distributed-dispersed', 'distributed'], ['glusterfs']])
class TestSparseFileCreationAndDeletion(GlusterBaseClass):
    """Verify sparse files report allocated size (du) smaller than
    apparent size (ls), and that deleting them frees the space."""

    def setUp(self):

        self.get_super_method(self, 'setUp')()

        # Changing dist_count to 5
        self.volume['voltype']['dist_count'] = 5

        # Creating Volume and mounting the volume
        ret = self.setup_volume_and_mount_volume([self.mounts[0]])
        if not ret:
            raise ExecutionError("Volume creation or mount failed: %s"
                                 % self.volname)

        # Assign a variable for the first_client
        self.first_client = self.mounts[0].client_system

    def tearDown(self):

        # Unmounting and cleaning volume
        ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]])
        if not ret:
            raise ExecutionError("Unable to delete volume %s" % self.volname)

        self.get_super_method(self, 'tearDown')()

    def _create_two_sparse_files(self):
        """Create 2 sparse files from /dev/zero and /dev/null"""

        # Create a tuple to hold both the file names
        self.sparse_file_tuple = (
            "{}/sparse_file_zero".format(self.mounts[0].mountpoint),
            "{}/sparse_file_null".format(self.mounts[0].mountpoint)
        )

        # Create 2 sparse files where one is created from /dev/zero and
        # another is created from /dev/null. seek=5120 leaves a 5 GB hole
        # so the apparent size far exceeds the allocated size.
        for filename, input_file in ((self.sparse_file_tuple[0], "/dev/zero"),
                                     (self.sparse_file_tuple[1], "/dev/null")):
            cmd = ("dd if={} of={} bs=1M seek=5120 count=1000"
                   .format(input_file, filename))
            ret, _, _ = g.run(self.first_client, cmd)
            self.assertEqual(ret, 0, 'Failed to create %s ' % filename)

        g.log.info("Successfully created sparse_file_zero and"
                   " sparse_file_null")

    def _check_du_and_ls_of_sparse_file(self):
        """Check du and ls -lks on sparse files"""

        for filename in self.sparse_file_tuple:

            # Fetch output of ls -lks for the sparse file.
            # Field 5 (0-based) is the apparent size in bytes; split()
            # rather than split(" ") so runs of spaces don't shift fields.
            cmd = "ls -lks {}".format(filename)
            ret, out, _ = g.run(self.first_client, cmd)
            self.assertEqual(ret, 0, "Failed to get ls -lks for file %s "
                             % filename)
            ls_value = out.split()[5]

            # Fetch output of du for the sparse file (allocated bytes)
            cmd = "du --block-size=1 {}".format(filename)
            ret, out, _ = g.run(self.first_client, cmd)
            self.assertEqual(ret, 0, "Failed to get du for file %s "
                             % filename)
            du_value = out.split("\t")[0]

            # Compare du and ls -lks value: a sparse file's allocated
            # size must differ from its apparent size
            self.assertNotEqual(ls_value, du_value,
                                "Unexpected: Sparse file size coming up same "
                                "for du and ls -lks")

        g.log.info("Successfully checked sparse file size using ls and du")

    def _delete_two_sparse_files(self):
        """Delete sparse files"""

        for filename in self.sparse_file_tuple:
            cmd = "rm -rf {}".format(filename)
            ret, _, _ = g.run(self.first_client, cmd)
            self.assertEqual(ret, 0, 'Failed to delete %s ' % filename)

        g.log.info("Successfully remove both sparse files")

    def test_sparse_file_creation_and_deletion(self):
        """
        Test case:
        1. Create volume with 5 sub-volumes, start and mount it.
        2. Check df -h for available size.
        3. Create 2 sparse file one from /dev/null and one from /dev/zero.
        4. Find out size of files and compare them through du and ls.
           (They shouldn't match.)
        5. Check df -h for available size.(It should be less than step 2.)
        6. Remove the files using rm -rf.
        """
        # Check df -h for available size
        available_space_at_start = get_size_of_mountpoint(
            self.first_client, self.mounts[0].mountpoint)
        self.assertIsNotNone(available_space_at_start,
                             "Failed to get available space on mount point")

        # Create 2 sparse file one from /dev/null and one from /dev/zero
        self._create_two_sparse_files()

        # Find out size of files and compare them through du and ls
        # (They shouldn't match)
        self._check_du_and_ls_of_sparse_file()

        # Check df -h for available size(It should be less than step 2)
        available_space_now = get_size_of_mountpoint(
            self.first_client, self.mounts[0].mountpoint)
        self.assertIsNotNone(available_space_now,
                             "Failed to get available space on mount point")
        ret = (int(available_space_at_start) > int(available_space_now))
        self.assertTrue(ret, "Available space at start not less than "
                        "available space now")

        # Remove the files using rm -rf
        self._delete_two_sparse_files()

        # Sleep for 180 seconds for the meta data in .glusterfs directory
        # to be removed
        sleep(180)

        # Check df -h after removing sparse files; allow a small tolerance
        # (1500 blocks) for metadata not yet reclaimed
        available_space_now = get_size_of_mountpoint(
            self.first_client, self.mounts[0].mountpoint)
        self.assertIsNotNone(available_space_now,
                             "Failed to get available space on mount point")
        ret = int(available_space_at_start) - int(available_space_now) < 1500
        self.assertTrue(ret, "Available space at start and available space now"
                        " is not equal")
from glusto.core import Glusto as g
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.volume_ops import (set_volume_options,
                                           reset_volume_option)
from glustolibs.gluster.volume_libs import expand_volume
from glustolibs.gluster.rebalance_ops import (rebalance_start,
                                              wait_for_fix_layout_to_complete)
from glustolibs.gluster.glusterfile import move_file


@runs_on([['distributed', 'distributed-replicated',
           'dispersed', 'distributed-dispersed', 'replicated',
           'arbiter', 'distributed-arbiter'],
          ['glusterfs']])
class TestStackOverflow(GlusterBaseClass):
    """Verify readdirp does not hit a stack overflow when
    parallel-readdir is enabled and linkto files exist."""

    def setUp(self):
        """
        Setup and mount volume or raise ExecutionError
        """
        self.get_super_method(self, 'setUp')()

        # Setup Volume
        ret = self.setup_volume_and_mount_volume([self.mounts[0]])
        if not ret:
            g.log.error("Failed to Setup and Mount Volume")
            raise ExecutionError("Failed to Setup and Mount Volume")

    def tearDown(self):
        """Reset the volume options set in the test, then unmount and
        cleanup the volume."""
        # Reset the volume options set inside the test
        vol_options = ['performance.parallel-readdir',
                       'performance.readdir-ahead']
        for opt in vol_options:
            ret, _, _ = reset_volume_option(self.mnode, self.volname, opt)
            if ret:
                raise ExecutionError("Failed to reset the volume option %s"
                                     % opt)
        g.log.info("Successfully reset the volume options")

        # Unmount and cleanup original volume
        ret = self.unmount_volume_and_cleanup_volume(mounts=[self.mounts[0]])
        if not ret:
            raise ExecutionError("Failed to umount the vol & cleanup Volume")
        g.log.info("Successful in umounting the volume and Cleanup")

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()

    def test_stack_overflow(self):
        """
        Description: Tests to check that there is no stack overflow
                     in readdirp with parallel-readdir enabled.
        Steps :
        1) Create a volume.
        2) Mount the volume using FUSE.
        3) Enable performance.parallel-readdir and
           performance.readdir-ahead on the volume.
        4) Create 10000 files on the mount point.
        5) Add-brick to the volume.
        6) Perform fix-layout on the volume (not rebalance).
        7) From client node, rename all the files, this will result in
           creation of linkto files on the newly added brick.
        8) Do ls -l (lookup) on the mount-point.
        """
        # pylint: disable=too-many-statements
        # Enable performance.parallel-readdir and
        # performance.readdir-ahead on the volume
        options = {"performance.parallel-readdir": "enable",
                   "performance.readdir-ahead": "enable"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, "Failed to set volume options")
        g.log.info("Successfully set volume options")

        # Creating 10000 files on volume root
        m_point = self.mounts[0].mountpoint
        command = 'touch ' + m_point + '/file{1..10000}_0'
        ret, _, _ = g.run(self.clients[0], command)
        self.assertEqual(ret, 0, "File creation failed on %s"
                         % m_point)
        g.log.info("Files successfully created on the mount point")

        # Add bricks to the volume
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to expand the volume %s",
                              self.volname))
        g.log.info("Expanding volume is successful on "
                   "volume %s", self.volname)

        # Perform fix-layout on the volume (layout only, no data migration)
        ret, _, _ = rebalance_start(self.mnode, self.volname, fix_layout=True)
        self.assertEqual(ret, 0, 'Failed to start rebalance')
        g.log.info('Rebalance is started')

        # Wait for fix-layout to complete
        ret = wait_for_fix_layout_to_complete(self.mnode, self.volname,
                                              timeout=3000)
        self.assertTrue(ret, ("Fix-layout failed on volume %s",
                              self.volname))
        g.log.info("Fix-layout is successful on "
                   "volume %s", self.volname)

        # Rename all 10000 files from client node; this creates linkto
        # files on the newly added brick. range() upper bound is
        # exclusive, hence 10001 so file10000_0 is renamed too.
        for i in range(1, 10001):
            ret = move_file(self.clients[0],
                            '{}/file{}_0'.format(m_point, i),
                            '{}/file{}_1'.format(m_point, i))
            self.assertTrue(ret, "Failed to rename files")
        g.log.info("Files renamed successfully")

        # Perform lookup from the mount-point; must not crash the client
        cmd = "ls -lR " + m_point
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to lookup")
        g.log.info("Lookup successful")
g.log.info("Successful in Unmount Volume and Cleanup Volume")
+            raise ExecutionError("Failed to umount the vol & cleanup Volume")
+        g.log.info("Successful in umounting the volume and Cleanup")
 
         # Calling GlusterBaseClass tearDown
-        cls.get_super_method(cls, 'tearDownClass')()
+        self.get_super_method(self, 'tearDown')()
diff --git a/tests/functional/dht/test_time_taken_for_ls.py b/tests/functional/dht/test_time_taken_for_ls.py
new file mode 100644
index 000000000..7c9653999
--- /dev/null
+++ b/tests/functional/dht/test_time_taken_for_ls.py
@@ -0,0 +1,105 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError + + +@runs_on([['distributed-replicated', 'distributed-arbiter', + 'distributed-dispersed'], ['glusterfs']]) +class TestTimeForls(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Setup Volume + if not self.setup_volume_and_mount_volume(self.mounts): + raise ExecutionError("Failed to Setup and mount volume") + + self.is_io_running = False + + def tearDown(self): + + if self.is_io_running: + self._validate_io() + + if not self.unmount_volume_and_cleanup_volume(self.mounts): + raise ExecutionError("Failed to Cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _validate_io(self): + """Validare I/O threads running on mount point""" + io_success = [] + for proc in self.proc_list: + try: + ret, _, _ = proc.async_communicate() + if ret: + io_success.append(False) + break + io_success.append(True) + except ValueError: + io_success.append(True) + return all(io_success) + + def test_time_taken_for_ls(self): + """ + Test case: + 1. Create a volume of type distributed-replicated or + distributed-arbiter or distributed-dispersed and start it. + 2. Mount the volume to clients and create 2000 directories + and 10 files inside each directory. + 3. Wait for I/O to complete on mount point and perform ls + (ls should complete within 10 seconds). 
+        """
+        # Creating 2000 directories on the mount point
+        ret, _, _ = g.run(self.mounts[0].client_system,
+                          "cd %s; for i in {1..2000};do mkdir dir$i;done"
+                          % self.mounts[0].mountpoint)
+        self.assertFalse(ret, 'Failed to create 2000 dirs on mount point')
+
+        # Create 10 files inside each directory
+        dirs = ('{1..100}', '{101..200}', '{201..300}', '{301..400}',
+                '{401..500}', '{501..600}', '{601..700}', '{701..800}',
+                '{801..900}', '{901..1000}', '{1001..1100}', '{1101..1200}',
+                '{1201..1300}', '{1301..1400}', '{1401..1500}', '{1501..1600}',
+                '{1601..1700}', '{1701..1800}', '{1801..1900}', '{1901..2000}')
+        self.proc_list, counter = [], 0
+        while counter < 20:
+            for mount_obj in self.mounts:
+                ret = g.run_async(mount_obj.client_system,
+                                  "cd %s;for i in %s;do "
+                                  "touch dir$i/file{1..10};done"
+                                  % (mount_obj.mountpoint, dirs[counter]))
+                self.proc_list.append(ret)
+                counter += 1
+        self.is_io_running = True
+
+        # Check if I/O is successful or not
+        ret = self._validate_io()
+        self.assertTrue(ret, "Failed to create Files and dirs on mount point")
+        self.is_io_running = False
+        g.log.info("Successfully created files and dirs needed for the test")
+
+        # Run ls on mount point which should get completed within 10 seconds
+        ret, _, _ = g.run(self.mounts[0].client_system,
+                          "cd %s; timeout 10 ls"
+                          % self.mounts[0].mountpoint)
+        self.assertFalse(ret, 'ls taking more than 10 seconds')
+        g.log.info("ls completed in under 10 seconds")
diff --git a/tests/functional/dht/test_verify_create_hash.py b/tests/functional/dht/test_verify_create_hash.py
index 83f6a7777..5ed2a97a0 100644
--- a/tests/functional/dht/test_verify_create_hash.py
+++ b/tests/functional/dht/test_verify_create_hash.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com>
+# Copyright (C) 2017-2020 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -104,7 +104,7 @@ class TestCreateFile(GlusterBaseClass): for brickdir in brickobject: count += 1 ret = brickdir.hashrange_contains_hash(filehash) - if ret == 1: + if ret: hash_subvol = subvols[count] ret, _, err = g.run(brickdir._host, ("stat %s/file1" % brickdir._fqpath)) @@ -153,14 +153,14 @@ class TestCreateFile(GlusterBaseClass): ret, _, _ = g.run(self.clients[0], ("touch %s" % file_one)) self.assertTrue(ret, "Expected file creation to fail") - @classmethod - def tearDownClass(cls): - # Unmount Volume and Cleanup Volume + def tearDown(self): + + # Unmount and cleanup original volume g.log.info("Starting to Unmount Volume and Cleanup Volume") - ret = cls.unmount_volume_and_cleanup_volume(mounts=cls.mounts) + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: - raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") - g.log.info("Successful in Unmount Volume and Cleanup Volume") + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") - # Calling GlusterBaseClass tearDownClass - cls.get_super_method(cls, 'tearDownClass')() + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/dht/test_verify_permissions_on_root_dir_when_brick_down.py b/tests/functional/dht/test_verify_permissions_on_root_dir_when_brick_down.py new file mode 100644 index 000000000..f6228c122 --- /dev/null +++ b/tests/functional/dht/test_verify_permissions_on_root_dir_when_brick_down.py @@ -0,0 +1,134 @@ +# Copyright (C) 2021 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g + +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.glusterfile import set_file_permissions +from glustolibs.gluster.brick_libs import (get_all_bricks, + bring_bricks_offline, + bring_bricks_online) + + +@runs_on([['distributed', 'distributed-replicated', 'distributed-dispersed', + 'distributed-arbiter'], + ['glusterfs']]) +class TestVerifyPermissionChanges(GlusterBaseClass): + def setUp(self): + """ + Setup and mount volume + """ + self.get_super_method(self, 'setUp')() + + # Setup Volume + if not self.setup_volume_and_mount_volume(mounts=[self.mounts[0]]): + raise ExecutionError("Failed to Setup and Mount Volume") + + def _set_root_dir_permission(self, permission): + """ Sets the root dir permission to the given value""" + m_point = self.mounts[0].mountpoint + ret = set_file_permissions(self.clients[0], m_point, permission) + self.assertTrue(ret, "Failed to set root dir permissions") + + def _get_dir_permissions(self, host, directory): + """ Returns dir permissions""" + cmd = 'stat -c "%a" {}'.format(directory) + ret, out, _ = g.run(host, cmd) + self.assertEqual(ret, 0, "Failed to get permission on 
{}".format(host)) + return out.strip() + + def _get_root_dir_permission(self, expected=None): + """ Returns the root dir permission """ + permission = self._get_dir_permissions(self.mounts[0].client_system, + self.mounts[0].mountpoint) + if not expected: + return permission.strip() + self.assertEqual(permission, expected, "The permissions doesn't match") + return True + + def _bring_a_brick_offline(self): + """ Brings down a brick from the volume""" + brick_to_kill = get_all_bricks(self.mnode, self.volname)[-1] + ret = bring_bricks_offline(self.volname, brick_to_kill) + self.assertTrue(ret, "Failed to bring brick offline") + return brick_to_kill + + def _bring_back_brick_online(self, brick): + """ Brings back down brick from the volume""" + ret = bring_bricks_online(self.mnode, self.volname, brick) + self.assertTrue(ret, "Failed to bring brick online") + + def _verify_mount_dir_and_brick_dir_permissions(self, expected, + down_brick=None): + """ Verifies the mount directory and brick dir permissions are same""" + # Get root dir permission and verify + self._get_root_dir_permission(expected) + + # Verify brick dir permission + brick_list = get_all_bricks(self.mnode, self.volname) + for brick in brick_list: + brick_node, brick_path = brick.split(":") + if down_brick and down_brick.split(":")[-1] != brick_path: + actual_perm = self._get_dir_permissions(brick_node, + brick_path) + self.assertEqual(actual_perm, expected, + "The permissions are not same") + + def test_verify_root_dir_permission_changes(self): + """ + 1. create pure dist volume + 2. mount on client + 3. Checked default permission (should be 755) + 4. Change the permission to 444 and verify + 5. Kill a brick + 6. Change root permission to 755 + 7. Verify permission changes on all bricks, except down brick + 8. 
Bring back the brick and verify the changes are reflected + """ + + # Verify the default permission on root dir is 755 + self._verify_mount_dir_and_brick_dir_permissions("755") + + # Change root permission to 444 + self._set_root_dir_permission("444") + + # Verify the changes were successful + self._verify_mount_dir_and_brick_dir_permissions("444") + + # Kill a brick + offline_brick = self._bring_a_brick_offline() + + # Change root permission to 755 + self._set_root_dir_permission("755") + + # Verify the permission changed to 755 on mount and brick dirs + self._verify_mount_dir_and_brick_dir_permissions("755", offline_brick) + + # Bring brick online + self._bring_back_brick_online(offline_brick) + + # Verify the permission changed to 755 on mount and brick dirs + self._verify_mount_dir_and_brick_dir_permissions("755") + + def tearDown(self): + # Unmount and cleanup original volume + if not self.unmount_volume_and_cleanup_volume(mounts=[self.mounts[0]]): + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/dht/test_volume_start_stop_while_rebalance_in_progress.py b/tests/functional/dht/test_volume_start_stop_while_rebalance_in_progress.py index 73f9949ef..55099c811 100644 --- a/tests/functional/dht/test_volume_start_stop_while_rebalance_in_progress.py +++ b/tests/functional/dht/test_volume_start_stop_while_rebalance_in_progress.py @@ -131,8 +131,8 @@ class RebalanceValidation(GlusterBaseClass): # Log Volume Info and Status before expanding the volume. 
g.log.info("Logging volume info and Status before expanding volume") ret = log_volume_info_and_status(self.mnode, self.volname) - g.log.error(ret, "Logging volume info and status failed on " - "volume %s", self.volname) + self.assertTrue(ret, ("Logging volume info and status failed on " + "volume %s", self.volname)) g.log.info("Logging volume info and status was successful for volume " "%s", self.volname) @@ -208,14 +208,14 @@ class RebalanceValidation(GlusterBaseClass): self.volname)) g.log.info("Volume %s state is \"Started\"", self.volname) - @classmethod - def tearDownClass(cls): - # Unmount Volume and Cleanup Volume + def tearDown(self): + + # Unmount and cleanup original volume g.log.info("Starting to Unmount Volume and Cleanup Volume") - ret = cls.unmount_volume_and_cleanup_volume(mounts=cls.mounts) + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: - raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") - g.log.info("Volume %s unmount and cleanup: Success", cls.volname) + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") - # Calling GlusterBaseClass tearDownClass - cls.get_super_method(cls, 'tearDownClass')() + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/dht/test_wipe_out_directory_permissions.py b/tests/functional/dht/test_wipe_out_directory_permissions.py new file mode 100644 index 000000000..485aaf0d5 --- /dev/null +++ b/tests/functional/dht/test_wipe_out_directory_permissions.py @@ -0,0 +1,132 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. 
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.brick_libs import get_all_bricks
+from glustolibs.gluster.brick_ops import add_brick
+from glustolibs.gluster.lib_utils import form_bricks_list
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.gluster.glusterfile import get_file_stat, get_fattr
+
+
+@runs_on([['distributed'], ['glusterfs']])
+class TestDhtWipeOutDirectoryPermissions(GlusterBaseClass):
+
+    def setUp(self):
+
+        self.get_super_method(self, 'setUp')()
+
+        # Changing dist_count to 1
+        self.volume['voltype']['dist_count'] = 1
+
+        # Creating Volume and mounting the volume
+        ret = self.setup_volume_and_mount_volume([self.mounts[0]])
+        if not ret:
+            raise ExecutionError("Volume creation or mount failed: %s"
+                                 % self.volname)
+
+        # Assign a variable for the first_client
+        self.first_client = self.mounts[0].client_system
+
+    def tearDown(self):
+
+        # Unmounting and cleaning volume
+        ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]])
+        if not ret:
+            raise ExecutionError("Unable to delete volume %s" % self.volname)
+
+        self.get_super_method(self, 'tearDown')()
+
+    def _check_permissions_of_dir(self):
+        """Check permissions of dir created."""
+        for brick_path in get_all_bricks(self.mnode, self.volname):
+            node, path = brick_path.split(":")
+            ret = get_file_stat(node, "{}/dir".format(path))
+            self.assertEqual(int(ret["access"]), 755, 
+ "Unexpected:Permissions of dir is %s and not %d" + % (ret["access"], 755)) + g.log.info("Permissions of dir directory is proper on all bricks") + + def _check_trusted_glusterfs_dht_on_all_bricks(self): + """Check trusted.glusterfs.dht xattr on the backend bricks""" + bricks = get_all_bricks(self.mnode, self.volname) + possible_values = ["0x000000000000000000000000ffffffff", + "0x00000000000000000000000000000000"] + for brick_path in bricks: + node, path = brick_path.split(":") + ret = get_fattr(node, "{}/dir".format(path), + "trusted.glusterfs.dht") + self.assertEqual( + ret, possible_values[bricks.index(brick_path)], + "Value of trusted.glusterfs.dht is not as expected") + g.log.info("Successfully checked value of trusted.glusterfs.dht.") + + def test_wipe_out_directory_permissions(self): + """ + Test case: + 1. Create a 1 brick pure distributed volume. + 2. Start the volume and mount it on a client node using FUSE. + 3. Create a directory on the mount point. + 4. Check trusted.glusterfs.dht xattr on the backend brick. + 5. Add brick to the volume using force. + 6. Do lookup from the mount point. + 7. Check the directory permissions from the backend bricks. + 8. Check trusted.glusterfs.dht xattr on the backend bricks. + 9. From mount point cd into the directory. + 10. Check the directory permissions from backend bricks. + 11. Check trusted.glusterfs.dht xattr on the backend bricks. 
+ """ + # Create a directory on the mount point + self.dir_path = "{}/dir".format(self.mounts[0].mountpoint) + ret = mkdir(self.first_client, self.dir_path) + self.assertTrue(ret, "Failed to create directory dir") + + # Check trusted.glusterfs.dht xattr on the backend brick + self._check_trusted_glusterfs_dht_on_all_bricks() + + # Add brick to the volume using force + brick_list = form_bricks_list(self.mnode, self.volname, 1, + self.servers, self.all_servers_info) + self.assertIsNotNone(brick_list, + "Failed to get available space on mount point") + ret, _, _ = add_brick(self.mnode, self.volname, brick_list, force=True) + self.assertEqual(ret, 0, ("Volume {}: Add-brick failed".format + (self.volname))) + + # Do a lookup from the mount point + cmd = "ls -lR {}".format(self.dir_path) + ret, _, _ = g.run(self.first_client, cmd) + self.assertEqual(ret, 0, "Failed to lookup") + g.log.info("Lookup successful") + + # Check the directory permissions from the backend bricks + self._check_permissions_of_dir() + + # Check trusted.glusterfs.dht xattr on the backend bricks + self._check_trusted_glusterfs_dht_on_all_bricks() + + # From mount point cd into the directory + ret, _, _ = g.run(self.first_client, "cd {};cd ..;cd {}" + .format(self.dir_path, self.dir_path)) + self.assertEqual(ret, 0, "Unable to cd into dir from mount point") + + # Check the directory permissions from backend bricks + self._check_permissions_of_dir() + + # Check trusted.glusterfs.dht xattr on the backend bricks + self._check_trusted_glusterfs_dht_on_all_bricks() diff --git a/tests/functional/disperse/test_disperse_eager_lock.py b/tests/functional/disperse/test_disperse_eager_lock.py new file mode 100644 index 000000000..7f7ee84f5 --- /dev/null +++ b/tests/functional/disperse/test_disperse_eager_lock.py @@ -0,0 +1,71 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from random import choice +import string + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, + runs_on) +from glustolibs.gluster.volume_ops import set_volume_options + + +@runs_on([['dispersed', 'distributed-dispersed'], ['glusterfs']]) +class TestDisperseEagerLock(GlusterBaseClass): + def setUp(self): + ret = self.setup_volume() + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + @staticmethod + def get_random_string(chars, str_len=4): + return ''.join((choice(chars) for _ in range(str_len))) + + def test_disperse_eager_lock_cli(self): + """ + Testcase Steps: + 1.Create an EC volume + 2.Set the eager lock option by turning + on disperse.eager-lock by using different inputs: + - Try non boolean values(Must fail) + - Try boolean values + """ + # Set the eager lock option by turning + # on disperse.eager-lock by using different inputs + key = 'disperse.eager-lock' + + # Set eager lock option with non-boolean value + for char_type in (string.ascii_letters, string.punctuation, + string.printable, string.digits): + temp_val = self.get_random_string(char_type) + value = "{}".format(temp_val) + ret = 
set_volume_options(self.mnode, self.volname, {key: value})
+            self.assertFalse(ret, "Unexpected: Erroneous value {}, to option "
+                             "{} should result in failure".format(value, key))
+
+        # Set eager lock option with boolean value
+        for value in ('1', '0', 'off', 'on', 'disable', 'enable'):
+            ret = set_volume_options(self.mnode, self.volname, {key: value})
+            self.assertTrue(ret, "Unexpected: Boolean value {},"
+                            " to option {} shouldn't result in failure"
+                            .format(value, key))
+        g.log.info("Only Boolean values are accepted by eager lock.")
+
+    def tearDown(self):
+        ret = self.cleanup_volume()
+        if not ret:
+            raise ExecutionError("Failed to Unmount Volume and Cleanup Volume")
diff --git a/tests/functional/disperse/test_ec_all_healtypes.py b/tests/functional/disperse/test_ec_all_healtypes.py
new file mode 100644
index 000000000..f3210b6a7
--- /dev/null
+++ b/tests/functional/disperse/test_ec_all_healtypes.py
@@ -0,0 +1,285 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ +""" +Test Description: + Tests FOps and Data Deletion on a healthy EC volume +""" +from random import sample +from glusto.core import Glusto as g + +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import validate_io_procs, collect_mounts_arequal +from glustolibs.gluster.brick_libs import (get_all_bricks, + bring_bricks_online, + wait_for_bricks_to_be_online, + get_offline_bricks_list, + bring_bricks_offline) +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.heal_libs import monitor_heal_completion + + +@runs_on([['dispersed', 'distributed-dispersed'], + ['glusterfs']]) +class TestEcAllHealTypes(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + 
self.get_super_method(self, 'tearDown')() + + def test_ec_all_healtypes(self): + """ + Test steps: + - Create directory dir1 + - Create files inside dir1 + - Rename all file inside dir1 + - Create softlink and hardlink of files in mountpoint + - Create tiny, small, medium nd large file + - Get arequal of dir1 + - Create directory dir2 + - Creating files on dir2 + - Bring down other bricks to max redundancy + - Create directory dir3 + - Start pumping IO to dir3 + - Validating IO's on dir2 and waiting to complete + - Bring bricks online + - Wait for bricks to come online + - Check if bricks are online + - Monitor heal completion + - Get arequal of dir1 + - Compare arequal of dir1 + """ + + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + # Get the bricks from the volume + bricks_list = get_all_bricks(self.mnode, self.volname) + g.log.info("Brick List : %s", bricks_list) + + mountpoint = self.mounts[0].mountpoint + client = self.mounts[0].client_system + + # Creating dir1 + ret = mkdir(client, "%s/dir1" + % mountpoint) + self.assertTrue(ret, "Failed to create dir1") + g.log.info("Directory dir1 on %s created successfully", self.mounts[0]) + + # Create files inside dir1 + cmd = ('touch %s/dir1/file{1..5};' + % mountpoint) + ret, _, _ = g.run(client, cmd) + self.assertFalse(ret, "File creation failed") + g.log.info("File created successfull") + + # Rename all files inside dir1 + cmd = ('cd %s/dir1/; ' + 'for FILENAME in *;' + 'do mv $FILENAME Unix_$FILENAME; cd ~;' + 'done;' + % mountpoint) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, "Failed to rename files on " + "client") + g.log.info("Successfully renamed files on client") + + # Create softlink and hardlink of files in mountpoint + cmd = ('cd %s/dir1/; ' + 'for FILENAME in *; ' + 'do ln -s $FILENAME softlink_$FILENAME; cd ~;' + 'done;' + % mountpoint) + ret, _, _ = g.run(client, cmd) + self.assertFalse(ret, "Creating Softlinks have failed") + g.log.info("Softlink of files have 
been changed successfully") + + cmd = ('cd %s/dir1/; ' + 'for FILENAME in *; ' + 'do ln $FILENAME hardlink_$FILENAME; cd ~;' + 'done;' + % mountpoint) + ret, _, _ = g.run(client, cmd) + self.assertFalse(ret, "Creating Hardlinks have failed") + g.log.info("Hardlink of files have been changed successfully") + + # Create tiny, small, medium and large file + # at mountpoint. Offset to differ filenames + # at diff clients. + offset = 1 + for mount_obj in self.mounts: + cmd = 'fallocate -l 100 tiny_file%s.txt' % str(offset) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Fallocate for tiny files failed") + g.log.info("Fallocate for tiny files successfully") + + cmd = 'fallocate -l 20M small_file%s.txt' % str(offset) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Fallocate for small files failed") + g.log.info("Fallocate for small files successfully") + + cmd = 'fallocate -l 200M medium_file%s.txt' % str(offset) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Fallocate for medium files failed") + g.log.info("Fallocate for medium files successfully") + + cmd = 'fallocate -l 1G large_file%s.txt' % str(offset) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Fallocate for large files failed") + g.log.info("Fallocate for large files successfully") + offset += 1 + + # Get arequal of dir1 + ret, result_before_brick_down = ( + collect_mounts_arequal(self.mounts[0], path='dir1/')) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal of dir1 ' + 'is successful') + + # Creating dir2 + ret = mkdir(self.mounts[0].client_system, "%s/dir2" + % mountpoint) + self.assertTrue(ret, "Failed to create dir2") + g.log.info("Directory dir2 on %s created successfully", self.mounts[0]) + + # Creating files on dir2 + # Write IO + all_mounts_procs, count = [], 1 + for mount_obj in self.mounts: + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + 
"--dirname-start-num %d --dir-depth 2 " + "--dir-length 10 --max-num-of-dirs 5 " + "--num-of-files 5 %s/dir2" % ( + self.script_upload_path, count, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + all_mounts_procs.append(proc) + count = count + 10 + + # Bring down other bricks to max redundancy + # Bringing bricks offline + bricks_to_offline = sample(bricks_list, 2) + ret = bring_bricks_offline(self.volname, + bricks_to_offline) + self.assertTrue(ret, 'Bricks not offline') + g.log.info('Bricks are offline successfully') + + # Creating dir3 + ret = mkdir(self.mounts[0].client_system, "%s/dir3" + % mountpoint) + self.assertTrue(ret, "Failed to create dir2") + g.log.info("Directory dir2 on %s created successfully", self.mounts[0]) + + # Start pumping IO to dir3 + cmd = ("cd %s/dir3; for i in `seq 1 100` ;" + "do dd if=/dev/urandom of=file$i bs=1M " + "count=5;done" % mountpoint) + + ret, _, err = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, err) + g.log.info('Finished writing on files while a brick is DOWN') + + appendcmd = ("cd %s/dir3; for i in `seq 1 100` ;" + "do dd if=/dev/urandom of=file$i bs=1M " + "count=1 oflag=append conv=notrunc;done" % mountpoint) + + readcmd = ("cd %s/dir3; for i in `seq 1 100` ;" + "do dd if=file$i of=/dev/null bs=1M " + "count=5;done" % mountpoint) + + ret, _, err = g.run(self.mounts[0].client_system, appendcmd) + self.assertEqual(ret, 0, err) + g.log.info('Finished append on files after redundant bricks offline') + + ret, _, err = g.run(self.mounts[0].client_system, readcmd) + self.assertEqual(ret, 0, err) + g.log.info('Finished read on files after redundant bricks offline') + + # Validating IO's on dir2 and waiting to complete + ret = validate_io_procs(all_mounts_procs, self.mounts) + self.assertTrue(ret, "IO failed on some of the clients") + g.log.info("Successfully validated all IO's") + + # Bring bricks online + ret = bring_bricks_online(self.mnode, 
self.volname, + bricks_to_offline) + self.assertTrue(ret, 'Bricks not brought online') + g.log.info('Bricks are online successfully') + + # Wait for brick to come online + ret = wait_for_bricks_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, "Bricks are not online") + g.log.info("EXPECTED : Bricks are online") + + # Check if bricks are online + ret = get_offline_bricks_list(self.mnode, self.volname) + self.assertListEqual(ret, [], 'All bricks are not online') + g.log.info('All bricks are online') + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + g.log.info('Heal has completed successfully') + + # Get arequal of dir1 + ret, result_after_brick_up = ( + collect_mounts_arequal(self.mounts[0], path='dir1/')) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal of dir1 ' + 'is successful') + + # Comparing arequals of dir1 + self.assertEqual(result_before_brick_down, + result_after_brick_up, + 'Arequals are not equals before and after ' + 'bringing down redundant bricks') + g.log.info('Arequals are equals before before and after ' + 'bringing down redundant bricks') diff --git a/tests/functional/disperse/test_ec_brick_consumable_size.py b/tests/functional/disperse/test_ec_brick_consumable_size.py index e2cee80b1..c37dc834b 100644..100755 --- a/tests/functional/disperse/test_ec_brick_consumable_size.py +++ b/tests/functional/disperse/test_ec_brick_consumable_size.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2018-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -21,8 +21,10 @@ EcBrickConsumableSize: 'number of data bricks * least of brick size'. 
""" +from unittest import skip from glusto.core import Glusto as g -from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.brick_libs import (get_all_bricks, + bring_bricks_offline) from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.volume_libs import (get_volume_info) from glustolibs.gluster.lib_utils import get_size_of_mountpoint @@ -41,68 +43,96 @@ class EcBrickConsumableSize(GlusterBaseClass): raise ExecutionError("Failed to Setup_Volume and Mount_Volume") g.log.info("Successful in Setup Volume and Mount Volume") - # Test Case - def test_disperse_vol_size(self): - # pylint: disable=too-many-locals - mnode = self.mnode - volname = self.volname - client = self.mounts[0].client_system - mountpoint = self.mounts[0].mountpoint - - # Obtain the volume size - vol_size = get_size_of_mountpoint(client, mountpoint) - self.assertIsNotNone(vol_size, ("Unable to get the volsize \ - of %s.", volname)) - - # Retrieve the minimum brick size + def _get_min_brick(self): + # Returns the brick with min size + bricks_list = get_all_bricks(self.mnode, self.volname) min_brick_size = -1 - bricks_list = get_all_bricks(mnode, volname) + min_size_brick = None for brick in bricks_list: brick_node, brick_path = brick.split(":") brick_size = get_size_of_mountpoint(brick_node, brick_path) if ((brick_size is not None) and (min_brick_size == -1) or (int(min_brick_size) > int(brick_size))): min_brick_size = brick_size + min_size_brick = brick + return min_size_brick, min_brick_size - # Calculate the consumable size - vol_info = get_volume_info(mnode, volname) + def _get_consumable_vol_size(self, min_brick_size): + # Calculates the consumable size of the volume created + vol_info = get_volume_info(self.mnode, self.volname) self.assertIsNotNone(vol_info, ("Unable to get the volinfo \ - of %s.", volname)) - - disp_data_bricks = (int(vol_info[volname]['disperseCount']) - - int(vol_info[volname]['redundancyCount'])) - dist_count = 
(int(vol_info[volname]['brickCount']) / - int(vol_info[volname]['disperseCount'])) + of %s.", self.volname)) + disp_data_bricks = (int(vol_info[self.volname]['disperseCount']) - + int(vol_info[self.volname]['redundancyCount'])) + dist_count = (int(vol_info[self.volname]['brickCount']) / + int(vol_info[self.volname]['disperseCount'])) consumable_size = ((int(min_brick_size) * int(disp_data_bricks)) * int(dist_count)) + return consumable_size, dist_count + + @skip('Skipping this test due to Bug 1883429') + def test_disperse_vol_size(self): + # pylint: disable=too-many-locals + client = self.mounts[0].client_system + mount_point = self.mounts[0].mountpoint + + # Obtain the volume size + vol_size = get_size_of_mountpoint(client, mount_point) + self.assertIsNotNone(vol_size, ("Unable to get the volsize " + "of %s.", self.volname)) + + # Retrieve the minimum brick size + min_size_brick, min_brick_size = self._get_min_brick() + + # Calculate the consumable size + consumable_size, dist_count = ( + self._get_consumable_vol_size(min_brick_size)) # Verify the volume size is in allowable range # Volume size should be above 98% of consumable size. 
delta = (100 - ((float(vol_size)/float(consumable_size)) * 100)) - self.assertTrue(delta < 2, ("Volume size is not in allowable range")) - + self.assertTrue(delta < 2, "Volume size is not in allowable range") g.log.info("Volume size is in allowable range") # Write to the available size block_size = 1024 - write_size = ((int(vol_size) * (0.95) * int(block_size)) / + write_size = ((int(vol_size) * 0.95 * int(block_size)) / (int(dist_count))) for i in range(1, int(dist_count)): - ret, _, _ = g.run(client, "fallocate -l {} {}/testfile{} \ - ".format(int(write_size), mountpoint, i)) - self.assertTrue(ret == 0, ("Writing file of available size failed \ - on volume %s", volname)) + ret, _, _ = g.run(client, "fallocate -l {} {}/testfile{} " + .format(int(write_size), mount_point, i)) + self.assertTrue(ret == 0, ("Writing file of available size " + "failed on volume %s", self.volname)) g.log.info("Successfully verified volume size") # Try writing more than the available size write_size = ((int(vol_size) * int(block_size)) * 1.2) - ret, _, _ = g.run(client, "fallocate -l {} {}/testfile1 \ - ".format(int(write_size), mountpoint)) - self.assertTrue(ret != 0, ("Writing file of more than available \ - size passed on volume %s", volname)) - + ret, _, _ = g.run(client, "fallocate -l {} {}/testfile1 " + .format(int(write_size), mount_point)) + self.assertTrue(ret != 0, ("Writing file of more than available " + "size passed on volume %s", self.volname)) g.log.info("Successfully verified brick consumable size") + # Cleanup the mounts to verify + cmd = ('rm -rf %s' % mount_point) + ret, _, _ = g.run(client, cmd) + if ret: + g.log.error("Failed to cleanup vol data on %s", mount_point) + # Bring down the smallest brick + ret = bring_bricks_offline(self.volname, min_size_brick) + self.assertTrue(ret, "Failed to bring down the smallest brick") + + # Find the volume size post brick down + post_vol_size = get_size_of_mountpoint(client, mount_point) + self.assertIsNotNone(post_vol_size, 
("Unable to get the volsize " + "of %s.", self.volname)) + + # Vol size after bringing down the brick with smallest size should + # not be greater than the actual size + self.assertGreater(vol_size, post_vol_size, + ("The volume size after bringing down the volume " + "is greater than the initial")) + # Method to cleanup test setup def tearDown(self): # Stopping the volume diff --git a/tests/functional/disperse/test_ec_check_lock_granted_to_2_different_client.py b/tests/functional/disperse/test_ec_check_lock_granted_to_2_different_client.py new file mode 100755 index 000000000..dd5f3b6da --- /dev/null +++ b/tests/functional/disperse/test_ec_check_lock_granted_to_2_different_client.py @@ -0,0 +1,135 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
@runs_on([['dispersed', 'distributed-dispersed'], ['glusterfs']])
class EcVerifyLock(GlusterBaseClass):
    """Verify file locks are granted exclusively across two clients on a
    dispersed volume, with eager-lock options toggled on and off."""

    @classmethod
    def setUpClass(cls):
        # Calling GlusterBaseClass setUpClass
        cls.get_super_method(cls, 'setUpClass')()
        # file_lock.py presumably holds a lock on the given file for the
        # number of seconds passed via -t -- verify against the script.
        cls.script = "/usr/share/glustolibs/io/scripts/file_lock.py"
        if not upload_scripts(cls.clients, [cls.script]):
            raise ExecutionError("Failed to upload IO scripts to clients %s"
                                 % cls.clients)
        g.log.info("Successfully uploaded IO scripts to clients %s",
                   cls.clients)

    def setUp(self):
        # Setup Volume and Mount Volume
        if not self.setup_volume_and_mount_volume(mounts=self.mounts):
            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
        g.log.info("Successful in Setup Volume and Mount Volume")

    def test_verify_lock_granted_from_2_clients(self):
        """
        - Create disperse volume and mount it to 2 clients`
        - Create file from 1 client on mount point
        - Take lock from client 1 => Lock is acquired
        - Try taking lock from client 2=> Lock is blocked (as already
          being taken by client 1)
        - Release lock from client1=> Lock is released
        - Take lock from client2
        - Again try taking lock from client 1
        - verify test with once, by disabling eagerlock and other eager lock
          and once by leaving eager and other eagerlock enabled(by default)
        """
        mpoint = self.mounts[0].mountpoint

        # Create a file on client 1
        cmd = "touch {}/test_file".format(mpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to create file on client 1")

        # Verifying OCL as ON (precondition: optimistic-change-log must be
        # enabled before exercising the lock scenarios)
        option = "optimistic-change-log"
        option_dict = get_volume_options(self.mnode, self.volname, option)
        self.assertIsNotNone(option_dict, ("Failed to get %s volume option"
                                           " for volume %s"
                                           % (option, self.volname)))
        self.assertEqual(option_dict['disperse.optimistic-change-log'], 'on',
                         ("%s is not ON for volume %s" % (option,
                                                          self.volname)))
        g.log.info("Succesfully verified %s value for volume %s",
                   option, self.volname)

        # Repeat the test with eager-lock and other-eager-lock 'on' & 'off'
        for lock_status in ('on', 'off'):
            options = {'disperse.eager-lock': lock_status,
                       'disperse.other-eager-lock': lock_status}
            ret = set_volume_options(self.mnode, self.volname, options)

            self.assertTrue(ret, ("failed to set eagerlock and other "
                                  "eagerlock value as %s " % lock_status))
            g.log.info("Successfully set eagerlock and other eagerlock value"
                       " to %s", lock_status)

            # Repeat the test for both the combinations of clients
            # (client1 locks first, then client2 locks first)
            for client_1, client_2 in list(itertools.permutations(
                    [self.mounts[0].client_system,
                     self.mounts[1].client_system], r=2)):
                # Get lock to file from one client; the -t 30 hold runs
                # asynchronously so the contention below happens while the
                # lock is still held.
                lock_cmd = ("/usr/bin/env python {} -f {}/"
                            "test_file -t 30".format(self.script, mpoint))
                proc = g.run_async(client_1, lock_cmd)
                # Give the async holder time to actually acquire the lock
                # before contending from the second client.
                time.sleep(5)

                # As the lock is been acquired by one client,
                # try to get lock from the other
                ret, _, _ = g.run(client_2, lock_cmd)
                self.assertEqual(ret, 1, ("Unexpected: {} acquired the lock "
                                          "before been released by {}"
                                          .format(client_2, client_1)))
                g.log.info("Expected : Lock can't be acquired by %s before "
                           "being released by %s", client_2, client_1)

                # Wait for first client to release the lock.
                ret, _, _ = proc.async_communicate()
                self.assertEqual(ret, 0, ("File lock process failed on %s:%s",
                                          client_1, mpoint))

                # Try taking the lock from other client and releasing it
                lock_cmd = ("/usr/bin/env python {} -f "
                            "{}/test_file -t 1".format(self.script, mpoint))
                ret, _, _ = g.run(client_2, lock_cmd)
                self.assertEqual(ret, 0,
                                 ("Unexpected:{} Can't acquire the lock even "
                                  "after its been released by {}"
                                  .format(client_2, client_1)))
                g.log.info("Successful, Lock acquired by %s after being "
                           "released by %s", client_2, client_1)

    def tearDown(self):
        # Stopping the volume
        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
        if not ret:
            raise ExecutionError("Failed to Unmount Volume and Cleanup "
                                 "Volume")
        g.log.info("Successful in Unmount Volume and Cleanup Volume")

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()
+ +""" +Test Description: + Tests FOps and Data Deletion on a healthy EC volume +""" + +from glusto.core import Glusto as g + +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import validate_io_procs +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.glusterdir import mkdir + + +@runs_on([['dispersed', 'distributed-dispersed'], + ['glusterfs']]) +class TestEcDataDelete(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_ec_data_delete(self): + """ + Test steps: + - Create directory dir1 + - Create 5 dir and 5 files in each dir in directory 1 + - Rename all file inside dir1 + - Truncate at any 
dir in mountpoint inside dir1 + - Create softlink and hardlink of files in mountpoint + - Delete op for deleting all file in one of the dirs + - chmod, chown, chgrp inside dir1 + - Create tiny, small, medium nd large file + - Creating files on client side for dir1 + - Validating IO's and waiting to complete + - Deleting dir1 + - Check .glusterfs/indices/xattrop is empty + - Check if brickpath is empty + """ + + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + # Get the bricks from the volume + bricks_list = get_all_bricks(self.mnode, self.volname) + g.log.info("Brick List : %s", bricks_list) + + # Creating dir1 + ret = mkdir(self.mounts[0].client_system, "%s/dir1" + % self.mounts[0].mountpoint) + self.assertTrue(ret, "Failed to create dir1") + g.log.info("Directory dir1 on %s created successfully", self.mounts[0]) + + # Create 5 dir and 5 files in each dir at mountpoint on dir1 + start, end = 1, 5 + for mount_obj in self.mounts: + # Number of dir and files to be created. + dir_range = ("%s..%s" % (str(start), str(end))) + file_range = ("%s..%s" % (str(start), str(end))) + # Create dir 1-5 at mountpoint. + ret = mkdir(mount_obj.client_system, "%s/dir1/dir{%s}" + % (mount_obj.mountpoint, dir_range)) + self.assertTrue(ret, "Failed to create directory") + g.log.info("Directory created successfully") + + # Create files inside each dir. + cmd = ('touch %s/dir1/dir{%s}/file{%s};' + % (mount_obj.mountpoint, dir_range, file_range)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "File creation failed") + g.log.info("File created successfull") + + # Increment counter so that at next client dir and files are made + # with diff offset. Like at next client dir will be named + # dir6, dir7...dir10. Same with files. 
+ start += 5 + end += 5 + + # Rename all files inside dir1 at mountpoint on dir1 + cmd = ('cd %s/dir1/dir1/; ' + 'for FILENAME in *;' + 'do mv $FILENAME Unix_$FILENAME; cd ~;' + 'done;' + % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to rename file on " + "client") + g.log.info("Successfully renamed file on client") + + # Truncate at any dir in mountpoint inside dir1 + # start is an offset to be added to dirname to act on + # diff files at diff clients. + start = 1 + for mount_obj in self.mounts: + cmd = ('cd %s/dir1/dir%s/; ' + 'for FILENAME in *;' + 'do echo > $FILENAME; cd ~;' + 'done;' + % (mount_obj.mountpoint, str(start))) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Truncate failed") + g.log.info("Truncate of files successfull") + + # Create softlink and hardlink of files in mountpoint + start = 1 + for mount_obj in self.mounts: + cmd = ('cd %s/dir1/dir%s; ' + 'for FILENAME in *; ' + 'do ln -s $FILENAME softlink_$FILENAME; cd ~;' + 'done;' + % (mount_obj.mountpoint, str(start))) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Creating Softlinks have failed") + g.log.info("Softlink of files have been changed successfully") + + cmd = ('cd %s/dir1/dir%s; ' + 'for FILENAME in *; ' + 'do ln $FILENAME hardlink_$FILENAME; cd ~;' + 'done;' + % (mount_obj.mountpoint, str(start + 1))) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Creating Hardlinks have failed") + g.log.info("Hardlink of files have been changed successfully") + start += 5 + + # chmod, chown, chgrp inside dir1 + # start and end used as offset to access diff files + # at diff clients. 
+ start, end = 2, 5 + for mount_obj in self.mounts: + dir_file_range = '%s..%s' % (str(start), str(end)) + cmd = ('chmod 777 %s/dir1/dir{%s}/file{%s}' + % (mount_obj.mountpoint, dir_file_range, dir_file_range)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Changing mode of files has failed") + g.log.info("Mode of files have been changed successfully") + + cmd = ('chown root %s/dir1/dir{%s}/file{%s}' + % (mount_obj.mountpoint, dir_file_range, dir_file_range)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Changing owner of files has failed") + g.log.info("Owner of files have been changed successfully") + + cmd = ('chgrp root %s/dir1/dir{%s}/file{%s}' + % (mount_obj.mountpoint, dir_file_range, dir_file_range)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Changing group of files has failed") + g.log.info("Group of files have been changed successfully") + start += 5 + end += 5 + + # Create tiny, small, medium and large file + # at mountpoint. Offset to differ filenames + # at diff clients. 
+ offset = 1 + for mount_obj in self.mounts: + cmd = 'fallocate -l 100 tiny_file%s.txt' % str(offset) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Fallocate for tiny files failed") + g.log.info("Fallocate for tiny files successfully") + + cmd = 'fallocate -l 20M small_file%s.txt' % str(offset) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Fallocate for small files failed") + g.log.info("Fallocate for small files successfully") + + cmd = 'fallocate -l 200M medium_file%s.txt' % str(offset) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Fallocate for medium files failed") + g.log.info("Fallocate for medium files successfully") + + cmd = 'fallocate -l 1G large_file%s.txt' % str(offset) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Fallocate for large files failed") + g.log.info("Fallocate for large files successfully") + offset += 1 + + # Creating files on client side for dir1 + # Write IO + all_mounts_procs, count = [], 1 + for mount_obj in self.mounts: + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d --dir-depth 2 " + "--dir-length 10 --max-num-of-dirs 5 " + "--num-of-files 5 %s/dir1" % ( + self.script_upload_path, count, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + all_mounts_procs.append(proc) + count = count + 10 + + # Validating IO's and waiting to complete + ret = validate_io_procs(all_mounts_procs, self.mounts) + self.assertTrue(ret, "IO failed on some of the clients") + g.log.info("Successfully validated all IO's") + + # Deleting dir1 + cmd = ('rm -rf -v %s/dir1' % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to delete directory1") + g.log.info("Directory 1 deleted successfully for %s", self.mounts[0]) + + # Check .glusterfs/indices/xattrop is empty + for brick in bricks_list: + 
brick_node, brick_path = brick.split(":") + cmd = ("ls -1 %s/.glusterfs/indices/xattrop/ | " + "grep -ve \"xattrop-\" | wc -l" % brick_path) + ret, out, _ = g.run(brick_node, cmd) + self.assertEqual(0, int(out.strip()), ".glusterfs/indices/" + "xattrop is not empty") + g.log.info("No pending heals on %s", brick) + + # Check if brickpath is empty + for brick in bricks_list: + brick_node, brick_path = brick.split(":") + cmd = ("ls -1 %s |wc -l " % brick_path) + ret, out, _ = g.run(brick_node, cmd) + self.assertEqual(0, int(out.strip()), "Brick path {} is not empty " + "in node {}".format(brick_path, brick_node)) + g.log.info("Brick path is empty in node %s", brick_node) diff --git a/tests/functional/disperse/test_ec_data_intergrity.py b/tests/functional/disperse/test_ec_data_intergrity.py new file mode 100644 index 000000000..5241e8d80 --- /dev/null +++ b/tests/functional/disperse/test_ec_data_intergrity.py @@ -0,0 +1,314 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +""" +Test Description: + Tests Data Consistency and Intergrity +""" +from random import sample +from glusto.core import Glusto as g + +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import validate_io_procs, collect_mounts_arequal +from glustolibs.gluster.brick_libs import (are_bricks_offline, + bring_bricks_offline, + bring_bricks_online, + wait_for_bricks_to_be_online, + get_offline_bricks_list) +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.volume_libs import get_subvols + + +@runs_on([['dispersed', 'distributed-dispersed'], + ['glusterfs']]) +class TestEcDataIntegrity(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + 
self.get_super_method(self, 'tearDown')() + + def _bring_redundant_bricks_offline(self, mnode, volname): + """ + Bring redundant bricks offline + """ + brickset_to_offline = [] + # List two bricks in each subvol + all_subvols_dict = get_subvols(mnode, volname) + subvols = all_subvols_dict['volume_subvols'] + for subvol in subvols: + self.assertTrue(subvol, "List is empty") + brickset_to_offline.extend(sample(subvol, 2)) + + # Bring two bricks of each subvol down + ret = bring_bricks_offline(volname, brickset_to_offline) + self.assertTrue(ret, "Bricks are still online") + + # Validating the bricks are offline + ret = are_bricks_offline(mnode, volname, + brickset_to_offline) + self.assertTrue(ret, "Few of the bricks are still online in" + " {} ".format(brickset_to_offline)) + return brickset_to_offline + + def test_ec_data_integrity(self): + """ + Test steps: + - Create directory dir1 + - Create 5 dir and 5 files in each dir in directory 1 + - Rename all file inside dir1 + - Truncate at any dir in mountpoint inside dir1 + - Create softlink and hardlink of files in mountpoint + - chmod, chown, chgrp inside dir1 + - Create tiny, small, medium nd large file + - Creating files on client side for dir1 + - Validating IO's and waiting to complete + - Get arequal of dir1 + - Bring redundant bricks offline + - Get arequal of dir1 after 1st set of bricks down + - Bring redundant bricks offline + - Get arequal of dir1 after 2nd set of bricks down + """ + + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + brickset_to_offline = [] + + # Creating dir1 + ret = mkdir(self.mounts[0].client_system, "%s/dir1" + % self.mounts[0].mountpoint) + self.assertTrue(ret, "Failed to create dir1") + g.log.info("Directory dir1 on %s created successfully", self.mounts[0]) + + # Create 5 dir and 5 files in each dir at mountpoint on dir1 + start, end = 1, 5 + for mount_obj in self.mounts: + # Number of dir and files to be created. 
+ dir_range = ("%s..%s" % (str(start), str(end))) + file_range = ("%s..%s" % (str(start), str(end))) + # Create dir 1-5 at mountpoint. + ret = mkdir(mount_obj.client_system, "%s/dir1/dir{%s}" + % (mount_obj.mountpoint, dir_range)) + self.assertTrue(ret, "Failed to create directory") + g.log.info("Directory created successfully") + + # Create files inside each dir. + cmd = ('touch %s/dir1/dir{%s}/file{%s};' + % (mount_obj.mountpoint, dir_range, file_range)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "File creation failed") + g.log.info("File created successfull") + + # Increment counter so that at next client dir and files are made + # with diff offset. Like at next client dir will be named + # dir6, dir7...dir10. Same with files. + start += 5 + end += 5 + + # Rename all files inside dir1 at mountpoint on dir1 + cmd = ('cd %s/dir1/dir1/; ' + 'for FILENAME in *;' + 'do mv $FILENAME Unix_$FILENAME; cd ~;' + 'done;' + % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to rename file on " + "client") + g.log.info("Successfully renamed file on client") + + # Truncate at any dir in mountpoint inside dir1 + # start is an offset to be added to dirname to act on + # diff files at diff clients. 
+ start = 1 + for mount_obj in self.mounts: + cmd = ('cd %s/dir1/dir%s/; ' + 'for FILENAME in *;' + 'do echo > $FILENAME; cd ~;' + 'done;' + % (mount_obj.mountpoint, str(start))) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Truncate failed") + g.log.info("Truncate of files successfull") + + # Create softlink and hardlink of files in mountpoint + start = 1 + for mount_obj in self.mounts: + for link_type, ln_mode in (('softlink', 'ln -s'), + ('hardlink', 'ln')): + cmd = ('cd %s/dir1/dir%s; ' + 'for FILENAME in *; ' + 'do %s $FILENAME %s_$FILENAME; cd ~;' + 'done;' + % (mount_obj.mountpoint, str(start), ln_mode, + link_type)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Creating %s have failed" % link_type) + g.log.info("%s of files created successfully", link_type) + start += 5 + + # chmod, chown, chgrp inside dir1 + # start and end used as offset to access diff files + # at diff clients. + start, end = 2, 5 + for mount_obj in self.mounts: + dir_file_range = '%s..%s' % (str(start), str(end)) + cmd = ('chmod 777 %s/dir1/dir{%s}/file{%s}' + % (mount_obj.mountpoint, dir_file_range, dir_file_range)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Changing mode of files has failed") + g.log.info("Mode of files have been changed successfully") + + cmd = ('chown root %s/dir1/dir{%s}/file{%s}' + % (mount_obj.mountpoint, dir_file_range, dir_file_range)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Changing owner of files has failed") + g.log.info("Owner of files have been changed successfully") + + cmd = ('chgrp root %s/dir1/dir{%s}/file{%s}' + % (mount_obj.mountpoint, dir_file_range, dir_file_range)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Changing group of files has failed") + g.log.info("Group of files have been changed successfully") + start += 5 + end += 5 + + # Create tiny, small, medium and large file + # at mountpoint. 
Offset to differ filenames + # at diff clients. + offset = 1 + for mount_obj in self.mounts: + for size, filename in (('100', 'tiny_file'), ('20M', 'small_file'), + ('200M', 'medium_file'), + ('1G', 'large_file')): + cmd = 'fallocate -l {} {}{}.txt'.format(size, filename, offset) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Fallocate for files failed") + g.log.info("Fallocate for files successfully") + offset += 1 + + # Creating files on client side for dir1 + # Write IO + all_mounts_procs, count = [], 1 + for mount_obj in self.mounts: + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d --dir-depth 2 " + "--dir-length 10 --max-num-of-dirs 5 " + "--num-of-files 5 %s/dir1" % ( + self.script_upload_path, count, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + all_mounts_procs.append(proc) + count += 10 + + # Validating IO's and waiting to complete + ret = validate_io_procs(all_mounts_procs, self.mounts) + self.assertTrue(ret, "IO failed on some of the clients") + g.log.info("Successfully validated all IO's") + + # Get arequal of dir1 + ret, result_before_bricks_down = ( + collect_mounts_arequal(self.mounts[0], path='dir1/')) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal of dir1 ' + 'is successful') + + # Bring redundant bricks offline + brickset_to_offline = self._bring_redundant_bricks_offline( + self.mnode, self.volname) + + # Get arequal of dir1 after 1st set of bricks down + ret, result_after_1st_brickset_down = ( + collect_mounts_arequal(self.mounts[0], path='dir1/')) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal of dir1 ' + 'is successful') + + # Bring bricks online + ret = bring_bricks_online(self.mnode, self.volname, + brickset_to_offline) + self.assertTrue(ret, 'Bricks not brought online') + g.log.info('Bricks are online successfully') + + # Wait for brick to come online + ret = 
wait_for_bricks_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, "Bricks are not online") + g.log.info("EXPECTED : Bricks are online") + + # Check if bricks are online + ret = get_offline_bricks_list(self.mnode, self.volname) + self.assertListEqual(ret, [], 'All bricks are not online') + g.log.info('All bricks are online') + + # Bring redundant bricks offline + brickset_to_offline = self._bring_redundant_bricks_offline( + self.mnode, self.volname) + + # Get arequal of dir1 after 2nd set of bricks down + ret, result_after_2nd_brickset_down = ( + collect_mounts_arequal(self.mounts[0], path='dir1/')) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal of dir1 ' + 'is successful') + + # Comparing arequals + self.assertEqual(result_before_bricks_down, + result_after_1st_brickset_down, + 'Arequals are not equals before brickset ' + 'down and after 1st brickset down') + g.log.info('Arequals are equals before brickset down ' + 'and after brickset down') + + self.assertEqual(result_after_2nd_brickset_down, + result_after_1st_brickset_down, + 'Arequals are not equals before 2nd set ' + 'brick down and after 1st set brick down') + g.log.info('Arequals are equals for 2nd brickset down ' + 'and 1st brickset down') diff --git a/tests/functional/disperse/test_ec_eager_lock_functional_validation.py b/tests/functional/disperse/test_ec_eager_lock_functional_validation.py new file mode 100644 index 000000000..b4fb4c9d9 --- /dev/null +++ b/tests/functional/disperse/test_ec_eager_lock_functional_validation.py @@ -0,0 +1,161 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +Test Description: + Verify Eager lock reduces the number of locks + being taken when writing to the file continuosly +""" +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, + runs_on) +from glustolibs.gluster.volume_ops import set_volume_options +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.profile_ops import (profile_start, profile_stop) +from glustolibs.gluster.dht_test_utils import find_hashed_subvol +from glustolibs.gluster.lib_utils import get_extended_attributes_info + + +@runs_on([['dispersed', 'distributed-dispersed'], ['glusterfs']]) +class EagerlockFunctionalValidationTest(GlusterBaseClass): + # Method to setup the environment for test case + + def setUp(self): + self.get_super_method(self, 'setUp')() + ret = self.setup_volume_and_mount_volume(mounts=self.mounts, + volume_create_force=True) + if not ret: + raise ExecutionError("Failed to setup and mount volume") + + def _check_dirty_xattr(self, filename): + """Get trusted.ec.dirty xattr value to validate eagerlock behavior""" + # Find the hashed subvol of the file created + # for distributed disperse case + subvols_info = get_subvols(self.mnode, self.volname) + subvols_info = subvols_info['volume_subvols'] + if len(subvols_info) > 1: + _, hashed_subvol = find_hashed_subvol(subvols_info, + '', filename) + if hashed_subvol is None: + g.log.error("Error in finding hash value of %s", filename) + 
return None + else: + hashed_subvol = 0 + + # Collect ec.dirty xattr value from each brick + result = [] + for subvol in subvols_info[hashed_subvol]: + host, brickpath = subvol.split(':') + brickpath = brickpath + '/' + filename + ret = get_extended_attributes_info(host, [brickpath], + encoding='hex', + attr_name='trusted.ec.dirty') + ret = ret[brickpath]['trusted.ec.dirty'] + result.append(ret) + + # Check if xattr values are same across all bricks + if result.count(result[0]) == len(result): + return ret + g.log.error("trusted.ec.dirty value is not consistent across the " + "disperse set %s", result) + return None + + def _file_create_and_profile_info(self, status): + """Create a file and check the volume profile for inode lock count.""" + # Creating file + mountpoint = self.mounts[0].mountpoint + client = self.mounts[0].client_system + + filename = 'f1_EagerLock_' + status + cmd = ("dd if=/dev/urandom of=%s/%s bs=100M count=10" + % (mountpoint, filename)) + + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, "Failed to create file on mountpoint") + g.log.info("Successfully created files on mountpoint") + + # Getting and checking output of profile info. + cmd = "gluster volume profile %s info | grep -i INODELK" % self.volname + ret, rout, _ = g.run(self.mnode, cmd) + self.assertEqual(ret, 0, "Failed to grep INODELK count from profile " + "info") + g.log.info("The lock counts on all bricks with eager-lock %s: %s", + status, rout) + + return filename + + def test_validate_profile_for_inodelk(self): + """ + Test Steps: + 1) Create an ecvolume and mount it + 2) Set the eagerlock option + 3) Create a 1GB file + 4) View the profile of the volume for INODELK count must be about + 2-10 locks for each brick. + 5) check backend bricks for trusted.ec.dirty xattr must be non-zero + 6) Disable the eagerlock option + 7) Repeat steps 3-5 and now dirty xattr must be zero and + INODELK count in range of 100-5k. 
+ """ + + # Enable EagerLock + ret = set_volume_options(self.mnode, self.volname, + {'disperse.eager-lock': 'on', + 'disperse.eager-lock-timeout': '10'}) + self.assertTrue(ret, "Failed to turn on eagerlock" + "on %s" % self.volname) + + # Start profile on volume. + ret, _, _ = profile_start(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to start profile on volume: %s" + % self.volname) + + # Test behavior with EagerLock on + filename = self._file_create_and_profile_info("on") + self.assertIsNotNone(filename, "Failed to get filename") + + # Test dirty bit with EagerLock on + ret = self._check_dirty_xattr(filename) + self.assertEqual(ret, '0x00000000000000010000000000000001', + "Unexpected dirty xattr value is %s on %s" + % (ret, filename)) + + # Disable EagerLock + ret = set_volume_options(self.mnode, self.volname, + {'disperse.eager-lock': 'off'}) + self.assertTrue(ret, "Failed to turn off eagerlock " + "on %s" % self.volname) + + # Test behavior with EagerLock off + filename = self._file_create_and_profile_info("off") + self.assertIsNotNone(filename, "Failed to get filename") + + # Test dirty bit with EagerLock off + ret = self._check_dirty_xattr(filename) + self.assertEqual(ret, '0x00000000000000000000000000000000', + "Unexpected dirty xattr value is %s on %s" + % (ret, filename)) + + # Stop profile on volume. + ret, _, _ = profile_stop(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to stop profile on volume: %s" + % self.volname) + + def tearDown(self): + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") diff --git a/tests/functional/disperse/test_ec_eagerlock.py b/tests/functional/disperse/test_ec_eagerlock.py new file mode 100644 index 000000000..3da2d67b7 --- /dev/null +++ b/tests/functional/disperse/test_ec_eagerlock.py @@ -0,0 +1,264 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +Test Description: + Verify Eagerlock and other-eagerlock behavior +""" +from unittest import SkipTest +from random import choice +from time import sleep +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, + runs_on) +from glustolibs.gluster.dht_test_utils import find_hashed_subvol +from glustolibs.gluster.glusterdir import rmdir +from glustolibs.gluster.lib_utils import (append_string_to_file, + get_extended_attributes_info) +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.volume_ops import (set_volume_options, + get_volume_options) +from glustolibs.misc.misc_libs import (yum_install_packages, + upload_scripts) + + +@runs_on([['dispersed', 'distributed-dispersed'], ['glusterfs']]) +class DisperseEagerlockTest(GlusterBaseClass): + # Method to setup the environment for test case + + @classmethod + def setUpClass(cls): + cls.get_super_method(cls, 'setUpClass')() + # Check for availability of atleast 4 clients + if len(cls.clients) < 4: + raise SkipTest("This test requires atleast 4 clients") + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + 
"file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients {}" + .format(cls.clients)) + # Install time package on all clients needed for measurement of ls + + ret = yum_install_packages(cls.clients, 'time') + if not ret: + raise ExecutionError("Failed to install TIME package on all nodes") + + def setUp(self): + """ + setUp method + """ + # Setup_Volume + self.get_super_method(self, 'setUp')() + ret = self.setup_volume_and_mount_volume(mounts=self.mounts, + volume_create_force=False) + if not ret: + raise ExecutionError("Failed to setup and mount volume") + g.log.info("Volume %s has been setup successfully", self.volname) + + def _filecreate_and_hashcheck(self, timeoutval): + """Create a file and check on which subvol it is hashed to""" + # Create and write to a file to test the eagerlock timeout behavior + objectname = 'EagerLockTimeoutCheck-file-' + timeoutval + objectpath = ("{}/{}".format(self.mounts[0].mountpoint, objectname)) + ret = append_string_to_file(self.mounts[0].client_system, + objectpath, 'EagerLockTest') + self.assertTrue(ret, 'create and append of %s failed' % objectname) + ret = get_subvols(self.mnode, self.volname) + # Find the hashed subvol of the file created + if len(ret['volume_subvols']) > 1: + _, hashed_subvol = find_hashed_subvol(ret['volume_subvols'], + '', objectname) + if hashed_subvol is None: + g.log.error("Error in finding hash value of %s", objectname) + return None + return (objectname, ret['volume_subvols'], hashed_subvol) + # Set subvol to 0 for plain(non-distributed) disperse volume + hashed_subvol = 0 + return (objectname, ret['volume_subvols'], hashed_subvol) + + @staticmethod + def _get_dirty_xattr_value(ret, hashed_subvol, objectname): + """Get trusted.ec.dirty xattr value to validate eagerlock behavior""" + # Collect ec.dirty xattr value from each brick + hashvals = [] + for subvol in ret[hashed_subvol]: + host, brickpath = 
subvol.split(':') + brickpath = brickpath + '/' + objectname + ret = get_extended_attributes_info(host, [brickpath], + encoding='hex', + attr_name='trusted.ec.dirty') + ret = ret[brickpath]['trusted.ec.dirty'] + hashvals.append(ret) + # Check if xattr values are same across all bricks + if hashvals.count(hashvals[0]) == len(hashvals): + del hashvals + return ret + g.log.error("trusted.ec.dirty value is not consistent across the " + "disperse set %s", hashvals) + return None + + def _change_eagerlock_timeouts(self, timeoutval): + """Change eagerlock and other-eagerlock timeout values as per input""" + ret = set_volume_options(self.mnode, self.volname, + {'disperse.eager-lock-timeout': timeoutval, + 'disperse.other-eager-lock-timeout': + timeoutval}) + self.assertTrue(ret, 'failed to change eager-lock timeout values to ' + '%s sec on %s' % (timeoutval, self.volname)) + g.log.info("SUCCESS:Changed eager-lock timeout vals to %s sec on %s", + timeoutval, self.volname) + + def _file_dir_create(self, clients, mountpoint): + """Create Directories and files which will be used for + checking response time of lookups""" + client = choice(clients) + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num 0 " + "--dir-depth 2 " + "--dir-length 4 " + "--max-num-of-dirs 4 " + "--num-of-files 100 %s" % (self.script_upload_path, mountpoint)) + ret, _, _ = g.run(client, cmd) + self.assertEqual(ret, 0, "FAILED to create data needed for lookups") + + @staticmethod + def _lookup_response_time(clients, mountpoint): + """ Check lookup response time which should be around 2-3 sec """ + # Sleeping to allow some cache timeout + sleep(60) + cmd = '/usr/bin/time -f "%e" ls -lRt ' + mountpoint + ' >>/dev/null' + results = g.run_parallel(clients, cmd) + # Checking the actual time taken for lookup + for ret_values in results.values(): + _, _, ret = ret_values + calc = float(ret.strip()) + if calc > 2: + g.log.error("lookups taking more than 2 seconds." 
+ " Actual time: %s", calc) + + def _rmdir_on_mountpoint(self, clients, mountpoint): + """ Perform rm of created files as part of Sanity Check """ + # Skipping below lines of code as running rm -rf parallely + # from multiple clients is a known bug Refer BZ-1787328 + # cmd = 'rm -rf ' + mountpoint + # results = g.run_parallel(clients, cmd) + # for client, ret_values in results.items(): + # ret, out, err = ret_values + # self.assertEqual(ret, 0, "rm -rf failed on %s with %s" + # % (client, err)) + ret = rmdir(choice(clients), mountpoint + '/*', force=True) + self.assertTrue(ret, "rm -rf failed") + ret, out, err = g.run(choice(clients), 'ls ' + mountpoint) + self.assertEqual((ret, out, err), (0, '', ''), + "Some entries still exist even after rm -rf ;" + " the entries are %s and error msg is %s" + % (out, err)) + g.log.info("rm -rf was successful") + + def test_eagerlock(self): + """ + Test Steps: + 1) Create an ecvolume + 2) Test EagerLock and Other-EagerLock default values and timeout-values + 3) Set the timeout values to 60 + 4) Write to a file and check backend brick for + "trusted.ec.dirty=0x00000000000000000000000000000000", must be non-zero + 5) Create some dirs and files in each dir + 6) Do ls -lRt * --> must not take more than 2-3sec + 7) disable eager lock + 8) retest write to a file and this time lock must be released + immediately with dirty.xattr value all zeros + """ + # Get list of clients + clients = [] + for mount_obj in self.mounts: + clients.append(mount_obj.client_system) + mountpoint = mount_obj.mountpoint + + # Check if EC Eagerlock set of options enabled with correct values + ret = get_volume_options(self.mnode, self.volname) + self.assertTrue(bool((ret['disperse.eager-lock'] == + ret['disperse.other-eager-lock'] == 'on') and + (ret['disperse.eager-lock-timeout'] == + ret['disperse.other-eager-lock-timeout'] == + '1')), + 'Some EC-eagerlock options set are not correct') + # Test behavior with default timeout value of 1sec + objectname, ret, 
hashed_subvol = self._filecreate_and_hashcheck('1sec') + sleep(2) + ret = self._get_dirty_xattr_value(ret, hashed_subvol, objectname) + self.assertEqual(ret, '0x00000000000000000000000000000000', + "Unexpected dirty xattr value is %s on %s" + % (ret, objectname)) + self._file_dir_create(clients, mountpoint) + # Now test the performance issue wrt lookups + self._lookup_response_time(clients, mountpoint) + # Do rm -rf of created data as sanity test + self._rmdir_on_mountpoint(clients, mountpoint) + + # Increasing timeout values to 60sec in order to test the functionality + self._change_eagerlock_timeouts('60') + self._file_dir_create(clients, mountpoint) + objectname, ret, hashed_subvol =\ + self._filecreate_and_hashcheck('60seconds') + # Check in all the bricks "trusted.ec.dirty" value + # It should be "0x00000000000000010000000000000001" + _ = self._get_dirty_xattr_value(ret, hashed_subvol, objectname) + self.assertEqual(_, '0x00000000000000010000000000000001', + "Unexpected dirty xattr value %s on %s" + % (_, objectname)) + # Sleep 60sec after which dirty_val should reset to "0x00000..." 
+ sleep(62) + _ = self._get_dirty_xattr_value(ret, hashed_subvol, objectname) + self.assertEqual(_, '0x00000000000000000000000000000000', + "Unexpected dirty xattr value is %s on %s" + % (_, objectname)) + # Test the performance issue wrt lookups + self._lookup_response_time(clients, mountpoint) + # Do rm -rf of created data as sanity test + self._rmdir_on_mountpoint(clients, mountpoint) + + # Disable EagerLock and other-Eagerlock + ret = set_volume_options(self.mnode, self.volname, + {'disperse.eager-lock': 'off', + 'disperse.other-eager-lock': 'off'}) + self.assertTrue(ret, "failed to turn off eagerlock and " + "other eagerlock on %s" % self.volname) + g.log.info("SUCCESS: Turned off eagerlock and other-eagerlock on %s", + self.volname) + # Again create same dataset and retest ls -lRt, shouldnt take much time + self._file_dir_create(clients, mountpoint) + # Create a new file see the dirty flag getting unset immediately + objectname, ret, hashed_subvol = self._filecreate_and_hashcheck( + 'Eagerlock_Off') + # Check in all the bricks "trusted.ec.dirty value" + # It should be "0x00000000000000000000000000000000" + ret = self._get_dirty_xattr_value(ret, hashed_subvol, objectname) + self.assertEqual(ret, '0x00000000000000000000000000000000', + "Unexpected dirty xattr value is %s on %s" + % (ret, objectname)) + # Test the performance issue wrt ls + self._lookup_response_time(clients, mountpoint) + # Cleanup created data as sanity test + self._rmdir_on_mountpoint(clients, mountpoint) + + def tearDown(self): + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") diff --git a/tests/functional/disperse/test_ec_file_rename_on_brick_down.py b/tests/functional/disperse/test_ec_file_rename_on_brick_down.py new file mode 100644 index 000000000..be82ceeed --- /dev/null +++ b/tests/functional/disperse/test_ec_file_rename_on_brick_down.py @@ -0,0 +1,221 @@ +# Copyright (C) 2020 Red Hat, 
Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-131 USA. + +from random import choice +from time import sleep + +from glusto.core import Glusto as g + +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import get_all_bricks, bring_bricks_offline +from glustolibs.gluster.volume_libs import volume_start +from glustolibs.gluster.glusterfile import create_link_file + + +@runs_on([['dispersed', 'distributed-dispersed'], ['glusterfs']]) +class TestECRenameFilesOnBrickDown(GlusterBaseClass): + + # pylint: disable=too-many-statements,too-many-locals + def setUp(self): + self.get_super_method(self, 'setUp')() + + # Remove on fixing BZ 1596165 + if 'dispersed' in self.volname: + self.skipTest("Test will fail due to BZ 1596165") + + # Setup and mount volume + ret = self.setup_volume_and_mount_volume(self.mounts) + if not ret: + raise ExecutionError("Failed to setup and mount volume") + + def tearDown(self): + + # Unmount and cleanup volume + ret = self.unmount_volume_and_cleanup_volume(self.mounts) + if not ret: + raise ExecutionError("Failed to unmount and cleanup volume") + + self.get_super_method(self, 'tearDown')() + + def create_links(self, client, path): + + # Soft links + for i in range(4, 
7): + ret = create_link_file(client, + '{}/file{}_or'.format(path, i), + '{}/file{}_sl'.format(path, i), soft=True) + self.assertTrue(ret, "Fail: Not able to create soft link for " + "{}/file{}_or".format(path, i)) + g.log.info("Created soft links for files successfully") + + # Hard links + for i in range(7, 10): + ret = create_link_file(client, + '{}/file{}_or'.format(path, i), + '{}/file{}_hl'.format(path, i),) + self.assertTrue(ret, "Fail: Not able to create hard link for " + "{}/file{}_or".format(path, i)) + g.log.info("Created hard links for files successfully") + + def test_ec_rename_files_with_brick_down(self): + """ + Description: Test to check no errors on file/dir renames when one of + the bricks is down in the volume. + Steps: + 1. Create an EC volume + 2. Mount the volume using FUSE on two different clients + 3. Create ~9 files from one of the client + 4. Create ~9 dir with ~9 files each from another client + 5. Create soft-links, hard-links for file{4..6}, file{7..9} + 6. Create soft-links for dir{4..6} + 7. Begin renaming the files, in multiple iterations + 8. Bring down a brick while renaming the files + 9. Bring the brick online after renaming some of the files + 10. Wait for renaming of the files + 11. 
Validate no data loss and files are renamed successfully + """ + + # Creating ~9 files from client 1 on mount + m_point = self.mounts[0].mountpoint + cmd = 'cd %s; touch file{1..9}_or' % m_point + ret, _, _ = g.run(self.clients[0], cmd) + self.assertEqual(ret, 0, "Fail: Not able to create files on " + "{}".format(m_point)) + g.log.info("Files created successfully on mount point") + + # Creating 9 dir X 9 files in each dir from client 2 + cmd = ('cd %s; mkdir -p dir{1..9}_or; touch ' + 'dir{1..9}_or/file{1..9}_or' % m_point) + ret, _, _ = g.run(self.clients[1], cmd) + self.assertEqual(ret, 0, "Fail: Not able to create dir with files on " + "{}".format(m_point)) + g.log.info("Dirs with files are created successfully on mount point") + + # Create required soft links and hard links from client 1 on mount + client, path = self.clients[0], m_point + self.create_links(client, path) + + client = self.clients[1] + for i in range(1, 10): + + # Create required soft and hard links in nested dirs + path = '{}/dir{}_or'.format(m_point, i) + self.create_links(client, path) + + # Create soft links for dirs + path = m_point + for i in range(4, 7): + ret = create_link_file(client, + '{}/dir{}_or'.format(path, i), + '{}/dir{}_sl'.format(path, i), soft=True) + self.assertTrue(ret, "Fail: Not able to create soft link for " + "{}/dir{}_or".format(path, i)) + g.log.info("Created nested soft and hard links for files successfully") + + # Calculate all file count against each section orginal, hard, soft + # links + cmd = ('cd %s; arr=(or sl hl); ' + 'for i in ${arr[*]}; do find . 
-name "*$i" | wc -l ; ' + 'done; ' % m_point) + ret, out, _ = g.run(client, cmd) + self.assertEqual(ret, 0, "Not able get list of soft and hard links " + "created on the mount point") + all_org, all_soft, all_hard = out.split() + + # Rename 2 out of 3 dir's soft links from client 1 + client = self.clients[0] + cmd = ('cd %s; sl=0; ' + 'for line in `ls -R | grep -P "dir(4|5)_sl"`; ' + 'do mv -f "$line" "$line""_renamed"; ((sl++)); done; ' + 'echo $sl;' % m_point) + ret, out, _ = g.run(client, cmd) + self.assertEqual(ret, 0, "Not able to rename directory soft links") + temp_soft = out.strip() + + # Start renaming original files from client 1 and + # softlinks, hardlinks from client 2 + cmd = ('cd %s; arr=(. dir{1..9}_or); or=0; ' + 'for item in ${arr[*]}; do ' + 'cd $item; ' + 'for line in `ls | grep -P "file(1|2)_or"`; ' + 'do mv -f "$line" "$line""_renamed"; ((or++)); sleep 2; done;' + 'cd - > /dev/null; sleep 1; done; echo $or ' % m_point) + proc_or = g.run_async(client, cmd) + + client = self.clients[1] + cmd = ('cd %s; arr=(. 
dir{1..9}_or); sl=0; hl=0; ' + 'for item in ${arr[*]}; do ' + 'cd $item; ' + 'for line in `ls | grep -P "file(4|5)_sl"`; ' + 'do mv -f "$line" "$line""_renamed"; ((sl++)); sleep 1; done; ' + 'for line in `ls | grep -P "file(7|8)_hl"`; ' + 'do mv -f "$line" "$line""_renamed"; ((hl++)); sleep 1; done; ' + 'cd - > /dev/null; sleep 1; done; echo $sl $hl; ' % m_point) + proc_sl_hl = g.run_async(client, cmd) + + # Wait for some files to be renamed + sleep(20) + + # Kill one of the bricks + brick_list = get_all_bricks(self.mnode, self.volname) + ret = bring_bricks_offline(self.volname, choice(brick_list)) + self.assertTrue(ret, "Failed to bring one of the bricks offline") + + # Wait for some more files to be renamed + sleep(20) + + # Bring brick online + ret, _, _ = volume_start(self.mnode, self.volname, force=True) + self.assertEqual(ret, 0, "Not able to start Volume with force option") + + # Wait for rename to complete and take count of file operations + ret, out, _ = proc_or.async_communicate() + self.assertEqual(ret, 0, "Fail: Origianl files are not renamed") + ren_org = out.strip() + + ret, out, _ = proc_sl_hl.async_communicate() + self.assertEqual(ret, 0, "Fail: Soft and Hard links are not renamed") + ren_soft, ren_hard = out.strip().split() + ren_soft = str(int(ren_soft) + int(temp_soft)) + + # Count actual data of renaming links/files + cmd = ('cd %s; arr=(or or_renamed sl sl_renamed hl hl_renamed); ' + 'for i in ${arr[*]}; do find . 
-name "*$i" | wc -l ; ' + 'done; ' % m_point) + ret, out, _ = g.run(client, cmd) + self.assertEqual(ret, 0, "Not able to get count of original and link " + "files after brick was brought up") + (act_org, act_org_ren, act_soft, + act_soft_ren, act_hard, act_hard_ren) = out.split() + + # Validate count of expected and actual rename of + # links/files is matching + for exp, act, msg in ((ren_org, act_org_ren, 'original'), + (ren_soft, act_soft_ren, 'soft links'), + (ren_hard, act_hard_ren, 'hard links')): + self.assertEqual(exp, act, "Count of {} files renamed while brick " + "was offline is not matching".format(msg)) + + # Validate no data is lost in rename process + for exp, act, msg in ( + (int(all_org)-int(act_org_ren), int(act_org), 'original'), + (int(all_soft)-int(act_soft_ren), int(act_soft), 'soft links'), + (int(all_hard)-int(act_hard_ren), int(act_hard), 'hard links'), + ): + self.assertEqual(exp, act, "Count of {} files which are not " + "renamed while brick was offline " + "is not matching".format(msg)) diff --git a/tests/functional/disperse/test_ec_heal_on_file_appends.py b/tests/functional/disperse/test_ec_heal_on_file_appends.py new file mode 100644 index 000000000..b39e6dc0f --- /dev/null +++ b/tests/functional/disperse/test_ec_heal_on_file_appends.py @@ -0,0 +1,186 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from random import sample +from time import sleep + +from glusto.core import Glusto as g + +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import ( + bring_bricks_offline, + bring_bricks_online, + are_bricks_offline, + validate_xattr_on_all_bricks, + get_online_bricks_list) +from glustolibs.gluster.glusterfile import get_file_stat +from glustolibs.misc.misc_libs import kill_process + + +@runs_on([['dispersed'], ['glusterfs']]) +class TestHealOnFileAppends(GlusterBaseClass): + """ + Test to verify heal on dispersed volume on file appends + """ + + def setUp(self): + + self.get_super_method(self, 'setUp')() + self.mount_obj = self.mounts[0] + self.client = self.mount_obj.client_system + + # Setup and mount the volume + ret = self.setup_volume_and_mount_volume(mounts=[self.mount_obj]) + if not ret: + raise ExecutionError("Failed to create and mount volume") + g.log.info("Created and Mounted volume successfully") + + self.offline_bricks = [] + self.is_io_started = False + self.file_name = 'test_file' + + def tearDown(self): + + # Kill the IO on client + if self.is_io_started: + ret = kill_process(self.client, process_names=[self.file_name]) + if not ret: + raise ExecutionError("Not able to kill/stop IO in client") + g.log.info('Successfully stopped IO in client') + + if self.offline_bricks: + ret = bring_bricks_online(self.mnode, self.volname, + self.offline_bricks) + if not ret: + raise ExecutionError(ret, 'Not able to bring bricks {} ' + 'online'.format(self.offline_bricks)) + + # Cleanup and unmount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=[self.mount_obj]) + if not ret: + raise ExecutionError("Failed to 
unmount and cleanup volume") + g.log.info("Unmount and Cleanup of volume is successful") + + self.get_super_method(self, 'tearDown')() + + def test_heal_on_file_appends(self): + """ + Test steps: + - create and mount EC volume 4+2 + - start append to a file from client + - bring down one of the bricks (say b1) + - wait for ~minute and bring down another brick (say b2) + - after ~minute bring up first brick (b1) + - check the xattrs 'ec.size', 'ec.version' + - xattrs of online bricks should be same as an indication to heal + """ + + # Get bricks list + bricks_list = get_online_bricks_list(self.mnode, self.volname) + self.assertIsNotNone(bricks_list, 'Not able to get bricks list') + + # Creating a file, generate and append data to the file + self.file_name = 'test_file' + cmd = ("cd %s ;" + "while true; do " + "cat /dev/urandom | tr -dc [:space:][:print:] " + "| head -c 4K >> %s; sleep 2; " + "done;" + % (self.mount_obj.mountpoint, self.file_name)) + ret = g.run_async(self.client, cmd, + user=self.mount_obj.user) + self.assertIsNotNone(ret, "Not able to start IO on client") + g.log.info('Started generating and appending data to the file') + self.is_io_started = True + + # Select 3 bricks, 2 need to be offline and 1 will be healthy + brick_1, brick_2, brick_3 = sample(bricks_list, 3) + + # Wait for IO to fill the bricks + sleep(30) + + # Bring first brick offline and validate + ret = bring_bricks_offline(self.volname, [brick_1]) + self.assertTrue( + ret, 'Failed to bring brick {} offline'.format(brick_1)) + ret = are_bricks_offline(self.mnode, self.volname, [brick_1]) + self.assertTrue(ret, 'Not able to validate brick {} being ' + 'offline'.format(brick_1)) + g.log.info("Brick %s is brought offline successfully", brick_1) + self.offline_bricks.append(brick_1) + + # Wait for IO to fill the bricks + sleep(30) + + # Bring second brick offline and validate + ret = bring_bricks_offline(self.volname, [brick_2]) + self.assertTrue( + ret, 'Failed to bring brick {} 
offline'.format(brick_2)) + ret = are_bricks_offline(self.mnode, self.volname, [brick_2]) + self.assertTrue(ret, 'Not able to validate brick {} being ' + 'offline'.format(brick_2)) + g.log.info("Brick %s is brought offline successfully", brick_2) + self.offline_bricks.append(brick_2) + + # Wait for IO to fill the bricks + sleep(30) + + # Bring first brick online and validate peer status + ret = bring_bricks_online( + self.mnode, + self.volname, + [brick_1], + bring_bricks_online_methods=['glusterd_restart']) + self.assertTrue(ret, 'Not able to bring brick {} ' + 'online'.format(brick_1)) + g.log.info("Offlined brick %s is brought online successfully", brick_1) + ret = self.validate_peers_are_connected() + self.assertTrue(ret, "Peers are not in connected state after bringing " + "an offline brick to online via `glusterd restart`") + g.log.info("Successfully validated peers are in connected state") + + # To catchup onlined brick with healthy bricks + sleep(30) + + # Validate the xattr to be same on onlined and healthy bric + online_bricks = get_online_bricks_list(self.mnode, self.volname) + self.assertIsNotNone(online_bricks, 'Unable to fetch online bricks') + g.log.info('All online bricks are fetched successfully') + for xattr in ('trusted.ec.size', 'trusted.ec.version'): + ret = validate_xattr_on_all_bricks( + [brick_1, brick_3], self.file_name, xattr) + self.assertTrue(ret, "{} is not same on all online " + "bricks".format(xattr)) + + # Get epoch time on the client + ret, prev_ctime, _ = g.run(self.client, 'date +%s') + self.assertEqual(ret, 0, 'Not able to get epoch time from client') + + # Headroom for file ctime to get updated + sleep(5) + + # Validate file was being apended while checking for xattrs + ret = get_file_stat( + self.client, + '{}/{}'.format(self.mount_obj.mountpoint, self.file_name)) + self.assertIsNotNone(ret, "Not able to get stats of the file") + curr_ctime = ret['epoch_ctime'] + self.assertGreater(int(curr_ctime), int(prev_ctime), "Not able " 
+ "to validate data is appended to the file " + "while checking for xaatrs") + + g.log.info("Data on all online bricks is healed and consistent") diff --git a/tests/functional/disperse/test_ec_io_continuity.py b/tests/functional/disperse/test_ec_io_continuity.py new file mode 100644 index 000000000..2a1510ce0 --- /dev/null +++ b/tests/functional/disperse/test_ec_io_continuity.py @@ -0,0 +1,215 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from datetime import datetime, timedelta +from time import sleep + +from glusto.core import Glusto as g + +from glustolibs.gluster.brick_libs import bring_bricks_offline +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.glusterfile import create_link_file +from glustolibs.gluster.heal_libs import monitor_heal_completion +from glustolibs.gluster.heal_ops import heal_info +from glustolibs.gluster.volume_libs import get_subvols, volume_start +from glustolibs.io.utils import (collect_mounts_arequal, validate_io_procs, + wait_for_io_to_complete) +from glustolibs.misc.misc_libs import upload_scripts + + +@runs_on([['dispersed', 'distributed-dispersed'], ['glusterfs', 'nfs']]) +class TestIOsOnECVolume(GlusterBaseClass): + @classmethod + def setUpClass(cls): + cls.get_super_method(cls, 'setUpClass')() + cls.script_path = '/usr/share/glustolibs/io/scripts' + for file_ops in ('file_dir_ops.py', 'fd_writes.py'): + ret = upload_scripts(cls.clients, + '{}/{}'.format(cls.script_path, file_ops)) + if not ret: + raise ExecutionError('Failed to upload IO scripts to client') + + def setUp(self): + self.get_super_method(self, 'setUp')() + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + self.all_mounts_procs = [] + if not ret: + raise ExecutionError('Failed to setup and mount volume') + + def tearDown(self): + if self.all_mounts_procs: + ret = wait_for_io_to_complete(self.all_mounts_procs, + [self.mounts[1]] * + len(self.all_mounts_procs)) + if not ret: + raise ExecutionError('Wait for IO completion failed on some ' + 'of the clients') + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Not able to unmount and cleanup volume") + self.get_super_method(self, 'tearDown')() + + def _bring_bricks_online_and_monitor_heal(self, bricks): + """Bring the bricks online 
and monitor heal until completion""" + ret, _, _ = volume_start(self.mnode, self.volname, force=True) + self.assertEqual(ret, 0, 'Not able to force start volume') + ret = monitor_heal_completion(self.mnode, + self.volname, + bricks=list(bricks)) + self.assertTrue(ret, 'Heal is not complete for {}'.format(bricks)) + + # pylint: disable=too-many-locals + def test_io_with_cyclic_brick_down(self): + """ + Description: To check heal process on EC volume when brick is brought + down in a cyclic fashion + Steps: + - Create, start and mount an EC volume in two clients + - Create multiple files and directories including all file types on one + directory from client 1 + - Take arequal check sum of above data + - Create another folder and pump different fops from client 2 + - Fail and bring up redundant bricks in a cyclic fashion in all of the + subvols maintaining a minimum delay between each operation + - In every cycle create new dir when brick is down and wait for heal + - Validate heal info on volume when brick down erroring out instantly + - Validate arequal on brining the brick offline + """ + + # Create a directory structure on mount from client 1 + mount_obj = self.mounts[0] + cmd = ('/usr/bin/env python {}/file_dir_ops.py ' + 'create_deep_dirs_with_files --dir-depth 3 ' + '--max-num-of-dirs 5 --fixed-file-size 10k ' + '--num-of-files 9 {}'.format( + self.script_path, + mount_obj.mountpoint, + )) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertEqual(ret, 0, 'Not able to create directory structure') + dir_name = 'user1' + for i in range(5): + ret = create_link_file( + mount_obj.client_system, + '{}/{}/testfile{}.txt'.format(mount_obj.mountpoint, dir_name, + i), + '{}/{}/testfile{}_sl.txt'.format(mount_obj.mountpoint, + dir_name, i), + soft=True) + self.assertTrue(ret, 'Not able to create soft links') + for i in range(5, 9): + ret = create_link_file( + mount_obj.client_system, + '{}/{}/testfile{}.txt'.format(mount_obj.mountpoint, dir_name, + i), + 
'{}/{}/testfile{}_hl.txt'.format(mount_obj.mountpoint, + dir_name, i)) + self.assertTrue(ret, 'Not able to create hard links') + g.log.info('Successfully created directory structure consisting all ' + 'file types on mount') + + # Take note of arequal checksum + ret, exp_arequal = collect_mounts_arequal(mount_obj, path=dir_name) + self.assertTrue(ret, 'Failed to get arequal checksum on mount') + + # Get all the subvols in the volume + subvols = get_subvols(self.mnode, self.volname) + self.assertTrue(subvols.get('volume_subvols'), 'Not able to get ' + 'subvols of the volume') + + # Create a dir, pump IO in that dir, offline b1, wait for IO and + # online b1, wait for heal of b1, bring b2 offline... + m_point, m_client = (self.mounts[1].mountpoint, + self.mounts[1].client_system) + cur_off_bricks = '' + for count, off_brick in enumerate(zip(*subvols.get('volume_subvols')), + start=1): + + # Bring offline bricks online by force starting volume + if cur_off_bricks: + self._bring_bricks_online_and_monitor_heal(cur_off_bricks) + + # Create a dir for running IO + ret = mkdir(m_client, '{}/dir{}'.format(m_point, count)) + self.assertTrue( + ret, 'Not able to create directory for ' + 'starting IO before offline of brick') + + # Start IO in the newly created directory + cmd = ('/usr/bin/env python {}/fd_writes.py -n 10 -t 480 -d 5 -c ' + '16 --dir {}/dir{}'.format(self.script_path, m_point, + count)) + proc = g.run_async(m_client, cmd) + self.all_mounts_procs.append(proc) + + # Wait IO to partially fill the dir + sleep(10) + + # Bring a single brick offline from all of subvols + ret = bring_bricks_offline(self.volname, list(off_brick)) + self.assertTrue(ret, + 'Not able to bring {} offline'.format(off_brick)) + + # Validate heal info errors out, on brining bricks offline in < 5s + start_time = datetime.now().replace(microsecond=0) + ret, _, _ = heal_info(self.mnode, self.volname) + end_time = datetime.now().replace(microsecond=0) + self.assertEqual( + ret, 0, 'Not able to 
query heal info status ' + 'of volume when a brick is offline') + self.assertLess( + end_time - start_time, timedelta(seconds=5), + 'Query of heal info of volume when a brick is ' + 'offline is taking more than 5 seconds') + + # Wait for some more IO to fill dir + sleep(10) + + # Validate arequal on initial static dir + ret, act_arequal = collect_mounts_arequal(mount_obj, path=dir_name) + self.assertTrue( + ret, 'Failed to get arequal checksum on bringing ' + 'a brick offline') + self.assertEqual( + exp_arequal, act_arequal, 'Mismatch of arequal ' + 'checksum before and after killing a brick') + + cur_off_bricks = off_brick + + # Take note of ctime on mount + ret, prev_ctime, _ = g.run(m_client, 'date +%s') + self.assertEqual(ret, 0, 'Not able to get epoch time from client') + + self._bring_bricks_online_and_monitor_heal(cur_off_bricks) + + # Validate IO was happening during brick operations + # and compare ctime of recent file to current epoch time + ret = validate_io_procs(self.all_mounts_procs, + [self.mounts[0]] * len(self.all_mounts_procs)) + self.assertTrue(ret, 'Not able to validate completion of IO on mounts') + self.all_mounts_procs *= 0 # don't validate IO in tearDown + ret, curr_ctime, _ = g.run( + m_client, "find {} -printf '%C@\n' -type f | " + 'sort -r | head -n 1'.format(m_point)) + self.assertEqual( + ret, 0, 'Not able to get ctime of last edited file from the mount') + self.assertGreater( + float(curr_ctime), float(prev_ctime), 'Not able ' + 'to validate IO was happening during brick operations') + + g.log.info('Completed IO continuity test on EC volume successfully') diff --git a/tests/functional/disperse/test_ec_lookup_and_move_operations.py b/tests/functional/disperse/test_ec_lookup_and_move_operations.py new file mode 100644 index 000000000..f9925bfae --- /dev/null +++ b/tests/functional/disperse/test_ec_lookup_and_move_operations.py @@ -0,0 +1,259 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from random import sample +from unittest import SkipTest + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + are_bricks_offline, + are_bricks_online) +from glustolibs.gluster.heal_libs import monitor_heal_completion +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.volume_ops import volume_start +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import (validate_io_procs, wait_for_io_to_complete) + + +@runs_on([['dispersed', 'distributed-dispersed'], ['glusterfs', 'nfs']]) +class TestEcLookupAndMoveOperations(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Check for availability of atleast 3 clients + if len(cls.clients) < 3: + raise SkipTest("This test requires atleast 3 clients") + + # Upload IO scripts for running IO on mounts + cls.script_upload_path = ( + "/usr/share/glustolibs/io/scripts/file_dir_ops.py") + ret = upload_scripts(cls.clients, 
cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" % + cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + # calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup volume and mount it on three clients. + if not self.setup_volume_and_mount_volume(self.mounts): + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + def tearDown(self): + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + if self.mount_procs: + ret = wait_for_io_to_complete(self.mount_procs, self.mounts) + if ret: + raise ExecutionError( + "Wait for IO completion failed on some of the clients") + + # Unmount and cleanup the volume + if not self.unmount_volume_and_cleanup_volume(self.mounts): + raise ExecutionError("Unable to unmount and cleanup volume") + + def _run_create_files(self, file_count, base_name, mpoint, client): + """Run create files using file_dir_op.py""" + cmd = ("/usr/bin/env python {} create_files -f {} --fixed-file-size" + " 1k --base-file-name {} {}".format(self.script_upload_path, + file_count, base_name, + mpoint)) + proc = g.run_async(client, cmd) + self.mount_procs.append(proc) + + def test_ec_lookup_and_move_operations_all_bricks_online(self): + """ + Test Steps: + 1. Create volume and mount the volume on 3 clients, c1(client1), + c2(client2), and, c3(client3) + 2. On c1, mkdir /c1/dir + 3. On c2, Create 4000 files on mount point i.e. "/" + 4. After step 3, Create next 4000 files on c2 on mount point i.e. "/" + 5. On c1 Create 10000 files on /dir/ + 6. On c3 start moving 4000 files created on step 3 from mount point + to /dir/ + 7. 
On c3, start ls in a loop for 20 iterations + """ + # Create directory on client1 + dir_on_mount = self.mounts[0].mountpoint + '/dir' + ret = mkdir(self.mounts[0].client_system, dir_on_mount) + self.assertTrue(ret, "unable to create directory on client" + "1 {}".format(self.mounts[0].client_system)) + g.log.info("Directory created on %s successfully", + self.mounts[0].client_system) + + # Create 4000 files on the mountpoint of client2 + cmd = ("/usr/bin/env python {} create_files -f 4000" + " --fixed-file-size 10k --base-file-name file_from_client2_" + " {}".format(self.script_upload_path, + self.mounts[1].mountpoint)) + ret, _, err = g.run(self.mounts[1].client_system, cmd) + self.assertEqual(ret, 0, "File creation on {} failed with {}". + format(self.mounts[1].client_system, err)) + g.log.info("File creation successful on %s", + self.mounts[1].client_system) + + # Next IO to be ran in the background so using mount_procs list + self.mount_procs = [] + # Create next 4000 files on the mountpoint of client2 + self._run_create_files(file_count=4000, + base_name="files_on_client2_background_", + mpoint=self.mounts[1].mountpoint, + client=self.mounts[1].client_system) + + # Create 10000 files from client 1 on dir1 + self._run_create_files(file_count=10000, + base_name="files_on_client1_background_", + mpoint=dir_on_mount, + client=self.mounts[0].client_system) + + # Move the files created on client2 to dir from client3 + cmd = ("for i in `seq 0 3999`; do mv {}/file_from_client2_$i.txt {}; " + "done".format(self.mounts[2].mountpoint, dir_on_mount)) + proc = g.run_async(self.mounts[2].client_system, cmd) + self.mount_procs.append(proc) + + # Perform a lookup in loop from client3 for 20 iterations + cmd = ("ls -R {}".format(self.mounts[2].mountpoint)) + counter = 20 + while counter: + ret, _, err = g.run(self.mounts[2].client_system, cmd) + self.assertEqual(ret, 0, "ls while mv operation being carried" + " failed with {}".format(err)) + g.log.debug("ls successful for the 
%s time", 21-counter) + counter -= 1 + + self.assertTrue(validate_io_procs(self.mount_procs, self.mounts), + "IO failed on the clients") + # Emptying mount_procs for not validating IO in tearDown + self.mount_procs *= 0 + + def test_ec_lookup_and_move_operations_few_bricks_are_offline(self): + """ + Test Steps: + 1. Mount this volume on 3 mount point, c1, c2, and c3 + 2. Bring down two bricks offline in each subvol. + 3. On client1: under dir1 create files f{1..10000} run in background + 4. On client2: under root dir of mountpoint touch x{1..1000} + 5. On client3: after step 4 action completed, start creating + x{1001..10000} + 6. Bring bricks online which were offline(brought up all the bricks + which were down (2 in each of the two subvols) + 7. While IO on Client1 and Client3 were happening, On client2 move all + the x* files into dir1 + 8. Perform lookup from client 3 + """ + # List two bricks in each subvol + all_subvols_dict = get_subvols(self.mnode, self.volname) + subvols = all_subvols_dict['volume_subvols'] + bricks_to_bring_offline = [] + for subvol in subvols: + self.assertTrue(subvol, "List is empty") + bricks_to_bring_offline.extend(sample(subvol, 2)) + + # Bring two bricks of each subvol offline + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, "Bricks are still online") + g.log.info("Bricks are offline %s", bricks_to_bring_offline) + + # Validating the bricks are offline or not + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, "Few of the bricks are still online in" + " {} in".format(bricks_to_bring_offline)) + g.log.info("%s bricks are offline as expected", + bricks_to_bring_offline) + + # Create directory on client1 + dir_on_mount = self.mounts[0].mountpoint + '/dir1' + ret = mkdir(self.mounts[0].client_system, dir_on_mount) + self.assertTrue(ret, "unable to create directory on client" + " 1 {}".format(self.mounts[0].client_system)) + g.log.info("Dir1 
created on %s successfully", + self.mounts[0].client_system) + + # Next IO to be ran in the background so using mount_procs + # and run_async. + self.mount_procs = [] + + # On client1: under dir1 create files f{1..10000} run in background + self._run_create_files(file_count=10000, base_name="f_", + mpoint=dir_on_mount, + client=self.mounts[0].client_system) + + # On client2: under root dir of the mountpoint touch x{1..1000} + cmd = ("/usr/bin/env python {} create_files -f 1000 --fixed-file-size" + " 10k --base-file-name x {}".format(self.script_upload_path, + self.mounts[1].mountpoint)) + ret, _, err = g.run(self.mounts[1].client_system, cmd) + self.assertEqual(ret, 0, "File creation failed on {} with {}". + format(self.mounts[1].client_system, err)) + g.log.info("File creation successful on %s", + self.mounts[1].client_system) + + # On client3: start creating x{1001..10000} + cmd = ("cd {}; for i in `seq 1000 10000`; do touch x$i; done; " + "cd -".format(self.mounts[2].mountpoint)) + proc = g.run_async(self.mounts[2].client_system, cmd) + self.mount_procs.append(proc) + + # Bring bricks online with volume start force + ret, _, err = volume_start(self.mnode, self.volname, force=True) + self.assertEqual(ret, 0, err) + g.log.info("Volume: %s started successfully", self.volname) + + # Check whether bricks are online or not + ret = are_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, "Bricks {} are still offline". 
+ format(bricks_to_bring_offline)) + g.log.info("Bricks %s are online now", bricks_to_bring_offline) + + # From client2 move all the files with name starting with x into dir1 + cmd = ("for i in `seq 0 999`; do mv {}/x$i.txt {}; " + "done".format(self.mounts[1].mountpoint, dir_on_mount)) + proc = g.run_async(self.mounts[1].client_system, cmd) + self.mount_procs.append(proc) + + # Perform a lookup in loop from client3 for 20 iterations + cmd = ("ls -R {}".format(self.mounts[2].mountpoint)) + counter = 20 + while counter: + ret, _, err = g.run(self.mounts[2].client_system, cmd) + self.assertEqual(ret, 0, "ls while mv operation being carried" + " failed with {}".format(err)) + g.log.debug("ls successful for the %s time", 21-counter) + counter -= 1 + + self.assertTrue(validate_io_procs(self.mount_procs, self.mounts), + "IO failed on the clients") + # Emptying mount_procs for not validating IO in tearDown + self.mount_procs *= 0 + + # Wait for heal to complete + ret = monitor_heal_completion(self.mnode, self.volname,) + self.assertTrue(ret, "Heal didn't completed in the expected time") + g.log.info("Heal completed successfully on %s volume", self.volname) diff --git a/tests/functional/disperse/test_ec_open_fd.py b/tests/functional/disperse/test_ec_open_fd.py new file mode 100644 index 000000000..218713c6f --- /dev/null +++ b/tests/functional/disperse/test_ec_open_fd.py @@ -0,0 +1,174 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +Test Description: + Tests open FD heal for EC volume +""" + +import os +from random import choice +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.heal_libs import monitor_heal_completion +from glustolibs.gluster.brick_libs import (bring_bricks_online, + bring_bricks_offline, + validate_xattr_on_all_bricks) +from glustolibs.gluster.heal_ops import disable_heal +from glustolibs.gluster.volume_ops import set_volume_options +from glustolibs.gluster.volume_libs import (get_subvols, + log_volume_info_and_status) +from glustolibs.gluster.glusterfile import check_if_pattern_in_file +from glustolibs.io.utils import open_file_fd + + +@runs_on([['dispersed', 'distributed-dispersed'], + ['glusterfs']]) +class TestEcOpenFd(GlusterBaseClass): + + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + def test_ec_open_fd(self): + """ + Test Steps: + - disable server side heal + - Create a file + - Set volume option to implement open FD on file + - Bring a 
brick down,say b1 + - Open FD on file + - Bring brick b1 up + - write to open FD file + - Monitor heal + - Check xattr , ec.version and ec.size of file + - Check stat of file + """ + + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + + mountpoint = self.mounts[0].mountpoint + + # Disable server side heal + ret = disable_heal(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to disable server side heal")) + g.log.info("Successfully disabled server side heal") + + # Log Volume Info and Status after disabling server side heal + ret = log_volume_info_and_status(self.mnode, self.volname) + self.assertTrue(ret, ("Logging volume info and status failed " + "on volume %s", self.volname)) + + # Create a file + cmd = ("cd %s; touch 'file_openfd';" % mountpoint) + ret, _, err = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, err) + g.log.info('Finished creating a file while all the bricks are UP') + + # Set volume options + ret = set_volume_options(self.mnode, self.volname, + {"performance.read-after-open": "yes"}) + self.assertTrue(ret, 'Failed to set volume {}' + ' options'.format(self.volname)) + g.log.info('Successfully set %s volume options', self.volname,) + + # Bringing brick b1 offline + sub_vols = get_subvols(self.mnode, self.volname) + subvols_list = sub_vols['volume_subvols'] + bricks_list1 = subvols_list[0] + brick_b1_down = choice(bricks_list1) + ret = bring_bricks_offline(self.volname, + brick_b1_down) + self.assertTrue(ret, 'Brick %s is not offline' % brick_b1_down) + g.log.info('Brick %s is offline successfully', brick_b1_down) + + node = self.mounts[0].client_system + # Open FD + proc = open_file_fd(mountpoint, time=100, + client=node) + + # Bring brick b1 online + ret = bring_bricks_online(self.mnode, self.volname, + [brick_b1_down], + 'glusterd_restart') + self.assertTrue(ret, 'Brick {} is not brought ' + 'online'.format(brick_b1_down)) + g.log.info('Brick %s is online successfully', brick_b1_down) + + # 
Validate peers are connected + ret = self.validate_peers_are_connected() + self.assertTrue(ret, "Peers are not in connected state after bringing" + " an offline brick to online via `glusterd restart`") + g.log.info("Successfully validated peers are in connected state") + + # Check if write to FD is successful + g.log.info('Open FD on file successful') + ret, _, _ = proc.async_communicate() + self.assertEqual(ret, 0, "Write to FD is successful") + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + g.log.info('Heal has completed successfully') + + file_openfd = os.path.join(mountpoint, 'file_openfd') + + # Check if data exists on file + ret = check_if_pattern_in_file(node, 'xyz', file_openfd) + self.assertEqual(ret, 0, 'xyz does not exists in file') + g.log.info('xyz exists in file') + + file_fd = 'file_openfd' + + # Check if EC version is same on all bricks which are up + ret = validate_xattr_on_all_bricks(bricks_list1, file_fd, + 'trusted.ec.version') + self.assertTrue(ret, "Healing not completed and EC version is " + "not updated") + g.log.info("Healing is completed and EC version is updated") + + # Check if EC size is same on all bricks which are up + ret = validate_xattr_on_all_bricks(bricks_list1, file_fd, + 'trusted.ec.size') + self.assertTrue(ret, "Healing not completed and EC size is " + "not updated") + g.log.info("Healing is completed and EC size is updated") + + # Check stat of file + cmd = "cd %s; du -kh file_openfd" % mountpoint + ret, _, err = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, err) + g.log.info('File %s is accessible', file_fd) diff --git a/tests/functional/disperse/test_ec_quorumcount_5.py b/tests/functional/disperse/test_ec_quorumcount_5.py new file mode 100644 index 000000000..c4aadd602 --- /dev/null +++ b/tests/functional/disperse/test_ec_quorumcount_5.py @@ -0,0 +1,309 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +Test Description: + Test Disperse Quorum Count Set to 5 +""" +from random import sample, choice +from glusto.core import Glusto as g + +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import validate_io_procs +from glustolibs.gluster.brick_libs import (bring_bricks_online, + wait_for_bricks_to_be_online, + get_offline_bricks_list, + bring_bricks_offline) +from glustolibs.gluster.rebalance_ops import (rebalance_start, + wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_ops import (volume_reset, + set_volume_options) +from glustolibs.gluster.volume_libs import ( + log_volume_info_and_status, expand_volume, + get_subvols) + + +@runs_on([['dispersed', 'distributed-dispersed'], + ['glusterfs']]) +class TestEcQuorumCount5(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + 
raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def generate_read_cmd(self, mountpoint, start, end): + """Function which generates readcmd""" + self.readcmd = ("cd {}; for i in `seq {} {}` ;" + "do dd if=file$i of=/dev/null bs=1M " + "count=5;done".format(mountpoint, start, end)) + + def test_ec_quorumcount_5(self): + """ + Test Steps: + - Write IO's when all bricks are online + - Get subvol from which bricks to be brought down + - Set volume disperse quorum count to 5 + - Start writing and reading IO's + - Bring a brick down,say b1 + - Validate write and read is successful + - Bring a brick down,say b2 + - Validate write has failed and read is successful + - Start IO's again while quorum is not met on volume + write should fail and read should pass + - Add-brick and log + - Start Rebalance + - Wait for rebalance,which should fail as quorum is not met + - Bring brick online + - Wait for brick to come online + - Check if bricks are online + - Start IO's again when all bricks are online + - IO's should complete successfully + - Start IO's again and reset volume + - Bring down other bricks to max redundancy + - Validating IO's and waiting to 
complete + """ + + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + + mountpoint = self.mounts[0].mountpoint + client1 = self.mounts[0].client_system + client2 = self.mounts[1].client_system + + # Write IO's when all bricks are online + writecmd = ("cd %s; for i in `seq 1 100` ;" + "do dd if=/dev/urandom of=file$i bs=1M " + "count=5;done" % mountpoint) + + # IO's should complete successfully + ret, _, err = g.run(client1, writecmd) + self.assertEqual(ret, 0, err) + g.log.info('Finished writes on files sucessfully') + + # Select a subvol from which bricks to be brought down + sub_vols = get_subvols(self.mnode, self.volname) + bricks_list1 = list(choice(sub_vols['volume_subvols'])) + brick_1, brick_2 = sample(bricks_list1, 2) + + # Set volume disperse quorum count to 5 + ret = set_volume_options(self.mnode, self.volname, + {"disperse.quorum-count": "5"}) + self.assertTrue(ret, 'Failed to set volume {}' + ' options'.format(self.volname)) + g.log.info('Successfully set disperse quorum on %s', self.volname) + + # Start writing and reading IO's + procwrite, procread, count = [], [], 1 + for mount_obj in self.mounts: + writecmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d --dir-depth 5 " + "--dir-length 10 --max-num-of-dirs 2 " + "--num-of-files 15 %s" % ( + self.script_upload_path, count, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, writecmd, + user=mount_obj.user) + procwrite.append(proc) + count += 10 + + self.generate_read_cmd(mountpoint, '1', '10') + ret = g.run_async(client2, self.readcmd) + procread.append(ret) + + # Brick 1st brick down + ret = bring_bricks_offline(self.volname, + brick_1) + self.assertTrue(ret, 'Brick {} is not offline'.format(brick_1)) + g.log.info('Brick %s is offline successfully', brick_1) + + writecmd = ("cd %s; for i in `seq 101 110` ;" + "do dd if=/dev/urandom of=file$i bs=1M " + "count=5;done" % mountpoint) + + # IO's should complete successfully + 
ret, _, err = g.run(client1, writecmd) + self.assertEqual(ret, 0, err) + g.log.info('Finished writes on files sucessfully') + + self.generate_read_cmd(mountpoint, '101', '110') + ret, _, err = g.run(client1, self.readcmd) + self.assertEqual(ret, 0, err) + g.log.info('Finished reads on files sucessfully') + + # Brick 2nd brick down + ret = bring_bricks_offline(self.volname, + brick_2) + self.assertTrue(ret, 'Brick {} is not offline'.format(brick_2)) + g.log.info('Brick %s is offline successfully', brick_2) + + # Validate write has failed and read is successful + ret = validate_io_procs(procwrite, self.mounts) + self.assertFalse(ret, 'Write successful even after disperse quorum is ' + 'not met') + g.log.info('EXPECTED - Writes failed as disperse quroum is not met') + + ret = validate_io_procs(procread, self.mounts[1]) + self.assertTrue(ret, 'Read operation failed on the client') + g.log.info('Reads on files successful') + + # Start IO's again while quorum is not met on volume + procwrite = [] + writecmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num 20 --dir-depth 1 " + "--dir-length 10 --max-num-of-dirs 1 " + "--num-of-files 10 %s" % ( + self.script_upload_path, + mountpoint)) + proc = g.run_async(client1, writecmd) + procwrite.append(proc) + ret = validate_io_procs(procwrite, self.mounts[0]) + self.assertFalse(ret, 'Write successful even after disperse quorum is ' + 'not met') + g.log.info('EXPECTED - Writes failed as disperse quroum is not met') + + self.generate_read_cmd(mountpoint, '1', '100') + ret, _, err = g.run(client2, self.readcmd) + self.assertEqual(ret, 0, err) + g.log.info('Reads on files successful') + + # Add brick + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info, force=True) + self.assertTrue(ret, ("Failed to expand the volume {}".format + (self.volname))) + g.log.info("Expanding volume %s is successful", self.volname) + + # Log Volume Info and Status after expanding the volume + 
ret = log_volume_info_and_status(self.mnode, self.volname) + self.assertTrue(ret, ("Logging volume info and status failed on " + "volume {}".format(self.volname))) + g.log.info("Successful in logging volume info and status of volume %s", + self.volname) + + # Start Rebalance + ret, _, _ = rebalance_start(self.mnode, self.volname) + self.assertEqual(ret, 0, ('Rebalance failed on the volume' + ' {}'.format(self.volname))) + g.log.info('Rebalance has started on volume %s', + self.volname) + + # Wait for rebalance to complete + # Which should also fail as quorum is not met + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=600) + self.assertFalse(ret, "Rebalance passed though disperse quorum " + "is not met on volume") + g.log.info("Expected: Rebalance failed on the volume %s,disperse" + " quorum is not met", self.volname) + + # Bring brick online + brick_list = brick_1, brick_2 + ret = bring_bricks_online(self.mnode, self.volname, + brick_list) + self.assertTrue(ret, 'Brick not brought online') + g.log.info('Brick brought online successfully') + + # Wait for brick to come online + ret = wait_for_bricks_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, 'Bricks are not online') + g.log.info('EXPECTED : Bricks are online') + + # Check if bricks are online + ret = get_offline_bricks_list(self.mnode, self.volname) + self.assertListEqual(ret, [], 'All bricks are not online') + g.log.info('All bricks are online') + + # Start IO's again when all bricks are online + writecmd = ("cd %s; for i in `seq 101 200` ;" + "do dd if=/dev/urandom of=file$i bs=1M " + "count=5;done" % mountpoint) + self.generate_read_cmd(mountpoint, '101', '120') + + # IO's should complete successfully + ret, _, err = g.run(client1, writecmd) + self.assertEqual(ret, 0, err) + g.log.info('Writes on client % successful', client1) + + ret, _, err = g.run(client2, self.readcmd) + self.assertEqual(ret, 0, err) + g.log.info('Read on client % successful', client2) + + # Start 
IO's again + all_mounts_procs, count = [], 30 + for mount_obj in self.mounts: + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d --dir-depth 2 " + "--dir-length 10 --max-num-of-dirs 5 " + "--num-of-files 5 %s" % ( + self.script_upload_path, count, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + all_mounts_procs.append(proc) + count += 10 + + # Reset volume + ret, _, err = volume_reset(self.mnode, self.volname) + self.assertEqual(ret, 0, err) + g.log.info('Reset of volume %s successful', self.volname) + + # Bring down other bricks to max redundancy + # Bringing bricks offline + bricks_to_offline = sample(bricks_list1, 2) + ret = bring_bricks_offline(self.volname, + bricks_to_offline) + self.assertTrue(ret, 'Redundant bricks not offline') + g.log.info('Redundant bricks are offline successfully') + + # Validating IO's and waiting to complete + ret = validate_io_procs(all_mounts_procs, self.mounts) + self.assertTrue(ret, 'IO failed on some of the clients') + g.log.info("Successfully validated all IO's") diff --git a/tests/functional/disperse/test_ec_quorumcount_6.py b/tests/functional/disperse/test_ec_quorumcount_6.py new file mode 100644 index 000000000..5ccc59180 --- /dev/null +++ b/tests/functional/disperse/test_ec_quorumcount_6.py @@ -0,0 +1,286 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +Test Description: + Test Disperse Quorum Count Set to 6 +""" +from random import sample, choice +from glusto.core import Glusto as g + +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import validate_io_procs +from glustolibs.gluster.brick_libs import (bring_bricks_online, + wait_for_bricks_to_be_online, + get_offline_bricks_list, + bring_bricks_offline) +from glustolibs.gluster.rebalance_ops import (rebalance_start, + wait_for_rebalance_to_complete) +from glustolibs.gluster.volume_ops import (volume_reset, + set_volume_options) +from glustolibs.gluster.volume_libs import ( + log_volume_info_and_status, expand_volume, + get_subvols) + + +@runs_on([['dispersed', 'distributed-dispersed'], + ['glusterfs']]) +class TestEcQuorumCount6(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def 
tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_ec_quorumcount_6(self): + """ + Test Steps: + - Write IO's when all bricks are online + - Get subvol from which bricks to be brought down + - Set volume disperse quorum count to 6 + - Start writing and reading IO's + - Bring a brick down,say b1 + - Validate write has failed and read is successful + - Start IO's again while quorum is not met on volume + write should fail and read should pass + - Add-brick and log + - Start Rebalance + - Wait for rebalance,which should fail as quorum is not met + - Bring brick online + - Wait for brick to come online + - Check if bricks are online + - Start IO's again when all bricks are online + - IO's should complete successfully + - Start IO's again and reset volume + - Bring down other bricks to max redundancy + - Validating IO's and waiting to complete + """ + + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + + mountpoint = self.mounts[0].mountpoint + client1 = self.mounts[0].client_system + client2 = self.mounts[1].client_system + + # Write IO's when all bricks are online + writecmd = ("cd %s; for i in `seq 1 100` ;" + "do dd if=/dev/urandom of=file$i bs=1M " + "count=5;done" % mountpoint) + + # IO's should complete successfully + ret, _, err = g.run(client1, writecmd) + self.assertEqual(ret, 0, err) + g.log.info('Finished writes on files sucessfully') + + # Select a subvol from which bricks to be brought down + sub_vols = get_subvols(self.mnode, self.volname) + bricks_list1 = list(choice(sub_vols['volume_subvols'])) + brick_1 = sample(bricks_list1, 1) + + # Set volume disperse quorum count to 6 + ret = 
set_volume_options(self.mnode, self.volname, + {"disperse.quorum-count": "6"}) + self.assertTrue(ret, 'Failed to set volume {}' + ' options'.format(self.volname)) + g.log.info('Successfully set disperse quorum on %s', self.volname) + + # Start writing and reading IO's + procwrite, procread, count = [], [], 1 + for mount_obj in self.mounts: + writecmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d --dir-depth 1 " + "--dir-length 10 --max-num-of-dirs 1 " + "--num-of-files 10 %s" % ( + self.script_upload_path, count, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, writecmd, + user=mount_obj.user) + procwrite.append(proc) + count = count + 10 + + readcmd = ("cd %s; for i in `seq 1 100` ;" + "do dd if=file$i of=/dev/null bs=1M " + "count=5;done" % mountpoint) + ret = g.run_async(client2, readcmd) + procread.append(ret) + + # Brick 1st brick down + ret = bring_bricks_offline(self.volname, + brick_1) + self.assertTrue(ret, 'Brick {} is not offline'.format(brick_1)) + g.log.info('Brick %s is offline successfully', brick_1) + + # Validate write has failed and read is successful + ret = validate_io_procs(procwrite, self.mounts) + self.assertFalse(ret, 'Write successful even after disperse quorum is ' + 'not met') + g.log.info('EXPECTED - Writes failed as disperse quroum is not met') + + ret = validate_io_procs(procread, self.mounts[1]) + self.assertTrue(ret, 'Read operation failed on the client') + g.log.info('Reads on files successful') + + # Start IO's again while quorum is not met on volume + + writecmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num 20 --dir-depth 1 " + "--dir-length 10 --max-num-of-dirs 1 " + "--num-of-files 10 %s" % ( + self.script_upload_path, + mountpoint)) + readcmd = ("cd %s; for i in `seq 1 100` ;" + "do dd if=file$i of=/dev/null bs=1M " + "count=5;done" % mountpoint) + + ret, _, err = g.run(client1, writecmd) + self.assertNotEqual(ret, 0, 'Writes passed 
even after disperse quorum ' + 'not met') + g.log.info('Expected: Writes failed as disperse quorum is not ' + 'met with %s error', err) + + ret, _, err = g.run(client2, readcmd) + self.assertEqual(ret, 0, err) + g.log.info('Reads on files successful') + + # Add brick + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info, force=True) + self.assertTrue(ret, ("Failed to expand the volume {}".format + (self.volname))) + g.log.info("Expanding volume %s is successful", self.volname) + + # Log Volume Info and Status after expanding the volume + ret = log_volume_info_and_status(self.mnode, self.volname) + self.assertTrue(ret, ("Logging volume info and status failed on " + "volume {}".format(self.volname))) + g.log.info("Successful in logging volume info and status of volume %s", + self.volname) + + # Start Rebalance + ret, _, _ = rebalance_start(self.mnode, self.volname) + self.assertEqual(ret, 0, ('Rebalance failed on the volume' + ' {}'.format(self.volname))) + g.log.info('Rebalance has started on volume %s', + self.volname) + + # Wait for rebalance to complete + # Which should also fail as quorum is not met + ret = wait_for_rebalance_to_complete(self.mnode, self.volname, + timeout=600) + self.assertFalse(ret, "Rebalance passed though disperse quorum " + "is not met on volume") + g.log.info("Expected: Rebalance failed on the volume %s,disperse" + " quorum is not met", self.volname) + + # Bring brick online + ret = bring_bricks_online(self.mnode, self.volname, + brick_1) + self.assertTrue(ret, 'Brick not brought online') + g.log.info('Brick brought online successfully') + + # Wait for brick to come online + ret = wait_for_bricks_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, 'Bricks are not online') + g.log.info('EXPECTED : Bricks are online') + + # Check if bricks are online + ret = get_offline_bricks_list(self.mnode, self.volname) + self.assertListEqual(ret, [], 'All bricks are not online') + g.log.info('All bricks are 
online') + + # Start IO's again when all bricks are online + writecmd = ("cd %s; for i in `seq 101 200` ;" + "do dd if=/dev/urandom of=file$i bs=1M " + "count=5;done" % mountpoint) + readcmd = ("cd %s; for i in `seq 101 200` ;" + "do dd if=file$i of=/dev/null bs=1M " + "count=5;done" % mountpoint) + + # IO's should complete successfully + ret, _, err = g.run(client1, writecmd) + self.assertEqual(ret, 0, err) + g.log.info('Writes on client % successful', client1) + + ret, _, err = g.run(client2, readcmd) + self.assertEqual(ret, 0, err) + g.log.info('Read on client % successful', client2) + + # Start IO's again + all_mounts_procs, count = [], 30 + for mount_obj in self.mounts: + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d --dir-depth 2 " + "--dir-length 10 --max-num-of-dirs 5 " + "--num-of-files 5 %s" % ( + self.script_upload_path, count, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + all_mounts_procs.append(proc) + count = count + 10 + + # Reset volume + ret, _, err = volume_reset(self.mnode, self.volname) + self.assertEqual(ret, 0, err) + g.log.info('Reset of volume %s successful', self.volname) + + # Bring down other bricks to max redundancy + # Bringing bricks offline + bricks_to_offline = sample(bricks_list1, 2) + ret = bring_bricks_offline(self.volname, + bricks_to_offline) + self.assertTrue(ret, 'Redundant bricks not offline') + g.log.info('Redundant bricks are offline successfully') + + # Validating IO's and waiting to complete + ret = validate_io_procs(all_mounts_procs, self.mounts) + self.assertTrue(ret, 'IO failed on some of the clients') + g.log.info("Successfully validated all IO's") diff --git a/tests/functional/disperse/test_ec_quota.py b/tests/functional/disperse/test_ec_quota.py new file mode 100644 index 000000000..8b3440780 --- /dev/null +++ b/tests/functional/disperse/test_ec_quota.py @@ -0,0 +1,159 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +Test Description: + Test quota on an EC volume +""" + + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.quota_ops import (quota_enable, + quota_disable, + quota_set_soft_timeout, + quota_set_hard_timeout, + quota_limit_usage) +from glustolibs.gluster.glusterdir import mkdir + + +@runs_on([['dispersed', 'distributed-dispersed'], + ['glusterfs']]) +class TestEcQuota(GlusterBaseClass): + + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Cleanup and umount volume + """ + # Disable Quota + ret, _, _ = quota_disable(self.mnode, self.volname) + if ret: + raise ExecutionError("Failed to disable quota on the volume %s") + g.log.info("Successfully disabled quota on the volume %") + + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise 
ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def set_quota_limit(self, limit): + """ + Set Quota limit on the volume + """ + # Path to set quota limit + path = "/" + + # Set Quota limit + ret, _, _ = quota_limit_usage(self.mnode, self.volname, + path, limit=limit) + self.assertEqual(ret, 0, ("Failed to set quota limit on path %s of " + "the volume %s", path, self.volname)) + g.log.info("Successfully set the Quota limit on %s of the volume %s", + path, self.volname) + + def read_write_files(self, files, mount_dir, client): + """ + Read and write files on the volume + """ + # Write files + for i in range(1, 5): + writecmd = ("cd %s/dir%s; for i in `seq 1 %s` ;" + "do dd if=/dev/urandom of=file$i bs=1M " + "count=5;done" % (mount_dir, i, files)) + ret, _, _ = g.run(client, writecmd) + self.assertEqual(ret, 0, "Unexpected: File creation failed ") + g.log.info("Expected: File creation succeeded") + + # Reading files + for i in range(1, 5): + readcmd = ("cd %s/dir%s; for i in `seq 1 %s` ;" + "do dd if=file$i of=/dev/null bs=1M " + "count=5;done" % (mount_dir, i, files)) + ret, _, _ = g.run(client, readcmd) + self.assertEqual(ret, 0, "Unexpected: Reading of file failed ") + g.log.info("Expected: Able to read file successfully") + + def test_ec_quota(self): + """ + - Enable quota on the volume + - Set a limit of 4 GB on the root of the volume + - Set Quota soft-timeout to 0 seconds + - Set Quota hard-timeout to 0 second + - Create 10 directories from the mount point + - Create files of around 2.5 GB + - Reading files + - Decrease quota limit to 3 GB on the root of the volume + - Writing files of around 500 MB + - Reading files + """ + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + # Enable quota on the volume + ret, _, _ = quota_enable(self.mnode, self.volname) + 
self.assertEqual(ret, 0, ("Failed to enable quota on the volume %s", + self.volname)) + g.log.info("Successfully enabled quota on the volume %s", self.volname) + + # Set a limit of 4 GB on the root of the volume + self.set_quota_limit(limit="4GB") + + # Set Quota soft-timeout to 0 seconds + ret, _, _ = quota_set_soft_timeout(self.mnode, self.volname, '0sec') + self.assertEqual(ret, 0, "Failed to set soft timeout") + g.log.info("Quota soft timeout set successful") + + # Set Quota hard-timeout to 0 second + ret, _, _ = quota_set_hard_timeout(self.mnode, self.volname, '0sec') + self.assertEqual(ret, 0, "Failed to set hard timeout") + g.log.info("Quota hard timeout set successful") + + # Create 10 directories from the mount point + mount_obj = self.mounts[0] + mount_dir = mount_obj.mountpoint + client = mount_obj.client_system + + for i in range(1, 11): + ret = mkdir(client, "%s/dir%s" % (mount_dir, i)) + self.assertTrue(ret, ("Failed to create dir under %s-%s", + client, mount_dir)) + g.log.info("Directory 'dir%s' created successfully", i) + g.log.info("Successfully created directories on %s:%s", + client, mount_dir) + + # Create files of around 2.5 GB and reading + self.read_write_files(files=100, mount_dir=mount_dir, + client=client) + + # Decrease quota limit to 3 GB on the root of the volume + self.set_quota_limit(limit="3GB") + + # Writing files of around 500 MB and reading + self.read_write_files(files=10, mount_dir=mount_dir, + client=client) diff --git a/tests/functional/disperse/test_ec_quota_errors.py b/tests/functional/disperse/test_ec_quota_errors.py new file mode 100644 index 000000000..8aa333878 --- /dev/null +++ b/tests/functional/disperse/test_ec_quota_errors.py @@ -0,0 +1,415 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from math import ceil +from random import sample +from time import sleep, time +from unittest import SkipTest + +from glusto.core import Glusto as g + +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + get_online_bricks_list) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.glusterfile import remove_file +from glustolibs.gluster.lib_utils import (append_string_to_file, + get_disk_usage, + search_pattern_in_file) +from glustolibs.gluster.quota_ops import (quota_enable, quota_fetch_list, + quota_limit_usage, + quota_set_alert_time, + quota_set_hard_timeout, + quota_set_soft_timeout) +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.volume_ops import volume_start +from glustolibs.io.utils import validate_io_procs, wait_for_io_to_complete +from glustolibs.misc.misc_libs import upload_scripts + + +@runs_on([['dispersed', 'distributed-dispersed'], ['glusterfs']]) +class TestEcQuotaError(GlusterBaseClass): + """ + Description: To check EIO errors changes to EDQUOTE errors when the + specified quota limits are breached + """ + # pylint: 
disable=too-many-instance-attributes, too-many-statements + @classmethod + def setUpClass(cls): + cls.get_super_method(cls, 'setUpClass')() + if cls.volume_type == 'distributed-dispersed': + raise SkipTest('BZ #1707813 limits the functionality of fallocate') + if cls.volume_type == 'dispersed': + raise SkipTest('BZ #1339144 is being hit intermittently') + cls.script_path = '/usr/share/glustolibs/io/scripts/fd_writes.py' + ret = upload_scripts(cls.clients, cls.script_path) + if not ret: + raise ExecutionError('Failed to upload IO script to client') + + def setUp(self): + self.get_super_method(self, 'setUp')() + self.num_of_dirs = 2 + + # For test_ec_quota_errors_on_limit only one client is needed + if 'on_limit' in self.id().split('.')[-1]: + self.num_of_dirs = 1 + self.mounts = [self.mounts[0]] + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + self.all_mount_procs = [] + self.offline_bricks = [] + if not ret: + raise ExecutionError('Failed to setup and mount volume') + + def tearDown(self): + if self.offline_bricks: + ret, _, _ = volume_start(self.mnode, self.volname, force=True) + if ret: + raise ExecutionError('Not able to force start volume to bring ' + 'offline bricks online') + if self.all_mount_procs: + ret = wait_for_io_to_complete(self.all_mount_procs, self.mounts) + if not ret: + raise ExecutionError('Wait for IO completion failed') + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError('Failed to unmount and cleanup volume') + self.get_super_method(self, 'tearDown')() + + def _get_space_in_gb(self, host, path, size='free'): + """ + Return available or total space on the provided `path` + Kwargs: + size (str) : total/free(default) size to be queried on `path` + """ + space_avail = get_disk_usage(host, path) + self.assertIsNotNone( + space_avail, 'Failed to get disk usage stats of ' + '{} on {}'.format(host, path)) + if size == 'total': + return int(ceil(space_avail['total'])) + return 
int(ceil(space_avail['free'])) + + def _insert_bp(self, host, logpath): + """ + Generates and inserts a breakpoint in the given logpath on the host + """ + append_string = self.bp_text + str(self.bp_count) + ret = append_string_to_file(host, logpath, append_string) + self.assertTrue( + ret, 'Not able to append string to the file {} ' + 'on {}'.format(logpath, host)) + self.bp_count += 1 + + def _fallocate_file(self): + """ + Perform `fallocate -l <alloc_size> <fqpath>` on <client> + """ + + # Delete the files if exists (sparsefile is created on absolute sizes) + ret = remove_file(self.client, self.fqpath + '*', force=True) + self.assertTrue( + ret, 'Unable to delete existing file for `fallocate` of new file') + sleep(5) + + # Create multiple sparsefiles rather than one big file + sizes = [self.alloc_size] + if self.alloc_size >= self.brick_size: + sizes = ([self.brick_size // 2] * + (self.alloc_size // self.brick_size)) + sizes *= 2 + sizes.append(self.alloc_size % self.brick_size) + rem_size = self.alloc_size - sum(sizes) + if rem_size: + sizes.append(rem_size) + + for count, size in enumerate(sizes, start=1): + ret, _, _ = g.run( + self.client, + 'fallocate -l {}G {}{}'.format(size, self.fqpath, count)) + self.assertEqual( + ret, 0, 'Not able to fallocate {}* file on {}'.format( + self.fqpath, self.client)) + count += 1 + + def _validate_error_in_mount_log(self, pattern, exp_pre=True): + """ + Validate type of error from mount log on setting quota + """ + assert_method = self.assertTrue + assert_msg = ('Fail: Not able to validate presence of "{}" ' + 'in mount log'.format(pattern)) + if not exp_pre: + assert_method = self.assertFalse + assert_msg = ('Fail: Not able to validate absence of "{}" ' + 'in mount log'.format(pattern)) + ret = search_pattern_in_file(self.client, pattern, self.logpath, + self.bp_text + str(self.bp_count - 2), + self.bp_text + str(self.bp_count - 1)) + assert_method(ret, assert_msg) + + # Validate against `quota list` command + if 'quota' 
in pattern.lower(): + dir_path = '/dir/dir1' + ret = quota_fetch_list(self.mnode, self.volname) + self.assertIsNotNone( + ret.get(dir_path), + 'Not able to get quota list for the path {}'.format(dir_path)) + ret = ret.get(dir_path) + verified = False + if ret['sl_exceeded'] is exp_pre and ret['hl_exceeded'] is exp_pre: + verified = True + self.assertTrue( + verified, 'Failed to validate Quota list command against ' + 'soft and hard limits') + + def _perform_quota_ops_before_brick_down(self): + """ + Refactor of common test steps across three test functions + """ + self.client, self.m_point = (self.mounts[0].client_system, + self.mounts[0].mountpoint) + ret = mkdir(self.client, '%s/dir/dir1' % self.m_point, parents=True) + self.assertTrue(ret, 'Failed to create first dir on mountpoint') + if self.num_of_dirs == 2: + ret = mkdir(self.client, '%s/dir/dir' % self.m_point) + self.assertTrue(ret, 'Failed to create second dir on mountpoint') + + # Types of errors + self.space_error = 'Input/output error|No space left on device' + self.quota_error = 'Disk quota exceeded' + + # Start IO from the clients + cmd = ('/usr/bin/env python {} -n 10 -t 480 -d 10 -c 256 --dir ' + '{}/dir/dir{}') + for count, mount in enumerate(self.mounts, start=1): + proc = g.run_async( + mount.client_system, + cmd.format(self.script_path, mount.mountpoint, count)) + self.all_mount_procs.append(proc) + + # fallocate a large file and perform IO on remaining space + online_bricks = get_online_bricks_list(self.mnode, self.volname) + self.assertIsNotNone(online_bricks, 'Failed to get list of online ' + 'bricks') + brick_node, brick_path = online_bricks[0].split(':') + self.brick_size = self._get_space_in_gb(brick_node, + brick_path, + size='total') + self.free_disk_size = self._get_space_in_gb(self.client, self.m_point) + self.fqpath = self.m_point + '/sparsefile' + self.rem_size = 1 # Only 1G will be available to the mount + self.alloc_size = self.free_disk_size - self.rem_size + 
self._fallocate_file() + + # Insert breakpoint in the log + self.bp_text = 'breakpoint_' + str(ceil(time())) + '_' + self.bp_count = 1 + self.logpath = ('/var/log/glusterfs/mnt-' + self.volname + + '_glusterfs.log') + self._insert_bp(self.client, self.logpath) + + # Create file with size greater than available mount space + self.cmd = ('cd {}; cat /dev/urandom | tr -dc [:space:][:print:] ' + '| head -c {}G > datafile_{};') + self.fqpath = self.m_point + '/dir/dir1' + proc = g.run_async( + self.client, + self.cmd.format(self.fqpath, self.rem_size * 2, self.bp_count)) + self.assertFalse( + validate_io_procs([proc], self.mounts[0]), + 'Fail: Process should not allow data more ' + 'than available space to be written') + sleep(10) + self._insert_bp(self.client, self.logpath) + + # Validate space error in the mount log + self._validate_error_in_mount_log(pattern=self.space_error) + + # Enable quota and set all alert timeouts to 0secs + ret, _, _ = quota_enable(self.mnode, self.volname) + self.assertEqual(ret, 0, 'Not able to enable quota on the volume') + for alert_type, msg in ((quota_set_alert_time, + 'alert'), (quota_set_soft_timeout, 'soft'), + (quota_set_hard_timeout, 'hard')): + ret, _, _ = alert_type(self.mnode, self.volname, '0sec') + self.assertEqual( + ret, 0, 'Failed to set quota {} timeout to 0sec'.format(msg)) + + # Expose only 20G and set quota's on the dir + self.rem_size = 20 # Only 20G will be available to whole mount + self.alloc_size = self.free_disk_size - self.rem_size + self.fqpath = self.m_point + '/sparsefile' + self._fallocate_file() + + self._insert_bp(self.client, self.logpath) + ret, _, _ = quota_limit_usage(self.mnode, + self.volname, + path='/dir/dir1', + limit='10GB') + self.assertEqual(ret, 0, 'Not able to set quota limit on /dir/dir1') + if self.num_of_dirs == 2: + ret, _, _ = quota_limit_usage(self.mnode, + self.volname, + path='/dir/dir2', + limit='5GB') + self.assertEqual(ret, 0, 'Not able to set quota limit on ' + '/dir/dir2') + + # 
Write data more than available quota and validate error + sleep(10) + self.rem_size = 1 # Only 1G will be availble to /dir/dir1 + self.alloc_size = 9 + self.fqpath = self.m_point + '/dir/dir1/sparsefile' + self._fallocate_file() + + self.fqpath = self.m_point + '/dir/dir1' + proc = g.run_async( + self.client, + self.cmd.format(self.fqpath, self.rem_size * 2, self.bp_count)) + self.assertFalse( + validate_io_procs([proc], self.mounts[0]), + 'Fail: Process should not allow data more ' + 'than available space to be written') + sleep(10) + self._insert_bp(self.client, self.logpath) + self._validate_error_in_mount_log(pattern=self.quota_error) + self._validate_error_in_mount_log(pattern=self.space_error, + exp_pre=False) + + def _perform_quota_ops_after_brick_down(self): + """ + Refactor of common test steps across three test functions + """ + # Increase the quota limit on dir/dir1 and validate no errors on writes + self.alloc_size = self.free_disk_size - 50 + self.fqpath = self.m_point + '/sparsefile' + self._fallocate_file() + ret, _, _ = quota_limit_usage(self.mnode, + self.volname, + path='/dir/dir1', + limit='40GB') + self.assertEqual(ret, 0, 'Not able to expand quota limit on /dir/dir1') + sleep(15) + self._insert_bp(self.client, self.logpath) + self.fqpath = self.m_point + '/dir/dir1' + proc = g.run_async( + self.client, + self.cmd.format(self.fqpath, self.rem_size * 3, self.bp_count)) + self.assertTrue( + validate_io_procs([proc], self.mounts[0]), + 'Fail: Not able to write data even after expanding quota limit') + sleep(10) + self._insert_bp(self.client, self.logpath) + self._validate_error_in_mount_log(pattern=self.quota_error, + exp_pre=False) + self._validate_error_in_mount_log(pattern=self.space_error, + exp_pre=False) + + # Decrease the quota limit and validate error on reaching quota + self._insert_bp(self.client, self.logpath) + ret, _, _ = quota_limit_usage(self.mnode, + self.volname, + path='/dir/dir1', + limit='15GB') + self.assertEqual(ret, 0, 'Not 
able to expand quota limit on /dir/dir1') + sleep(10) + self.fqpath = self.m_point + '/dir/dir1' + self.rem_size = self._get_space_in_gb(self.client, self.fqpath) + proc = g.run_async( + self.client, + self.cmd.format(self.fqpath, self.rem_size * 3, self.bp_count)) + self.assertFalse( + validate_io_procs([proc], self.mounts[0]), + 'Fail: Process should not allow data more ' + 'than available space to be written') + sleep(10) + self._insert_bp(self.client, self.logpath) + self._validate_error_in_mount_log(pattern=self.quota_error) + self._validate_error_in_mount_log(pattern=self.space_error, + exp_pre=False) + + def test_ec_quota_errors_on_brick_down(self): + """ + Steps: + - Create and mount EC volume on two clients + - Create two dirs on the mount and perform parallel IO from clients + - Simulate disk full to validate EIO errors when no space is left + - Remove simulation and apply different quota limits on two dirs + - Bring down redundant bricks from the volume + - Validate EDQUOTE error on reaching quota limit and extend quota to + validate absence of EDQUOTE error + - Reduce the quota limit and validate EDQUOTE error upon reaching quota + - Remove quota limits, unmount and cleanup the volume + """ + self._perform_quota_ops_before_brick_down() + + # Bring redundant bricks offline + subvols = get_subvols(self.mnode, self.volname) + self.assertTrue(subvols.get('volume_subvols'), 'Not able to get ' + 'subvols of the volume') + self.offline_bricks = [] + for subvol in subvols['volume_subvols']: + self.offline_bricks.extend( + sample(subvol, + self.volume.get('voltype')['redundancy_count'])) + ret = bring_bricks_offline(self.volname, self.offline_bricks) + self.assertTrue(ret, 'Not able to bring redundant bricks offline') + + self._perform_quota_ops_after_brick_down() + + # Bring offline bricks online + ret, _, _ = volume_start(self.mnode, self.volname, force=True) + self.assertEqual(ret, 0, 'Not able to bring offline bricks online') + self.offline_bricks *= 0 + + 
g.log.info('Pass: Validating quota errors on brick down is successful') + + def test_ec_quota_errors_with_multiple_ios(self): + """ + Steps: + - Create and mount EC volume on two clients + - Create two dirs on the mount and perform parallel IO from clients + - Simulate disk full to validate EIO errors when no space is left + - Remove simulation and apply quota limits on base dir + - Validate EDQUOTE error on reaching quota limit and extend quota to + validate absence of EDQUOTE error + - Reduce the quota limit and validate EDQUOTE error upon reaching quota + - Remove quota limits, unmount and cleanup the volume + """ + self._perform_quota_ops_before_brick_down() + self._perform_quota_ops_after_brick_down() + g.log.info('Pass: Validating quota errors with multiple IOs is ' + 'successful') + + def test_ec_quota_errors_on_limit(self): + """ + Steps: + - Create and mount EC volume on one client + - Create a dir on the mount and perform IO from clients + - Simulate disk full to validate EIO errors when no space is left + - Remove simulation and apply quota limits on the dir + - Validate EDQUOTE error on reaching quota limit and extend quota to + validate absence of EDQUOTE error + - Reduce the quota limit and validate EDQUOTE error upon reaching quota + - Remove quota limits, unmount and cleanup the volume + """ + + # Only a single client is used + self._perform_quota_ops_before_brick_down() + self._perform_quota_ops_after_brick_down() + g.log.info('Pass: Validating quota errors on limit breach is ' + 'successful') diff --git a/tests/functional/disperse/test_ec_read_from_hardlink.py b/tests/functional/disperse/test_ec_read_from_hardlink.py new file mode 100644 index 000000000..469bc673b --- /dev/null +++ b/tests/functional/disperse/test_ec_read_from_hardlink.py @@ -0,0 +1,111 @@ +# Copyright (C) 2020 Red Hat, Inc. 
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

from glusto.core import Glusto as g
from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on)
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.mount_ops import mount_volume, umount_volume
from glustolibs.gluster.volume_ops import (set_volume_options,
                                           volume_reset)
from glustolibs.gluster.glusterfile import create_link_file


@runs_on([['dispersed', 'distributed-dispersed'],
          ['glusterfs', 'nfs']])
class TestEcReadFromHardlink(GlusterBaseClass):
    """Verify md-cache serves reads correctly through a hardlink on EC."""

    def setUp(self):
        self.get_super_method(self, 'setUp')()

        # Setup volume (the mount is performed inside the test after the
        # metadata-cache options are applied)
        if not self.setup_volume():
            raise ExecutionError("Failed to Setup_Volume")
        g.log.info("Successful in Setup Volume")

    def tearDown(self):
        self.get_super_method(self, 'tearDown')()

        # Unmount the volume
        ret = umount_volume(mclient=self.mounts[0].client_system,
                            mpoint=self.mounts[0].mountpoint)
        if not ret:
            raise ExecutionError("Unable to umount the volume")
        g.log.info("Unmounting of the volume %s succeeded", self.volname)

        # The reason for volume reset is, metadata-cache is enabled
        # by group, can't disable the group in glusterfs.
        ret, _, _ = volume_reset(self.mnode, self.volname)
        if ret:
            raise ExecutionError("Unable to reset the volume {}".
                                 format(self.volname))
        g.log.info("Volume: %s reset successful ", self.volname)

        # Cleanup the volume
        if not self.cleanup_volume():
            raise ExecutionError("Unable to perform volume cleanup")
        g.log.info("Volume cleanup is successful")

    def test_ec_read_from_hardlink(self):
        """
        Test steps:
        1. Enable metadata-cache(md-cache) options on the volume
        2. Touch a file and create a hardlink for it
        3. Read data from the hardlink.
        4. Read data from the actual file.
        """
        options = {'group': 'metadata-cache'}
        # Set metadata-cache options as group
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, "Unable to set the volume options {}".
                        format(options))
        g.log.info("Able to set the %s options", options)

        # Mounting the volume on one client
        ret, _, _ = mount_volume(self.volname, mtype=self.mount_type,
                                 mpoint=self.mounts[0].mountpoint,
                                 mserver=self.mnode,
                                 mclient=self.mounts[0].client_system)
        self.assertEqual(ret, 0, ("Volume {} is not mounted").
                         format(self.volname))
        g.log.info("Volume mounted successfully : %s", self.volname)

        file_name = self.mounts[0].mountpoint + "/test1"
        content = "testfile"
        hard_link = self.mounts[0].mountpoint + "/test1_hlink"
        cmd = 'echo "{content}" > {file}'.format(file=file_name,
                                                 content=content)

        # Creating a file with data; the assertion message is the text shown
        # on FAILURE, so it must describe the failure (was the success text)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to create a file with data")
        g.log.info("file created successfully on %s",
                   self.mounts[0].mountpoint)

        # Creating a hardlink for the file created
        ret = create_link_file(self.mounts[0].client_system,
                               file_name, hard_link)
        self.assertTrue(ret, "Link file creation failed")
        g.log.info("Link file creation for %s is successful", file_name)

        # Reading from the file as well as the hardlink; both must return
        # the same content (md-cache must not serve stale/garbled data)
        for each in (file_name, hard_link):
            ret, out, _ = g.run(self.mounts[0].client_system,
                                "cat {}".format(each))
            self.assertEqual(ret, 0, "Unable to read the {}".format(each))
            self.assertEqual(content, out.strip('\n'), "The content {} and"
                             " data in file {} is not same".
                             format(content, each))
            g.log.info("Read of %s file is successful", each)
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

"""
Description:

    This test verifies remove brick operation on EC
    volume.

"""
from time import sleep
from glusto.core import Glusto as g
from glustolibs.io.utils import (validate_io_procs,
                                 wait_for_io_to_complete)
from glustolibs.misc.misc_libs import upload_scripts
from glustolibs.gluster.brick_ops import remove_brick
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.volume_libs import (get_subvols,
                                            get_volume_info,
                                            log_volume_info_and_status)
from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on)
from glustolibs.gluster.rebalance_ops import (
    wait_for_remove_brick_to_complete)


@runs_on([['distributed-dispersed'], ['glusterfs']])
class EcRemoveBrickOperations(GlusterBaseClass):
    """Verify valid and invalid remove-brick operations on a dist-disp EC
    volume while IO is in progress."""

    @classmethod
    def setUpClass(cls):
        # Calling GlusterBaseClass setUpClass
        cls.get_super_method(cls, 'setUpClass')()

        # Upload io scripts for running IO on mounts
        cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
                                  "file_dir_ops.py")
        ret = upload_scripts(cls.clients, cls.script_upload_path)
        if not ret:
            raise ExecutionError("Failed to upload IO scripts to clients %s" %
                                 cls.clients)
        g.log.info("Successfully uploaded IO scripts to clients %s",
                   cls.clients)

    def setUp(self):
        # Calling GlusterBaseClass setUp
        self.get_super_method(self, 'setUp')()

        # Ensure we have sufficient subvols
        self.volume['voltype']['dist_count'] = 4

        # Setup Volume and Mount Volume
        ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
        if not ret:
            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
        g.log.info("Successful in Setup Volume and Mount Volume")

        # Start IO on mounts; each client gets a distinct dirname offset so
        # their trees don't collide
        self.counter = 1
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 5 "
                   "--max-num-of-dirs 3 "
                   "--num-of-files 3 %s" % (self.script_upload_path,
                                            self.counter,
                                            mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            self.counter += 10
        self.io_validation_complete = False

        # Adding a delay of 10 seconds before test method starts. This
        # is to ensure IO's are in progress and giving some time to fill data
        sleep(10)

    def _assert_remove_brick_fails(self, bricks_list, option):
        """Assert that `remove-brick <option>` is rejected for bricks_list."""
        ret, _, _ = remove_brick(self.mnode, self.volname, bricks_list,
                                 option=option)
        self.assertEqual(
            ret, 1, ("ERROR: Removed bricks %s from the volume %s with "
                     "option %s" % (bricks_list, self.volname, option)))

    def _remove_subvol(self, bricks_list):
        """Start remove-brick for a full subvol, wait for completion and
        commit it."""
        ret, _, _ = remove_brick(self.mnode, self.volname, bricks_list,
                                 option="start")
        self.assertEqual(
            ret, 0, ("Failed to remove bricks %s from the volume "
                     "%s" % (bricks_list, self.volname)))

        ret = wait_for_remove_brick_to_complete(self.mnode, self.volname,
                                                bricks_list)
        # NOTE: message built with %, not a (msg, args) tuple, so the brick
        # list is actually substituted on failure
        self.assertTrue(
            ret, ("Remove brick is not yet complete on the volume "
                  "%s" % self.volname))

        ret, _, _ = remove_brick(self.mnode, self.volname, bricks_list,
                                 option="commit")
        self.assertEqual(
            ret, 0, ("Failed to commit remove bricks %s from the volume "
                     "%s" % (bricks_list, self.volname)))
        g.log.info("Remove brick is successfully complete on the volume %s",
                   self.volname)

    def test_remove_brick_operations(self):
        """
        Steps:
        1. Remove data brick count number of bricks from the volume
           should fail
        2. step 1 with force option should fail
        3. Remove redundant brick count number of bricks from the volume
           should fail
        4. step 3 with force option should fail
        5. Remove data brick count+1 number of bricks from the volume
           should fail
        6. step 5 with force option should fail
        7. Remove disperse count number of bricks from the volume with
           one wrong brick path should fail
        8. step 7 with force option should fail
        9. Start remove brick on first subvol bricks
        10. Remove all the subvols to make a pure EC vol
            by start remove brick on second subvol bricks
        11. Start remove brick on third subvol bricks
        12. Write files and perform read on mountpoints
        """
        subvols_list = get_subvols(self.mnode, self.volname)
        volinfo = get_volume_info(self.mnode, self.volname)
        initial_brickcount = volinfo[self.volname]['brickCount']
        data_brick_count = (self.volume['voltype']['disperse_count'] -
                            self.volume['voltype']['redundancy_count'])
        first_subvol = subvols_list['volume_subvols'][0]

        # Invalid brick subsets: fewer than a subvol, more than a subvol,
        # and a full subvol containing one bogus path.  Each must be
        # rejected both with "start" and with "force".
        invalid_sets = [
            first_subvol[0:data_brick_count],
            first_subvol[0:self.volume['voltype']['redundancy_count']],
            first_subvol[0:data_brick_count + 1],
        ]
        wrong_path_set = first_subvol[0:self.volume['voltype']
                                      ['disperse_count']]
        wrong_path_set[0] = wrong_path_set[0] + "wrong_path"
        invalid_sets.append(wrong_path_set)

        for bricks_list_to_remove in invalid_sets:
            for option in ("start", "force"):
                self._assert_remove_brick_fails(bricks_list_to_remove, option)

        # Verify that the brick count is intact
        volinfo = get_volume_info(self.mnode, self.volname)
        latest_brickcount = volinfo[self.volname]['brickCount']
        self.assertEqual(initial_brickcount, latest_brickcount,
                         ("Brick count is not expected to "
                          "change, but changed"))

        # Start remove brick on first subvol bricks
        bricks_list_to_remove = subvols_list['volume_subvols'][0]
        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 bricks_list_to_remove, option="start")
        self.assertEqual(
            ret, 0, ("Failed to remove bricks %s from the volume "
                     "%s" % (bricks_list_to_remove, self.volname)))

        # Brick count must stay intact until the removal is committed
        volinfo = get_volume_info(self.mnode, self.volname)
        latest_brickcount = volinfo[self.volname]['brickCount']
        self.assertEqual(initial_brickcount, latest_brickcount,
                         ("Brick count is not expected to "
                          "change, but changed"))

        # Wait for remove brick to complete and commit it
        ret = wait_for_remove_brick_to_complete(self.mnode, self.volname,
                                                bricks_list_to_remove)
        self.assertTrue(
            ret, ("Remove brick is not yet complete on the volume "
                  "%s" % self.volname))
        g.log.info("Remove brick is successfully complete on the volume %s",
                   self.volname)
        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 bricks_list_to_remove, option="commit")
        self.assertEqual(
            ret, 0, ("Failed to commit remove bricks %s from the volume "
                     "%s" % (bricks_list_to_remove, self.volname)))

        # Remove all the remaining subvols to make a pure EC vol
        self._remove_subvol(subvols_list['volume_subvols'][1])
        self._remove_subvol(subvols_list['volume_subvols'][2])

        # Log volume info and status
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed "
                              "on volume %s" % self.volname))
        g.log.info("Successful in logging volume info and status "
                   "of volume %s", self.volname)

        # Validate IO
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.io_validation_complete = True
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("IO is successful on all mounts")

        # Write some files on the mount point
        cmd1 = ("cd %s; mkdir test; cd test; for i in `seq 1 100` ;"
                "do touch file$i; done" % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd1)
        self.assertEqual(ret, 0, ("Write operation failed on client "
                                  "%s " % self.mounts[0].client_system))
        g.log.info("Writes on mount point successful")

        # Perform read operation on mountpoint
        cmd2 = ("cd %s; ls -lRt;" % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd2)
        self.assertEqual(ret, 0, ("Read operation failed on client "
                                  "%s " % self.mounts[0].client_system))
        g.log.info("Read on mount point successful")

    def tearDown(self):
        # Wait for IO to complete if io validation is not executed in the
        # test method
        if not self.io_validation_complete:
            ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts)
            if not ret:
                raise ExecutionError("IO failed on some of the clients")
            g.log.info("IO is successful on all mounts")

        # Stopping the volume
        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
        if not ret:
            raise ExecutionError("Failed to Unmount Volume and Cleanup Volume")
        g.log.info("Successful in Unmount Volume and Cleanup Volume")

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()
+ +""" +Test Description: + Tests replace brick on an EC volume +""" + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.heal_libs import monitor_heal_completion +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import (collect_mounts_arequal, + validate_io_procs) +from glustolibs.gluster.brick_libs import (get_all_bricks, + wait_for_bricks_to_be_online, + are_bricks_online) +from glustolibs.gluster.volume_libs import replace_brick_from_volume +from glustolibs.gluster.glusterfile import file_exists +from glustolibs.gluster.glusterdir import mkdir + + +@runs_on([['dispersed', 'distributed-dispersed'], + ['glusterfs']]) +class TestEcBrickReplace(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path1 = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + cls.script_upload_path2 = ("/usr/share/glustolibs/io/scripts/" + "fd_writes.py") + ret = upload_scripts(cls.clients, [cls.script_upload_path1, + cls.script_upload_path2]) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + self.all_mounts_procs = [] + self.io_validation_complete = False + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts, + volume_create_force=False) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + If test method failed before validating IO, tearDown waits for the + IO's to 
complete and checks for the IO exit status + Cleanup and umount volume + """ + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_ec_replace_brick(self): + """ + - Start resource consumption tool + - Create directory dir1 + - Create 5 directory and 5 files in dir of mountpoint + - Rename all files inside dir1 at mountpoint + - Create softlink and hardlink of files in dir1 of mountpoint + - Delete op for deleting all file in one of the dirs inside dir1 + - Change chmod, chown, chgrp + - Create tiny, small, medium and large file + - Get arequal before replacing brick + - Replace brick + - Get arequal after replacing brick + - Compare Arequal's + - Create IO's + - Replace brick while IO's are going on + - Validating IO's and waiting for it to complete + """ + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + # Starting resource consumption using top + log_file_mem_monitor = '/var/log/glusterfs/mem_usage.log' + cmd = ("for i in {1..20};do top -n 1 -b|egrep " + "'RES|gluster' & free -h 2>&1 >> %s ;" + "sleep 10;done" % (log_file_mem_monitor)) + g.log.info(cmd) + cmd_list_procs = [] + for server in self.servers: + proc = g.run_async(server, cmd) + cmd_list_procs.append(proc) + + # Creating dir1 + ret = mkdir(self.mounts[0].client_system, "%s/dir1" + % self.mounts[0].mountpoint) + self.assertTrue(ret, "Failed to create dir1") + g.log.info("Directory dir1 on %s created successfully", self.mounts[0]) + + # Create 5 dir and 5 files in each dir at mountpoint on dir1 + start, end = 1, 5 + for mount_obj in self.mounts: + # Number of dir and files to be created. 
+ dir_range = ("%s..%s" % (str(start), str(end))) + file_range = ("%s..%s" % (str(start), str(end))) + # Create dir 1-5 at mountpoint. + ret = mkdir(mount_obj.client_system, "%s/dir1/dir{%s}" + % (mount_obj.mountpoint, dir_range)) + self.assertTrue(ret, "Failed to create directory") + g.log.info("Directory created successfully") + + # Create files inside each dir. + cmd = ('touch %s/dir1/dir{%s}/file{%s};' + % (mount_obj.mountpoint, dir_range, file_range)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "File creation failed") + g.log.info("File created successfull") + + # Increment counter so that at next client dir and files are made + # with diff offset. Like at next client dir will be named + # dir6, dir7...dir10. Same with files. + start += 5 + end += 5 + + # Rename all files inside dir1 at mountpoint on dir1 + cmd = ('cd %s/dir1/dir1/; ' + 'for FILENAME in *;' + 'do mv $FILENAME Unix_$FILENAME; cd ~;' + 'done;' + % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to rename file on " + "client") + g.log.info("Successfully renamed file on client") + + # Truncate at any dir in mountpoint inside dir1 + # start is an offset to be added to dirname to act on + # diff files at diff clients. + start = 1 + for mount_obj in self.mounts: + cmd = ('cd %s/dir1/dir%s/; ' + 'for FILENAME in *;' + 'do echo > $FILENAME; cd ~;' + 'done;' + % (mount_obj.mountpoint, str(start))) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Truncate failed") + g.log.info("Truncate of files successfull") + + # Create softlink and hardlink of files in mountpoint. Start is an + # offset to be added to dirname to act on diff files at diff clients. 
+ start = 1 + for mount_obj in self.mounts: + cmd = ('cd %s/dir1/dir%s; ' + 'for FILENAME in *; ' + 'do ln -s $FILENAME softlink_$FILENAME; cd ~;' + 'done;' + % (mount_obj.mountpoint, str(start))) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Creating Softlinks have failed") + g.log.info("Softlink of files have been changed successfully") + + cmd = ('cd %s/dir1/dir%s; ' + 'for FILENAME in *; ' + 'do ln $FILENAME hardlink_$FILENAME; cd ~;' + 'done;' + % (mount_obj.mountpoint, str(start + 1))) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Creating Hardlinks have failed") + g.log.info("Hardlink of files have been changed successfully") + start += 5 + + # chmod, chown, chgrp inside dir1 + # start and end used as offset to access diff files + # at diff clients. + start, end = 2, 5 + for mount_obj in self.mounts: + dir_file_range = '%s..%s' % (str(start), str(end)) + cmd = ('chmod 777 %s/dir1/dir{%s}/file{%s}' + % (mount_obj.mountpoint, dir_file_range, dir_file_range)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Changing mode of files has failed") + g.log.info("Mode of files have been changed successfully") + + cmd = ('chown root %s/dir1/dir{%s}/file{%s}' + % (mount_obj.mountpoint, dir_file_range, dir_file_range)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Changing owner of files has failed") + g.log.info("Owner of files have been changed successfully") + + cmd = ('chgrp root %s/dir1/dir{%s}/file{%s}' + % (mount_obj.mountpoint, dir_file_range, dir_file_range)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Changing group of files has failed") + g.log.info("Group of files have been changed successfully") + start += 5 + end += 5 + + # Create tiny, small, medium and large file + # at mountpoint. Offset to differ filenames + # at diff clients. 
+ offset = 1 + for mount_obj in self.mounts: + cmd = 'fallocate -l 100 tiny_file%s.txt' % str(offset) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Fallocate for tiny files failed") + g.log.info("Fallocate for tiny files successfully") + + cmd = 'fallocate -l 20M small_file%s.txt' % str(offset) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Fallocate for small files failed") + g.log.info("Fallocate for small files successfully") + + cmd = 'fallocate -l 200M medium_file%s.txt' % str(offset) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Fallocate for medium files failed") + g.log.info("Fallocate for medium files successfully") + + cmd = 'fallocate -l 1G large_file%s.txt' % str(offset) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Fallocate for large files failed") + g.log.info("Fallocate for large files successfully") + offset += 1 + + # Get arequal before replacing brick + ret, result_before_replacing_brick = ( + collect_mounts_arequal(self.mounts[0])) + self.assertTrue(ret, 'Failed to get arequal') + g.log.info('Getting arequal before replacing of brick ' + 'is successful') + + # Replacing a brick of random choice + ret = replace_brick_from_volume(self.mnode, self.volname, + self.servers, + self.all_servers_info) + self.assertTrue(ret, "Unexpected:Replace brick is not successful") + g.log.info("Expected : Replace brick is successful") + + # Wait for brick to come online + ret = wait_for_bricks_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, "Unexpected:Bricks are not online") + g.log.info("Expected : Bricks are online") + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Unexpected:Heal has not yet completed') + g.log.info('Heal has completed successfully') + + # Check if bricks are online + all_bricks = get_all_bricks(self.mnode, self.volname) + ret = 
are_bricks_online(self.mnode, self.volname, all_bricks)
+        self.assertTrue(ret, 'Unexpected:All bricks are not online')
+        g.log.info('All bricks are online')
+
+        # Get arequal after replacing brick
+        ret, result_after_replacing_brick = (
+            collect_mounts_arequal(self.mounts[0]))
+        self.assertTrue(ret, 'Failed to get arequal')
+        g.log.info('Getting areequal after replacing of brick '
+                   'is successful')
+
+        # Comparing arequals
+        self.assertEqual(result_before_replacing_brick,
+                         result_after_replacing_brick,
+                         'Arequals are not equals before replacing '
+                         'brick and after replacing brick')
+        g.log.info('Arequals are equals before replacing brick '
+                   'and after replacing brick')
+
+        # Creating files on client side for dir1
+        # Write IO
+        all_mounts_procs, count = [], 1
+        for mount_obj in self.mounts:
+            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
+                       mount_obj.mountpoint)
+            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
+                   "--dirname-start-num %d "
+                   "--dir-depth 2 "
+                   "--dir-length 10 "
+                   "--max-num-of-dirs 5 "
+                   "--num-of-files 5 %s/dir1" % (
+                       self.script_upload_path1, count,
+                       mount_obj.mountpoint))
+            proc = g.run_async(mount_obj.client_system, cmd,
+                               user=mount_obj.user)
+            all_mounts_procs.append(proc)
+            count += 10
+
+        # Replacing a brick while IO's are going on
+        ret = replace_brick_from_volume(self.mnode, self.volname,
+                                        self.servers,
+                                        self.all_servers_info)
+        self.assertTrue(ret, "Unexpected:Replace brick is not successful")
+        g.log.info("Expected : Replace brick is successful")
+
+        # Wait for brick to come online
+        ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
+        self.assertTrue(ret, "Unexpected:Bricks are not online")
+        g.log.info("Expected : Bricks are online")
+
+        # Validating IO's and waiting to complete
+        ret = validate_io_procs(all_mounts_procs, self.mounts)
+        self.assertTrue(ret, "IO failed on some of the clients")
+        g.log.info("Successfully validated all io's")
+
+        # Create 2 directories and start IO's which
opens FD + ret = mkdir(self.mounts[0].client_system, "%s/count{1..2}" + % self.mounts[0].mountpoint) + self.assertTrue(ret, "Failed to create directories") + g.log.info("Directories created on %s successfully", self.mounts[0]) + + all_fd_procs, count = [], 1 + for mount_obj in self.mounts: + cmd = ("cd %s ;/usr/bin/env python %s -n 10 -t 120 " + "-d 5 -c 16 --dir count%s" % ( + mount_obj.mountpoint, + self.script_upload_path2, count)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + all_fd_procs.append(proc) + count += 1 + + # Replacing a brick while open FD IO's are going on + ret = replace_brick_from_volume(self.mnode, self.volname, + self.servers, + self.all_servers_info) + self.assertTrue(ret, "Unexpected:Replace brick is not successful") + g.log.info("Expected : Replace brick is successful") + + # Wait for brick to come online + ret = wait_for_bricks_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, "Unexpected:Bricks are not online") + g.log.info("Expected : Bricks are online") + + # Validating IO's and waiting to complete + ret = validate_io_procs(all_fd_procs, self.mounts) + self.assertTrue(ret, "IO failed on some of the clients") + g.log.info("Successfully validated all io's") + + # Close connection and check file exist for memory log + ret = file_exists(self.mnode, + '/var/log/glusterfs/mem_usage.log') + self.assertTrue(ret, "Unexpected:Memory log file does " + "not exist") + g.log.info("Memory log file exists") + for proc in cmd_list_procs: + ret, _, _ = proc.async_communicate() + self.assertEqual(ret, 0, "Memory logging failed") + g.log.info("Memory logging is successful") diff --git a/tests/functional/disperse/test_ec_replace_brick_after_add_brick.py b/tests/functional/disperse/test_ec_replace_brick_after_add_brick.py new file mode 100644 index 000000000..8aae20b0b --- /dev/null +++ b/tests/functional/disperse/test_ec_replace_brick_after_add_brick.py @@ -0,0 +1,168 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from random import choice + +from glusto.core import Glusto as g +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.glusterfile import (remove_file, + occurences_of_pattern_in_file) +from glustolibs.gluster.heal_libs import monitor_heal_completion +from glustolibs.gluster.volume_libs import (replace_brick_from_volume, + expand_volume) +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import (validate_io_procs, wait_for_io_to_complete) + + +@runs_on([['dispersed'], ['glusterfs']]) +class TestEcReplaceBrickAfterAddBrick(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload IO scripts for running IO on mounts + cls.script_upload_path = ( + "/usr/share/glustolibs/io/scripts/file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients {}". 
+ format(cls.clients)) + + @classmethod + def tearDownClass(cls): + for each_client in cls.clients: + ret = remove_file(each_client, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to delete file {}". + format(cls.script_upload_path)) + cls.get_super_method(cls, 'tearDownClass')() + + def setUp(self): + # calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup volume and mount it on three clients. + if not self.setup_volume_and_mount_volume(self.mounts): + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + def tearDown(self): + if self.all_mounts_procs: + ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts) + if ret: + raise ExecutionError( + "Wait for IO completion failed on some of the clients") + + # Unmount and cleanup the volume + if not self.unmount_volume_and_cleanup_volume(self.mounts): + raise ExecutionError("Unable to unmount and cleanup volume") + + self.get_super_method(self, 'tearDown')() + + def test_ec_replace_brick_after_add_brick(self): + """ + Test Steps: + 1. Create a pure-ec volume (say 1x(4+2)) + 2. Mount volume on two clients + 3. Create some files and dirs from both mnts + 4. Add bricks in this case the (4+2) ie 6 bricks + 5. Create a new dir(common_dir) and in that directory create a distinct + directory(using hostname as dirname) for each client and pump IOs + from the clients(dd) + 6. While IOs are in progress replace any of the bricks + 7. 
Check for errors if any collected after step 6 + """ + # pylint: disable=unsubscriptable-object,too-many-locals + all_bricks = get_all_bricks(self.mnode, self.volname) + self.assertIsNotNone(all_bricks, "Unable to get the bricks from the {}" + " volume".format(self.volname)) + + self.all_mounts_procs = [] + for count, mount_obj in enumerate(self.mounts): + g.log.info("Starting IO on %s:%s", mount_obj.client_system, + mount_obj.mountpoint) + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d --dir-depth 3 --dir-length 5 " + "--max-num-of-dirs 5 --num-of-files 5 %s" % ( + self.script_upload_path, count, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + self.all_mounts_procs.append(proc) + + ret = validate_io_procs(self.all_mounts_procs, self.mounts) + self.assertTrue(ret, "IO failed on the mounts") + self.all_mounts_procs *= 0 + + # Expand the volume + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, "Expanding volume failed") + + # Create a new dir(common_dir) on mountpoint + common_dir = self.mounts[0].mountpoint + "/common_dir" + ret = mkdir(self.mounts[0].client_system, common_dir) + self.assertTrue(ret, "Directory creation failed") + + # Create distinct directory for each client under common_dir + distinct_dir = common_dir + "/$HOSTNAME" + for each_client in self.clients: + ret = mkdir(each_client, distinct_dir) + self.assertTrue(ret, "Directory creation failed") + + # Run dd in the background and stdout,stderr to error.txt for + # validating any errors after io completion. + run_dd_cmd = ("cd {}; for i in `seq 1 1000`; do dd if=/dev/urandom " + "of=file$i bs=4096 count=10 &>> error.txt; done". 
+ format(distinct_dir)) + for each_client in self.clients: + proc = g.run_async(each_client, run_dd_cmd) + self.all_mounts_procs.append(proc) + + # Get random brick from the bricks + brick_to_replace = choice(all_bricks) + node_from_brick_replace, _ = brick_to_replace.split(":") + + # Replace brick from the same node + servers_info_of_replaced_node = {} + servers_info_of_replaced_node[node_from_brick_replace] = ( + self.all_servers_info[node_from_brick_replace]) + + ret = replace_brick_from_volume(self.mnode, self.volname, + node_from_brick_replace, + servers_info_of_replaced_node, + src_brick=brick_to_replace) + self.assertTrue(ret, "Replace brick failed") + + self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts), + "IO failed on the mounts") + self.all_mounts_procs *= 0 + + err_msg = "Too many levels of symbolic links" + dd_log_file = distinct_dir + "/error.txt" + for each_client in self.clients: + ret = occurences_of_pattern_in_file(each_client, err_msg, + dd_log_file) + self.assertEqual(ret, 0, "Either file {} doesn't exist or {} " + "messages seen while replace brick operation " + "in-progress".format(dd_log_file, err_msg)) + + self.assertTrue(monitor_heal_completion(self.mnode, self.volname), + "Heal failed on the volume {}".format(self.volname)) diff --git a/tests/functional/disperse/test_ec_truncate_file_with_brick_down.py b/tests/functional/disperse/test_ec_truncate_file_with_brick_down.py new file mode 100755 index 000000000..ac9db90fa --- /dev/null +++ b/tests/functional/disperse/test_ec_truncate_file_with_brick_down.py @@ -0,0 +1,145 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from random import sample +import time + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_ops import volume_start +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.gluster.lib_utils import is_core_file_created +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + are_bricks_offline, + are_bricks_online) +from glustolibs.gluster.heal_libs import monitor_heal_completion +from glustolibs.misc.misc_libs import reboot_nodes_and_wait_to_come_online + + +@runs_on([['dispersed', 'distributed-dispersed'], + ['glusterfs', 'nfs']]) +class TestEcTruncateFileWithBrickDown(GlusterBaseClass): + + def setUp(self): + # calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup volume and mount it on three clients. + if not self.setup_volume_and_mount_volume([self.mounts[0]]): + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + def tearDown(self): + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + # Unmount and cleanup the volume + if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]): + raise ExecutionError("Unable to unmount and cleanup volume") + + def test_ec_truncate_file_with_brick_down(self): + """ + Test steps: + 1. Create a volume, start and mount it on a client + 2. Bring down redundant bricks in the subvol + 3. 
Create a file on the volume using "touch" + 4. Truncate the file using "O_TRUNC" + 5. Bring the brick online + 6. Write data on the file and wait for heal completion + 7. Check for crashes and coredumps + """ + # pylint: disable=unsubscriptable-object + for restart_type in ("volume_start", "node_reboot"): + # Time stamp from mnode for checking cores at the end of test + ret, test_timestamp, _ = g.run(self.mnode, "date +%s") + self.assertEqual(ret, 0, "date command failed") + test_timestamp = test_timestamp.strip() + + # Create a file using touch + file_name = self.mounts[0].mountpoint + "/test_1" + ret, _, err = g.run(self.mounts[0].client_system, "touch {}". + format(file_name)) + self.assertEqual(ret, 0, "File creation failed") + g.log.info("File Created successfully") + + # List two bricks in each subvol + subvols = get_subvols(self.mnode, self.volname)['volume_subvols'] + bricks_to_bring_offline = [] + for subvol in subvols: + self.assertTrue(subvol, "List is empty") + bricks_to_bring_offline.extend(sample(subvol, 2)) + + # Bring two bricks of each subvol offline + ret = bring_bricks_offline(self.volname, bricks_to_bring_offline) + self.assertTrue(ret, "Bricks are still online") + + # Validating the bricks are offline or not + ret = are_bricks_offline(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, "Few of the bricks are still online in" + " {} in".format(bricks_to_bring_offline)) + + # Truncate the file + cmd = ( + 'python -c "import os, sys; fd = os.open(\'{}\', os.O_TRUNC )' + '; os.close( fd )"').format(file_name) + ret, _, err = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, err) + g.log.info("File truncated successfully") + + # Bring back the bricks online + if restart_type == "volume_start": + # Bring back bricks online by volume start + ret, _, err = volume_start(self.mnode, self.volname, + force=True) + self.assertEqual(ret, 0, err) + g.log.info("All bricks are online") + elif restart_type == 
"node_reboot": + # Bring back the bricks online by node restart + for brick in bricks_to_bring_offline: + node_to_reboot = brick.split(":")[0] + ret = reboot_nodes_and_wait_to_come_online(node_to_reboot) + self.assertTrue(ret, "Reboot Failed on node: " + "{}".format(node_to_reboot)) + g.log.info("Node: %s rebooted successfully", + node_to_reboot) + time.sleep(60) + + # Check whether bricks are online or not + ret = are_bricks_online(self.mnode, self.volname, + bricks_to_bring_offline) + self.assertTrue(ret, "Bricks {} are still offline". + format(bricks_to_bring_offline)) + + # write data to the file + cmd = ('python -c "import os, sys;fd = os.open(\'{}\', ' + 'os.O_RDWR) ;' + 'os.write(fd, \'This is test after truncate\'.encode());' + ' os.close(fd)"').format(file_name) + + ret, _, err = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, err) + g.log.info("Data written successfully on to the file") + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, "Heal pending for file {}".format(file_name)) + + # check for any crashes on servers and client + for nodes in (self.servers, [self.clients[0]]): + ret = is_core_file_created(nodes, test_timestamp) + self.assertTrue(ret, + "Cores found on the {} nodes".format(nodes)) diff --git a/tests/functional/disperse/test_ec_uss_snapshot.py b/tests/functional/disperse/test_ec_uss_snapshot.py new file mode 100644 index 000000000..fec1754f6 --- /dev/null +++ b/tests/functional/disperse/test_ec_uss_snapshot.py @@ -0,0 +1,328 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +Test Description: + Test USS and snapshot on an EC volume +""" + +from unittest import skip +from time import sleep +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import validate_io_procs +from glustolibs.gluster.snap_ops import (snap_create, + snap_activate, + snap_delete, + snap_delete_all) +from glustolibs.gluster.glusterfile import file_exists +from glustolibs.gluster.uss_ops import (enable_uss, + uss_list_snaps, + disable_uss) +from glustolibs.gluster.glusterdir import mkdir + + +@runs_on([['dispersed', 'distributed-dispersed'], + ['glusterfs']]) +class TestEcUssSnapshot(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, [cls.script_upload_path]) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + self.all_mounts_procs = [] + self.io_validation_complete = False + + # Setup Volume and Mount Volume + ret = 
self.setup_volume_and_mount_volume(mounts=self.mounts, + volume_create_force=False) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def tearDown(self): + """ + Delete all snapshots and disable uss + Cleanup and umount volume + """ + # Deleting all snapshot + ret, _, _ = snap_delete_all(self.mnode) + if ret: + raise ExecutionError("Snapshot Delete Failed") + g.log.info("Successfully deleted all snapshots") + + # Disable uss for volume + ret, _, _ = disable_uss(self.mnode, self.volname) + if ret: + raise ExecutionError("Failed to disable uss") + g.log.info("Successfully disabled uss for volume" + "%s", self.volname) + + # Cleanup and umount volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + @skip('Skipping due to Bug 1828820') + def test_ec_uss_snapshot(self): + """ + - Start resource consumption tool + - Create directory dir1 + - Create 5 directory and 5 files in dir of mountpoint + - Rename all files inside dir1 at mountpoint + - Create softlink and hardlink of files in dir1 of mountpoint + - Delete op for deleting all file in one of the dirs inside dir1 + - Create tiny, small, medium and large file + - Create IO's + - Enable USS + - Create a Snapshot + - Activate Snapshot + - List snapshot and the contents inside snapshot + - Delete Snapshot + - Create Snapshot with same name + - Activate Snapshot + - List snapshot and the contents inside snapshot + - Validating IO's and waiting for it to complete + - Close connection and check file exist for memory log + """ + # pylint: disable=too-many-branches,too-many-statements,too-many-locals + # Starting resource consumption using top + log_file_mem_monitor = 
'/var/log/glusterfs/mem_usage.log' + cmd = ("for i in {1..20};do top -n 1 -b|egrep " + "'RES|gluster' & free -h 2>&1 >> %s ;" + "sleep 10;done" % (log_file_mem_monitor)) + g.log.info(cmd) + cmd_list_procs = [] + for server in self.servers: + proc = g.run_async(server, cmd) + cmd_list_procs.append(proc) + + # Creating dir1 + ret = mkdir(self.mounts[0].client_system, "%s/dir1" + % self.mounts[0].mountpoint) + self.assertTrue(ret, "Failed to create dir1") + g.log.info("Directory dir1 on %s created successfully", self.mounts[0]) + + # Create 5 dir and 5 files in each dir at mountpoint on dir1 + start, end = 1, 5 + for mount_obj in self.mounts: + # Number of dir and files to be created. + dir_range = ("%s..%s" % (str(start), str(end))) + file_range = ("%s..%s" % (str(start), str(end))) + # Create dir 1-5 at mountpoint. + ret = mkdir(mount_obj.client_system, "%s/dir1/dir{%s}" + % (mount_obj.mountpoint, dir_range)) + self.assertTrue(ret, "Failed to create directory") + g.log.info("Directory created successfully") + + # Create files inside each dir. + cmd = ('touch %s/dir1/dir{%s}/file{%s};' + % (mount_obj.mountpoint, dir_range, file_range)) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "File creation failed") + g.log.info("File created successfull") + + # Increment counter so that at next client dir and files are made + # with diff offset. Like at next client dir will be named + # dir6, dir7...dir10. Same with files. + start += 5 + end += 5 + + # Rename all files inside dir1 at mountpoint on dir1 + cmd = ('cd %s/dir1/dir1/; ' + 'for FILENAME in *;' + 'do mv $FILENAME Unix_$FILENAME;' + 'done;' + % self.mounts[0].mountpoint) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to rename file on " + "client") + g.log.info("Successfully renamed file on client") + + # Truncate at any dir in mountpoint inside dir1 + # start is an offset to be added to dirname to act on + # diff files at diff clients. 
+ start = 1 + for mount_obj in self.mounts: + cmd = ('cd %s/dir1/dir%s/; ' + 'for FILENAME in *;' + 'do echo > $FILENAME;' + 'done;' + % (mount_obj.mountpoint, str(start))) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Truncate failed") + g.log.info("Truncate of files successfull") + + # Create softlink and hardlink of files in mountpoint. Start is an + # offset to be added to dirname to act on diff files at diff clients. + start = 1 + for mount_obj in self.mounts: + cmd = ('cd %s/dir1/dir%s; ' + 'for FILENAME in *; ' + 'do ln -s $FILENAME softlink_$FILENAME;' + 'done;' + % (mount_obj.mountpoint, str(start))) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Creating Softlinks have failed") + g.log.info("Softlink of files have been changed successfully") + + cmd = ('cd %s/dir1/dir%s; ' + 'for FILENAME in *; ' + 'do ln $FILENAME hardlink_$FILENAME;' + 'done;' + % (mount_obj.mountpoint, str(start + 1))) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Creating Hardlinks have failed") + g.log.info("Hardlink of files have been changed successfully") + start += 5 + + # Create tiny, small, medium and large file + # at mountpoint. Offset to differ filenames + # at diff clients. 
+ offset = 1 + for mount_obj in self.mounts: + cmd = 'fallocate -l 100 tiny_file%s.txt' % str(offset) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Fallocate for tiny files failed") + g.log.info("Fallocate for tiny files successfully") + + cmd = 'fallocate -l 20M small_file%s.txt' % str(offset) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Fallocate for small files failed") + g.log.info("Fallocate for small files successfully") + + cmd = 'fallocate -l 200M medium_file%s.txt' % str(offset) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Fallocate for medium files failed") + g.log.info("Fallocate for medium files successfully") + + cmd = 'fallocate -l 1G large_file%s.txt' % str(offset) + ret, _, _ = g.run(mount_obj.client_system, cmd) + self.assertFalse(ret, "Fallocate for large files failed") + g.log.info("Fallocate for large files successfully") + offset += 1 + + # Creating files on client side for dir1 + # Write IO + all_mounts_procs, count = [], 1 + for mount_obj in self.mounts: + g.log.info("Starting IO on %s:%s", mount_obj.client_system, + mount_obj.mountpoint) + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d " + "--dir-depth 2 " + "--dir-length 10 " + "--max-num-of-dirs 5 " + "--num-of-files 5 %s/dir1" % ( + self.script_upload_path, count, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + all_mounts_procs.append(proc) + count += 10 + + # Enable USS + ret, _, _ = enable_uss(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to enable USS on volume") + g.log.info("Successfully enabled USS on volume") + + # Create Snapshot + ret, _, _ = snap_create(self.mnode, self.volname, + "ec_snap", timestamp=False) + self.assertEqual(ret, 0, "Failed to create snapshot ec_snap") + g.log.info("Snapshot ec_snap of volume %s created" + "successfully.", self.volname) + + # Activate snapshot + ret, 
_, _ = snap_activate(self.mnode, "ec_snap") + self.assertEqual(ret, 0, "Failed to activate snapshot ec_snap") + g.log.info("Snapshot activated successfully") + + # List contents inside snaphot and wait before listing + sleep(5) + for mount_obj in self.mounts: + ret, out, _ = uss_list_snaps(mount_obj.client_system, + mount_obj.mountpoint) + self.assertEqual(ret, 0, "Directory Listing Failed for" + " Activated Snapshot") + self.assertIn("ec_snap", out.split("\n"), "Failed to " + "validate ec_snap under .snaps directory") + g.log.info("Activated Snapshot listed Successfully") + + # Delete Snapshot ec_snap + ret, _, _ = snap_delete(self.mnode, "ec_snap") + self.assertEqual(ret, 0, "Failed to delete snapshot") + g.log.info("Snapshot deleted Successfully") + + # Creating snapshot with the same name + ret, _, _ = snap_create(self.mnode, self.volname, + "ec_snap", timestamp=False) + self.assertEqual(ret, 0, "Failed to create snapshot ec_snap") + g.log.info("Snapshot ec_snap of volume %s created" + "successfully.", self.volname) + + # Activate snapshot ec_snap + ret, _, _ = snap_activate(self.mnode, "ec_snap") + self.assertEqual(ret, 0, "Failed to activate snapshot ec_snap") + g.log.info("Snapshot activated successfully") + + # List contents inside ec_snap and wait before listing + sleep(5) + for mount_obj in self.mounts: + ret, out, _ = uss_list_snaps(mount_obj.client_system, + mount_obj.mountpoint) + self.assertEqual(ret, 0, "Directory Listing Failed for" + " Activated Snapshot") + self.assertIn("ec_snap", out.split('\n'), "Failed to " + "validate ec_snap under .snaps directory") + g.log.info("Activated Snapshot listed Successfully") + + # Validating IO's and waiting to complete + ret = validate_io_procs(all_mounts_procs, self.mounts) + self.assertTrue(ret, "IO failed on some of the clients") + g.log.info("Successfully validated all io's") + + # Close connection and check file exist for memory log + ret = file_exists(self.mnode, + '/var/log/glusterfs/mem_usage.log') + 
self.assertTrue(ret, "Unexpected:Memory log file does " + "not exist") + g.log.info("Memory log file exists") + for proc in cmd_list_procs: + ret, _, _ = proc.async_communicate() + self.assertEqual(ret, 0, "Memory logging failed") + g.log.info("Memory logging is successful") diff --git a/tests/functional/disperse/test_no_fresh_lookup_on_directory.py b/tests/functional/disperse/test_no_fresh_lookup_on_directory.py new file mode 100644 index 000000000..7be18b4e1 --- /dev/null +++ b/tests/functional/disperse/test_no_fresh_lookup_on_directory.py @@ -0,0 +1,183 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from random import choice + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.brickdir import file_exists +from glustolibs.gluster.mount_ops import mount_volume, umount_volume +from glustolibs.gluster.volume_libs import set_volume_options, get_subvols +from glustolibs.gluster.glusterfile import occurences_of_pattern_in_file +from glustolibs.gluster.brick_libs import (bring_bricks_offline, + get_online_bricks_list) +from glustolibs.gluster.volume_ops import volume_start +from glustolibs.gluster.volume_libs import wait_for_volume_process_to_be_online + + +@runs_on([['distributed-dispersed', 'distributed-replicated', + 'distributed-arbiter'], ['glusterfs']]) +class TestNoFreshLookUpBrickDown(GlusterBaseClass): + + def setUp(self): + """ + setUp method for every test + """ + # calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setting client log-level to Debug + self.volume['options'] = {'diagnostics.client-log-level': 'DEBUG'} + + # Creating Volume and mounting + ret = self.setup_volume() + if not ret: + raise ExecutionError("Volume creation failed: %s" % self.volname) + g.log.info("Volume is created and started") + + def tearDown(self): + """ + tearDown method for every test + """ + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + ret = umount_volume(mclient=self.mounts[0].client_system, + mpoint=self.mountpoint) + if not ret: + raise ExecutionError("Unable to umount the volume") + g.log.info("Unmounting of the volume %s succeeded", self.volname) + + # Resetting the volume option set in the setup + ret = set_volume_options(self.mnode, self.volname, + {'diagnostics.client-log-level': 'INFO'}) + if not ret: + raise ExecutionError("Unable to set the client log level to INFO") + g.log.info("Volume option is set 
successfully.") + + ret = self.cleanup_volume() + if not ret: + raise ExecutionError("Unable to perform volume cleanup") + g.log.info("Volume cleanup is successful") + + def do_lookup(self, dirname): + """ + Performs a lookup on the directory. + """ + ret = file_exists(self.mounts[0].client_system, dirname) + self.assertTrue(ret, "Directory %s doesn't exist " % dirname) + g.log.info("Directory present on the %s", + self.mounts[0].client_system) + + def match_occurences(self, first_count, search_pattern, filename): + """ + Validating the count of the search pattern before and after + lookup. + """ + newcount = occurences_of_pattern_in_file(self.mounts[0].client_system, + search_pattern, filename) + self.assertEqual(first_count, newcount, "Failed: The lookups logged" + " for the directory <dirname> are more than expected") + g.log.info("No more new lookups for the dir1") + + def test_no_fresh_lookup(self): + """ + The testcase covers negative lookup of a directory in distributed- + replicated and distributed-dispersed volumes + 1. Mount the volume on one client. + 2. Create a directory + 3. Validate the number of lookups for the directory creation from the + log file. + 4. Perform a new lookup of the directory + 5. No new lookups should have happened on the directory, validate from + the log file. + 6. Bring down one subvol of the volume and repeat step 4, 5 + 7. Bring down one brick from the online bricks and repeat step 4, 5 + 8. Start the volume with force and wait for all processes to be online. 
+ """ + + # Mounting the volume on a distinct directory for the validation of + # testcase + self.mountpoint = "/mnt/" + self.volname + ret, _, _ = mount_volume(self.volname, mtype=self.mount_type, + mpoint=self.mountpoint, + mserver=self.mnode, + mclient=self.mounts[0].client_system) + self.assertEqual(ret, 0, ("Volume %s is not mounted") % self.volname) + g.log.info("Volume mounted successfully : %s", self.volname) + + # Distinct log file for the validation of this test + filename = "/var/log/glusterfs/mnt-" + self.volname + ".log" + # Creating a dir on the mount point. + dirname = self.mountpoint + "/dir1" + ret = mkdir(host=self.mounts[0].client_system, fqpath=dirname) + self.assertTrue(ret, "Failed to create dir1") + g.log.info("dir1 created successfully for %s", + self.mounts[0].client_system) + + search_pattern = "/dir1: Calling fresh lookup" + + # Check log file for the pattern in the log file + first_count = occurences_of_pattern_in_file( + self.mounts[0].client_system, search_pattern, filename) + self.assertGreater(first_count, 0, "Unable to find " + "pattern in the given file") + g.log.info("Searched for the pattern in the log file successfully") + + # Perform a lookup of the directory dir1 + self.do_lookup(dirname) + + # Recheck for the number of lookups from the log file + self.match_occurences(first_count, search_pattern, filename) + + # Bring down one subvol of the volume + ret = get_subvols(self.mnode, self.volname) + brick_list = choice(ret['volume_subvols']) + ret = bring_bricks_offline(self.volname, brick_list) + self.assertTrue(ret, "Unable to bring the given bricks offline") + g.log.info("Able to bring all the bricks in the subvol offline") + + # Do a lookup on the mountpoint for the directory dir1 + self.do_lookup(dirname) + + # Re-check the number of occurences of lookup + self.match_occurences(first_count, search_pattern, filename) + + # From the online bricks, bring down one brick + online_bricks = get_online_bricks_list(self.mnode, 
self.volname) + self.assertIsNotNone(online_bricks, "Unable to fetch online bricks") + g.log.info("Able to fetch the online bricks") + offline_brick = choice(online_bricks) + ret = bring_bricks_offline(self.volname, [offline_brick]) + self.assertTrue(ret, "Unable to bring the brick %s offline " % + offline_brick) + g.log.info("Successfully brought the brick %s offline", offline_brick) + + # Do a lookup on the mountpoint and check for new lookups from the log + self.do_lookup(dirname) + self.match_occurences(first_count, search_pattern, filename) + + # Start volume with force + ret, _, err = volume_start(self.mnode, self.volname, force=True) + self.assertEqual(ret, 0, "Unable to force start the volume %s " % err) + g.log.info("Volume started successfully") + + # Wait for all the processes to be online. + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname) + self.assertTrue(ret, "Some processes are offline") + g.log.info("All processes of the volume are online") diff --git a/tests/functional/glusterd/test_add_brick_when_quorum_not_met.py b/tests/functional/glusterd/test_add_brick_when_quorum_not_met.py index 7d8aad0e0..0e0a58842 100644 --- a/tests/functional/glusterd/test_add_brick_when_quorum_not_met.py +++ b/tests/functional/glusterd/test_add_brick_when_quorum_not_met.py @@ -20,6 +20,7 @@ from glustolibs.gluster.gluster_base_class import runs_on, GlusterBaseClass from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.volume_libs import setup_volume from glustolibs.gluster.volume_ops import (set_volume_options, + volume_reset, get_volume_status) from glustolibs.gluster.gluster_init import (stop_glusterd, start_glusterd, is_glusterd_running) @@ -62,13 +63,12 @@ class TestAddBrickWhenQuorumNotMet(GlusterBaseClass): % self.volname) g.log.info("Volume deleted successfully : %s", self.volname) - # Setting quorum ratio to 51% - ret = set_volume_options(self.mnode, 'all', - {'cluster.server-quorum-ratio': '51%'}) + # Reset Cluster options + 
ret = volume_reset(self.mnode, 'all') if not ret: - raise ExecutionError("Failed to set server quorum ratio on %s" + raise ExecutionError("Failed to reset cluster options on %s" % self.volname) - g.log.info("Able to set server quorum ratio successfully on %s", + g.log.info("Cluster options reset successfully on %s", self.servers) self.get_super_method(self, 'tearDown')() @@ -100,7 +100,7 @@ class TestAddBrickWhenQuorumNotMet(GlusterBaseClass): # bring down glusterd of half nodes num_of_servers = len(self.servers) - num_of_nodes_to_bring_down = num_of_servers/2 + num_of_nodes_to_bring_down = num_of_servers//2 for node in range(num_of_nodes_to_bring_down, num_of_servers): ret = stop_glusterd(self.servers[node]) diff --git a/tests/functional/glusterd/test_add_identical_brick_new_node.py b/tests/functional/glusterd/test_add_identical_brick_new_node.py index ce643fbae..849894943 100644 --- a/tests/functional/glusterd/test_add_identical_brick_new_node.py +++ b/tests/functional/glusterd/test_add_identical_brick_new_node.py @@ -115,8 +115,14 @@ class TestAddIdenticalBrick(GlusterBaseClass): # Replace just host IP to create identical brick add_bricks = [] - add_bricks.append(string.replace(bricks_list[0], - self.servers[0], self.servers[1])) + try: + add_bricks.append(string.replace(bricks_list[0], + self.servers[0], + self.servers[1])) + except AttributeError: + add_bricks.append(str.replace(bricks_list[0], + self.servers[0], + self.servers[1])) ret, _, _ = add_brick(self.mnode, self.volname, add_bricks) self.assertEqual(ret, 0, "Failed to add the bricks to the volume") g.log.info("Successfully added bricks to volume %s", add_bricks[0]) diff --git a/tests/functional/glusterd/test_brick_port_after_stop_glusterd_modify_volume.py b/tests/functional/glusterd/test_brick_port_after_stop_glusterd_modify_volume.py index 7d56abd83..865230d1a 100644 --- a/tests/functional/glusterd/test_brick_port_after_stop_glusterd_modify_volume.py +++ 
b/tests/functional/glusterd/test_brick_port_after_stop_glusterd_modify_volume.py @@ -13,6 +13,7 @@ # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +from time import sleep from glusto.core import Glusto as g from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on @@ -158,6 +159,8 @@ class TestBrickPortAfterModifyVolume(GlusterBaseClass): self.assertTrue(ret, "glusterd is not connected %s with peer %s" % (self.servers[0], self.servers[1])) + # Waiting for 5 sec so that the brick will get port + sleep(5) vol_status = get_volume_status(self.mnode, self.volname) self.assertIsNotNone(vol_status, "Failed to get volume " "status for %s" % self.volname) diff --git a/tests/functional/glusterd/test_change_reserve_limit_to_wrong_values.py b/tests/functional/glusterd/test_change_reserve_limit_to_wrong_values.py new file mode 100644 index 000000000..334639e7c --- /dev/null +++ b/tests/functional/glusterd/test_change_reserve_limit_to_wrong_values.py @@ -0,0 +1,80 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +import string +from random import choice + +from glusto.core import Glusto as g + +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.volume_ops import set_volume_options + + +@runs_on([['distributed-replicated'], ['glusterfs']]) +class TestChangeReserveLimit(GlusterBaseClass): + """ + Test to validate behaviour of 'storage.reserve' option on supplying + erroneous values. + """ + def setUp(self): + self.get_super_method(self, 'setUp')() + ret = self.setup_volume() + if not ret: + raise ExecutionError("Failed to create the volume") + g.log.info("Created volume successfully") + + def tearDown(self): + ret = self.cleanup_volume() + if not ret: + raise ExecutionError("Failed to cleanup the volume") + g.log.info("Successfully cleaned the volume") + self.get_super_method(self, 'tearDown')() + + @staticmethod + def get_random_string(chars, str_len=4): + return ''.join((choice(chars) for _ in range(str_len))) + + def test_change_reserve_limit_to_wrong_value(self): + """ + Test Case: + 1) Create and start a distributed-replicated volume. 
+ 2) Give different inputs to the storage.reserve volume set options + 3) Validate the command behaviour on wrong inputs + """ + + # Creation of random data for storage.reserve volume option + # Data has: alphabets, numbers, punctuations and their combinations + key = 'storage.reserve' + + for char_type in (string.ascii_letters, string.punctuation, + string.printable): + + # Remove quotes from the generated string + temp_val = self.get_random_string(char_type) + temp_val = temp_val.replace("'", "").replace("&", "") + value = "'{}'".format(temp_val) + ret = set_volume_options(self.mnode, self.volname, {key: value}) + self.assertFalse( + ret, "Unexpected: Erroneous value {}, to option " + "{} should result in failure".format(value, key)) + + # Passing an out of range value + for value in ('-1%', '-101%', '101%', '-1', '-101'): + ret = set_volume_options(self.mnode, self.volname, {key: value}) + self.assertFalse( + ret, "Unexpected: Erroneous value {}, to option " + "{} should result in failure".format(value, key)) diff --git a/tests/functional/glusterd/test_default_log_level_of_cli.py b/tests/functional/glusterd/test_default_log_level_of_cli.py new file mode 100644 index 000000000..76ad06d66 --- /dev/null +++ b/tests/functional/glusterd/test_default_log_level_of_cli.py @@ -0,0 +1,97 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" Description: + Test to check that default log level of CLI should be INFO +""" + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.volume_ops import (volume_start, volume_status, + volume_info, volume_stop) + + +@runs_on([['distributed', 'replicated', 'distributed-replicated', + 'dispersed', 'distributed-dispersed', 'arbiter', + 'distributed-arbiter'], ['glusterfs']]) +class TestDefaultLogLevelOfCLI(GlusterBaseClass): + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Creating and starting the volume + ret = self.setup_volume() + if not ret: + raise ExecutionError("Volume creation/start operation" + " failed: %s" % self.volname) + g.log.info("Volme created and started successfully : %s", self.volname) + + def tearDown(self): + # Stopping the volume and Cleaning up the volume + ret = self.cleanup_volume() + if not ret: + raise ExecutionError("Failed to cleanup volume") + g.log.info("Volume deleted successfully : %s", self.volname) + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_default_log_level_of_cli(self): + """ + Test Case: + 1) Create and start a volume + 2) Run volume info command + 3) Run volume status command + 4) Run volume stop command + 5) Run volume start command + 6) Check the default log level of cli.log + """ + # Check volume info operation + ret, _, _ = volume_info(self.mnode) + self.assertEqual(ret, 0, "Failed to execute volume info" + " command on node: %s" % self.mnode) + g.log.info("Successfully executed the volume info command on" + " node: %s", self.mnode) + + # Check volume status 
operation + ret, _, _ = volume_status(self.mnode) + self.assertEqual(ret, 0, "Failed to execute volume status command" + " on node: %s" % self.mnode) + g.log.info("Successfully executed the volume status command" + " on node: %s", self.mnode) + + # Check volume stop operation + ret, _, _ = volume_stop(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to stop the volume %s on node: %s" + % (self.volname, self.mnode)) + g.log.info("Successfully stopped the volume %s on node: %s", + self.volname, self.mnode) + + # Check volume start operation + ret, _, _ = volume_start(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to start the volume %s on node: %s" + % (self.volname, self.mnode)) + g.log.info("Successfully started the volume %s on node: %s", + self.volname, self.mnode) + + # Check the default log level of cli.log + cmd = 'cat /var/log/glusterfs/cli.log | grep -F "] D [" | wc -l' + ret, out, _ = g.run(self.mnode, cmd) + self.assertEqual(ret, 0, "Failed to execute the command") + self.assertEqual(int(out), 0, "Unexpected: Default log level of " + "cli.log is not INFO") + g.log.info("Default log level of cli.log is INFO as expected") diff --git a/tests/functional/glusterd/test_default_max_bricks_per_process.py b/tests/functional/glusterd/test_default_max_bricks_per_process.py new file mode 100644 index 000000000..b20c1bccd --- /dev/null +++ b/tests/functional/glusterd/test_default_max_bricks_per_process.py @@ -0,0 +1,100 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" Description: + Default max bricks per-process should be 250 +""" + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.volume_ops import (get_volume_options, + reset_volume_option, + set_volume_options) + + +@runs_on([['distributed', 'replicated', 'distributed-replicated', + 'dispersed', 'distributed-dispersed', 'arbiter', + 'distributed-arbiter'], ['glusterfs']]) +class TestDefaultMaxBricksPerProcess(GlusterBaseClass): + def setUp(self): + # calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + ret = self.setup_volume() + if not ret: + raise ExecutionError("Volume creation failed: %s" + % self.volname) + g.log.info("Volume created successfully : %s", self.volname) + + def tearDown(self): + # Cleaning up the volume + ret = self.cleanup_volume() + if not ret: + raise ExecutionError("Failed to cleanup the volume %s" + % self.volname) + g.log.info("Volume deleted successfully: %s", self.volname) + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_default_max_bricks_per_process(self): + """ + Test Case: + 1) Create a volume and start it. 
+ 2) Fetch the max bricks per process value + 3) Reset the volume options + 4) Fetch the max bricks per process value + 5) Compare the value fetched in last step with the initial value + 6) Enable brick-multiplexing in the cluster + 7) Fetch the max bricks per process value + 8) Compare the value fetched in last step with the initial value + """ + # Fetch the max bricks per process value + ret = get_volume_options(self.mnode, 'all') + self.assertIsNotNone(ret, "Failed to execute the volume get command") + initial_value = ret['cluster.max-bricks-per-process'] + g.log.info("Successfully fetched the max bricks per-process value") + + # Reset the volume options + ret, _, _ = reset_volume_option(self.mnode, 'all', 'all') + self.assertEqual(ret, 0, "Failed to reset the volumes") + g.log.info("Volumes reset was successful") + + # Fetch the max bricks per process value + ret = get_volume_options(self.mnode, 'all') + self.assertIsNotNone(ret, "Failed to execute the volume get command") + + # Comparing the values + second_value = ret['cluster.max-bricks-per-process'] + self.assertEqual(initial_value, second_value, "Unexpected: Max" + " bricks per-process value is not equal") + + # Enable brick-multiplex in the cluster + ret = set_volume_options(self.mnode, 'all', + {'cluster.brick-multiplex': 'enable'}) + self.assertTrue(ret, "Failed to enable brick-multiplex" + " for the cluster") + g.log.info("Successfully enabled brick-multiplex in the cluster") + + # Fetch the max bricks per process value + ret = get_volume_options(self.mnode, 'all') + self.assertIsNotNone(ret, "Failed to execute the volume get command") + + # Comparing the values + third_value = ret['cluster.max-bricks-per-process'] + self.assertEqual(initial_value, third_value, "Unexpected: Max bricks" + " per-process value is not equal") diff --git a/tests/functional/glusterd/test_default_ping_timer_and_epoll_thread_count.py b/tests/functional/glusterd/test_default_ping_timer_and_epoll_thread_count.py new file mode 
100644 index 000000000..4ffe047d3 --- /dev/null +++ b/tests/functional/glusterd/test_default_ping_timer_and_epoll_thread_count.py @@ -0,0 +1,87 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" Description: + Tests to check by default ping timer is disabled and epoll + thread count is 1 +""" + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass + + +class TestPingTimerAndEpollThreadCountDefaultValue(GlusterBaseClass): + def tearDown(self): + # Remvoing the test script created during the test + cmd = "rm -f test.sh;" + ret, _, _ = g.run(self.mnode, cmd) + self.assertEqual(ret, 0, "Failed to remove the test script") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_ping_timer_disbaled_and_epoll_thread_count_default_value(self): + """ + Test Steps: + 1. Start glusterd + 2. Check ping timeout value in glusterd.vol should be 0 + 3. Create a test script for epoll thread count + 4. Source the test script + 5. Fetch the pid of glusterd + 6. 
Check epoll thread count of glusterd should be 1 + """ + # Fetch the ping timeout value from glusterd.vol file + cmd = "cat /etc/glusterfs/glusterd.vol | grep -i ping-timeout" + ret, out, _ = g.run(self.mnode, cmd) + self.assertEqual(ret, 0, "Failed to get ping-timeout value from" + " glusterd.vol file") + + # Check if the default value is 0 + self.ping_value = out.split("ping-timeout") + self.ping_value[1] = (self.ping_value[1]).strip() + self.assertEqual(int(self.ping_value[1]), 0, "Unexpected: Default" + " value of ping-timeout is not 0") + + # Shell Script to be run for epoll thread count + script = """ + #!/bin/bash + function nepoll () + { + local pid=$1; + for i in $(ls /proc/$pid/task); + do + cat /proc/$pid/task/$i/stack | grep -i 'sys_epoll_wait'; + done + } + """ + + # Execute the shell script + cmd = "echo '{}' > test.sh;".format(script) + ret, _, _ = g.run(self.mnode, cmd) + self.assertEqual(ret, 0, "Failed to create the file with the script") + + # Fetch the pid of glusterd + cmd = "pidof glusterd" + ret, pidof_glusterd, _ = g.run(self.mnode, cmd) + self.assertEqual(ret, 0, "Failed to get the pid of glusterd") + pidof_glusterd = int(pidof_glusterd) + + # Check the epoll thread count of glusterd + cmd = "source test.sh; nepoll %d | wc -l" % pidof_glusterd + ret, count, _ = g.run(self.mnode, cmd) + self.assertEqual(ret, 0, "Failed to get the epoll thread count") + self.assertEqual(int(count), 1, "Unexpected: Default epoll thread" + "count is not 1") diff --git a/tests/functional/glusterd/test_enable_storage_reserve_volume.py b/tests/functional/glusterd/test_enable_storage_reserve_volume.py new file mode 100644 index 000000000..b930cad87 --- /dev/null +++ b/tests/functional/glusterd/test_enable_storage_reserve_volume.py @@ -0,0 +1,79 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +Description: + This test case is authored to test posix storage.reserve option. +""" + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.volume_ops import set_volume_options + + +@runs_on([['distributed-replicated'], ['glusterfs']]) +class TestPosixStorageReserveOption(GlusterBaseClass): + def setUp(self): + """ + setUp method for every test. + """ + self.get_super_method(self, 'setUp')() + + # setup volume + ret = self.setup_volume() + if not ret: + raise ExecutionError("Failed to setup volume") + + def test_enable_storage_reserve_volume(self): + """ + 1) Create a distributed-replicated volume and start it. + 2) Enable storage.reserve option on the volume using below command, + gluster volume set storage.reserve. + let's say, set it to a value of 50. + 3) Mount the volume on a client + 4) check df -h output of the mount point and backend bricks. 
+ """ + # Set volume option storage.reserve 50 + ret = set_volume_options( + self.mnode, self.volname, {"storage.reserve ": 50}) + self.assertTrue( + ret, "gluster volume set {} storage.reserve 50 Failed on server " + "{}".format(self.volname, self.mnode)) + # Mounting the volume on a client + ret = self.mount_volume(self.mounts) + if not ret: + raise ExecutionError("Failed to mount volume") + + ret, out, _ = g.run( + self.clients[0], "df -h | grep -i '{}'".format( + self.mounts[0].mountpoint)) + self.assertFalse( + ret, "Failed to run cmd df -h on client {}".format + (self.clients[0])) + + self.assertTrue("51%" in out.split(" "), "51 % is not in list ") + + def tearDown(self): + """Tear Down callback""" + # Unmount volume and cleanup. + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to unmount and cleanup volume") + g.log.info("Successful in unmount and cleanup operations") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/glusterd/test_get_state_on_brick_unmount.py b/tests/functional/glusterd/test_get_state_on_brick_unmount.py new file mode 100644 index 000000000..a2caef214 --- /dev/null +++ b/tests/functional/glusterd/test_get_state_on_brick_unmount.py @@ -0,0 +1,126 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from random import choice + +from glusto.core import Glusto as g + +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import (setup_volume, cleanup_volume,) +from glustolibs.gluster.volume_ops import (get_gluster_state, get_volume_list) +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.glusterfile import check_if_pattern_in_file + + +@runs_on([['distributed', 'replicated', + 'distributed-replicated', + 'dispersed', 'distributed-dispersed', + 'arbiter', 'distributed-arbiter'], []]) +class TestGetStateOnBrickUnmount(GlusterBaseClass): + """ + Tests to verify 'gluster get state' command on unmounting the brick from + an online volume + """ + + @classmethod + def setUpClass(cls): + + cls.get_super_method(cls, 'setUpClass')() + + ret = cls.validate_peers_are_connected() + if not ret: + raise ExecutionError("Failed to validate peers are in connected") + g.log.info("Successfully validated peers are in connected state") + + def tearDown(self): + + # Mount the bricks which are unmounted as part of test + if getattr(self, 'umount_host', None) and getattr(self, 'umount_brick', + None): + ret, _, _ = g.run(self.umount_host, 'mount -a') + if ret: + raise ExecutionError("Not able to mount unmounted brick on " + "{}".format(self.umount_host)) + + vol_list = get_volume_list(self.mnode) + if vol_list: + for volume in vol_list: + ret = cleanup_volume(self.mnode, volume) + if ret: + g.log.info("Volume deleted successfully %s", volume) + else: + raise ExecutionError( + "Not able to delete volume {}".format(volume)) + + self.get_super_method(self, 'tearDown')() + + def test_get_state_on_brick_unmount(self): + """ + 
Steps: + 1. Form a gluster cluster by peer probing and create a volume + 2. Unmount the brick using which the volume is created + 3. Run 'gluster get-state' and validate absence of error 'Failed to get + daemon state. Check glusterd log file for more details' + 4. Create another volume and start it using different bricks which are + not used to create above volume + 5. Run 'gluster get-state' and validate the absence of above error. + """ + # Setup Volume + ret = setup_volume(mnode=self.mnode, + all_servers_info=self.all_servers_info, + volume_config=self.volume, create_only=True) + self.assertTrue(ret, "Failed to setup volume {}".format(self.volname)) + g.log.info("Successful in setting up volume %s", self.volname) + + # Select one of the bricks in the volume to unmount + brick_list = get_all_bricks(self.mnode, self.volname) + self.assertIsNotNone(brick_list, ("Not able to get list of bricks " + "of volume %s", self.volname)) + + select_brick = choice(brick_list) + self.umount_host, self.umount_brick = ( + select_brick[0:select_brick.rfind('/')].split(':')) + + # Verify mount entry in /etc/fstab + ret = check_if_pattern_in_file(self.umount_host, + self.umount_brick, '/etc/fstab') + self.assertEqual(ret, 0, "Fail: Brick mount entry is not" + " found in /etc/fstab of {}".format(self.umount_host)) + + # Unmount the selected brick + cmd = 'umount {}'.format(self.umount_brick) + ret, _, _ = g.run(self.umount_host, cmd) + self.assertEqual(0, ret, "Fail: Not able to unmount {} on " + "{}".format(self.umount_brick, self.umount_host)) + + # Run 'gluster get-state' and verify absence of any error + ret = get_gluster_state(self.mnode) + self.assertIsNotNone(ret, "Fail: 'gluster get-state' didn't dump the " + "state of glusterd when {} unmounted from " + "{}".format(self.umount_brick, self.umount_host)) + + # Create another volume + self.volume['name'] = 'second_volume' + ret = setup_volume(self.mnode, self.all_servers_info, self.volume) + self.assertTrue(ret, 'Failed to 
create and start volume') + g.log.info('Second volume created and started successfully') + + # Run 'gluster get-state' and verify absence of any error after + # creation of second-volume + ret = get_gluster_state(self.mnode) + self.assertIsNotNone(ret, "Fail: 'gluster get-state' didn't dump the " + "state of glusterd ") diff --git a/tests/functional/glusterd/test_getstate_shows_correct_brick_status_when_brick_killed.py b/tests/functional/glusterd/test_getstate_shows_correct_brick_status_when_brick_killed.py new file mode 100644 index 000000000..dff7aa9ef --- /dev/null +++ b/tests/functional/glusterd/test_getstate_shows_correct_brick_status_when_brick_killed.py @@ -0,0 +1,124 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + from glusto.core import Glusto as g + from glustolibs.gluster.exceptions import ExecutionError + from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) + from glustolibs.gluster.volume_ops import (volume_stop, + volume_start, + get_gluster_state) + from glustolibs.gluster.brick_libs import (get_offline_bricks_list, + bring_bricks_online, + get_online_bricks_list, + bring_bricks_offline) + + + @runs_on([['distributed-dispersed', 'replicated', 'arbiter', + 'distributed-replicated', 'distributed', 'dispersed', + 'distributed-arbiter'], + ['glusterfs']]) + class TestGetStateBrickStatus(GlusterBaseClass): + + def setUp(self): + self.get_super_method(self, 'setUp')() + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def test_validate_get_state(self): + """ + TestCase: + 1. Execute "gluster get-state" say on N1(Node1) + 2. Start one by one volume and check brick status in get-state output + 3. Make sure there are multiple glusterfsd on one node say N1 + Kill one glusterfsd (kill -9 <pid>) and check + 4. 
Execute "gluster get-state" on N1 + """ + # Stop Volume + ret, _, _ = volume_stop(self.mnode, self.volname, force=True) + self.assertEqual(ret, 0, ("Failed to stop the volume " + "%s", self.volname)) + + # Execute 'gluster get-state' on mnode + get_state_data = get_gluster_state(self.mnode) + self.assertIsNotNone(get_state_data, "Getting gluster state failed.") + + # Getting Brick 1 Status - It should be in Stopped State + brick_status = (get_state_data['Volumes'] + ['volume1.brick1.status'].strip()) + self.assertEqual(brick_status, "Stopped", + "The brick is not in Stopped State") + + # Start the volume and check the status of brick again + ret, _, _ = volume_start(self.mnode, self.volname, force=True) + self.assertFalse(ret, 'Failed to start volume %s with "force" option' + % self.volname) + + # Execute 'gluster get-state' on mnode + get_state_data = get_gluster_state(self.mnode) + self.assertIsNotNone(get_state_data, "Getting gluster state failed.") + # Getting Brick 1 Status - It should be in Started State + brick_status = (get_state_data['Volumes'] + ['volume1.brick1.status'].strip()) + self.assertEqual(brick_status, "Started", + "The brick is not in Started State") + + # Bringing the brick offline + vol_bricks = get_online_bricks_list(self.mnode, self.volname) + ret = bring_bricks_offline(self.volname, vol_bricks[0]) + self.assertTrue(ret, 'Failed to bring brick %s offline' % + vol_bricks[0]) + + # Execute 'gluster get-state' on mnode + get_state_data = get_gluster_state(self.mnode) + self.assertIsNotNone(get_state_data, "Getting gluster state failed.") + # Getting Brick 1 Status - It should be in Stopped State + brick_status = (get_state_data['Volumes'] + ['volume1.brick1.status'].strip()) + self.assertEqual(brick_status, "Stopped", + "The brick is not in Stopped State") + g.log.info("Brick 1 is in Stopped state as expected.") + + # Checking the server 2 for the status of Brick. 
+ # It should be 'Started' state + node2 = self.servers[1] + get_state_data = get_gluster_state(node2) + self.assertIsNotNone(get_state_data, "Getting gluster state failed.") + # Getting Brick 2 Status - It should be in Started State + brick_status = (get_state_data['Volumes'] + ['volume1.brick2.status'].strip()) + self.assertEqual(brick_status, "Started", + "The brick is not in Started State") + g.log.info("Brick2 is in started state.") + + # Bringing back the offline brick + offline_brick = get_offline_bricks_list(self.mnode, self.volname) + ret = bring_bricks_online(self.mnode, self.volname, + offline_brick) + self.assertTrue(ret, 'Failed to bring brick %s online' % + offline_brick) + + def tearDown(self): + # stopping the volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Unmount Volume & Cleanup Volume") + g.log.info("Successful in Unmount Volume and Cleanup Volume") + + # calling GlusterBaseClass tearDownClass + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/glusterd/test_gluster_detect_drop_of_outbound_traffic.py b/tests/functional/glusterd/test_gluster_detect_drop_of_outbound_traffic.py new file mode 100644 index 000000000..1a45d5c82 --- /dev/null +++ b/tests/functional/glusterd/test_gluster_detect_drop_of_outbound_traffic.py @@ -0,0 +1,115 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" Description: + Gluster should detect drop of outbound traffic as network failure +""" + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.peer_ops import nodes_from_pool_list, get_peer_status +from glustolibs.gluster.volume_ops import volume_status + + +@runs_on([['distributed', 'replicated', 'distributed-replicated', + 'dispersed', 'distributed-dispersed', 'arbiter', + 'distributed-arbiter'], ['glusterfs']]) +class TestGlusterDetectDropOfOutboundTrafficAsNetworkFailure(GlusterBaseClass): + def setUp(self): + # calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + ret = self.setup_volume() + if not ret: + raise ExecutionError("Volume creation failed: %s" + % self.volname) + g.log.info("Volume created successfully : %s", self.volname) + + def tearDown(self): + # Removing the status_err file and the iptable rule,if set previously + if self.iptablerule_set: + cmd = "iptables -D OUTPUT -p tcp -m tcp --dport 24007 -j DROP" + ret, _, _ = g.run(self.servers[1], cmd) + if ret: + raise ExecutionError("Failed to remove the iptable rule" + " for glusterd") + + # Cleaning up the volume + ret = self.cleanup_volume() + if not ret: + raise ExecutionError("Failed to cleanup the volume %s" + % self.volname) + g.log.info("Volume deleted successfully: %s", self.volname) + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_gluster_detect_drop_of_out_traffic_as_network_failure(self): + """ + Test Case: + 1) Create a volume and start it. 
+ 2) Add an iptable rule to drop outbound glusterd traffic + 3) Check if the rule is added in iptables list + 4) Execute few Gluster CLI commands like volume status, peer status + 5) Gluster CLI commands should fail with suitable error message + """ + # Set iptablerule_set as false initially + self.iptablerule_set = False + + # Set iptable rule on one node to drop outbound glusterd traffic + cmd = "iptables -I OUTPUT -p tcp --dport 24007 -j DROP" + ret, _, _ = g.run(self.servers[1], cmd) + self.assertEqual(ret, 0, "Failed to set iptable rule on the node: %s" + % self.servers[1]) + g.log.info("Successfully added the rule to iptable") + + # Update iptablerule_set to true + self.iptablerule_set = True + + # Confirm if the iptable rule was added successfully + iptable_rule = "'OUTPUT -p tcp -m tcp --dport 24007 -j DROP'" + cmd = "iptables -S OUTPUT | grep %s" % iptable_rule + ret, _, _ = g.run(self.servers[1], cmd) + self.assertEqual(ret, 0, "Failed to get the rule from iptable") + + # Fetch number of nodes in the pool, except localhost + pool_list = nodes_from_pool_list(self.mnode) + peers_count = len(pool_list) - 1 + + # Gluster CLI commands should fail + # Check volume status command + ret, _, err = volume_status(self.servers[1]) + self.assertEqual(ret, 2, "Unexpected: gluster volume status command" + " did not return any error") + + status_err_count = err.count("Staging failed on") + self.assertEqual(status_err_count, peers_count, "Unexpected: No. 
of" + " nodes on which vol status cmd failed is not equal" + " to peers_count value") + g.log.info("Volume status command failed with expected error message") + + # Check peer status command and all peers are in 'Disconnected' state + peer_list = get_peer_status(self.servers[1]) + + for peer in peer_list: + self.assertEqual(int(peer["connected"]), 0, "Unexpected: All" + " the peers are not in 'Disconnected' state") + self.assertEqual(peer["stateStr"], "Peer in Cluster", "Unexpected:" + " All the peers not in 'Peer in Cluster' state") + + g.log.info("Peer status command listed all the peers in the" + "expected state") diff --git a/tests/functional/glusterd/test_gluster_does_not_do_posix_lock_when_multiple_client.py b/tests/functional/glusterd/test_gluster_does_not_do_posix_lock_when_multiple_client.py new file mode 100644 index 000000000..e6cf31e9e --- /dev/null +++ b/tests/functional/glusterd/test_gluster_does_not_do_posix_lock_when_multiple_client.py @@ -0,0 +1,91 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on + + +@runs_on([['distributed', 'replicated', 'distributed-replicated', + 'dispersed', 'distributed-dispersed', + 'arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestFlock(GlusterBaseClass): + def setUp(self): + """ + setUp method for every test + """ + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + ret = self.setup_volume_and_mount_volume(self.mounts) + if not ret: + raise ExecutionError("Volume creation failed: %s" % self.volname) + + def tearDown(self): + """ + TearDown for every test + """ + # Stopping the volume and Cleaning up the volume + ret = self.unmount_volume_and_cleanup_volume(self.mounts) + if not ret: + raise ExecutionError( + "Failed Cleanup the Volume %s" % self.volname) + + self.get_super_method(self, 'tearDown')() + + def test_gluster_does_not_do_posix_lock_when_multiple_client(self): + """ + Steps: + 1. Create all types of volumes. + 2. Mount the brick on two client mounts + 3. Prepare same script to do flock on the two nodes + while running this script it should not hang + 4. 
Wait till 300 iteration on both the node + """ + + # Shell Script to be run on mount point + script = """ + #!/bin/bash + flock_func(){ + file=/bricks/brick0/test.log + touch $file + ( + flock -xo 200 + echo "client1 do something" > $file + sleep 1 + ) 300>$file + } + i=1 + while [ "1" = "1" ] + do + flock_func + ((i=i+1)) + echo $i + if [[ $i == 300 ]]; then + break + fi + done + """ + mount_point = self.mounts[0].mountpoint + cmd = "echo '{}' >'{}'/test.sh; sh '{}'/test.sh ".format( + script, mount_point, mount_point) + ret = g.run_parallel(self.clients[:2], cmd) + + # Check if 300 is present in the output + for client_ip, _ in ret.items(): + self.assertTrue("300" in ret[client_ip][1].split("\n"), + "300 iteration is not completed") + self.assertFalse(ret[client_ip][0], "Failed to run the cmd ") diff --git a/tests/functional/glusterd/test_gluster_volume_status_xml_dump.py b/tests/functional/glusterd/test_gluster_volume_status_xml_dump.py new file mode 100644 index 000000000..eacc0b3c5 --- /dev/null +++ b/tests/functional/glusterd/test_gluster_volume_status_xml_dump.py @@ -0,0 +1,106 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + """ + Description: + Test Default volume behavior and quorum options + """ + from time import sleep + + from glusto.core import Glusto as g + from glustolibs.gluster.exceptions import ExecutionError + from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on + from glustolibs.gluster.lib_utils import form_bricks_list + from glustolibs.gluster.volume_libs import cleanup_volume + from glustolibs.gluster.volume_ops import ( + volume_stop, get_volume_status, + volume_create, volume_start + ) + + + @runs_on([['distributed-arbiter'], + ['glusterfs']]) + class GetVolumeStatusXmlDump(GlusterBaseClass): + + def setUp(self): + """Setup Volume""" + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Fetching all the parameters for volume_create + list_of_three_servers = [] + server_info_for_three_nodes = {} + + for server in self.servers[0:3]: + list_of_three_servers.append(server) + server_info_for_three_nodes[server] = self.all_servers_info[ + server] + + bricks_list = form_bricks_list( + self.mnode, self.volname, 3, list_of_three_servers, + server_info_for_three_nodes) + # Creating 2nd volume + self.volname_2 = "test_volume" + ret, _, _ = volume_create(self.mnode, self.volname_2, + bricks_list) + self.assertFalse(ret, "Volume creation failed") + g.log.info("Volume %s created successfully", self.volname_2) + ret, _, _ = volume_start(self.mnode, self.volname_2) + if ret: + raise ExecutionError( + "Failed to start volume {}".format(self.volname_2)) + # Setup and mount the volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to setup volume and mount it") + + def test_gluster_volume_status_xml_dump(self): + """ + Steps: + 1. stop one of the volume + (i.e) gluster volume stop <vol-name> + 2. 
Get the status of the volumes with --xml dump + XML dump should be consistent + """ + ret, _, _ = volume_stop(self.mnode, volname=self.volname_2, + force=True) + self.assertFalse(ret, + "Failed to stop volume '{}'".format( + self.volname_2)) + out = get_volume_status(self.mnode) + self.assertIsNotNone( + out, "Failed to get volume status on {}".format(self.mnode)) + for _ in range(4): + sleep(2) + out1 = get_volume_status(self.mnode) + self.assertIsNotNone( + out1, "Failed to get volume status on {}".format( + self.mnode)) + self.assertEqual(out1, out) + + def tearDown(self): + """tear Down Callback""" + ret = cleanup_volume(self.mnode, self.volname_2) + if not ret: + raise ExecutionError( + "Failed to remove volume '{}'".format(self.volname_2)) + # Unmount volume and cleanup. + ret = self.unmount_volume_and_cleanup_volume(self.mounts) + if not ret: + raise ExecutionError("Failed to Unmount and Cleanup volume") + g.log.info("Successful in unmount and cleanup operations") + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/glusterd/test_glusterd_default_volume_behavior_quorum_options.py b/tests/functional/glusterd/test_glusterd_default_volume_behavior_quorum_options.py new file mode 100644 index 000000000..b2652a4ea --- /dev/null +++ b/tests/functional/glusterd/test_glusterd_default_volume_behavior_quorum_options.py @@ -0,0 +1,144 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +Description: + Test Default volume behavior and quorum options +""" + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.volume_ops import ( + get_volume_options, + volume_reset) +from glustolibs.gluster.gluster_init import ( + stop_glusterd, + start_glusterd, + is_glusterd_running, + wait_for_glusterd_to_start) +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.brickmux_ops import get_brick_processes_count +from glustolibs.gluster.peer_ops import wait_for_peers_to_connect + + +@runs_on([['replicated', 'arbiter', 'dispersed', 'distributed', + 'distributed-replicated', 'distributed-arbiter'], + ['glusterfs']]) +class TestGlusterDDefaultVolumeBehaviorQuorumOptions(GlusterBaseClass): + """ Testing default volume behavior and Quorum options """ + + def setUp(self): + """Setup Volume""" + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup and mount the volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to setup volume and mount it") + + def _validate_vol_options(self, option_name, option_value, for_all=False): + """ Function to validate default vol options """ + if not for_all: + ret = get_volume_options(self.mnode, self.volname, option_name) + else: + ret = get_volume_options(self.mnode, 'all', option_name) + self.assertIsNotNone(ret, "The %s option is not present" % option_name) + value = (ret[option_name]).split() + self.assertEqual(value[0], option_value, + ("Volume option for %s is not equal to %s" + % (option_name, option_value))) + g.log.info("Volume option %s is equal to 
the expected value %s", + option_name, option_value) + + def _get_total_brick_processes_count(self): + """ + Function to find the total number of brick processes in the cluster + """ + count = 0 + self.brick_list = get_all_bricks(self.mnode, self.volname) + for brick in self.brick_list: + server = brick.split(":")[0] + count += get_brick_processes_count(server) + return count + + def test_glusterd_default_vol_behavior_and_quorum_options(self): + """ + Test default volume behavior and quorum options + 1. Create a volume and start it. + 2. Check that no quorum options are found in vol info. + 3. Kill two glusterd processes. + 4. There shouldn't be any effect to the running glusterfsd + processes. + """ + # Check the default quorum options are correct. + self._validate_vol_options('cluster.server-quorum-type', 'off') + self._validate_vol_options('cluster.server-quorum-ratio', + '51', True) + + # Get the count of number of glusterfsd processes running. + count_before_glusterd_kill = self._get_total_brick_processes_count() + + # Kill two glusterd processes. + server_list = [self.servers[1], self.servers[2]] + ret = stop_glusterd(server_list) + self.assertTrue(ret, "Failed to stop glusterd on the specified nodes.") + ret = is_glusterd_running(server_list) + self.assertNotEqual(ret, 0, ("Glusterd is not stopped on the servers" + " where it was desired to be stopped.")) + g.log.info("Glusterd processes stopped in the desired servers.") + + # Get the count of number of glusterfsd processes running. + count_after_glusterd_kill = self._get_total_brick_processes_count() + + # The count of glusterfsd processes should match + self.assertEqual(count_before_glusterd_kill, count_after_glusterd_kill, + ("Glusterfsd processes are affected.")) + g.log.info("Glusterd processes are not affected.") + + # Start glusterd on all servers. 
+ ret = start_glusterd(self.servers) + self.assertTrue(ret, "Failed to Start glusterd on the specified" + " nodes") + g.log.info("Started glusterd on all nodes.") + + # Wait for glusterd to restart. + ret = wait_for_glusterd_to_start(self.servers) + self.assertTrue(ret, "Glusterd not up on all nodes.") + g.log.info("Glusterd is up and running on all nodes.") + + def tearDown(self): + """tear Down Callback""" + # Wait for peers to connect. + ret = wait_for_peers_to_connect(self.mnode, self.servers, 50) + if not ret: + raise ExecutionError("Peers are not in connected state.") + + # Unmount volume and cleanup. + ret = self.cleanup_volume() + if not ret: + raise ExecutionError("Failed to Unmount and Cleanup volume") + g.log.info("Successful in unmount and cleanup operations") + + # Reset the cluster options. + ret = volume_reset(self.mnode, "all") + if not ret: + raise ExecutionError("Failed to Reset the cluster options.") + g.log.info("Successfully reset cluster options.") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/glusterd/test_glusterd_gluster_process_stop_start_cycle.py b/tests/functional/glusterd/test_glusterd_gluster_process_stop_start_cycle.py new file mode 100644 index 000000000..3eb3518d2 --- /dev/null +++ b/tests/functional/glusterd/test_glusterd_gluster_process_stop_start_cycle.py @@ -0,0 +1,123 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +Description: + Checking gluster processes stop and start cycle. +""" + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.volume_libs import ( + cleanup_volume, + wait_for_volume_process_to_be_online, + setup_volume) +from glustolibs.gluster.gluster_init import ( + start_glusterd, + wait_for_glusterd_to_start) +from glustolibs.gluster.peer_ops import wait_for_peers_to_connect + + +@runs_on([['distributed', 'replicated', 'arbiter', 'dispersed', + 'distributed-replicated', 'distributed-arbiter', + 'distributed-dispersed'], ['glusterfs']]) +class TestGlusterdStartStopCycle(GlusterBaseClass): + """ Testing Glusterd stop and start cycle """ + + def _wait_for_gluster_process_online_state(self): + """ + Function which waits for the glusterfs processes to come up + """ + # Wait for glusterd to be online and validate it's running. + self.assertTrue(wait_for_glusterd_to_start(self.servers), + "glusterd not up on the desired nodes.") + g.log.info("Glusterd is up and running on desired nodes.") + + # Wait for peers to connect + ret = wait_for_peers_to_connect(self.mnode, self.servers, 50) + self.assertTrue(ret, "Peers not in connected state.") + g.log.info("Peers in connected state.") + + # Wait for all volume processes to be online + ret = wait_for_volume_process_to_be_online(self.mnode, + self.volname, + timeout=600) + self.assertTrue(ret, ("All volume processes not up.")) + g.log.info("All volume processes are up.") + + def test_glusterd_start_stop_cycle(self): + """ + Test Glusterd stop-start cycle of gluster processes. + 1. Create a gluster volume. + 2. Kill all gluster related processes. + 3. 
Start glusterd service. + 4. Verify that all gluster processes are up. + 5. Repeat the above steps 5 times. + """ + # Create and start a volume + ret = setup_volume(self.mnode, self.all_servers_info, self.volume) + self.assertTrue(ret, "Failed to create and start volume") + + for _ in range(5): + killed_gluster_process_count = [] + # Kill gluster processes in all servers + for server in self.servers: + cmd = ('pkill --signal 9 -c -e "(glusterd|glusterfsd|glusterfs' + ')"|tail -1') + ret, out, err = g.run(server, cmd) + self.assertEqual(ret, 0, err) + killed_gluster_process_count.append(int(out)) + + # Start glusterd on all servers. + ret = start_glusterd(self.servers) + self.assertTrue(ret, ("Failed to restart glusterd on desired" + " nodes.")) + g.log.info("Glusterd started on desired nodes.") + + # Wait for gluster processes to come up. + self._wait_for_gluster_process_online_state() + + spawned_gluster_process_count = [] + # Get number of gluster processes spawned in all server + for server in self.servers: + cmd = ('pgrep -c "(glusterd|glusterfsd|glusterfs)"') + ret, out, err = g.run(server, cmd) + self.assertEqual(ret, 0, err) + spawned_gluster_process_count.append(int(out)) + + # Compare process count in each server. 
+ for index, server in enumerate(self.servers): + self.assertEqual(killed_gluster_process_count[index], + spawned_gluster_process_count[index], + ("All processes not up and running on %s", + server)) + + def tearDown(self): + """ tear Down Callback """ + # Wait for peers to connect + ret = wait_for_peers_to_connect(self.mnode, self.servers, 50) + if not ret: + raise ExecutionError("Peers are not in connected state.") + + # Cleanup the volume + ret = cleanup_volume(self.mnode, self.volname) + if not ret: + raise ExecutionError("Failed to cleanup volume") + g.log.info("Successfully cleaned up the volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/glusterd/test_glusterd_memory_consumption_increase.py b/tests/functional/glusterd/test_glusterd_memory_consumption_increase.py new file mode 100644 index 000000000..92c48da6f --- /dev/null +++ b/tests/functional/glusterd/test_glusterd_memory_consumption_increase.py @@ -0,0 +1,207 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + """ Description: + Increase in glusterd memory consumption on repetitive operations + for 100 volumes + """ + + from glusto.core import Glusto as g + from glustolibs.gluster.exceptions import ExecutionError + from glustolibs.gluster.gluster_base_class import GlusterBaseClass + from glustolibs.gluster.volume_ops import (volume_stop, volume_delete, + get_volume_list, + volume_start) + from glustolibs.gluster.gluster_init import (restart_glusterd, + wait_for_glusterd_to_start) + from glustolibs.gluster.volume_libs import (bulk_volume_creation, + cleanup_volume) + from glustolibs.gluster.volume_ops import set_volume_options + + + class TestGlusterMemoryConsumptionIncrease(GlusterBaseClass): + def tearDown(self): + # Clean up all volumes + if self.volume_present: + vol_list = get_volume_list(self.mnode) + if vol_list is None: + raise ExecutionError("Failed to get the volume list") + + for volume in vol_list: + ret = cleanup_volume(self.mnode, volume) + if not ret: + raise ExecutionError("Unable to delete volume %s" % volume) + g.log.info("Volume deleted successfully : %s", volume) + + # Disable multiplex + ret = set_volume_options(self.mnode, 'all', + {'cluster.brick-multiplex': 'disable'}) + self.assertTrue(ret, "Failed to enable brick-multiplex" + " for the cluster") + + # Calling baseclass tearDown method + self.get_super_method(self, 'tearDown')() + + def _volume_operations_in_loop(self): + """ Create, start, stop and delete 100 volumes in a loop """ + # Create and start 100 volumes in a loop + self.volume_config = { + 'name': 'volume-', + 'servers': self.servers, + 'voltype': {'type': 'distributed-replicated', + 'dist_count': 2, + 'replica_count': 3}, + } + + ret = bulk_volume_creation(self.mnode, 100, self.all_servers_info, + self.volume_config, "", False, True) + self.assertTrue(ret, "Failed to create volumes") + + self.volume_present = True + + g.log.info("Successfully created all the volumes") + + # Start 100 volumes in loop + for i in range(100): + self.volname = 
"volume-%d" % i + ret, _, _ = volume_start(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to start volume: %s" + % self.volname) + + g.log.info("Successfully started all the volumes") + + # Stop 100 volumes in loop + for i in range(100): + self.volname = "volume-%d" % i + ret, _, _ = volume_stop(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to stop volume: %s" + % self.volname) + + g.log.info("Successfully stopped all the volumes") + + # Delete 100 volumes in loop + for i in range(100): + self.volname = "volume-%d" % i + ret = volume_delete(self.mnode, self.volname) + self.assertTrue(ret, "Failed to delete volume: %s" + % self.volname) + + self.volume_present = False + + g.log.info("Successfully deleted all the volumes") + + def _memory_consumption_for_all_nodes(self, pid_list): + """Fetch the memory consumption by glusterd process for + all the nodes + """ + memory_consumed_list = [] + for i, server in enumerate(self.servers): + # Get the memory consumption of glusterd in each node + cmd = "top -b -n 1 -p %d | awk 'FNR==8 {print $6}'" % pid_list[i] + ret, mem, _ = g.run(server, cmd) + self.assertEqual(ret, 0, "Failed to get the memory usage of" + " glusterd process") + mem = int(mem)//1024 + memory_consumed_list.append(mem) + + return memory_consumed_list + + def test_glusterd_memory_consumption_increase(self): + """ + Test Case: + 1) Enable brick-multiplex and set max-bricks-per-process to 3 in + the cluster + 2) Get the glusterd memory consumption + 3) Perform create,start,stop,delete operation for 100 volumes + 4) Check glusterd memory consumption, it should not increase by + more than 50MB + 5) Repeat steps 3-4 for two more time + 6) Check glusterd memory consumption it should not increase by + more than 10MB + """ + # pylint: disable=too-many-locals + # Restarting glusterd to refresh its memory consumption + ret = restart_glusterd(self.servers) + self.assertTrue(ret, "Restarting glusterd failed") + + # check if glusterd is running 
post reboot + ret = wait_for_glusterd_to_start(self.servers) + self.assertTrue(ret, "Glusterd service is not running post reboot") + + # Enable brick-multiplex, set max-bricks-per-process to 3 in cluster + for key, value in (('cluster.brick-multiplex', 'enable'), + ('cluster.max-bricks-per-process', '3')): + ret = set_volume_options(self.mnode, 'all', {key: value}) + self.assertTrue(ret, "Failed to set {} to {} " + " for the cluster".format(key, value)) + + # Get the pidof of glusterd process + pid_list = [] + for server in self.servers: + # Get the pidof of glusterd process + cmd = "pidof glusterd" + ret, pid, _ = g.run(server, cmd) + self.assertEqual(ret, 0, "Failed to get the pid of glusterd") + pid = int(pid) + pid_list.append(pid) + + # Fetch the list of memory consumed in all the nodes + mem_consumed_list = self._memory_consumption_for_all_nodes(pid_list) + + # Perform volume operations for 100 volumes for first time + self._volume_operations_in_loop() + + # Fetch the list of memory consumed in all the nodes after 1 iteration + mem_consumed_list_1 = self._memory_consumption_for_all_nodes(pid_list) + + for i, mem in enumerate(mem_consumed_list_1): + condition_met = False + if mem - mem_consumed_list[i] <= 50: + condition_met = True + + self.assertTrue(condition_met, "Unexpected: Memory consumption" + " glusterd increased more than the expected" + " of value") + + # Perform volume operations for 100 volumes for second time + self._volume_operations_in_loop() + + # Fetch the list of memory consumed in all the nodes after 2 iterations + mem_consumed_list_2 = self._memory_consumption_for_all_nodes(pid_list) + + for i, mem in enumerate(mem_consumed_list_2): + condition_met = False + if mem - mem_consumed_list_1[i] <= 10: + condition_met = True + + self.assertTrue(condition_met, "Unexpected: Memory consumption" + " glusterd increased more than the expected" + " of value") + + # Perform volume operations for 100 volumes for third time + 
self._volume_operations_in_loop() + + # Fetch the list of memory consumed in all the nodes after 3 iterations + mem_consumed_list_3 = self._memory_consumption_for_all_nodes(pid_list) + + for i, mem in enumerate(mem_consumed_list_3): + condition_met = False + if mem - mem_consumed_list_2[i] <= 10: + condition_met = True + + self.assertTrue(condition_met, "Unexpected: Memory consumption" + " glusterd increased more than the expected" + " of value") diff --git a/tests/functional/glusterd/test_glusterd_quorum_command.py b/tests/functional/glusterd/test_glusterd_quorum_command.py new file mode 100644 index 000000000..034d626b3 --- /dev/null +++ b/tests/functional/glusterd/test_glusterd_quorum_command.py @@ -0,0 +1,104 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +""" +Description: + Test quorum cli commands in glusterd +""" + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.volume_ops import ( + set_volume_options, + volume_reset, + get_volume_options) + + +@runs_on([['replicated', 'arbiter', 'dispersed', 'distributed', + 'distributed-replicated', 'distributed-arbiter'], + ['glusterfs']]) +class TestGlusterDQuorumCLICommands(GlusterBaseClass): + """ Testing Quorum CLI commands in GlusterD """ + + def setUp(self): + """Setup Volume""" + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup and mount the volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to setup volume and mount it") + + def set_and_check_vol_option(self, option_name, option_value, + for_all=False): + """ Function for setting and checking volume_options """ + # Set the volume option + vol_option = {option_name: option_value} + if not for_all: + ret = set_volume_options(self.mnode, self.volname, vol_option) + else: + ret = set_volume_options(self.mnode, 'all', vol_option) + self.assertTrue(ret, "gluster volume option set of %s to %s failed" + % (option_name, option_value)) + + # Validate the option set + if not for_all: + ret = get_volume_options(self.mnode, self.volname, option_name) + else: + ret = get_volume_options(self.mnode, 'all', option_name) + self.assertIsNotNone(ret, "The %s option is not present" % option_name) + self.assertEqual(ret[option_name], option_value, + ("Volume option for %s is not equal to %s" + % (option_name, option_value))) + g.log.info("Volume option %s is equal to the expected value %s", + option_name, option_value) + + def test_glusterd_quorum_cli_commands(self): + """ + Test quorum CLI commands on glusterd + 1. Create a volume and start it. + 2. 
Set the quorum type to 'server' and verify it. + 3. Set the quorum type to 'none' and verify it. + 4. Set the quorum ratio and verify it. + """ + # Set server quorum type to 'server' and validate it + self.set_and_check_vol_option('cluster.server-quorum-type', 'server') + + # Set server quorum type to 'none' and validate it + self.set_and_check_vol_option('cluster.server-quorum-type', 'none') + + # Set server quorum ratio to 90% and validate it + self.set_and_check_vol_option('cluster.server-quorum-ratio', '90%', + True) + + def tearDown(self): + """tear Down Callback""" + # Unmount volume and cleanup. + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to unmount and cleanup volume") + g.log.info("Successful in unmount and cleanup of volume") + + # Reset the cluster options. + ret = volume_reset(self.mnode, "all") + if not ret: + raise ExecutionError("Failed to Reset the cluster options.") + g.log.info("Successfully reset cluster options.") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/glusterd/test_glusterd_restart_quorum_not_met.py b/tests/functional/glusterd/test_glusterd_restart_quorum_not_met.py new file mode 100644 index 000000000..6716f70f8 --- /dev/null +++ b/tests/functional/glusterd/test_glusterd_restart_quorum_not_met.py @@ -0,0 +1,125 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +Description: + Test brick status when quorum isn't met after glusterd restart. +""" + + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.volume_ops import set_volume_options +from glustolibs.gluster.volume_libs import ( + wait_for_volume_process_to_be_online) +from glustolibs.gluster.gluster_init import ( + stop_glusterd, + start_glusterd, + restart_glusterd, + wait_for_glusterd_to_start) +from glustolibs.gluster.brick_libs import ( + are_bricks_offline, + get_all_bricks) + + +@runs_on([['distributed', 'distributed-replicated', 'distributed-arbiter', + 'distributed-dispersed', 'replicated', 'dispersed', 'arbiter'], + ['glusterfs']]) +class TestBrickStatusQuorumNotMet(GlusterBaseClass): + def setUp(self): + """ + setUp method for every test. + """ + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup and mount the volume. + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to setup volume and mount it.") + + def test_offline_brick_status_when_quorum_not_met(self): + """ + Test Brick status when Quorum is not met after glusterd restart. + 1. Create a volume and mount it. + 2. Set the quorum type to 'server'. + 3. Bring some nodes down such that quorum won't be met. + 4. Brick status should be offline in the node which is up. + 5. Restart glusterd in this node. + 6. The brick status still should be offline as quorum isn't met. + """ + # Set the quorum type to server and validate it. 
+ vol_option = {'cluster.server-quorum-type': 'server'} + ret = set_volume_options(self.mnode, self.volname, vol_option) + self.assertTrue(ret, "gluster volume option set of %s to %s failed" + % ('cluster.server-quorum-type', 'server')) + g.log.info("Cluster quorum set to type server.") + + # Get the brick list. + brick_list = get_all_bricks(self.mnode, self.volname) + + # Stop glusterd processes. + ret = stop_glusterd(self.servers[1:]) + self.assertTrue(ret, "Failed to stop glusterd on specified nodes.") + g.log.info("Glusterd processes stopped in the desired servers.") + + # Get the brick status in a node where glusterd is up. + ret = are_bricks_offline(self.mnode, self.volname, brick_list[0:1]) + self.assertTrue(ret, "Bricks are online") + g.log.info("Bricks are offline as expected.") + + # Restart one of the node which is up. + ret = restart_glusterd(self.servers[0]) + self.assertTrue(ret, ("Failed to restart glusterd on desired node.")) + g.log.info("Glusterd restarted on the desired node.") + + # Wait for glusterd to be online and validate it's running. + self.assertTrue(wait_for_glusterd_to_start(self.servers[0]), + "Glusterd not up on the desired server.") + g.log.info("Glusterd is up in the desired server.") + + # Get the brick status from the restarted node. + ret = are_bricks_offline(self.mnode, self.volname, brick_list[0:1]) + self.assertTrue(ret, "Bricks are online") + g.log.info("Bricks are offline as expected.") + + # Start glusterd on all servers. + ret = start_glusterd(self.servers) + self.assertTrue(ret, "Failed to start glusterd on the specified nodes") + g.log.info("Initiated start of glusterd on all nodes.") + + # Wait for glusterd to start. 
+ ret = wait_for_glusterd_to_start(self.servers) + self.assertTrue(ret, "Glusterd not up on all nodes.") + g.log.info("Glusterd is up and running on all nodes.") + + # Wait for all volume processes to be online + ret = wait_for_volume_process_to_be_online(self.mnode, self.volname, + timeout=600) + self.assertTrue(ret, ("All volume processes not up.")) + g.log.info("All volume processes are up.") + + def tearDown(self): + """tear Down callback""" + # unmount volume and cleanup. + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to unmount and cleanup volume") + g.log.info("Successful in unmount and cleanup operations") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/glusterd/test_glusterd_selinux.py b/tests/functional/glusterd/test_glusterd_selinux.py new file mode 100644 index 000000000..1790780bc --- /dev/null +++ b/tests/functional/glusterd/test_glusterd_selinux.py @@ -0,0 +1,75 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +""" + Description: + Test Cases in this module tests Gluster against SELinux Labels and Policies +""" + +import pytest +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass +from glustolibs.gluster.glusterfile import file_exists + + +class TestGlusterAgainstSELinux(GlusterBaseClass): + """Glusterd checks against SELinux Labels and Policies + """ + + @staticmethod + def run_cmd(host, cmd, opts='', operate_on=''): + if opts: + opts = '-'+opts + command = "{} {} {}".format(cmd, opts, operate_on) + rcode, rout, rerr = g.run(host, command) + if not rcode: + return True, rout + + g.log.error("On '%s', '%s' returned '%s'", host, command, rerr) + return False, rout + + @pytest.mark.test_selinux_label + def test_selinux_label(self): + """ + TestCase: + 1. Check the existence of '/usr/lib/firewalld/services/glusterfs.xml' + 2. Validate the owner of this file as 'glusterfs-server' + 3. Validate SELinux label context as 'system_u:object_r:lib_t:s0' + """ + + fqpath = '/usr/lib/firewalld/services/glusterfs.xml' + + for server in self.all_servers_info: + # Check existence of xml file + self.assertTrue(file_exists(server, fqpath), "Failed to verify " + "existence of '{}' in {} ".format(fqpath, server)) + g.log.info("Validated the existence of required xml file") + + # Check owner of xml file + status, result = self.run_cmd(server, 'rpm', 'qf', fqpath) + self.assertTrue(status, "Fail: Not able to find owner for {} on " + "{}".format(fqpath, server)) + exp_str = 'glusterfs-server' + self.assertIn(exp_str, result, "Fail: Owner of {} should be " + "{} on {}".format(fqpath, exp_str, server)) + + # Validate SELinux label + status, result = self.run_cmd(server, 'ls', 'lZ', fqpath) + self.assertTrue(status, "Fail: Not able to find SELinux label " + "for {} on {}".format(fqpath, server)) + exp_str = 'system_u:object_r:lib_t:s0' + self.assertIn(exp_str, result, "Fail: SELinux label on {}" + "should be {} on {}".format(fqpath, exp_str, 
server)) diff --git a/tests/functional/glusterd/test_glusterd_set_reset_reserve_limit.py b/tests/functional/glusterd/test_glusterd_set_reset_reserve_limit.py new file mode 100644 index 000000000..c3104f198 --- /dev/null +++ b/tests/functional/glusterd/test_glusterd_set_reset_reserve_limit.py @@ -0,0 +1,95 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +""" +Description: + Test set and reset of storage reserve limit in glusterd +""" + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.volume_ops import ( + set_volume_options, + reset_volume_option, + get_volume_options) + + +@runs_on([['distributed', 'distributed-replicated', 'distributed-arbiter', + 'distributed-dispersed', 'replicated', 'arbiter', 'dispersed'], + ['glusterfs']]) +class TestGlusterDSetResetReserveLimit(GlusterBaseClass): + """ Testing set and reset of Reserve limit in GlusterD """ + + def setUp(self): + """Setup Volume""" + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup and mount the volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to setup volume and mount it") + + def validate_vol_option(self, option_name, value_expected): + """ Function for validating volume options """ + # Get the volume option. + ret = get_volume_options(self.mnode, self.volname, option_name) + self.assertIsNotNone(ret, "The %s option is not present" % option_name) + self.assertEqual(ret[option_name], value_expected, + ("Volume option for %s is not equal to %s" + % (option_name, value_expected))) + g.log.info("Volume option %s is equal to the expected value %s", + option_name, value_expected) + + def test_glusterd_set_reset_reserve_limit(self): + """ + Test set and reset of reserve limit on glusterd + 1. Create a volume and start it. + 2. Set storage.reserve limit on the created volume and verify it. + 3. Reset storage.reserve limit on the created volume and verify it. 
+ """ + # Setting storage.reserve to 50 + ret = set_volume_options(self.mnode, self.volname, + {'storage.reserve': '50'}) + self.assertTrue(ret, "Failed to set storage reserve on %s" + % self.mnode) + + # Validate storage.reserve option set to 50 + self.validate_vol_option('storage.reserve', '50') + + # Reseting the storage.reserve limit + ret, _, _ = reset_volume_option(self.mnode, self.volname, + 'storage.reserve') + self.assertEqual(ret, 0, "Failed to reset the storage.reserve limit") + + # Validate that the storage.reserve option is reset + ret = get_volume_options(self.mnode, self.volname, 'storage.reserve') + if ret['storage.reserve'] == '1': + self.validate_vol_option('storage.reserve', '1') + else: + self.validate_vol_option('storage.reserve', '1 (DEFAULT)') + + def tearDown(self): + """tear Down Callback""" + # Unmount volume and cleanup. + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to unmount and cleanup volume") + g.log.info("Successful in unmount and cleanup of volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/glusterd/test_mountpoint_ownership_post_volume_restart.py b/tests/functional/glusterd/test_mountpoint_ownership_post_volume_restart.py new file mode 100644 index 000000000..5a01d860f --- /dev/null +++ b/tests/functional/glusterd/test_mountpoint_ownership_post_volume_restart.py @@ -0,0 +1,109 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" Description: + Test mount point ownership persistence post volume restart. +""" + +from time import sleep +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.glusterfile import ( + get_file_stat, + set_file_permissions) +from glustolibs.gluster.volume_ops import ( + volume_stop, + volume_start) +from glustolibs.gluster.volume_libs import wait_for_volume_process_to_be_online + + +@runs_on([['arbiter', 'distributed', 'replicated', 'distributed-replicated', + 'dispersed', 'distributed-dispersed', 'distributed-arbiter'], + ['glusterfs']]) +class TestMountPointOwnershipPostVolumeRestart(GlusterBaseClass): + """ Test mount point ownership persistence post volume restart """ + + def setUp(self): + """Setup Volume""" + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup and mount the volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to setup volume and mount it") + self.client = self.mounts[0].client_system + self.mountpoint = self.mounts[0].mountpoint + + def validate_mount_permissions(self): + """ + Verify the mount permissions + """ + stat_mountpoint_dict = get_file_stat(self.client, + self.mounts[0].mountpoint) + self.assertEqual(stat_mountpoint_dict['access'], '777', "Expected 777 " + " but found %s" % stat_mountpoint_dict['access']) + g.log.info("Mountpoint permissions is 777, as expected.") + + def test_mountpoint_ownsership_post_volume_restart(self): + """ + Test mountpoint ownership post volume restart + 1. Create a volume and mount it on client. + 2. 
Set ownership permissions and validate it.
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from random import choice +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass +from glustolibs.gluster.peer_ops import (peer_probe, peer_detach, + peer_probe_servers, + nodes_from_pool_list) +from glustolibs.gluster.lib_utils import is_core_file_created +from glustolibs.gluster.exceptions import ExecutionError + + +class TestPeerProbeWithFirewallNotOpened(GlusterBaseClass): + + def setUp(self): + # Performing peer detach + for server in self.servers[1:]: + ret, _, _ = peer_detach(self.mnode, server) + if ret: + raise ExecutionError("Peer detach failed") + g.log.info("Peer detach SUCCESSFUL.") + self.get_super_method(self, 'setUp')() + self.node_to_probe = choice(self.servers[1:]) + + def tearDown(self): + # Add the removed services in firewall + for service in ('glusterfs', 'rpc-bind'): + for option in ("", " --permanent"): + cmd = ("firewall-cmd --zone=public --add-service={}{}" + .format(service, option)) + ret, _, _ = g.run(self.node_to_probe, cmd) + if ret: + raise ExecutionError("Failed to add firewall service %s " + "on %s" % (service, + self.node_to_probe)) + + # Detach servers from cluster + pool = nodes_from_pool_list(self.mnode) + self.assertIsNotNone(pool, "Failed to get pool list") + 
for node in pool: + if not peer_detach(self.mnode, node): + raise ExecutionError("Failed to detach %s from %s" + % (node, self.mnode)) + # Create a cluster + if not peer_probe_servers(self.mnode, self.servers): + raise ExecutionError("Failed to probe peer " + "servers %s" % self.servers) + g.log.info("Peer probe success for detached " + "servers %s", self.servers) + + self.get_super_method(self, 'tearDown')() + + def _remove_firewall_service(self): + """ Remove glusterfs and rpc-bind services from firewall""" + for service in ['glusterfs', 'rpc-bind']: + for option in ("", " --permanent"): + cmd = ("firewall-cmd --zone=public --remove-service={}{}" + .format(service, option)) + ret, _, _ = g.run(self.node_to_probe, cmd) + self.assertEqual(ret, 0, ("Failed to bring down service {} on" + " node {}" + .format(service, + self.node_to_probe))) + g.log.info("Successfully removed glusterfs and rpc-bind services") + + def _get_test_specific_glusterd_log(self, node): + """Gets the test specific glusterd log""" + # Extract the test specific cmds from cmd_hostory + start_msg = "Starting Test : %s : %s" % (self.id(), + self.glustotest_run_id) + end_msg = "Ending Test: %s : %s" % (self.id(), + self.glustotest_run_id) + glusterd_log = "/var/log/glusterfs/glusterd.log" + cmd = ("awk '/{}/ {{p=1}}; p; /{}/ {{p=0}}' {}" + .format(start_msg, end_msg, glusterd_log)) + ret, test_specific_glusterd_log, err = g.run(node, cmd) + self.assertEqual(ret, 0, "Failed to extract glusterd log specific" + " to the current test case. " + "Error : %s" % err) + return test_specific_glusterd_log + + def test_verify_peer_probe_with_firewall_ports_not_opened(self): + """ + Test Steps: + 1. Open glusterd port only in Node1 using firewall-cmd command + 2. Perform peer probe to Node2 from Node 1 + 3. Verify glusterd.log for Errors + 4. 
Check for core files created + """ + + ret, test_timestamp, _ = g.run_local('date +%s') + test_timestamp = test_timestamp.strip() + + # Remove firewall service on the node to probe to + self._remove_firewall_service() + + # Try peer probe from mnode to node + ret, _, err = peer_probe(self.mnode, self.node_to_probe) + self.assertEqual(ret, 1, ("Unexpected behavior: Peer probe should" + " fail when the firewall services are " + "down but returned success")) + + expected_err = ('peer probe: failed: Probe returned with ' + 'Transport endpoint is not connected\n') + self.assertEqual(err, expected_err, + "Expected error {}, but returned {}" + .format(expected_err, err)) + msg = ("Peer probe of {} from {} failed as expected " + .format(self.mnode, self.node_to_probe)) + g.log.info(msg) + + # Verify there are no glusterd crashes + status = True + glusterd_logs = (self._get_test_specific_glusterd_log(self.mnode) + .split("\n")) + for line in glusterd_logs: + if ' E ' in line: + status = False + g.log.info("Error found: ' %s '", line) + + self.assertTrue(status, "Glusterd crash found") + + # Verify no core files are created + ret = is_core_file_created(self.servers, test_timestamp) + self.assertTrue(ret, "Unexpected crash found.") + g.log.info("No core file found as expected") diff --git a/tests/functional/glusterd/test_probe_glusterd_down.py b/tests/functional/glusterd/test_probe_glusterd_down.py index 3705904a9..c851bf104 100644 --- a/tests/functional/glusterd/test_probe_glusterd_down.py +++ b/tests/functional/glusterd/test_probe_glusterd_down.py @@ -1,4 +1,4 @@ -# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2020-2021 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,17 +14,14 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -from time import sleep - from glusto.core import Glusto as g from glustolibs.gluster.gluster_base_class import GlusterBaseClass from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.peer_ops import peer_probe from glustolibs.gluster.lib_utils import is_core_file_created from glustolibs.gluster.peer_ops import peer_detach, is_peer_connected -from glustolibs.gluster.gluster_init import (stop_glusterd, start_glusterd, - wait_for_glusterd_to_start) -from glustolibs.misc.misc_libs import are_nodes_online +from glustolibs.gluster.gluster_init import stop_glusterd, start_glusterd +from glustolibs.misc.misc_libs import bring_down_network_interface class PeerProbeWhenGlusterdDown(GlusterBaseClass): @@ -57,7 +54,7 @@ class PeerProbeWhenGlusterdDown(GlusterBaseClass): ret, test_timestamp, _ = g.run_local('date +%s') test_timestamp = test_timestamp.strip() - # detach one of the nodes which is part of the cluster + # Detach one of the nodes which is part of the cluster g.log.info("detaching server %s ", self.servers[1]) ret, _, err = peer_detach(self.mnode, self.servers[1]) msg = 'peer detach: failed: %s is not part of cluster\n' \ @@ -66,12 +63,12 @@ class PeerProbeWhenGlusterdDown(GlusterBaseClass): self.assertEqual(err, msg, "Failed to detach %s " % (self.servers[1])) - # bring down glusterd of the server which has been detached + # Bring down glusterd of the server which has been detached g.log.info("Stopping glusterd on %s ", self.servers[1]) ret = stop_glusterd(self.servers[1]) self.assertTrue(ret, "Fail to stop glusterd on %s " % self.servers[1]) - # trying to peer probe the node whose glusterd was stopped using its IP + # 
Trying to peer probe the node whose glusterd was stopped using IP g.log.info("Peer probing %s when glusterd down ", self.servers[1]) ret, _, err = peer_probe(self.mnode, self.servers[1]) self.assertNotEqual(ret, 0, "Peer probe should not pass when " @@ -79,7 +76,7 @@ class PeerProbeWhenGlusterdDown(GlusterBaseClass): self.assertEqual(err, "peer probe: failed: Probe returned with " "Transport endpoint is not connected\n") - # trying to peer probe the same node with hostname + # Trying to peer probe the same node with hostname g.log.info("Peer probing node %s using hostname with glusterd down ", self.servers[1]) hostname = g.run(self.servers[1], "hostname") @@ -89,27 +86,24 @@ class PeerProbeWhenGlusterdDown(GlusterBaseClass): self.assertEqual(err, "peer probe: failed: Probe returned with" " Transport endpoint is not connected\n") - # start glusterd again for the next set of test steps + # Start glusterd again for the next set of test steps g.log.info("starting glusterd on %s ", self.servers[1]) ret = start_glusterd(self.servers[1]) self.assertTrue(ret, "glusterd couldn't start successfully on %s" % self.servers[1]) - # reboot a server and then trying to peer probe at the time of reboot - g.log.info("Rebooting %s and checking peer probe", self.servers[1]) - reboot = g.run_async(self.servers[1], "reboot") - - # Mandatory sleep for 3 seconds to make sure node is in halted state - sleep(3) + # Bring down the network for sometime + network_status = bring_down_network_interface(self.servers[1], 150) # Peer probing the node using IP when it is still not online - g.log.info("Peer probing node %s which has been issued a reboot ", + g.log.info("Peer probing node %s when network is down", self.servers[1]) ret, _, err = peer_probe(self.mnode, self.servers[1]) self.assertNotEqual(ret, 0, "Peer probe passed when it was expected to" " fail") - self.assertEqual(err, "peer probe: failed: Probe returned with " - "Transport endpoint is not connected\n") + 
self.assertEqual(err.split("\n")[0], "peer probe: failed: Probe " + "returned with Transport endpoint" + " is not connected") # Peer probing the node using hostname when it is still not online g.log.info("Peer probing node %s using hostname which is still " @@ -118,35 +112,21 @@ class PeerProbeWhenGlusterdDown(GlusterBaseClass): ret, _, err = peer_probe(self.mnode, hostname[1].strip()) self.assertNotEqual(ret, 0, "Peer probe should not pass when node " "has not come online") - self.assertEqual(err, "peer probe: failed: Probe returned with " - "Transport endpoint is not connected\n") + self.assertEqual(err.split("\n")[0], "peer probe: failed: Probe " + "returned with Transport endpoint" + " is not connected") + + ret, _, _ = network_status.async_communicate() + if ret != 0: + g.log.error("Failed to perform network interface ops") - ret, _, _ = reboot.async_communicate() - self.assertEqual(ret, 255, "reboot failed") - - # Validate if rebooted node is online or not - count = 0 - while count < 40: - sleep(15) - ret, _ = are_nodes_online(self.servers[1]) - if ret: - g.log.info("Node %s is online", self.servers[1]) - break - count += 1 - self.assertTrue(ret, "Node in test not yet online") - - # check if glusterd is running post reboot - ret = wait_for_glusterd_to_start(self.servers[1], - glusterd_start_wait_timeout=120) - self.assertTrue(ret, "Glusterd service is not running post reboot") - - # peer probe the node must pass + # Peer probe the node must pass g.log.info("peer probing node %s", self.servers[1]) ret, _, err = peer_probe(self.mnode, self.servers[1]) self.assertEqual(ret, 0, "Peer probe has failed unexpectedly with " "%s " % err) - # checking if core file created in "/", "/tmp" and "/var/log/core" + # Checking if core file created in "/", "/tmp" and "/var/log/core" ret = is_core_file_created(self.servers, test_timestamp) self.assertTrue(ret, "core file found") diff --git a/tests/functional/glusterd/test_profile_info_without_having_profile_started.py 
b/tests/functional/glusterd/test_profile_info_without_having_profile_started.py new file mode 100644 index 000000000..e2403a93b --- /dev/null +++ b/tests/functional/glusterd/test_profile_info_without_having_profile_started.py @@ -0,0 +1,188 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +""" + Test Description: + Tests to check profile info without starting profile +""" + +from glusto.core import Glusto as g + +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.profile_ops import (profile_start, profile_info) +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import validate_io_procs +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.lib_utils import is_core_file_created +from glustolibs.gluster.gluster_init import is_glusterd_running +from glustolibs.gluster.volume_ops import get_volume_list +from glustolibs.gluster.volume_libs import (cleanup_volume, setup_volume) + + +@runs_on([['distributed', 'replicated', 'distributed-replicated', + 'dispersed', 'distributed-dispersed', 'arbiter', + 'distributed-arbiter'], ['glusterfs']]) +class TestProfileInfoWithoutHavingProfileStarted(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + cls.get_super_method(cls, 'setUpClass')() + + # Uploading file_dir script in all client direcotries + g.log.info("Upload io scripts to clients %s for running IO on " + "mounts", cls.clients) + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + self.get_super_method(self, 'setUp')() + # Creating Volume and mounting volume. + ret = self.setup_volume_and_mount_volume(self.mounts) + if not ret: + raise ExecutionError("Volume creation or mount failed: %s" + % self.volname) + g.log.info("Volme created and mounted successfully : %s", + self.volname) + + def tearDown(self): + # Unmounting and cleaning volume. 
+ ret = self.unmount_volume_and_cleanup_volume(self.mounts) + if not ret: + raise ExecutionError("Unable to delete volume % s" % self.volname) + g.log.info("Volume deleted successfully : %s", self.volname) + + # clean up all volumes + vol_list = get_volume_list(self.mnode) + if not vol_list: + raise ExecutionError("Failed to get the volume list") + for volume in vol_list: + ret = cleanup_volume(self.mnode, volume) + if not ret: + raise ExecutionError("Unable to delete volume % s" % volume) + g.log.info("Volume deleted successfully : %s", volume) + + self.get_super_method(self, 'tearDown')() + + def test_profile_operations(self): + """ + Test Case: + 1) Create a volume and start it. + 2) Mount volume on client and start IO. + 3) Start profile on the volume. + 4) Run profile info and see if all bricks are present or not. + 6) Create another volume. + 7) Run profile info without starting the profile. + 8) Run profile info with all possible options without starting + the profile. + """ + # Timestamp of current test case of start time + ret, test_timestamp, _ = g.run_local('date +%s') + test_timestamp = test_timestamp.strip() + + # Start IO on mount points. + g.log.info("Starting IO on all mounts...") + self.all_mounts_procs = [] + counter = 1 + for mount_obj in self.mounts: + g.log.info("Starting IO on %s:%s", mount_obj.client_system, + mount_obj.mountpoint) + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dir-depth 4 " + "--dir-length 6 " + "--dirname-start-num %d " + "--max-num-of-dirs 3 " + "--num-of-files 5 %s" % ( + self.script_upload_path, + counter, mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + self.all_mounts_procs.append(proc) + counter += 1 + + # Start profile on volume. 
+ ret, _, _ = profile_start(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to start profile on volume: %s" + % self.volname) + g.log.info("Successfully started profile on volume: %s", + self.volname) + + # Getting and checking output of profile info. + ret, out, _ = profile_info(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to run profile info on volume: %s" + % self.volname) + g.log.info("Successfully executed profile info on volume: %s", + self.volname) + + # Checking if all bricks are present in profile info. + brick_list = get_all_bricks(self.mnode, self.volname) + for brick in brick_list: + self.assertTrue(brick in out, + "Brick %s not a part of profile info output." + % brick) + g.log.info("Brick %s showing in profile info output.", + brick) + + # Validate IO + self.assertTrue( + validate_io_procs(self.all_mounts_procs, self.mounts), + "IO failed on some of the clients" + ) + g.log.info("IO validation complete.") + + # Create and start a volume + self.volume['name'] = "volume_2" + self.volname = "volume_2" + ret = setup_volume(self.mnode, self.all_servers_info, self.volume) + self.assertTrue(ret, "Failed to create and start volume") + g.log.info("Successfully created and started volume_2") + + # Check profile info on volume without starting profile + ret, _, _ = profile_info(self.mnode, self.volname) + self.assertNotEqual(ret, 0, "Unexpected:Successfully ran profile info" + " on volume: %s" % self.volname) + g.log.info("Expected: Failed to run pofile info on volume: %s", + self.volname) + + # Running profile info with different profile options. + profile_options = ('peek', 'incremental', 'clear', + 'incremental peek', 'cumulative') + for option in profile_options: + # Getting and checking output of profile info. 
+ ret, _, _ = profile_info(self.mnode, self.volname, + options=option) + self.assertNotEqual(ret, 0, + "Unexpected: Successfully ran profile info" + " %s on volume: %s" % (option, self.volname)) + g.log.info("Expected: Failed to execute profile info %s on" + " volume: %s", option, self.volname) + + # Chekcing for core files. + ret = is_core_file_created(self.servers, test_timestamp) + self.assertTrue(ret, "glusterd service should not crash") + g.log.info("No core file found, glusterd service running " + "successfully") + + # Checking whether glusterd is running or not + ret = is_glusterd_running(self.servers) + self.assertEqual(ret, 0, "Glusterd has crashed on nodes.") + g.log.info("No glusterd crashes observed.") diff --git a/tests/functional/glusterd/test_profile_simultaneously_on_different_nodes.py b/tests/functional/glusterd/test_profile_simultaneously_on_different_nodes.py new file mode 100644 index 000000000..33d74daf7 --- /dev/null +++ b/tests/functional/glusterd/test_profile_simultaneously_on_different_nodes.py @@ -0,0 +1,185 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" + Test Description: + Tests to test profile simultaneously on different nodes. 
+""" + +from glusto.core import Glusto as g + +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.profile_ops import profile_start +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import validate_io_procs +from glustolibs.gluster.lib_utils import is_core_file_created +from glustolibs.gluster.gluster_init import is_glusterd_running +from glustolibs.gluster.volume_ops import get_volume_list +from glustolibs.gluster.volume_libs import (cleanup_volume, setup_volume) + + +@runs_on([['distributed', 'replicated', 'distributed-replicated', + 'dispersed', 'distributed-dispersed'], ['glusterfs']]) +class TestProfileSimultaneouslyOnDifferentNodes(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + cls.get_super_method(cls, 'setUpClass')() + + # Uploading file_dir script in all client direcotries + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + self.get_super_method(self, 'setUp')() + # Creating Volume and mounting volume. + ret = self.setup_volume_and_mount_volume(self.mounts) + if not ret: + raise ExecutionError("Volume creation or mount failed: %s" + % self.volname) + g.log.info("Volume created and mounted successfully : %s", + self.volname) + + def tearDown(self): + # Unmounting and cleaning volume. 
+ ret = self.unmount_volume_and_cleanup_volume(self.mounts) + if not ret: + raise ExecutionError("Unable to delete volume % s" % self.volname) + g.log.info("Volume deleted successfully : %s", self.volname) + + # clean up all volumes + vol_list = get_volume_list(self.mnode) + if not vol_list: + raise ExecutionError("Failed to get the volume list") + for volume in vol_list: + ret = cleanup_volume(self.mnode, volume) + if not ret: + raise ExecutionError("Unable to delete volume % s" % volume) + g.log.info("Volume deleted successfully : %s", volume) + + self.get_super_method(self, 'tearDown')() + + def test_profile_simultaneously_on_different_nodes(self): + """ + Test Case: + 1) Create a volume and start it. + 2) Mount volume on client and start IO. + 3) Start profile on the volume. + 4) Create another volume. + 5) Start profile on the volume. + 6) Run volume status in a loop in one of the node. + 7) Run profile info for the new volume on one of the other node + 8) Run profile info for the new volume in loop for 100 times on + the other node + """ + # Timestamp of current test case of start time + ret, test_timestamp, _ = g.run_local('date +%s') + test_timestamp = test_timestamp.strip() + + # Start IO on mount points. + self.all_mounts_procs = [] + counter = 1 + for mount_obj in self.mounts: + g.log.info("Starting IO on %s:%s", mount_obj.client_system, + mount_obj.mountpoint) + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dir-depth 4 " + "--dir-length 6 " + "--dirname-start-num %d " + "--max-num-of-dirs 3 " + "--num-of-files 5 %s" % ( + self.script_upload_path, + counter, mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + self.all_mounts_procs.append(proc) + counter += 1 + + # Start profile on volume. 
+ ret, _, _ = profile_start(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to start profile on volume: %s" + % self.volname) + g.log.info("Successfully started profile on volume: %s", + self.volname) + + # Validate IO + self.assertTrue( + validate_io_procs(self.all_mounts_procs, self.mounts), + "IO failed on some of the clients" + ) + g.log.info("IO validation complete.") + + # Create and start a volume + self.volume['name'] = "volume_2" + self.volname = "volume_2" + ret = setup_volume(self.mnode, self.all_servers_info, self.volume) + self.assertTrue(ret, "Failed to create and start volume") + g.log.info("Successfully created and started volume_2") + + # Start profile on volume. + ret, _, _ = profile_start(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to start profile on volume: %s" + % self.volname) + g.log.info("Successfully started profile on volume: %s", + self.volname) + + # Run volume status on one of the node in loop + cmd = "for i in `seq 1 100`;do gluster v status;done" + proc1 = g.run_async(self.servers[1], cmd) + + # Check profile on one of the other node + cmd = "gluster v profile %s info" % self.volname + ret, _, _ = g.run(self.mnode, cmd) + self.assertEqual(ret, 0, "Failed to run profile info on volume: %s" + " on node %s" % (self.volname, self.mnode)) + g.log.info("Successfully run pofile info on volume: %s on node %s", + self.volname, self.mnode) + + # Run volume profile info on one of the other node in loop + cmd = """for i in `seq 1 100`;do gluster v profile %s info; + done""" % self.volname + proc2 = g.run_async(self.servers[3], cmd) + + ret1, _, _ = proc1.async_communicate() + ret2, _, _ = proc2.async_communicate() + + self.assertEqual(ret1, 0, "Failed to run volume status in a loop" + " on node %s" % self.servers[1]) + g.log.info("Successfully running volume status in a loop on node" + " %s", self.servers[1]) + + self.assertEqual(ret2, 0, "Failed to run profile info in a loop" + " on node %s" % self.servers[3]) + 
g.log.info("Successfully running volume status in a loop on node" + " %s", self.servers[3]) + + # Chekcing for core files. + ret = is_core_file_created(self.servers, test_timestamp) + self.assertTrue(ret, "glusterd service should not crash") + g.log.info("No core file found, glusterd service running " + "successfully") + + # Checking whether glusterd is running or not + ret = is_glusterd_running(self.servers) + self.assertEqual(ret, 0, "Glusterd has crashed on nodes.") + g.log.info("No glusterd crashes observed.") diff --git a/tests/functional/glusterd/test_rebalance_hang.py b/tests/functional/glusterd/test_rebalance_hang.py index 7911eb277..90b31d222 100644 --- a/tests/functional/glusterd/test_rebalance_hang.py +++ b/tests/functional/glusterd/test_rebalance_hang.py @@ -18,7 +18,8 @@ from glusto.core import Glusto as g from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on from glustolibs.gluster.volume_ops import (volume_create, volume_start, get_volume_list, get_volume_status) -from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.brick_libs import ( + get_all_bricks, wait_for_bricks_to_be_online) from glustolibs.gluster.volume_libs import (cleanup_volume) from glustolibs.gluster.peer_ops import (peer_probe, peer_detach, peer_probe_servers, @@ -61,6 +62,11 @@ class TestRebalanceHang(GlusterBaseClass): vol_list = get_volume_list(self.mnode) if vol_list is not None: for volume in vol_list: + # check all bricks are online + ret = wait_for_bricks_to_be_online(self.mnode, volume) + if not ret: + raise ExecutionError("Failed to bring bricks online" + "for volume %s" % volume) ret = cleanup_volume(self.mnode, volume) if not ret: raise ExecutionError("Failed to cleanup volume") diff --git a/tests/functional/glusterd/test_rebalance_start_not_failed_with_socket_path_too_long.py b/tests/functional/glusterd/test_rebalance_start_not_failed_with_socket_path_too_long.py new file mode 100644 index 000000000..87cab40d0 --- 
/dev/null +++ b/tests/functional/glusterd/test_rebalance_start_not_failed_with_socket_path_too_long.py @@ -0,0 +1,173 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +""" +Description: + Test Rebalance should start successfully if name of volume more than 108 + chars +""" + +from glusto.core import Glusto as g +from glustolibs.gluster.brick_ops import add_brick +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.lib_utils import form_bricks_list +from glustolibs.gluster.mount_ops import umount_volume, mount_volume +from glustolibs.gluster.rebalance_ops import ( + rebalance_start, + wait_for_rebalance_to_complete +) +from glustolibs.gluster.volume_libs import ( + volume_start, + cleanup_volume +) +from glustolibs.gluster.volume_ops import volume_create, get_volume_list +from glustolibs.io.utils import run_linux_untar + + +class TestLookupDir(GlusterBaseClass): + def tearDown(self): + cmd = ("sed -i '/transport.socket.bind-address/d'" + " /etc/glusterfs/glusterd.vol") + ret, _, _ = g.run(self.mnode, cmd) + if ret: + raise ExecutionError("Failed to remove entry from 'glusterd.vol'") + for mount_dir in self.mount: + ret 
= umount_volume(self.clients[0], mount_dir) + if not ret: + raise ExecutionError("Failed to cleanup Volume") + + vol_list = get_volume_list(self.mnode) + if vol_list is not None: + for volume in vol_list: + ret = cleanup_volume(self.mnode, volume) + if not ret: + raise ExecutionError("Failed to cleanup volume") + g.log.info("Volume deleted successfully : %s", volume) + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_rebalance_start_not_fail(self): + """ + 1. On Node N1, Add "transport.socket.bind-address N1" in the + /etc/glusterfs/glusterd.vol + 2. Create a replicate (1X3) and disperse (4+2) volumes with + name more than 108 chars + 3. Mount the both volumes using node 1 where you added the + "transport.socket.bind-address" and start IO(like untar) + 4. Perform add-brick on replicate volume 3-bricks + 5. Start rebalance on replicated volume + 6. Perform add-brick for disperse volume 6 bricks + 7. Start rebalance of disperse volume + """ + cmd = ("sed -i 's/end-volume/option " + "transport.socket.bind-address {}\\n&/g' " + "/etc/glusterfs/glusterd.vol".format(self.mnode)) + disperse = ("disperse_e4upxjmtre7dl4797wedbp7r3jr8equzvmcae9f55t6z1" + "ffhrlk40jtnrzgo4n48fjf6b138cttozw3c6of3ze71n9urnjkshoi") + replicate = ("replicate_e4upxjmtre7dl4797wedbp7r3jr8equzvmcae9f55t6z1" + "ffhrlk40tnrzgo4n48fjf6b138cttozw3c6of3ze71n9urnjskahn") + + volnames = (disperse, replicate) + for volume, vol_name in ( + ("disperse", disperse), ("replicate", replicate)): + + bricks_list = form_bricks_list(self.mnode, volume, + 6 if volume == "disperse" else 3, + self.servers, + self.all_servers_info) + if volume == "replicate": + ret, _, _ = volume_create(self.mnode, replicate, + bricks_list, + replica_count=3) + + else: + ret, _, _ = volume_create( + self.mnode, disperse, bricks_list, force=True, + disperse_count=6, redundancy_count=2) + + self.assertFalse( + ret, + "Unexpected: Volume create '{}' failed ".format(vol_name)) + ret, _, _ = 
volume_start(self.mnode, vol_name) + self.assertFalse(ret, "Failed to start volume") + + # Add entry in 'glusterd.vol' + ret, _, _ = g.run(self.mnode, cmd) + self.assertFalse( + ret, "Failed to add entry in 'glusterd.vol' file") + + self.list_of_io_processes = [] + + # mount volume + self.mount = ("/mnt/replicated_mount", "/mnt/disperse_mount") + for mount_dir, volname in zip(self.mount, volnames): + ret, _, _ = mount_volume( + volname, "glusterfs", mount_dir, self.mnode, + self.clients[0]) + self.assertFalse( + ret, "Failed to mount the volume '{}'".format(mount_dir)) + + # Run IO + # Create a dir to start untar + # for mount_point in self.mount: + self.linux_untar_dir = "{}/{}".format(mount_dir, "linuxuntar") + ret = mkdir(self.clients[0], self.linux_untar_dir) + self.assertTrue(ret, "Failed to create dir linuxuntar for untar") + + # Start linux untar on dir linuxuntar + ret = run_linux_untar(self.clients[:1], mount_dir, + dirs=tuple(['linuxuntar'])) + self.list_of_io_processes += ret + self.is_io_running = True + + # Add Brick to replicate Volume + bricks_list = form_bricks_list( + self.mnode, replicate, 3, + self.servers, self.all_servers_info, "replicate") + ret, _, _ = add_brick( + self.mnode, replicate, bricks_list, force=True) + self.assertFalse(ret, "Failed to add-brick '{}'".format(replicate)) + + # Trigger Rebalance on the volume + ret, _, _ = rebalance_start(self.mnode, replicate) + self.assertFalse( + ret, "Failed to start rebalance on the volume '{}'".format( + replicate)) + + # Add Brick to disperse Volume + bricks_list = form_bricks_list( + self.mnode, disperse, 6, + self.servers, self.all_servers_info, "disperse") + + ret, _, _ = add_brick( + self.mnode, disperse, bricks_list, force=True) + self.assertFalse(ret, "Failed to add-brick '{}'".format(disperse)) + + # Trigger Rebalance on the volume + ret, _, _ = rebalance_start(self.mnode, disperse) + self.assertFalse( + ret, + "Failed to start rebalance on the volume {}".format(disperse)) + + # Check 
if Rebalance is completed on both the volume + for volume in (replicate, disperse): + ret = wait_for_rebalance_to_complete( + self.mnode, volume, timeout=600) + self.assertTrue( + ret, "Rebalance is not Compleated on Volume '{}'".format( + volume)) diff --git a/tests/functional/glusterd/test_reserve_limt_change_while_rebalance.py b/tests/functional/glusterd/test_reserve_limt_change_while_rebalance.py new file mode 100644 index 000000000..2a7aacdac --- /dev/null +++ b/tests/functional/glusterd/test_reserve_limt_change_while_rebalance.py @@ -0,0 +1,127 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.gluster.rebalance_ops import ( + rebalance_start, + rebalance_stop, + wait_for_rebalance_to_complete +) +from glustolibs.gluster.volume_libs import expand_volume +from glustolibs.gluster.volume_ops import set_volume_options +from glustolibs.io.utils import run_linux_untar + + +@runs_on([['distributed-replicated'], ['glusterfs']]) +class TestReserveLimitChangeWhileRebalance(GlusterBaseClass): + + def _set_vol_option(self, option): + """Method for setting volume option""" + ret = set_volume_options( + self.mnode, self.volname, option) + self.assertTrue(ret) + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Set I/O flag to false + cls.is_io_running = False + + # Setup Volume and Mount Volume + ret = cls.setup_volume_and_mount_volume(mounts=cls.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + def tearDown(self): + if not wait_for_rebalance_to_complete( + self.mnode, self.volname, timeout=300): + raise ExecutionError( + "Failed to complete rebalance on volume '{}'".format( + self.volname)) + + # Unmounting and cleaning volume + ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Unable to delete volume % s" % self.volname) + + self.get_super_method(self, 'tearDown')() + + def test_reserve_limt_change_while_rebalance(self): + """ + 1) Create a distributed-replicated volume and start it. + 2) Enable storage.reserve option on the volume using below command, + gluster volume set storage.reserve 50 + 3) Mount the volume on a client + 4) Add some data on the mount point (should be within reserve limits) + 5) Now, add-brick and trigger rebalance. 
+ While rebalance is in-progress change the reserve limit to a lower + value say (30) + 6. Stop the rebalance + 7. Reset the storage reserve value to 50 as in step 2 + 8. trigger rebalance + 9. while rebalance in-progress change the reserve limit to a higher + value say (70) + """ + + # Setting storage.reserve 50 + self._set_vol_option({"storage.reserve": "50"}) + + self.list_of_io_processes = [] + # Create a dir to start untar + self.linux_untar_dir = "{}/{}".format(self.mounts[0].mountpoint, + "linuxuntar") + ret = mkdir(self.clients[0], self.linux_untar_dir) + self.assertTrue(ret, "Failed to create dir linuxuntar for untar") + + # Start linux untar on dir linuxuntar + ret = run_linux_untar(self.clients[0], self.mounts[0].mountpoint, + dirs=tuple(['linuxuntar'])) + self.list_of_io_processes += ret + self.is_io_running = True + + # Add bricks to the volume + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info) + self.assertTrue(ret, "Failed to add brick with rsync on volume %s" + % self.volname) + + # Trigger rebalance on the volume + ret, _, _ = rebalance_start(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s" + % self.volname) + + # Setting storage.reserve 30 + self._set_vol_option({"storage.reserve": "30"}) + + # Stopping Rebalance + ret, _, _ = rebalance_stop(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to stop rebalance on the volume %s" + % self.volname) + + # Setting storage.reserve 500 + self._set_vol_option({"storage.reserve": "500"}) + + # Trigger rebalance on the volume + ret, _, _ = rebalance_start(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s" + % self.volname) + + # Setting storage.reserve 70 + self._set_vol_option({"storage.reserve": "70"}) diff --git a/tests/functional/glusterd/test_reserved_port_range_for_gluster.py b/tests/functional/glusterd/test_reserved_port_range_for_gluster.py new file mode 100644 
index 000000000..b03c74884 --- /dev/null +++ b/tests/functional/glusterd/test_reserved_port_range_for_gluster.py @@ -0,0 +1,152 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" Description: + Setting reserved port range for gluster +""" + +from random import choice +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass +from glustolibs.gluster.volume_ops import (volume_create, volume_start, + get_volume_list) +from glustolibs.gluster.volume_libs import cleanup_volume +from glustolibs.gluster.lib_utils import get_servers_bricks_dict +from glustolibs.gluster.gluster_init import restart_glusterd +from glustolibs.gluster.peer_ops import wait_for_peers_to_connect + + +class TestReservedPortRangeForGluster(GlusterBaseClass): + def tearDown(self): + # Reset port range if some test fails + if self.port_range_changed: + cmd = "sed -i 's/49200/60999/' /etc/glusterfs/glusterd.vol" + ret, _, _ = g.run(self.mnode, cmd) + self.assertEqual(ret, 0, "Failed to set the max-port back to" + " 60999 in glusterd.vol file") + + # clean up all volumes + vol_list = get_volume_list(self.mnode) + if vol_list is None: + raise ExecutionError("Failed to get the volume list") + + 
for volume in vol_list: + ret = cleanup_volume(self.mnode, volume) + if not ret: + raise ExecutionError("Unable to delete volume %s" % volume) + g.log.info("Volume deleted successfully : %s", volume) + + # Calling baseclass tearDown method + self.get_super_method(self, 'tearDown')() + + def test_reserved_port_range_for_gluster(self): + """ + Test Case: + 1) Set the max-port option in glusterd.vol file to 49200 + 2) Restart glusterd on one of the node + 3) Create 50 volumes in a loop + 4) Try to start the 50 volumes in a loop + 5) Confirm that the 50th volume failed to start + 6) Confirm the error message, due to which volume failed to start + 7) Set the max-port option in glusterd.vol file back to default value + 8) Restart glusterd on the same node + 9) Starting the 50th volume should succeed now + """ + # Set max port number as 49200 in glusterd.vol file + cmd = "sed -i 's/60999/49200/' /etc/glusterfs/glusterd.vol" + ret, _, _ = g.run(self.mnode, cmd) + self.assertEqual(ret, 0, "Failed to set the max-port to 49200 in" + " glusterd.vol file") + + self.port_range_changed = True + + # Restart glusterd + ret = restart_glusterd(self.mnode) + self.assertTrue(ret, "Failed to restart glusterd") + g.log.info("Successfully restarted glusterd on node: %s", self.mnode) + + # Check node on which glusterd was restarted is back to 'Connected' + # state from any other peer + ret = wait_for_peers_to_connect(self.servers[1], self.servers) + self.assertTrue(ret, "All the peers are not in connected state") + + # Fetch the available bricks dict + bricks_dict = get_servers_bricks_dict(self.servers, + self.all_servers_info) + self.assertIsNotNone(bricks_dict, "Failed to get the bricks dict") + + # Create 50 volumes in a loop + for i in range(1, 51): + self.volname = "volume-%d" % i + bricks_list = [] + j = 0 + for key, value in bricks_dict.items(): + j += 1 + brick = choice(value) + brick = "{}:{}/{}_brick-{}".format(key, brick, + self.volname, j) + bricks_list.append(brick) + + ret, 
_, _ = volume_create(self.mnode, self.volname, bricks_list) + self.assertEqual(ret, 0, "Failed to create volume: %s" + % self.volname) + g.log.info("Successfully created volume: %s", self.volname) + + # Try to start 50 volumes in loop + for i in range(1, 51): + self.volname = "volume-%d" % i + ret, _, err = volume_start(self.mnode, self.volname) + if ret: + break + g.log.info("Successfully started all the volumes until volume: %s", + self.volname) + + # Confirm if the 50th volume failed to start + self.assertEqual(i, 50, "Failed to start the volumes volume-1 to" + " volume-49 in a loop") + + # Confirm the error message on volume start fail + err_msg = ("volume start: volume-50: failed: Commit failed on" + " localhost. Please check log file for details.") + self.assertEqual(err.strip(), err_msg, "Volume start failed with" + " a different error message") + + # Confirm the error message from the log file + cmd = ("cat /var/log/glusterfs/glusterd.log | %s" + % "grep -i 'All the ports in the range are exhausted' | wc -l") + ret, out, _ = g.run(self.mnode, cmd) + self.assertEqual(ret, 0, "Failed to 'grep' the glusterd.log file") + self.assertNotEqual(out, "0", "Volume start didn't fail with expected" + " error message") + + # Set max port number back to default value in glusterd.vol file + cmd = "sed -i 's/49200/60999/' /etc/glusterfs/glusterd.vol" + ret, _, _ = g.run(self.mnode, cmd) + self.assertEqual(ret, 0, "Failed to set the max-port back to 60999 in" + " glusterd.vol file") + + self.port_range_changed = False + + # Restart glusterd on the same node + ret = restart_glusterd(self.mnode) + self.assertTrue(ret, "Failed to restart glusterd") + g.log.info("Successfully restarted glusterd on node: %s", self.mnode) + + # Starting the 50th volume should succeed now + self.volname = "volume-%d" % i + ret, _, _ = volume_start(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to start volume: %s" % self.volname) diff --git 
a/tests/functional/glusterd/test_shared_storage.py b/tests/functional/glusterd/test_shared_storage.py index c2fcd00bc..63e996fc6 100644 --- a/tests/functional/glusterd/test_shared_storage.py +++ b/tests/functional/glusterd/test_shared_storage.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -20,32 +20,32 @@ disabling shared storage """ +from random import choice from time import sleep from glusto.core import Glusto as g +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on -from glustolibs.gluster.volume_ops import (volume_create, - volume_delete, get_volume_list) -from glustolibs.gluster.volume_libs import cleanup_volume from glustolibs.gluster.lib_utils import form_bricks_list from glustolibs.gluster.shared_storage_ops import (enable_shared_storage, is_shared_volume_mounted, disable_shared_storage, check_gluster_shared_volume) -from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_ops import (volume_create, + volume_delete, get_volume_list) +from glustolibs.gluster.volume_libs import cleanup_volume +from glustolibs.misc.misc_libs import reboot_nodes_and_wait_to_come_online -@runs_on([['distributed'], ['glusterfs']]) +@runs_on([['distributed'], ['glusterfs', 'nfs']]) class SharedStorage(GlusterBaseClass): def setUp(self): # calling GlusterBaseClass setUp self.get_super_method(self, 'setUp')() # Creating Volume - g.log.info("Started creating volume") - ret = self.setup_volume() - if not ret: + if not self.setup_volume(): raise ExecutionError("Volume creation failed") - g.log.info("Volume created successfully : %s", self.volname) def tearDown(self): # 
Stopping and cleaning up the volume @@ -54,31 +54,15 @@ class SharedStorage(GlusterBaseClass): raise ExecutionError("Failed to get volume list") for volume in vol_list: - ret = cleanup_volume(self.mnode, volume) - if not ret: + if not cleanup_volume(self.mnode, volume): raise ExecutionError("Failed Cleanup the Volume") - g.log.info("Volume deleted successfully : %s", volume) # Calling GlusterBaseClass tearDown self.get_super_method(self, 'tearDown')() - def test_shared_storage(self): - """This test case includes: - -> Enable a shared storage - -> Disable a shared storage - -> Create volume of any type with - name gluster_shared_storage - -> Disable the shared storage - -> Check, volume created in step-3 is - not deleted - -> Delete the volume - -> Enable the shared storage - -> Check volume with name gluster_shared_storage - is created - -> Disable the shared storage - """ - # pylint: disable=too-many-statements, too-many-branches - # Enable a shared storage without specifying the domain + def _enable_and_check_shared_storage(self): + """Enable and check shared storage is present""" + ret = enable_shared_storage(self.mnode) self.assertTrue(ret, ("Failed to enable a shared storage")) g.log.info("Successfully enabled: enable-shared-storage option") @@ -90,13 +74,9 @@ class SharedStorage(GlusterBaseClass): g.log.info("gluster_shared_storage volume created" " successfully") - # Check the shared volume got mounted - ret = is_shared_volume_mounted(self.mnode) - self.assertTrue(ret, ("Shared volume not mounted even" - " after enabling it")) - g.log.info("Shared volume mounted successfully") + def _disable_and_check_shared_storage(self): + """Disable a shared storage without specifying the domain and check""" - # Disable a shared storage without specifying the domain ret = disable_shared_storage(self.mnode) self.assertTrue(ret, ("Failed to disable a shared storage")) g.log.info("Successfully disabled: disable-shared-storage") @@ -108,17 +88,52 @@ class 
SharedStorage(GlusterBaseClass): g.log.info("gluster_shared_storage volume deleted" " successfully") - # Check the shared volume unmounted - ret = is_shared_volume_mounted(self.mnode) - self.assertFalse(ret, ("Shared volume not unmounted even" - " after disabling it")) - g.log.info("Shared volume unmounted successfully") + def _is_shared_storage_mounted_on_the_nodes(self, brick_details, mounted): + """ + Checks if the shared storage is mounted on the nodes where it is + created. + """ + for node in brick_details: + ret = is_shared_volume_mounted(node.split(":")[0]) + if mounted: + self.assertTrue(ret, ("Shared volume not mounted even after" + " enabling it")) + g.log.info("Shared volume mounted successfully") + else: + self.assertFalse(ret, ("Shared volume not unmounted even" + " after disabling it")) + g.log.info("Shared volume unmounted successfully") + + def _get_all_bricks(self): + """Get all bricks where the shared storage is mounted""" + + brick_list = get_all_bricks(self.mnode, "gluster_shared_storage") + self.assertIsNotNone(brick_list, "Unable to fetch brick list of shared" + " storage") + return brick_list + + def _shared_storage_test_without_node_reboot(self): + """Shared storge testcase till the node reboot scenario""" + + # Enable shared storage and check it is present on the cluster + self._enable_and_check_shared_storage() + + # Get all the bricks where shared storage is mounted + brick_list = self._get_all_bricks() + + # Check the shared volume is mounted on the nodes where it is created + self._is_shared_storage_mounted_on_the_nodes(brick_details=brick_list, + mounted=True) + # Disable shared storage and check it is not present on the cluster + self._disable_and_check_shared_storage() + + # Check the shared volume is unmounted on the nodes where it is created + self._is_shared_storage_mounted_on_the_nodes(brick_details=brick_list, + mounted=False) # Create a volume with name gluster_shared_storage - g.log.info("creation of volume should succeed") 
volume = "gluster_shared_storage" - bricks_list = form_bricks_list(self.mnode, volume, - 2, self.servers, + bricks_list = form_bricks_list(self.mnode, volume, 2, self.servers, self.all_servers_info) count = 0 while count < 20: @@ -155,38 +170,78 @@ class SharedStorage(GlusterBaseClass): "%s", volume)) g.log.info("Volume deleted successfully : %s", volume) - # Enable the shared storage - ret = enable_shared_storage(self.mnode) - self.assertTrue(ret, ("Failed to enable a shared storage")) - g.log.info("Successfully enabled: enable-shared-storage option") + # Enable shared storage and check it is present on the cluster + self._enable_and_check_shared_storage() - # Check volume list to confirm gluster_shared_storage is created - ret = check_gluster_shared_volume(self.mnode) - self.assertTrue(ret, ("gluster_shared_storage volume not" - " created even after enabling it")) - g.log.info("gluster_shared_storage volume created" - " successfully") + # Check the shared volume is mounted on the nodes where it is created + self._is_shared_storage_mounted_on_the_nodes(brick_details=brick_list, + mounted=True) - # Check the shared volume got mounted - ret = is_shared_volume_mounted(self.mnode) - self.assertTrue(ret, ("Shared volume not mounted even" - " after enabling it")) - g.log.info("Shared volume mounted successfully") + # Disable shared storage and check it is not present on the cluster + self._disable_and_check_shared_storage() - # Disable a shared storage - ret = disable_shared_storage(self.mnode) - self.assertTrue(ret, ("Failed to disable a shared storage")) - g.log.info("Successfully disabled: disable-shared-storage") + # Check the shared volume is unmounted on the nodes where it is created + self._is_shared_storage_mounted_on_the_nodes(brick_details=brick_list, + mounted=False) - # Check volume list to confirm gluster_shared_storage is deleted - ret = check_gluster_shared_volume(self.mnode, present=False) - self.assertTrue(ret, ("gluster_shared_storage volume not" - " 
deleted even after disabling it")) - g.log.info("gluster_shared_storage volume deleted" - " successfully") + def test_shared_storage(self): + """ + This test case includes: + -> Enable a shared storage + -> Disable a shared storage + -> Create volume of any type with + name gluster_shared_storage + -> Disable the shared storage + -> Check, volume created in step-3 is + not deleted + -> Delete the volume + -> Enable the shared storage + -> Check volume with name gluster_shared_storage + is created + -> Disable the shared storage + -> Enable shared storage and validate whether it is mounted + -> Perform node reboot + -> Post reboot validate the bricks are mounted back or not + """ + # pylint: disable=too-many-statements, too-many-branches + self._shared_storage_test_without_node_reboot() + + # Enable shared storage and check it is present on the cluster + self._enable_and_check_shared_storage() + + # Get all the bricks where shared storage is mounted + brick_list = self._get_all_bricks() + + # Check the shared volume is mounted on the nodes where it is created + self._is_shared_storage_mounted_on_the_nodes(brick_details=brick_list, + mounted=True) + + # Perform node reboot on any of the nodes where the shared storage is + # mounted + node_to_reboot = choice(brick_list) + node_to_reboot = node_to_reboot.split(":")[0] + ret = reboot_nodes_and_wait_to_come_online(node_to_reboot) + self.assertTrue(ret, "Reboot Failed on node: " + "{}".format(node_to_reboot)) + g.log.info("Node: %s rebooted successfully", node_to_reboot) + + # Post reboot checking peers are connected + count = 0 + while count < 10: + ret = self.validate_peers_are_connected() + if ret: + break + sleep(3) + count += 1 + self.assertTrue(ret, "Peers are not in connected state.") + + # Check the shared volume is mounted on the nodes where it is created + self._is_shared_storage_mounted_on_the_nodes(brick_details=brick_list, + mounted=True) + + # Disable shared storage and check it is not present on the cluster 
+ self._disable_and_check_shared_storage() - # Check the shared volume unmounted - ret = is_shared_volume_mounted(self.mnode) - self.assertFalse(ret, ("Shared volume not unmounted even" - " after disabling it")) - g.log.info("Shared volume unmounted successfully") + # Check the shared volume is unmounted on the nodes where it is created + self._is_shared_storage_mounted_on_the_nodes(brick_details=brick_list, + mounted=False) diff --git a/tests/functional/glusterd/test_updates_in_options_file_on_quorum_changes.py b/tests/functional/glusterd/test_updates_in_options_file_on_quorum_changes.py new file mode 100644 index 000000000..98a3ba53f --- /dev/null +++ b/tests/functional/glusterd/test_updates_in_options_file_on_quorum_changes.py @@ -0,0 +1,94 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +""" + Test Description: + Tests to check the 'options' file is updated with quorum changes +""" + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.volume_ops import set_volume_options + + +@runs_on([['distributed', 'replicated', 'distributed-replicated', + 'dispersed', 'distributed-dispersed', + 'arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestUpdatesInOptionsFileOnQuorumChanges(GlusterBaseClass): + def setUp(self): + # calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setting up Volume + ret = self.setup_volume() + if not ret: + raise ExecutionError("Volume creation/start failed: %s" + % self.volname) + g.log.info("Volme createdand started successfully : %s", + self.volname) + + def tearDown(self): + # stopping the volume and Cleaning up the volume + ret = self.cleanup_volume() + if not ret: + raise ExecutionError("Failed Cleanup the Volume %s" % self.volname) + g.log.info("Volume deleted successfully : %s", self.volname) + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_updates_in_options_file_on_quorum_changes(self): + """ + Test Case: + 1. Create and start a volume + 2. Check the output of '/var/lib/glusterd/options' file + 3. Store the value of 'global-option-version' + 4. Set server-quorum-ratio to 70% + 5. Check the output of '/var/lib/glusterd/options' file + 6. 
Compare the value of 'global-option-version' and check + if the value of 'server-quorum-ratio' is set to 70% + """ + # Checking 'options' file for quorum related entries + cmd = "cat /var/lib/glusterd/options | grep global-option-version" + ret, out, _ = g.run(self.mnode, cmd) + previous_global_option_version = out.split('=') + + # Setting Quorum ratio in percentage + self.quorum_perecent = {'cluster.server-quorum-ratio': '70%'} + ret = set_volume_options(self.mnode, 'all', self.quorum_perecent) + self.assertTrue(ret, "Failed to set cluster.server-quorum-ratio" + " option on volumes") + g.log.info("Successfully set cluster.server-quorum-ratio on cluster") + + # Checking 'options' file for quorum related entries + cmd = "cat /var/lib/glusterd/options | grep global-option-version" + ret, out, _ = g.run(self.mnode, cmd) + new_global_option_version = out.split('=') + self.assertEqual(int(previous_global_option_version[1]) + 1, + int(new_global_option_version[1]), + "Failed:The global-option-version didn't change on a" + " volume set operation") + g.log.info("The global-option-version was successfully updated in the" + " options file") + + cmd = "cat /var/lib/glusterd/options | grep server-quorum-ratio" + ret, out, _ = g.run(self.mnode, cmd) + out = out.split("%") + self.assertEqual(out[0], "cluster.server-quorum-ratio=70", + "Server-quorum-ratio is not updated in options file") + g.log.info("The cluster.server-quorum-ratio was successfully set" + " to 70 in the options file") diff --git a/tests/functional/glusterd/test_validate_auth_allow_and_auth_reject.py b/tests/functional/glusterd/test_validate_auth_allow_and_auth_reject.py new file mode 100644 index 000000000..f80b4357b --- /dev/null +++ b/tests/functional/glusterd/test_validate_auth_allow_and_auth_reject.py @@ -0,0 +1,162 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" + Test Description: + Tests to validate auth.allow and auth.reject on a volume +""" + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.volume_ops import (set_volume_options, + volume_reset) +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.mount_ops import (mount_volume, umount_volume, + is_mounted) + + +@runs_on([['distributed', 'replicated', 'distributed-replicated', + 'dispersed', 'distributed-dispersed', 'arbiter', + 'distributed-arbiter'], ['glusterfs']]) +class TestValidateAuthAllowAndAuthReject(GlusterBaseClass): + + def setUp(self): + # calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + ret = self.setup_volume() + if not ret: + raise ExecutionError("Volume creation failed: %s" + % self.volname) + g.log.info("Volume created successfully : %s", self.volname) + + def tearDown(self): + # Cleanup the volume + ret = self.cleanup_volume() + if not ret: + raise ExecutionError("Failed to cleanup the volume %s" + % self.volname) + g.log.info("Volume deleted successfully: %s", self.volname) + + self.get_super_method(self, 'tearDown')() + + def _set_option_and_mount_and_unmount_volumes(self, option="", + 
is_allowed=True): + """ + Setting volume option and then mounting and unmounting the volume + """ + # Check if an option is passed + if option: + # Setting the option passed as an argument + ret = set_volume_options(self.mnode, self.volname, + {option: self.mounts[0].client_system}) + self.assertTrue(ret, "Failed to set %s option in volume: %s" + % (option, self.volname)) + g.log.info("Successfully set %s option in volume: %s", option, + self.volname) + + # Mounting a volume + ret, _, _ = mount_volume(self.volname, mtype=self.mount_type, + mpoint=self.mounts[0].mountpoint, + mserver=self.mnode, + mclient=self.mounts[0].client_system) + + # Checking if volume was successfully mounted or not + ret = is_mounted(self.volname, mtype=self.mount_type, + mpoint=self.mounts[0].mountpoint, + mserver=self.mnode, + mclient=self.mounts[0].client_system) + if is_allowed: + self.assertTrue(ret, "Failed to mount the volume: %s" + % self.volname) + else: + self.assertFalse(ret, "Unexpected: Mounting" + " the volume %s was successful" % self.volname) + + # Unmount only if the volume is supposed to be mounted + if is_allowed: + ret, _, _ = umount_volume(self.mounts[0].client_system, + self.mounts[0].mountpoint, + mtype=self.mount_type) + self.assertEqual(ret, 0, "Failed to unmount the volume: %s" + % self.volname) + + def _reset_the_volume(self): + """ + Resetting the volume + """ + ret = volume_reset(self.mnode, self.volname) + self.assertTrue(ret, "Failed to reset volume: %s" % self.volname) + g.log.info("Reseting volume %s was successful", self.volname) + + def _check_validate_test(self): + """ + Checking volume mounting and unmounting with auth.allow + and auth.reject option set for it + """ + # Setting auth.allow option and then mounting and unmounting volume + self._set_option_and_mount_and_unmount_volumes("auth.allow") + g.log.info("Successfully performed the set, mounting and unmounting" + " operation as expected on volume: %s", self.volname) + + # Reseting the volume options 
+ self._reset_the_volume() + + # Setting auth.reject option and then checking mounting of volume + self._set_option_and_mount_and_unmount_volumes("auth.reject", False) + g.log.info("Successfully performed the set and mounting operation" + "as expected on volume: %s", self.volname) + + # Reseting the volume options + self._reset_the_volume() + + # Check mounting and unmounting of volume without setting any options + self._set_option_and_mount_and_unmount_volumes() + g.log.info("Successfully mounted and unmounted the volume: %s", + self.volname) + + def test_validate_auth_allow_and_auth_reject(self): + """ + Test Case: + 1. Create and start a volume + 2. Disable brick mutliplex + 2. Set auth.allow option on volume for the client address on which + volume is to be mounted + 3. Mount the volume on client and then unmmount it. + 4. Reset the volume + 5. Set auth.reject option on volume for the client address on which + volume is to be mounted + 6. Mounting the volume should fail + 7. Reset the volume and mount it on client. + 8. 
Repeat the steps 2-7 with brick multiplex enabled + """ + # Setting cluster.brick-multiplex to disable + ret = set_volume_options(self.mnode, 'all', + {'cluster.brick-multiplex': 'disable'}) + self.assertTrue(ret, "Failed to set brick-multiplex to enable.") + g.log.info("Successfully set brick-multiplex to disable.") + + # Checking auth options with brick multiplex disabled + self._check_validate_test() + + # Setting cluster.brick-multiplex to enable + ret = set_volume_options(self.mnode, 'all', + {'cluster.brick-multiplex': 'enable'}) + self.assertTrue(ret, "Failed to set brick-multiplex to enable.") + g.log.info("Successfully set brick-multiplex to enable.") + + # Checking auth options with brick multiplex enabled + self._check_validate_test() diff --git a/tests/functional/glusterd/test_validate_glusterd_info.py b/tests/functional/glusterd/test_validate_glusterd_info.py new file mode 100644 index 000000000..e888d5c03 --- /dev/null +++ b/tests/functional/glusterd/test_validate_glusterd_info.py @@ -0,0 +1,96 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass +from glustolibs.gluster.peer_ops import get_peer_status + + +class TestGlusterdInfo(GlusterBaseClass): + + def test_validate_glusterd_info(self): + """ + Steps: + 1. Check for the presence of /var/lib/glusterd/glusterd.info file + 2. Get the UUID of the current NODE + 3. check the value of the uuid returned by executing the command - + "gluster system:: uuid get " + 4. Check the uuid value shown by other node in the cluster + for the same node "gluster peer status" + on one node will give the UUID of the other node + """ + uuid_list = [] + for server in self.servers: + + # Getting UUID from glusterd.info + g.log.info("Getting the UUID from glusterd.info") + ret, glusterd_volinfo, _ = g.run( + server, "grep -i uuid /var/lib/glusterd/glusterd.info") + uuid_list.append(glusterd_volinfo) + glusterd_volinfo = (glusterd_volinfo.split("="))[1] + self.assertFalse( + ret, "Failed to run '{}' on '{}' ".format(server, server)) + self.assertIsNotNone( + glusterd_volinfo, "UUID not found in 'glusterd.info' file ") + + # Getting UUID from cmd 'gluster system uuid get' + ret, get_uuid, _ = g.run( + server, "gluster system uuid get | awk {'print $2'}") + self.assertFalse(ret, "Unable to get the UUID ") + self.assertIsNotNone(get_uuid, "UUID not found") + + # Checking if both the uuid are same + self.assertEquals( + glusterd_volinfo, get_uuid, + "UUID does not match in host {}".format(server)) + + # Geting the UUID from cmd "gluster peer status" + for node in self.servers: + for i in get_peer_status(node): + uuid_list.append(i["uuid"]) + if server != node: + self.assertTrue( + get_uuid.replace("\n", "") in uuid_list, + "uuid not matched in {}".format(node)) + + def test_glusterd_config_file_check(self): + """ + Steps: + 1. Check the location of glusterd socket file ( glusterd.socket ) + ls /var/run/ | grep -i glusterd.socket + 2. 
systemctl is-enabled glusterd -> enabled + + """ + + cmd = "ls /var/run/ | grep -i glusterd.socket" + ret, out, _ = g.run(self.mnode, cmd) + + # Checking glusterd.socket file + self.assertFalse( + ret, "Failed to get glusterd.socket file on '{}'".format( + self.mnode)) + self.assertEqual( + out.replace("\n", ""), "glusterd.socket", + "Failed to get expected output") + + # Checking for glusterd.service is enabled by default + ret, out, _ = g.run( + self.mnode, "systemctl is-enabled glusterd.service") + self.assertFalse( + ret, "Failed to execute the cmd on {}".format(self.mnode)) + self.assertEqual( + out.replace("\n", ""), "enabled", + "Output of systemctl is-enabled glusterd.service is not enabled") diff --git a/tests/functional/glusterd/test_validate_peer_probe_ip_fqdn_hostname.py b/tests/functional/glusterd/test_validate_peer_probe_ip_fqdn_hostname.py new file mode 100755 index 000000000..7c8fe3612 --- /dev/null +++ b/tests/functional/glusterd/test_validate_peer_probe_ip_fqdn_hostname.py @@ -0,0 +1,146 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from socket import gethostbyname, getfqdn +from random import choice +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass +from glustolibs.gluster.peer_ops import (peer_probe, peer_detach, + peer_probe_servers, + peer_detach_servers, + nodes_from_pool_list) +from glustolibs.gluster.exceptions import ExecutionError + + +# pylint: disable=unsubscriptable-object +class TestPeerProbeScenarios(GlusterBaseClass): + + def setUp(self): + self.get_super_method(self, 'setUp')() + + # Performing peer detach + if not peer_detach_servers(self.mnode, self.servers): + raise ExecutionError("Failed to detach servers %s" + % self.servers) + g.log.info("Peer detach SUCCESSFUL.") + self.peers_in_pool = [] + self.by_type = "" + self.node = None + + def tearDown(self): + """Detach servers from cluster""" + pool = nodes_from_pool_list(self.mnode) + self.assertIsNotNone(pool, "Failed to get pool list") + for node in pool: + if not peer_detach(self.mnode, node): + raise ExecutionError("Failed to detach %s from %s" + % (node, self.mnode)) + # Create a cluster + if not peer_probe_servers(self.mnode, self.servers): + raise ExecutionError("Failed to probe peer " + "servers %s" % self.servers) + g.log.info("Peer probe success for detached " + "servers %s", self.servers) + + self.get_super_method(self, 'tearDown')() + + def _get_node_identifiers(self): + """ Returns node address dict with ip, fqdn, hostname as keys """ + node = {} + node['ip'] = gethostbyname(self.node) + node['fqdn'] = getfqdn(self.node) + node['hostname'] = g.run(self.node, "hostname")[1].strip() + return node + + def _perform_peer_probe(self, peer): + """ Perfroms peer probe to a given node """ + ret, _, err = peer_probe(self.mnode, peer) + self.assertEqual(ret, 0, "Failed to peer probe %s from %s. 
Error : %s" + % (peer, self.mnode, err)) + + def _get_new_nodes_to_peer_probe(self): + """ Selects a node randomly from the existing set of nodes """ + self.node = None + while self.node is None: + self.node = (gethostbyname(choice(self.servers[1:])) + if gethostbyname(choice(self.servers)) not in + self.peers_in_pool else None) + self.peers_in_pool.append(self.node) + + return self._get_node_identifiers() + + def _verify_pool_list(self, node): + """ Verifies given nodes are there in the gluster pool list""" + pool_list = nodes_from_pool_list(self.mnode) + status = next((n for n in pool_list if n in node.values()), None) + self.assertIsNotNone(status, ("Node %s is not the pool list :" + " %s" % + (node[self.by_type], pool_list))) + g.log.info("The given node is there in the gluster pool list") + + def _verify_cmd_history(self, node): + """Verifies cmd_history for successful entry of peer probe of nodes""" + + # Extract the test specific cmds from cmd_hostory + start_msg = "Starting Test : %s : %s" % (self.id(), + self.glustotest_run_id) + end_msg = "Ending Test: %s : %s" % (self.id(), self.glustotest_run_id) + cmd_history_log = "/var/log/glusterfs/cmd_history.log" + cmd = "awk '/{}/ {{p=1}}; p; /{}/ {{p=0}}' {}".format(start_msg, + end_msg, + cmd_history_log) + ret, test_specific_cmd_history, err = g.run(self.mnode, cmd) + self.assertEqual(ret, 0, "Failed to extract cmd_history specific to " + "the current test case. Error : %s" % err) + # Verify the cmd is found from the extracted cmd log + peer_probe_cmd = "peer probe {} : SUCCESS".format(node) + self.assertNotEqual(test_specific_cmd_history.count(peer_probe_cmd), + 0, "Peer probe success entry not found" + " in cmd history") + g.log.info("The command history contains a successful entry " + "of peer probe to %s ", node) + + def test_validate_peer_probe(self): + """ + 1. Add one of the node(HOST1-IP) to the other node(HOST2-IP) and + form the cluster + # gluster peer probe <HOST-IP> + 2. 
Check the return value of the 'peer probe' command + 3. Confirm that the cluster is formed successfully by 'peer status' + command + # gluster peer status + 4. Execute 'pool list' command to get the status of the cluster + including the local node itself + # gluster pool list + 5. Check the cmd_history' for the status message related to + 'peer probe' command + 6. Repeat 1-5 for FQDN and hostnames + """ + + for self.by_type in ('ip', 'fqdn', 'hostname'): + # Get a node to peer probe to + host_node = self._get_new_nodes_to_peer_probe() + + # Perform peer probe and verify the status + self._perform_peer_probe(host_node[self.by_type]) + + # Verify Peer pool list and check whether the node exists or not + self._verify_pool_list(host_node) + + # Verify command history for successful peer probe status + self._verify_cmd_history(host_node[self.by_type]) + + g.log.info("Peer probe scenario validated using %s", self.by_type) diff --git a/tests/functional/glusterd/test_verify_df_output.py b/tests/functional/glusterd/test_verify_df_output.py new file mode 100644 index 000000000..4eac9193b --- /dev/null +++ b/tests/functional/glusterd/test_verify_df_output.py @@ -0,0 +1,171 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, + runs_on) +from glustolibs.gluster.heal_libs import monitor_heal_completion +from glustolibs.io.utils import validate_io_procs +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_libs import (replace_brick_from_volume, + shrink_volume, expand_volume) +from glustolibs.gluster.brick_libs import get_all_bricks + + +@runs_on([['distributed-dispersed', 'distributed-replicated', + 'distributed-arbiter', 'dispersed', 'replicated', + 'arbiter'], + ['glusterfs']]) +class VerifyDFWithReplaceBrick(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + if not upload_scripts(cls.clients, [cls.script_upload_path]): + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + if not self.setup_volume_and_mount_volume(mounts=self.mounts): + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + + def _perform_io_and_validate(self): + """ Performs IO on the mount points and validates it""" + all_mounts_procs, count = [], 1 + for mount_obj in self.mounts: + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d --dir-depth 2 " + "--dir-length 3 --max-num-of-dirs 3 " + "--num-of-files 2 %s" % ( + self.script_upload_path, count, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + 
all_mounts_procs.append(proc) + count = count + 10 + + # Validating IO's on mount point and waiting to complete + ret = validate_io_procs(all_mounts_procs, self.mounts) + self.assertTrue(ret, "IO failed on some of the clients") + g.log.info("Successfully validated IO's") + + def _replace_bricks_and_wait_for_heal_completion(self): + """ Replaces all the bricks and waits for the heal to complete""" + existing_bricks = get_all_bricks(self.mnode, self.volname) + for brick_to_replace in existing_bricks: + ret = replace_brick_from_volume(self.mnode, self.volname, + self.servers, + self.all_servers_info, + src_brick=brick_to_replace) + self.assertTrue(ret, + "Replace of %s failed" % brick_to_replace) + g.log.info("Replace of brick %s successful for volume %s", + brick_to_replace, self.volname) + + # Monitor heal completion + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, 'Heal has not yet completed') + g.log.info('Heal has completed successfully') + + def _get_mount_size_from_df_h_output(self): + """ Extracts the mount size from the df -h output""" + + split_cmd = " | awk '{split($0,a,\" \");print a[2]}' | sed 's/.$//'" + cmd = ("cd {};df -h | grep {} {}".format(self.mounts[0].mountpoint, + self.volname, split_cmd)) + ret, mount_size, _ = g.run(self.clients[0], cmd) + self.assertEqual(ret, 0, "Failed to extract mount size") + return float(mount_size.split("\n")[0]) + + def test_verify_df_output_when_brick_replaced(self): + """ + - Take the output of df -h. + - Replace any one brick for the volumes. + - Wait till the heal is completed + - Repeat steps 1, 2 and 3 for all bricks for all volumes. 
+ - Check if there are any inconsistencies in the output of df -h + - Remove bricks from volume and check output of df -h + - Add bricks to volume and check output of df -h + """ + + # Perform some IO on the mount point + self._perform_io_and_validate() + + # Get the mount size from df -h output + initial_mount_size = self._get_mount_size_from_df_h_output() + + # Replace all the bricks and wait till the heal completes + self._replace_bricks_and_wait_for_heal_completion() + + # Get df -h output after brick replace + mount_size_after_replace = self._get_mount_size_from_df_h_output() + + # Verify the mount point size remains the same after brick replace + self.assertEqual(initial_mount_size, mount_size_after_replace, + "The mount sizes before and after replace bricks " + "are not same") + + # Add bricks + ret = expand_volume(self.mnode, self.volname, self.servers, + self.all_servers_info, force=True) + self.assertTrue(ret, "Failed to add-brick to volume") + + # Get df -h output after volume expand + mount_size_after_expand = self._get_mount_size_from_df_h_output() + + # Verify df -h output returns greater value + self.assertGreater(mount_size_after_expand, initial_mount_size, + "The mount size has not increased after expanding") + + # Remove bricks + ret = shrink_volume(self.mnode, self.volname, force=True) + self.assertTrue(ret, ("Remove brick operation failed on " + "%s", self.volname)) + g.log.info("Remove brick operation is successful on " + "volume %s", self.volname) + + # Get df -h output after volume shrink + mount_size_after_shrink = self._get_mount_size_from_df_h_output() + + # Verify the df -h output returns smaller value + self.assertGreater(mount_size_after_expand, mount_size_after_shrink, + "The mount size has not reduced after shrinking") + + def tearDown(self): + """ + Cleanup and umount volume + """ + # Cleanup and umount volume + if not self.unmount_volume_and_cleanup_volume(mounts=self.mounts): + raise ExecutionError("Failed to umount the vol & 
cleanup Volume")
+        g.log.info("Successful in umounting the volume and Cleanup")
+
+        # Calling GlusterBaseClass teardown
+        self.get_super_method(self, 'tearDown')()
diff --git a/tests/functional/glusterd/test_volume_create_with_glusterd_restarts.py b/tests/functional/glusterd/test_volume_create_with_glusterd_restarts.py
index 8c3d77b9d..1a7fe8a1b 100644
--- a/tests/functional/glusterd/test_volume_create_with_glusterd_restarts.py
+++ b/tests/functional/glusterd/test_volume_create_with_glusterd_restarts.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com>
+# Copyright (C) 2017-2020 Red Hat, Inc. <http://www.redhat.com>
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -71,10 +71,15 @@ class TestVolumeCreateWithGlusterdRestarts(GlusterBaseClass):
                                            server_info_for_three_nodes)
         # Restarting glusterd in a loop
         restart_cmd = ("for i in `seq 1 5`; do "
-                       "service glusterd restart; sleep 3; "
+                       "service glusterd restart; "
+                       "systemctl reset-failed glusterd; "
+                       "sleep 3; "
                        "done")
         proc1 = g.run_async(self.servers[3], restart_cmd)
 
+        # After running restart in g.async adding 10 sec sleep
+        sleep(10)
+
         # Creating volumes using 3 servers
         ret, _, _ = volume_create(self.mnode, self.volname,
                                   bricks_list)
@@ -97,10 +102,15 @@ class TestVolumeCreateWithGlusterdRestarts(GlusterBaseClass):
 
         # Restarting glusterd in a loop
         restart_cmd = ("for i in `seq 1 5`; do "
-                       "service glusterd restart; sleep 3; "
+                       "service glusterd restart; "
+                       "systemctl reset-failed glusterd; "
+                       "sleep 3; "
                        "done")
         proc1 = g.run_async(self.servers[3], restart_cmd)
 
+        # After running restart in g.async adding 10 sec sleep
+        sleep(10)
+
         # Start the volume created.
ret, _, _ = volume_start(self.mnode, self.volname) self.assertEqual(ret, 0, "Volume start failed") diff --git a/tests/functional/glusterd/test_volume_set_when_glusterd_stopped_on_one_node.py b/tests/functional/glusterd/test_volume_set_when_glusterd_stopped_on_one_node.py new file mode 100644 index 000000000..d99fa185f --- /dev/null +++ b/tests/functional/glusterd/test_volume_set_when_glusterd_stopped_on_one_node.py @@ -0,0 +1,193 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +""" Description: + Volume set operation when glusterd is stopped on one node +""" + +from random import choice +from time import sleep +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.volume_ops import ( + set_volume_options, get_volume_info) +from glustolibs.gluster.brick_libs import get_online_bricks_list +from glustolibs.gluster.gluster_init import ( + start_glusterd, stop_glusterd, wait_for_glusterd_to_start) +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.io.utils import validate_io_procs + + +@runs_on([['distributed', 'replicated', 'distributed-replicated', + 'dispersed', 'distributed-dispersed'], ['glusterfs']]) +class TestVolumeSetWhenGlusterdStoppedOnOneNode(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + cls.get_super_method(cls, 'setUpClass')() + + # Uploading file_dir script in all client direcotries + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + self.get_super_method(self, 'setUp')() + # Creating Volume and mounting volume. 
+        ret = self.setup_volume_and_mount_volume(self.mounts)
+        if not ret:
+            raise ExecutionError("Volume creation or mount failed: %s"
+                                 % self.volname)
+        g.log.info("Volume created and mounted successfully : %s",
+                   self.volname)
+
+    def tearDown(self):
+        # Check if a node is still down
+        if getattr(self, 'glusterd_is_stopped', False):
+            ret = start_glusterd(self.random_server)
+            self.assertTrue(ret, "Failed to start glusterd on %s"
+                            % self.random_server)
+            g.log.info("Successfully started glusterd on node: %s",
+                       self.random_server)
+
+            # Waiting for glusterd to start completely
+            ret = wait_for_glusterd_to_start(self.random_server)
+            self.assertTrue(ret, "glusterd is not running on %s"
+                            % self.random_server)
+            g.log.info("glusterd is started and running on %s",
+                       self.random_server)
+
+        # Unmounting and cleaning volume.
+        ret = self.unmount_volume_and_cleanup_volume(self.mounts)
+        if not ret:
+            raise ExecutionError("Unable to delete volume %s" % self.volname)
+        g.log.info("Volume deleted successfully : %s", self.volname)
+
+        self.get_super_method(self, 'tearDown')()
+
+    def test_volume_set_when_glusterd_stopped_on_one_node(self):
+        """
+        Test Case:
+        1) Setup and mount a volume on client.
+        2) Stop glusterd on a random server.
+        3) Start IO on mount points
+        4) Set an option on the volume
+        5) Start glusterd on the stopped node.
+        6) Verify all the bricks are online after starting glusterd.
+        7) Check if the volume info is synced across the cluster.
+        """
+        # Fetching the bricks list and storing it for later use
+        list1 = get_online_bricks_list(self.mnode, self.volname)
+        self.assertIsNotNone(list1, "Failed to get the list of online bricks "
+                             "for volume: %s" % self.volname)
+
+        # Fetching a random server from list.
+        self.random_server = choice(self.servers[1:])
+
+        # Stopping glusterd on one node.
+ ret = stop_glusterd(self.random_server) + self.assertTrue(ret, "Failed to stop glusterd on one node.") + g.log.info("Successfully stopped glusterd on one node.") + + self.glusterd_is_stopped = True + + # Start IO on mount points. + self.all_mounts_procs = [] + counter = 1 + for mount_obj in self.mounts: + g.log.info("Starting IO on %s:%s", mount_obj.client_system, + mount_obj.mountpoint) + cmd = ("/usr/bin/env python %s create_deep_dirs_with_files " + "--dir-depth 4 " + "--dir-length 6 " + "--dirname-start-num %d " + "--max-num-of-dirs 3 " + "--num-of-files 5 %s" % ( + self.script_upload_path, + counter, mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + self.all_mounts_procs.append(proc) + counter += 1 + + # Validate IO + self.assertTrue( + validate_io_procs(self.all_mounts_procs, self.mounts), + "IO failed on some of the clients" + ) + g.log.info("IO validation complete.") + + # set a option on volume, stat-prefetch on + self.options = {"stat-prefetch": "on"} + ret = set_volume_options(self.mnode, self.volname, self.options) + self.assertTrue(ret, ("Failed to set option stat-prefetch to on" + "for the volume %s" % self.volname)) + g.log.info("Succeeded in setting stat-prefetch option to on" + "for the volume %s", self.volname) + + # start glusterd on the node where glusterd is stopped + ret = start_glusterd(self.random_server) + self.assertTrue(ret, "Failed to start glusterd on %s" + % self.random_server) + g.log.info("Successfully started glusterd on node: %s", + self.random_server) + + # Waiting for glusterd to start completely + ret = wait_for_glusterd_to_start(self.random_server) + self.assertTrue(ret, "glusterd is not running on %s" + % self.random_server) + g.log.info("glusterd is started and running on %s", self.random_server) + + self.glusterd_is_stopped = False + + # Confirm if all the bricks are online or not + count = 0 + while count < 10: + list2 = get_online_bricks_list(self.mnode, self.volname) + if 
list1 == list2: + break + sleep(2) + count += 1 + + self.assertListEqual(list1, list2, "Unexpected: All the bricks in the" + "volume are not online") + g.log.info("All the bricks in the volume are back online") + + # volume info should be synced across the cluster + out1 = get_volume_info(self.mnode, self.volname) + self.assertIsNotNone(out1, "Failed to get the volume info from %s" + % self.mnode) + g.log.info("Getting volume info from %s is success", self.mnode) + + count = 0 + while count < 60: + out2 = get_volume_info(self.random_server, self.volname) + self.assertIsNotNone(out2, "Failed to get the volume info from %s" + % self.random_server) + if out1 == out2: + break + sleep(2) + count += 1 + + self.assertDictEqual(out1, out2, "Volume info is not synced in the" + "restarted node") + g.log.info("Volume info is successfully synced across the cluster") diff --git a/tests/functional/glusterd/test_volume_status_show_bricks_online_though_brickpath_deleted.py b/tests/functional/glusterd/test_volume_status_show_bricks_online_though_brickpath_deleted.py new file mode 100644 index 000000000..05bb47c40 --- /dev/null +++ b/tests/functional/glusterd/test_volume_status_show_bricks_online_though_brickpath_deleted.py @@ -0,0 +1,138 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" Description: + Volume status when one of the brickpath is not available. +""" + +import random +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.brick_libs import (are_bricks_online, get_all_bricks, + bring_bricks_online, + bring_bricks_offline, + are_bricks_offline) +from glustolibs.gluster.volume_ops import (volume_start) + + +@runs_on([['distributed', 'replicated', 'distributed-replicated', + 'dispersed', 'distributed-dispersed', 'arbiter', + 'distributed-arbiter'], ['glusterfs']]) +class TestVolumeStatusShowBrickOnlineThoughBrickpathDeleted(GlusterBaseClass): + def setUp(self): + # calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + ret = self.setup_volume() + if not ret: + raise ExecutionError("Volume creation failed: %s" + % self.volname) + g.log.info("Volume created successfully : %s", self.volname) + + def tearDown(self): + # Stopping the volume and Cleaning up the volume + if self.check_for_remount: + ret, _, _ = g.run(self.brick_node, 'mount %s' % self.node_brick) + if ret: + raise ExecutionError('Failed to remount brick %s' + % self.node_brick) + g.log.info('Successfully remounted %s with read-write option', + self.node_brick) + ret = self.cleanup_volume() + if not ret: + raise ExecutionError("Failed to cleanup the volume %s" + % self.volname) + g.log.info("Volume deleted successfully: %s", self.volname) + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_volume_status_show_brick_online_though_brickpath_deleted(self): + """ + Test Case: + 1) Create a volume and start it. 
+ 2) Fetch the brick list + 3) Bring any one brick down umount the brick + 4) Force start the volume and check that all the bricks are not online + 5) Remount the removed brick and bring back the brick online + 6) Force start the volume and check if all the bricks are online + """ + # Fetching the brick list + brick_list = get_all_bricks(self.mnode, self.volname) + self.assertIsNotNone(brick_list, "Failed to get the bricks in" + " the volume") + + # Bringing one brick down + random_brick = random.choice(brick_list) + ret = bring_bricks_offline(self.volname, random_brick) + self.assertTrue(ret, "Failed to bring offline") + + # Creating a list of bricks to be removed + remove_bricks_list = [] + remove_bricks_list.append(random_brick) + + # Checking if the brick is offline or not + ret = are_bricks_offline(self.mnode, self.volname, + remove_bricks_list) + self.assertTrue(ret, 'Bricks %s are not offline' + % random_brick) + g.log.info('Brick %s is offline as expected', random_brick) + + # umounting the brick which was made offline + self.brick_node, volume_brick = random_brick.split(':') + self.node_brick = '/'.join(volume_brick.split('/')[0:3]) + g.log.info('Start umount brick %s...', self.node_brick) + ret, _, _ = g.run(self.brick_node, 'umount %s' % self.node_brick) + self.assertFalse(ret, 'Failed to umount brick %s' % self.node_brick) + g.log.info('Successfully umounted brick %s', self.node_brick) + + self.check_for_remount = True + + # Force starting the volume + ret, _, _ = volume_start(self.mnode, self.volname, True) + self.assertEqual(ret, 0, "Faile to force start volume") + g.log.info("Successfully force start volume") + + # remounting the offline brick + g.log.info('Start remount brick %s with read-write option...', + self.node_brick) + ret, _, _ = g.run(self.brick_node, 'mount %s' % self.node_brick) + self.assertFalse(ret, 'Failed to remount brick %s' % self.node_brick) + g.log.info('Successfully remounted %s with read-write option', + self.node_brick) + + 
self.check_for_remount = False
+
+        # Checking that all the bricks shouldn't be online
+        ret = are_bricks_online(self.mnode, self.volname, brick_list)
+        self.assertFalse(ret, "Unexpected: All the bricks are online")
+        g.log.info("Expected: All the bricks are not online")
+
+        # Bringing back the offline brick online
+        ret = bring_bricks_online(self.mnode, self.volname, remove_bricks_list)
+        self.assertTrue(ret, "Failed to bring bricks online")
+        g.log.info("Successfully brought bricks online")
+
+        # Force starting the volume
+        ret, _, _ = volume_start(self.mnode, self.volname, True)
+        self.assertEqual(ret, 0, "Failed to force start volume")
+        g.log.info("Successfully force start volume")
+
+        # Checking if all the bricks are online or not
+        ret = are_bricks_online(self.mnode, self.volname, brick_list)
+        self.assertTrue(ret, "Unexpected: All the bricks are not online")
+        g.log.info("Expected: All the bricks are online")
diff --git a/tests/functional/glusterd/test_volume_status_with_absent_bricks.py b/tests/functional/glusterd/test_volume_status_with_absent_bricks.py
index f47c6b61c..5aed2af3b 100644
--- a/tests/functional/glusterd/test_volume_status_with_absent_bricks.py
+++ b/tests/functional/glusterd/test_volume_status_with_absent_bricks.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 Red Hat, Inc. <http://www.redhat.com>
+# Copyright (C) 2018-2020 Red Hat, Inc.
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -22,64 +22,49 @@ import random from glusto.core import Glusto as g from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on -from glustolibs.gluster.volume_ops import (volume_create, volume_start, - volume_status) +from glustolibs.gluster.volume_ops import (volume_start, volume_status) +from glustolibs.gluster.brick_libs import get_all_bricks from glustolibs.gluster.volume_libs import cleanup_volume -from glustolibs.gluster.lib_utils import form_bricks_list -@runs_on([['distributed', 'replicated', 'distributed-replicated'], - ['glusterfs']]) +@runs_on([['distributed', 'replicated', 'distributed-replicated', + 'dispersed', 'distributed-dispersed', 'arbiter', + 'distributed-arbiter'], ['glusterfs']]) class TestVolumeStatusWithAbsentBricks(GlusterBaseClass): + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Creating Volume + g.log.info("Started creating volume") + ret = self.setup_volume(False, True) + if ret: + g.log.info("Volme created successfully : %s", self.volname) + else: + raise ExecutionError("Volume creation failed: %s" % self.volname) def tearDown(self): - """ - tearDown for every test - """ - # stopping the volume and Cleaning up the volume + # Stopping the volume and Cleaning up the volume ret = cleanup_volume(self.mnode, self.volname) if not ret: raise ExecutionError("Failed to cleanup volume") g.log.info("Volume deleted successfully : %s", self.volname) + # Calling GlusterBaseClass tearDown self.get_super_method(self, 'tearDown')() def test_volume_absent_bricks(self): - ''' - -> Create Volume - -> Remove any one Brick directory - -> Start Volume - -> Check the gluster volume status - ''' - num_of_bricks = 0 - replica = True - - if self.volume_type == 
'distributed': - num_of_bricks = 3 - replica = False - - elif self.volume_type == 'replicated': - num_of_bricks = 3 - - elif self.volume_type == 'distributed-replicated': - num_of_bricks = 6 - - # Forming brick list - brick_list = form_bricks_list(self.mnode, self.volname, num_of_bricks, - self.servers, self.all_servers_info) - if replica: - # Creating Volume - ret, _, _ = volume_create(self.mnode, self.volname, brick_list, - replica_count=3) - self.assertEqual(ret, 0, "Volume creation failed for %s" - % self.volname) - g.log.info("volume created successfully %s", self.volname) - else: - # Creating Volume - ret, _, _ = volume_create(self.mnode, self.volname, brick_list) - self.assertEqual(ret, 0, "Volume creation failed for %s" - % self.volname) - g.log.info("volume created successfully %s", self.volname) + """ + Test Case: + 1) Create Volume + 2) Remove any one Brick directory + 3) Start Volume and compare the failure message + 4) Check the gluster volume status nad compare the status message + """ + # Fetching the brick list + brick_list = get_all_bricks(self.mnode, self.volname) + self.assertIsNotNone(brick_list, "Failed to get the bricks in" + " the volume") # Command for removing brick directory random_brick = random.choice(brick_list) diff --git a/tests/functional/glusterd/test_volume_status_xml.py b/tests/functional/glusterd/test_volume_status_xml.py index 69f1488ba..568d6906d 100644 --- a/tests/functional/glusterd/test_volume_status_xml.py +++ b/tests/functional/glusterd/test_volume_status_xml.py @@ -61,6 +61,22 @@ class TestVolumeStatusxml(GlusterBaseClass): "servers %s" % self.servers) self.get_super_method(self, 'tearDown')() + def _get_test_specific_glusterd_log(self, node): + """Gets the test specific glusterd log""" + # Extract the test specific cmds from cmd_hostory + start_msg = "Starting Test : %s : %s" % (self.id(), + self.glustotest_run_id) + end_msg = "Ending Test: %s : %s" % (self.id(), + self.glustotest_run_id) + glusterd_log = 
"/var/log/glusterfs/glusterd.log"
+        cmd = ("awk '/{}/ {{p=1}}; p; /{}/ {{p=0}}' {}"
+               .format(start_msg, end_msg, glusterd_log))
+        ret, test_specific_glusterd_log, err = g.run(node, cmd)
+        self.assertEqual(ret, 0, "Failed to extract glusterd log specific"
+                         " to the current test case. "
+                         "Error : %s" % err)
+        return test_specific_glusterd_log
+
     def test_volume_status_xml(self):
 
         # create a two node cluster
@@ -109,3 +125,14 @@ class TestVolumeStatusxml(GlusterBaseClass):
         self.assertIsNotNone(vol_status, ("Failed to get volume "
                                           "status --xml for %s"
                                           % self.volname))
+
+        # Verify there are no crashes while executing gluster volume status
+        status = True
+        glusterd_log = (self._get_test_specific_glusterd_log(self.mnode)
+                        .split("\n"))
+        for line in glusterd_log:
+            if ' E ' in line:
+                status = False
+                g.log.info("Unexpected! Error found %s", line)
+
+        self.assertTrue(status, "Error found in glusterd logs")
diff --git a/tests/functional/glusterd/test_xml_dump_of_gluster_volume_status_during_rebalance.py b/tests/functional/glusterd/test_xml_dump_of_gluster_volume_status_during_rebalance.py
new file mode 100644
index 000000000..5712dcf32
--- /dev/null
+++ b/tests/functional/glusterd/test_xml_dump_of_gluster_volume_status_during_rebalance.py
@@ -0,0 +1,185 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.gluster_init import ( + stop_glusterd, start_glusterd, + is_glusterd_running +) +from glustolibs.gluster.lib_utils import form_bricks_list +from glustolibs.gluster.peer_ops import wait_for_peers_to_connect +from glustolibs.gluster.rebalance_ops import ( + get_rebalance_status, + rebalance_start +) +from glustolibs.gluster.volume_libs import ( + cleanup_volume +) +from glustolibs.gluster.volume_ops import ( + volume_stop, volume_create, volume_start, get_volume_status +) +from glustolibs.io.utils import ( + list_all_files_and_dirs_mounts, + wait_for_io_to_complete +) +from glustolibs.misc.misc_libs import upload_scripts + + +@runs_on([['distributed-replicated'], ['glusterfs']]) +class XmlDumpGlusterVolumeStatus(GlusterBaseClass): + """ + xml Dump of gluster volume status during rebalance, when one gluster + node is down + """ + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Setup Volume and Mount Volume + ret = cls.setup_volume_and_mount_volume(mounts=cls.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" % + cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + # Start IO on mounts + cls.all_mounts_procs = [] + for index, mount_obj in enumerate(cls.mounts, start=1): + cmd = 
("/usr/bin/env python %s create_deep_dirs_with_files " + "--dirname-start-num %d " + "--dir-depth 1 " + "--dir-length 5 " + "--max-num-of-dirs 10 " + "--num-of-files 60 %s" % ( + cls.script_upload_path, + index + 10, mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + cls.all_mounts_procs.append(proc) + cls.io_validation_complete = False + + # Wait for IO to complete + if not cls.io_validation_complete: + g.log.info("Wait for IO to complete") + ret = wait_for_io_to_complete(cls.all_mounts_procs, cls.mounts) + if not ret: + raise ExecutionError("IO failed on some of the clients") + + ret = list_all_files_and_dirs_mounts(cls.mounts) + if not ret: + raise ExecutionError("Failed to list all files and dirs") + + def test_xml_dump_of_gluster_volume_status_during_rebalance(self): + """ + 1. Create a trusted storage pool by peer probing the node + 2. Create a distributed-replicated volume + 3. Start the volume and fuse mount the volume and start IO + 4. Create another replicated volume and start it and stop it + 5. Start rebalance on the volume + 6. While rebalance in progress, stop glusterd on one of the nodes + in the Trusted Storage pool. + 7. 
Get the status of the volumes with --xml dump + """ + self.volname_2 = "test_volume_2" + + # create volume + # Fetching all the parameters for volume_create + list_of_three_servers = [] + server_info_for_three_nodes = {} + for server in self.servers[:3]: + list_of_three_servers.append(server) + server_info_for_three_nodes[server] = self.all_servers_info[ + server] + + bricks_list = form_bricks_list(self.mnode, self.volname, + 3, list_of_three_servers, + server_info_for_three_nodes) + # Creating volumes using 3 servers + ret, _, _ = volume_create(self.mnode, self.volname_2, + bricks_list, force=True) + self.assertFalse(ret, "Volume creation failed") + g.log.info("Volume %s created successfully", self.volname_2) + ret, _, _ = volume_start(self.mnode, self.volname_2) + self.assertFalse( + ret, "Failed to start volume {}".format(self.volname_2)) + ret, _, _ = volume_stop(self.mnode, self.volname_2) + self.assertFalse( + ret, "Failed to stop volume {}".format(self.volname_2)) + + # Start Rebalance + ret, _, _ = rebalance_start(self.mnode, self.volname) + self.assertEqual(ret, 0, ("Failed to start rebalance on the volume " + "%s", self.volname)) + + # Get rebalance status + status_info = get_rebalance_status(self.mnode, self.volname) + status = status_info['aggregate']['statusStr'] + + self.assertIn('in progress', status, + "Rebalance process is not running") + g.log.info("Rebalance process is running") + + # Stop glusterd + ret = stop_glusterd(self.servers[2]) + self.assertTrue(ret, "Failed to stop glusterd") + + ret, out, _ = g.run( + self.mnode, + "gluster v status | grep -A 4 'Rebalance' | awk 'NR==3{print " + "$3,$4}'") + + ret = get_volume_status(self.mnode, self.volname, options="tasks") + rebalance_status = ret[self.volname]['task_status'][0]['statusStr'] + self.assertIn(rebalance_status, out.replace("\n", "")) + + def tearDown(self): + ret = is_glusterd_running(self.servers) + if ret: + ret = start_glusterd(self.servers) + if not ret: + raise 
ExecutionError("Failed to start glusterd on %s" + % self.servers) + g.log.info("Glusterd started successfully on %s", self.servers) + + # Checking for peer status from every node + for server in self.servers: + ret = wait_for_peers_to_connect(server, self.servers) + if not ret: + raise ExecutionError("Servers are not in peer probed state") + + ret = cleanup_volume(self.mnode, self.volname_2) + if not ret: + raise ExecutionError( + "Unable to delete volume % s" % self.volname_2) + # Unmount and cleanup original volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() diff --git a/tests/functional/glusterfind/test_gfind_list_cli.py b/tests/functional/glusterfind/test_gfind_list_cli.py new file mode 100644 index 000000000..bfc27da97 --- /dev/null +++ b/tests/functional/glusterfind/test_gfind_list_cli.py @@ -0,0 +1,111 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.glusterfind_ops import (gfind_list, gfind_create, + gfind_delete) + + +@runs_on([['distributed-replicated', 'distributed-arbiter', + 'distributed-dispersed', 'distributed', 'arbiter', + 'dispersed', 'replicated'], ['glusterfs']]) +class TestGlusterFindListCLI(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Setup Volume + if not self.setup_volume(): + raise ExecutionError("Failed to Setup_Volume %s" % self.volname) + + def tearDown(self): + + # Cleanup glusterfind session and volume + ret, _, _ = gfind_delete(self.mnode, self.volname, self.session) + if ret: + raise ExecutionError("Failed to delete session '%s'" + % self.session) + + if not self.cleanup_volume(): + raise ExecutionError("Failed to Cleanup Volume") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _check_glusterfind_list_output(self, out): + """Check if glusterfind list output is proper or not.""" + out = list( + filter(None, list(filter(None, out.split("\n")))[2].split(" "))) + self.assertEqual(out[0], self.session, + "Unexpected: Session name not poper in output") + self.assertEqual(out[1], self.volname, + "Unecpected: Volume name not proper in output") + + def test_gfind_list_cli(self): + """ + Verifying the glusterfind list command functionality with valid + and invalid values for the required and optional parameters. + + * Create a volume + * Create a session on the volume and call glusterfind list with the + following combinations: + - Valid values for optional parameters + - Invalid values for optional parameters + + NOTE: + There are no required parameters for glusterfind list command. 
+ """ + # Creating a glusterfind session + self.session = "session1" + ret, _, _ = gfind_create(self.mnode, self.volname, self.session) + self.assertEqual(ret, 0, "Glusterfind session creation for the " + "volume %s failed" % self.volname) + + # Checking output of glusterfind list + ret, out, _ = gfind_list(self.mnode) + self.assertEqual(ret, 0, "Glusterfind list failed") + self._check_glusterfind_list_output(out) + g.log.info("glusterfind list cmd validation without any param passed") + + # Check output for glusterfind list with valid and invalid volume name + for volume, expected_value, validation in ((self.volname, 0, 'valid'), + ("abc", 1, 'invalid')): + ret, out, _ = gfind_list(self.mnode, volname=volume) + self.assertEqual(ret, expected_value, + "Glusterfind list --volume check with %s " + "parameter failed" % validation) + if not ret: + self._check_glusterfind_list_output(out) + g.log.info("glusterind list cmd check with --volume param passed") + + # Check output for glusterfind list with valid and invalid session name + for session, expected_value, validation in ((self.session, 0, 'valid'), + ("abc", 1, 'invalid')): + ret, out, _ = gfind_list(self.mnode, sessname=session) + self.assertEqual(ret, expected_value, + "Glusterfind list --session check with %s " + "parameter failed" % validation) + if not ret: + self._check_glusterfind_list_output(out) + g.log.info("glusterfind list cmd check with --session param passed") + + # Check output of glusterind list with debug parameter + ret, _, _ = gfind_list(self.mnode, debug=True) + self.assertEqual(ret, 0, "Glusterfind list --debug parameter failed") + g.log.info("glusterfind list cmd check with --debug param passed") diff --git a/tests/functional/glusterfind/test_gfind_type_option.py b/tests/functional/glusterfind/test_gfind_type_option.py new file mode 100644 index 000000000..98e808f69 --- /dev/null +++ b/tests/functional/glusterfind/test_gfind_type_option.py @@ -0,0 +1,175 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.glusterfile import ( + file_exists, + remove_file, + check_if_pattern_in_file) +from glustolibs.gluster.glusterfind_ops import ( + gfind_create, + gfind_list, + gfind_pre, + gfind_query, + gfind_delete) + + +@runs_on([["replicated", "distributed-replicated", "dispersed", + "distributed", "distributed-dispersed", "arbiter", + "distributed-arbiter"], ["glusterfs"]]) +class TestGlusterfindTypeOption(GlusterBaseClass): + """ + TestGlusterfindTypeOption contains tests which verify the + glusterfind functionality with --full --type options. 
+ """ + def setUp(self): + """ + setup volume and mount volume + Initiate necessary variables + """ + # calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + # Setup Volume and Mount Volume + g.log.info("Starting to Setup %s", self.volname) + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume %s" % self.volname) + g.log.info("Successful in Setup Volume %s", self.volname) + self.session = "test-session-%s" % self.volname + self.outfile = "/tmp/test-outfile-%s.txt" % self.volname + + def tearDown(self): + """ + tearDown for every test + Clean up and unmount the volume + """ + # calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + # Delete the glusterfind sessions + ret, _, _ = gfind_delete(self.mnode, self.volname, self.session) + if ret: + raise ExecutionError("Failed to delete session %s" % self.session) + g.log.info("Successfully deleted session %s", self.session) + + # Remove the outfile created during 'glusterfind pre and query' + ret = remove_file(self.mnode, self.outfile, force=True) + if not ret: + raise ExecutionError("Failed to remove the outfile") + g.log.info("Successfully removed the outfile") + + # Cleanup the volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Cleanup Volume") + g.log.info("Successful in Cleanup Volume") + + def _check_contents_of_outfile(self, gftype): + """Check contents of outfile created by query and pre""" + if gftype == 'f': + content = self.list_of_files + elif gftype == 'd': + content = self.list_of_dirs + else: + content = self.list_of_files + self.list_of_dirs + + # Check if outfile is created or not + ret = file_exists(self.mnode, self.outfile) + self.assertTrue(ret, "Unexpected: File '%s' does not exist" + % self.outfile) + + for value in content: + ret = check_if_pattern_in_file(self.mnode, value, self.outfile) + self.assertEqual(ret, 
0, "Entry for '%s' not listed in %s" + % (value, self.outfile)) + + def test_gfind_full_type(self): + """ + Verifying the glusterfind --full functionality with --type f, + --type d and --type both + + * Create a volume + * Create a session on the volume + * Create various files on mount point + * Create various directories on mount point + * Perform glusterfind pre with --full --type f --regenerate-outfile + * Check the contents of outfile + * Perform glusterfind pre with --full --type d --regenerate-outfile + * Check the contents of outfile + * Perform glusterfind pre with --full --type both --regenerate-outfile + * Check the contents of outfile + * Perform glusterfind query with --full --type f + * Check the contents of outfile + * Perform glusterfind query with --full --type d + * Check the contents of outfile + * Perform glusterfind query with --full --type both + * Check the contents of outfile + """ + + # Create some files and directories from the mount point + cmd = ("cd {}; mkdir dir;mkdir .hiddendir;touch file;touch .hiddenfile" + ";mknod blockfile b 1 5;mknod charfile b 1 5; mkfifo pipefile;" + "touch fileforhardlink;touch fileforsoftlink;" + "ln fileforhardlink hardlinkfile;ln -s fileforsoftlink " + "softlinkfile".format(self.mounts[0].mountpoint)) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + + # Create list of files and dir to be used for checking + self.list_of_files = ['file', '.hiddenfile', 'blockfile', 'charfile', + 'pipefile', 'fileforhardlink', 'fileforsoftlink', + 'hardlinkfile', 'softlinkfile'] + self.list_of_dirs = ['dir', '.hiddendir'] + + self.assertEqual(ret, 0, "Failed to create files and dirs") + g.log.info("Files and Dirs created successfully on mountpoint") + + # Create session for volume + ret, _, _ = gfind_create(self.mnode, self.volname, self.session) + self.assertEqual(ret, 0, ("Unexpected: Creation of a session for the" + " volume %s failed" % self.volname)) + g.log.info("Successfully created a session for the volume %s", + 
self.volname) + + # Perform glusterfind list to check if session exists + _, out, _ = gfind_list(self.mnode, volname=self.volname, + sessname=self.session) + self.assertNotEqual(out, "No sessions found.", + "Failed to list the glusterfind session") + g.log.info("Successfully listed the glusterfind session") + + # Perform glusterfind full pre for the session with --type option + for gftype in ('f', 'd', 'both'): + ret, _, _ = gfind_pre( + self.mnode, self.volname, self.session, self.outfile, + full=True, gftype=gftype, regenoutfile=True) + self.assertEqual(ret, 0, "glusterfind pre command successful " + "with --type %s" % gftype) + + # Check the contents of the outfile + self._check_contents_of_outfile(gftype) + + # Perform glusterfind full query with the --type option + for gftype in ('f', 'd', 'both'): + ret, _, _ = gfind_query(self.mnode, self.volname, self.outfile, + full=True, gftype=gftype) + self.assertEqual(ret, 0, "glusterfind query command successful " + "with --type %s" % gftype) + + # Check the contents of the outfile + self._check_contents_of_outfile(gftype) diff --git a/tests/functional/glusterfind/test_glusterfind_when_brick_down.py b/tests/functional/glusterfind/test_glusterfind_when_brick_down.py new file mode 100644 index 000000000..de1ebaf23 --- /dev/null +++ b/tests/functional/glusterfind/test_glusterfind_when_brick_down.py @@ -0,0 +1,219 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY :or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +Description: + Test Glusterfind when brick is down +""" + +from random import choice +from time import sleep +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.volume_ops import set_volume_options +from glustolibs.gluster.peer_ops import wait_for_peers_to_connect +from glustolibs.gluster.lib_utils import list_files +from glustolibs.gluster.volume_libs import volume_start +from glustolibs.gluster.glusterfile import ( + file_exists, + remove_file, + check_if_pattern_in_file) +from glustolibs.gluster.glusterfind_ops import ( + gfind_create, + gfind_list, + gfind_pre, + gfind_post, + gfind_delete) +from glustolibs.gluster.brick_libs import ( + get_all_bricks, + bring_bricks_offline) + + +@runs_on([["replicated", "distributed-replicated", "dispersed", + "distributed", "distributed-dispersed"], + ["glusterfs"]]) +class TestGlusterFindBrickDown(GlusterBaseClass): + """ + Test glusterfind operation when a brick is down. 
+ """ + + def setUp(self): + """ + setup volume and mount volume + Initiate necessary variables + """ + + # calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + self.file_limit = 0 + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume %s" % self.volname) + g.log.info("Successful in Setup Volume %s", self.volname) + self.session = "test-session-%s" % self.volname + self.outfiles = [("/tmp/test-outfile-%s-%s.txt" + % (self.volname, i))for i in range(0, 2)] + + # Set the changelog rollover-time to 1 second + # This needs to be done in order for glusterfind to keep checking + # for changes in the mount point + option = {'changelog.rollover-time': '1'} + ret = set_volume_options(self.mnode, self.volname, option) + if not ret: + raise ExecutionError("Failed to set the volume option %s for %s" + % (option, self.volname)) + g.log.info("Successfully set the volume option for the volume %s", + self.volname) + + def _perform_io_and_validate_presence_of_files(self): + """ + Function to perform the IO and validate the presence of files. 
+ """ + self.file_limit += 10 + # Starting IO on the mounts + cmd = ("cd %s ; touch file{%d..%d}" % (self.mounts[0].mountpoint, + self.file_limit-10, + self.file_limit)) + + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to create files on mountpoint") + g.log.info("Files created successfully on mountpoint") + + # Gather the list of files from the mount point + files = list_files(self.mounts[0].client_system, + self.mounts[0].mountpoint) + self.assertIsNotNone(files, "Failed to get the list of files") + g.log.info("Successfully gathered the list of files from mount point") + + # Check if the files exist + for filename in files: + ret = file_exists(self.mounts[0].client_system, filename) + self.assertTrue(ret, ("Unexpected: File '%s' does not exist" + % filename)) + g.log.info("Successfully validated existence of '%s'", filename) + + def _perform_glusterfind_pre_and_validate_outfile(self): + """ + Function to perform glusterfind pre and validate outfile + """ + # Perform glusterfind pre for the session + ret, _, _ = gfind_pre(self.mnode, self.volname, self.session, + self.outfiles[0], full=True, noencode=True, + debug=True) + self.assertEqual(ret, 0, ("Failed to perform glusterfind pre")) + g.log.info("Successfully performed glusterfind pre") + + # Check if the outfile exists + ret = file_exists(self.mnode, self.outfiles[0]) + self.assertTrue(ret, ("Unexpected: File '%s' does not exist" + % self.outfiles[0])) + g.log.info("Successfully validated existence of '%s'", + self.outfiles[0]) + + # Check if all the files are listed in the outfile + for i in range(1, self.file_limit+1): + ret = check_if_pattern_in_file(self.mnode, "file%s" % i, + self.outfiles[0]) + self.assertEqual(ret, 0, ("File 'file%s' not listed in %s" + % (i, self.outfiles[0]))) + g.log.info("File 'file%s' listed in %s", i, self.outfiles[0]) + + def test_gfind_when_brick_down(self): + """ + Verifying the glusterfind functionality when a brick is down. + + 1. 
Create a volume + 2. Create a session on the volume + 3. Create various files from mount point + 4. Bring down brick process on one of the node + 5. Perform glusterfind pre + 6. Perform glusterfind post + 7. Check the contents of outfile + """ + + # pylint: disable=too-many-statements + # Create a session for the volume + ret, _, _ = gfind_create(self.mnode, self.volname, self.session) + self.assertEqual(ret, 0, ("Unexpected: Creation of a session for the " + "volume %s failed" % self.volname)) + g.log.info("Successfully created a session for the volume %s", + self.volname) + + # Perform glusterfind list to check if session exists + _, out, _ = gfind_list(self.mnode, volname=self.volname, + sessname=self.session) + self.assertNotEqual(out, "No sessions found.", + "Failed to list the glusterfind session") + g.log.info("Successfully listed the glusterfind session") + + self._perform_io_and_validate_presence_of_files() + + # Wait for changelog to get updated + sleep(2) + + # Bring one of the brick down. + brick_list = get_all_bricks(self.mnode, self.volname) + ret = bring_bricks_offline(self.volname, choice(brick_list)) + self.assertTrue(ret, "Failed to bring down the brick.") + g.log.info("Succesfully brought down one brick.") + + self._perform_glusterfind_pre_and_validate_outfile() + + # Perform glusterfind post for the session + ret, _, _ = gfind_post(self.mnode, self.volname, self.session) + self.assertEqual(ret, 0, ("Failed to perform glusterfind post")) + g.log.info("Successfully performed glusterfind post") + + # Bring the brick process up. 
+ ret = volume_start(self.mnode, self.volname, force=True) + self.assertTrue(ret, "Failed to start the volume.") + g.log.info("Successfully started the volume.") + + def tearDown(self): + """ + tearDown for every test + Clean up and unmount the volume + """ + # calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + # Delete the glusterfind sessions + ret, _, _ = gfind_delete(self.mnode, self.volname, self.session) + if ret: + raise ExecutionError("Failed to delete session %s" % self.session) + g.log.info("Successfully deleted session %s", self.session) + + # Remove the outfiles created during 'glusterfind pre' + for out in self.outfiles: + ret = remove_file(self.mnode, out, force=True) + if not ret: + raise ExecutionError("Failed to remove the outfile %s" % out) + g.log.info("Successfully removed the outfiles") + + # Wait for the peers to be connected. + ret = wait_for_peers_to_connect(self.mnode, self.servers, 100) + if not ret: + raise ExecutionError("Peers are not in connected state.") + + # Cleanup the volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Cleanup Volume") + g.log.info("Successful in Cleanup Volume") diff --git a/tests/functional/glusterfind/test_glusterfind_when_node_down.py b/tests/functional/glusterfind/test_glusterfind_when_node_down.py new file mode 100644 index 000000000..1d8b2572a --- /dev/null +++ b/tests/functional/glusterfind/test_glusterfind_when_node_down.py @@ -0,0 +1,280 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY :or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" +Description: + Test Glusterfind when node is down +""" + +from random import choice +from time import sleep +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.volume_ops import set_volume_options +from glustolibs.gluster.peer_ops import wait_for_peers_to_connect +from glustolibs.gluster.lib_utils import list_files +from glustolibs.gluster.glusterfile import ( + file_exists, + remove_file, + check_if_pattern_in_file) +from glustolibs.gluster.glusterfind_ops import ( + gfind_create, + gfind_list, + gfind_pre, + gfind_post, + gfind_delete) +from glustolibs.gluster.gluster_init import ( + stop_glusterd, + start_glusterd, + wait_for_glusterd_to_start) +from glustolibs.misc.misc_libs import ( + reboot_nodes, + are_nodes_online) + + +@runs_on([["replicated", "distributed-replicated", "dispersed", + "distributed", "distributed-dispersed"], + ["glusterfs"]]) +class TestGlusterFindNodeDown(GlusterBaseClass): + """ + Test glusterfind operation when a node is down. 
+ """ + + def setUp(self): + """ + setup volume and mount volume + Initiate necessary variables + """ + + # calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + self.file_limit = 0 + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume %s" % self.volname) + g.log.info("Successful in Setup Volume %s", self.volname) + self.session = "test-session-%s" % self.volname + self.outfiles = [("/tmp/test-outfile-%s-%s.txt" + % (self.volname, i))for i in range(0, 2)] + + # Set the changelog rollover-time to 1 second + # This needs to be done in order for glusterfind to keep checking + # for changes in the mount point + option = {'changelog.rollover-time': '1'} + ret = set_volume_options(self.mnode, self.volname, option) + if not ret: + raise ExecutionError("Failed to set the volume option %s for %s" + % (option, self.volname)) + g.log.info("Successfully set the volume option for the volume %s", + self.volname) + + def _perform_io_and_validate_presence_of_files(self): + """ + Function to perform the IO and validate the presence of files. 
+ """ + self.file_limit += 10 + # Starting IO on the mounts + cmd = ("cd %s ; touch file{%d..%d}" % (self.mounts[0].mountpoint, + self.file_limit-10, + self.file_limit)) + + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to create files on mountpoint") + g.log.info("Files created successfully on mountpoint") + + # Gather the list of files from the mount point + files = list_files(self.mounts[0].client_system, + self.mounts[0].mountpoint) + self.assertIsNotNone(files, "Failed to get the list of files") + g.log.info("Successfully gathered the list of files from mount point") + + # Check if the files exist + for filename in files: + ret = file_exists(self.mounts[0].client_system, filename) + self.assertTrue(ret, ("Unexpected: File '%s' does not exist" + % filename)) + g.log.info("Successfully validated existence of '%s'", filename) + + def _perform_glusterfind_pre_and_validate_outfile(self): + """ + Function to perform glusterfind pre and validate outfile + """ + # Perform glusterfind pre for the session + ret, _, _ = gfind_pre(self.mnode, self.volname, self.session, + self.outfiles[0], full=True, noencode=True, + debug=True) + self.assertEqual(ret, 0, ("Failed to perform glusterfind pre")) + g.log.info("Successfully performed glusterfind pre") + + # Check if the outfile exists + ret = file_exists(self.mnode, self.outfiles[0]) + self.assertTrue(ret, ("Unexpected: File '%s' does not exist" + % self.outfiles[0])) + g.log.info("Successfully validated existence of '%s'", + self.outfiles[0]) + + # Check if all the files are listed in the outfile + for i in range(1, self.file_limit+1): + ret = check_if_pattern_in_file(self.mnode, "file%s" % i, + self.outfiles[0]) + self.assertEqual(ret, 0, ("File 'file%s' not listed in %s" + % (i, self.outfiles[0]))) + g.log.info("File 'file%s' listed in %s", i, self.outfiles[0]) + + def test_gfind_when_node_down(self): + """ + Verifying the glusterfind functionality when node is down. + + 1. 
Create a volume + 2. Create a session on the volume + 3. Create various files from mount point + 4. Bring down glusterd on one of the node + 5. Perform glusterfind pre + 6. Perform glusterfind post + 7. Check the contents of outfile + 8. Create more files from mountpoint + 9. Reboot one of the nodes + 10. Perform gluserfind pre + 11. Perform glusterfind post + 12. Check the contents of outfile + """ + + # pylint: disable=too-many-statements + # Create a session for the volume + ret, _, _ = gfind_create(self.mnode, self.volname, self.session) + self.assertEqual(ret, 0, ("Unexpected: Creation of a session for the " + "volume %s failed" % self.volname)) + g.log.info("Successfully created a session for the volume %s", + self.volname) + + # Perform glusterfind list to check if session exists + _, out, _ = gfind_list(self.mnode, volname=self.volname, + sessname=self.session) + self.assertNotEqual(out, "No sessions found.", + "Failed to list the glusterfind session") + g.log.info("Successfully listed the glusterfind session") + + self._perform_io_and_validate_presence_of_files() + + # Wait for changelog to get updated + sleep(2) + + # Bring one of the node down. + self.random_server = choice(self.servers[1:]) + ret = stop_glusterd(self.random_server) + self.assertTrue(ret, "Failed to stop glusterd on one node.") + g.log.info("Succesfully stopped glusterd on one node.") + + self._perform_glusterfind_pre_and_validate_outfile() + + # Perform glusterfind post for the session + ret, _, _ = gfind_post(self.mnode, self.volname, self.session) + self.assertEqual(ret, 0, ("Failed to perform glusterfind post")) + g.log.info("Successfully performed glusterfind post") + + # Bring glusterd which was downed on a random node, up. + ret = start_glusterd(self.random_server) + self.assertTrue(ret, "Failed to start glusterd on %s" + % self.random_server) + g.log.info("Successfully started glusterd on node : %s", + self.random_server) + + # Waiting for glusterd to start completely. 
+ ret = wait_for_glusterd_to_start(self.random_server) + self.assertTrue(ret, "glusterd is not running on %s" + % self.random_server) + g.log.info("glusterd is started and running on %s", + self.random_server) + + self._perform_io_and_validate_presence_of_files() + + # Perform IO + self._perform_io_and_validate_presence_of_files() + + # Wait for changelog to get updated + sleep(2) + + # Reboot one of the nodes. + self.random_server = choice(self.servers[1:]) + ret = reboot_nodes(self.random_server) + self.assertTrue(ret, "Failed to reboot the said node.") + g.log.info("Successfully started reboot process on one node.") + + self._perform_glusterfind_pre_and_validate_outfile() + + # Perform glusterfind post for the session + ret, _, _ = gfind_post(self.mnode, self.volname, self.session) + self.assertEqual(ret, 0, ("Failed to perform glusterfind post")) + g.log.info("Successfully performed glusterfind post") + + # Gradual sleep backoff till the node has rebooted. + counter = 0 + timeout = 300 + ret = False + while counter < timeout: + ret, _ = are_nodes_online(self.random_server) + if not ret: + g.log.info("Node's offline, Retrying after 5 seconds ...") + sleep(5) + counter += 5 + else: + ret = True + break + self.assertTrue(ret, "Node is still offline.") + g.log.info("Rebooted node is online") + + # Wait for glusterd to start completely + ret = wait_for_glusterd_to_start(self.random_server) + self.assertTrue(ret, "glusterd is not running on %s" + % self.random_server) + g.log.info("glusterd is started and running on %s", + self.random_server) + + def tearDown(self): + """ + tearDown for every test + Clean up and unmount the volume + """ + # calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + # Delete the glusterfind sessions + ret, _, _ = gfind_delete(self.mnode, self.volname, self.session) + if ret: + raise ExecutionError("Failed to delete session %s" % self.session) + g.log.info("Successfully deleted session %s", self.session) + + # 
Remove the outfiles created during 'glusterfind pre' + for out in self.outfiles: + ret = remove_file(self.mnode, out, force=True) + if not ret: + raise ExecutionError("Failed to remove the outfile %s" % out) + g.log.info("Successfully removed the outfiles") + + # Wait for the peers to be connected. + ret = wait_for_peers_to_connect(self.mnode, self.servers, 100) + if not ret: + raise ExecutionError("Peers are not in connected state.") + + # Cleanup the volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Cleanup Volume") + g.log.info("Successful in Cleanup Volume") diff --git a/tests/functional/multiplex/test_enabling_brick_mux.py b/tests/functional/multiplex/test_enabling_brick_mux.py index d83c4ebd9..395de3b25 100755 --- a/tests/functional/multiplex/test_enabling_brick_mux.py +++ b/tests/functional/multiplex/test_enabling_brick_mux.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2019-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,13 +14,18 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+from random import choice +import string from glusto.core import Glusto as g from glustolibs.gluster.gluster_base_class import (GlusterBaseClass, runs_on) from glustolibs.gluster.exceptions import ExecutionError from glustolibs.gluster.brickmux_ops import (disable_brick_mux, is_brick_mux_enabled, - get_brick_mux_status) + get_brick_mux_status, + enable_brick_mux) from glustolibs.gluster.lib_utils import search_pattern_in_file +from glustolibs.gluster.volume_ops import (set_volume_options, + reset_volume_option) @runs_on([['replicated'], @@ -38,9 +43,18 @@ class TestBrickMultiplexing(GlusterBaseClass): raise ExecutionError("Failed to disable brick multiplexing") g.log.info("Disabled brick multiplexing successfully") + ret, _, _ = reset_volume_option(self.mnode, "all", "all") + if ret: + raise ExecutionError("Unable to reset all volume options") + g.log.info("Successfully reset all the volume options.") + # Calling GlusterBaseClass teardown self.get_super_method(self, 'tearDown')() + @staticmethod + def get_random_string(chars, str_len=4): + return ''.join((choice(chars) for _ in range(str_len))) + def test_enabling_brick_mux(self): """ Test case: @@ -71,7 +85,7 @@ class TestBrickMultiplexing(GlusterBaseClass): g.log.info('Checking for warning message in output...') if "volume set: success" not in out: - self.assertIn(out, warning_message, + self.assertIn(warning_message, out, 'There is no warning message in ' 'output or message is incorrect.') g.log.info('Warning message is correct.') @@ -125,3 +139,114 @@ class TestBrickMultiplexing(GlusterBaseClass): cmd = "yes | gluster v set all cluster.brick-multiplex incorrect" ret, _, _ = g.run(self.mnode, cmd) self.assertEqual(ret, 1, 'Incorrect status has passed') + + def test_enabling_brick_mux_with_wrong_values(self): + """ + Test Case: + - Create a gluster cluster + - Set cluster.brick-multiplex value to random string(Must fail) + - Set cluster.brick-multiplex value to random int(Must fail) + - Set 
cluster.brick-multiplex value to random + special characters(Must fail) + """ + # Creation of random data for cluster.brick-multiplex option + # Data has: alphabets, numbers, punctuations and their combinations + key = 'cluster.brick-multiplex' + for char_type in (string.ascii_letters, string.punctuation, + string.printable, string.digits): + + temp_val = self.get_random_string(char_type) + value = "{}".format(temp_val) + ret = set_volume_options(self.mnode, 'all', {key: value}) + self.assertFalse(ret, "Unexpected: Erroneous value {}, to option " + "{} should result in failure".format(value, key)) + g.log.info("Expected: Erroneous value %s, to option " + "%s resulted in failure", value, key) + + def set_max_brick_process_to_string(self): + """Set cluster.max-bricks-per-process to string""" + key = 'cluster.max-bricks-per-process' + for char_type in (string.ascii_letters, string.punctuation): + + temp_val = self.get_random_string(char_type) + value = "{}".format(temp_val) + ret = set_volume_options(self.mnode, 'all', {key: value}) + self.assertFalse(ret, "Unexpected: Erroneous value {}, to option " + "{} should result in failure".format(value, key)) + g.log.info("Expected: Erroneous value %s, to option " + "%s resulted in failure", value, key) + + def test_enable_brick_mux_with_max_bricks_per_process(self): + """ + Test Case: + - Create a gluster cluster + - With brick mux set to disable: + 1.Set cluster.max-bricks-per-process to int and check + error message(Must fail) + 2.Set cluster.max-bricks-per-process to string(Must fail) + - With brick mux set to enable: + 1.Set cluster.max-bricks-per-process to string(Must fail) + 2.Set cluster.max-bricks-per-process to 0 + 3.Set cluster.max-bricks-per-process to 1 and check + error message.(Must fail) + 4.Set cluster.max-bricks-per-process to int value > 1. + """ + # Disabling cluster.brick-multiplex if not. 
+ if is_brick_mux_enabled(self.mnode): + ret = disable_brick_mux(self.mnode) + self.assertTrue(ret, "Unable to disable brickmux") + g.log.info("Brick mux is disabled") + + # Set cluster.max-bricks-per-process to int and check + # error message(Must fail) + cmd = "gluster v set all cluster.max-bricks-per-process 10" + ret, _, err = g.run(self.mnode, cmd) + self.assertEqual(ret, 1, 'Able to set max-bricks-per-process' + 'without enabling brick mux') + self.assertIn( + "volume set: failed: Brick-multiplexing is not enabled. " + "Please enable brick multiplexing before" + " trying to set this option.", err, + "Error message not proper on trying to " + "set max-bricks-per-process without brickmux") + + # Set cluster.max-bricks-per-process to string(Must fail) + self.set_max_brick_process_to_string() + + # Enable cluster.brick-multiplex. + ret = enable_brick_mux(self.mnode) + self.assertTrue(ret, "Unable to enable cluster.brick-multiplex") + g.log.info("Brick mux is enabled") + + # Set cluster.max-bricks-per-process to string(Must fail) + self.set_max_brick_process_to_string() + + # Set cluster.max-bricks-per-process to 0. + ret = set_volume_options(self.mnode, 'all', + {'cluster.max-bricks-per-process': '0'}) + self.assertTrue(ret, "Unable to set " + "cluster.max-bricks-per-process to 0") + g.log.info("Successfully set cluster.max-bricks-per-process to 0") + + # Set cluster.max-bricks-per-process to 1 and check + # error message.(Must fail) + cmd = "gluster v set all cluster.max-bricks-per-process 1" + ret, _, err = g.run(self.mnode, cmd) + self.assertEqual(ret, 1, 'Able to set max-bricks-per-process' + 'with enabling brick mux') + self.assertIn( + "volume set: failed: Brick-multiplexing is enabled." 
+ " Please set this option to a value other than 1 to" + " make use of the brick-multiplexing feature.", err, + "Error message not proper on trying to set max-bricks-per-process" + " with brickmux") + + # Set cluster.max-bricks-per-process to int value > 1 + key = 'cluster.max-bricks-per-process' + temp_val = self.get_random_string(string.digits) + value = "{}".format(temp_val) + ret = set_volume_options(self.mnode, 'all', + {key: value}) + self.assertTrue(ret, "Unexpected: Erroneous value {}, to option " + "{} should not result in failure".format(value, key)) + g.log.info("Value %s, set to option %s", value, key) diff --git a/tests/functional/nfs_ganesha/root-squash/__init__.py b/tests/functional/nfs_ganesha/root-squash/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/tests/functional/nfs_ganesha/root-squash/__init__.py diff --git a/tests/functional/nfs_ganesha/root-squash/test_nfs_ganesha_root_squash.py b/tests/functional/nfs_ganesha/root-squash/test_nfs_ganesha_root_squash.py new file mode 100644 index 000000000..1f91b33d0 --- /dev/null +++ b/tests/functional/nfs_ganesha/root-squash/test_nfs_ganesha_root_squash.py @@ -0,0 +1,162 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
"""
    Test Cases in this module test the nfs ganesha version 3 and 4
    rootsquash functionality cases.
"""
from glusto.core import Glusto as g
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import runs_on, GlusterBaseClass
from glustolibs.gluster.nfs_ganesha_libs import (
    wait_for_nfs_ganesha_volume_to_get_unexported)
from glustolibs.io.utils import validate_io_procs, get_mounts_stat
from glustolibs.gluster.nfs_ganesha_ops import (
    set_root_squash,
    unexport_nfs_ganesha_volume)


@runs_on([['replicated', 'distributed', 'distributed-replicated',
           'dispersed', 'distributed-dispersed'],
          ['nfs']])
class TestNfsGaneshaRootSquash(GlusterBaseClass):
    """
    Tests to verify Nfs Ganesha v3/v4 rootsquash stability
    Steps:
    1. Create some files and dirs inside mount point
    2. Check for owner and group
    3. Set permission as 777 for mount point
    4. Enable root-squash on volume
    5. Create some more files and dirs
    6. Check for owner and group for any file
    7. Edit file created by root user
    """
    def setUp(self):
        """Setup and mount the volume."""
        self.get_super_method(self, 'setUp')()

        ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
        if not ret:
            raise ExecutionError("Failed to setup and mount volume %s"
                                 % self.volname)
        g.log.info("Successful in setup and mount volume %s", self.volname)

    def test_rootsquash_enable(self):
        # Start IO on mount point: files created by the root user.
        self.all_mounts_procs = []
        cmd = ("for i in {1..10}; do touch %s/file$i; done"
               % self.mounts[0].mountpoint)
        proc = g.run_async(self.mounts[0].client_system, cmd,
                           user=self.mounts[0].user)
        self.all_mounts_procs.append(proc)

        # Validate IO
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("IO is successful on all mounts")

        # Get stat of all the files/dirs created.
        ret = get_mounts_stat(self.mounts)
        self.assertTrue(ret, "Stat failed on some of the clients")
        g.log.info("Successful in getting stats of files/dirs "
                   "from mount point")

        # Without root-squash the files must be owned by root:root.
        for mount_obj in self.mounts:
            cmd = ("ls -l %s/file5 | awk '{ print $3, $4 }' |sort"
                   % mount_obj.mountpoint)
            ret, out, err = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, err)
            self.assertIn("root root", out, "Owner and group is not ROOT")
            g.log.info("Owner and group of file is ROOT")

        # Open up the mount point so the squashed user can create files.
        for mount_obj in self.mounts:
            cmd = ("chmod 777 %s" % mount_obj.mountpoint)
            ret, _, err = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, err)
            g.log.info("Mount point permission changed to 777")

        # Enable root-squash on volume
        ret = set_root_squash(self.servers[0], self.volname)
        self.assertTrue(ret, "Failed to enable root-squash on volume")
        g.log.info("root-squash is enabled on the volume")

        # Start IO on mount point: now created as the squashed user.
        self.all_mounts_procs = []
        cmd = ("for i in {1..10}; do touch %s/Squashfile$i; done"
               % self.mounts[0].mountpoint)
        proc = g.run_async(self.mounts[0].client_system, cmd,
                           user=self.mounts[0].user)
        self.all_mounts_procs.append(proc)

        # Validate IO
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("IO is successful on all mounts")

        # Get stat of all the files/dirs created.
        ret = get_mounts_stat(self.mounts)
        self.assertTrue(ret, "Stat failed on some of the clients")
        g.log.info("Successful in getting stats of files/dirs "
                   "from mount point")

        # With root-squash on, new files must be owned by nfsnobody.
        for mount_obj in self.mounts:
            cmd = ("ls -l %s/Squashfile5 | awk '{print $3, $4}' | sort"
                   % mount_obj.mountpoint)
            ret, out, err = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, err)
            self.assertIn("nfsnobody nfsnobody", out,
                          "Owner and group of file is NOT NFSNOBODY")
            g.log.info("Owner and group of file is NFSNOBODY")

        # The squashed user must NOT be able to edit a root-owned file.
        for mount_obj in self.mounts:
            cmd = ("echo hello > %s/file10" % mount_obj.mountpoint)
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertEqual(ret, 1, "nfsnobody user editing file created by "
                             "root user should FAIL")
            g.log.info("nfsnobody user failed to edit file "
                       "created by root user")

    def tearDown(self):
        # Disable root-squash so later tests start from a clean state.
        ret = set_root_squash(self.mnode, self.volname, squash=False,
                              do_refresh_config=True)
        if not ret:
            raise ExecutionError("Failed to disable root-squash on nfs "
                                 "ganesha cluster")
        g.log.info("root-squash is disabled on volume")

        # Unexport volume and wait for the unexport to take effect.
        unexport_nfs_ganesha_volume(self.mnode, self.volname)
        ret = wait_for_nfs_ganesha_volume_to_get_unexported(self.mnode,
                                                            self.volname)
        if not ret:
            raise ExecutionError("Volume %s is not unexported."
                                 % self.volname)
        g.log.info("Unexporting of volume is successful")

        # Unmount and cleanup Volume
        ret = self.unmount_volume_and_cleanup_volume(self.mounts)
        if ret:
            g.log.info("Successful unmount and cleanup of volume")
        else:
            raise ExecutionError("Failed to unmount and cleanup volume")
+""" +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import runs_on, GlusterBaseClass +from glustolibs.gluster.nfs_ganesha_libs import ( + wait_for_nfs_ganesha_volume_to_get_unexported) +from glustolibs.io.utils import get_mounts_stat +from glustolibs.gluster.nfs_ganesha_ops import ( + set_root_squash, + unexport_nfs_ganesha_volume) +from glustolibs.gluster.lib_utils import (append_string_to_file) +from glustolibs.gluster.glusterfile import set_file_permissions + + +@runs_on([['replicated', 'distributed', 'distributed-replicated', + 'dispersed', 'distributed-dispersed'], + ['nfs']]) +class TestNfsGaneshaRootSquash(GlusterBaseClass): + + def setUp(self): + """ + Setup Volume + """ + self.get_super_method(self, 'setUp')() + + # Setup and mount volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to setup and mount volume %s" + % self.volname) + g.log.info("Successful in setup and mount volume %s", self.volname) + + def test_root_squash_enable(self): + """ + Tests to verify Nfs Ganesha rootsquash functionality with multi + client + Steps: + 1. Create some directories on mount point. + 2. Create some files inside those directories + 3. Set permission as 777 for mount point + 4. Enable root-squash on volume + 5. Edit file created by root user from client 2 + It should not allow to edit the file + 6. Create some directories on mount point. + 7. Create some files inside the directories + Files and directories will be created by + nfsnobody user + 8. Edit the file created in step 7 + It should allow to edit the file + 9. Disable root squash + 10. 
Edit the file created at step 7 + It should allow to edit the file + """ + # Create Directories on Mount point + cmd = ("for i in {1..10}; do mkdir %s/dir$i; done" + % self.mounts[0].mountpoint) + ret, _, err = g.run(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + self.assertEqual(ret, 0, err) + + # Create files inside directories on mount point. + cmd = ("for i in {1..10}; do touch %s/dir$i/file$i; done" + % self.mounts[0].mountpoint) + ret, _, err = g.run(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + self.assertEqual(ret, 0, err) + + # Get stat of all the files/dirs created. + ret = get_mounts_stat(self.mounts) + self.assertTrue(ret, "Stat failed on some of the clients") + g.log.info("Successful in getting stats of files/dirs " + "from mount point") + + # Set mount point permission to 777 + ret = set_file_permissions(self.mounts[0].client_system, + self.mounts[0].mountpoint, 777) + self.assertTrue(ret, "Failed to set permission for directory") + g.log.info("Successfully set permissions for directory") + + # Enable root-squash on volume + ret = set_root_squash(self.servers[0], self.volname) + self.assertTrue(ret, "Failed to enable root-squash on volume") + g.log.info("root-squash is enable on the volume") + + # Edit file created by root user from client 2 + ret = append_string_to_file(self.mounts[1].client_system, + "%s/dir5/file5" + % self.mounts[1].mountpoint, 'hello') + self.assertFalse(ret, "Unexpected:nfsnobody user editing file " + "created by root user should FAIL") + g.log.info("Successful:nfsnobody user failed to edit file " + "created by root user") + + # Create Directories on Mount point + cmd = ("for i in {1..10}; do mkdir %s/SquashDir$i; done" + % self.mounts[0].mountpoint) + ret, _, err = g.run(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + self.assertEqual(ret, 0, err) + + # Create files inside directories on mount point + cmd = ("for i in {1..10}; do touch %s/SquashDir$i/Squashfile$i;" + "done" 
% self.mounts[0].mountpoint) + ret, _, err = g.run(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + self.assertEqual(ret, 0, err) + + # Get stat of all the files/dirs created. + ret = get_mounts_stat(self.mounts) + self.assertTrue(ret, "Stat failed on some of the clients") + g.log.info("Successful in getting stats of files/dirs " + "from mount point") + + # Edit the file created by nfsnobody user from client 2 + ret = append_string_to_file(self.mounts[1].client_system, + "%s/SquashDir5/Squashfile5" + % self.mounts[1].mountpoint, + 'hello') + self.assertTrue(ret, "Unexpected:nfsnobody user failed to edit " + "the file created by nfsnobody user") + g.log.info("Successful:nfsnobody user successfully edited the " + "file created by nfsnobody user") + + # Disable root-squash + ret = set_root_squash(self.servers[0], self.volname, squash=False, + do_refresh_config=True) + self.assertTrue(ret, "Failed to disable root-squash on volume") + g.log.info("root-squash is disabled on the volume") + + # Edit the file created by nfsnobody user from root user + ret = append_string_to_file(self.mounts[1].client_system, + "%s/SquashDir10/Squashfile10" + % self.mounts[1].mountpoint, 'hello') + self.assertTrue(ret, "Unexpected:root user failed to edit " + "the file created by nfsnobody user") + g.log.info("Successful:root user successfully edited the " + "file created by nfsnobody user") + + def tearDown(self): + + # Unexport volume + unexport_nfs_ganesha_volume(self.mnode, self.volname) + ret = wait_for_nfs_ganesha_volume_to_get_unexported(self.mnode, + self.volname) + if not ret: + raise ExecutionError("Failed:Volume %s is not unexported." 
+ % self.volname) + g.log.info("Unexporting of volume is successful") + + # Unmount and cleanup Volume + ret = self.unmount_volume_and_cleanup_volume(self.mounts) + if ret: + g.log.info("Successful unmount and cleanup of volume") + else: + raise ExecutionError("Failed to unmount and cleanup volume") diff --git a/tests/functional/nfs_ganesha/root-squash/test_root_squash_with_glusterd_restart.py b/tests/functional/nfs_ganesha/root-squash/test_root_squash_with_glusterd_restart.py new file mode 100644 index 000000000..5ed925400 --- /dev/null +++ b/tests/functional/nfs_ganesha/root-squash/test_root_squash_with_glusterd_restart.py @@ -0,0 +1,170 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" + Test Cases in this module tests the nfs ganesha version 3 and 4 + rootsquash functionality cases. 
+""" +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import runs_on, GlusterBaseClass +from glustolibs.gluster.nfs_ganesha_libs import ( + wait_for_nfs_ganesha_volume_to_get_unexported) +from glustolibs.io.utils import get_mounts_stat +from glustolibs.gluster.nfs_ganesha_ops import ( + set_root_squash, + unexport_nfs_ganesha_volume) +from glustolibs.gluster.gluster_init import ( + is_glusterd_running, restart_glusterd) +from glustolibs.gluster.peer_ops import wait_for_peers_to_connect +from glustolibs.gluster.lib_utils import (append_string_to_file) +from glustolibs.gluster.glusterfile import set_file_permissions + + +@runs_on([['replicated', 'distributed', 'distributed-replicated', + 'dispersed', 'distributed-dispersed'], + ['nfs']]) +class TestNfsGaneshaRootSquash(GlusterBaseClass): + def setUp(self): + """ + Setup Volume + """ + self.get_super_method(self, 'setUp')() + + # Setup and mount volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to setup and mount volume %s" + % self.volname) + g.log.info("Successful in setup and mount volume %s", self.volname) + + def test_root_squash_enable(self): + """ + Tests to verify Nfs Ganesha rootsquash functionality when glusterd + service is restarted + Steps: + 1. Create some files and dirs inside mount point + 2. Set permission as 777 for mount point + 3. Enable root-squash on volume + 4. Create some more files and dirs + 5. Restart glusterd on all the nodes + 6. Try to edit file created in step 1 + It should not allow to edit the file + 7. Try to edit the file created in step 5 + It should allow to edit the file + """ + # Start IO on mount point. 
+ cmd = ("for i in {1..10}; do touch %s/file$i; done" + % self.mounts[0].mountpoint) + ret, _, err = g.run(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + self.assertEqual(ret, 0, err) + + # Get stat of all the files/dirs created. + ret = get_mounts_stat(self.mounts) + self.assertTrue(ret, "Stat failed on some of the clients") + g.log.info("Successful in getting stats of files/dirs " + "from mount point") + + # Set mount point permission to 777 + ret = set_file_permissions(self.mounts[0].client_system, + self.mounts[0].mountpoint, 777) + self.assertTrue(ret, "Failed to set permission for directory") + g.log.info("Successfully set permissions for directory") + + # Enable root-squash on volume + ret = set_root_squash(self.servers[0], self.volname) + self.assertTrue(ret, "Failed to enable root-squash on volume") + g.log.info("root-squash is enable on the volume") + + # Start IO on mount point. + cmd = ("for i in {1..10}; do touch %s/Squashfile$i; done" + % self.mounts[0].mountpoint) + ret, _, err = g.run(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + self.assertEqual(ret, 0, err) + + # Get stat of all the files/dirs created. + ret = get_mounts_stat(self.mounts) + self.assertTrue(ret, "Stat failed on some of the clients") + g.log.info("Successfull in getting stats of files/dirs " + "from mount point") + + # Restart glusterd on all servers + ret = restart_glusterd(self.servers) + self.assertTrue(ret, ("Failed to restart glusterd on all servers %s", + self.servers)) + g.log.info("Successfully restarted glusterd on all servers %s", + self.servers) + + # Check if glusterd is running on all servers + ret = is_glusterd_running(self.servers) + self.assertEqual(ret, 0, ("Failed:Glusterd is not running on all " + "servers %s", + self.servers)) + g.log.info("Glusterd is running on all the servers %s", self.servers) + + # Checking if peer is connected. 
+ ret = wait_for_peers_to_connect(self.mnode, self.servers) + self.assertTrue(ret, "Failed:Peer is not in connected state.") + g.log.info("Peers are in connected state.") + + # Edit file created by root user + for mount_obj in self.mounts: + ret = append_string_to_file(mount_obj.client_system, + "%s/file10" % mount_obj.mountpoint, + 'hello') + self.assertFalse(ret, "Unexpected:nfsnobody user editing file " + "created by root user should FAIL") + g.log.info("Successful:nfsnobody user failed to edit file " + "created by root user") + + # Edit the file created by nfsnobody user + for mount_obj in self.mounts: + ret = append_string_to_file(mount_obj.client_system, + "%s/Squashfile5" + % mount_obj.mountpoint, + 'hello') + self.assertTrue(ret, "Unexpected:nfsnobody user failed to edit " + "the file created by nfsnobody user") + g.log.info("Successful:nfsnobody user successfully edited the " + "file created by nfsnobody user") + + def tearDown(self): + + # Disable root-squash + ret = set_root_squash(self.mnode, self.volname, squash=False, + do_refresh_config=True) + if not ret: + raise ExecutionError("Failed to disable root-squash on nfs " + "ganesha cluster") + g.log.info("root-squash is disabled on volume") + + # Unexport volume + unexport_nfs_ganesha_volume(self.mnode, self.volname) + ret = wait_for_nfs_ganesha_volume_to_get_unexported(self.mnode, + self.volname) + if not ret: + raise ExecutionError("Failed:Volume %s is not unexported." 
+ % self.volname) + g.log.info("Unexporting of volume is successful") + + # Unmount and cleanup Volume + ret = self.unmount_volume_and_cleanup_volume(self.mounts) + if ret: + g.log.info("Successful unmount and cleanup of volume") + else: + raise ExecutionError("Failed to unmount and cleanup volume") diff --git a/tests/functional/nfs_ganesha/root-squash/test_root_squash_with_volume_restart.py b/tests/functional/nfs_ganesha/root-squash/test_root_squash_with_volume_restart.py new file mode 100755 index 000000000..424cda09b --- /dev/null +++ b/tests/functional/nfs_ganesha/root-squash/test_root_squash_with_volume_restart.py @@ -0,0 +1,177 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" + Test Cases in this module tests the nfs ganesha version 3 and 4 + rootsquash functionality cases. 
+""" +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import runs_on, GlusterBaseClass +from glustolibs.gluster.nfs_ganesha_libs import ( + wait_for_nfs_ganesha_volume_to_get_unexported, + wait_for_nfs_ganesha_volume_to_get_exported) +from glustolibs.io.utils import get_mounts_stat +from glustolibs.gluster.nfs_ganesha_ops import ( + set_root_squash, + unexport_nfs_ganesha_volume) +from glustolibs.gluster.volume_ops import (volume_stop, volume_start) +from glustolibs.gluster.lib_utils import (append_string_to_file) +from glustolibs.gluster.glusterfile import set_file_permissions + + +@runs_on([['replicated', 'distributed', 'distributed-replicated', + 'dispersed', 'distributed-dispersed'], + ['nfs']]) +class TestNfsGaneshaRootSquash(GlusterBaseClass): + + def setUp(self): + """ + Setup Volume + """ + self.get_super_method(self, 'setUp')() + + # Setup and mount volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to setup and mount volume %s" + % self.volname) + g.log.info("Successful in setup and mount volume %s", self.volname) + + def test_root_squash_enable(self): + """ + Tests to verify Nfs Ganesha rootsquash functionality when volume + is restarted + Steps: + 1. Create some files and dirs inside mount point + 2. Set permission as 777 for mount point + 3. Enable root-squash on volume + 4. Create some more files and dirs + 5. Restart volume + 6. Try to edit file created in step 1 + It should not allow to edit the file + 7. Try to edit the file created in step 5 + It should allow to edit the file + """ + # Start IO on mount point. + cmd = ("for i in {1..10}; do touch %s/file$i; done" + % self.mounts[0].mountpoint) + ret, _, err = g.run(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + self.assertEqual(ret, 0, err) + + # Get stat of all the files/dirs created. 
+ ret = get_mounts_stat(self.mounts) + self.assertTrue(ret, "Stat failed on some of the clients") + g.log.info("Successful in getting stats of files/dirs " + "from mount point") + + # Set mount point permission to 777 + ret = set_file_permissions(self.mounts[0].client_system, + self.mounts[0].mountpoint, 777) + self.assertTrue(ret, "Failed to set permission for directory") + g.log.info("Successfully set permissions for directory") + + # Enable root-squash on volume + ret = set_root_squash(self.servers[0], self.volname) + self.assertTrue(ret, "Failed to enable root-squash on volume") + g.log.info("root-squash is enable on the volume") + + # Start IO on mount point. + cmd = ("for i in {1..10}; do touch %s/Squashfile$i; done" + % self.mounts[0].mountpoint) + ret, _, err = g.run(self.mounts[0].client_system, cmd, + user=self.mounts[0].user) + self.assertEqual(ret, 0, err) + + # Get stat of all the files/dirs created. + ret = get_mounts_stat(self.mounts) + self.assertTrue(ret, "Stat failed on some of the clients") + g.log.info("Successful in getting stats of files/dirs " + "from mount point") + + # Stopping volume + ret = volume_stop(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to stop volume %s" % self.volname)) + g.log.info("Successful in stopping volume %s" % self.volname) + + # Waiting for few seconds for volume unexport. Max wait time is + # 120 seconds. + ret = wait_for_nfs_ganesha_volume_to_get_unexported(self.mnode, + self.volname) + self.assertTrue(ret, ("Failed to unexport volume %s after " + "stopping volume" % self.volname)) + g.log.info("Volume is unexported successfully") + + # Starting volume + ret = volume_start(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to start volume %s" % self.volname)) + g.log.info("Successful in starting volume %s" % self.volname) + + # Waiting for few seconds for volume export. Max wait time is + # 120 seconds. 
+ ret = wait_for_nfs_ganesha_volume_to_get_exported(self.mnode, + self.volname) + self.assertTrue(ret, ("Failed to export volume %s after " + "starting volume" % self.volname)) + g.log.info("Volume is exported successfully") + + # Edit file created by root user + for mount_obj in self.mounts: + ret = append_string_to_file(mount_obj.client_system, + "%s/file10" % mount_obj.mountpoint, + 'hello') + self.assertFalse(ret, "Unexpected:nfsnobody user editing file " + "created by root user should FAIL") + g.log.info("Successful:nfsnobody user failed to edit file " + "created by root user") + + # Edit the file created by nfsnobody user + for mount_obj in self.mounts: + ret = append_string_to_file(mount_obj.client_system, + "%s/Squashfile5" + % mount_obj.mountpoint, + 'hello') + self.assertTrue(ret, "Unexpected:nfsnobody user failed to edit " + "the file created by nfsnobody user") + g.log.info("Successful:nfsnobody user successfully edited the " + "file created by nfsnobody user") + + def tearDown(self): + + # Disable root-squash + ret = set_root_squash(self.mnode, self.volname, squash=False, + do_refresh_config=True) + if not ret: + raise ExecutionError("Failed to disable root-squash on nfs " + "ganesha cluster") + g.log.info("root-squash is disabled on volume") + + # Unexport volume + unexport_nfs_ganesha_volume(self.mnode, self.volname) + ret = wait_for_nfs_ganesha_volume_to_get_unexported(self.mnode, + self.volname) + if not ret: + raise ExecutionError("Failed:Volume %s is not unexported." 
+ % self.volname) + g.log.info("Unexporting of volume is successful") + + # Unmount and cleanup Volume + ret = self.unmount_volume_and_cleanup_volume(self.mounts) + if ret: + g.log.info("Successful unmount and cleanup of volume") + else: + raise ExecutionError("Failed to unmount and cleanup volume") diff --git a/tests/functional/nfs_ganesha/test_cthon.py b/tests/functional/nfs_ganesha/test_cthon.py index 9b950fe59..78232fdac 100644 --- a/tests/functional/nfs_ganesha/test_cthon.py +++ b/tests/functional/nfs_ganesha/test_cthon.py @@ -20,8 +20,7 @@ """ from glusto.core import Glusto as g -from glustolibs.gluster.gluster_base_class import runs_on -from glustolibs.gluster.nfs_ganesha_libs import NfsGaneshaClusterSetupClass +from glustolibs.gluster.gluster_base_class import runs_on, GlusterBaseClass from glustolibs.gluster.exceptions import ExecutionError from glustolibs.io.utils import run_cthon from glustolibs.misc.misc_libs import git_clone_and_compile @@ -30,7 +29,7 @@ from glustolibs.misc.misc_libs import git_clone_and_compile @runs_on([['replicated', 'distributed', 'distributed-replicated', 'dispersed', 'distributed-dispersed'], ['nfs']]) -class TestCthon(NfsGaneshaClusterSetupClass): +class TestCthon(GlusterBaseClass): """ Cthon test on NFS Ganesha v4.0, v4.1 """ @@ -42,12 +41,6 @@ class TestCthon(NfsGaneshaClusterSetupClass): """ cls.get_super_method(cls, 'setUpClass')() - # Setup nfs-ganesha if not exists. 
- ret = cls.setup_nfs_ganesha() - if not ret: - raise ExecutionError("Failed to setup nfs-ganesha cluster") - g.log.info("nfs-ganesha cluster is healthy") - # Cloning the cthon test repo cls.dir_name = "repo_dir" link = 'git://linux-nfs.org/~steved/cthon04.git' diff --git a/tests/functional/nfs_ganesha/test_ganesha_add_brick.py b/tests/functional/nfs_ganesha/test_ganesha_add_brick.py index 946b64c3b..e3fc6adc9 100644 --- a/tests/functional/nfs_ganesha/test_ganesha_add_brick.py +++ b/tests/functional/nfs_ganesha/test_ganesha_add_brick.py @@ -16,8 +16,7 @@ from glusto.core import Glusto as g -from glustolibs.gluster.nfs_ganesha_libs import NfsGaneshaClusterSetupClass -from glustolibs.gluster.gluster_base_class import runs_on +from glustolibs.gluster.gluster_base_class import runs_on, GlusterBaseClass from glustolibs.gluster.exceptions import ExecutionError from glustolibs.misc.misc_libs import upload_scripts from glustolibs.io.utils import validate_io_procs, get_mounts_stat @@ -31,7 +30,7 @@ from glustolibs.gluster.volume_libs import ( @runs_on([['distributed-replicated', 'replicated', 'distributed', 'dispersed', 'distributed-dispersed'], ['nfs']]) -class TestGaneshaAddBrick(NfsGaneshaClusterSetupClass): +class TestGaneshaAddBrick(GlusterBaseClass): """ Test cases to validate add-brick and rebalance functionality on volumes exported through nfs-ganesha @@ -45,12 +44,6 @@ class TestGaneshaAddBrick(NfsGaneshaClusterSetupClass): """ cls.get_super_method(cls, 'setUpClass')() - # Setup nfs-ganesha if not exists. 
- ret = cls.setup_nfs_ganesha() - if not ret: - raise ExecutionError("Failed to setup nfs-ganesha cluster") - g.log.info("nfs-ganesha cluster is healthy") - # Upload IO scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on " "mounts", cls.clients) @@ -177,8 +170,3 @@ class TestGaneshaAddBrick(NfsGaneshaClusterSetupClass): if not ret: raise ExecutionError("Failed to cleanup volume") g.log.info("Cleanup volume %s completed successfully", self.volname) - - @classmethod - def tearDownClass(cls): - cls.get_super_method(cls, 'tearDownClass')( - delete_nfs_ganesha_cluster=False) diff --git a/tests/functional/nfs_ganesha/test_ganesha_remove_brick.py b/tests/functional/nfs_ganesha/test_ganesha_remove_brick.py new file mode 100644 index 000000000..9e9cf39c2 --- /dev/null +++ b/tests/functional/nfs_ganesha/test_ganesha_remove_brick.py @@ -0,0 +1,140 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
from glusto.core import Glusto as g
from glustolibs.gluster.gluster_base_class import runs_on, GlusterBaseClass
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.misc.misc_libs import upload_scripts
from glustolibs.io.utils import validate_io_procs, get_mounts_stat
from glustolibs.gluster.volume_libs import (
    log_volume_info_and_status, shrink_volume,
    wait_for_volume_process_to_be_online)


@runs_on([['distributed', 'distributed-arbiter',
           'distributed-replicated', 'distributed-dispersed'],
          ['nfs']])
class TestGaneshaRemoveBrick(GlusterBaseClass):
    """
    This test case validates remove brick functionality on volumes exported
    through nfs-ganesha
    """

    @classmethod
    def setUpClass(cls):
        """
        Setup nfs-ganesha if not exists.
        Upload IO scripts to clients
        """
        cls.get_super_method(cls, 'setUpClass')()

        # Upload IO scripts for running IO on mounts
        cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
                                  "file_dir_ops.py")
        ret = upload_scripts(cls.clients, cls.script_upload_path)
        if not ret:
            raise ExecutionError("Failed to upload IO scripts to clients %s" %
                                 cls.clients)
        g.log.info("Successfully uploaded IO scripts to clients %s",
                   cls.clients)

    def setUp(self):
        """
        Setup Volume and Mount Volume
        """
        # Call the base class setUp first, matching the other nfs-ganesha
        # test classes (the original skipped this).
        self.get_super_method(self, 'setUp')()

        ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
        if not ret:
            raise ExecutionError("Failed to setup and mount volume %s"
                                 % self.volname)
        g.log.info("Successful in setup and mount volume %s", self.volname)

    def test_nfs_ganesha_remove_brick(self):
        """
        Verify remove brick operation while IO is running
        Steps:
        1. Start IO on mount points
        2. Perform remove brick operation
        3. Validate IOs
        """
        # pylint: disable=too-many-statements
        # Start IO on all mount points; keep dirname ranges disjoint per
        # mount by advancing the start number.
        all_mounts_procs, count = [], 1
        for mount_obj in self.mounts:
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 10 "
                   "--max-num-of-dirs 5 "
                   "--num-of-files 5 %s" % (self.script_upload_path, count,
                                            mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
            count += 10

        # Get stat of all the files/dirs created.
        ret = get_mounts_stat(self.mounts)
        self.assertTrue(ret, "Stat failed on some of the clients")
        g.log.info("Successfully got stat of all files/dirs created")

        # Perform remove brick operation while the IO above is in flight.
        ret = shrink_volume(self.mnode, self.volname)
        self.assertTrue(ret, ("Remove brick operation failed on "
                              "%s", self.volname))
        g.log.info("Remove brick operation is successful on "
                   "volume %s", self.volname)

        # Wait for volume processes to be online
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("All volume %s processes failed to come up "
                              "online", self.volname))
        g.log.info("All volume %s processes came up "
                   "online successfully after remove brick operation",
                   self.volname)

        # Log volume info and status after performing remove brick
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume %s", self.volname))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Validate IO
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all io's")

        # Get stat of all the files/dirs created.
        ret = get_mounts_stat(self.mounts)
        self.assertTrue(ret, "Stat failed on some of the clients")
        g.log.info("Successfully got stat of all files/dirs created")

    def tearDown(self):
        """
        Unmount and cleanup volume
        """
        # Unmount volume; a failure here is logged but does not abort the
        # cleanup below (matches original behaviour).
        ret = self.unmount_volume(self.mounts)
        if ret:
            g.log.info("Successfully unmounted the volume")
        else:
            g.log.error("Failed to unmount volume")

        # Cleanup volume
        ret = self.cleanup_volume()
        if not ret:
            raise ExecutionError("Failed to cleanup volume")
        g.log.info("Cleanup volume %s completed successfully", self.volname)
- ret = cls.setup_nfs_ganesha() - if not ret: - raise ExecutionError("Failed to setup nfs-ganesha cluster") - g.log.info("nfs-ganesha cluster is healthy") - # Upload IO scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on " "mounts", cls.clients) @@ -169,8 +162,3 @@ class TestGaneshaReplaceBrick(NfsGaneshaClusterSetupClass): if not ret: raise ExecutionError("Failed to cleanup volume") g.log.info("Cleanup volume %s completed successfully", self.volname) - - @classmethod - def tearDownClass(cls): - cls.get_super_method(cls, 'tearDownClass')( - delete_nfs_ganesha_cluster=False) diff --git a/tests/functional/nfs_ganesha/test_new_mount_while_io_in_progress.py b/tests/functional/nfs_ganesha/test_new_mount_while_io_in_progress.py index 1c6fc313c..798d5b7df 100644 --- a/tests/functional/nfs_ganesha/test_new_mount_while_io_in_progress.py +++ b/tests/functional/nfs_ganesha/test_new_mount_while_io_in_progress.py @@ -15,8 +15,7 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. from copy import deepcopy from glusto.core import Glusto as g -from glustolibs.gluster.nfs_ganesha_libs import NfsGaneshaClusterSetupClass -from glustolibs.gluster.gluster_base_class import runs_on +from glustolibs.gluster.gluster_base_class import runs_on, GlusterBaseClass from glustolibs.gluster.exceptions import ExecutionError from glustolibs.misc.misc_libs import upload_scripts from glustolibs.io.utils import validate_io_procs, get_mounts_stat @@ -24,7 +23,7 @@ from glustolibs.io.utils import validate_io_procs, get_mounts_stat @runs_on([['replicated', 'distributed', 'distributed-replicated'], ['nfs']]) -class TestMountWhileIoInProgress(NfsGaneshaClusterSetupClass): +class TestMountWhileIoInProgress(GlusterBaseClass): """ Test cases to validate new mount while IO is going on """ @@ -37,12 +36,6 @@ class TestMountWhileIoInProgress(NfsGaneshaClusterSetupClass): """ cls.get_super_method(cls, 'setUpClass')() - # Setup nfs-ganesha if not exists. 
- ret = cls.setup_nfs_ganesha() - if not ret: - raise ExecutionError("Failed to setup nfs-ganesha cluster") - g.log.info("nfs-ganesha cluster is healthy") - # Upload IO scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on " "mounts", cls.clients) @@ -141,8 +134,3 @@ class TestMountWhileIoInProgress(NfsGaneshaClusterSetupClass): if not ret: raise ExecutionError("Failed to cleanup volume") g.log.info("Cleanup volume %s completed successfully", self.volname) - - @classmethod - def tearDownClass(cls): - cls.get_super_method(cls, 'tearDownClass')( - delete_nfs_ganesha_cluster=False) diff --git a/tests/functional/nfs_ganesha/test_new_volume_while_io_in_progress.py b/tests/functional/nfs_ganesha/test_new_volume_while_io_in_progress.py index c367d3006..e8491ebfb 100644 --- a/tests/functional/nfs_ganesha/test_new_volume_while_io_in_progress.py +++ b/tests/functional/nfs_ganesha/test_new_volume_while_io_in_progress.py @@ -22,8 +22,8 @@ from copy import deepcopy from glusto.core import Glusto as g from glustolibs.gluster.nfs_ganesha_libs import ( - NfsGaneshaClusterSetupClass, wait_for_nfs_ganesha_volume_to_get_exported) -from glustolibs.gluster.gluster_base_class import runs_on + wait_for_nfs_ganesha_volume_to_get_exported) +from glustolibs.gluster.gluster_base_class import runs_on, GlusterBaseClass from glustolibs.gluster.exceptions import ExecutionError from glustolibs.misc.misc_libs import upload_scripts from glustolibs.io.utils import validate_io_procs, get_mounts_stat @@ -37,7 +37,7 @@ from glustolibs.gluster.nfs_ganesha_ops import export_nfs_ganesha_volume @runs_on([['replicated', 'distributed', 'distributed-replicated', 'dispersed', 'distributed-dispersed'], ['nfs']]) -class TestNewVolumeWhileIoInProgress(NfsGaneshaClusterSetupClass): +class TestNewVolumeWhileIoInProgress(GlusterBaseClass): """ Test cases to verify creation, export and mount of new volume while IO is going on another volume exported through nfs-ganesha. 
@@ -50,12 +50,6 @@ class TestNewVolumeWhileIoInProgress(NfsGaneshaClusterSetupClass): """ cls.get_super_method(cls, 'setUpClass')() - # Setup nfs-ganesha if not exists. - ret = cls.setup_nfs_ganesha() - if not ret: - raise ExecutionError("Failed to setup nfs-ganesha cluster") - g.log.info("nfs-ganesha cluster is healthy") - # Upload IO scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on " "mounts", cls.clients) @@ -242,8 +236,3 @@ class TestNewVolumeWhileIoInProgress(NfsGaneshaClusterSetupClass): if not ret: raise ExecutionError("Failed to cleanup volume %s", volume) g.log.info("Volume %s deleted successfully", volume) - - @classmethod - def tearDownClass(cls): - cls.get_super_method(cls, 'tearDownClass')( - delete_nfs_ganesha_cluster=False) diff --git a/tests/functional/nfs_ganesha/test_nfs_ganesha_acls.py b/tests/functional/nfs_ganesha/test_nfs_ganesha_acls.py index 5921f3b90..7b0865c0a 100644 --- a/tests/functional/nfs_ganesha/test_nfs_ganesha_acls.py +++ b/tests/functional/nfs_ganesha/test_nfs_ganesha_acls.py @@ -19,40 +19,25 @@ ACL functionality. 
""" +import time +import re from glusto.core import Glusto as g from glustolibs.gluster.nfs_ganesha_ops import ( set_acl, unexport_nfs_ganesha_volume) from glustolibs.gluster.nfs_ganesha_libs import ( - NfsGaneshaClusterSetupClass, - wait_for_nfs_ganesha_volume_to_get_unexported) -from glustolibs.gluster.gluster_base_class import runs_on + wait_for_nfs_ganesha_volume_to_get_unexported) +from glustolibs.gluster.gluster_base_class import runs_on, GlusterBaseClass from glustolibs.gluster.exceptions import ExecutionError -import time -import re @runs_on([['replicated', 'distributed', 'distributed-replicated', 'dispersed', 'distributed-dispersed'], ['nfs']]) -class TestNfsGaneshaAcls(NfsGaneshaClusterSetupClass): +class TestNfsGaneshaAcls(GlusterBaseClass): """ Tests to verify Nfs Ganesha v4 ACL stability """ - - @classmethod - def setUpClass(cls): - """ - Setup nfs-ganesha if not exists. - """ - cls.get_super_method(cls, 'setUpClass')() - - # Setup nfs-ganesha - ret = cls.setup_nfs_ganesha() - if not ret: - raise ExecutionError("Failed to setuo nfs-ganesha cluster") - g.log.info("nfs-ganesha cluster is healthy") - def setUp(self): """ Setup Volume diff --git a/tests/functional/nfs_ganesha/test_nfs_ganesha_run_io_multiple_clients.py b/tests/functional/nfs_ganesha/test_nfs_ganesha_run_io_multiple_clients.py index 39a50766a..0f9c17156 100644 --- a/tests/functional/nfs_ganesha/test_nfs_ganesha_run_io_multiple_clients.py +++ b/tests/functional/nfs_ganesha/test_nfs_ganesha_run_io_multiple_clients.py @@ -20,9 +20,8 @@ """ from glusto.core import Glusto as g -from glustolibs.gluster.gluster_base_class import runs_on +from glustolibs.gluster.gluster_base_class import runs_on, GlusterBaseClass from glustolibs.gluster.exceptions import ExecutionError -from glustolibs.gluster.nfs_ganesha_libs import NfsGaneshaClusterSetupClass from glustolibs.gluster.lib_utils import install_epel from glustolibs.io.utils import run_bonnie, run_fio, run_mixed_io @@ -30,7 +29,7 @@ from 
glustolibs.io.utils import run_bonnie, run_fio, run_mixed_io @runs_on([['replicated', 'distributed', 'distributed-replicated', 'dispersed', 'distributed-dispersed'], ['nfs']]) -class TestNfsGaneshaWithDifferentIOPatterns(NfsGaneshaClusterSetupClass): +class TestNfsGaneshaWithDifferentIOPatterns(GlusterBaseClass): """ Tests Nfs Ganesha stability by running different IO Patterns """ @@ -42,12 +41,6 @@ class TestNfsGaneshaWithDifferentIOPatterns(NfsGaneshaClusterSetupClass): """ cls.get_super_method(cls, 'setUpClass')() - # Setup nfs-ganesha if not exists. - ret = cls.setup_nfs_ganesha() - if not ret: - raise ExecutionError("Failed to setup nfs-ganesha cluster") - g.log.info("nfs-ganesha cluster is healthy") - # Install epel if not install_epel(cls.clients): raise ExecutionError("Failed to install epel") @@ -128,8 +121,3 @@ class TestNfsGaneshaWithDifferentIOPatterns(NfsGaneshaClusterSetupClass): if not ret: raise ExecutionError("Failed to cleanup volume") g.log.info("Cleanup volume %s completed successfully", self.volname) - - @classmethod - def tearDownClass(cls): - cls.get_super_method(cls, 'tearDownClass')( - delete_nfs_ganesha_cluster=False) diff --git a/tests/functional/nfs_ganesha/test_nfs_ganesha_sanity.py b/tests/functional/nfs_ganesha/test_nfs_ganesha_sanity.py index 2bca6d1c9..18feef31b 100755 --- a/tests/functional/nfs_ganesha/test_nfs_ganesha_sanity.py +++ b/tests/functional/nfs_ganesha/test_nfs_ganesha_sanity.py @@ -19,23 +19,21 @@ """ from glusto.core import Glusto as g -from glustolibs.gluster.gluster_base_class import runs_on -from glustolibs.gluster.nfs_ganesha_libs import ( - NfsGaneshaClusterSetupClass) +from glustolibs.gluster.gluster_base_class import runs_on, GlusterBaseClass from glustolibs.gluster.exceptions import ExecutionError from glustolibs.misc.misc_libs import ( - upload_scripts, - git_clone_and_compile) + upload_scripts, + git_clone_and_compile) from glustolibs.gluster.nfs_ganesha_ops import ( - is_nfs_ganesha_cluster_in_healthy_state, 
- set_acl) + is_nfs_ganesha_cluster_in_healthy_state, + set_acl) from glustolibs.io.utils import validate_io_procs @runs_on([['replicated', 'distributed', 'distributed-replicated', 'dispersed', 'distributed-dispersed'], ['nfs']]) -class TestNfsGaneshaSanity(NfsGaneshaClusterSetupClass): +class TestNfsGaneshaSanity(GlusterBaseClass): """ Tests to verify NFS Ganesha Sanity. """ @@ -47,12 +45,6 @@ class TestNfsGaneshaSanity(NfsGaneshaClusterSetupClass): """ cls.get_super_method(cls, 'setUpClass')() - # Setup nfs-ganesha if not exists. - ret = cls.setup_nfs_ganesha() - if not ret: - raise ExecutionError("Failed to setup nfs-ganesha cluster") - g.log.info("nfs-ganesha cluster is healthy") - # Upload IO scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on " "mounts", cls.clients) @@ -233,8 +225,3 @@ class TestNfsGaneshaSanity(NfsGaneshaClusterSetupClass): "Check log errors for more info") else: g.log.info("Test repo cleanup successfull on all clients") - - @classmethod - def tearDownClass(cls): - cls.get_super_method(cls, 'tearDownClass')( - delete_nfs_ganesha_cluster=False) diff --git a/tests/functional/nfs_ganesha/test_nfs_ganesha_volume_exports.py b/tests/functional/nfs_ganesha/test_nfs_ganesha_volume_exports.py index 8c90f953d..bb1f2f71e 100755 --- a/tests/functional/nfs_ganesha/test_nfs_ganesha_volume_exports.py +++ b/tests/functional/nfs_ganesha/test_nfs_ganesha_volume_exports.py @@ -26,9 +26,8 @@ from time import sleep from glusto.core import Glusto as g -from glustolibs.gluster.gluster_base_class import runs_on +from glustolibs.gluster.gluster_base_class import runs_on, GlusterBaseClass from glustolibs.gluster.nfs_ganesha_libs import ( - NfsGaneshaClusterSetupClass, wait_for_nfs_ganesha_volume_to_get_exported, wait_for_nfs_ganesha_volume_to_get_unexported) from glustolibs.gluster.nfs_ganesha_ops import ( @@ -49,23 +48,11 @@ from glustolibs.gluster.lib_utils import get_servers_unused_bricks_dict @runs_on([['replicated', 
'distributed', 'distributed-replicated', 'dispersed', 'distributed-dispersed'], ['nfs']]) -class TestNfsGaneshaVolumeExports(NfsGaneshaClusterSetupClass): +class TestNfsGaneshaVolumeExports(GlusterBaseClass): """ Tests to verify Nfs Ganesha exports, cluster enable/disable functionality. """ - @classmethod - def setUpClass(cls): - """ - Setup nfs-ganesha if not exists. - """ - cls.get_super_method(cls, 'setUpClass')() - - # Setup nfs-ganesha if not exists. - ret = cls.setup_nfs_ganesha() - if not ret: - raise ExecutionError("Failed to setup nfs-ganesha cluster") - g.log.info("nfs-ganesha cluster is healthy") def setUp(self): """ @@ -172,6 +159,59 @@ class TestNfsGaneshaVolumeExports(NfsGaneshaClusterSetupClass): "ganesha.enable 'on'" % self.volname) g.log.info("Exported volume after enabling nfs-ganesha cluster") + def test_nfs_ganesha_exportID_after_vol_restart(self): + """ + Tests script to check nfs-ganesha volume gets exported with same + Export ID after multiple volume restarts. + Steps: + 1. Create and Export the Volume + 2. Stop and Start the volume multiple times + 3. Check for export ID + Export ID should not change + """ + for i in range(1, 4): + g.log.info("Testing nfs ganesha exportID after volume stop and " + "start.\n Count : %s", str(i)) + + # Stopping volume + ret = volume_stop(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to stop volume %s" % self.volname)) + g.log.info("Volume is stopped") + + # Waiting for few seconds for volume unexport. Max wait time is + # 120 seconds. + ret = wait_for_nfs_ganesha_volume_to_get_unexported(self.mnode, + self.volname) + self.assertTrue(ret, ("Failed to unexport volume %s after " + "stopping volume" % self.volname)) + g.log.info("Volume is unexported via ganesha") + + # Starting volume + ret = volume_start(self.mnode, self.volname) + self.assertTrue(ret, ("Failed to start volume %s" % self.volname)) + g.log.info("Volume is started") + + # Waiting for few seconds for volume export. 
Max wait time is + # 120 seconds. + ret = wait_for_nfs_ganesha_volume_to_get_exported(self.mnode, + self.volname) + self.assertTrue(ret, ("Failed to export volume %s after " + "starting volume" % self.volname)) + g.log.info("Volume is exported via ganesha") + + # Check for Export ID + cmd = ("cat /run/gluster/shared_storage/nfs-ganesha/exports/" + "export.*.conf | grep Export_Id | grep -Eo '[0-9]'") + ret, out, _ = g.run(self.mnode, cmd) + self.assertEqual(ret, 0, "Unable to get export ID of the volume %s" + % self.volname) + g.log.info("Successful in getting volume export ID: %s " % out) + self.assertEqual(out.strip("\n"), "2", + "Export ID changed after export and unexport " + "of volume: %s" % out) + g.log.info("Export ID of volume is same after export " + "and unexport: %s" % out) + def tearDown(self): """ Unexport volume @@ -197,16 +237,11 @@ class TestNfsGaneshaVolumeExports(NfsGaneshaClusterSetupClass): raise ExecutionError("Failed to cleanup volume") g.log.info("Cleanup volume %s completed successfully", self.volname) - @classmethod - def tearDownClass(cls): - cls.get_super_method(cls, 'tearDownClass')( - delete_nfs_ganesha_cluster=False) - @runs_on([['replicated', 'distributed', 'distributed-replicated', 'dispersed', 'distributed-dispersed'], ['nfs']]) -class TestNfsGaneshaVolumeExportsWithIO(NfsGaneshaClusterSetupClass): +class TestNfsGaneshaVolumeExportsWithIO(GlusterBaseClass): """ Tests to verify nfs ganesha features when IO is in progress. """ @classmethod def setUpClass(cls): """ Setup and mount volume """ cls.get_super_method(cls, 'setUpClass')() - # Setup nfs-ganesha if not exists.
- ret = cls.setup_nfs_ganesha() - if not ret: - raise ExecutionError("Failed to setup nfs-ganesha cluster") - g.log.info("nfs-ganesha cluster is healthy") - # Upload IO scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on " "mounts", cls.clients) @@ -338,16 +367,11 @@ class TestNfsGaneshaVolumeExportsWithIO(NfsGaneshaClusterSetupClass): raise ExecutionError("Failed to cleanup volume") g.log.info("Cleanup volume %s completed successfully", self.volname) - @classmethod - def tearDownClass(cls): - cls.get_super_method(cls, 'tearDownClass')( - delete_nfs_ganesha_cluster=False) - @runs_on([['replicated', 'distributed', 'distributed-replicated', 'dispersed', 'distributed-dispersed'], ['nfs']]) -class TestNfsGaneshaMultiVolumeExportsWithIO(NfsGaneshaClusterSetupClass): +class TestNfsGaneshaMultiVolumeExportsWithIO(GlusterBaseClass): """ Tests to verify multiple volumes gets exported when IO is in progress. """ @@ -359,12 +383,6 @@ class TestNfsGaneshaMultiVolumeExportsWithIO(NfsGaneshaClusterSetupClass): """ cls.get_super_method(cls, 'setUpClass')() - # Setup nfs-ganesha if not exists. 
- ret = cls.setup_nfs_ganesha() - if not ret: - raise ExecutionError("Failed to setup nfs-ganesha cluster") - g.log.info("nfs-ganesha cluster is healthy") - # Upload IO scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on " "mounts", cls.clients) @@ -515,16 +533,11 @@ class TestNfsGaneshaMultiVolumeExportsWithIO(NfsGaneshaClusterSetupClass): raise ExecutionError("Failed to cleanup volume") g.log.info("Cleanup volume %s completed successfully", self.volname) - @classmethod - def tearDownClass(cls): - cls.get_super_method(cls, 'tearDownClass')( - delete_nfs_ganesha_cluster=False) - @runs_on([['replicated', 'distributed', 'distributed-replicated', 'dispersed', 'distributed-dispersed'], ['nfs']]) -class TestNfsGaneshaSubDirExportsWithIO(NfsGaneshaClusterSetupClass): +class TestNfsGaneshaSubDirExportsWithIO(GlusterBaseClass): """ Tests to verify nfs-ganesha sub directory exports. """ @@ -536,12 +549,6 @@ class TestNfsGaneshaSubDirExportsWithIO(NfsGaneshaClusterSetupClass): """ cls.get_super_method(cls, 'setUpClass')() - # Setup nfs-ganesha if not exists. 
- ret = cls.setup_nfs_ganesha() - if not ret: - raise ExecutionError("Failed to setup nfs-ganesha cluster") - g.log.info("nfs-ganesha cluster is healthy") - # Upload IO scripts for running IO on mounts g.log.info("Upload io scripts to clients %s for running IO on " "mounts", cls.clients) @@ -754,8 +761,3 @@ class TestNfsGaneshaSubDirExportsWithIO(NfsGaneshaClusterSetupClass): if not ret: raise ExecutionError("Failed to cleanup volume") g.log.info("Cleanup volume %s completed successfully", self.volname) - - @classmethod - def tearDownClass(cls): - cls.get_super_method(cls, 'tearDownClass')( - delete_nfs_ganesha_cluster=False) diff --git a/tests/functional/quota/test_alert_time_out.py b/tests/functional/quota/test_alert_time_out.py index 1eaa8258b..626a04d2b 100755 --- a/tests/functional/quota/test_alert_time_out.py +++ b/tests/functional/quota/test_alert_time_out.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -55,20 +55,17 @@ class QuotaTimeOut(GlusterBaseClass): % cls.volname) g.log.info("Successful in Setup and Mount Volume %s", cls.volname) - @classmethod - def tearDownClass(cls): - """ - Clean up the volume and umount volume from client - """ - # Stopping the volume + def tearDown(self): + + # Unmount and cleanup original volume g.log.info("Starting to Unmount Volume and Cleanup Volume") - ret = cls.unmount_volume_and_cleanup_volume(mounts=cls.mounts) + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: - raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") - g.log.info("Successful in Unmount Volume and Cleanup Volume") + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") - # Calling GlusterBaseClass 
tearDownClass - cls.get_super_method(cls, 'tearDownClass')() + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() def test_alert_time_out(self): """ @@ -281,9 +278,9 @@ class QuotaTimeOut(GlusterBaseClass): g.log.info("Files creation stopped on mountpoint once exceeded " "hard limit") - # Inserting sleep of 2 seconds so the alert message gets enough time + # Inserting sleep of 6 seconds so the alert message gets enough time # to be logged - time.sleep(2) + time.sleep(6) # Append unique string to the brick log g.log.info("Appending string 'appended_string_6' to the log:") diff --git a/tests/functional/quota/test_deem_statfs.py b/tests/functional/quota/test_deem_statfs.py index 5da70d896..92cc44e74 100644 --- a/tests/functional/quota/test_deem_statfs.py +++ b/tests/functional/quota/test_deem_statfs.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2019 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -48,21 +48,17 @@ class QuotaStatvfs(GlusterBaseClass): % cls.volname) g.log.info("Successful in Setup and Mount Volume %s", cls.volname) - @classmethod - def tearDownClass(cls): - """ - Clean up the volume and umount volume from client - """ + def tearDown(self): - # stopping the volume + # Unmount and cleanup original volume g.log.info("Starting to Unmount Volume and Cleanup Volume") - ret = cls.unmount_volume_and_cleanup_volume(mounts=cls.mounts) + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: - raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") - g.log.info("Successful in Unmount Volume and Cleanup Volume") + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") - # calling GlusterBaseClass tearDownClass - 
cls.get_super_method(cls, 'tearDownClass')() + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() def test_quota_statvfs(self): """ diff --git a/tests/functional/quota/test_deem_statfs_quotad.py b/tests/functional/quota/test_deem_statfs_quotad.py index 7773d4d36..06e87f7e8 100644 --- a/tests/functional/quota/test_deem_statfs_quotad.py +++ b/tests/functional/quota/test_deem_statfs_quotad.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -40,20 +40,17 @@ class QuotaDeemStatfsAndQuotad(GlusterBaseClass): raise ExecutionError("Failed to Setup_Volume") g.log.info("Successful in Setup Volume") - @classmethod - def tearDownClass(cls): - """ - Clean up the volume - """ - # stopping the volume and clean up the volume + def tearDown(self): + + # Unmount and cleanup original volume g.log.info("Starting to Cleanup Volume") - ret = cls.cleanup_volume() + ret = self.cleanup_volume() if not ret: - raise ExecutionError("Failed to Cleanup Volume") - g.log.info("Successful in Cleanup Volume") + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") - # calling GlusterBaseClass tearDownClass - cls.get_super_method(cls, 'tearDownClass')() + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() def test_quota_deem_statfs_quotad(self): """ diff --git a/tests/functional/quota/test_limit_usage_deep_dir.py b/tests/functional/quota/test_limit_usage_deep_dir.py index 94dacfe9f..b024f575d 100644 --- a/tests/functional/quota/test_limit_usage_deep_dir.py +++ b/tests/functional/quota/test_limit_usage_deep_dir.py @@ -32,8 +32,7 @@ from glustolibs.gluster.glusterdir import (mkdir, rmdir) -@runs_on([['distributed-replicated', 
'replicated', 'distributed', - 'dispersed', 'distributed-dispersed'], +@runs_on([['distributed-replicated', 'replicated', 'distributed'], ['glusterfs', 'nfs']]) class LimitUsageDeepDir(GlusterBaseClass): diff --git a/tests/functional/quota/test_multi_value_limit.py b/tests/functional/quota/test_multi_value_limit.py index 046f0f6a3..76187bb64 100644 --- a/tests/functional/quota/test_multi_value_limit.py +++ b/tests/functional/quota/test_multi_value_limit.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -50,21 +50,17 @@ class QuotaMultiValueLimits(GlusterBaseClass): % cls.volname) g.log.info("Successful in Setup and Mount Volume %s", cls.volname) - @classmethod - def tearDownClass(cls): - """ - Clean up the volume and umount volume from client - """ + def tearDown(self): - # stopping the volume + # Unmount and cleanup original volume g.log.info("Starting to Unmount Volume and Cleanup Volume") - ret = cls.unmount_volume_and_cleanup_volume(mounts=cls.mounts) + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: - raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") - g.log.info("Successful in Unmount Volume and Cleanup Volume") + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") - # calling GlusterBaseClass tearDownClass - cls.get_super_method(cls, 'tearDownClass')() + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() def test_quota_multi_value_limits(self): # pylint: disable=too-many-statements diff --git a/tests/functional/quota/test_quota_file_larger_than_limit.py b/tests/functional/quota/test_quota_file_larger_than_limit.py index 4bee23b0a..ed777fbf5 100644 --- 
a/tests/functional/quota/test_quota_file_larger_than_limit.py +++ b/tests/functional/quota/test_quota_file_larger_than_limit.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -50,20 +50,17 @@ class QuotaFileLargerThanLimit(GlusterBaseClass): % cls.volname) g.log.info("Successful in Setup and Mount Volume %s", cls.volname) - @classmethod - def tearDownClass(cls): - """ - Clean up the volume and umount volume from client - """ - # stopping the volume + def tearDown(self): + + # Unmount and cleanup original volume g.log.info("Starting to Unmount Volume and Cleanup Volume") - ret = cls.unmount_volume_and_cleanup_volume(mounts=cls.mounts) + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: - raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") - g.log.info("Successful in Unmount Volume and Cleanup Volume") + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") - # calling GlusterBaseClass tearDownClass - cls.get_super_method(cls, 'tearDownClass')() + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() def test_quota_file_larger_than_limit(self): # pylint: disable=too-many-statements diff --git a/tests/functional/quota/test_quota_renamed_dir.py b/tests/functional/quota/test_quota_renamed_dir.py index d8b6acae6..e4bc11233 100755 --- a/tests/functional/quota/test_quota_renamed_dir.py +++ b/tests/functional/quota/test_quota_renamed_dir.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -47,20 +47,17 @@ class TestQuotaRenamedDir(GlusterBaseClass): % cls.volname) g.log.info("Successful in Setup and Mount Volume %s", cls.volname) - @classmethod - def tearDownClass(cls): - """ - Clean up the volume and umount volume from client - """ - # stopping the volume + def tearDown(self): + + # Unmount and cleanup original volume g.log.info("Starting to Unmount Volume and Cleanup Volume") - ret = cls.unmount_volume_and_cleanup_volume(mounts=cls.mounts) + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: - raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") - g.log.info("Successful in Unmount Volume and Cleanup Volume") + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") - # calling GlusterBaseClass tearDownClass - cls.get_super_method(cls, 'tearDownClass')() + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() def test_quota_with_renamed_dir(self): """ diff --git a/tests/functional/quota/test_quota_single_brick_volume.py b/tests/functional/quota/test_quota_single_brick_volume.py index 33537a115..cb228baee 100644 --- a/tests/functional/quota/test_quota_single_brick_volume.py +++ b/tests/functional/quota/test_quota_single_brick_volume.py @@ -66,21 +66,17 @@ class QuotaListPathValues(GlusterBaseClass): raise ExecutionError("Failed to Setup_Volume and Mount_Volume") g.log.info("Successful in Setup and Mount Volume %s", cls.volname) - @classmethod - def tearDownClass(cls): - """ - Clean up the volume and umount volume from client - """ + def tearDown(self): - # stopping the volume + # Unmount and cleanup original volume g.log.info("Starting to Unmount Volume and Cleanup Volume") - ret = cls.unmount_volume_and_cleanup_volume(mounts=cls.mounts) + ret 
= self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: - raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") - g.log.info("Successful in Unmount Volume and Cleanup Volume") + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") - # calling GlusterBaseClass tearDownClass - cls.get_super_method(cls, 'tearDownClass')() + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() def test_quota_single_brick_volume(self): """ diff --git a/tests/functional/quota/test_quota_symlink_limit.py b/tests/functional/quota/test_quota_symlink_limit.py index 165640eee..a4c27dac9 100644 --- a/tests/functional/quota/test_quota_symlink_limit.py +++ b/tests/functional/quota/test_quota_symlink_limit.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -45,20 +45,17 @@ class QuotaUniqueSoftLimit(GlusterBaseClass): % cls.volname) g.log.info("Successful in Setup and Mount Volume %s", cls.volname) - @classmethod - def tearDownClass(cls): - """ - Clean up the volume and umount volume from client - """ - # Stopping the volume + def tearDown(self): + + # Unmount and cleanup original volume g.log.info("Starting to Unmount Volume and Cleanup Volume") - ret = cls.unmount_volume_and_cleanup_volume(mounts=cls.mounts) + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: - raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") - g.log.info("Successful in Unmount Volume and Cleanup Volume") + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") - # Calling GlusterBaseClass tearDownClass - cls.get_super_method(cls, 
'tearDownClass')() + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() def test_quota_symlink_limit(self): """ diff --git a/tests/functional/quota/test_quota_unique_soft_limit.py b/tests/functional/quota/test_quota_unique_soft_limit.py index ad7d3e4c9..14bb15148 100644 --- a/tests/functional/quota/test_quota_unique_soft_limit.py +++ b/tests/functional/quota/test_quota_unique_soft_limit.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -48,20 +48,17 @@ class QuotaUniqueSoftLimit(GlusterBaseClass): % cls.volname) g.log.info("Successful in Setup and Mount Volume %s", cls.volname) - @classmethod - def tearDownClass(cls): - """ - Clean up the volume and umount volume from client - """ - # Stopping the volume + def tearDown(self): + + # Unmount and cleanup original volume g.log.info("Starting to Unmount Volume and Cleanup Volume") - ret = cls.unmount_volume_and_cleanup_volume(mounts=cls.mounts) + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: - raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") - g.log.info("Successful in Unmount Volume and Cleanup Volume") + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") - # Calling GlusterBaseClass tearDownClass - cls.get_super_method(cls, 'tearDownClass')() + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() def test_quota_unique_soft_limit(self): """ diff --git a/tests/functional/quota/test_quota_volume_subdir_limits.py b/tests/functional/quota/test_quota_volume_subdir_limits.py index 4da2ae86d..aa5a8c13b 100644 --- a/tests/functional/quota/test_quota_volume_subdir_limits.py +++ 
b/tests/functional/quota/test_quota_volume_subdir_limits.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -49,21 +49,17 @@ class QuotaVolumeAndSubdirLimits(GlusterBaseClass): % cls.volname) g.log.info("Successful in Setup and Mount Volume %s", cls.volname) - @classmethod - def tearDownClass(cls): - """ - Clean up the volume and umount volume from client - """ + def tearDown(self): - # stopping the volume + # Unmount and cleanup original volume g.log.info("Starting to Unmount Volume and Cleanup Volume") - ret = cls.unmount_volume_and_cleanup_volume(mounts=cls.mounts) + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: - raise ExecutionError("Failed to Unmount Volume and Cleanup Volume") - g.log.info("Successful in Unmount Volume and Cleanup Volume") + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") - # calling GlusterBaseClass tearDownClass - cls.get_super_method(cls, 'tearDownClass')() + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() def test_quota_volume_subdir_limits(self): """ diff --git a/tests/functional/resource_leak/__init__.py b/tests/functional/resource_leak/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/tests/functional/resource_leak/__init__.py diff --git a/tests/functional/resource_leak/test_basic_memory_leaks.py b/tests/functional/resource_leak/test_basic_memory_leaks.py new file mode 100644 index 000000000..46b2c0c6d --- /dev/null +++ b/tests/functional/resource_leak/test_basic_memory_leaks.py @@ -0,0 +1,120 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.glusterdir import mkdir +from glustolibs.io.utils import (run_linux_untar, validate_io_procs, + wait_for_io_to_complete) +from glustolibs.io.memory_and_cpu_utils import ( + wait_for_logging_processes_to_stop) + + +@runs_on([['distributed-replicated', 'distributed-arbiter', + 'distributed-dispersed', 'distributed', 'replicated', + 'arbiter', 'dispersed'], ['glusterfs']]) +class TestBasicMemoryleak(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Set test_id for get gathering + self.test_id = self.id() + + # Set I/O flag to false + self.is_io_running = False + + # Creating Volume and mounting the volume + ret = self.setup_volume_and_mount_volume(self.mounts) + if not ret: + raise ExecutionError("Volume creation or mount failed: %s" + % self.volname) + + def tearDown(self): + + # Wait for I/O to complete + if self.is_io_running: + if wait_for_io_to_complete(self.list_of_io_processes, + self.mounts): + raise ExecutionError("Failed to wait for I/O to complete") + + # Unmounting and cleaning volume + ret = 
self.unmount_volume_and_cleanup_volume(self.mounts) + if not ret: + raise ExecutionError("Unable to delete volume %s" % self.volname) + + self.get_super_method(self, 'tearDown')() + + def test_basic_memory_leak(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Start I/O from mount point. + 3. Check if there are any memory leaks and OOM killers. + """ + # Start monitoring resource usage on servers and clients + monitor_proc_dict = self.start_memory_and_cpu_usage_logging( + self.test_id, count=30) + self.assertIsNotNone(monitor_proc_dict, + "Failed to start monitoring on servers and " + "clients") + + # Create a dir to start untar + self.linux_untar_dir = "{}/{}".format(self.mounts[1].mountpoint, + "linuxuntar") + ret = mkdir(self.mounts[1].client_system, self.linux_untar_dir) + self.assertTrue(ret, "Failed to create dir linuxuntar for untar") + + # Start multiple I/O from mount points + self.list_of_io_processes = [] + cmd = ("cd {};for i in `seq 1 100`; do mkdir dir.$i ;" + "for j in `seq 1 1000`; do dd if=/dev/random " + "of=dir.$i/testfile.$j bs=1k count=10;done;done" + .format(self.mounts[0].mountpoint)) + ret = g.run_async(self.mounts[0].client_system, cmd) + self.list_of_io_processes = [ret] + + # Start linux untar on dir linuxuntar + ret = run_linux_untar(self.mounts[1].client_system, + self.mounts[1].mountpoint, + dirs=tuple(['linuxuntar'])) + self.list_of_io_processes += ret + self.is_io_running = True + + # Wait for I/O to complete and validate I/O on mount points + ret = validate_io_procs(self.list_of_io_processes, self.mounts) + self.assertTrue(ret, "I/O failed on mount point") + self.is_io_running = False + + # Wait for monitoring processes to complete + ret = wait_for_logging_processes_to_stop(monitor_proc_dict, + cluster=True) + self.assertTrue(ret, + "ERROR: Failed to stop monitoring processes") + + # Check if there are any memory leaks and OOM killers + ret = self.check_for_memory_leaks_and_oom_kills_on_servers( + 
self.test_id) + self.assertFalse(ret, + "Memory leak and OOM kills check failed on servers") + + ret = self.check_for_memory_leaks_and_oom_kills_on_clients( + self.test_id) + self.assertFalse(ret, + "Memory leak and OOM kills check failed on clients") + g.log.info("No memory leaks or OOM kills found on serves and clients") diff --git a/tests/functional/resource_leak/test_memory_leak_in_shd_with_cache_invalidation_on.py b/tests/functional/resource_leak/test_memory_leak_in_shd_with_cache_invalidation_on.py new file mode 100644 index 000000000..3a22a5068 --- /dev/null +++ b/tests/functional/resource_leak/test_memory_leak_in_shd_with_cache_invalidation_on.py @@ -0,0 +1,117 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.heal_ops import trigger_heal +from glustolibs.gluster.volume_ops import set_volume_options +from glustolibs.io.utils import (validate_io_procs, wait_for_io_to_complete) +from glustolibs.io.memory_and_cpu_utils import ( + wait_for_logging_processes_to_stop) + + +@runs_on([['distributed-replicated', 'distributed-arbiter', + 'distributed-dispersed', 'replicated', + 'arbiter', 'dispersed'], ['glusterfs']]) +class TestMemoryLeakInShdWithCacheInvalidationOn(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Set test_id for get gathering + self.test_id = self.id() + + # Set I/O flag to false + self.is_io_running = False + + # Creating Volume and mounting the volume + ret = self.setup_volume_and_mount_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Volume creation or mount failed: %s" + % self.volname) + + def tearDown(self): + + # Wait for I/O to complete + if self.is_io_running: + if wait_for_io_to_complete(self.list_of_io_processes, + self.mounts[0]): + raise ExecutionError("Failed to wait for I/O to complete") + + # Unmounting and cleaning volume + ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Unable to delete volume %s" % self.volname) + + self.get_super_method(self, 'tearDown')() + + def test_memory_leak_in_shd_with_cache_invalidation_on(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Set features.cache-invalidation to ON. + 3. Start I/O from mount point. + 4. Run gluster volume heal command in a loop + 5. Check if there are any memory leaks and OOM killers on servers. 
+ """ + # Start monitoring resource usage on servers and clients + monitor_proc_dict = self.start_memory_and_cpu_usage_logging( + self.test_id, count=10) + self.assertIsNotNone(monitor_proc_dict, + "Failed to start monitoring on servers and" + " clients") + + # Set features.cache-invalidation to ON + ret = set_volume_options(self.mnode, self.volname, + {'features.cache-invalidation': 'on'}) + self.assertTrue(ret, "Failed to set features.cache-invalidation to ON") + g.log.info("Successfully set features.cache-invalidation to ON") + + # Start multiple I/O from mount points + self.list_of_io_processes = [] + cmd = ("cd {};for i in `seq 1 1000`;do echo 'abc' > myfile;done" + .format(self.mounts[0].mountpoint)) + ret = g.run_async(self.mounts[0].client_system, cmd) + self.list_of_io_processes = [ret] + self.is_io_running = True + + # Run gluster volume heal command in a loop for 100 iterations + for iteration in range(0, 100): + g.log.info("Running gluster volume heal command for %d time", + iteration) + ret = trigger_heal(self.mnode, self.volname) + self.assertTrue(ret, "Heal command triggered successfully") + g.log.info("Ran gluster volume heal command in a loop for " + "100 iterations.") + + # Wait for I/O to complete and validate I/O on mount points + ret = validate_io_procs(self.list_of_io_processes, self.mounts[0]) + self.assertTrue(ret, "I/O failed on mount point") + self.is_io_running = False + + # Wait for monitoring processes to complete + ret = wait_for_logging_processes_to_stop(monitor_proc_dict, + cluster=True) + self.assertTrue(ret, + "ERROR: Failed to stop monitoring processes") + + # Check if there are any memory leaks and OOM killers + ret = self.check_for_memory_leaks_and_oom_kills_on_servers( + self.test_id) + self.assertFalse(ret, + "Memory leak and OOM kills check failed on servers") diff --git a/tests/functional/resource_leak/test_memory_leaks_with_files_delete.py b/tests/functional/resource_leak/test_memory_leaks_with_files_delete.py new file mode 
100644 index 000000000..ab29fdbe7 --- /dev/null +++ b/tests/functional/resource_leak/test_memory_leaks_with_files_delete.py @@ -0,0 +1,113 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along` +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.glusterdir import get_dir_contents +from glustolibs.io.memory_and_cpu_utils import ( + wait_for_logging_processes_to_stop) +from glustolibs.gluster.brick_libs import get_all_bricks + + +@runs_on([['arbiter', 'distributed-arbiter'], ['glusterfs']]) +class TestMemoryLeakWithRm(GlusterBaseClass): + + def setUp(self): + + self.get_super_method(self, 'setUp')() + + # Set test_id for get gathering + self.test_id = self.id() + + # Set I/O flag to false + self.is_io_running = False + + # Creating Volume and mounting the volume + ret = self.setup_volume_and_mount_volume(self.mounts) + if not ret: + raise ExecutionError("Volume creation or mount failed: %s" + % self.volname) + + def tearDown(self): + + # Unmounting and cleaning volume + ret = self.unmount_volume_and_cleanup_volume(self.mounts) + if not ret: + raise ExecutionError("Unable to delete volume %s" % self.volname) + + 
self.get_super_method(self, 'tearDown')() + + def test_memory_leak_with_rm(self): + """ + Test case: + 1. Create a volume, start it and mount it. + 2. Create 10,000 files each of size 200K + 3. Delete the files created at step 2 + 4. Check if the files are deleted from backend + 5. Check if there are any memory leaks and OOM killers. + """ + # Start monitoring resource usage on servers and clients + monitor_proc_dict = self.start_memory_and_cpu_usage_logging( + self.test_id, count=30) + self.assertIsNotNone(monitor_proc_dict, + "Failed to start monitoring on servers and " + "clients") + # Create files on mount point + cmd = ('cd %s;for i in {1..10000};' + 'do dd if=/dev/urandom bs=200K count=1 of=file$i;done;' + 'rm -rf %s/file*' + % (self.mounts[0].mountpoint, self.mounts[0].mountpoint)) + ret, _, _ = g.run(self.mounts[0].client_system, cmd) + self.assertEqual(ret, 0, "Failed to create and delete files on" + " mountpoint") + g.log.info("Successfully created and removed files on mountpoint") + + # Delete files from mount point and check if all files + # are deleted or not from mount point as well as backend bricks. 
+ ret, _, _ = g.run(self.clients[0], + "rm -rf {}/*".format(self.mounts[0].mountpoint)) + self.assertFalse(ret, "rm -rf * failed on mount point") + + ret = get_dir_contents(self.clients[0], + "{}/".format(self.mounts[0].mountpoint)) + self.assertEqual(ret, [], "Unexpected: Files and directories still " + "seen from mount point") + + for brick in get_all_bricks(self.mnode, self.volname): + node, brick_path = brick.split(":") + ret = get_dir_contents(node, "{}/".format(brick_path)) + self.assertEqual(ret, [], "Unexpected: Files and dirs still seen " + "on brick %s on node %s" % (brick_path, node)) + g.log.info("rm -rf * on mount point successful") + + # Wait for monitoring processes to complete + ret = wait_for_logging_processes_to_stop(monitor_proc_dict, + cluster=True) + self.assertTrue(ret, + "ERROR: Failed to stop monitoring processes") + + # Check if there are any memory leaks and OOM killers + ret = self.check_for_memory_leaks_and_oom_kills_on_servers( + self.test_id) + self.assertFalse(ret, + "Memory leak and OOM kills check failed on servers") + + ret = self.check_for_memory_leaks_and_oom_kills_on_clients( + self.test_id) + self.assertFalse(ret, + "Memory leak and OOM kills check failed on clients") + g.log.info("No memory leaks or OOM kills found on serves and clients") diff --git a/tests/functional/resource_leak/test_verify_gluster_memleak_with_management_encryption.py b/tests/functional/resource_leak/test_verify_gluster_memleak_with_management_encryption.py new file mode 100644 index 000000000..25f8325df --- /dev/null +++ b/tests/functional/resource_leak/test_verify_gluster_memleak_with_management_encryption.py @@ -0,0 +1,231 @@ +# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + +from datetime import datetime, timedelta +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.lib_utils import get_usable_size_per_disk +from glustolibs.gluster.volume_libs import (get_subvols, bulk_volume_creation, + volume_stop, volume_start, + set_volume_options) +from glustolibs.io.memory_and_cpu_utils import ( + wait_for_logging_processes_to_stop) +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.io.utils import validate_io_procs +from glustolibs.gluster.brickmux_ops import (enable_brick_mux, + disable_brick_mux, + is_brick_mux_enabled) +from glustolibs.gluster.mount_ops import mount_volume, umount_volume + + +@runs_on([['distributed-replicated'], ['glusterfs']]) +class TestMemLeakAfterMgmntEncrypEnabled(GlusterBaseClass): + + def setUp(self): + """ + Setup and mount volume or raise ExecutionError + """ + self.get_super_method(self, 'setUp')() + self.test_id = self.id() + # Setup Volume + self.volume['dist_count'] = 2 + self.volume['replica_count'] = 3 + + ret = self.setup_volume_and_mount_volume([self.mounts[0]]) + if not ret: + raise ExecutionError("Failed to Setup and Mount Volume") + + # Disable I/O encryption + self._disable_io_encryption() + + def tearDown(self): + # Disable brick_mux + if is_brick_mux_enabled(self.mnode): + ret = disable_brick_mux(self.mnode) + self.assertTrue(ret, "Failed to brick multiplex") + 
g.log.info("Disable brick multiplex") + + # Unmount and cleanup original volume + ret = self.unmount_volume_and_cleanup_volume(mounts=[self.mounts[0]]) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def _run_io(self): + """ Run IO and fill vol upto ~88%""" + bricks = get_all_bricks(self.mnode, self.volname) + usable_size = int(get_usable_size_per_disk(bricks[0]) * 0.88) + + self.procs = [] + counter = 1 + for _ in get_subvols(self.mnode, self.volname)['volume_subvols']: + filename = "{}/test_file_{}".format(self.mounts[0].mountpoint, + str(counter)) + proc = g.run_async(self.mounts[0].client_system, + "fallocate -l {}G {}".format(usable_size, + filename)) + self.procs.append(proc) + counter += 1 + + def _perform_gluster_v_heal_for_12_hrs(self): + """ Run 'guster v heal info' for 12 hours""" + # Perform gluster heal info for 12 hours + end_time = datetime.now() + timedelta(hours=12) + while True: + curr_time = datetime.now() + cmd = "gluster volume heal %s info" % self.volname + ret, _, _ = g.run(self.mnode, cmd) + self.assertEqual(ret, 0, "Failed to execute heal info cmd") + if curr_time > end_time: + g.log.info("Successfully ran for 12 hours. 
Checking for " + "memory leaks") + break + + def _verify_memory_leak(self): + """ Verify memory leak is found """ + + ret = self.check_for_memory_leaks_and_oom_kills_on_servers( + self.test_id) + self.assertFalse(ret, + "Memory leak and OOM kills check failed on servers") + + ret = self.check_for_memory_leaks_and_oom_kills_on_clients( + self.test_id) + self.assertFalse(ret, + "Memory leak and OOM kills check failed on clients") + + def _disable_io_encryption(self): + """ Disables IO encryption """ + # UnMount Volume + g.log.info("Starting to Unmount Volume %s", self.volname) + ret, _, _ = umount_volume(self.mounts[0].client_system, + self.mounts[0].mountpoint, + mtype=self.mount_type) + self.assertEqual(ret, 0, "Failed to Unmount volume") + + # Stop Volume + ret, _, _ = volume_stop(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to Stop volume") + + # Disable server and client SSL usage + options = {"server.ssl": "off", + "client.ssl": "off"} + ret = set_volume_options(self.mnode, self.volname, options) + self.assertTrue(ret, "Failed to set volume options") + + # Start Volume + ret, _, _ = volume_start(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to Start volume") + + # Mount Volume + ret, _, _ = mount_volume(self.volname, mtype=self.mount_type, + mpoint=self.mounts[0].mountpoint, + mserver=self.mnode, + mclient=self.mounts[0].client_system) + self.assertEqual(ret, 0, "Failed to mount the volume back") + + def test_mem_leak_on_gluster_procs_with_management_encrpytion(self): + """ + Steps: + 1) Enable management encryption on the cluster. + 2) Create a 2X3 volume. + 3) Mount the volume using FUSE on a client node. + 4) Start doing IO on the mount (ran IO till the volume is ~88% full) + 5) Simultaneously start collecting the memory usage for + 'glusterfsd' process. + 6) Issue the command "# gluster v heal <volname> info" continuously + in a loop. 
+ """ + # Run IO + self._run_io() + + # Start monitoring resource usage on servers and clients + # default interval = 60 sec, count = 780 (60 *12) => for 12 hrs + monitor_proc_dict = self.start_memory_and_cpu_usage_logging( + self.test_id, count=780) + self.assertIsNotNone(monitor_proc_dict, + "Failed to start monitoring on servers and " + "clients") + + ret = validate_io_procs(self.procs, self.mounts) + self.assertTrue(ret, "IO Failed") + + self._perform_gluster_v_heal_for_12_hrs() + + # Wait for monitoring processes to complete + ret = wait_for_logging_processes_to_stop(monitor_proc_dict, + cluster=True) + self.assertTrue(ret, "ERROR: Failed to stop monitoring processes") + + # Check if there are any memory leaks and OOM killers + self._verify_memory_leak() + g.log.info("No memory leaks/OOM kills found on serves and clients") + + def test_mem_leak_on_gluster_procs_with_brick_multiplex(self): + """ + Steps: + 1) Enable cluster.brick-multiplex + 2) Enable SSL on management layer + 3) Start creating volumes + 4) Mount a volume and starting I/O + 5) Monitor the memory consumption by glusterd process + """ + + # Enable cluster.brick-mulitplex + ret = enable_brick_mux(self.mnode) + self.assertTrue(ret, "Failed to enable brick-multiplex") + + # Verify the operation + ret = is_brick_mux_enabled(self.mnode) + self.assertTrue(ret, "Brick mux enble op not successful") + + # Create few volumes + self.volume['replica_count'] = 3 + ret = bulk_volume_creation(self.mnode, 20, self.all_servers_info, + self.volume, is_force=True) + + self.assertTrue(ret, "Failed to create bulk volume") + + # Run IO + self._run_io() + + # Start memory usage logging + monitor_proc_dict = self.start_memory_and_cpu_usage_logging( + self.test_id, count=60) + self.assertIsNotNone(monitor_proc_dict, + "Failed to start monitoring on servers and " + "clients") + + ret = validate_io_procs(self.procs, self.mounts) + self.assertTrue(ret, "IO Failed") + + # Wait for monitoring processes to complete + ret = 
wait_for_logging_processes_to_stop(monitor_proc_dict, + cluster=True) + self.assertTrue(ret, "ERROR: Failed to stop monitoring processes") + + # Check if there are any memory leaks and OOM killers + self._verify_memory_leak() + g.log.info("No memory leaks/OOM kills found on serves and clients") + + # Disable Brick multiplex + ret = disable_brick_mux(self.mnode) + self.assertTrue(ret, "Failed to brick multiplex") diff --git a/tests/functional/resource_leak/test_verify_gluster_memleak_with_ssl.py b/tests/functional/resource_leak/test_verify_gluster_memleak_with_ssl.py new file mode 100644 index 000000000..1d7edbe32 --- /dev/null +++ b/tests/functional/resource_leak/test_verify_gluster_memleak_with_ssl.py @@ -0,0 +1,128 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + +from datetime import datetime, timedelta +from glusto.core import Glusto as g +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.gluster.lib_utils import get_usable_size_per_disk +from glustolibs.gluster.volume_libs import get_subvols +from glustolibs.io.memory_and_cpu_utils import ( + wait_for_logging_processes_to_stop) +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.io.utils import validate_io_procs + + +@runs_on([['distributed-replicated'], ['glusterfs']]) +class TestMemLeakAfterSSLEnabled(GlusterBaseClass): + + def setUp(self): + """ + Setup and mount volume or raise ExecutionError + """ + self.get_super_method(self, 'setUp')() + self.test_id = self.id() + # Setup Volume + self.volume['dist_count'] = 2 + self.volume['replica_count'] = 3 + ret = self.setup_volume_and_mount_volume(self.mounts) + if not ret: + g.log.error("Failed to Setup and Mount Volume") + raise ExecutionError("Failed to Setup and Mount Volume") + + def tearDown(self): + + # Unmount and cleanup original volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount the vol & cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + def test_mem_leak_on_gluster_procs_after_ssl_enabled(self): + """ + Steps: + Scenario 1: + 1) Enable management encryption on the cluster. + 2) Create a 2X3 volume. + 3) Mount the volume using FUSE on a client node. + 4) Start doing IO on the mount (ran IO till the volume is ~88% full) + 5) Simultaneously start collecting the memory usage for + 'glusterfsd' process. + 6) Issue the command "# gluster v heal <volname> info" continuously + in a loop. 
+ """ + + # Fill the vol approx 88% + bricks = get_all_bricks(self.mnode, self.volname) + usable_size = int(get_usable_size_per_disk(bricks[0]) * 0.88) + + procs = [] + counter = 1 + for _ in get_subvols(self.mnode, self.volname)['volume_subvols']: + filename = "{}/test_file_{}".format(self.mounts[0].mountpoint, + str(counter)) + proc = g.run_async(self.mounts[0].client_system, + "fallocate -l {}G {}".format(usable_size, + filename)) + procs.append(proc) + counter += 1 + + # Start monitoring resource usage on servers and clients + # default interval = 60 sec + # count = 780 (60 *12) => for 12 hrs + monitor_proc_dict = self.start_memory_and_cpu_usage_logging( + self.test_id, count=780) + self.assertIsNotNone(monitor_proc_dict, + "Failed to start monitoring on servers and " + "clients") + + ret = validate_io_procs(procs, self.mounts) + self.assertTrue(ret, "IO Failed") + + # Perform gluster heal info for 12 hours + end_time = datetime.now() + timedelta(hours=12) + while True: + curr_time = datetime.now() + cmd = "gluster volume heal %s info" % self.volname + ret, _, _ = g.run(self.mnode, cmd) + self.assertEqual(ret, 0, "Failed to execute heal info cmd") + if curr_time > end_time: + g.log.info("Successfully ran for 12 hours. 
Checking for " + "memory leaks") + break + + # Wait for monitoring processes to complete + ret = wait_for_logging_processes_to_stop(monitor_proc_dict, + cluster=True) + self.assertTrue(ret, + "ERROR: Failed to stop monitoring processes") + + # Check if there are any memory leaks and OOM killers + ret = self.check_for_memory_leaks_and_oom_kills_on_servers( + self.test_id) + self.assertFalse(ret, + "Memory leak and OOM kills check failed on servers") + + ret = self.check_for_memory_leaks_and_oom_kills_on_clients( + self.test_id) + self.assertFalse(ret, + "Memory leak and OOM kills check failed on clients") + g.log.info( + "No memory leaks/OOM kills found on servers and clients") diff --git a/tests/functional/snapshot/test_activate_deactivate.py b/tests/functional/snapshot/test_activate_deactivate.py index d75931307..e3b46bb9c 100644 --- a/tests/functional/snapshot/test_activate_deactivate.py +++ b/tests/functional/snapshot/test_activate_deactivate.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2020 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -58,23 +58,15 @@ class TestActivateDeactivate(GlusterBaseClass): if ret != 0: raise ExecutionError("Snapshot Delete Failed") g.log.info("Successfully deleted all snapshots") - # Calling GlusterBaseClass tearDown - self.get_super_method(self, 'tearDown')() - @classmethod - def tearDownClass(cls): - """ - Clean up the volume & mount - """ - # stopping the volume and clean up the volume - g.log.info("Starting to Cleanup Volume") - ret = cls.cleanup_volume() + # Cleanup-volume + ret = self.cleanup_volume() if not ret: - raise ExecutionError("Failed to Cleanup Volume and mount") - g.log.info("Successful in Cleanup Volume and mount") + raise ExecutionError("Failed to Cleanup Volume") + g.log.info("Successful in Cleanup Volume") - # calling GlusterBaseClass tearDownClass - cls.get_super_method(cls, 'tearDownClass')() + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() def test_activate_deactivate(self): # pylint: disable=too-many-branches, too-many-statements diff --git a/tests/functional/snapshot/test_activate_on_create.py b/tests/functional/snapshot/test_activate_on_create.py index 82d8401af..939641b9c 100644 --- a/tests/functional/snapshot/test_activate_on_create.py +++ b/tests/functional/snapshot/test_activate_on_create.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2020 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -67,23 +67,14 @@ class TestActivateOnCreate(GlusterBaseClass): g.log.info("set_snap_config Success to disable " "activate-on-create") - # Calling GlusterBaseClass tearDown - self.get_super_method(self, 'tearDown')() - - @classmethod - def tearDownClass(cls): - """ - Clean up the volume & mount - """ - # stopping the volume and clean up the volume - g.log.info("Starting to Cleanup Volume") - ret = cls.cleanup_volume() + # Cleanup-volume + ret = self.cleanup_volume() if not ret: - raise ExecutionError("Failed to Cleanup Volume and mount") - g.log.info("Successful in Cleanup Volume and mount") + raise ExecutionError("Failed to Cleanup Volume") + g.log.info("Successful in Cleanup Volume") - # calling GlusterBaseClass tearDownClass - cls.get_super_method(cls, 'tearDownClass')() + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() def test_activate_on_create(self): # pylint: disable=too-many-branches, too-many-statements diff --git a/tests/functional/snapshot/test_auto_delete.py b/tests/functional/snapshot/test_auto_delete.py index 41aa6dc64..d1e934c02 100644 --- a/tests/functional/snapshot/test_auto_delete.py +++ b/tests/functional/snapshot/test_auto_delete.py @@ -85,13 +85,8 @@ class TestSnapAutoDelete(GlusterBaseClass): "auto-delete")
g.log.info("Successfully set the snapshot config options to default")
- @classmethod
- def tearDownClass(cls):
- # calling GlusterBaseClass tearDownClass
- cls.get_super_method(cls, 'tearDownClass')()
-
- # Clean up the volume
- ret = cls.cleanup_volume()
+ # Cleanup-volume
+ ret = self.cleanup_volume()
if not ret:
raise ExecutionError("Failed to Cleanup Volume")
g.log.info("Successful in Cleanup Volume")
diff --git a/tests/functional/snapshot/test_mount_snap.py b/tests/functional/snapshot/test_mount_snap.py index 9b0bf2bfe..ef918ba8b 100644 --- a/tests/functional/snapshot/test_mount_snap.py +++ b/tests/functional/snapshot/test_mount_snap.py @@ -84,14 +84,17 @@ class TestSnapMountSnapshot(GlusterBaseClass): g.log.info("Starting IO on all mounts...") g.log.info("mounts: %s", self.mounts) all_mounts_procs = [] + self.counter = 1 for mount_obj in self.mounts: cmd = ("/usr/bin/env python %s create_files " - "-f 10 --base-file-name file %s" % ( + "-f 10 --base-file-name file%d %s" % ( self.script_upload_path, + self.counter, mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) all_mounts_procs.append(proc) + self.counter += 100 # Validate I/O self.assertTrue( diff --git a/tests/functional/snapshot/test_restore_online_vol.py b/tests/functional/snapshot/test_restore_online_vol.py index 2a7f39cae..2fa46012b 100644 --- a/tests/functional/snapshot/test_restore_online_vol.py +++ b/tests/functional/snapshot/test_restore_online_vol.py @@ -112,9 +112,9 @@ class SnapRSOnline(GlusterBaseClass): "--num-of-files 2 %s" % ( self.script_upload_path, self.counter, mount_obj.mountpoint)) - proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user) + self.counter += 100 self.all_mounts_procs.append(proc) self.io_validation_complete = False diff --git a/tests/functional/snapshot/test_snap_delete_multiple.py b/tests/functional/snapshot/test_snap_delete_multiple.py index ed3088ca0..e1be4732f 100644 --- a/tests/functional/snapshot/test_snap_delete_multiple.py +++ b/tests/functional/snapshot/test_snap_delete_multiple.py @@ -99,7 +99,7 @@ class SnapshotCloneDeleteMultiple(GlusterBaseClass): """ # Perform I/O - def io_operation(): + def io_operation(name): g.log.info("Starting to Perform I/O") all_mounts_procs = [] for mount_obj in self.mounts: @@ -107,10 +107,11 @@ class SnapshotCloneDeleteMultiple(GlusterBaseClass): mount_obj.client_system, 
mount_obj.mountpoint) # Create files g.log.info('Creating files...') - command = ("/usr/bin/env python %s create_files -f 100 " - "--fixed-file-size 1k %s" % ( - self.script_upload_path, - mount_obj.mountpoint)) + fname = "{}-{}".format(mount_obj.client_system, name) + command = ("/usr/bin/env python {} create_files -f 100 " + "--fixed-file-size 1k --base-file-name {}" + " {}".format(self.script_upload_path, + fname, mount_obj.mountpoint)) proc = g.run_async(mount_obj.client_system, command, user=mount_obj.user) all_mounts_procs.append(proc) @@ -217,14 +218,14 @@ class SnapshotCloneDeleteMultiple(GlusterBaseClass): self.assertEqual(ret1, 30, "Failed") ret2 = mount_clone_and_io(self.clone1, self.mpoint1) self.assertEqual(ret2, 0, "Failed to mount volume") - ret = io_operation() + ret = io_operation("first") self.assertEqual(ret, 0, "Failed to perform io") ret3 = create_snap(value2, self.clone1, self.snap2, self.clone2, ret1) self.assertEqual(ret3, 40, "Failed") ret4 = mount_clone_and_io(self.clone2, self.mpoint2) self.assertEqual(ret4, 0, "Failed to mount volume") - ret = io_operation() + ret = io_operation("second") self.assertEqual(ret, 0, "Failed to perform io") ret1 = create_snap(value3, self.clone2, self.snap2, self.clone2, ret3) diff --git a/tests/functional/snapshot/test_snap_self_heal.py b/tests/functional/snapshot/test_snap_self_heal.py index 3ee80e100..9cc6d8298 100644 --- a/tests/functional/snapshot/test_snap_self_heal.py +++ b/tests/functional/snapshot/test_snap_self_heal.py @@ -166,10 +166,8 @@ class SnapshotSelfheal(GlusterBaseClass): g.log.info("Starting to bring bricks to offline") bricks_to_bring_offline_dict = (select_bricks_to_bring_offline( self.mnode, self.volname)) - bricks_to_bring_offline = list(filter(None, ( - bricks_to_bring_offline_dict['hot_tier_bricks'] + - bricks_to_bring_offline_dict['cold_tier_bricks'] + - bricks_to_bring_offline_dict['volume_bricks']))) + bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks'] + 
g.log.info("Brick to bring offline: %s ", bricks_to_bring_offline) ret = bring_bricks_offline(self.clone, bricks_to_bring_offline) self.assertTrue(ret, "Failed to bring the bricks offline") diff --git a/tests/functional/snapshot/test_snap_uss_snapd.py b/tests/functional/snapshot/test_snap_uss_snapd.py new file mode 100644 index 000000000..e008a679a --- /dev/null +++ b/tests/functional/snapshot/test_snap_uss_snapd.py @@ -0,0 +1,377 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" + Description: + Test Cases in this module tests the USS functionality + before and after snapd is killed. validate snapd after + volume is started with force option. 
+""" +from os import path +from time import sleep +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.gluster.brick_libs import get_all_bricks +from glustolibs.gluster.mount_ops import (mount_volume, + is_mounted, unmount_mounts) +from glustolibs.gluster.volume_ops import (volume_start, + get_volume_info, + volume_stop) +from glustolibs.gluster.volume_libs import (log_volume_info_and_status, + cleanup_volume) +from glustolibs.gluster.snap_ops import (get_snap_list, + snap_create, + snap_activate, + snap_clone, terminate_snapd_on_node) +from glustolibs.gluster.uss_ops import (is_snapd_running, is_uss_enabled, + enable_uss, disable_uss, + uss_list_snaps) +from glustolibs.gluster.mount_ops import create_mount_objs +from glustolibs.io.utils import validate_io_procs, view_snaps_from_mount + + +@runs_on([['replicated', 'distributed-replicated', + 'dispersed', 'distributed-dispersed', 'distributed'], + ['glusterfs']]) +class SnapshotSnapdCloneVol(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + cls.get_super_method(cls, 'setUpClass')() + cls.mount1 = [] + cls.mpoint = "/mnt/clone1" + cls.server_list = [] + cls.server_lists = [] + + # Upload io scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, cls.script_upload_path) + if not ret: + raise ExecutionError("Failed to upload IO scripts " + "to clients %s" % cls.clients) + g.log.info("Successfully uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + + self.snap = 'test_snap_clone_snapd-snap' + self.clone_vol1 = 'clone-of-test_snap_clone_snapd-clone1' + # SettingUp volume and Mounting the volume + self.get_super_method(self, 'setUp')() + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not 
ret: + raise ExecutionError("Failed to setup volume %s" % self.volname) + g.log.info("Volume %s has been setup successfully", self.volname) + + def validate_snapd(self, check_condition=True): + """ Validate snapd running """ + for server in self.server_list: + ret = is_snapd_running(server, self.clone_vol1) + if check_condition: + self.assertTrue( + ret, "Unexpected: Snapd is Not running for " + "volume %s on node %s" % (self.clone_vol1, server)) + g.log.info( + "Snapd Running for volume %s " + "on node: %s", self.clone_vol1, server) + else: + self.assertFalse( + ret, "Unexpected: Snapd is running for" + "volume %s on node %s" % (self.clone_vol1, server)) + g.log.info("Expected: Snapd is not Running for volume" + " %s on node: %s", self.clone_vol1, server) + + def check_snaps(self): + """ Check snapshots under .snaps folder """ + ret, _, _ = uss_list_snaps(self.clients[0], self.mpoint) + self.assertEqual(ret, 0, "Unexpected: .snaps directory not found") + g.log.info("Expected: .snaps directory is present") + + def validate_uss(self): + """ Validate USS running """ + ret = is_uss_enabled(self.mnode, self.clone_vol1) + self.assertTrue(ret, "USS is disabled in clone volume " + "%s" % self.clone_vol1) + g.log.info("USS enabled in cloned Volume %s", self.clone_vol1) + + def validate_snaps(self): + """ Validate snapshots under .snaps folder """ + for count in range(0, 40): + ret = view_snaps_from_mount(self.mount1, self.snaps_list) + if ret: + break + sleep(2) + count += 1 + self.assertTrue(ret, "Failed to lists .snaps folder") + g.log.info("Successfully validated snapshots from .snaps folder") + + def test_snap_clone_snapd(self): + """ + Steps: + + 1. create a volume + 2. Create a snapshots and activate + 3. Clone the snapshot and mount it + 4. Check for snapd daemon + 5. enable uss and validate snapd + 5. stop cloned volume + 6. Validate snapd + 7. start cloned volume + 8. validate snapd + 9. Create 5 more snapshot + 10. Validate total number of + snapshots created. 
+ 11. Activate 5 snapshots + 12. Enable USS + 13. Validate snapd + 14. kill snapd on all nodes + 15. validate snapd running + 16. force start clone volume + 17. validate snaps inside .snaps directory + """ + # pylint: disable=too-many-statements, too-many-locals + + # Starting I/O + all_mounts_procs = [] + for mount_obj in self.mounts: + cmd = ("/usr/bin/env python %s create_files " + "-f 10 --base-file-name file %s" % ( + self.script_upload_path, + mount_obj.mountpoint)) + proc = g.run_async(mount_obj.client_system, cmd, + user=mount_obj.user) + all_mounts_procs.append(proc) + + # Validate I/O + ret = validate_io_procs(all_mounts_procs, self.mounts) + self.assertTrue(ret, "IO failed on some of the clients") + g.log.info("IO is successful on all mounts") + + # Creating snapshot + ret, _, _ = snap_create(self.mnode, self.volname, self.snap) + self.assertEqual(ret, 0, ("Failed to create snapshot for volume %s" + % self.volname)) + g.log.info("Snapshot %s created successfully for " + "volume %s", self.snap, self.volname) + + # Activating created snapshots + ret, _, _ = snap_activate(self.mnode, self.snap) + self.assertEqual(ret, 0, ("Failed to activate snapshot %s" + % self.snap)) + g.log.info("Snapshot snap%s activated successfully", self.snap) + + # Snapshot list + self.assertIsNotNone( + get_snap_list(self.mnode), "Failed to list snapshot") + g.log.info("Snapshot list command Successful") + + # Creating and starting a Clone of snapshot: + ret, _, _ = snap_clone(self.mnode, self.snap, self.clone_vol1) + self.assertEqual(ret, 0, "Failed to clone %s" % self.clone_vol1) + g.log.info("Clone volume %s created successfully", self.clone_vol1) + + # Start the clone volumes + ret, _, _ = volume_start(self.mnode, self.clone_vol1) + self.assertEqual(ret, 0, "Failed to start %s" % self.clone_vol1) + g.log.info("%s started successfully", self.clone_vol1) + + # Form server list + brick_list = get_all_bricks(self.mnode, self.clone_vol1) + for bricks in brick_list: + 
self.server_lists.append(bricks.split(":")[0]) + self.server_list = list(set(self.server_lists)) + + # Get volume info + vol_info = get_volume_info(self.mnode, self.clone_vol1) + self.assertIsNotNone(vol_info, "Failed to get vol info") + g.log.info("Successfully in getting vol info") + + # Redefining mounts for cloned volume + self.mount_points, self.mounts_dict_list = [], [] + for client in self.all_clients_info: + mount = { + 'protocol': self.mount_type, + 'server': self.mnode, + 'volname': self.volname, + 'client': self.all_clients_info[client], + 'mountpoint': (path.join( + "%s" % self.mpoint)), + 'options': '' + } + self.mounts_dict_list.append(mount) + self.mount1 = create_mount_objs(self.mounts_dict_list) + self.mount_points.append(self.mpoint) + g.log.info("Successfully made entry in self.mount1") + + # FUSE mount clone1 volume + for mount_obj in self.mounts: + ret, _, _ = mount_volume(self.clone_vol1, self.mount_type, + self.mpoint, + self.mnode, mount_obj.client_system) + self.assertEqual(ret, 0, "Volume mount failed for clone1") + g.log.info("%s mounted Successfully", self.clone_vol1) + + # Validate clone volume is mounted or not + ret = is_mounted(self.clone_vol1, self.mpoint, self.mnode, + mount_obj.client_system, self.mount_type) + self.assertTrue(ret, "Volume not mounted on mount point: " + "%s" % self.mpoint) + g.log.info("Volume %s mounted on %s", self.clone_vol1, self.mpoint) + + # Log Cloned Volume information + ret = log_volume_info_and_status(self.mnode, self.clone_vol1) + self.assertTrue("Failed to Log Info and Status of Volume " + "%s" % self.clone_vol1) + g.log.info("Successfully Logged Info and Status") + + # Validate snapd running on all nodes + self.validate_snapd(check_condition=False) + + # Enable USS + ret, _, _ = enable_uss(self.mnode, self.clone_vol1) + self.assertEqual(ret, 0, "Failed to enable USS on cloned volume") + g.log.info("Successfully enabled USS on Cloned volume") + + # Validate USS running + self.validate_uss() + + # 
Validate snapd running on all nodes + self.validate_snapd() + + # Stop cloned volume + ret, _, _ = volume_stop(self.mnode, self.clone_vol1) + self.assertEqual(ret, 0, "Failed to stop cloned volume " + "%s" % self.clone_vol1) + g.log.info("Successfully Stopped Cloned volume %s", self.clone_vol1) + + # Validate snapd running on all nodes + self.validate_snapd(check_condition=False) + + # Start cloned volume + ret, _, _ = volume_start(self.mnode, self.clone_vol1) + self.assertEqual(ret, 0, "Failed to start cloned volume" + " %s" % self.clone_vol1) + g.log.info("Successfully started cloned volume" + " %s", self.clone_vol1) + + # Validate snapd running on all nodes + self.validate_snapd() + + # Create 5 snapshots + self.snaps_list = [('test_snap_clone_snapd-snap%s' + % i)for i in range(0, 5)] + for snapname in self.snaps_list: + ret, _, _ = snap_create(self.mnode, self.clone_vol1, + snapname) + self.assertEqual(ret, 0, ("Failed to create snapshot for volume" + " %s" % self.clone_vol1)) + g.log.info("Snapshot %s created successfully for volume " + "%s", snapname, self.clone_vol1) + + # Validate USS running + self.validate_uss() + + # Check snapshot under .snaps directory + self.check_snaps() + + # Activate Snapshots + for snapname in self.snaps_list: + ret, _, _ = snap_activate(self.mnode, snapname) + self.assertEqual(ret, 0, ("Failed to activate snapshot %s" + % snapname)) + g.log.info("Snapshot %s activated " + "successfully", snapname) + + # Validate USS running + self.validate_uss() + + # Validate snapshots under .snaps folder + self.validate_snaps() + + # Kill snapd on node and validate snapd except management node + for server in self.servers[1:]: + ret, _, _ = terminate_snapd_on_node(server) + self.assertEqual(ret, 0, "Failed to Kill snapd on node %s" + % server) + g.log.info("snapd Killed Successfully on node %s", server) + + # Check snapd running + ret = is_snapd_running(server, self.clone_vol1) + self.assertTrue(ret, "Unexpected: Snapd running on node: " + "%s" 
% server) + g.log.info("Expected: Snapd is not running on node:%s", server) + + # Check snapshots under .snaps folder + g.log.info("Validating snapshots under .snaps") + ret, _, _ = uss_list_snaps(self.clients[0], self.mpoint) + self.assertEqual(ret, 0, "Target endpoint not connected") + g.log.info("Successfully listed snapshots under .snaps") + + # Kill snapd in management node + ret, _, _ = terminate_snapd_on_node(self.servers[0]) + self.assertEqual(ret, 0, "Failed to Kill snapd on node %s" + % self.servers[0]) + g.log.info("snapd Killed Successfully on node %s", self.servers[0]) + + # Validate snapd running on all nodes + self.validate_snapd(check_condition=False) + + # Validating snapshots under .snaps + ret, _, _ = uss_list_snaps(self.clients[0], self.mpoint) + self.assertNotEqual(ret, 0, "Unexpected: Successfully listed " + "snapshots under .snaps") + g.log.info("Expected: Target endpoint not connected") + + # Start the Cloned volume(force start) + ret, _, _ = volume_start(self.mnode, self.clone_vol1, force=True) + self.assertEqual(ret, 0, "Failed to start cloned volume " + "%s" % self.clone_vol1) + g.log.info("Successfully Started Cloned volume %s", self.clone_vol1) + + # Validate snapd running on all nodes + self.validate_snapd() + + # Validate snapshots under .snaps folder + self.validate_snaps() + + def tearDown(self): + + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() + + # Disable USS on cloned volume + ret, _, _ = disable_uss(self.mnode, self.clone_vol1) + if ret: + raise ExecutionError("Failed to disable USS on cloned volume") + g.log.info("Successfully disabled USS on Cloned volume") + + # Cleanup cloned volume + ret = unmount_mounts(self.mount1) + if not ret: + raise ExecutionError("Failed to unmount cloned volume") + ret = cleanup_volume(self.mnode, self.clone_vol1) + if not ret: + raise ExecutionError("Failed to unmount and cleanup cloned volume") + g.log.info("Successfully umounted and cleanup cloned volume") + + 
# Unmount and cleanup-volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Unmount and Cleanup Volume") + g.log.info("Successful in Unmount Volume and Cleanup Volume") diff --git a/tests/functional/snapshot/test_snapshot_restore.py b/tests/functional/snapshot/test_snapshot_restore.py index 1e84c800f..99a82e2b7 100644 --- a/tests/functional/snapshot/test_snapshot_restore.py +++ b/tests/functional/snapshot/test_snapshot_restore.py @@ -1,4 +1,4 @@ -# Copyright (C) 2016-2020 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2016-2020 Red Hat, Inc. <http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -95,20 +95,14 @@ class SnapRestore(GlusterBaseClass): ret, _, _ = snap_delete_all(self.mnode) if not ret: raise ExecutionError("Snapshot delete failed.") - self.get_super_method(self, 'tearDown')() - @classmethod - def tearDownClass(cls): - """ - Clean up the volume & mount - """ - g.log.info("Starting volume and mount cleanup") - ret = cls.unmount_volume_and_cleanup_volume(cls.mounts) + # Unmount and cleanup-volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: - raise ExecutionError("Failed to cleanup volume and mount") - g.log.info("Cleanup successful for the volume and mount") + raise ExecutionError("Failed to Unmount and Cleanup Volume") + g.log.info("Successful in Unmount Volume and Cleanup Volume") - cls.get_super_method(cls, 'tearDownClass')() + self.get_super_method(self, 'tearDown')() def test_validate_snaps_restore(self): # pylint: disable=too-many-statements diff --git a/tests/functional/snapshot/test_uss_snap_restore.py b/tests/functional/snapshot/test_uss_snap_restore.py new file mode 100644 index 000000000..45de07c93 --- /dev/null +++ b/tests/functional/snapshot/test_uss_snap_restore.py @@ -0,0 +1,239 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from glusto.core import Glusto as g +from glustolibs.gluster.exceptions import ExecutionError +from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on +from glustolibs.io.utils import ( + wait_for_io_to_complete, + get_mounts_stat) +from glustolibs.gluster.snap_ops import ( + snap_create, + get_snap_list, + snap_activate, + snap_restore_complete) +from glustolibs.gluster.uss_ops import ( + enable_uss, + is_uss_enabled, + get_uss_list_snaps, + is_snapd_running, + disable_uss) +from glustolibs.misc.misc_libs import upload_scripts +from glustolibs.gluster.volume_libs import ( + verify_all_process_of_volume_are_online) + + +@runs_on([['replicated', 'distributed-replicated', 'dispersed', + 'distributed', 'distributed-dispersed'], + ['glusterfs', 'nfs']]) +class TestUssSnapRestore(GlusterBaseClass): + + @classmethod + def setUpClass(cls): + # Calling GlusterBaseClass setUpClass + cls.get_super_method(cls, 'setUpClass')() + + # Upload IO scripts for running IO on mounts + cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/" + "file_dir_ops.py") + ret = upload_scripts(cls.clients, [cls.script_upload_path]) + if not ret: + raise ExecutionError("Failed to upload IO scripts to clients %s" + % cls.clients) + g.log.info("Successfully 
uploaded IO scripts to clients %s", + cls.clients) + + def setUp(self): + # Calling GlusterBaseClass setUp + self.get_super_method(self, 'setUp')() + + self.all_mounts_procs = [] + + # Setup Volume and Mount Volume + ret = self.setup_volume_and_mount_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to Setup_Volume and Mount_Volume") + g.log.info("Successful in Setup Volume and Mount Volume") + self.snapshots = [('snap-test-uss-snap-restore-%s-%s' + % (self.volname, i))for i in range(0, 2)] + + def tearDown(self): + + # Disable uss for volume + ret, _, _ = disable_uss(self.mnode, self.volname) + if ret: + raise ExecutionError("Failed to disable uss") + g.log.info("Successfully disabled uss for volume %s", self.volname) + + # Unmount and cleanup original volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) + if not ret: + raise ExecutionError("Failed to umount and cleanup Volume") + g.log.info("Successful in umounting the volume and Cleanup") + + # Calling GlusterBaseClass teardown + self.get_super_method(self, 'tearDown')() + + def test_uss_snap_restore(self): + """ + Description: + This test case will validate USS after Snapshot restore. + The restored snapshot should not be listed under the '.snaps' + directory. 
+ + * Perform I/O on mounts + * Enable USS on volume + * Validate USS is enabled + * Create a snapshot + * Activate the snapshot + * Perform some more I/O + * Create another snapshot + * Activate the second + * Restore volume to the second snapshot + * From mount point validate under .snaps + - first snapshot should be listed + - second snapshot should not be listed + """ + + # pylint: disable=too-many-statements + # Perform I/O + cmd = ( + "/usr/bin/env python %s create_files " + "-f 10 --base-file-name firstfiles %s" + % (self.script_upload_path, + self.mounts[0].mountpoint)) + proc = g.run_async( + self.mounts[0].client_system, cmd, user=self.mounts[0].user) + self.all_mounts_procs.append(proc) + + # Wait for IO to complete and validate IO + self.assertTrue( + wait_for_io_to_complete(self.all_mounts_procs, self.mounts[0]), + "IO failed on %s" % self.mounts[0]) + g.log.info("IO is successful on all mounts") + + # Get stat of all the files/dirs created. + ret = get_mounts_stat(self.mounts) + self.assertTrue(ret, "Stat failed on some of the clients") + g.log.info("Successfully got stat of all files/dirs created") + + # Enable USS + ret, _, _ = enable_uss(self.mnode, self.volname) + self.assertEqual(ret, 0, "Failed to enable USS on volume") + g.log.info("Successfully enabled USS on volume") + + # Validate USS is enabled + ret = is_uss_enabled(self.mnode, self.volname) + self.assertTrue(ret, "USS is disabled on volume %s" % self.volname) + g.log.info("USS enabled on volume %s", self.volname) + + # Create a snapshot + ret, _, _ = snap_create(self.mnode, self.volname, self.snapshots[0]) + self.assertEqual(ret, 0, ("Failed to create snapshot for %s" + % self.volname)) + g.log.info("Snapshot %s created successfully for volume %s", + self.snapshots[0], self.volname) + + # Check for number of snaps using snap_list it should be 1 now + snap_list = get_snap_list(self.mnode) + self.assertEqual(1, len(snap_list), "No of snaps not consistent " + "for volume %s" % self.volname) 
+ g.log.info("Successfully validated number of snapshots") + + # Activate the snapshot + ret, _, _ = snap_activate(self.mnode, self.snapshots[0]) + self.assertEqual(ret, 0, ("Failed to activate snapshot %s" + % self.snapshots[0])) + g.log.info("Snapshot %s activated successfully", self.snapshots[0]) + + # Perform I/O + self.all_mounts_procs = [] + cmd = ( + "/usr/bin/env python %s create_files " + "-f 10 --base-file-name secondfiles %s" + % (self.script_upload_path, + self.mounts[0].mountpoint)) + proc = g.run_async( + self.mounts[0].client_system, cmd, user=self.mounts[0].user) + self.all_mounts_procs.append(proc) + + # Wait for IO to complete and validate IO + self.assertTrue( + wait_for_io_to_complete(self.all_mounts_procs, self.mounts[0]), + "IO failed on %s" % self.mounts[0]) + g.log.info("IO is successful on all mounts") + + # Get stat of all the files/dirs created. + ret = get_mounts_stat(self.mounts) + self.assertTrue(ret, "Stat failed on some of the clients") + g.log.info("Successfully got stat of all files/dirs created") + + # Create another snapshot + ret, _, _ = snap_create(self.mnode, self.volname, self.snapshots[1]) + self.assertEqual(ret, 0, ("Failed to create snapshot for volume %s" + % self.volname)) + g.log.info("Snapshot %s created successfully for volume %s", + self.snapshots[1], self.volname) + + # Check for number of snaps using snap_list it should be 2 now + snap_list = get_snap_list(self.mnode) + self.assertEqual(2, len(snap_list), "No of snaps not consistent " + "for volume %s" % self.volname) + g.log.info("Successfully validated number of snapshots") + + # Activate the second snapshot + ret, _, _ = snap_activate(self.mnode, self.snapshots[1]) + self.assertEqual(ret, 0, ("Failed to activate snapshot %s" + % self.snapshots[1])) + g.log.info("Snapshot %s activated successfully", self.snapshots[1]) + + # Restore volume to the second snapshot + ret = snap_restore_complete( + self.mnode, self.volname, self.snapshots[1]) + self.assertTrue(ret, 
("Failed to restore snap %s on the " + "volume %s" % (self.snapshots[1], self.volname))) + g.log.info("Restore of volume is successful from %s on " + "volume %s", self.snapshots[1], self.volname) + + # Verify all volume processes are online + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, "Failed: All volume processes are not online") + g.log.info("All volume processes are online") + ret = is_snapd_running(self.mnode, self.volname) + self.assertTrue( + ret, "Failed: snapd is not running for volume %s" % self.volname) + g.log.info("Successful: snapd is running") + + # List activated snapshots under the .snaps directory + snap_dir_list = get_uss_list_snaps(self.mounts[0].client_system, + self.mounts[0].mountpoint) + self.assertIsNotNone( + snap_dir_list, "Failed to list snapshots under .snaps directory") + g.log.info("Successfully gathered list of snapshots under the .snaps" + " directory") + + # Check for first snapshot as it should get listed here + self.assertIn(self.snapshots[0], snap_dir_list, + ("Unexpected : %s not listed under .snaps " + "directory" % self.snapshots[0])) + g.log.info("Activated Snapshot %s listed Successfully", + self.snapshots[0]) + + # Check for second snapshot as it should not get listed here + self.assertNotIn(self.snapshots[1], snap_dir_list, + ("Unexpected : %s listed in .snaps " + "directory" % self.snapshots[1])) + g.log.info("Restored Snapshot %s not listed ", self.snapshots[1]) diff --git a/tests/functional/snapshot/test_validate_snaps_dir_over_uss.py b/tests/functional/snapshot/test_validate_snaps_dir_over_uss.py index 705abe31b..c1e42517f 100644 --- a/tests/functional/snapshot/test_validate_snaps_dir_over_uss.py +++ b/tests/functional/snapshot/test_validate_snaps_dir_over_uss.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2020 Red Hat, Inc. <http://www.redhat.com> +# Copyright (C) 2017-2020 Red Hat, Inc. 
<http://www.redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -89,23 +89,14 @@ class TestValidateUss(GlusterBaseClass): g.log.info("Successfully disabled uss for volume" "%s", self.volname) - # Calling GlusterBaseClass tearDown - self.get_super_method(self, 'tearDown')() - - @classmethod - def tearDownClass(cls): - """ - Clean up the volume & mount - """ - # stopping the volume and clean up the volume - g.log.info("Starting to Cleanup Volume") - ret = cls.unmount_volume_and_cleanup_volume(cls.mounts) + # Unmount and cleanup-volume + ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts) if not ret: - raise ExecutionError("Failed to Cleanup Volume and mount") - g.log.info("Successful in Cleanup Volume and mount") + raise ExecutionError("Failed to Unmount and Cleanup Volume") + g.log.info("Successful in Unmount Volume and Cleanup Volume") - # calling GlusterBaseClass tearDownClass - cls.get_super_method(cls, 'tearDownClass')() + # Calling GlusterBaseClass tearDown + self.get_super_method(self, 'tearDown')() def test_validate_snaps_dir_over_uss(self): diff --git a/tests/gluster_tests_config.yml b/tests/gluster_tests_config.yml index 3684080f2..36c1b8390 100644 --- a/tests/gluster_tests_config.yml +++ b/tests/gluster_tests_config.yml @@ -203,8 +203,8 @@ gluster: replica_count: 3 arbiter_count: 1 transport: tcp - distributed-arbiter: &distrbuted_arbiter - type: distributed_arbiter + distributed-arbiter: &distrbuted-arbiter + type: distributed-arbiter dist_count: 2 replica_count: 3 arbiter_count: 1 diff --git a/tools/generate_glusto_config/README.md b/tools/generate_glusto_config/README.md new file mode 100644 index 000000000..ce0455d69 --- /dev/null +++ b/tools/generate_glusto_config/README.md @@ -0,0 +1,34 @@ +# generate_glusto_config +Tool to generate config file for executing glusto tests. + +## Prerequisites +Python 3.x + +## Installation +1. 
Change directory to the project directory. + +``` +# cd tools/generate_glusto_config +``` + +2. Now run the installation script. + +``` +# python3 setup.py install +``` + +3. To check run: + +``` +# generate_glusto_config --help +``` + +## Usage +Pass arguments to the script as shown below: + +``` +# generate_glusto_config -c examples/sample_glusto_config.yaml -t glusto_config_template.jinja -o output_config.yml +``` + +## Licence +[GPLv3](https://github.com/gluster/glusto-tests/blob/master/LICENSE) diff --git a/tools/generate_glusto_config/examples/sample_glusto_config.yaml b/tools/generate_glusto_config/examples/sample_glusto_config.yaml new file mode 100644 index 000000000..4991b2204 --- /dev/null +++ b/tools/generate_glusto_config/examples/sample_glusto_config.yaml @@ -0,0 +1,20 @@ +# 'clients' is list of Hostnames/IP's of clients in the cluster. +clients: [client_hostname1, client_hostname2] + +# 'servers' is list of Hostnames/IP's of servers in the cluster. +# Each item in list is a dict with 'Hostname/IP' of the server as key. +# The info should contain the devices to use +# for creating bricks, brick_root i.e dirname of brick mount point. + +servers: + - server_hostname1: + devices: ["/dev/vdb", "/dev/vdc", "/dev/vdd", "/dev/vde", "/dev/vdf"] + brick_root: "/bricks" + - server_hostname2: + devices: ["/dev/vdb", "/dev/vdc", "/dev/vdd", "/dev/vde", "/dev/vdf"] + brick_root: "/bricks" + +logfile: "/var/log/glusto_tests.log" + +# Mount protocol to use in the current run +mount_type: ["glusterfs"] diff --git a/tools/generate_glusto_config/generate_glusto_config.py b/tools/generate_glusto_config/generate_glusto_config.py new file mode 100644 index 000000000..ca63b1d5a --- /dev/null +++ b/tools/generate_glusto_config/generate_glusto_config.py @@ -0,0 +1,74 @@ +# Copyright (C) 2020 Red Hat, Inc. 
<http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY :or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +import argparse + +from glusto.core import Glusto as g + + +def handle_configs(config_list): + """Load user configuration files""" + + # load user specified configs + if config_list: + config_files = config_list.split() + g.config = g.load_configs(config_files) + return True + + return False + + +def parse_args(): + """Parse arguments with newer argparse module + (adds built-in required parm) + """ + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='Create output file based on template and config files') + parser.add_argument("-c", "--config", + help="Config file(s) to read.", + action="store", dest="config_list", + default=None) + parser.add_argument("-t", "--template", + help="Template file to render", + action="store", dest="template_file", + default=None) + parser.add_argument("-o", "--output", + help="Output file for rendered template", + action="store", dest="output_file", + default=None) + return parser.parse_args() + + +def main(): + """Main function""" + + args = parse_args() + + if args.config_list: + handle_configs(args.config_list) + g.show_config(g.config) + + output_file = "rendered_template.txt" + if args.output_file: + output_file = args.output_file + + if args.template_file: + 
g.render_template(args.template_file, g.config, output_file) + + +if __name__ == '__main__': + main() diff --git a/tools/generate_glusto_config/glusto_config_template.jinja b/tools/generate_glusto_config/glusto_config_template.jinja new file mode 100644 index 000000000..3146586d8 --- /dev/null +++ b/tools/generate_glusto_config/glusto_config_template.jinja @@ -0,0 +1,40 @@ +log_file: {{logfile}} +log_level: DEBUG +remote_user: root + +# 'servers' is list of Hostnames/IP's of servers in the cluster. +servers: &servers_list{% for server_item in servers %}{% for server, value in server_item.items() %} + - {{server}}{% endfor %}{% endfor %} + +# 'clients' is list of Hostnames/IP's of clients in the cluster. +clients:{% for client in clients %} + - {{client}}{% endfor %} + +# 'servers_info' is info about each server in the cluster. +# each server_info is a dict with 'Hostname/IP' of the server as key. +# The info should contain the host(Hostname/IP) of server, devices to use +# for creating bricks, brick_root i.e dirname of brick mount point. +# Note: Use the same Hostname/IP used in the above 'servers' section. + +servers_info:{% for server_item in servers %} + {% set outer_loop = loop %} + {% for server, value in server_item.items() %} + {{server}}: &server{{ outer_loop.index }} + host: {{server}} + devices: {{ value["devices"] }} + brick_root: {{ value["brick_root"] }}{% endfor %}{% endfor %} + +# 'clients_info' is info about each client in the cluster. +# each client_info is a dict with 'Hostname/IP' of the client as key. +# The info should contain the host(Hostname/IP) of client. + +clients_info: {% for client in clients %} + {{client}}: &client{{ loop.index }} + host: {{client}}{% endfor %} + +# This is to define what volume types and mount protocols will be run +# in this current test run. 
+ +gluster: + running_on_volumes: [] + running_on_mounts: {{mount_type}} diff --git a/tools/generate_glusto_config/setup.py b/tools/generate_glusto_config/setup.py new file mode 100644 index 000000000..f6dcab180 --- /dev/null +++ b/tools/generate_glusto_config/setup.py @@ -0,0 +1,32 @@ +#!/usr/bin/python3 +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY :or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from setuptools import setup + +setup( + name='generate_glusto_config', + author='Red Hat, Inc.', + author_email='gluster-devel@gluster.org', + url='http://www.gluster.org', + license='GPLv3+', + description=("Tool to generate config file for executing glusto tests."), + py_modules=['generate_glusto_config'], + entry_points=""" + [console_scripts] + generate_glusto_config = generate_glusto_config:main + """ +) diff --git a/tools/get_sosreports/README.md b/tools/get_sosreports/README.md new file mode 100644 index 000000000..57176f27f --- /dev/null +++ b/tools/get_sosreports/README.md @@ -0,0 +1,45 @@ +# get_sosreports +Tool to collect sosreports from all servers and clients. + +## Prerequisites +1. Python 3.x +2. Passwordless ssh should be setup. + +## Installation +1. Change directory to the project directory. + +``` +# cd tools/get_sosreports +``` + +2. Now run the installation script. 
+ +``` +# python3 setup.py install +``` + +3. To check run: + +``` +# get_sosreports --help +``` + +## Usage +There are 2 ways of using the tool. +1. Passing IP addresses through command line separated by comma(,): + +``` +# get_sosreports -m machine_1,machine_2,machine_3 +``` + +2. Passing a glusto-tests config file: + +``` +# get_sosreports -f config_file +``` + +**Note**: +The default destination directory is `.` (present dir); use the `-d` or `--dist-dir` option to change it. + +## License +[GPLv3](https://github.com/gluster/glusto-tests/blob/master/LICENSE) diff --git a/tools/get_sosreports/get_sosreports.py b/tools/get_sosreports/get_sosreports.py new file mode 100644 index 000000000..962fa6d7f --- /dev/null +++ b/tools/get_sosreports/get_sosreports.py @@ -0,0 +1,190 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +# Imports needed by the script. +import argparse +import os +import sys +from yaml import safe_load + + +def read_config_file(config_file): + """ + A function to read the yaml file given to the script. + + Args: + config_file(str): A config file used to run glusto-tests. + + Return: + dict: A dictionary with all the details from config file. 
+ """ + return safe_load(open(config_file, 'r')) + + +def remove_previous_sosreports(server): + """ + A function to remove old sosreports. + + Args: + server: hostname/IP server from which sosreport + has to be removed. + + Returns: + bool: True if successful else false. + """ + cmd = ("ssh root@{} \"rm -rf /var/tmp/sosreport-*\"" + .format(server)) + ret = os.system(cmd) + if ret: + return False + return True + + +def collect_new_sosreports(server): + """ + A function to generate sosreports. + + Args: + server: hostname/IP server from which sosreport + has to be collected. + + Returns: + bool: True if successful else false. + """ + cmd = ("ssh root@{} \"sosreport --batch --name=$HOSTNAME\"" + .format(server)) + ret = os.system(cmd) + if ret: + return False + return True + + +def copy_sosreports_to_dir(server, directory): + """ + A function to copy sosreports to local dir. + + Args: + server: hostname/IP of server for passwordless ssh + has to be configured. + directory: Directory to be used to store sosreports. + + Returns: + bool: True if successful else false. + """ + cmd = ("scp root@{}:/var/tmp/sosreport-* {}" + .format(server, directory)) + ret = os.system(cmd) + if ret: + return False + return True + + +def check_and_create_dir_if_not_present(directory): + """ + A function to check and create directory if not present. + + Args: + directory: Directory to be checked/created. + + Returns: + bool: True if successful else false. + """ + if not os.path.isdir(directory): + cmd = ("mkdir -p {}".format(directory)) + ret = os.system(cmd) + if ret: + return False + else: + print("[INFO]:The dir already exists.") + return True + + +def main(): + """ + Main function of the tool. + """ + # Setting up command line arguments. + parser = argparse.ArgumentParser( + description="Tool to collect sosreports from servers and clients." 
+ ) + parser.add_argument("-f", + "--config_file", + type=str, + dest="config_file", + help="A glusto-tests configuration file.") + parser.add_argument("-m", "--servers", type=str, + dest="servers", + help=("A list of hostnames/ips of" + " servers separated by comma(',').")) + parser.add_argument("-d", "--dist-dir", type=str, default=".", + dest="directory", + help=("Directory where reports are to be stored." + "(Default:.)")) + args = parser.parse_args() + + # Getting list of hostname/IP. + if args.servers: + servers = args.servers.split(',') + + # Reading the config file. + if args.config_file: + config = read_config_file(args.config_file) + servers = [] + servers += config.get('clients', []) + servers += config.get('servers', []) + + # Fetching other parameters from command line. + directory = args.directory + + # Checking and creating dir if not present. + ret = check_and_create_dir_if_not_present(directory) + if not ret: + sys.exit("[ERROR]:Unable to create dir for storing sosreports.") + + try: + for server in servers: + + # Removing old sosreports from the server. + ret = remove_previous_sosreports(server) + if not ret: + sys.exit("[ERROR]:Unable to remove old sosreports on {}!" + .format(server)) + print("[INFO]:Successfully removed old sosreports on {}." + .format(server)) + + # Collecting sosreport on the server. + ret = collect_new_sosreports(server) + if not ret: + sys.exit("[ERROR]:Unable to collect sosreport on {}!" + .format(server)) + print("[INFO]:Successfully collected sosreport on {}." + .format(server)) + + # Downloading sosreport to local machine. + ret = copy_sosreports_to_dir(server, directory) + if not ret: + sys.exit("[ERROR]:Unable to download sosreport from {}." + .format(server)) + print("[INFO]:Successfully copied sosreports from {}." + .format(server)) + + # If servers aren't provided. 
+ except UnboundLocalError: + sys.exit("[ERROR]:servers were not provided") + + +if __name__ == "__main__": + main() diff --git a/tools/get_sosreports/setup.py b/tools/get_sosreports/setup.py new file mode 100644 index 000000000..79392e7ec --- /dev/null +++ b/tools/get_sosreports/setup.py @@ -0,0 +1,33 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from setuptools import setup + +setup( + name='get_sosreports', + version="1.0", + author='Red Hat, Inc.', + author_email='gluster-devel@gluster.org', + url='http://www.gluster.org', + license="GPLv3+", + description=("Tool to collect sosreports" + " from all servers and clients"), + py_modules=['get_sosreports'], + entry_points=""" + [console_scripts] + get_sosreports = get_sosreports:main + """ +) diff --git a/tools/log_splitter/README.md b/tools/log_splitter/README.md new file mode 100644 index 000000000..e44aaecd3 --- /dev/null +++ b/tools/log_splitter/README.md @@ -0,0 +1,37 @@ +# log_splitter +Tool to split glusto logs to individual testcase logs. + +## Prerequisites +Python 3.x + +## Installation +1. Change directory to the project directory. + +``` +# cd tools/log_splitter +``` + +2. Now run the installation script. + +``` +# python3 setup.py install +``` + +3. 
To check run: + +``` +# log_splitter --help +``` + +## Usage +Just pass glusto_test.log file to the script as shown below: + +``` +# log_splitter -f glusto_test.log +``` + +**Note**: +The default destination directory is `.` (present dir); use the `-d` or `--dist-dir` option to change it. + +## License +[GPLv3](https://github.com/gluster/glusto-tests/blob/master/LICENSE) diff --git a/tools/log_splitter/log_splitter.py b/tools/log_splitter/log_splitter.py new file mode 100644 index 000000000..e433b3ee1 --- /dev/null +++ b/tools/log_splitter/log_splitter.py @@ -0,0 +1,100 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +# Imports needed by the script. +import argparse +import os +import sys + + +def check_and_create_dir_if_not_present(directory): + """ + A function to check and create directory if not present + + Args: + directory(str): Directory to be created if not present + + Returns: + bool: True if successful else False + """ + if not os.path.isdir(directory): + cmd = "mkdir -p {}".format(directory) + ret = os.system(cmd) + if ret: + return False + print("[INFO]: Dir created successfully") + else: + print("[INFO]: The dir already exists") + return True + + +def main(): + """ + Main function of the tool. + """ + # Setting up command line arguments. 
+ parser = argparse.ArgumentParser( + description="Tool to split glusto logs to individual testcase logs." + ) + parser.add_argument( + '-f', '--log_file', type=str, dest='log_file', required=True, + help="Glusto test log file") + parser.add_argument( + '-d', '--dist-dir', type=str, default=".", dest="destination_dir", + help="Path were individual test logs are to be stored.") + args = parser.parse_args() + + # Fetching the values from command line. + log_file = args.log_file + destination_dir = args.destination_dir + + # Check and create dir if not present + if not check_and_create_dir_if_not_present(destination_dir): + sys.exit("[ERROR]: Unable to create dir") + + with open(log_file, 'r', encoding="ISO-8859-1") as log_file_fd: + + # Read lines and set flag to check if + # file is open + file_open_flag = False + while True: + line = log_file_fd.readline() + if not line: + break + + # Check if line is starting line. + if '(setUp) Starting Test : ' in line: + if file_open_flag: + file_open_flag = False + + # Open new fd for individual test + # file + filename = line.split(' ')[7] + if destination_dir != '.': + filename = os.path.join(destination_dir, + filename) + file_open_flag = True + + # Write lines to individual test file + if file_open_flag: + with open(filename, 'w') as test_file: + test_file.write(line) + + print("[INFO]: Log file split completed") + + +if __name__ == "__main__": + main() diff --git a/tools/log_splitter/setup.py b/tools/log_splitter/setup.py new file mode 100644 index 000000000..2d922f30a --- /dev/null +++ b/tools/log_splitter/setup.py @@ -0,0 +1,33 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from setuptools import setup + +setup( + name='log_splitter', + version="1.0", + author='Red Hat, Inc.', + author_email='gluster-devel@gluster.org', + url='http://www.gluster.org', + license="GPLv3+", + description=("Tool to split glusto logs to " + "individual testcase logs."), + py_modules=['log_splitter'], + entry_points=""" + [console_scripts] + log_splitter = log_splitter:main + """ +) diff --git a/tools/verify_test_execution/README.md b/tools/verify_test_execution/README.md new file mode 100644 index 000000000..d78819515 --- /dev/null +++ b/tools/verify_test_execution/README.md @@ -0,0 +1,38 @@ +# verify_test_execution +This tool verifies the stability of a given set of testcase(s) by executing it +consecutively for a pre-defined number of times. This ensures that the written +code is stable and also helps the user to identify unexpected failures or errors +that may arise while executing it multiple times. It also checks the given code +for any pylint/flake8 issues. + +## Prerequisites +Python 3.x + +To use this you need to have a valid glusto-tests config file + +## Usage +- Download the project files from github. + + ``` + # git clone https://github.com/gluster/glusto-tests.git + ``` +- Change directory to the project directory. 
+ ``` + # cd glusto-tests/tools/verify_test_execution/ + ``` +- To get help run: + ``` + # python3 verify_test_execution.py --help + ``` +- To run the test(s): + ``` + # python3 verify_test_execution.py --config <config_file> --test <test_path> + ``` + +If you wish to specify the commands for flake8 and pylint (optional) use +`--flake8 <flake8 cmd>` and `--pylint <pylint command>` arguments. +Also, use `--iterations` to specify the number of times the test(s) +should be run (by default it's 5), e.g. `--iterations 10` + +## License +GPLv3 diff --git a/tools/verify_test_execution/verify_test_execution.py b/tools/verify_test_execution/verify_test_execution.py new file mode 100644 index 000000000..1cfc3143e --- /dev/null +++ b/tools/verify_test_execution/verify_test_execution.py @@ -0,0 +1,157 @@ +# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+import argparse +import subprocess +import sys +from datetime import datetime + + +class TestVerify: + + def __init__(self): + self.parser = argparse.ArgumentParser() + self.config = None + self.test_path = None + self.test_run_count = None + self.pre_run_check = False + iter_value_default = 5 + flake8_default = "flake8" + pylint_default = "pylint" + # Set the arguments + self.parser.add_argument('-c', '--config', + help='Path to config file', + required=True) + self.parser.add_argument('-t', '--test', + help='path to test file/folder', + required=True) + self.parser.add_argument('-f', '--flake8', + default=flake8_default, + help='command to invoke flake8 ' + '(by default <flake8 path_to_py_file>)') + self.parser.add_argument('-p', '--pylint', + default=pylint_default, + help='command to invoke pylint ' + '(by default <pylint path_to_py_file>)') + self.parser.add_argument('-i', '--iterations', + type=int, + default=iter_value_default, + help='Iterations to runs the tests ' + '(by default its 5)') + args = self.parser.parse_args() + + # Get config file path + self.config = args.config + + # Get test file or folder + self.test_path = args.test + + # Get the pylint command + self.pylint_cmd = args.pylint + + # Get the falke8 command + self.flake8_cmd = args.flake8 + + # Get the iteration count + self.test_run_count = args.iterations + + # Verify flake8 + self.verify_flake8() + + # Verify Pylint + self.verify_pylint() + + # Verify test run for user defined number of times + self.execute_tests() + + def verify_pylint(self): + """ + Verifies the given file has pylint issues or not. + In case the path given for the test to execute is a folder, the pylint + command returns all the issues in all the files present in the folder. + Verifies the return code of pylint. 
+ """ + print("o Pylint Verification:") + result = subprocess.run([self.pylint_cmd, self.test_path], + stdout=subprocess.PIPE) + if result.returncode != 0: + self._print_error(result.stdout) + print("\t Pylint validation failed") + self.pre_run_check = False + else: + print("\t Pylint validation successful") + self.pre_run_check = True + + def verify_flake8(self): + """ + Verifies the given file for falke8 issues. Executes the flake8 command + and verifies the return code. + """ + print("o Flake8 Verification:") + result = subprocess.run([self.flake8_cmd, self.test_path], + stdout=subprocess.PIPE) + if result.returncode != 0: + self._print_error(result.stdout) + sys.exit("[ERROR]: Flake8 validation Failed") + print("\t Flake8 validation successful") + + def execute_tests(self): + """ + Runs the given test for user defined number of times. + """ + start_time = datetime.now() + if not self.pre_run_check: + print("========= WARNING =========") + decision = input("There were some errors in the pre-check for " + "the given code. It is advised to fix all those " + "issues and the start executing the tests. To " + "continue to test execution press Y. 
To exit " + "press any other key : ") + if decision.lower() != "y": + sys.exit("[ERROR]: Aborted by user") + cmd = ("glusto -c '{config_path}' --pytest='-v -x {test_path}'" + .format(config_path=self.config, test_path=self.test_path)) + print("\no Run Tests") + print("\t ==>[ ", cmd, " ]") + for counter in range(1, self.test_run_count+1): + print("\t Iteration : %s" % counter) + process = subprocess.Popen(cmd, shell=True, + stdout=subprocess.PIPE) + process.wait() + if process.returncode != 0: + self._print_error(process.stdout.read()) + sys.exit("[ERROR]: Test Execution Failed") + print("\n\t\t Status : PASS") + end_time = datetime.now() + print("[INFO] : Test Execution succeeded") + print("\t Test : {test_name}".format(test_name=self.test_path)) + print("\t Iterations : {iter}".format(iter=str(self.test_run_count))) + print("\t Completed in {time}".format(time=str(end_time-start_time))) + + @staticmethod + def _print_error(err): + """ + Prints the error from the stdout + """ + print("\t [Error] \n\t", "-" * 100) + output = err.decode("utf-8").split("\n") + for line in output: + if line: + print("\t", str(line)) + print("\t", "-" * 100) + + +if __name__ == "__main__": + TestVerify() |