Diffstat (limited to 'glustolibs-io/glustolibs/io')
-rw-r--r--  glustolibs-io/glustolibs/io/memory_and_cpu_utils.py  924
-rwxr-xr-x  glustolibs-io/glustolibs/io/utils.py  389
2 files changed, 1253 insertions, 60 deletions
diff --git a/glustolibs-io/glustolibs/io/memory_and_cpu_utils.py b/glustolibs-io/glustolibs/io/memory_and_cpu_utils.py
new file mode 100644
index 000000000..4e1dadbd7
--- /dev/null
+++ b/glustolibs-io/glustolibs/io/memory_and_cpu_utils.py
@@ -0,0 +1,924 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from glusto.core import Glusto as g
+
+from glustolibs.gluster.volume_ops import get_volume_status
+from glustolibs.gluster.glusterfile import file_exists
+from glustolibs.misc.misc_libs import upload_scripts, kill_process
+
+import numpy as np
+import pandas as pd
+from statistics import mean, median
+
+
+def check_upload_memory_and_cpu_logger_script(servers):
+ """Check and upload memory_and_cpu_logger.py to servers if not present
+
+ Args:
+ servers(list): List of all servers where script has to be uploaded
+
+ Returns:
+ bool: True if script is uploaded successfully else False
+ """
+ script = "/usr/share/glustolibs/io/scripts/memory_and_cpu_logger.py"
+ is_present = []
+ for server in servers:
+ if not file_exists(server, script):
+ if not upload_scripts(server, script):
+ g.log.error("Unable to upload memory_and_cpu_logger.py on %s",
+ server)
+ is_present.append(False)
+ else:
+ is_present.append(True)
+ return all(is_present)
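+
+# A minimal usage sketch (the hostnames below are hypothetical, not part
+# of this patch):
+#
+#   servers = ['gluster-node1', 'gluster-node2']
+#   if not check_upload_memory_and_cpu_logger_script(servers):
+#       raise Exception("Logger script not deployed on all servers")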
+
+
+def _start_logging_processes(process, servers, test_name, interval, count):
+ """Start logging processes on all nodes for a given process
+
+ Args:
+ process(str): Name of the gluster process to be monitored
+ servers(list): Servers on which CPU and memory usage has to be logged
+ test_name(str): Name of testcase for which logs are to be collected
+ interval(int): Time interval between successive samples
+ count(int): Number of samples to be captured
+
+ Returns:
+ list: A list of logging processes
+ """
+ cmd = ("/usr/bin/env python "
+ "/usr/share/glustolibs/io/scripts/memory_and_cpu_logger.py"
+ " -p %s -t %s -i %d -c %d" % (process, test_name,
+ interval, count))
+ logging_process = []
+ for server in servers:
+ proc = g.run_async(server, cmd)
+ logging_process.append(proc)
+ return logging_process
+
+
+def log_memory_and_cpu_usage_on_servers(servers, test_name, interval=60,
+ count=100):
+ """Log memory and CPU usage of gluster server processes
+
+ Args:
+ servers(list): Servers on which CPU and memory usage has to be logged
+ test_name(str): Name of the testcase for which logs are to be collected
+
+ Kwargs:
+ interval(int): Time interval after which logs are to be collected
+ (Default:60)
+ count(int): Number of samples to be captured (Default:100)
+
+ Returns:
+ dict: Logging processes dict for all gluster server processes
+ """
+ logging_process_dict = {}
+ for proc_name in ('glusterd', 'glusterfs', 'glusterfsd'):
+ logging_procs = _start_logging_processes(
+ proc_name, servers, test_name, interval, count)
+ logging_process_dict[proc_name] = logging_procs
+ return logging_process_dict
+
+
+def log_memory_and_cpu_usage_on_clients(servers, test_name, interval=60,
+ count=100):
+ """Log memory and CPU usage of gluster client processes
+
+ Args:
+ servers(list): Clients on which CPU and memory usage has to be logged
+ test_name(str): Name of testcase for which logs are to be collected
+
+ Kwargs:
+ interval(int): Time interval after which logs are to be collected
+ (Default:60)
+ count(int): Number of samples to be captured (Default:100)
+
+ Returns:
+ dict: Logging processes dict for all gluster client processes
+ """
+ logging_process_dict = {}
+ logging_procs = _start_logging_processes(
+ 'glusterfs', servers, test_name, interval, count)
+ logging_process_dict['glusterfs'] = logging_procs
+ return logging_process_dict
+
+
+def log_memory_and_cpu_usage_on_cluster(servers, clients, test_name,
+ interval=60, count=100):
+ """Log memory and CPU usage on gluster cluster
+
+ Args:
+ servers(list): Servers on which memory and CPU usage is to be logged
+ clients(list): Clients on which memory and CPU usage is to be logged
+ test_name(str): Name of testcase for which logs are to be collected
+
+ Kwargs:
+ interval(int): Time interval after which logs are to be collected
+ (Default:60)
+ count(int): Number of samples to be captured (Default:100)
+
+ Returns:
+ dict: Logging processes dict for all servers and clients
+ """
+ # Start logging on all servers
+ server_logging_processes = log_memory_and_cpu_usage_on_servers(
+ servers, test_name, interval, count)
+ if not server_logging_processes:
+ return {}
+
+ # Start logging on all clients
+ client_logging_processes = log_memory_and_cpu_usage_on_clients(
+ clients, test_name, interval, count)
+ if not client_logging_processes:
+ return {}
+
+ # Combine both dicts under 'server' and 'client' keys
+ logging_process_dict = {}
+ for node_type, proc_dict in (('server', server_logging_processes),
+ ('client', client_logging_processes)):
+ logging_process_dict[node_type] = dict(proc_dict)
+ return logging_process_dict
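+
+# A sketch of the intended end-to-end flow (variable names assumed, not
+# part of this patch): start logging, run the workload, then wait for
+# the loggers to finish sampling.
+#
+#   procs = log_memory_and_cpu_usage_on_cluster(
+#       servers, clients, 'test_example', interval=60, count=100)
+#   # ... run the test I/O here ...
+#   wait_for_logging_processes_to_stop(procs, cluster=True)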
+
+
+def _process_wait_flag_append(proc, flag):
+ """Wait on an async process and append True to the flag list"""
+ # If the process has already completed, async_communicate()
+ # throws a ValueError; treat that as completed too.
+ try:
+ proc.async_communicate()
+ except ValueError:
+ pass
+ flag.append(True)
+
+
+def wait_for_logging_processes_to_stop(proc_dict, cluster=False):
+ """Wait for all given logging processes to stop
+
+ Args:
+ proc_dict(dict): Dictionary of all the active logging processes
+
+ Kwargs:
+ cluster(bool): True if proc_dict is for the entire cluster else False
+ (Default:False)
+
+ Returns:
+ bool: True if processes are completed else False
+ """
+ flag = []
+ if cluster:
+ for sub_dict in proc_dict:
+ for proc_name in proc_dict[sub_dict]:
+ for proc in proc_dict[sub_dict][proc_name]:
+ _process_wait_flag_append(proc, flag)
+ else:
+ for proc_name in proc_dict:
+ for proc in proc_dict[proc_name]:
+ _process_wait_flag_append(proc, flag)
+ return all(flag)
+
+
+def kill_all_logging_processes(proc_dict, nodes, cluster=False):
+ """Kill logging processes on all given nodes
+
+ Args:
+ proc_dict(dict): Dictionary of all active logging processes
+ nodes(list): List of nodes where logging has to be stopped
+
+ Kwargs:
+ cluster(bool): True if proc_dict is for a full cluster else False
+ (Default:False)
+
+ Returns:
+ bool: True if processes are completed else False
+ """
+ # Kill all logging processes
+ for server in nodes:
+ if not kill_process(server, process_names='memory_and_cpu_logger.py'):
+ g.log.error("Unable to kill some of the processes at %s.", server)
+
+ # This will stop the async threads created by run_async() as the proc
+ # is already killed.
+ return wait_for_logging_processes_to_stop(proc_dict, cluster)
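+
+# Hedged sketch (names assumed): stop the loggers early when a test
+# finishes before all samples are collected.
+#
+#   kill_all_logging_processes(procs, servers + clients, cluster=True)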
+
+
+def create_dataframe_from_csv(node, proc_name, test_name):
+ """Creates a dataframe from a given process.
+
+ Args:
+ node(str): Node from which csv is to be picked
+ proc_name(str): Name of process for which csv is to be picked
+ test_name(str): Name of the testcase for which the CSV was collected
+
+ Returns:
+ dataframe: Pandas dataframe of the CSV contents. An empty dataframe
+ is returned if the CSV file doesn't exist or has no data.
+ """
+ # Read the csv file generated by memory_and_cpu_logger.py
+ ret, raw_data, _ = g.run(node, "cat /root/{}.csv"
+ .format(proc_name))
+ if ret:
+ # Return an empty dataframe so that callers can use .empty checks
+ return pd.DataFrame()
+
+ # Split the complete dump to individual lines
+ data = raw_data.split("\r\n")
+ rows, flag = [], False
+ for line in data:
+ values = line.split(',')
+ if test_name == values[0]:
+ # Reset rows if it's the second instance
+ if flag:
+ rows = []
+ flag = True
+ continue
+
+ # Pick and append values which have complete entry
+ if flag and len(values) == 4:
+ rows.append(values)
+
+ # Guard against a missing or empty section for the given testcase
+ if not rows:
+ return pd.DataFrame()
+
+ # Create a pandas dataframe and set the type for columns
+ dataframe = pd.DataFrame(rows[1:], columns=rows[0])
+ conversion_dict = {'Process ID': int,
+ 'CPU Usage': float,
+ 'Memory Usage': float}
+ dataframe = dataframe.astype(conversion_dict)
+ return dataframe
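+
+# Hedged usage sketch (node and test names assumed):
+#
+#   df = create_dataframe_from_csv('gluster-node1', 'glusterd',
+#                                  'test_example')
+#   if not df.empty:
+#       print(df['Memory Usage'].max())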
+
+
+def _get_min_max_mean_median(entrylist):
+ """Get the mix, max. mean and median of a list
+
+ Args:
+ entrylist(list): List of values to be used
+
+ Returns:
+ dict: Result dict generated from the list
+ """
+ result = {}
+ result['Min'] = min(entrylist)
+ result['Max'] = max(entrylist)
+ result['Mean'] = mean(entrylist)
+ result['Median'] = median(entrylist)
+ return result
+
+
+def _compute_min_max_mean_median(dataframe, data_dict, process, node,
+ volume=None, brick=None):
+ """Compute min, max, mean and median for a given process
+
+ Args:
+ dataframe(pandas dataframe): Pandas dataframe of the csv file
+ data_dict(dict): data dict to which info is to be added
+ process(str): Name of process for which data is to be computed
+ node(str): Node for which min, max, mean and median has to be computed
+
+ Kwargs:
+ volume(str): Volume name of the volume for which data is to be computed
+ brick(str): Brick path of the brick for which data is to be computed
+ """
+ if volume and process == 'glusterfs':
+ # Create subdict inside dict
+ data_dict[node][process][volume] = {}
+ for usage in ('CPU Usage', 'Memory Usage'):
+ # Create usage subdict
+ data_dict[node][process][volume][usage] = {}
+
+ # Clean data and compute values
+ cleaned_usage = list(dataframe[usage].dropna())
+ out = _get_min_max_mean_median(cleaned_usage)
+
+ # Add values to data_dict
+ for key in ('Min', 'Max', 'Mean', 'Median'):
+ data_dict[node][process][volume][usage][key] = out[key]
+
+ elif volume and brick and process == 'glusterfsd':
+ # Create subdict inside dict
+ data_dict[node][process][volume] = {}
+ data_dict[node][process][volume][brick] = {}
+ for usage in ('CPU Usage', 'Memory Usage'):
+ # Create usage subdict
+ data_dict[node][process][volume][brick][usage] = {}
+
+ # Clean data and compute values
+ cleaned_usage = list(dataframe[usage].dropna())
+ out = _get_min_max_mean_median(cleaned_usage)
+
+ # Add values to data_dict
+ for key in ('Min', 'Max', 'Mean', 'Median'):
+ data_dict[node][process][volume][brick][usage][key] = out[key]
+
+ # Compute CPU Usage and Memory Usage for glusterd
+ else:
+ for usage in ('CPU Usage', 'Memory Usage'):
+ # Create usage subdict
+ data_dict[node][process][usage] = {}
+
+ # Clean data and compute value
+ cleaned_usage = list(dataframe[usage].dropna())
+ out = _get_min_max_mean_median(cleaned_usage)
+
+ # Add values to data_dict
+ for key in ('Min', 'Max', 'Mean', 'Median'):
+ data_dict[node][process][usage][key] = out[key]
+
+
+def compute_data_usage_stats_on_servers(nodes, test_name):
+ """Compute min, max, mean and median for servers
+
+ Args:
+ nodes(list): Servers from which data is to be used to compute min,
+ max, mean and median
+ test_name(str): Name of testcase for which data has to be processed
+
+ Returns:
+ dict: dict of min, max, mean and median for a given process
+
+ NOTE:
+ This function always has to be run before cleanup.
+ """
+ data_dict = {}
+ for node in nodes:
+ # Get the volume status on the node
+ volume_status = get_volume_status(node)
+ data_dict[node] = {}
+ for process in ('glusterd', 'glusterfs', 'glusterfsd'):
+
+ # Generate a dataframe from the csv file
+ dataframe = create_dataframe_from_csv(node, process, test_name)
+ if dataframe.empty:
+ return {}
+
+ data_dict[node][process] = {}
+ if process == 'glusterd':
+ # Checking if glusterd is restarted.
+ if len(set(dataframe['Process ID'])) > 1:
+ data_dict[node][process]['is_restarted'] = True
+ else:
+ data_dict[node][process]['is_restarted'] = False
+
+ # Call function to compute min, max, mean and median
+ _compute_min_max_mean_median(dataframe, data_dict, process,
+ node)
+ continue
+
+ # Map volumes to volume process
+ for volume in volume_status.keys():
+ for proc in volume_status[volume][node].keys():
+ if (proc == 'Self-heal Daemon' and process == 'glusterfs'):
+ # Fetching pid from volume status output and create a
+ # dataframe with the entries of only that pid
+ pid = volume_status[volume][node][proc]['pid']
+ proc_dataframe = dataframe[
+ dataframe['Process ID'] == pid]
+
+ # Call function to compute min, max, mean
+ # and median
+ _compute_min_max_mean_median(
+ proc_dataframe, data_dict, process, node, volume)
+
+ if (proc.count('/') >= 2 and process == 'glusterfsd'):
+ # Fetching pid from volume status output and create a
+ # dataframe with the entries of only that pid
+ pid = volume_status[volume][node][proc]['pid']
+ proc_dataframe = dataframe[
+ dataframe['Process ID'] == pid]
+
+ # Call function to compute min, max, mean and median
+ _compute_min_max_mean_median(
+ proc_dataframe, data_dict, process, node, volume,
+ proc)
+
+ return data_dict
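+
+# Hedged sketch (names assumed): run after the loggers have stopped but
+# before cleanup, as the NOTE above requires.
+#
+#   stats = compute_data_usage_stats_on_servers(servers, 'test_example')
+#   glusterd_mem = stats[servers[0]]['glusterd']['Memory Usage']
+#   print(glusterd_mem['Min'], glusterd_mem['Max'],
+#         glusterd_mem['Mean'], glusterd_mem['Median'])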
+
+
+def compute_data_usage_stats_on_clients(nodes, test_name):
+ """Compute min, max, mean and median for clients
+
+ Args:
+ nodes(list): Clients from which data is to be used to compute min,
+ max, mean and median
+ test_name(str): Name of the testcase for which data has to be processed
+
+ Returns:
+ dict: dict of min, max, mean and median for a given process
+ """
+ data_dict = {}
+ for node in nodes:
+ data_dict[node] = {}
+ dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
+ if dataframe.empty:
+ return {}
+
+ data_dict[node]['glusterfs'] = {}
+ # Call function to compute min, max, mean and median
+ _compute_min_max_mean_median(dataframe, data_dict, 'glusterfs', node)
+
+ return data_dict
+
+
+def _perform_three_point_check_for_memory_leak(dataframe, node, process, gain,
+ volume_status=None,
+ volume=None,
+ vol_name=None):
+ """Perform three point check
+
+ Args:
+ dataframe(pandas dataframe): Pandas dataframe of a given process
+ node(str): Node on which memory leak has to be checked
+ process(str): Name of process for which check has to be done
+ gain(float): Accepted amount of leak for a given testcase in MB
+
+ kwargs:
+ volume_status(dict): Volume status output of the given node
+ volume(str): Name of volume for which 3 point check has to be done
+ vol_name(str): Name of volume process according to volume status
+
+ Returns:
+ bool: True if memory leak instances are observed else False
+ """
+ # Filter dataframe to be process wise if it's volume specific process
+ pid = None
+ if process in ('glusterfs', 'glusterfsd') and vol_name:
+ pid = int(volume_status[volume][node][vol_name]['pid'])
+ dataframe = dataframe[dataframe['Process ID'] == pid]
+
+ # Compute usage gain through the data frame
+ memory_increments = list(dataframe['Memory Usage'].diff().dropna())
+
+ # Check if usage is more than accepted amount of leak
+ memory_leak_decision_array = np.where(
+ dataframe['Memory Usage'].diff().dropna() > gain, True, False)
+ instances_of_leak = np.where(memory_leak_decision_array)[0]
+
+ # If memory leak instances are present check if it's reduced
+ count_of_leak_instances = len(instances_of_leak)
+ if count_of_leak_instances > 0:
+ g.log.error('There are %s instances of memory leaks on node %s',
+ count_of_leak_instances, node)
+ for instance in instances_of_leak:
+ # In cases of last log file entry the below op could throw
+ # IndexError which is handled as below.
+ try:
+ # Flag a leak if the memory increments kept growing in the
+ # next two entries and in the last recorded entry
+ if all([memory_increments[instance+1] >
+ memory_increments[instance],
+ memory_increments[instance+2] >
+ memory_increments[instance],
+ (memory_increments[len(memory_increments)-1] >
+ memory_increments[instance])]):
+ return True
+
+ except IndexError:
+ # In case of last log file entry rerun the command
+ # and check for difference
+ g.log.info('Instance at last log entry.')
+ if process in ('glusterfs', 'glusterfsd') and pid:
+ cmd = ("ps u -p %s | awk 'NR>1 && $11~/%s$/{print "
+ " $6/1024}'" % (pid, process))
+ else:
+ cmd = ("ps u -p `pgrep %s` | awk 'NR>1 && $11~/"
+ "%s$/{print $6/1024}'" % (process, process))
+ ret, out, _ = g.run(node, cmd)
+ if ret:
+ g.log.error('Unable to run the command to fetch current '
+ 'memory utilization.')
+ continue
+ usage_now = float(out.strip().split('\n')[-1])
+ last_entry = dataframe['Memory Usage'].iloc[-1]
+
+ # Check if current memory usage is higher than last entry
+ fresh_diff = usage_now - last_entry
+ if fresh_diff > gain and last_entry > fresh_diff:
+ return True
+ return False
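+
+# Worked example of the heuristic above (illustrative numbers): with
+# gain=30 and memory increments [5, 40, 45, 50, ..., 60], the jump of 40
+# flags a leak instance; because the next two increments (45, 50) and
+# the final increment (60) are all larger, usage never settled and the
+# check returns True.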
+
+
+def check_for_memory_leaks_in_glusterd(nodes, test_name, gain=30.0):
+ """Check for memory leaks in glusterd
+
+ Args:
+ nodes(list): Servers on which memory leaks have to be checked
+ test_name(str): Name of testcase for which memory leaks have to be checked
+
+ Kwargs:
+ gain(float): Accepted amount of leak for a given testcase in MB
+ (Default:30)
+
+ Returns:
+ bool: True if memory leak was observed else False
+ """
+ is_there_a_leak = []
+ for node in nodes:
+ dataframe = create_dataframe_from_csv(node, 'glusterd', test_name)
+ if dataframe.empty:
+ return False
+
+ # Call 3 point check function
+ three_point_check = _perform_three_point_check_for_memory_leak(
+ dataframe, node, 'glusterd', gain)
+ if three_point_check:
+ g.log.error("Memory leak observed on node %s in glusterd",
+ node)
+ is_there_a_leak.append(three_point_check)
+
+ return any(is_there_a_leak)
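+
+# Hedged sketch of a typical assertion in a testcase (names assumed):
+#
+#   if check_for_memory_leaks_in_glusterd(servers, 'test_example'):
+#       raise Exception("Memory leak found in glusterd")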
+
+
+def check_for_memory_leaks_in_glusterfs(nodes, test_name, gain=30.0):
+ """Check for memory leaks in glusterfs
+
+ Args:
+ nodes(list): Servers on which memory leaks have to be checked
+ test_name(str): Name of testcase for which memory leaks have to be checked
+
+ Kwargs:
+ gain(float): Accepted amount of leak for a given testcase in MB
+ (Default:30)
+
+ Returns:
+ bool: True if memory leak was observed else False
+
+ NOTE:
+ This function should be executed with the volumes present on the cluster
+ """
+ is_there_a_leak = []
+ for node in nodes:
+ # Get the volume status on the node
+ volume_status = get_volume_status(node)
+ dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
+ if dataframe.empty:
+ return False
+
+ for volume in volume_status.keys():
+ for process in volume_status[volume][node].keys():
+ # Skipping if process isn't Self-heal Daemon
+ if process != 'Self-heal Daemon':
+ continue
+
+ # Call 3 point check function
+ three_point_check = _perform_three_point_check_for_memory_leak(
+ dataframe, node, 'glusterfs', gain, volume_status, volume,
+ 'Self-heal Daemon')
+ if three_point_check:
+ g.log.error("Memory leak observed on node %s in shd "
+ "on volume %s", node, volume)
+ is_there_a_leak.append(three_point_check)
+
+ return any(is_there_a_leak)
+
+
+def check_for_memory_leaks_in_glusterfsd(nodes, test_name, gain=30.0):
+ """Check for memory leaks in glusterfsd
+
+ Args:
+ nodes(list): Servers on which memory leaks have to be checked
+ test_name(str): Name of testcase for which memory leaks have to be checked
+
+ Kwargs:
+ gain(float): Accepted amount of leak for a given testcase in MB
+ (Default:30)
+
+ Returns:
+ bool: True if memory leak was observed else False
+
+ NOTE:
+ This function should be executed with the volumes present on the cluster.
+ """
+ is_there_a_leak = []
+ for node in nodes:
+ # Get the volume status on the node
+ volume_status = get_volume_status(node)
+ dataframe = create_dataframe_from_csv(node, 'glusterfsd', test_name)
+ if dataframe.empty:
+ return False
+
+ for volume in volume_status.keys():
+ for process in volume_status[volume][node].keys():
+ # Skipping if process isn't a brick process
+ if not process.count('/'):
+ continue
+
+ # Call 3 point check function
+ three_point_check = _perform_three_point_check_for_memory_leak(
+ dataframe, node, 'glusterfsd', gain, volume_status, volume,
+ process)
+ if three_point_check:
+ g.log.error("Memory leak observed on node %s in brick "
+ " process for brick %s on volume %s", node,
+ process, volume)
+ is_there_a_leak.append(three_point_check)
+
+ return any(is_there_a_leak)
+
+
+def check_for_memory_leaks_in_glusterfs_fuse(nodes, test_name, gain=30.0):
+ """Check for memory leaks in glusterfs fuse
+
+ Args:
+ nodes(list): Clients on which memory leaks have to be checked
+ test_name(str): Name of testcase for which memory leaks have to be checked
+
+ Kwargs:
+ gain(float): Accepted amount of leak for a given testcase in MB
+ (Default:30)
+
+ Returns:
+ bool: True if memory leak was observed else False
+
+ NOTE:
+ This function should be executed when the volume is still mounted.
+ """
+ is_there_a_leak = []
+ for node in nodes:
+ # Create a dataframe from the client's CSV file
+ dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
+ if dataframe.empty:
+ return False
+
+ # Call 3 point check function
+ three_point_check = _perform_three_point_check_for_memory_leak(
+ dataframe, node, 'glusterfs', gain)
+ if three_point_check:
+ g.log.error("Memory leak observed on node %s for client",
+ node)
+
+ # If I/O is constantly running on Clients the memory
+ # usage spikes up and stays at a point for long.
+ last_entry = dataframe['Memory Usage'].iloc[-1]
+ cmd = ("ps u -p `pidof glusterfs` | "
+ "awk 'NR>1 && $11~/glusterfs$/{print"
+ " $6/1024}'")
+ ret, out, _ = g.run(node, cmd)
+ if ret:
+ g.log.error('Unable to run the command to fetch current '
+ 'memory utilization.')
+ continue
+
+ if float(out) > last_entry:
+ is_there_a_leak.append(True)
+ continue
+
+ is_there_a_leak.append(False)
+
+ return any(is_there_a_leak)
+
+
+def _check_for_oom_killers(nodes, process, oom_killer_list):
+ """Checks for OOM killers for a specific process
+
+ Args:
+ nodes(list): Nodes on which OOM killers have to be checked
+ process(str): Process for which OOM killers have to be checked
+ oom_killer_list(list): A list in which the presence of
+ OOM killer has to be noted
+ """
+ cmd = ("grep -i 'killed process' /var/log/messages* "
+ "| grep -w '{}'".format(process))
+ ret_codes = g.run_parallel(nodes, cmd)
+ for key in ret_codes.keys():
+ ret, out, _ = ret_codes[key]
+ if not ret:
+ g.log.error('OOM killer observed on %s for %s', key, process)
+ g.log.error(out)
+ oom_killer_list.append(True)
+ else:
+ oom_killer_list.append(False)
+
+
+def check_for_oom_killers_on_servers(nodes):
+ """Check for OOM killers on servers
+
+ Args:
+ nodes(list): Servers on which OOM kills have to be checked
+
+ Returns:
+ bool: True if OOM killers are present on any server else False
+ """
+ oom_killer_list = []
+ for process in ('glusterfs', 'glusterfsd', 'glusterd'):
+ _check_for_oom_killers(nodes, process, oom_killer_list)
+ return any(oom_killer_list)
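+
+# OOM checks need no CSV data; they grep /var/log/messages directly.
+# Hedged sketch (names assumed):
+#
+#   if check_for_oom_killers_on_servers(servers):
+#       raise Exception("OOM kill observed on a server node")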
+
+
+def check_for_oom_killers_on_clients(nodes):
+ """Check for OOM killers on clients
+
+ Args:
+ nodes(list): Clients on which OOM kills have to be checked
+
+ Returns:
+ bool: True if OOM killers are present on any client else False
+ """
+ oom_killer_list = []
+ _check_for_oom_killers(nodes, 'glusterfs', oom_killer_list)
+ return any(oom_killer_list)
+
+
+def _check_for_cpu_usage_spikes(dataframe, node, process, threshold,
+ volume_status=None, volume=None,
+ vol_name=None):
+ """Check for cpu spikes for a given process
+
+ Args:
+ dataframe(pandas dataframe): Pandas dataframe of a given process
+ node(str): Node on which cpu spikes has to be checked
+ process(str): Name of process for which check has to be done
+ threshold(int): Accepted amount of 100% CPU usage instances
+
+ kwargs:
+ volume_status(dict): Volume status output of the given node
+ volume(str): Name of volume for which check has to be done
+ vol_name(str): Name of volume process according to volume status
+
+ Returns:
+ bool: True if number of instances more than threshold else False
+ """
+ # Filter dataframe to be process wise if it's volume specific process
+ if process in ('glusterfs', 'glusterfsd') and vol_name:
+ pid = int(volume_status[volume][node][vol_name]['pid'])
+ dataframe = dataframe[dataframe['Process ID'] == pid]
+
+ # Count the instances where CPU usage hit 100%
+ cpu_spike_decision_array = np.where(
+ dataframe['CPU Usage'].dropna() == 100.0, True, False)
+ instances_of_spikes = np.where(cpu_spike_decision_array)[0]
+
+ return bool(len(instances_of_spikes) > threshold)
+
+
+def check_for_cpu_usage_spikes_on_glusterd(nodes, test_name, threshold=3):
+ """Check for CPU usage spikes on glusterd
+
+ Args:
+ nodes(list): Servers on which CPU usage has to be checked
+ test_name(str): Name of testcase for which CPU usage has to be checked
+
+ Kwargs:
+ threshold(int): Accepted amount of instances of 100% CPU usage
+ (Default:3)
+
+ Returns:
+ bool: True if CPU spikes are more than threshold else False
+ """
+ is_there_a_spike = []
+ for node in nodes:
+ dataframe = create_dataframe_from_csv(node, 'glusterd', test_name)
+ if dataframe.empty:
+ return False
+
+ # Call function to check for cpu spikes
+ cpu_spikes = _check_for_cpu_usage_spikes(
+ dataframe, node, 'glusterd', threshold)
+ if cpu_spikes:
+ g.log.error("CPU usage spikes observed more than "
+ "threshold %d on node %s for glusterd",
+ threshold, node)
+ is_there_a_spike.append(cpu_spikes)
+
+ return any(is_there_a_spike)
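+
+# Hedged sketch (names assumed): tolerate up to 5 samples at 100% CPU
+# before flagging glusterd.
+#
+#   if check_for_cpu_usage_spikes_on_glusterd(servers, 'test_example',
+#                                             threshold=5):
+#       raise Exception("glusterd CPU pegged at 100% too often")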
+
+
+def check_for_cpu_usage_spikes_on_glusterfs(nodes, test_name, threshold=3):
+ """Check for CPU usage spikes on glusterfs
+
+ Args:
+ nodes(list): Servers on which CPU usage has to be checked
+ test_name(str): Name of testcase for which CPU usage has to be checked
+
+ Kwargs:
+ threshold(int): Accepted amount of instances of 100% CPU usage
+ (Default:3)
+
+ Returns:
+ bool: True if CPU spikes are more than threshold else False
+
+ NOTE:
+ This function should be executed with the volumes present on the cluster.
+ """
+ is_there_a_spike = []
+ for node in nodes:
+ # Get the volume status on the node
+ volume_status = get_volume_status(node)
+ dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
+ if dataframe.empty:
+ return False
+
+ for volume in volume_status.keys():
+ for process in volume_status[volume][node].keys():
+ # Skipping if process isn't Self-heal Daemon
+ if process != 'Self-heal Daemon':
+ continue
+
+ # Call function to check for cpu spikes
+ cpu_spikes = _check_for_cpu_usage_spikes(
+ dataframe, node, 'glusterfs', threshold, volume_status,
+ volume, 'Self-heal Daemon')
+ if cpu_spikes:
+ g.log.error("CPU usage spikes observed more than "
+ "threshold %d on node %s on volume %s for shd",
+ threshold, node, volume)
+ is_there_a_spike.append(cpu_spikes)
+
+ return any(is_there_a_spike)
+
+
+def check_for_cpu_usage_spikes_on_glusterfsd(nodes, test_name, threshold=3):
+ """Check for CPU usage spikes in glusterfsd
+
+ Args:
+ nodes(list): Servers on which CPU usage has to be checked
+ test_name(str): Name of testcase for which CPU usage has to be checked
+
+ Kwargs:
+ threshold(int): Accepted amount of instances of 100% CPU usage
+ (Default:3)
+
+ Returns:
+ bool: True if CPU spikes are more than threshold else False
+
+ NOTE:
+ This function should be executed with the volumes present on the cluster.
+ """
+ is_there_a_spike = []
+ for node in nodes:
+ # Get the volume status on the node
+ volume_status = get_volume_status(node)
+ dataframe = create_dataframe_from_csv(node, 'glusterfsd', test_name)
+ if dataframe.empty:
+ return False
+
+ for volume in volume_status.keys():
+ for process in volume_status[volume][node].keys():
+ # Skipping if process isn't a brick process
+ if process in ('Self-heal Daemon', 'Quota Daemon'):
+ continue
+
+ # Call function to check for cpu spikes
+ cpu_spikes = _check_for_cpu_usage_spikes(
+ dataframe, node, 'glusterfsd', threshold, volume_status,
+ volume, process)
+ if cpu_spikes:
+ g.log.error("CPU usage spikes observed more than "
+ "threshold %d on node %s on volume %s for "
+ "brick process %s",
+ threshold, node, volume, process)
+ is_there_a_spike.append(cpu_spikes)
+
+ return any(is_there_a_spike)
+
+
+def check_for_cpu_usage_spikes_on_glusterfs_fuse(nodes, test_name,
+ threshold=3):
+ """Check for CPU usage spikes on glusterfs fuse
+
+ Args:
+ nodes(list): Clients on which CPU usage has to be checked
+ test_name(str): Name of testcase for which CPU usage has to be checked
+
+ Kwargs:
+ threshold(int): Accepted amount of instances of 100% CPU usage
+ (Default:3)
+
+ Returns:
+ bool: True if CPU spikes are more than threshold else False
+
+ NOTE:
+ This function should be executed when the volume is still mounted.
+ """
+ is_there_a_spike = []
+ for node in nodes:
+ # Create a dataframe from the client's CSV file
+ dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
+ if dataframe.empty:
+ return False
+
+ # Call function to check for cpu spikes
+ cpu_spikes = _check_for_cpu_usage_spikes(
+ dataframe, node, 'glusterfs', threshold)
+ if cpu_spikes:
+ g.log.error("CPU usage spikes observed more than "
+ "threshold %d on node %s for client",
+ threshold, node)
+ is_there_a_spike.append(cpu_spikes)
+
+ return any(is_there_a_spike)
diff --git a/glustolibs-io/glustolibs/io/utils.py b/glustolibs-io/glustolibs/io/utils.py
index 52c2c7df0..16ee93f21 100755
--- a/glustolibs-io/glustolibs/io/utils.py
+++ b/glustolibs-io/glustolibs/io/utils.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2015-2016 Red Hat, Inc. <http://www.redhat.com>
+# Copyright (C) 2015-2020 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -17,20 +17,26 @@
"""
Description: Helper library for io modules.
"""
+from multiprocessing import Pool
import os
import subprocess
+
from glusto.core import Glusto as g
+from glustolibs.gluster.glusterfile import file_exists
from glustolibs.gluster.mount_ops import GlusterMount
-from multiprocessing import Pool
from glustolibs.gluster.volume_libs import get_subvols
+from glustolibs.misc.misc_libs import upload_scripts
-def collect_mounts_arequal(mounts):
+def collect_mounts_arequal(mounts, path=''):
"""Collects arequal from all the mounts
Args:
mounts (list): List of all GlusterMount objs.
+ Kwargs:
+ path (str): Path whose arequal is to be calculated.
+ Defaults to root of mountpoint
Returns:
tuple(bool, list):
On success returns (True, list of arequal-checksums of each mount)
@@ -45,11 +51,11 @@ def collect_mounts_arequal(mounts):
g.log.info("Start collecting arequal-checksum from all mounts")
all_mounts_procs = []
for mount_obj in mounts:
+ total_path = os.path.join(mount_obj.mountpoint, path)
g.log.info("arequal-checksum of mount %s:%s", mount_obj.client_system,
- mount_obj.mountpoint)
- cmd = "arequal-checksum -p %s -i .trashcan" % mount_obj.mountpoint
- proc = g.run_async(mount_obj.client_system, cmd,
- user=mount_obj.user)
+ total_path)
+ cmd = "arequal-checksum -p %s -i .trashcan" % total_path
+ proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user)
all_mounts_procs.append(proc)
all_mounts_arequal_checksums = []
_rc = True
@@ -68,7 +74,7 @@ def collect_mounts_arequal(mounts):
def log_mounts_info(mounts):
- """Logs mount information like df, stat, ls
+ """Log mount information like df, stat, ls
Args:
mounts (list): List of all GlusterMount objs.
@@ -83,22 +89,22 @@ def log_mounts_info(mounts):
# Mount Info
g.log.info("Look For Mountpoint:\n")
cmd = "mount | grep %s" % mount_obj.mountpoint
- _, _, _ = g.run(mount_obj.client_system, cmd)
+ g.run(mount_obj.client_system, cmd)
# Disk Space Usage
g.log.info("Disk Space Usage Of Mountpoint:\n")
cmd = "df -h %s" % mount_obj.mountpoint
- _, _, _ = g.run(mount_obj.client_system, cmd)
+ g.run(mount_obj.client_system, cmd)
# Long list the mountpoint
g.log.info("List Mountpoint Entries:\n")
cmd = "ls -ld %s" % mount_obj.mountpoint
- _, _, _ = g.run(mount_obj.client_system, cmd)
+ g.run(mount_obj.client_system, cmd)
# Stat mountpoint
g.log.info("Mountpoint Status:\n")
cmd = "stat %s" % mount_obj.mountpoint
- _, _, _ = g.run(mount_obj.client_system, cmd)
+ g.run(mount_obj.client_system, cmd)
def get_mounts_stat(mounts):
@@ -119,9 +125,8 @@ def get_mounts_stat(mounts):
for mount_obj in mounts:
g.log.info("Stat of mount %s:%s", mount_obj.client_system,
mount_obj.mountpoint)
- cmd = ("find %s | xargs stat" % (mount_obj.mountpoint))
- proc = g.run_async(mount_obj.client_system, cmd,
- user=mount_obj.user)
+ cmd = "find %s | xargs stat" % (mount_obj.mountpoint)
+ proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user)
all_mounts_procs.append(proc)
_rc = True
for i, proc in enumerate(all_mounts_procs):
@@ -157,7 +162,7 @@ def list_all_files_and_dirs_mounts(mounts):
for mount_obj in mounts:
g.log.info("Listing files and dirs on %s:%s", mount_obj.client_system,
mount_obj.mountpoint)
- cmd = ("find %s | grep -ve '%s'" % (mount_obj.mountpoint, ignore_dirs))
+ cmd = "find %s | grep -ve '%s'" % (mount_obj.mountpoint, ignore_dirs)
proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user)
all_mounts_procs.append(proc)
_rc = True
@@ -194,7 +199,7 @@ def view_snaps_from_mount(mounts, snaps):
for mount_obj in mounts:
g.log.info("Viewing '.snaps' on %s:%s", mount_obj.client_system,
mount_obj.mountpoint)
- cmd = ("ls -1 %s/.snaps" % mount_obj.mountpoint)
+ cmd = "ls -1 %s/.snaps" % mount_obj.mountpoint
proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user)
all_mounts_procs.append(proc)
@@ -229,7 +234,7 @@ def view_snaps_from_mount(mounts, snaps):
def validate_io_procs(all_mounts_procs, mounts):
- """Validates whether IO was successful or not
+ """Validate whether IO was successful or not.
Args:
all_mounts_procs (list): List of open connection descriptor as
@@ -316,19 +321,16 @@ def cleanup_mounts(mounts):
for mount_obj in mounts:
g.log.info("Cleaning up data from %s:%s", mount_obj.client_system,
mount_obj.mountpoint)
- if (not mount_obj.mountpoint or
- (os.path.realpath(os.path.abspath(mount_obj.mountpoint))
- == '/')):
+ if (not mount_obj.mountpoint or (os.path.realpath(os.path.abspath(
+ mount_obj.mountpoint)) == '/')):
g.log.error("%s on %s is not a valid mount point",
mount_obj.mountpoint, mount_obj.client_system)
continue
cmd = "rm -rf %s/*" % (mount_obj.mountpoint)
- proc = g.run_async(mount_obj.client_system, cmd,
- user=mount_obj.user)
+ proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user)
all_mounts_procs.append(proc)
valid_mounts.append(mount_obj)
- g.log.info("rm -rf on all clients is complete. Validating "
- "deletion now...")
+ g.log.info("rm -rf on all clients is complete. Validating deletion now...")
# Get cleanup status
_rc_rmdir = True
@@ -355,8 +357,7 @@ def cleanup_mounts(mounts):
for mount_obj in mounts:
cmd = ("find %s -mindepth 1 | grep -ve '%s'" %
(mount_obj.mountpoint, ignore_dirs))
- proc = g.run_async(mount_obj.client_system, cmd,
- user=mount_obj.user)
+ proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user)
all_mounts_procs.append(proc)
# Get cleanup status
@@ -383,8 +384,7 @@ def cleanup_mounts(mounts):
def run_bonnie(servers, directory_to_run, username="root"):
- """
- Module to run bonnie test suite on the given servers.
+ """Run bonnie test suite on the given servers.
Args:
servers (list): servers in which tests to be run.
@@ -459,8 +459,7 @@ def run_bonnie(servers, directory_to_run, username="root"):
def run_fio(servers, directory_to_run):
- """
- Module to run fio test suite on the given servers.
+ """Run fio test suite on the given servers.
Args:
servers (list): servers in which tests to be run.
@@ -536,15 +535,14 @@ def run_fio(servers, directory_to_run):
def run_mixed_io(servers, io_tools, directory_to_run):
- """
- Module to run different io patterns on each given servers.
+ """Run different io patterns on each given servers.
Args:
servers (list): servers in which tests to be run.
io_tools (list): different io tools. Currently fio, bonnie are
- supported.
+ supported.
directory_to_run (list): directory path where tests will run for
- each server.
+ each server.
Returns:
bool: True, if test passes in all servers, False otherwise
@@ -565,8 +563,7 @@ def run_mixed_io(servers, io_tools, directory_to_run):
for items in zip(servers, io_tools):
server_io_dict[items[0]] = items[1]
- io_dict = {'fio': run_fio,
- 'bonnie': run_bonnie}
+ io_dict = {'fio': run_fio, 'bonnie': run_bonnie}
func_list = []
for index, server in enumerate(servers):
@@ -586,8 +583,7 @@ def run_mixed_io(servers, io_tools, directory_to_run):
def is_io_procs_fail_with_rofs(self, all_mounts_procs, mounts):
- """
- Checks whether IO failed with Read-only file system error
+ """Check whether IO failed with Read-only file system error.
Args:
all_mounts_procs (list): List of open connection descriptor as
@@ -619,8 +615,8 @@ def is_io_procs_fail_with_rofs(self, all_mounts_procs, mounts):
g.log.info("EXPECTED : IO Failed on %s:%s",
self.mounts[i].client_system,
self.mounts[i].mountpoint)
- if ("Read-only file system" in err or
- "Read-only file system" in out):
+ if ("Read-only file system" in err
+ or "Read-only file system" in out):
g.log.info("EXPECTED : Read-only file system in output")
io_results[proc] = True
else:
@@ -637,8 +633,7 @@ def is_io_procs_fail_with_rofs(self, all_mounts_procs, mounts):
def is_io_procs_fail_with_error(self, all_mounts_procs, mounts, mount_type):
- """
- Checks whether IO failed with connection error
+ """Check whether IO failed with connection error.
Args:
all_mounts_procs (list): List of open connection descriptor as
@@ -672,8 +667,8 @@ def is_io_procs_fail_with_error(self, all_mounts_procs, mounts, mount_type):
self.mounts[i].client_system,
self.mounts[i].mountpoint)
if mount_type == "glusterfs":
- if ("Transport endpoint is not connected" in err or
- "Transport endpoint is not connected" in out):
+ if ("Transport endpoint is not connected" in err
+ or "Transport endpoint is not connected" in out):
g.log.info("EXPECTED : Transport endpoint is not connected"
" in output")
io_results[proc] = True
@@ -683,8 +678,7 @@ def is_io_procs_fail_with_error(self, all_mounts_procs, mounts, mount_type):
"not found in output")
io_results[proc] = False
if mount_type == "nfs":
- if ("Input/output error" in err or
- "Input/output error" in out):
+ if "Input/output error" in err or "Input/output error" in out:
g.log.info("EXPECTED : Input/output error in output")
io_results[proc] = True
else:
@@ -702,8 +696,7 @@ def is_io_procs_fail_with_error(self, all_mounts_procs, mounts, mount_type):
def compare_dir_structure_mount_with_brick(mnthost, mntloc, brick_list, type):
- """ Compare directory structure from mount point with brick path along
- with stat parameter
+ """Compare mount point dir structure with brick path along with stat param..
Args:
mnthost (str): hostname or ip of mnt system
@@ -725,8 +718,8 @@ def compare_dir_structure_mount_with_brick(mnthost, mntloc, brick_list, type):
if type == 2:
statformat = '%A'
- command = ("find %s -mindepth 1 -type d | xargs -r stat -c '%s'"
- % (mntloc, statformat))
+ command = "find %s -mindepth 1 -type d | xargs -r stat -c '%s'" % (
+ mntloc, statformat)
rcode, rout, _ = g.run(mnthost, command)
all_dir_mnt_perm = rout.strip().split('\n')
@@ -736,7 +729,8 @@ def compare_dir_structure_mount_with_brick(mnthost, mntloc, brick_list, type):
"xargs -r stat -c '%s'" % (brick_path, statformat))
rcode, rout, _ = g.run(brick_node, command)
all_brick_dir_perm = rout.strip().split('\n')
- retval = cmp(all_dir_mnt_perm, all_brick_dir_perm)
+ retval = (all_dir_mnt_perm > all_brick_dir_perm) - (
+ all_dir_mnt_perm < all_brick_dir_perm)
if retval != 0:
return False
@@ -769,8 +763,7 @@ def check_arequal_bricks_replicated(mnode, volname):
subvol_brick_list = subvols_dict['volume_subvols'][i]
node, brick_path = subvol_brick_list[0].split(':')
command = ('arequal-checksum -p %s '
- '-i .glusterfs -i .landfill -i .trashcan'
- % brick_path)
+ '-i .glusterfs -i .landfill -i .trashcan' % brick_path)
ret, arequal, _ = g.run(node, command)
if ret != 0:
g.log.error("Failed to calculate arequal for first brick"
@@ -782,21 +775,297 @@ def check_arequal_bricks_replicated(mnode, volname):
for brick in subvol_brick_list[1:]:
node, brick_path = brick.split(':')
command = ('arequal-checksum -p %s '
- '-i .glusterfs -i .landfill -i .trashcan'
- % brick_path)
+ '-i .glusterfs -i .landfill -i .trashcan' % brick_path)
ret, brick_arequal, _ = g.run(node, command)
if ret != 0:
- g.log.error('Failed to get arequal on brick %s'
- % brick)
+ g.log.error('Failed to get arequal on brick %s' % brick)
return False
g.log.info('Getting arequal for %s is successful', brick)
brick_total = brick_arequal.splitlines()[-1].split(':')[-1]
# compare arequal of first brick of subvol with all brick other
# bricks in subvol
if first_brick_total != brick_total:
- g.log.error('Arequals for subvol and %s are not equal'
- % brick)
+ g.log.error('Arequals for subvol and %s are not equal' % brick)
return False
g.log.info('Arequals for subvol and %s are equal', brick)
g.log.info('All arequals are equal for volume %s', volname)
return True
+
+
+def run_crefi(client, mountpoint, number, breadth, depth, thread=5,
+ random_size=False, fop='create', filetype='text',
+ minfs=10, maxfs=500, single=False, multi=False, size=100,
+ interval=100, nameBytes=10, random_filename=True):
+ """Run crefi on a given mount point and generate I/O.
+
+ Args:
+ client(str): Client on which I/O has to be performed.
+ mountpoint(str): Mount point where the client is mounted.
+ number(int): Number of files to be created.
+ breadth(int): Number of directories in one level.
+ depth(int): Number of levels of directories.
+
+ Kwargs:
+ thread(int): Number of threads used to generate fop.
+ random_size(bool): Random size of the file between min and max.
+ fop(str): fop can be [create|rename|chmod|chown|chgrp|symlink|hardlink|
+ truncate|setxattr] this specifies the type of fop to be
+ executed by default it is create.
+ filetype(str): filetype can be [text|sparse|binary|tar] this specifies
+ the type of file by default it is text.
+ minfs(int): If random is set to true then this value has to be altered
+ to change minimum file size. (Value is in KB)
+ maxfs(int): If random is set to true then this value has to be altered
+ to change maximum file size. (Value is in KB)
+ single(bool): Create files in a single directory.
+ multi(bool): Create files in sub-dir and sub-sub dir.
+ size(int): Size of the files to be created. (Value is in KB)
+ interval(int): Print number files created of interval.
+ nameBytes(int): Number of bytes for filename. (Value is in Bytes)
+ random_filename(bool): It creates files with random names, if set to
+ False it creates files with file name file1,
+ file2 and so on.
+
+ Returns:
+ bool: True if I/O ran successfully, otherwise False.
+
+ NOTE:
+ To use this function it is a prerequisite to have crefi installed
+ on all the clients. Please use the below command to install it:
+ $ pip install crefi
+ $ pip install pyxattr
+ """
+
+ # Checking value of fop.
+ list_of_fops = ["create", "rename", "chmod", "chown", "chgrp", "symlink",
+ "hardlink", "truncate", "setxattr"]
+ if fop not in list_of_fops:
+ g.log.error("fop value is not valid.")
+ return False
+
+ # Checking value of filetype.
+ list_of_filetypes = ["text", "sparse", "binary", "tar"]
+ if filetype not in list_of_filetypes:
+ g.log.error("filetype is not a valid file type.")
+ return False
+
+ # Checking if single and multi both are set to true.
+ if single and multi:
+ g.log.error("single and mutli both can't be true.")
+ return False
+
+ # Checking if file size and random size arguments are given together.
+ if size != 100 and random_size:
+ g.log.error("Size and Random size can't be used together.")
+ return False
+
+ # Checking if minfs is greater than or equal to maxfs.
+ if random_size and (minfs >= maxfs):
+ g.log.error("minfs shouldn't be greater than or equal to maxfs.")
+ return False
+
+ # Creating basic command.
+ command = "crefi %s -n %s -b %s -d %s " % (
+ mountpoint, number, breadth, depth)
+
+ # Add the thread count if it differs from the default of 5.
+ if thread != 5:
+ command = command + ("-T %s " % thread)
+
+ # Checking if random size is true or false.
+ if random_size:
+ command = command + "--random "
+ if minfs != 10:
+ command = command + ("--min %s " % minfs)
+ if maxfs != 500:
+ command = command + ("--max %s " % maxfs)
+
+ # Checking fop and adding it if not create.
+ if fop != "create":
+ command = command + ("--fop %s " % fop)
+
+ # Add the file size if it differs from the default of 100.
+ if size != 100:
+ command = command + ("--size %s " % size)
+
+ # Checking if single or multi is true.
+ if single:
+ command = command + "--single "
+ if multi:
+ command = command + "--multi "
+
+ # Checking if random_filename is false.
+ if not random_filename:
+ command = command + "-R "
+
+ # Add the print interval if it differs from the default of 100.
+ if interval != 100:
+ command = command + ("-I %s " % interval)
+
+ # Add the filename length if it differs from the default of 10 bytes.
+ if nameBytes != 10:
+ command = command + ("-l %s " % nameBytes)
+
+ # Checking filetype and setting it if not text.
+ if filetype != "text":
+ command = command + ("-t %s " % filetype)
+
+ # Running the command on the client node.
+ ret, _, _ = g.run(client, command)
+ if ret:
+ g.log.error("Failed to run crefi on %s." % client)
+ return False
+ return True
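+
+# Hedged sketch (client and mount point assumed): create 100 text files
+# spread 5 directories wide and 2 levels deep.
+#
+#   ret = run_crefi('client1.example.com', '/mnt/testvol', 100, 5, 2)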
+
+
+def run_cthon(mnode, volname, clients, dir_name):
+ """This function runs the cthon test suite.
+
+ Args:
+ mnode (str) : IP of the server exporting the gluster volume.
+ volname (str) : The volume name.
+ clients (list) : List of client machines where
+ the test needs to be run.
+ dir_name (str) : Directory where the repo
+ is cloned.
+
+ Returns:
+ bool : True if the cthon test passes successfully
+ False otherwise.
+ """
+ param_list = ['-b', '-g', '-s', '-l']
+ vers_list = ['4.0', '4.1']
+
+ for client in clients:
+ g.log.info("Running tests on client %s" % client)
+ for vers in vers_list:
+ g.log.info("Running tests on client version %s" % vers)
+ for param in param_list:
+ # Initialising the test_type that will be running
+ if param == '-b':
+ test_type = "Basic"
+ elif param == '-g':
+ test_type = "General"
+ elif param == '-s':
+ test_type = "Special"
+ else:
+ test_type = "Lock"
+ g.log.info("Running %s test" % test_type)
+ cmd = "cd /root/%s; ./server %s -o vers=%s -p %s -N 1 %s;" % (
+ dir_name, param, vers, volname, mnode)
+ ret, _, _ = g.run(client, cmd)
+ if ret:
+ g.log.error("Error with %s test" % test_type)
+ return False
+ else:
+ g.log.info("%s test successfully passed" % test_type)
+ return True
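+
+# Hedged sketch (names assumed): run the suite cloned under /root/cthon04
+# against volume 'testvol' exported by 'server1'.
+#
+#   ret = run_cthon('server1.example.com', 'testvol',
+#                   ['client1.example.com'], 'cthon04')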
+
+
+def upload_file_dir_ops(clients):
+ """Upload file_dir_ops.py to all the clients.
+
+ Args:
+ clients(list): List of client machines where we need to upload
+ the script.
+
+ Returns:
+ bool: True if script is uploaded successfully
+ False otherwise.
+ """
+
+ g.log.info("Upload io scripts to clients %s for running IO on "
+ "mounts", clients)
+ file_dir_ops_path = ("/usr/share/glustolibs/io/scripts/"
+ "file_dir_ops.py")
+
+ if not upload_scripts(clients, file_dir_ops_path):
+ g.log.error("Failed to upload IO scripts to clients %s" %
+ clients)
+ return False
+
+ g.log.info("Successfully uploaded IO scripts to clients %s",
+ clients)
+ return True
+
+
+def open_file_fd(mountpoint, time, client, start_range=0,
+ end_range=0):
+ """Open FD for a file and write to file.
+
+ Args:
+ mountpoint(str): The mount point where the FD of file is to
+ be opened.
+ time(int): The time to wait after opening an FD.
+ client(str): The client from which FD is to be opened.
+
+ Kwargs:
+ start_range(int): The start range of the open FD.
+ (Default: 0)
+ end_range(int): The end range of the open FD.
+ (Default: 0)
+
+ Returns:
+ proc(object): Returns a process object
+
+ NOTE:
+ Before opening FD, check the currently used fds on the
+ system as only a limited number of fds can be opened on
+ a system at a given time for each process.
+ """
+ if not (start_range and end_range):
+ cmd = ("cd {}; exec 30<> file_openfd ; sleep {};"
+ "echo 'xyz' >&30".format(mountpoint, time))
+ else:
+ cmd = ('cd {}; for i in `seq {} {}`;'
+ ' do eval "exec $i<>file_openfd$i"; sleep {};'
+ ' echo "Write to open FD" >&$i; done'.format(
+ mountpoint, start_range, end_range, time))
+ proc = g.run_async(client, cmd)
+ return proc
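+
+# Hedged sketch (names assumed): hold a single FD open for 60 seconds
+# and then validate the write.
+#
+#   proc = open_file_fd('/mnt/testvol', 60, 'client1.example.com')
+#   ret, _, _ = proc.async_communicate()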
+
+
+def run_linux_untar(clients, mountpoint, dirs=('.',)):
+ """Run linux kernel untar on a given mount point
+
+ Args:
+ clients(str|list): Client nodes on which I/O
+ has to be started.
+ mountpoint(str): Mount point where the volume is
+ mounted.
+ Kwargs:
+ dirs(tuple): A tuple of dirs where untar has to be
+ started. (Default:('.',))
+ Returns:
+ list: Returns a list of process object else None
+ """
+ # Checking and converting clients to list.
+ if not isinstance(clients, list):
+ clients = [clients]
+
+ list_of_procs = []
+ for client in clients:
+ # Download the linux kernel tarball to /root, so that it can be
+ # reused in subsequent run_linux_untar() calls.
+ cmd = ("wget https://cdn.kernel.org/pub/linux/kernel/"
+ "v5.x/linux-5.4.54.tar.xz")
+ if not file_exists(client, '/root/linux-5.4.54.tar.xz'):
+ ret, _, _ = g.run(client, cmd)
+ if ret:
+ return None
+
+ for directory in dirs:
+ # copy linux tar to dir
+ cmd = ("cp /root/linux-5.4.54.tar.xz {}/{}"
+ .format(mountpoint, directory))
+ ret, _, _ = g.run(client, cmd)
+ if ret:
+ return None
+ # Start linux untar
+ cmd = ("cd {}/{};tar -xvf linux-5.4.54.tar.xz"
+ .format(mountpoint, directory))
+ proc = g.run_async(client, cmd)
+ list_of_procs.append(proc)
+
+ return list_of_procs
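+
+# Hedged sketch (names assumed): start kernel untar on two clients and
+# wait for both to finish.
+#
+#   procs = run_linux_untar(['client1', 'client2'], '/mnt/testvol')
+#   for proc in procs:
+#       ret, _, _ = proc.async_communicate()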