1 files changed, 924 insertions, 0 deletions
diff --git a/glustolibs-io/glustolibs/io/memory_and_cpu_utils.py b/glustolibs-io/glustolibs/io/memory_and_cpu_utils.py
new file mode 100644
index 000000000..4e1dadbd7
--- /dev/null
+++ b/glustolibs-io/glustolibs/io/memory_and_cpu_utils.py
@@ -0,0 +1,924 @@
+#  Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License along
+#  with this program; if not, write to the Free Software Foundation, Inc.,
+#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from glusto.core import Glusto as g
+
+from glustolibs.gluster.volume_ops import get_volume_status
+from glustolibs.gluster.glusterfile import file_exists
+from glustolibs.misc.misc_libs import upload_scripts, kill_process
+
+import numpy as np
+import pandas as pd
+from statistics import mean, median
+
+
+def check_upload_memory_and_cpu_logger_script(servers):
+    """Check and upload memory_and_cpu_logger.py to servers if not present
+
+    Args:
+     servers(list): List of all servers where script has to be uploaded
+
+    Returns:
+     bool: True if script is uploaded successfully else false
+    """
+    script = "/usr/share/glustolibs/io/scripts/memory_and_cpu_logger.py"
+    is_present = []
+    for server in servers:
+        if not file_exists(server, script):
+            if not upload_scripts(server, script):
+                g.log.error("Unable to upload memory_and_cpu_logger.py on %s",
+                            server)
+                is_present.append(False)
+            else:
+                is_present.append(True)
+    return all(is_present)
+
+
+def _start_logging_processes(process, servers, test_name, interval, count):
+    """Start logging processes on all nodes for a given process
+
+    Args:
+     servers(list): Servers on which CPU and memory usage has to be logged
+     test_name(str): Name of testcase for which logs are to be collected
+     interval(int): Time interval after which logs are to be collected
+     count(int): Number of samples to be captured
+
+    Returns:
+     list: A list of logging processes
+    """
+    cmd = ("/usr/bin/env python "
+           "/usr/share/glustolibs/io/scripts/memory_and_cpu_logger.py"
+           " -p %s -t %s -i %d -c %d" % (process, test_name,
+                                         interval, count))
+    logging_process = []
+    for server in servers:
+        proc = g.run_async(server, cmd)
+        logging_process.append(proc)
+    return logging_process
+
+
+def log_memory_and_cpu_usage_on_servers(servers, test_name, interval=60,
+                                        count=100):
+    """Log memory and CPU usage of gluster server processes
+
+    Args:
+     servers(list): Servers on which CPU and memory usage has to be logged
+     test_name(str): Name of the testcase for which logs are to be collected
+
+    Kwargs:
+     interval(int): Time interval after which logs are to be collected
+                    (Default:60)
+     count(int): Number of samples to be captured (Default:100)
+
+    Returns:
+     dict: Logging processes dict for all gluster server processes
+    """
+    logging_process_dict = {}
+    for proc_name in ('glusterd', 'glusterfs', 'glusterfsd'):
+        logging_procs = _start_logging_processes(
+            proc_name, servers, test_name, interval, count)
+        logging_process_dict[proc_name] = logging_procs
+    return logging_process_dict
+
+
+def log_memory_and_cpu_usage_on_clients(servers, test_name, interval=60,
+                                        count=100):
+    """Log memory and CPU usage of gluster client processes
+
+    Args:
+     servers(list): Clients on which CPU and memory usage has to be logged
+     test_name(str): Name of testcase for which logs are to be collected
+
+    Kwargs:
+     interval(int): Time interval after which logs are to be collected
+                    (Defaults:60)
+     count(int): Number of samples to be captured (Default:100)
+
+    Returns:
+     dict: Logging processes dict for all gluster client processes
+    """
+    logging_process_dict = {}
+    logging_procs = _start_logging_processes(
+        'glusterfs', servers, test_name, interval, count)
+    logging_process_dict['glusterfs'] = logging_procs
+    return logging_process_dict
+
+
+def log_memory_and_cpu_usage_on_cluster(servers, clients, test_name,
+                                        interval=60, count=100):
+    """Log memory and CPU usage on gluster cluster
+
+    Args:
+     servers(list): Servers on which memory and CPU usage is to be logged
+     clients(list): Clients on which memory and CPU usage is to be logged
+     test_name(str): Name of testcase for which logs are to be collected
+
+    Kwargs:
+     interval(int): Time interval after which logs are to be collected
+                    (Default:60)
+     count(int): Number of samples to be captured (Default:100)
+
+    Returns:
+     dict: Logging processes dict for all servers and clients
+    """
+    # Start logging on all servers
+    server_logging_processes = log_memory_and_cpu_usage_on_servers(
+        servers, test_name, interval, count)
+    if not server_logging_processes:
+        return {}
+
+    # Starting logging on all clients
+    client_logging_processes = log_memory_and_cpu_usage_on_clients(
+        clients, test_name, interval, count)
+    if not client_logging_processes:
+        return {}
+
+    # Combining dicts
+    logging_process_dict = {}
+    for node_type, proc_dict in (('server', server_logging_processes),
+                                 ('client', client_logging_processes)):
+        logging_process_dict[node_type] = {}
+        for proc in proc_dict:
+            logging_process_dict[node_type][proc] = (
+                proc_dict[proc])
+    return logging_process_dict
+
+
+def _process_wait_flag_append(proc, flag):
+    """Run async communicate and adds true to flag list"""
+    # If the process is already completed  async_communicate()
+    # throws a ValueError
+    try:
+        proc.async_communicate()
+        flag.append(True)
+    except ValueError:
+        flag.append(True)
+
+
+def wait_for_logging_processes_to_stop(proc_dict, cluster=False):
+    """Wait for all given logging processes to stop
+
+    Args:
+     proc_dict(dict): Dictionary of all the active logging processes
+
+    Kwargs:
+     cluster(bool): True if proc_dict is for the entire cluster else False
+                    (Default:False)
+
+    Retruns:
+     bool: True if processes are completed else False
+    """
+    flag = []
+    if cluster:
+        for sub_dict in proc_dict:
+            for proc_name in proc_dict[sub_dict]:
+                for proc in proc_dict[sub_dict][proc_name]:
+                    _process_wait_flag_append(proc, flag)
+    else:
+        for proc_name in proc_dict:
+            for proc in proc_dict[proc_name]:
+                _process_wait_flag_append(proc, flag)
+    return all(flag)
+
+
+def kill_all_logging_processes(proc_dict, nodes, cluster=False):
+    """Kill logging processes on all given nodes
+
+    Args:
+     proc_dict(dict): Dictonary of all active logging processes
+     nodes(list): List of nodes where logging has to be stopped
+
+    Kwargs:
+     cluster(bool): True if proc_dict is for a full cluster else False
+                    (Default:False)
+
+    Retruns:
+     bool: True if processes are completed else False
+    """
+    # Kill all logging processes
+    for server in nodes:
+        if not kill_process(server, process_names='memory_and_cpu_logger.py'):
+            g.log.error("Unable to kill some of the processes at %s.", server)
+
+    # This will stop the async threads created by run_aysnc() as the proc is
+    # already killed.
+    ret = wait_for_logging_processes_to_stop(proc_dict, cluster)
+    if ret:
+        return True
+    return False
+
+
+def create_dataframe_from_csv(node, proc_name, test_name):
+    """Creates a dataframe from a given process.
+
+    Args:
+     node(str): Node from which csv is to be picked
+     proc_name(str): Name of process for which csv is to picked
+     test_name(str): Name of the testcase for which CSV
+
+    Returns:
+     dataframe: Pandas dataframe if CSV file exits else None
+    """
+    # Read the csv file generated by memory_and_cpu_logger.py
+    ret, raw_data, _ = g.run(node, "cat /root/{}.csv"
+                             .format(proc_name))
+    if ret:
+        return None
+
+    # Split the complete dump to individual lines
+    data = raw_data.split("\r\n")
+    rows, flag = [], False
+    for line in data:
+        values = line.split(',')
+        if test_name == values[0]:
+            # Reset rows if it's the second instance
+            if flag:
+                rows = []
+            flag = True
+            continue
+
+        # Pick and append values which have complete entry
+        if flag and len(values) == 4:
+            rows.append(values)
+
+    # Create a panda dataframe and set the type for columns
+    dataframe = pd.DataFrame(rows[1:], columns=rows[0])
+    conversion_dict = {'Process ID': int,
+                       'CPU Usage': float,
+                       'Memory Usage': float}
+    dataframe = dataframe.astype(conversion_dict)
+    return dataframe
+
+
+def _get_min_max_mean_median(entrylist):
+    """Get the mix, max. mean and median of a list
+
+    Args:
+      entrylist(list): List of values to be used
+
+    Returns:
+       dict:Result dict generate from list
+    """
+    result = {}
+    result['Min'] = min(entrylist)
+    result['Max'] = max(entrylist)
+    result['Mean'] = mean(entrylist)
+    result['Median'] = median(entrylist)
+    return result
+
+
+def _compute_min_max_mean_median(dataframe, data_dict, process, node,
+                                 volume=None, brick=None):
+    """Compute min, max, mean and median for a given process
+
+    Args:
+     dataframe(panda dataframe): Panda data frame of the csv file
+     data_dict(dict): data dict to which info is to be added
+     process(str): Name of process for which data is to be computed
+     node(str): Node for which min, max, mean and median has to be computed
+
+    Kwargs:
+     volume(str): Volume name of the volume for which data is to be computed
+     brick(str): Brick path of the brick for which data is to be computed
+    """
+    if volume and process == 'glusterfs':
+        # Create subdict inside dict
+        data_dict[node][process][volume] = {}
+        for usage in ('CPU Usage', 'Memory Usage'):
+            # Create usage subdict
+            data_dict[node][process][volume][usage] = {}
+
+            # Clean data and compute values
+            cleaned_usage = list(dataframe[usage].dropna())
+            out = _get_min_max_mean_median(cleaned_usage)
+
+            # Add values to data_dict
+            for key in ('Min', 'Max', 'Mean', 'Median'):
+                data_dict[node][process][volume][usage][key] = out[key]
+
+    if volume and brick and process == 'glusterfsd':
+        # Create subdict inside dict
+        data_dict[node][process][volume] = {}
+        data_dict[node][process][volume][brick] = {}
+        for usage in ('CPU Usage', 'Memory Usage'):
+            # Create usage subdict
+            data_dict[node][process][volume][brick][usage] = {}
+
+            # Clean data and compute values
+            cleaned_usage = list(dataframe[usage].dropna())
+            out = _get_min_max_mean_median(cleaned_usage)
+
+            # Add values to data_dict
+            for key in ('Min', 'Max', 'Mean', 'Median'):
+                data_dict[node][process][volume][brick][usage][key] = out[key]
+
+    # Compute CPU Uage and Memory Usage for glusterd
+    else:
+        for usage in ('CPU Usage', 'Memory Usage'):
+            # Create uage subdict
+            data_dict[node][process][usage] = {}
+
+            # Clean data and compute value
+            cleaned_usage = list(dataframe[usage].dropna())
+            out = _get_min_max_mean_median(cleaned_usage)
+
+            # Add values to data_dict
+            for key in ('Min', 'Max', 'Mean', 'Median'):
+                data_dict[node][process][usage][key] = out[key]
+
+
+def compute_data_usage_stats_on_servers(nodes, test_name):
+    """Compute min, max, mean and median for servers
+
+    Args:
+     nodes(list): Servers from which data is to be used to compute min, max
+                  , mean, mode and median
+     test_name(str): Name of testcase for which data has to be processed
+
+    Returns:
+     dict: dict of min, max, mean and median for a given process
+
+    NOTE:
+     This function has to be always run before cleanup.
+    """
+    data_dict = {}
+    for node in nodes:
+        # Get the volume status on the node
+        volume_status = get_volume_status(node)
+        data_dict[node] = {}
+        for process in ('glusterd', 'glusterfs', 'glusterfsd'):
+
+            # Generate a dataframe from the csv file
+            dataframe = create_dataframe_from_csv(node, process, test_name)
+            if dataframe.empty:
+                return {}
+
+            data_dict[node][process] = {}
+            if process == 'glusterd':
+                # Checking if glusterd is restarted.
+                if len(set(dataframe['Process ID'])) > 1:
+                    data_dict[node][process]['is_restarted'] = True
+                else:
+                    data_dict[node][process]['is_restarted'] = False
+
+                # Call function to compute min, max, mean and median
+                _compute_min_max_mean_median(dataframe, data_dict, process,
+                                             node)
+                continue
+
+            # Map volumes to volume process
+            for volume in volume_status.keys():
+                for proc in volume_status[volume][node].keys():
+                    if (proc == 'Self-heal Daemon' and process == 'glusterfs'):
+                        # Fetching pid from volume status output and create a
+                        # dataframe with the entries of only that pid
+                        pid = volume_status[volume][node][proc]['pid']
+                        proc_dataframe = dataframe[
+                            dataframe['Process ID'] == pid]
+
+                        # Call function to compute min, max, mean
+                        # and median
+                        _compute_min_max_mean_median(
+                            proc_dataframe, data_dict, process, node, volume)
+
+                    if (proc.count('/') >= 2 and process == 'glusterfsd'):
+                        # Fetching pid from volume status output and create a
+                        # dataframe with the entries of only that pid
+                        pid = volume_status[volume][node][proc]['pid']
+                        proc_dataframe = dataframe[
+                            dataframe['Process ID'] == pid]
+
+                        # Call function to compute min, max, mean and median
+                        _compute_min_max_mean_median(
+                            proc_dataframe, data_dict, process, node, volume,
+                            proc)
+
+    return data_dict
+
+
+def compute_data_usage_stats_on_clients(nodes, test_name):
+    """Compute min, max, mean and median for clients
+
+    Args:
+     nodes(list): Clients from which data is to be used to compute min, max
+                  , mean, mode and median
+     test_name(str): Name of the testcase for which data has to be processed
+
+    Returns:
+     dict: dict of min, max, mean and median for a given process
+    """
+    data_dict = {}
+    for node in nodes:
+        data_dict[node] = {}
+        dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
+        if dataframe.empty:
+            return {}
+
+        data_dict[node]['glusterfs'] = {}
+        # Call function to compute min, max, mean and median
+        _compute_min_max_mean_median(dataframe, data_dict, 'glusterfs', node)
+
+    return data_dict
+
+
+def _perform_three_point_check_for_memory_leak(dataframe, node, process, gain,
+                                               volume_status=None,
+                                               volume=None,
+                                               vol_name=None):
+    """Perform three point check
+
+    Args:
+     dataframe(panda dataframe): Panda dataframe of a given process
+     node(str): Node on which memory leak has to be checked
+     process(str): Name of process for which check has to be done
+     gain(float): Accepted amount of leak for a given testcase in MB
+
+    kwargs:
+     volume_status(dict): Volume status output on the give name
+     volumne(str):Name of volume for which 3 point check has to be done
+     vol_name(str): Name of volume process according to volume status
+
+    Returns:
+     bool: True if memory leak instances are observed else False
+    """
+    # Filter dataframe to be process wise if it's volume specific process
+    if process in ('glusterfs', 'glusterfsd'):
+        if process == 'glusterfs' and vol_name:
+            pid = int(volume_status[volume][node][vol_name]['pid'])
+            dataframe = dataframe[dataframe['Process ID'] == pid]
+
+    # Compute usage gain throught the data frame
+    memory_increments = list(dataframe['Memory Usage'].diff().dropna())
+
+    # Check if usage is more than accepted amount of leak
+    memory_leak_decision_array = np.where(
+        dataframe['Memory Usage'].diff().dropna() > gain, True, False)
+    instances_of_leak = np.where(memory_leak_decision_array)[0]
+
+    # If memory leak instances are present check if it's reduced
+    count_of_leak_instances = len(instances_of_leak)
+    if count_of_leak_instances > 0:
+        g.log.error('There are %s instances of memory leaks on node %s',
+                    count_of_leak_instances, node)
+        for instance in instances_of_leak:
+            # In cases of last log file entry the below op could throw
+            # IndexError which is handled as below.
+            try:
+                # Check if memory gain had decrease in the consecutive
+                # entries, after 2 entry and betwen current and last entry
+                if all([memory_increments[instance+1] >
+                       memory_increments[instance],
+                       memory_increments[instance+2] >
+                       memory_increments[instance],
+                       (memory_increments[len(memory_increments)-1] >
+                        memory_increments[instance])]):
+                    return True
+
+            except IndexError:
+                # In case of last log file entry rerun the command
+                # and check for difference
+                g.log.info('Instance at last log entry.')
+                if process in ('glusterfs', 'glusterfsd'):
+                    cmd = ("ps u -p %s | awk 'NR>1 && $11~/%s$/{print "
+                           " $6/1024}'" % (pid, process))
+                else:
+                    cmd = ("ps u -p `pgrep glusterd` | awk 'NR>1 && $11~/"
+                           "glusterd$/{print $6/1024}'")
+                ret, out, _ = g.run(node, cmd)
+                if ret:
+                    g.log.error('Unable to run the command to fetch current '
+                                'memory utilization.')
+                    continue
+                usage_now = float(out.replace('\n', '')[2])
+                last_entry = dataframe['Memory Usage'].iloc[-1]
+
+                # Check if current memory usage is higher than last entry
+                fresh_diff = last_entry - usage_now
+                if fresh_diff > gain and last_entry > fresh_diff:
+                    return True
+        return False
+
+
+def check_for_memory_leaks_in_glusterd(nodes, test_name, gain=30.0):
+    """Check for memory leaks in glusterd
+
+    Args:
+     nodes(list): Servers on which memory leaks have to be checked
+     test_name(str): Name of testcase for which memory leaks has to be checked
+
+    Kwargs:
+     gain(float): Accepted amount of leak for a given testcase in MB
+                  (Default:30)
+
+    Returns:
+      bool: True if memory leak was obsevred else False
+    """
+    is_there_a_leak = []
+    for node in nodes:
+        dataframe = create_dataframe_from_csv(node, 'glusterd', test_name)
+        if dataframe.empty:
+            return False
+
+        # Call 3 point check function
+        three_point_check = _perform_three_point_check_for_memory_leak(
+            dataframe, node, 'glusterd', gain)
+        if three_point_check:
+            g.log.error("Memory leak observed on node %s in glusterd",
+                        node)
+        is_there_a_leak.append(three_point_check)
+
+    return any(is_there_a_leak)
+
+
+def check_for_memory_leaks_in_glusterfs(nodes, test_name, gain=30.0):
+    """Check for memory leaks in glusterfs
+
+    Args:
+     nodes(list): Servers on which memory leaks have to be checked
+     test_name(str): Name of testcase for which memory leaks has to be checked
+
+    Kwargs:
+     gain(float): Accepted amount of leak for a given testcase in MB
+                  (Default:30)
+
+    Returns:
+      bool: True if memory leak was obsevred else False
+
+    NOTE:
+     This function should be executed with the volumes present on the cluster
+    """
+    is_there_a_leak = []
+    for node in nodes:
+        # Get the volume status on the node
+        volume_status = get_volume_status(node)
+        dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
+        if dataframe.empty:
+            return False
+
+        for volume in volume_status.keys():
+            for process in volume_status[volume][node].keys():
+                # Skiping if process isn't Self-heal Deamon
+                if process != 'Self-heal Daemon':
+                    continue
+
+                # Call 3 point check function
+                three_point_check = _perform_three_point_check_for_memory_leak(
+                    dataframe, node, 'glusterfs', gain, volume_status, volume,
+                    'Self-heal Daemon')
+                if three_point_check:
+                    g.log.error("Memory leak observed on node %s in shd "
+                                "on volume %s", node, volume)
+                is_there_a_leak.append(three_point_check)
+
+    return any(is_there_a_leak)
+
+
+def check_for_memory_leaks_in_glusterfsd(nodes, test_name, gain=30.0):
+    """Check for memory leaks in glusterfsd
+
+    Args:
+     nodes(list): Servers on which memory leaks have to be checked
+     test_name(str): Name of testcase for which memory leaks has to be checked
+
+    Kwargs:
+     gain(float): Accepted amount of leak for a given testcase in MB
+                  (Default:30)
+
+    Returns:
+      bool: True if memory leak was obsevred else False
+
+    NOTE:
+     This function should be executed with the volumes present on the cluster.
+    """
+    is_there_a_leak = []
+    for node in nodes:
+        # Get the volume status on the node
+        volume_status = get_volume_status(node)
+        dataframe = create_dataframe_from_csv(node, 'glusterfsd', test_name)
+        if dataframe.empty:
+            return False
+
+        for volume in volume_status.keys():
+            for process in volume_status[volume][node].keys():
+                # Skiping if process isn't brick process
+                if not process.count('/'):
+                    continue
+
+                # Call 3 point check function
+                three_point_check = _perform_three_point_check_for_memory_leak(
+                    dataframe, node, 'glusterfsd', gain, volume_status, volume,
+                    process)
+                if three_point_check:
+                    g.log.error("Memory leak observed on node %s in brick "
+                                " process for brick %s on volume %s", node,
+                                process, volume)
+                is_there_a_leak.append(three_point_check)
+
+    return any(is_there_a_leak)
+
+
+def check_for_memory_leaks_in_glusterfs_fuse(nodes, test_name, gain=30.0):
+    """Check for memory leaks in glusterfs fuse
+
+    Args:
+     nodes(list): Servers on which memory leaks have to be checked
+     test_name(str): Name of testcase for which memory leaks has to be checked
+
+    Kwargs:
+     gain(float): Accepted amount of leak for a given testcase in MB
+                  (Default:30)
+
+    Returns:
+      bool: True if memory leak was observed else False
+
+    NOTE:
+     This function should be executed when the volume is still mounted.
+    """
+    is_there_a_leak = []
+    for node in nodes:
+        # Get the volume status on the node
+        dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
+        if dataframe.empty:
+            return False
+
+        # Call 3 point check function
+        three_point_check = _perform_three_point_check_for_memory_leak(
+            dataframe, node, 'glusterfs', gain)
+        if three_point_check:
+            g.log.error("Memory leak observed on node %s for client",
+                        node)
+
+            # If I/O is constantly running on Clients the memory
+            # usage spikes up and stays at a point for long.
+            last_entry = dataframe['Memory Usage'].iloc[-1]
+            cmd = ("ps u -p `pidof glusterfs` | "
+                   "awk 'NR>1 && $11~/glusterfs$/{print"
+                   " $6/1024}'")
+            ret, out, _ = g.run(node, cmd)
+            if ret:
+                g.log.error('Unable to run the command to fetch current '
+                            'memory utilization.')
+                continue
+
+            if float(out) != last_entry:
+                if float(out) > last_entry:
+                    is_there_a_leak.append(True)
+                    continue
+
+        is_there_a_leak.append(False)
+
+    return any(is_there_a_leak)
+
+
+def _check_for_oom_killers(nodes, process, oom_killer_list):
+    """Checks for OOM killers for a specific process
+
+    Args:
+     nodes(list): Nodes on which OOM killers have to be checked
+     process(str): Process for which OOM killers have to be checked
+     oom_killer_list(list): A list in which the presence of
+                            OOM killer has to be noted
+    """
+    cmd = ("grep -i 'killed process' /var/log/messages* "
+           "| grep -w '{}'".format(process))
+    ret_codes = g.run_parallel(nodes, cmd)
+    for key in ret_codes.keys():
+        ret, out, _ = ret_codes[key]
+        if not ret:
+            g.log.error('OOM killer observed on %s for %s', key, process)
+            g.log.error(out)
+            oom_killer_list.append(True)
+        else:
+            oom_killer_list.append(False)
+
+
+def check_for_oom_killers_on_servers(nodes):
+    """Check for OOM killers on servers
+
+    Args:
+     nodes(list): Servers on which OOM kills have to be checked
+
+    Returns:
+     bool: True if OOM killers are present on any server else False
+    """
+    oom_killer_list = []
+    for process in ('glusterfs', 'glusterfsd', 'glusterd'):
+        _check_for_oom_killers(nodes, process, oom_killer_list)
+    return any(oom_killer_list)
+
+
+def check_for_oom_killers_on_clients(nodes):
+    """Check for OOM killers on clients
+
+    Args:
+     nodes(list): Clients on which OOM kills have to be checked
+
+    Returns:
+     bool: True if OOM killers are present on any client else false
+    """
+    oom_killer_list = []
+    _check_for_oom_killers(nodes, 'glusterfs', oom_killer_list)
+    return any(oom_killer_list)
+
+
+def _check_for_cpu_usage_spikes(dataframe, node, process, threshold,
+                                volume_status=None, volume=None,
+                                vol_name=None):
+    """Check for cpu spikes for a given process
+
+    Args:
+     dataframe(panda dataframe): Panda dataframe of a given process
+     node(str): Node on which cpu spikes has to be checked
+     process(str): Name of process for which check has to be done
+     threshold(int): Accepted amount of 100% CPU usage instances
+
+    kwargs:
+     volume_status(dict): Volume status output on the give name
+     volume(str):Name of volume for which check has to be done
+     vol_name(str): Name of volume process according to volume status
+
+    Returns:
+     bool: True if number of instances more than threshold else False
+    """
+    # Filter dataframe to be process wise if it's volume specific process
+    if process in ('glusterfs', 'glusterfsd'):
+        pid = int(volume_status[volume][node][vol_name]['pid'])
+        dataframe = dataframe[dataframe['Process ID'] == pid]
+
+    # Check if usage is more than accepted amount of leak
+    cpu_spike_decision_array = np.where(
+        dataframe['CPU Usage'].dropna() == 100.0, True, False)
+    instances_of_spikes = np.where(cpu_spike_decision_array)[0]
+
+    return bool(len(instances_of_spikes) > threshold)
+
+
+def check_for_cpu_usage_spikes_on_glusterd(nodes, test_name, threshold=3):
+    """Check for CPU usage spikes on glusterd
+
+    Args:
+     nodes(list): Servers on which memory leaks have to be checked
+     test_name(str): Name of testcase for which memory leaks has to be checked
+
+    Kwargs:
+     threshold(int): Accepted amount of instances of 100% CPU usage
+                    (Default:3)
+
+    Returns:
+      bool: True if CPU spikes are more than threshold else False
+    """
+    is_there_a_spike = []
+    for node in nodes:
+        dataframe = create_dataframe_from_csv(node, 'glusterd', test_name)
+        if dataframe.empty:
+            return False
+
+        # Call function to check for cpu spikes
+        cpu_spikes = _check_for_cpu_usage_spikes(
+            dataframe, node, 'glusterd', threshold)
+        if cpu_spikes:
+            g.log.error("CPU usage spikes observed more than "
+                        "threshold %d on node %s for glusterd",
+                        threshold, node)
+        is_there_a_spike.append(cpu_spikes)
+
+    return any(is_there_a_spike)
+
+
+def check_for_cpu_usage_spikes_on_glusterfs(nodes, test_name, threshold=3):
+    """Check for CPU usage spikes on glusterfs
+
+    Args:
+     nodes(list): Servers on which memory leaks have to be checked
+     test_name(str): Name of testcase for which memory leaks has to be checked
+
+    Kwargs:
+     threshold(int): Accepted amount of instances of 100% CPU usage
+                    (Default:3)
+
+    Returns:
+      bool: True if CPU spikes are more than threshold else False
+
+    NOTE:
+     This function should be exuected with the volumes present on the cluster.
+    """
+    is_there_a_spike = []
+    for node in nodes:
+        # Get the volume status on the node
+        volume_status = get_volume_status(node)
+        dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
+        if dataframe.empty:
+            return False
+
+        for volume in volume_status.keys():
+            for process in volume_status[volume][node].keys():
+                # Skiping if process isn't Self-heal Deamon
+                if process != 'Self-heal Daemon':
+                    continue
+
+                # Call function to check for cpu spikes
+                cpu_spikes = _check_for_cpu_usage_spikes(
+                    dataframe, node, 'glusterfs', threshold, volume_status,
+                    volume, 'Self-heal Daemon')
+                if cpu_spikes:
+                    g.log.error("CPU usage spikes observed more than "
+                                "threshold %d on node %s on volume %s for shd",
+                                threshold, node, volume)
+                is_there_a_spike.append(cpu_spikes)
+
+    return any(is_there_a_spike)
+
+
+def check_for_cpu_usage_spikes_on_glusterfsd(nodes, test_name, threshold=3):
+    """Check for CPU usage spikes in glusterfsd
+
+    Args:
+     nodes(list): Servers on which memory leaks have to be checked
+     test_name(str): Name of testcase for which memory leaks has to be checked
+
+    Kwargs:
+     threshold(int): Accepted amount of instances of 100% CPU usage
+                    (Default:3)
+
+    Returns:
+      bool: True if CPU spikes are more than threshold else False
+
+    NOTE:
+     This function should be exuected with the volumes present on the cluster.
+    """
+    is_there_a_spike = []
+    for node in nodes:
+        # Get the volume status on the node
+        volume_status = get_volume_status(node)
+        dataframe = create_dataframe_from_csv(node, 'glusterfsd', test_name)
+        if dataframe.empty:
+            return False
+
+        for volume in volume_status.keys():
+            for process in volume_status[volume][node].keys():
+                # Skiping if process isn't brick process
+                if process in ('Self-heal Daemon', 'Quota Daemon'):
+                    continue
+
+                # Call function to check for cpu spikes
+                cpu_spikes = _check_for_cpu_usage_spikes(
+                    dataframe, node, 'glusterfsd', threshold, volume_status,
+                    volume, process)
+                if cpu_spikes:
+                    g.log.error("CPU usage spikes observed more than "
+                                "threshold %d on node %s on volume %s for "
+                                "brick process %s",
+                                threshold, node, volume, process)
+                is_there_a_spike.append(cpu_spikes)
+
+    return any(is_there_a_spike)
+
+
+def check_for_cpu_usage_spikes_on_glusterfs_fuse(nodes, test_name,
+                                                 threshold=3):
+    """Check for CPU usage spikes on glusterfs fuse
+
+    Args:
+     nodes(list): Servers on which memory leaks have to be checked
+     test_name(str): Name of testcase for which memory leaks has to be checked
+
+    Kwargs:
+     threshold(int): Accepted amount of instances of 100% CPU usage
+                    (Default:3)
+
+    Returns:
+      bool: True if CPU spikes are more than threshold else False
+
+    NOTE:
+     This function should be executed when the volume is still mounted.
+    """
+    is_there_a_spike = []
+    for node in nodes:
+        # Get the volume status on the node
+        dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
+        if dataframe.empty:
+            return False
+
+        # Call function to check for cpu spikes
+        cpu_spikes = _check_for_cpu_usage_spikes(
+            dataframe, node, 'glusterfs', threshold)
+        if cpu_spikes:
+            g.log.error("CPU usage spikes observed more than "
+                        "threshold %d on node %s for client",
+                        threshold, node)
+        is_there_a_spike.append(cpu_spikes)
+
+    return any(is_there_a_spike)