Standalone script which runs the tools that generate data into the system

Change-Id: I4c5a9baa1178a3fec4863e7cb7a81493ee4a52f7 Signed-off-by: Arthy Loganathan <aloganat@redhat.com>
author: Arthy Loganathan <aloganat@redhat.com> 2017-12-07 10:50:03 +0530
committer: Nigel Babu <nigelb@redhat.com> 2018-04-20 04:52:02 +0000
commit: 172ff6402afb8ab934c7567b95ecf74aa9da2dd2 (patch)
tree: 58d55c4ce94ced37c0d4463c1a155b4d5140d6fc /glustolibs-io
parent: b0de1e89029b2af15b1fdc92e83c378c5a353346 (diff)
1 files changed, 521 insertions, 0 deletions
diff --git a/glustolibs-io/shared_files/scripts/generate_io.py b/glustolibs-io/shared_files/scripts/generate_io.py
new file mode 100644
index 000000000..1b28983a0
--- /dev/null
+++ b/glustolibs-io/shared_files/scripts/generate_io.py
@@ -0,0 +1,521 @@
+#!/usr/bin/env python
+#  Copyright (C) 2015-2016  Red Hat, Inc. <http://www.redhat.com>
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License along
+#  with this program; if not, write to the Free Software Foundation, Inc.,
+#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+import subprocess
+import re
+import time
+import multiprocessing
+import tempfile
+import os
+import shutil
+import signal
+import argparse
+import sys
+import yaml
+import datetime
+
+# Number of bytes in one GiB (2 ** 30). Kept as a float so that dividing an
+# integer byte count by it yields a fractional result under Python 2 as well.
+ONE_GB_BYTES = 1073741824.0
+
+"""
+Script for generating IO on client
+"""
+
+
+def get_disk_usage(path):
+    """
+    Gets the file system usage of the given path by parsing `stat -f` output
+
+    Args:
+        path (str): path for which disk usage to be calculated
+
+    Returns:
+        dict: disk usage in dict format on success, with the keys
+            'total', 'free', 'used' (float, in units of ONE_GB_BYTES),
+            'used_percent' (float),
+            'total_inode', 'free_inode', 'used_inode' (int) and
+            'used_percent_inode' (float).
+            A percentage is reported as 0.0 when the corresponding total
+            is 0, instead of failing with a division by zero.
+        None Type, on failure
+
+    """
+
+    def used_percent(free, total):
+        # A file system may report a total of 0 (e.g. no inode count);
+        # avoid ZeroDivisionError in that case.
+        if not total:
+            return 0.0
+        return 100 - (100.0 * free / total)
+
+    # The command is passed as a list (no shell) so that a path containing
+    # spaces or shell metacharacters reaches stat unmodified.
+    # universal_newlines=True yields text output on Python 2 and Python 3.
+    try:
+        p = subprocess.Popen(['stat', '-f', path], stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE,
+                             universal_newlines=True)
+        out, _ = p.communicate()
+    except OSError:
+        # Without a shell, a missing/non-executable stat raises here
+        print("Failed to execute stat command")
+        return None
+    if p.returncode != 0:
+        print("Failed to execute stat command")
+        return None
+
+    # Sample raw output of stat cmd to be parsed
+    #   File: "write_data.py"
+    #   ID: ffcb5576be049643 Namelen: 255     Type: ext2/ext3
+    # Block size: 4096       Fundamental block size: 4096
+    # Blocks: Total: 46014997   Free: 19716181   Available: 17372988
+    # Inodes: Total: 11698176   Free: 11605234
+
+    match = re.match(r'.*Block size:\s(\d+).*Blocks:\sTotal:\s(\d+)\s+?'
+                     r'Free:\s(\d+)\s+?Available:\s(\d+).*Inodes:\s'
+                     r'Total:\s(\d+)\s+?Free:\s(\d+)', out, re.S)
+    if match is None:
+        print("Regex mismatch in get_disk_usage()")
+        return None
+
+    # Groups, in order: block size, total/free/available blocks,
+    # total/free inodes. The available block count is not used.
+    b_size, b_total, b_free, _b_avail, i_total, i_free = (
+        int(value) for value in match.groups())
+
+    usage_info = dict()
+    usage_info['total'] = (b_total * b_size) / ONE_GB_BYTES
+    usage_info['free'] = (b_free * b_size) / ONE_GB_BYTES
+    usage_info['used_percent'] = used_percent(usage_info['free'],
+                                              usage_info['total'])
+    usage_info['total_inode'] = i_total
+    usage_info['free_inode'] = i_free
+    usage_info['used_percent_inode'] = used_percent(i_free, i_total)
+    usage_info['used'] = usage_info['total'] - usage_info['free']
+    usage_info['used_inode'] = i_total - i_free
+    return usage_info
+
+
+def get_disk_used_percent(dirname):
+    """
+    Returns the 'used_percent' entry of get_disk_usage() for a directory
+
+    Args:
+       dirname (str): absolute path of directory
+
+    Returns:
+        float: used percent for given directory
+        None Type, if the disk usage could not be obtained
+
+    Example:
+        get_disk_used_percent("/mnt/glusterfs")
+
+    """
+
+    usage = get_disk_usage(dirname)
+    if usage is not None:
+        return usage['used_percent']
+
+    # get_disk_usage() has already reported why it failed
+    print("Failed to get disk used percent for %s" % dirname)
+    return None
+
+
+def check_if_percent_to_fill_or_timeout_is_met(dirname, percent_to_fill,
+                                               timeout):
+    """
+    Module to check if percent to fill or timeout is met.
+
+    Polls the disk usage of the directory every 5 seconds until the used
+    percentage reaches percent_to_fill or timeout seconds have elapsed.
+
+    Args:
+        dirname (str): absolute path of directory
+        percent_to_fill (int): percentage to fill the volume
+        timeout (int): timeout value in seconds. 0 means no timeout, i.e.
+            poll until the directory is filled with the given percent.
+
+    Returns:
+        bool: True, if volume is filled with given percent or timeout
+            is met. False, if the disk usage of the directory could not
+            be determined.
+
+    Example:
+        check_if_percent_to_fill_or_timeout_is_met("/mnt/glusterfs",
+                                                       10, 60)
+    """
+    poll_interval = 5
+    target = int(percent_to_fill)
+    elapsed = 0
+
+    while (timeout == 0) or (elapsed < timeout):
+        output = get_disk_usage(dirname)
+        if output is None:
+            # get_disk_usage() returns None on failure; subscripting it
+            # would raise TypeError, so report the failure instead.
+            print("Failed to get disk usage of directory %s" % dirname)
+            return False
+        used = output['used_percent']
+
+        # Both values are compared as whole percentages
+        if target <= int(used):
+            print("Directory %s is filled with given percent already. "
+                  "Percentage filled: %s"
+                  % (dirname, str(used)))
+            print("Directory is filled with given percentage %s"
+                  % str(percent_to_fill))
+            return True
+
+        remaining_to_fill = target - int(used)
+        print("Remaining space left to fill data in directory %s is %s"
+              % (dirname, str(remaining_to_fill)))
+        time_str = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
+        print("Directory %s used percent at time %s: %s"
+              % (dirname, time_str, used))
+        time.sleep(poll_interval)
+        elapsed = elapsed + poll_interval
+
+    # Reaching the timeout also counts as "met", as documented above
+    print("Timeout %s seconds reached before filling directory with "
+          "given percentage %s" % (str(timeout), str(percent_to_fill)))
+    return True
+
+
+def run_check_if_percent_to_fill_or_timeout_is_met(dirname,
+                                                   percent_to_fill,
+                                                   timeout, event):
+    """
+    Helper Module to check if percent to fill or timeout is met.
+
+    Runs check_if_percent_to_fill_or_timeout_is_met() and then sets the
+    given event to signal that the check is over.
+
+    Args:
+        dirname (str): absolute path of directory
+        percent_to_fill (int): percentage to fill the volume
+        timeout (int): timeout value
+        event (obj): event object on which set() is called once the
+            check is over
+
+    Returns:
+        bool: True, if percent to fill or timeout is met, False otherwise
+    """
+    ret = False
+    try:
+        ret = check_if_percent_to_fill_or_timeout_is_met(dirname,
+                                                         percent_to_fill,
+                                                         timeout)
+    finally:
+        # stop_populate_data() blocks in mevent.wait() until this event is
+        # set, so it has to be set even when the check fails or raises;
+        # otherwise the waiting process would never return.
+        event.set()
+    return bool(ret)
+
+
+def _write_fio_log_banner(log_file, mode, label):
+    """Writes a timestamped '<label> FIO' banner line to the log file."""
+    time_str = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
+    with open(log_file, mode) as fd:
+        fd.write("=========" + label + " FIO-" + time_str + "=======\n")
+
+
+def run_fio(proc_queue, script_path, dirname,
+            job_files_list, log_file):
+    """
+    Module to invoke IOs using fio tool
+
+    The job files are copied to a temporary directory, the run_fio.py
+    script is started on those copies in its own session, and the
+    function then blocks until "STOP" is received on the queue.
+
+    Args:
+        proc_queue (obj): multiprocessing queue object
+        script_path (str): absolute path of the run_fio.py script
+        dirname (str): absolute path of dir to write data with fio
+        job_files_list (list): list of ini job files for fio
+        log_file (str): log file name for logging fio console output.
+            If None, the console output is not redirected.
+
+    Returns:
+        bool: True, if fio starts to write data and stops when it
+            gets "STOP" string in queue, False otherwise
+
+    """
+    tmpdir = tempfile.mkdtemp()
+    log_fd = None
+    try:
+        job_files_list_to_run = []
+        for job_file in job_files_list:
+            job_file_to_run = os.path.join(tmpdir,
+                                           os.path.basename(job_file))
+            shutil.copy(job_file, job_file_to_run)
+            job_files_list_to_run.append(job_file_to_run)
+
+        stderr_target = None
+        if log_file is not None:
+            # Truncate the log and write the start banner, then reopen in
+            # append mode so that the fio output follows the banner.
+            _write_fio_log_banner(log_file, "w", "STARTING")
+            log_fd = open(log_file, "a")
+            stderr_target = subprocess.STDOUT
+
+        # Argument list instead of a shell string: paths containing spaces
+        # or quotes are passed through unmodified.
+        cmd = ["python", script_path,
+               "--job-files", ' '.join(job_files_list_to_run),
+               dirname]
+        try:
+            # os.setsid puts the script in its own session/process group,
+            # so that killpg below also reaches the processes it spawns.
+            p = subprocess.Popen(cmd, stdout=log_fd, stderr=stderr_target,
+                                 preexec_fn=os.setsid)
+        except OSError:
+            print("Unable to trigger IO using fio")
+            return False
+
+        time.sleep(10)
+        # A non-zero exit status this early means the IO never got going
+        if p.poll() not in (None, 0):
+            print("Unable to trigger IO using fio")
+            return False
+
+        # Block until the stop request arrives
+        while proc_queue.get() != 'STOP':
+            pass
+
+        try:
+            os.killpg(os.getpgid(p.pid), signal.SIGTERM)
+        except OSError:
+            # The process group is already gone, nothing left to stop
+            pass
+        time.sleep(2)
+        if log_file is not None:
+            _write_fio_log_banner(log_file, "a", "ENDING")
+        return True
+    finally:
+        # Always release the log handle and the copied job files
+        if log_fd is not None:
+            log_fd.close()
+        shutil.rmtree(tmpdir)
+
+
+def start_populate_data(mount_point, io_dict,
+                        percent_to_fill, timeout):
+    """
+    Starts populating data on the directory
+
+    One process is started per entry of io_dict, followed by one process
+    which checks whether percent to fill or timeout is met. The call then
+    hands over to stop_populate_data(), which waits for that check.
+
+    Args:
+        mount_point(str): Directory name to fill data
+        io_dict (dict): dict of io related information. The keys read
+            from each entry are 'workload_type', 'function_addr',
+            'script_path', 'job_files' and 'log_file'.
+        percent_to_fill (int): percentage to fill the directory
+        timeout (int): timeout value
+
+    Returns:
+        bool: the return value of stop_populate_data()
+
+    """
+
+    manager = multiprocessing.Manager()
+    fill_event = manager.Event()
+
+    workers = []
+    stop_queues = []
+
+    for io_info in io_dict.values():
+        # Each IO process gets its own queue for the "STOP" request
+        stop_queue = multiprocessing.Queue()
+        stop_queues.append(stop_queue)
+        workload = io_info['workload_type']
+        worker = multiprocessing.Process(
+            target=io_info['function_addr'],
+            args=(stop_queue,
+                  io_info['script_path'],
+                  mount_point,
+                  io_info['job_files'][workload],
+                  io_info['log_file']))
+        workers.append(worker)
+        time.sleep(5)
+        worker.start()
+
+    monitor = multiprocessing.Process(
+        target=run_check_if_percent_to_fill_or_timeout_is_met,
+        args=(mount_point, percent_to_fill, timeout, fill_event))
+
+    time.sleep(5)
+    workers.append(monitor)
+    monitor.start()
+    time.sleep(2)
+    return stop_populate_data(workers, stop_queues, mevent=fill_event)
+
+
+def stop_populate_data(proc_list, proc_queue, mevent=None):
+    """
+    Stops populating data on the directory
+
+    Sends "STOP" on every queue, closes the queues and then terminates
+    every process.
+
+    Args:
+        proc_list (list): List of processes to terminate
+        proc_queue (list): List of process queues to send "STOP" on and
+            to close
+
+    Kwargs:
+        mevent (obj): if an event object is passed, waits till the event
+            is set before stopping anything. Defaults to None.
+
+    Returns:
+        bool: True, if all the queues were notified and all the processes
+            were terminated. False, if any exception occurred on the way.
+
+    Example:
+        stop_populate_data(proc_list, proc_queue)
+    """
+
+    try:
+        if mevent:
+            mevent.wait()
+
+        for stop_queue in proc_queue:
+            stop_queue.put("STOP")
+            # Pause after each stop request before closing its queue
+            time.sleep(5)
+            stop_queue.close()
+            stop_queue.join_thread()
+
+        for worker in proc_list:
+            worker.terminate()
+    except Exception as err:
+        print("Exception occured in stop_populate_data(): %s"
+              % err)
+        return False
+    return True
+
+
+def call_get_disk_usage(args):
+    """
+    Main method for getting disk usage
+
+    Args:
+        args (obj): parsed command line arguments; only args.dir is read
+
+    Returns:
+        int: 0 after printing the disk usage, 1 if it could not be obtained
+    """
+
+    disk_usage = get_disk_usage(args.dir)
+    if disk_usage is None:
+        return 1
+    # print() with a single argument gives the same output on Python 2 and
+    # is valid on Python 3, matching the print() calls elsewhere in the file
+    print(disk_usage)
+    return 0
+
+
+def call_start_populate_data(args):
+    """
+    Main method for populating data
+
+    Args:
+        args (obj): parsed command line arguments. The attributes read
+            are dir, c, i, w, p, t and l.
+
+    Returns:
+        int: 0, if start_populate_data() reports success. 1, if the config
+            files lack the needed io details or the IO run fails.
+    """
+
+    dirname = args.dir
+    config_file_list = args.c.split()
+    workload = args.w
+    percent = args.p
+    timeout = args.t if args.t is not None else 0
+    log_file = args.l
+
+    # Collects config data from multiple config files
+    config_data = {}
+    for config_file in config_file_list:
+        with open(config_file, 'r') as f:
+            # safe_load() only builds plain data types. A bare yaml.load()
+            # can construct arbitrary objects and is rejected by
+            # PyYAML >= 6 unless a Loader is passed.
+            each_config_data = yaml.safe_load(f)
+        # An empty config file loads as None
+        if each_config_data:
+            config_data.update(each_config_data)
+
+    # Validate the config before it is dereferenced below
+    io_config = config_data.get('io') or {}
+    if 'tools' not in io_config:
+        print("io tools info is not given in config file")
+        return 1
+    if 'scripts' not in io_config:
+        print("io scripts info is not given in config file")
+        return 1
+    config_data_io = dict(io_config['tools'])
+    config_data_io.update(io_config['scripts'])
+
+    # Handling the following cases as per user option.
+    # case1: If user gives -i option only, then select io tools from user
+    #        option.
+    # case2: If user gives -w option only, look for given workload in config
+    #        file and choose io tools for the specified workload from
+    #        config file.
+    # case3: if -i and -w option specified, select workload and select io
+    #        tools as specified in -i and also it should be part of the list
+    #        of io tools available for that workload.
+    # case4: If -i | -w | -i and -w is not specified , run all the tools
+    #        specified in the config file
+
+    io_list = args.i.split() if args.i is not None else []
+
+    workload_type = ""
+    if workload is not None:
+        workloads = io_config.get('workload') or {}
+        if workload in workloads:
+            io_list_for_given_workload = workloads[workload] or []
+            if not io_list:
+                io_list = io_list_for_given_workload
+            else:
+                io_list = list(set(io_list).
+                               intersection(io_list_for_given_workload))
+            workload_type = workload
+    elif not io_list:
+        io_list = io_config.get('generic_workload') or []
+
+    # If workload type is not given by the user, then by default
+    # generic_workload is assigned.
+    if not workload_type:
+        workload_type = "generic_workload"
+
+    # A bare file name has an empty dirname; fall back to the current
+    # directory, as os.makedirs('') would fail.
+    log_file_dir = os.path.dirname(log_file) or os.curdir
+    if not os.path.exists(log_file_dir):
+        os.makedirs(log_file_dir)
+
+    filename, file_ext = os.path.splitext(log_file)
+    time_str = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
+    log_file = filename + "_" + time_str + file_ext
+
+    print("GENERATE IO Log file: %s" % log_file)
+
+    io_details = {}
+    for io in io_list:
+        if io not in config_data_io:
+            print("The IO tool/script - '%s' details not present in config "
+                  "file. Skipping the IO - '%s'" % (io, io))
+            continue
+        # Look the runner up by name rather than eval()-ing text that
+        # comes from the command line / config file.
+        io_function = globals().get("run_" + io)
+        if not callable(io_function):
+            print("No function 'run_%s' is available to run the IO "
+                  "tool/script. Skipping the IO - '%s'" % (io, io))
+            continue
+        config_data_io[io]['function_addr'] = io_function
+        config_data_io[io]['log_file'] = os.path.join(log_file_dir,
+                                                      io + "_log.log")
+        config_data_io[io]['workload_type'] = workload_type
+        io_details[io] = config_data_io[io]
+
+    if not io_details:
+        print("Config file doesn't have IO details for %s"
+              % ','.join(io_list))
+        return 1
+
+    # Starts generating IO
+    # If -t and -p both are passed as options, runs all the io's as
+    # specified until '-t' or '-p' is reached. i.e which ever reaches first.
+    ret = start_populate_data(dirname, io_details, percent, timeout)
+    usage_summary = ("Disk Usage Details of %s: %s"
+                     % (dirname, get_disk_usage(dirname)))
+    print(usage_summary)
+
+    # Merge the per IO logs into the main log file
+    with open(log_file, 'a') as fd:
+        for io in io_details:
+            io_log_file = io_details[io]['log_file']
+            # An IO that failed before logging anything leaves no file
+            if not os.path.exists(io_log_file):
+                continue
+            with open(io_log_file, "r") as io_fd:
+                fd.write(io_fd.read())
+        fd.write("\n" + usage_summary)
+
+    return 0 if ret else 1
+
+
+if __name__ == "__main__":
+    # Command line entry point: parses the options, runs the IO generation
+    # and exits with the return code of the selected function.
+    print("Starting IO Generation...")
+    test_start_time = datetime.datetime.now().replace(microsecond=0)
+
+    write_data_parser = argparse.ArgumentParser(prog="generate_io.py",
+                                                description=("Program for "
+                                                             "generating io"))
+
+    write_data_required_parser = write_data_parser.add_argument_group(
+                                                    'required named arguments')
+
+    write_data_required_parser.add_argument(
+        'dir', metavar='DIR', type=str,
+        help="Directory on which operations has to be performed")
+    write_data_required_parser.add_argument('-c', help="space separated list "
+                                                       "of config files",
+                                            required=True)
+    write_data_parser.add_argument('-i', help="space separated list of "
+                                              "io tools")
+    write_data_parser.add_argument('-w', help="Workload type")
+    # The trailing space keeps the help text from reading "thedirectory"
+    write_data_parser.add_argument('-p', help="percentage to fill the "
+                                              "directory",
+                                   type=int, default=100)
+    write_data_parser.add_argument('-t', help="timeout value in seconds.",
+                                   type=int)
+    default_log_file = "/var/tmp/generate_io/generate_io.log"
+    write_data_parser.add_argument('-l', help="log file name.",
+                                   default=default_log_file)
+
+    write_data_parser.set_defaults(func=call_start_populate_data)
+
+    args = write_data_parser.parse_args()
+    rc = args.func(args)
+    test_end_time = datetime.datetime.now().replace(microsecond=0)
+    print("Execution time: %s" % (test_end_time - test_start_time))
+    print("Ending IO Generation")
+    sys.exit(rc)
author	Arthy Loganathan <aloganat@redhat.com>	2017-12-07 10:50:03 +0530
committer	Nigel Babu <nigelb@redhat.com>	2018-04-20 04:52:02 +0000
commit	172ff6402afb8ab934c7567b95ecf74aa9da2dd2 (patch)
tree	58d55c4ce94ced37c0d4463c1a155b4d5140d6fc /glustolibs-io
parent	b0de1e89029b2af15b1fdc92e83c378c5a353346 (diff)