diff options
| author | Vikas Gorur <vikas@gluster.com> | 2010-04-02 18:03:33 +0000 | 
|---|---|---|
| committer | Anand V. Avati <avati@dev.gluster.com> | 2010-04-02 23:11:11 -0700 | 
| commit | 9c2bfa8a4441d27178f3b843bfa0a77df9f867e5 (patch) | |
| tree | 4ae50c43f4e1b39dd13e7bfaae20eef6a25d64c5 | |
| parent | d9b34f3f2c5de8cdde6dd8c24fade839b7727ab2 (diff) | |
extras/profiler/glusterfs-profiler: Add graphing tool.
glusterfs-profiler is a Python tool that can graphically display
the profiling information printed in the process state dump.
Signed-off-by: Vikas Gorur <vikas@gluster.com>
Signed-off-by: Anand V. Avati <avati@dev.gluster.com>
BUG: 268 (Add timing instrumentation code)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=268
| -rwxr-xr-x | extras/profiler/glusterfs-profiler | 267 | 
1 files changed, 267 insertions, 0 deletions
diff --git a/extras/profiler/glusterfs-profiler b/extras/profiler/glusterfs-profiler new file mode 100755 index 00000000000..f843ae69a3d --- /dev/null +++ b/extras/profiler/glusterfs-profiler @@ -0,0 +1,267 @@ +#!/usr/bin/env python + +#    Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com> +#    This file is part of GlusterFS. + +#    GlusterFS is free software; you can redistribute it and/or modify +#    it under the terms of the GNU General Public License as published +#    by the Free Software Foundation; either version 3 of the License, +#    or (at your option) any later version. + +#    GlusterFS is distributed in the hope that it will be useful, but +#    WITHOUT ANY WARRANTY; without even the implied warranty of +#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +#    General Public License for more details. + +#    You should have received a copy of the GNU General Public License +#    along with this program.  If not, see +#    <http://www.gnu.org/licenses/>. 
import re
import sys
from optparse import OptionParser

import numpy as np
import matplotlib.pyplot as plt

# Global dict-of-dicts holding the profiling data parsed from a process
# state dump, keyed as <dict>[xlator-name][op-name].  Values are kept as
# the raw strings from the dump and converted with float()/int() at the
# point of use.
latencies = {}   # average latency per op (microseconds)
counts = {}      # number of calls per op
totals = {}      # total time spent per op

# Dump-file line format, e.g.:
#   fuse.latency.TRUNCATE=3147.000,4,12588.000
# i.e. <xlator>.<section>.<OP>=<avg-latency>,<call-count>,<total-time>
# (dots escaped -- the original pattern's bare '.' matched any character).
_DUMP_LINE_RE = re.compile(r"(\w+)\.\w+\.(\w+)=(\w+\.\w+),(\w+),(\w+\.\w+)")


def collect_data(f):
    """Collect latency data from the file object *f* and store it in the
    module-level ``latencies``, ``counts`` and ``totals`` dicts.

    Lines that do not match the dump format, or whose average latency is
    zero (op never seen), are skipped.
    """
    for line in f:
        m = _DUMP_LINE_RE.search(line)
        if m and float(m.group(3)) != 0:
            xlator, op, time, count, total = m.groups()
            latencies.setdefault(xlator, {})[op] = time
            counts.setdefault(xlator, {})[op] = count
            totals.setdefault(xlator, {})[op] = total


def calc_latency_heights(xlator_order):
    """Return one list of bar heights per xlator in *xlator_order*.

    For every op (in sorted order, so all rows line up) the height of an
    xlator's segment is its own average latency minus that of the xlator
    below it in the stack; the bottom-most xlator contributes its full
    latency.  Stacking these therefore reconstructs each op's total.
    """
    n = len(xlator_order)
    heights = [[] for _ in range(n)]

    for i, xl in enumerate(xlator_order):
        ops = sorted(latencies[xl])
        if i == n - 1:
            # bottom-most xlator: full latency
            heights[i] = [float(latencies[xl][op]) for op in ops]
        else:
            below = xlator_order[i + 1]
            heights[i] = [float(latencies[xl][op]) - float(latencies[below][op])
                          for op in ops]
    return heights


# have sufficient number of colors (one per xlator in the stack)
colors = ["violet", "blue", "green", "yellow", "orange", "red"]


def latency_profile(title, xlator_order):
    """Display a stacked bar chart of average per-op latency.

    One stacked segment per xlator in *xlator_order* (listed top-most
    first); segment color follows ``colors``.
    """
    heights = calc_latency_heights(xlator_order)

    ops = sorted(latencies[xlator_order[0]])
    num_ops = len(ops)
    num_xl = len(xlator_order)
    ind = np.arange(num_ops)
    width = 0.35

    # The bottom of xlator i's segment is the full latency of the xlator
    # below it; the bottom-most xlator sits on the x-axis.
    bottoms = [None] * num_xl
    bottoms[num_xl - 1] = [0] * num_ops
    for i in range(num_xl - 1):
        below = xlator_order[i + 1]
        k = sorted(latencies[below])
        bottoms[i] = [float(latencies[below][op]) for op in k]

    pieces = [plt.bar(ind, heights[i], width, color=colors[i],
                      bottom=bottoms[i])
              for i in range(num_xl)]

    plt.ylabel("Average Latency (microseconds)")
    plt.title("Latency Profile for '%s'" % title)
    plt.xticks(ind + width / 2., ops)

    # y-axis: 10 ticks up to the top xlator's max latency (rounded to
    # the nearest hundred).
    m = round(max(float(v) for v in latencies[xlator_order[0]].values()), -2)
    plt.yticks(np.arange(0, m + m * 0.1, m / 10))
    plt.legend([p[0] for p in pieces], xlator_order)

    plt.show()


def fop_distribution(title, xlator_order):
    """Display a bar chart of the percentage distribution of FOPs as
    seen by the top-most xlator in *xlator_order*."""
    plt.ylabel("Percentage of calls")
    plt.title("FOP distribution for '%s'" % title)

    top_xl = xlator_order[0]
    ops = sorted(counts[top_xl])

    ind = np.arange(len(latencies[top_xl]))
    width = 0.35

    total = sum(int(counts[top_xl][op]) for op in ops)
    heights = [float(counts[top_xl][op]) / total * 100 for op in ops]

    bars = plt.bar(ind, heights, width, color="red")

    # Annotate each bar with its (truncated) percentage.
    for bar in bars:
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width() / 2., 1.05 * height,
                 "%d%%" % int(height))

    plt.xticks(ind + width / 2., ops)
    plt.yticks(np.arange(0, 110, 10))

    plt.show()


def calc_workload_heights(xlator_order, scaling):
    """Return stacked-bar heights for the workload profile.

    Like :func:`calc_latency_heights`, but based on total time, with each
    op's column normalized to the top xlator's total for that op and then
    scaled by *scaling* (the op's percentage of overall time).
    """
    top_xl = xlator_order[0]
    n = len(xlator_order)
    workload_heights = [[] for _ in range(n)]

    for i, xl in enumerate(xlator_order):
        ops = sorted(totals[xl])

        def scaled(name):
            # Share of the top xlator's total time spent at/below `name`
            # for each op, scaled to that op's overall percentage.
            return [float(totals[name][op]) / float(totals[top_xl][op])
                    * scaling[j] for j, op in enumerate(ops)]

        if i == n - 1:
            # bottom-most xlator: its full (scaled) share
            workload_heights[i] = scaled(xl)
        else:
            below = xlator_order[i + 1]
            workload_heights[i] = [a - b for a, b in
                                   zip(scaled(xl), scaled(below))]

    return workload_heights


def workload_profile(title, xlator_order):
    """Display a stacked bar chart of the share of total time per FOP,
    split by xlator, with each column annotated with its percentage of
    the overall workload."""
    plt.ylabel("Percentage of Total Time")
    plt.title("Workload Profile for '%s'" % title)

    top_xl = xlator_order[0]
    ops = sorted(totals[top_xl])

    num_ops = len(ops)
    num_xl = len(xlator_order)
    ind = np.arange(num_ops)
    width = 0.35

    # Each op's percentage of the total time seen at the top xlator;
    # these become the full column heights.
    total = sum(float(totals[top_xl][op]) for op in ops)
    p_heights = [float(totals[top_xl][op]) / total * 100 for op in ops]

    heights = calc_workload_heights(xlator_order, p_heights)

    bottoms = [None] * num_xl
    bottoms[num_xl - 1] = [0] * num_ops
    for i in range(num_xl - 1):
        below = xlator_order[i + 1]
        k = sorted(totals[below])
        bottoms[i] = [float(totals[below][op]) / float(totals[top_xl][op])
                      * p_heights[j] for j, op in enumerate(k)]

    pieces = [plt.bar(ind, heights[i], width, color=colors[i],
                      bottom=bottoms[i])
              for i in range(num_xl)]

    # Annotate each column with its overall percentage.
    for j, op in enumerate(ops):
        bar = pieces[num_xl - 1][j]
        plt.text(bar.get_x() + bar.get_width() / 2., 1.05 * p_heights[j],
                 "%d%%" % int(p_heights[j]))

    plt.xticks(ind + width / 2., ops)
    plt.yticks(np.arange(0, 110, 10))
    plt.legend([p[0] for p in pieces], xlator_order)

    plt.show()


def main():
    """Parse command-line options, load the state dump, and produce the
    requested graph(s)."""
    parser = OptionParser(usage="usage: %prog [-l | -d | -w] -x <xlator order> <state dump file>")
    parser.add_option("-l", "--latency", dest="latency", action="store_true",
                      help="Produce latency profile")
    parser.add_option("-d", "--distribution", dest="distribution", action="store_true",
                      help="Produce distribution of FOPs")
    parser.add_option("-w", "--workload", dest="workload", action="store_true",
                      help="Produce workload profile")
    parser.add_option("-t", "--title", dest="title", help="Set the title of the graph")
    parser.add_option("-x", "--xlator-order", dest="xlator_order", help="Specify the order of xlators")

    (options, args) = parser.parse_args()

    if len(args) != 1:
        parser.error("Incorrect number of arguments")

    if options.xlator_order:
        xlator_order = options.xlator_order.split()
    else:
        print("xlator order must be specified")
        sys.exit(1)

    # open() + with replaces the Python-2-only file() and guarantees the
    # dump file is closed.
    with open(args[0], 'r') as f:
        collect_data(f)

    if options.latency:
        latency_profile(options.title, xlator_order)

    if options.distribution:
        fop_distribution(options.title, xlator_order)

    if options.workload:
        workload_profile(options.title, xlator_order)


if __name__ == "__main__":
    main()
