1 files changed, 817 insertions, 0 deletions
diff --git a/extras/profiler/glusterfs-profiler b/extras/profiler/glusterfs-profiler
new file mode 100755
index 00000000000..aaafd088648
--- /dev/null
+++ b/extras/profiler/glusterfs-profiler
@@ -0,0 +1,817 @@
+#!/usr/bin/python3
+
+#  Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+#  This file is part of GlusterFS.
+#
+#  This file is licensed to you under your choice of the GNU Lesser
+#  General Public License, version 3 or any later version (LGPLv3 or
+#  later), or the GNU General Public License, version 2 (GPLv2), in all
+#  cases as published by the Free Software Foundation.
+
+
+# texttable - module for creating simple ASCII tables
+# Incorporated from texttable.py downloaded from
+# http://jefke.free.fr/stuff/python/texttable/texttable-0.7.0.tar.gz
+
+import sys
+import string
+
+try:
+    if sys.version >= '2.3':
+        import textwrap
+    elif sys.version >= '2.2':
+        from optparse import textwrap
+    else:
+        from optik import textwrap
+except ImportError:
+    sys.stderr.write("Can't import textwrap module!\n")
+    raise
+
+try:
+    True, False
+except NameError:
+    (True, False) = (1, 0)
+
+def len(iterable):
+    """Redefining len here so it will be able to work with non-ASCII characters
+    """
+    if not isinstance(iterable, str):
+        return iterable.__len__()
+    
+    try:
+        return len(unicode(iterable, 'utf'))
+    except:
+        return iterable.__len__()
+
+class ArraySizeError(Exception):
+    """Exception raised when specified rows don't fit the required size
+    """
+
+    def __init__(self, msg):
+        self.msg = msg
+        Exception.__init__(self, msg, '')
+
+    def __str__(self):
+        return self.msg
+
+class Texttable:
+
+    BORDER = 1
+    HEADER = 1 << 1
+    HLINES = 1 << 2
+    VLINES = 1 << 3
+
+    def __init__(self, max_width=80):
+        """Constructor
+
+        - max_width is an integer, specifying the maximum width of the table
+        - if set to 0, size is unlimited, therefore cells won't be wrapped
+        """
+
+        if max_width <= 0:
+            max_width = False
+        self._max_width = max_width
+        self._deco = Texttable.VLINES | Texttable.HLINES | Texttable.BORDER | \
+            Texttable.HEADER
+        self.set_chars(['-', '|', '+', '='])
+        self.reset()
+
+    def reset(self):
+        """Reset the instance
+
+        - reset rows and header
+        """
+
+        self._hline_string = None
+        self._row_size = None
+        self._header = []
+        self._rows = []
+
+    def header(self, array):
+        """Specify the header of the table
+        """
+
+        self._check_row_size(array)
+        self._header = map(str, array)
+
+    def add_row(self, array):
+        """Add a row in the rows stack
+
+        - cells can contain newlines and tabs
+        """
+
+        self._check_row_size(array)
+        self._rows.append(map(str, array))
+
+    def add_rows(self, rows, header=True):
+        """Add several rows in the rows stack
+
+        - The 'rows' argument can be either an iterator returning arrays,
+          or a by-dimensional array
+        - 'header' specifies if the first row should be used as the header
+          of the table
+        """
+
+        # nb: don't use 'iter' on by-dimensional arrays, to get a 
+        #     usable code for python 2.1
+        if header:
+            if hasattr(rows, '__iter__') and hasattr(rows, 'next'):
+                self.header(rows.next())
+            else:
+                self.header(rows[0])
+                rows = rows[1:]
+        for row in rows:
+            self.add_row(row)
+
+    def set_chars(self, array):
+        """Set the characters used to draw lines between rows and columns
+
+        - the array should contain 4 fields:
+
+            [horizontal, vertical, corner, header]
+
+        - default is set to:
+
+            ['-', '|', '+', '=']
+        """
+
+        if len(array) != 4:
+            raise ArraySizeError, "array should contain 4 characters"
+        array = [ x[:1] for x in [ str(s) for s in array ] ]
+        (self._char_horiz, self._char_vert,
+            self._char_corner, self._char_header) = array
+
+    def set_deco(self, deco):
+        """Set the table decoration
+
+        - 'deco' can be a combinaison of:
+
+            Texttable.BORDER: Border around the table
+            Texttable.HEADER: Horizontal line below the header
+            Texttable.HLINES: Horizontal lines between rows
+            Texttable.VLINES: Vertical lines between columns
+
+           All of them are enabled by default
+
+        - example:
+
+            Texttable.BORDER | Texttable.HEADER
+        """
+
+        self._deco = deco
+
+    def set_cols_align(self, array):
+        """Set the desired columns alignment
+
+        - the elements of the array should be either "l", "c" or "r":
+
+            * "l": column flushed left
+            * "c": column centered
+            * "r": column flushed right
+        """
+
+        self._check_row_size(array)
+        self._align = array
+
+    def set_cols_valign(self, array):
+        """Set the desired columns vertical alignment
+
+        - the elements of the array should be either "t", "m" or "b":
+
+            * "t": column aligned on the top of the cell
+            * "m": column aligned on the middle of the cell
+            * "b": column aligned on the bottom of the cell
+        """
+
+        self._check_row_size(array)
+        self._valign = array
+
+    def set_cols_width(self, array):
+        """Set the desired columns width
+
+        - the elements of the array should be integers, specifying the
+          width of each column. For example:
+
+                [10, 20, 5]
+        """
+
+        self._check_row_size(array)
+        try:
+            array = map(int, array)
+            if reduce(min, array) <= 0:
+                raise ValueError
+        except ValueError:
+            sys.stderr.write("Wrong argument in column width specification\n")
+            raise
+        self._width = array
+
+    def draw(self):
+        """Draw the table
+
+        - the table is returned as a whole string
+        """
+
+        if not self._header and not self._rows:
+            return
+        self._compute_cols_width()
+        self._check_align()
+        out = ""
+        if self._has_border():
+            out += self._hline()
+        if self._header:
+            out += self._draw_line(self._header, isheader=True)
+            if self._has_header():
+                out += self._hline_header()
+        length = 0
+        for row in self._rows:
+            length += 1
+            out += self._draw_line(row)
+            if self._has_hlines() and length < len(self._rows):
+                out += self._hline()
+        if self._has_border():
+            out += self._hline()
+        return out[:-1]
+
+    def _check_row_size(self, array):
+        """Check that the specified array fits the previous rows size
+        """
+
+        if not self._row_size:
+            self._row_size = len(array)
+        elif self._row_size != len(array):
+            raise ArraySizeError, "array should contain %d elements" \
+                % self._row_size
+
+    def _has_vlines(self):
+        """Return a boolean, if vlines are required or not
+        """
+
+        return self._deco & Texttable.VLINES > 0
+
+    def _has_hlines(self):
+        """Return a boolean, if hlines are required or not
+        """
+
+        return self._deco & Texttable.HLINES > 0
+
+    def _has_border(self):
+        """Return a boolean, if border is required or not
+        """
+
+        return self._deco & Texttable.BORDER > 0
+
+    def _has_header(self):
+        """Return a boolean, if header line is required or not
+        """
+
+        return self._deco & Texttable.HEADER > 0
+
+    def _hline_header(self):
+        """Print header's horizontal line
+        """
+
+        return self._build_hline(True)
+
+    def _hline(self):
+        """Print an horizontal line
+        """
+
+        if not self._hline_string:
+            self._hline_string = self._build_hline()
+        return self._hline_string
+
+    def _build_hline(self, is_header=False):
+        """Return a string used to separated rows or separate header from
+        rows
+        """
+        horiz = self._char_horiz
+        if (is_header):
+            horiz = self._char_header
+        # compute cell separator
+        s = "%s%s%s" % (horiz, [horiz, self._char_corner][self._has_vlines()],
+            horiz)
+        # build the line
+        l = s.join([horiz*n for n in self._width])
+        # add border if needed
+        if self._has_border():
+            l = "%s%s%s%s%s\n" % (self._char_corner, horiz, l, horiz,
+                self._char_corner)
+        else:
+            l += "\n"
+        return l
+
+    def _len_cell(self, cell):
+        """Return the width of the cell
+
+        Special characters are taken into account to return the width of the
+        cell, such like newlines and tabs
+        """
+
+        cell_lines = cell.split('\n')
+        maxi = 0
+        for line in cell_lines:
+            length = 0
+            parts = line.split('\t')
+            for part, i in zip(parts, range(1, len(parts) + 1)):
+                length = length + len(part)
+                if i < len(parts):
+                    length = (length/8 + 1)*8
+            maxi = max(maxi, length)
+        return maxi
+
+    def _compute_cols_width(self):
+        """Return an array with the width of each column
+
+        If a specific width has been specified, exit. If the total of the
+        columns width exceed the table desired width, another width will be
+        computed to fit, and cells will be wrapped.
+        """
+
+        if hasattr(self, "_width"):
+            return
+        maxi = []
+        if self._header:
+            maxi = [ self._len_cell(x) for x in self._header ]
+        for row in self._rows:
+            for cell,i in zip(row, range(len(row))):
+                try:
+                    maxi[i] = max(maxi[i], self._len_cell(cell))
+                except (TypeError, IndexError):
+                    maxi.append(self._len_cell(cell))
+        items = len(maxi)
+        length = reduce(lambda x,y: x+y, maxi)
+        if self._max_width and length + items*3 + 1 > self._max_width:
+            maxi = [(self._max_width - items*3 -1) / items \
+                for n in range(items)]
+        self._width = maxi
+
+    def _check_align(self):
+        """Check if alignment has been specified, set default one if not
+        """
+
+        if not hasattr(self, "_align"):
+            self._align = ["l"]*self._row_size
+        if not hasattr(self, "_valign"):
+            self._valign = ["t"]*self._row_size
+
+    def _draw_line(self, line, isheader=False):
+        """Draw a line
+
+        Loop over a single cell length, over all the cells
+        """
+
+        line = self._splitit(line, isheader)
+        space = " "
+        out  = ""
+        for i in range(len(line[0])):
+            if self._has_border():
+                out += "%s " % self._char_vert
+            length = 0
+            for cell, width, align in zip(line, self._width, self._align):
+                length += 1
+                cell_line = cell[i]
+                fill = width - len(cell_line)
+                if isheader:
+                    align = "c"
+                if align == "r":
+                    out += "%s " % (fill * space + cell_line)
+                elif align == "c":
+                    out += "%s " % (fill/2 * space + cell_line \
+                            + (fill/2 + fill%2) * space)
+                else:
+                    out += "%s " % (cell_line + fill * space)
+                if length < len(line):
+                    out += "%s " % [space, self._char_vert][self._has_vlines()]
+            out += "%s\n" % ['', self._char_vert][self._has_border()]
+        return out
+
+    def _splitit(self, line, isheader):
+        """Split each element of line to fit the column width
+
+        Each element is turned into a list, result of the wrapping of the
+        string to the desired width
+        """
+
+        line_wrapped = []
+        for cell, width in zip(line, self._width):
+            array = []
+            for c in cell.split('\n'):
+                array.extend(textwrap.wrap(unicode(c, 'utf'), width))
+            line_wrapped.append(array)
+        max_cell_lines = reduce(max, map(len, line_wrapped))
+        for cell, valign in zip(line_wrapped, self._valign):
+            if isheader:
+                valign = "t"
+            if valign == "m":
+                missing = max_cell_lines - len(cell)
+                cell[:0] = [""] * (missing / 2)
+                cell.extend([""] * (missing / 2 + missing % 2))
+            elif valign == "b":
+                cell[:0] = [""] * (max_cell_lines - len(cell))
+            else:
+                cell.extend([""] * (max_cell_lines - len(cell)))
+        return line_wrapped
+
+
+#    Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+#    This file is part of GlusterFS.
+
+#    GlusterFS is free software; you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published
+#    by the Free Software Foundation; either version 3 of the License,
+#    or (at your option) any later version.
+
+#    GlusterFS is distributed in the hope that it will be useful, but
+#    WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#    General Public License for more details.
+
+#    You should have received a copy of the GNU General Public License
+#    along with this program.  If not, see
+#    <http://www.gnu.org/licenses/>.
+
+graph_available = True
+
+try:
+    import numpy as np
+    import matplotlib.pyplot as plt
+except ImportError:
+    graph_available = False
+
+import re
+import sys
+
+from optparse import OptionParser
+
+# Global dict-of-dict holding the latency data
+# latency[xlator-name][op-name]
+
+latencies = {}
+counts = {}
+totals = {}
+
+def collect_data (f):
+    """Collect latency data from the file object f and store it in
+    the global variable @latencies"""
+
+    # example dump file line:
+    # fuse.latency.TRUNCATE=3147.000,4
+
+    for line in f:
+        m = re.search ("(\w+)\.\w+.(\w+)=(\w+\.\w+),(\w+),(\w+.\w+)", line)
+        if m and float(m.group(3)) != 0:
+            xlator = m.group(1)
+            op     = m.group(2)
+            time   = m.group(3)
+            count  = m.group(4)
+            total  = m.group(5)
+
+            if not xlator in latencies.keys():
+                latencies[xlator] = dict()
+
+            if not xlator in counts.keys():
+                counts[xlator] = dict()
+
+            if not xlator in totals.keys():
+                totals[xlator] = dict()
+
+            latencies[xlator][op] = time
+            counts[xlator][op]    = count
+            totals[xlator][op]    = total
+
+
+def calc_latency_heights (xlator_order):
+    heights = map (lambda x: [], xlator_order)
+
+    N = len (xlator_order)
+    for i in range (N):
+        xl = xlator_order[i]
+
+        k = latencies[xl].keys()
+        k.sort()
+
+        if i == len (xlator_order) - 1:
+            # bottom-most xlator
+            heights[i] = [float (latencies[xl][key]) for key in k]
+
+        else:
+            next_xl = xlator_order[i+1]
+            this_xl_time = [latencies[xl][key] for key in k]
+            next_xl_time = [latencies[next_xl][key] for key in k]
+
+            heights[i] = map (lambda x, y: float (x) - float (y),
+                              this_xl_time, next_xl_time)
+    return heights
+
+# have sufficient number of colors
+colors = ["violet", "blue", "green", "yellow", "orange", "red"]
+
+def latency_profile (title, xlator_order, mode):
+    heights = calc_latency_heights (xlator_order)
+
+    N     = len (latencies[xlator_order[0]].keys())
+    Nxl   = len (xlator_order)
+    ind   = np.arange (N)
+    width = 0.35
+
+    pieces  = map (lambda x: [], xlator_order)
+    bottoms = map (lambda x: [], xlator_order)
+
+    bottoms[Nxl-1] = map (lambda x: 0, latencies[xlator_order[0]].keys())
+
+    k = latencies[xlator_order[0]].keys()
+    k.sort()
+
+    for i in range (Nxl-1):
+        xl = xlator_order[i+1]
+        bottoms[i] = [float(latencies[xl][key]) for key in k]
+
+    if mode == 'text':
+        print "\n%sLatency profile for %s\n" % (' '*20, title)
+        print "Average latency (microseconds):\n"
+
+        table = Texttable()
+
+        table.set_cols_align(["l", "r"] + ["r"] * len(xlator_order))
+        rows = []
+
+        header = ['OP', 'OP Average (us)'] + xlator_order
+        rows = []
+
+        for op in k:
+            sum = reduce (lambda x, y: x + y, [heights[xlator_order.index(xl)][k.index(op)] for xl in xlator_order],
+                          0)
+
+            row = [op]
+            row += ["%5.2f" % sum]
+
+            for xl in xlator_order:
+                op_index = k.index(op)
+                row += ["%5.2f" % (heights[xlator_order.index(xl)][op_index])]
+
+            rows.append(row)
+
+        def row_sort(r1, r2):
+            v1 = float(r1[1])
+            v2 = float(r2[1])
+
+            if v1 < v2:
+                return -1
+            elif v1 == v2:
+                return 0
+            else:
+                return 1
+
+        rows.sort(row_sort, reverse=True)
+        table.add_rows([header] + rows)
+        print table.draw()
+
+    elif mode == 'graph':
+        for i in range(Nxl):
+            pieces[i] = plt.bar (ind, heights[i], width, color=colors[i],
+                                 bottom=bottoms[i])
+
+            plt.ylabel ("Average Latency (microseconds)")
+            plt.title ("Latency Profile for '%s'" % title)
+            k = latencies[xlator_order[0]].keys()
+            k.sort ()
+            plt.xticks (ind+width/2., k)
+
+            m = round (max(map (float, latencies[xlator_order[0]].values())), -2)
+            plt.yticks (np.arange(0, m + m*0.1, m/10))
+            plt.legend (map (lambda p: p[0], pieces), xlator_order)
+
+            plt.show ()
+    else:
+        print "Unknown mode specified!"
+        sys.exit(1)
+
+
+def fop_distribution (title, xlator_order, mode):
+    plt.ylabel ("Percentage of calls")
+    plt.title ("FOP distribution for '%s'" % title)
+    k = counts[xlator_order[0]].keys()
+    k.sort ()
+
+    N     = len (latencies[xlator_order[0]].keys())
+    ind   = np.arange(N)
+    width = 0.35
+
+    total = 0
+    top_xl = xlator_order[0]
+    for op in k:
+        total += int(counts[top_xl][op])
+
+    heights = []
+
+    for op in k:
+        heights.append (float(counts[top_xl][op])/total * 100)
+
+    if mode == 'text':
+        print "\n%sFOP distribution for %s\n" % (' '*20, title)
+        print "Total number of calls: %d\n" % total
+
+        table = Texttable()
+
+        table.set_cols_align(["l", "r", "r"])
+
+        rows = []
+        header = ["OP", "% of Calls", "Count"]
+
+        for op in k:
+            row = [op, "%5.2f" % (float(counts[top_xl][op])/total * 100), counts[top_xl][op]]
+            rows.append(row)
+
+        def row_sort(r1, r2):
+            v1 = float(r1[1])
+            v2 = float(r2[1])
+
+            if v1 < v2:
+                return -1
+            elif v1 == v2:
+                return 0
+            else:
+                return 1
+
+        rows.sort(row_sort, reverse=True)
+        table.add_rows([header] + rows)
+        print table.draw()
+
+    elif mode == 'graph':
+        bars = plt.bar (ind, heights, width, color="red")
+
+        for bar in bars:
+            height = bar.get_height()
+            plt.text (bar.get_x()+bar.get_width()/2., 1.05*height,
+                      "%d%%" % int(height))
+
+            plt.xticks(ind+width/2., k)
+            plt.yticks(np.arange (0, 110, 10))
+
+            plt.show()
+    else:
+        print "mode not specified!"
+        sys.exit(1)
+
+
+def calc_workload_heights (xlator_order, scaling):
+    workload_heights = map (lambda x: [], xlator_order)
+
+    top_xl = xlator_order[0]
+
+    N = len (xlator_order)
+    for i in range (N):
+        xl = xlator_order[i]
+
+        k = totals[xl].keys()
+        k.sort()
+
+        if i == len (xlator_order) - 1:
+            # bottom-most xlator
+            workload_heights[i] = [float (totals[xl][key]) / float(totals[top_xl][key]) * scaling[k.index(key)] for key in k]
+
+        else:
+            next_xl = xlator_order[i+1]
+            this_xl_time = [float(totals[xl][key]) / float(totals[top_xl][key]) * scaling[k.index(key)] for key in k]
+            next_xl_time = [float(totals[next_xl][key]) / float(totals[top_xl][key]) * scaling[k.index(key)] for key in k]
+
+            workload_heights[i] = map (lambda x, y: (float (x) - float (y)),
+                                       this_xl_time, next_xl_time)
+
+    return workload_heights
+
+def workload_profile(title, xlator_order, mode):
+    plt.ylabel ("Percentage of Total Time")
+    plt.title ("Workload Profile for '%s'" % title)
+    k = totals[xlator_order[0]].keys()
+    k.sort ()
+
+    N     = len(totals[xlator_order[0]].keys())
+    Nxl   = len(xlator_order)
+    ind   = np.arange(N)
+    width = 0.35
+
+    total = 0
+    top_xl = xlator_order[0]
+    for op in k:
+        total += float(totals[top_xl][op])
+
+    p_heights = []
+
+    for op in k:
+        p_heights.append (float(totals[top_xl][op])/total * 100)
+
+    heights = calc_workload_heights (xlator_order, p_heights)
+
+    pieces  = map (lambda x: [], xlator_order)
+    bottoms = map (lambda x: [], xlator_order)
+
+    bottoms[Nxl-1] = map (lambda x: 0, totals[xlator_order[0]].keys())
+
+    for i in range (Nxl-1):
+        xl = xlator_order[i+1]
+        k = totals[xl].keys()
+        k.sort()
+
+        bottoms[i] = [float(totals[xl][key]) / float(totals[top_xl][key]) * p_heights[k.index(key)] for key in k]
+
+    if mode == 'text':
+        print "\n%sWorkload profile for %s\n" % (' '*20, title)
+        print "Total Time: %d microseconds = %.1f seconds = %.1f minutes\n" % (total, total / 1000000.0, total / 6000000.0)
+
+        table = Texttable()
+        table.set_cols_align(["l", "r"] + ["r"] * len(xlator_order))
+        rows = []
+
+        header = ['OP', 'OP Total (%)'] + xlator_order
+        rows = []
+
+        for op in k:
+            sum = reduce (lambda x, y: x + y, [heights[xlator_order.index(xl)][k.index(op)] for xl in xlator_order],
+                          0)
+            row = [op]
+            row += ["%5.2f" % sum]
+
+            for xl in xlator_order:
+                op_index = k.index(op)
+                row += ["%5.2f" % heights[xlator_order.index(xl)][op_index]]
+
+            rows.append(row)
+
+        def row_sort(r1, r2):
+            v1 = float(r1[1])
+            v2 = float(r2[1])
+
+            if v1 < v2:
+                return -1
+            elif v1 == v2:
+                return 0
+            else:
+                return 1
+
+        rows.sort(row_sort, reverse=True)
+        table.add_rows([header] + rows)
+        print table.draw()
+
+    elif mode == 'graph':
+        for i in range(Nxl):
+            pieces[i] = plt.bar (ind, heights[i], width, color=colors[i],
+                                 bottom=bottoms[i])
+
+        for key in k:
+            bar = pieces[Nxl-1][k.index(key)]
+            plt.text (bar.get_x() + bar.get_width()/2., 1.05*p_heights[k.index(key)],
+                      "%d%%" % int(p_heights[k.index(key)]))
+
+        plt.xticks(ind+width/2., k)
+        plt.yticks(np.arange (0, 110, 10))
+        plt.legend (map (lambda p: p[0], pieces), xlator_order)
+
+        plt.show()
+    else:
+        print "Unknown mode specified!"
+        sys.exit(1)
+
+
+def main ():
+    parser = OptionParser(usage="usage: %prog [-l | -d | -w] -x <xlator order> <state dump file>")
+    parser.add_option("-l", "--latency", dest="latency", action="store_true",
+                      help="Produce latency profile")
+    parser.add_option("-d", "--distribution", dest="distribution", action="store_true",
+                      help="Produce distribution of FOPs")
+    parser.add_option("-w", "--workload", dest="workload", action="store_true",
+                      help="Produce workload profile")
+    parser.add_option("-t", "--title", dest="title", help="Set the title of the graph")
+    parser.add_option("-x", "--xlator-order", dest="xlator_order", help="Specify the order of xlators")
+    parser.add_option("-m", "--mode", dest="mode", help="Output format, can be text[default] or graph")
+
+    (options, args) = parser.parse_args()
+
+    if len(args) != 1:
+        parser.error("Incorrect number of arguments")
+
+    if (options.xlator_order):
+        xlator_order = options.xlator_order.split()
+    else:
+        print "xlator order must be specified"
+        sys.exit(1)
+
+    collect_data(file (args[0], 'r'))
+
+    mode = 'text'
+    if (options.mode):
+        mode = options.mode
+        if options.mode == 'graph' and graph_available == False:
+            print "matplotlib not available, falling back to text mode"
+            mode = 'text'
+
+    if (options.latency):
+        latency_profile (options.title, xlator_order, mode)
+
+    if (options.distribution):
+        fop_distribution(options.title, xlator_order, mode)
+
+    if (options.workload):
+        workload_profile(options.title, xlator_order, mode)
+
+main ()