[PATCH] IOzone test: Introduce postprocessing module v2

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This module contains code to postprocess IOzone data
in a convenient way so we can generate performance graphs
and condensed data. The graph generation part depends
on gnuplot, but if the utility is not present,
functionality will gracefully degrade.

Use the postprocessing module introduced in the previous
patch to analyze the IOzone results and to write performance
graphs and performance tables.

Also, in order for other tests to be able to use the
postprocessing code, add the required __init__.py
files, so that a simple

from autotest_lib.client.tests.iozone import postprocessing

will work.

Note: Martin, since patch(1) ignores zero-sized files and does not
create them (high time we move to git), if the changes
look good to you I can commit them all at once, making sure
all files are created.

Signed-off-by: Lucas Meneghel Rodrigues <lmr@xxxxxxxxxx>
---
 client/tests/iozone/common.py         |    8 +
 client/tests/iozone/iozone.py         |   25 ++-
 client/tests/iozone/postprocessing.py |  487 +++++++++++++++++++++++++++++++++
 3 files changed, 515 insertions(+), 5 deletions(-)
 create mode 100644 client/tests/__init__.py
 create mode 100644 client/tests/iozone/__init__.py
 create mode 100644 client/tests/iozone/common.py
 create mode 100755 client/tests/iozone/postprocessing.py

diff --git a/client/tests/__init__.py b/client/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/client/tests/iozone/__init__.py b/client/tests/iozone/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/client/tests/iozone/common.py b/client/tests/iozone/common.py
new file mode 100644
index 0000000..ce78b85
--- /dev/null
+++ b/client/tests/iozone/common.py
@@ -0,0 +1,8 @@
+# Bootstrap module: makes the autotest_lib.client namespace importable for
+# code living in this test directory.
+import os, sys
+# Directory holding this file, resolved through the already imported module.
+dirname = os.path.dirname(sys.modules[__name__].__file__)
+# Two directory levels above this test directory.
+client_dir = os.path.abspath(os.path.join(dirname, "..", ".."))
+# client_dir goes first on sys.path only for the duration of the import below.
+sys.path.insert(0, client_dir)
+import setup_modules
+sys.path.pop(0)
+# NOTE(review): presumably registers the packages found under client_dir as
+# "autotest_lib.client" -- confirm against setup_modules.setup().
+setup_modules.setup(base_path=client_dir,
+                    root_module_name="autotest_lib.client")
diff --git a/client/tests/iozone/iozone.py b/client/tests/iozone/iozone.py
index fa3fba4..03c2c04 100755
--- a/client/tests/iozone/iozone.py
+++ b/client/tests/iozone/iozone.py
@@ -1,5 +1,6 @@
 import os, re
 from autotest_lib.client.bin import test, utils
+import postprocessing
 
 
 class iozone(test.test):
@@ -63,17 +64,19 @@ class iozone(test.test):
         self.results = utils.system_output('%s %s' % (cmd, args))
         self.auto_mode = ("-a" in args)
 
-        path = os.path.join(self.resultsdir, 'raw_output_%s' % self.iteration)
-        raw_output_file = open(path, 'w')
-        raw_output_file.write(self.results)
-        raw_output_file.close()
+        self.results_path = os.path.join(self.resultsdir,
+                                         'raw_output_%s' % self.iteration)
+        self.analysisdir = os.path.join(self.resultsdir,
+                                        'analysis_%s' % self.iteration)
+
+        utils.open_write_close(self.results_path, self.results)
 
 
     def __get_section_name(self, desc):
         return desc.strip().replace(' ', '_')
 
 
-    def postprocess_iteration(self):
+    def generate_keyval(self):
         keylist = {}
 
         if self.auto_mode:
@@ -150,3 +153,15 @@ class iozone(test.test):
                             keylist[key_name] = result
 
         self.write_perf_keyval(keylist)
+
+
+    def postprocess_iteration(self):
+        """
+        Write the perf keyvals and, for auto mode runs, analyze and plot.
+
+        The analysis tables and the graphs are only produced when IOzone ran
+        with "-a" (self.auto_mode); they are written to self.analysisdir.
+        """
+        self.generate_keyval()
+        if not self.auto_mode:
+            return
+
+        raw_results = self.results_path
+        target_dir = self.analysisdir
+
+        analyzer = postprocessing.IOzoneAnalyzer(list_files=[raw_results],
+                                                 output_dir=target_dir)
+        analyzer.analyze()
+
+        plotter = postprocessing.IOzonePlotter(results_file=raw_results,
+                                               output_dir=target_dir)
+        plotter.plot_all()
+
diff --git a/client/tests/iozone/postprocessing.py b/client/tests/iozone/postprocessing.py
new file mode 100755
index 0000000..c995aea
--- /dev/null
+++ b/client/tests/iozone/postprocessing.py
@@ -0,0 +1,487 @@
+#!/usr/bin/python
+"""
+Postprocessing module for IOzone. It picks up the results from an
+IOzone run, calculates the geometric mean of all throughput results for
+a given file size or record size, and then generates a series of 2D and 3D
+graphs. The graph generation functionality depends on gnuplot, and if it
+is not present, functionality degrades gracefully.
+
+@copyright: Red Hat 2010
+"""
+import os, sys, optparse, logging, math, time
+import common
+from autotest_lib.client.common_lib import logging_config, logging_manager
+from autotest_lib.client.common_lib import error
+from autotest_lib.client.bin import utils, os_dep
+
+
+# Names of the 15 whitespace separated columns that the parsers in this module
+# expect on every IOzone result row. The position in this list is used as the
+# column index: the first two entries are sizes, the other 13 are throughputs.
+_LABELS = ['file_size', 'record_size', 'write', 'rewrite', 'read', 'reread',
+           'randread', 'randwrite', 'bkwdread', 'recordrewrite', 'strideread',
+           'fwrite', 'frewrite', 'fread', 'freread']
+
+
+def unique(list):
+    """
+    Return a list of the elements in list, but without duplicates.
+
+    The first occurrence of every element is kept, so the relative order of
+    the input is preserved. Hashable elements are deduplicated through a dict;
+    if an unhashable element is found the function falls back to a slower
+    scan based on equality instead of giving up and returning None.
+
+    @param list: Iterable with values (the name shadows the builtin, but it is
+            kept because it is part of the public signature).
+    @return: List with non duplicate elements (empty list for empty input).
+    """
+    # Materialize once so the fallback below can walk the input a second time
+    # even when a one-shot iterator was passed in.
+    items = [x for x in list]
+    seen = {}
+    result = []
+    try:
+        for x in items:
+            if x not in seen:
+                seen[x] = 1
+                result.append(x)
+    except TypeError:
+        # Unhashable element (e.g. a nested list): start over using equality.
+        result = []
+        for x in items:
+            if x not in result:
+                result.append(x)
+    return result
+
+
+def geometric_mean(values):
+    """
+    Evaluates the geometric mean for a list of numeric values.
+
+    Every value is converted with int() before the mean is computed through
+    the average of the logarithms.
+
+    @param values: List with values.
+    @return: Single value representing the geometric mean for the list values.
+            0.0 if any value is zero (the product is then zero). None if the
+            list is empty, if a value can not be converted to int or if a
+            value is negative (the mean is not defined in those cases).
+    @see: http://en.wikipedia.org/wiki/Geometric_mean
+    """
+    try:
+        values = [int(value) for value in values]
+    except (TypeError, ValueError):
+        return None
+    n = len(values)
+    if n == 0:
+        return None
+    lowest = min(values)
+    if lowest < 0:
+        return None
+    if lowest == 0:
+        # math.log(0) is a domain error; the mean of a zero product is zero.
+        return 0.0
+    return math.exp(sum([math.log(x) for x in values]) / n)
+
+
+def compare_matrices(matrix1, matrix2, treshold=0.05):
+    """
+    Compare 2 matrices nxm and return a matrix nxm with comparison data.
+
+    Every element of matrix2 is divided by the element at the same position
+    in matrix1. Depending on the ratio, the output cell holds:
+
+    * a negative float with the percent difference, for a regression;
+    * a string such as "+12.5" with the percent difference, for an
+      improvement;
+    * "+inf" / "-inf" when the reference value is zero and the compared value
+      is not, as no ratio can be computed;
+    * the reference value itself (first column of a line) or "." (any other
+      column) when the change is within the treshold.
+
+    @param matrix1: Reference Matrix with numeric data
+    @param matrix2: Matrix that will be compared
+    @param treshold: Any difference bigger than this fraction (0.05 means 5%)
+            will be reported. The misspelled name is kept for compatibility.
+    @return: Tuple (comparison matrix, number of improvements, number of
+            regressions, total number of compared elements).
+    """
+    improvements = 0
+    regressions = 0
+    same = 0
+
+    new_matrix = []
+    for line1, line2 in zip(matrix1, matrix2):
+        new_line = []
+        # The column position is tracked explicitly: looking it up by value
+        # with line1.index() picks the wrong column when a value is repeated.
+        for column, (element1, element2) in enumerate(zip(line1, line2)):
+            reference = float(element1)
+            current = float(element2)
+            if reference == 0 and current != 0:
+                # The ratio against a zero reference is unbounded.
+                if current > 0:
+                    improvements += 1
+                    new_line.append("+inf")
+                else:
+                    regressions += 1
+                    new_line.append("-inf")
+                continue
+            if reference == 0:
+                ratio = 1.0
+            else:
+                ratio = current / reference
+            # Percent difference relative to the reference value.
+            difference = 100.0 * (ratio - 1)
+            if ratio < (1 - treshold):
+                regressions += 1
+                new_line.append(difference)
+            elif ratio > (1 + treshold):
+                improvements += 1
+                new_line.append("+" + str(difference))
+            else:
+                same += 1
+                if column == 0:
+                    new_line.append(element1)
+                else:
+                    new_line.append(".")
+        new_matrix.append(new_line)
+
+    total = improvements + regressions + same
+
+    return (new_matrix, improvements, regressions, total)
+
+
+class IOzoneAnalyzer(object):
+    """
+    Analyze an unprocessed IOzone file, and generate the following types of
+    report:
+
+    * Summary of throughput for all file and record sizes combined
+    * Summary of throughput for all file sizes
+    * Summary of throughput for all record sizes
+
+    If exactly two files are provided to the analyzer object, the two runs
+    are also compared, searching for regressions in performance.
+    """
+    def __init__(self, list_files, output_dir):
+        """
+        Store the input files and make sure the output directory exists.
+
+        @param list_files: List with the paths of the IOzone result files.
+        @param output_dir: Directory where the analysis data is written. It
+                is created (parents included) when it does not exist yet.
+        """
+        self.list_files = list_files
+        dir_missing = not os.path.isdir(output_dir)
+        if dir_missing:
+            os.makedirs(output_dir)
+        self.output_dir = output_dir
+        logging.info("Results will be stored in %s", output_dir)
+
+
+    def average_performance(self, results, size=None):
+        """
+        Flattens a list containing performance results.
+
+        For every throughput column (every column after the two size columns)
+        the geometric mean of that column over all lines is computed, divided
+        by 1024 and truncated to an integer (the reports label the resulting
+        values as MB/sec).
+
+        @param results: List of n lists containing data from performance runs.
+        @param size: Numerical value of a size (say, file_size) that was used
+                to filter the original results list. When given, it becomes
+                the first element of the returned line.
+        @return: List with the optional size followed by one averaged value
+                per throughput column. A column for which no mean can be
+                computed (for instance when results is empty) is reported
+                as 0.
+        """
+        average_line = []
+        if size is not None:
+            average_line.append(size)
+        for i in range(2, len(_LABELS)):
+            average = geometric_mean([line[i] for line in results])
+            if average is None:
+                # geometric_mean() has no answer (e.g. no lines were parsed):
+                # report zero instead of failing on None / 1024.0.
+                average = 0
+            average_line.append(int(average / 1024.0))
+        return average_line
+
+
+    def process_results(self, results, label=None):
+        """
+        Process a list of IOzone results according to label.
+
+        @param results: A list of n x m columns with original iozone results.
+        @param label: IOzone column label that we'll use to filter and compute
+                geometric mean results, in practical terms either 'file_size'
+                or 'record_size'. With no label a single line averaging all
+                the results is produced.
+        @return: A list of lines with geometric averages, one line for each
+                distinct value of the label column (in ascending order), or a
+                list with a single line when no label is given.
+        """
+        if label is None:
+            return [self.average_performance(results)]
+
+        column = _LABELS.index(label)
+        distinct_sizes = unique([row[column] for row in results])
+        distinct_sizes.sort()
+        return [self.average_performance(
+                        [row for row in results if row[column] == value],
+                        value)
+                for value in distinct_sizes]
+
+
+    def parse_file(self, file):
+        """
+        Parse an IOzone results file.
+
+        Only the lines made of exactly 15 whitespace separated integer fields
+        are kept; headers and any other text are ignored.
+
+        @param file: File object that will be parsed.
+        @return: Matrix (list of lists of int) containing IOzone results
+                extracted from the file.
+        """
+        matrix = []
+        for raw_line in file.readlines():
+            tokens = raw_line.split()
+            if len(tokens) == 15:
+                try:
+                    row = [int(token) for token in tokens]
+                except ValueError:
+                    # Right number of fields, but not a data line.
+                    pass
+                else:
+                    matrix.append(row)
+        return matrix
+
+
+    def report(self, overall_results, record_size_results, file_size_results):
+        """
+        Generates analysis data for IOZone run.
+
+        Logs three tables (overall, per record size and per file size) with
+        nice headers. The rows of the last two tables are also written,
+        without headers, to the files '2d-datasource-file' and
+        '2d-datasource-record' inside self.output_dir (replacing any previous
+        copy) for further processing (graph generation).
+
+        @param overall_results: List with one line of 13 throughput averages
+                computed over all file and record sizes.
+        @param record_size_results: List of lines (record size followed by 13
+                throughput averages), one for each record size tested.
+        @param file_size_results: List of lines (file size followed by 13
+                throughput averages), one for each file size tested.
+        """
+        logging.info("")
+        logging.info("TABLE:  SUMMARY of ALL FILE and RECORD SIZES                        Results in MB/sec")
+        logging.info("")
+        logging.info("FILE & RECORD  INIT    RE              RE    RANDOM  RANDOM  BACKWD   RECRE  STRIDE    F       FRE     F       FRE")
+        logging.info("SIZES (KB)     WRITE   WRITE   READ    READ    READ   WRITE    READ   WRITE    READ    WRITE   WRITE   READ    READ")
+        logging.info("-------------------------------------------------------------------------------------------------------------------")
+        for result_line in overall_results:
+            logging.info("ALL            %-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s" % tuple(result_line))
+        logging.info("")
+
+        logging.info("DRILLED DATA:")
+
+        logging.info("")
+        logging.info("TABLE:  RECORD Size against all FILE Sizes                          Results in MB/sec")
+        logging.info("")
+        logging.info("RECORD    INIT    RE              RE    RANDOM  RANDOM  BACKWD   RECRE  STRIDE    F       FRE     F       FRE ")
+        logging.info("SIZE (KB) WRITE   WRITE   READ    READ    READ   WRITE    READ   WRITE    READ    WRITE   WRITE   READ    READ")
+        logging.info("--------------------------------------------------------------------------------------------------------------")
+
+        # NOTE(review): the table keyed by record size goes to the file named
+        # '2d-datasource-file' and the one keyed by file size to
+        # '2d-datasource-record' -- confirm this naming is the intended one.
+        self._log_rows_to_datasource('2d-datasource-file', record_size_results)
+
+        logging.info("")
+
+        logging.info("")
+        logging.info("TABLE:  FILE Size against all RECORD Sizes                          Results in MB/sec")
+        logging.info("")
+        # The lines of this table are keyed by file size, so the first column
+        # is headed FILE (it used to repeat the RECORD header).
+        logging.info("FILE      INIT    RE              RE    RANDOM  RANDOM  BACKWD   RECRE  STRIDE    F       FRE     F       FRE ")
+        logging.info("SIZE (KB) WRITE   WRITE   READ    READ    READ   WRITE    READ   WRITE    READ    WRITE   WRITE   READ    READ")
+        logging.info("--------------------------------------------------------------------------------------------------------------")
+
+        self._log_rows_to_datasource('2d-datasource-record', file_size_results)
+
+        logging.info("")
+
+
+    def _log_rows_to_datasource(self, filename, rows):
+        """
+        Log table rows and mirror them, without headers, to a data file.
+
+        A file handler with a message-only format is attached to the root
+        logger while the rows are logged, so the rows end up both in the
+        regular logs and in the data file.
+
+        @param filename: Name of the data file, created inside output_dir. A
+                previous file with the same name is removed first.
+        @param rows: List of lines with 14 elements each (size plus 13
+                throughput averages).
+        """
+        path = os.path.join(self.output_dir, filename)
+        if os.path.isfile(path):
+            os.unlink(path)
+        logger = logging.getLogger()
+        handler = logging.FileHandler(path)
+        handler.setFormatter(logging.Formatter(""))
+        logger.addHandler(handler)
+        try:
+            for result_line in rows:
+                logging.info("%-10s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s" % tuple(result_line))
+        finally:
+            # Detach and close even if a malformed row raises, so that later
+            # log messages do not leak into the data file and the file
+            # descriptor is released.
+            logger.removeHandler(handler)
+            handler.close()
+
+
+    def report_comparison(self, record, file):
+        """
+        Generates comparison data for 2 IOZone runs.
+
+        Logs two tables with the differences between 2 sets of nxm results
+        (one keyed by record size, one keyed by file size), each followed by
+        a summary line with the number and percentage of regressions and
+        improvements.
+
+        @param record: Tuple with 4 elements containing results for record
+                size: (comparison matrix, improvements, regressions, total).
+        @param file: Tuple with 4 elements containing results for file size,
+                with the same layout.
+        """
+        (record_size, record_improvements, record_regressions,
+         record_total) = record
+        (file_size, file_improvements, file_regressions,
+         file_total) = file
+        logging.info("ANALYSIS of DRILLED DATA:")
+
+        logging.info("")
+        logging.info("TABLE:  RECsize Difference between runs                            Results are % DIFF")
+        logging.info("")
+        logging.info("RECORD    INIT    RE              RE    RANDOM  RANDOM  BACKWD   RECRE  STRIDE    F       FRE     F       FRE ")
+        logging.info("SIZE (KB) WRITE   WRITE   READ    READ    READ   WRITE    READ   WRITE    READ    WRITE   WRITE   READ    READ")
+        logging.info("--------------------------------------------------------------------------------------------------------------")
+        for result_line in record_size:
+            logging.info("%-10s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s" % tuple(result_line))
+        logging.info("REGRESSIONS: %d (%.2f%%)    Improvements: %d (%.2f%%)",
+                     record_regressions,
+                     self._percentage(record_regressions, record_total),
+                     record_improvements,
+                     self._percentage(record_improvements, record_total))
+        logging.info("")
+
+        logging.info("")
+        logging.info("TABLE:  FILEsize Difference between runs                           Results are % DIFF")
+        logging.info("")
+        # The lines of this table are keyed by file size, so the first column
+        # is headed FILE (it used to repeat the RECORD header).
+        logging.info("FILE      INIT    RE              RE    RANDOM  RANDOM  BACKWD   RECRE  STRIDE    F       FRE     F       FRE ")
+        logging.info("SIZE (KB) WRITE   WRITE   READ    READ    READ   WRITE    READ   WRITE    READ    WRITE   WRITE   READ    READ")
+        logging.info("--------------------------------------------------------------------------------------------------------------")
+        for result_line in file_size:
+            logging.info("%-10s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s" % tuple(result_line))
+        logging.info("REGRESSIONS: %d (%.2f%%)    Improvements: %d (%.2f%%)",
+                     file_regressions,
+                     self._percentage(file_regressions, file_total),
+                     file_improvements,
+                     self._percentage(file_improvements, file_total))
+        logging.info("")
+
+
+    def _percentage(self, count, total):
+        """
+        Return count as a percentage of total.
+
+        @param count: Number of elements counted.
+        @param total: Total number of elements.
+        @return: Float percentage; 0.0 when total is zero (nothing was
+                compared), instead of raising ZeroDivisionError.
+        """
+        if not total:
+            return 0.0
+        return 100 * count / float(total)
+
+
+    def analyze(self):
+        """
+        Analyzes and eventually compares sets of IOzone data.
+
+        Every file in self.list_files is parsed and reported on its own.
+        When exactly two files were given, their per record size and per
+        file size tables are also compared, the first file being used as
+        the reference.
+        """
+        compare = (len(self.list_files) == 2)
+        record_size = []
+        file_size = []
+        for path in self.list_files:
+            file = open(path, 'r')
+            try:
+                logging.info('FILE: %s', path)
+                results = self.parse_file(file)
+            finally:
+                # The parsed matrix is all we need; do not leak the handle.
+                file.close()
+
+            overall_results = self.process_results(results)
+            record_size_results = self.process_results(results, 'record_size')
+            file_size_results = self.process_results(results, 'file_size')
+            self.report(overall_results, record_size_results, file_size_results)
+
+            if compare:
+                record_size.append(record_size_results)
+                file_size.append(file_size_results)
+
+        if compare:
+            record_comparison = compare_matrices(*record_size)
+            file_comparison = compare_matrices(*file_size)
+            self.report_comparison(record_comparison, file_comparison)
+
+
+class IOzonePlotter(object):
+    """
+    Plots graphs based on the results of an IOzone run.
+
+    Uses gnuplot to generate the graphs; when gnuplot or the results file is
+    not available, graph generation is disabled.
+    """
+    def __init__(self, results_file, output_dir):
+        """
+        Locate gnuplot, prepare the output directory and the 3D data source.
+
+        Graph generation is disabled (self.active is False) when the gnuplot
+        command can not be found or when results_file is not a file.
+
+        @param results_file: Path to the file with the raw IOzone results.
+        @param output_dir: Directory where the data source, the gnuplot
+                command files and the graphs are written. It is created
+                when it does not exist yet.
+        """
+        self.active = True
+        # Always defined, so the object has a consistent set of attributes
+        # even when graph generation ends up disabled.
+        self.gnuplot = None
+        self.datasource = None
+        try:
+            self.gnuplot = os_dep.command("gnuplot")
+        except Exception:
+            # Not a bare except: KeyboardInterrupt and SystemExit must not be
+            # mistaken for a missing command.
+            logging.error("Command gnuplot not found, disabling graph "
+                          "generation")
+            self.active = False
+
+        if not os.path.isdir(output_dir):
+            os.makedirs(output_dir)
+        self.output_dir = output_dir
+
+        if not os.path.isfile(results_file):
+            logging.error("Invalid file %s provided, disabling graph "
+                          "generation", results_file)
+            self.active = False
+            self.results_file = None
+        else:
+            self.results_file = results_file
+            self.generate_data_source()
+
+
+    def generate_data_source(self):
+        """
+        Creates data file without headers for gnuplot consumption.
+
+        Copies to '3d-datasource' (inside self.output_dir) only the lines of
+        self.results_file that are made of exactly 15 whitespace separated
+        integer fields. The path of the new file is kept in self.datasource.
+        """
+        self.datasource = os.path.join(self.output_dir, '3d-datasource')
+        results_file = open(self.results_file, 'r')
+        try:
+            datasource = open(self.datasource, 'w')
+            try:
+                for line in results_file.readlines():
+                    fields = line.split()
+                    if len(fields) != 15:
+                        continue
+                    try:
+                        # Conversion is only a validity check on the line.
+                        for field in fields:
+                            int(field)
+                    except ValueError:
+                        continue
+                    datasource.write(line)
+            finally:
+                datasource.close()
+        finally:
+            results_file.close()
+
+
+    def plot_2d_graphs(self):
+        """
+        Plot one 2D line graph for each one of the throughput parameters.
+
+        For each throughput parameter, generate a gnuplot commands file
+        ('2d-<label>.do') that plots the matching column of the
+        '2d-datasource-file' table against its first (size) column into
+        '2d-<label>.png', and run gnuplot on it. A failing gnuplot run is
+        logged and does not prevent the remaining graphs from being plotted.
+        """
+        # NOTE(review): IOzoneAnalyzer.report() fills '2d-datasource-file'
+        # with the table keyed by record size, while the x axis below is
+        # labelled 'File size (KB)' -- confirm which one is intended.
+        datasource_2d = os.path.join(self.output_dir, '2d-datasource-file')
+        # Column 1 of the data source holds the size, so the 13 throughput
+        # values are found in gnuplot columns 2 to 14.
+        for index, label in zip(range(2, 15), _LABELS[2:]):
+            commands_path = os.path.join(self.output_dir, '2d-%s.do' % label)
+            graph_path = os.path.join(self.output_dir, '2d-%s.png' % label)
+            commands = [
+                "set title 'Iozone performance: %s'\n" % label,
+                "set logscale x\n",
+                "set xlabel 'File size (KB)'\n",
+                "set ylabel 'Throughput (MB/s)'\n",
+                "set terminal png small size 450 350\n",
+                "set output '%s'\n" % graph_path,
+                ("plot '%s' using 1:%s title '%s' with lines \n" %
+                 (datasource_2d, index, label)),
+            ]
+            commands_file = open(commands_path, 'w')
+            try:
+                commands_file.write("".join(commands))
+            finally:
+                commands_file.close()
+            try:
+                utils.run("%s %s" % (self.gnuplot, commands_path))
+            except error.CmdError, e:
+                # Log the path of the commands file, not the file object.
+                logging.error("Problem plotting from commands file %s: %s",
+                              commands_path, str(e))
+
+
+    def plot_3d_graphs(self):
+        """
+        Plot one 3D surface graph for each one of the throughput parameters.
+
+        For each one of the throughput parameters, generate a set of gnuplot
+        commands ('<label>.do') that will create a parametric surface with
+        file size vs. record size vs. throughput out of self.datasource,
+        writing the graph to '<label>.png'. A failing gnuplot run is logged
+        and does not prevent the remaining graphs from being plotted.
+        """
+        # Columns 1 and 2 of the data source are the file and record sizes,
+        # so the 13 throughput values are found in gnuplot columns 3 to 15.
+        for index, label in zip(range(3, 16), _LABELS[2:]):
+            commands_path = os.path.join(self.output_dir, '%s.do' % label)
+            graph_path = os.path.join(self.output_dir, '%s.png' % label)
+            commands = [
+                "set title 'Iozone performance: %s'\n" % label,
+                "set grid lt 2 lw 1\n",
+                "set surface\n",
+                "set parametric\n",
+                "set xtics\n",
+                "set ytics\n",
+                "set logscale x 2\n",
+                "set logscale y 2\n",
+                "set logscale z\n",
+                "set xrange [2.**5:2.**24]\n",
+                "set xlabel 'File size (KB)'\n",
+                "set ylabel 'Record size (KB)'\n",
+                "set zlabel 'Throughput (KB/s)'\n",
+                # 'set data style' is the deprecated spelling of this option.
+                "set style data lines\n",
+                "set dgrid3d 80,80, 3\n",
+                "set terminal png small size 900 700\n",
+                "set output '%s'\n" % graph_path,
+                ("splot '%s' using 1:2:%s title '%s'\n" %
+                 (self.datasource, index, label)),
+            ]
+            commands_file = open(commands_path, 'w')
+            try:
+                commands_file.write("".join(commands))
+            finally:
+                commands_file.close()
+            try:
+                utils.run("%s %s" % (self.gnuplot, commands_path))
+            except error.CmdError, e:
+                # Log the path of the commands file, not the file object.
+                logging.error("Problem plotting from commands file %s: %s",
+                              commands_path, str(e))
+
+
+    def plot_all(self):
+        """
+        Generate the 2D graphs and then the 3D graphs.
+
+        Nothing is plotted when graph generation was disabled (self.active
+        is False).
+        """
+        if not self.active:
+            return
+        for plot in (self.plot_2d_graphs, self.plot_3d_graphs):
+            plot()
+
+
+class AnalyzerLoggingConfig(logging_config.LoggingConfig):
+    """
+    Logging configuration used when this module is run as a script.
+    """
+    def configure_logging(self, results_dir=None, verbose=False):
+        """
+        Configure logging through the base class with console output enabled.
+
+        @param results_dir: Accepted but not used by this method.
+        @param verbose: Passed unchanged to the base class.
+        """
+        super(AnalyzerLoggingConfig, self).configure_logging(use_console=True,
+                                                        verbose=verbose)
+
+
+if __name__ == "__main__":
+    # Stand alone usage: analyze one IOzone results file, or compare two, and
+    # plot the graphs for the first one into a time stamped directory created
+    # under the current working directory.
+    parser = optparse.OptionParser("usage: %prog [options] [filenames]")
+    options, args = parser.parse_args()
+
+    logging_manager.configure_logging(AnalyzerLoggingConfig())
+
+    # Only one or two file names are accepted.
+    if not args or len(args) > 2:
+        parser.print_help()
+        sys.exit(1)
+    filenames = args
+
+    o = os.path.join(os.getcwd(),
+                     "iozone-graphs-%s" % time.strftime('%Y-%m-%d-%H.%M.%S'))
+    if not os.path.isdir(o):
+        os.makedirs(o)
+
+    a = IOzoneAnalyzer(list_files=filenames, output_dir=o)
+    a.analyze()
+    p = IOzonePlotter(results_file=filenames[0], output_dir=o)
+    p.plot_all()
-- 
1.7.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux