I'm slightly surprised this isn't called from postprocess in the test? Any downside to doing that? On Fri, Apr 30, 2010 at 2:20 PM, Lucas Meneghel Rodrigues <lmr@xxxxxxxxxx> wrote: > This module contains code to postprocess IOzone data > in a convenient way so we can generate performance graphs > and condensed data. The graph generation part depends > on gnuplot, but if the utility is not present, > functionality will gracefully degrade. > > The reason why this was created as a separate module is: > * It doesn't pollute the main test class. > * Allows us to use the postprocess module as a stand alone program, > that can even do performance comparison between 2 IOzone runs. > > Signed-off-by: Lucas Meneghel Rodrigues <lmr@xxxxxxxxxx> > --- > client/tests/iozone/postprocessing.py | 487 +++++++++++++++++++++++++++++++++ > 1 files changed, 487 insertions(+), 0 deletions(-) > create mode 100755 client/tests/iozone/postprocessing.py > > diff --git a/client/tests/iozone/postprocessing.py b/client/tests/iozone/postprocessing.py > new file mode 100755 > index 0000000..b495502 > --- /dev/null > +++ b/client/tests/iozone/postprocessing.py > @@ -0,0 +1,487 @@ > +#!/usr/bin/python > +""" > +Postprocessing module for IOzone. It is capable to pick results from an > +IOzone run, calculate the geometric mean for all throughput results for > +a given file size or record size, and then generate a series of 2D and 3D > +graphs. The graph generation functionality depends on gnuplot, and if it > +is not present, functionality degrates gracefully. > + > +@copyright: Red Hat 2010 > +""" > +import os, sys, optparse, logging, math, time > +import common > +from autotest_lib.client.common_lib import logging_config, logging_manager > +from autotest_lib.client.common_lib import error > +from autotest_lib.client.bin import utils, os_dep > + > + > +_LABELS = ('file_size', 'record_size', 'write', 'rewrite', 'read', 'reread', > + 'randread', 'randwrite', 'bkwdread', 'recordrewrite', 'strideread', > + 'fwrite', 'frewrite', 'fread', 'freread') > + > + > +def unique(list): > + """ > + Return a list of the elements in list, but without duplicates. > + > + @param list: List with values. > + @return: List with non duplicate elements. > + """ > + n = len(list) > + if n == 0: > + return [] > + u = {} > + try: > + for x in list: > + u[x] = 1 > + except TypeError: > + return None > + else: > + return u.keys() > + > + > +def geometric_mean(values): > + """ > + Evaluates the geometric mean for a list of numeric values. > + > + @param values: List with values. > + @return: Single value representing the geometric mean for the list values. > + @see: http://en.wikipedia.org/wiki/Geometric_mean > + """ > + try: > + values = [int(value) for value in values] > + except ValueError: > + return None > + product = 1 > + n = len(values) > + if n == 0: > + return None > + return math.exp(sum([math.log(x) for x in values])/n) > + > + > +def compare_matrices(matrix1, matrix2, treshold=0.05): > + """ > + Compare 2 matrices nxm and return a matrix nxm with comparison data > + > + @param matrix1: Reference Matrix with numeric data > + @param matrix2: Matrix that will be compared > + @param treshold: Any difference bigger than this percent treshold will be > + reported. > + """ > + improvements = 0 > + regressions = 0 > + same = 0 > + comparison_matrix = [] > + > + new_matrix = [] > + for line1, line2 in zip(matrix1, matrix2): > + new_line = [] > + for element1, element2 in zip(line1, line2): > + ratio = float(element2) / float(element1) > + if ratio < (1 - treshold): > + regressions += 1 > + new_line.append((100 * ratio - 1) - 100) > + elif ratio > (1 + treshold): > + improvements += 1 > + new_line.append("+" + str((100 * ratio - 1) - 100)) > + else: > + same + 1 > + if line1.index(element1) == 0: > + new_line.append(element1) > + else: > + new_line.append(".") > + new_matrix.append(new_line) > + > + total = improvements + regressions + same > + > + return (new_matrix, improvements, regressions, total) > + > + > +class IOzoneAnalyzer(object): > + """ > + Analyze an unprocessed IOzone file, and generate the following types of > + report: > + > + * Summary of throughput for all file and record sizes combined > + * Summary of throughput for all file sizes > + * Summary of throughput for all record sizes > + > + If more than one file is provided to the analyzer object, a comparison > + between the two runs is made, searching for regressions in performance. > + """ > + def __init__(self, list_files, output_dir): > + self.list_files = list_files > + if not os.path.isdir(output_dir): > + os.makedirs(output_dir) > + self.output_dir = output_dir > + logging.info("Results will be stored in %s", output_dir) > + > + > + def average_performance(self, results, size=None): > + """ > + Flattens a list containing performance results. > + > + @param results: List of n lists containing data from performance runs. > + @param size: Numerical value of a size (say, file_size) that was used > + to filter the original results list. > + @return: List with 1 list containing average data from the performance > + run. > + """ > + average_line = [] > + if size is not None: > + average_line.append(size) > + for i in range(2, 15): > + average = geometric_mean([line[i] for line in results]) / 1024.0 > + average = int(average) > + average_line.append(average) > + return average_line > + > + > + def process_results(self, results, label=None): > + """ > + Process a list of IOzone results according to label. > + > + @label: IOzone column label that we'll use to filter and compute > + geometric mean results, in practical term either 'file_size' > + or 'record_size'. > + @result: A list of n x m columns with original iozone results. > + @return: A list of n-? x (m-1) columns with geometric averages for > + values of each label (ex, average for all file_sizes). > + """ > + performance = [] > + if label is not None: > + index = _LABELS.index(label) > + sizes = unique([line[index] for line in results]) > + sizes.sort() > + for size in sizes: > + r_results = [line for line in results if line[index] == size] > + performance.append(self.average_performance(r_results, size)) > + else: > + performance.append(self.average_performance(results)) > + > + return performance > + > + > + def parse_file(self, file): > + """ > + Parse an IOzone results file. > + > + @param file: File object that will be parsed. > + @return: Matrix containing IOzone results extracted from the file. > + """ > + lines = [] > + for line in file.readlines(): > + fields = line.split() > + if len(fields) != 15: > + continue > + try: > + lines.append([int(i) for i in fields]) > + except ValueError: > + continue > + return lines > + > + > + def report(self, overall_results, record_size_results, file_size_results): > + """ > + Generates analysis data for IOZone run. > + > + Generates a report to both logs (where it goes with nice headers) and > + output files for further processing (graph generation). > + > + @param overall_results: 1x15 Matrix containing IOzone results for all > + file sizes > + @param record_size_results: nx15 Matrix containing IOzone results for > + each record size tested. > + @param file_size_results: nx15 Matrix containing file size results > + for each file size tested. > + """ > + # Here we'll use the logging system to put the output of our analysis > + # to files > + logger = logging.getLogger() > + formatter = logging.Formatter("") > + > + logging.info("") > + logging.info("TABLE: SUMMARY of ALL FILE and RECORD SIZES Results in MB/sec") > + logging.info("") > + logging.info("FILE & RECORD INIT RE RE RANDOM RANDOM BACKWD RECRE STRIDE F FRE F FRE") > + logging.info("SIZES (KB) WRITE WRITE READ READ READ WRITE READ WRITE READ WRITE WRITE READ READ") > + logging.info("-------------------------------------------------------------------------------------------------------------------") > + for result_line in overall_results: > + logging.info("ALL %-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s" % tuple(result_line)) > + logging.info("") > + > + logging.info("DRILLED DATA:") > + > + logging.info("") > + logging.info("TABLE: RECORD Size against all FILE Sizes Results in MB/sec") > + logging.info("") > + logging.info("RECORD INIT RE RE RANDOM RANDOM BACKWD RECRE STRIDE F FRE F FRE ") > + logging.info("SIZE (KB) WRITE WRITE READ READ READ WRITE READ WRITE READ WRITE WRITE READ READ") > + logging.info("--------------------------------------------------------------------------------------------------------------") > + > + foutput_path = os.path.join(self.output_dir, '2d-datasource-file') > + if os.path.isfile(foutput_path): > + os.unlink(foutput_path) > + foutput = logging.FileHandler(foutput_path) > + foutput.setFormatter(formatter) > + logger.addHandler(foutput) > + for result_line in record_size_results: > + logging.info("%-10s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s" % tuple(result_line)) > + logger.removeHandler(foutput) > + > + logging.info("") > + > + logging.info("") > + logging.info("TABLE: FILE Size against all RECORD Sizes Results in MB/sec") > + logging.info("") > + logging.info("RECORD INIT RE RE RANDOM RANDOM BACKWD RECRE STRIDE F FRE F FRE ") > + logging.info("SIZE (KB) WRITE WRITE READ READ READ WRITE READ WRITE READ WRITE WRITE READ READ") > + logging.info("--------------------------------------------------------------------------------------------------------------") > + > + routput_path = os.path.join(self.output_dir, '2d-datasource-record') > + if os.path.isfile(routput_path): > + os.unlink(routput_path) > + routput = logging.FileHandler(routput_path) > + routput.setFormatter(formatter) > + logger.addHandler(routput) > + for result_line in file_size_results: > + logging.info("%-10s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s" % tuple(result_line)) > + logger.removeHandler(routput) > + > + logging.info("") > + > + > + def report_comparison(self, record, file): > + """ > + Generates comparison data for 2 IOZone runs. > + > + It compares 2 sets of nxm results and outputs a table with differences. > + If a difference higher or smaller than 5% is found, a warning is > + triggered. > + > + @param record: Tuple with 4 elements containing results for record size. > + @param file: Tuple with 4 elements containing results for file size. > + """ > + (record_size, record_improvements, record_regressions, > + record_total) = record > + (file_size, file_improvements, file_regressions, > + file_total) = file > + logging.info("ANALYSIS of DRILLED DATA:") > + > + logging.info("") > + logging.info("TABLE: RECsize Difference between runs Results are % DIFF") > + logging.info("") > + logging.info("RECORD INIT RE RE RANDOM RANDOM BACKWD RECRE STRIDE F FRE F FRE ") > + logging.info("SIZE (KB) WRITE WRITE READ READ READ WRITE READ WRITE READ WRITE WRITE READ READ") > + logging.info("--------------------------------------------------------------------------------------------------------------") > + for result_line in record_size: > + logging.info("%-10s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s" % tuple(result_line)) > + logging.info("REGRESSIONS: %d (%.2f%%) Improvements: %d (%.2f%%)", > + record_regressions, > + (100 * record_regressions/float(record_total)), > + record_improvements, > + (100 * record_improvements/float(record_total))) > + logging.info("") > + > + logging.info("") > + logging.info("TABLE: FILEsize Difference between runs Results are % DIFF") > + logging.info("") > + logging.info("RECORD INIT RE RE RANDOM RANDOM BACKWD RECRE STRIDE F FRE F FRE ") > + logging.info("SIZE (KB) WRITE WRITE READ READ READ WRITE READ WRITE READ WRITE WRITE READ READ") > + logging.info("--------------------------------------------------------------------------------------------------------------") > + for result_line in file_size: > + logging.info("%-10s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s" % tuple(result_line)) > + logging.info("REGRESSIONS: %d (%.2f%%) Improvements: %d (%.2f%%)", > + file_regressions, > + (100 * file_regressions/float(file_total)), > + file_improvements, > + (100 * file_improvements/float(file_total))) > + logging.info("") > + > + > + def analyze(self): > + """ > + Analyzes and eventually compares sets of IOzone data. > + """ > + overall = [] > + record_size = [] > + file_size = [] > + for path in self.list_files: > + file = open(path, 'r') > + logging.info('FILE: %s', path) > + > + results = self.parse_file(file) > + > + overall_results = self.process_results(results) > + record_size_results = self.process_results(results, 'record_size') > + file_size_results = self.process_results(results, 'file_size') > + self.report(overall_results, record_size_results, file_size_results) > + > + if len(self.list_files) == 2: > + overall.append(overall_results) > + record_size.append(record_size_results) > + file_size.append(file_size_results) > + > + if len(self.list_files) == 2: > + record_comparison = compare_matrices(*record_size) > + file_comparison = compare_matrices(*file_size) > + self.report_comparison(record_comparison, file_comparison) > + > + > +class IOzonePlotter(object): > + """ > + Plots graphs based on the results of an IOzone run. > + > + Plots graphs based on the results of an IOzone run. Uses gnuplot to > + generate the graphs. > + """ > + def __init__(self, results_file, output_dir): > + self.active = True > + try: > + self.gnuplot = os_dep.command("gnuplot") > + except: > + logging.error("Command gnuplot not found, disabling graph " > + "generation") > + self.active = False > + > + if not os.path.isdir(output_dir): > + os.makedirs(output_dir) > + self.output_dir = output_dir > + > + if not os.path.isfile(results_file): > + logging.error("Invalid file %s provided, disabling graph " > + "generation", results_file) > + self.active = False > + self.results_file = None > + else: > + self.results_file = results_file > + self.generate_data_source() > + > + > + def generate_data_source(self): > + """ > + Creates data file without headers for gnuplot consumption. > + """ > + results_file = open(self.results_file, 'r') > + self.datasource = os.path.join(self.output_dir, '3d-datasource') > + datasource = open(self.datasource, 'w') > + for line in results_file.readlines(): > + fields = line.split() > + if len(fields) != 15: > + continue > + try: > + values = [int(i) for i in fields] > + datasource.write(line) > + except ValueError: > + continue > + datasource.close() > + > + > + def plot_2d_graphs(self): > + """ > + For each one of the throughput parameters, generate a set of gnuplot > + commands that will create a parametric surface with file size vs. > + record size vs. throughput. > + """ > + datasource_2d = os.path.join(self.output_dir, '2d-datasource-file') > + for index, label in zip(range(1, 14), _LABELS[2:]): > + commands_path = os.path.join(self.output_dir, '2d-%s.do' % label) > + commands = "" > + commands += "set title 'Iozone performance: %s'\n" % label > + commands += "set logscale x\n" > + commands += "set xlabel 'File size (KB)'\n" > + commands += "set ylabel 'Througput (MB/s)'\n" > + commands += "set terminal png small size 450 350\n" > + commands += "set output '%s'\n" % os.path.join(self.output_dir, > + '2d-%s.png' % label) > + commands += ("plot '%s' using 1:%s title '%s' with lines \n" % > + (datasource_2d, index, label)) > + commands_file = open(commands_path, 'w') > + commands_file.write(commands) > + commands_file.close() > + try: > + utils.run("%s %s" % (self.gnuplot, commands_path)) > + except error.CmdError, e: > + logging.error("Problem plotting from commands file %s: %s", > + commands_file, str(e)) > + > + > + def plot_3d_graphs(self): > + """ > + For each one of the throughput parameters, generate a set of gnuplot > + commands that will create a parametric surface with file size vs. > + record size vs. throughput. > + """ > + for index, label in zip(range(1, 14), _LABELS[2:]): > + commands_path = os.path.join(self.output_dir, '%s.do' % label) > + commands = "" > + commands += "set title 'Iozone performance: %s'\n" % label > + commands += "set grid lt 2 lw 1\n" > + commands += "set surface\n" > + commands += "set parametric\n" > + commands += "set xtics\n" > + commands += "set ytics\n" > + commands += "set logscale x 2\n" > + commands += "set logscale y 2\n" > + commands += "set logscale z\n" > + commands += "set xrange [2.**5:2.**24]\n" > + commands += "set xlabel 'File size (KB)'\n" > + commands += "set ylabel 'Record size (KB)'\n" > + commands += "set zlabel 'Througput (KB/s)'\n" > + commands += "set data style lines\n" > + commands += "set dgrid3d 80,80, 3\n" > + commands += "set terminal png small size 900 700\n" > + commands += "set output '%s'\n" % os.path.join(self.output_dir, > + '%s.png' % label) > + commands += ("splot '%s' using 1:2:%s title '%s'\n" % > + (self.datasource, index, label)) > + commands_file = open(commands_path, 'w') > + commands_file.write(commands) > + commands_file.close() > + try: > + utils.run("%s %s" % (self.gnuplot, commands_path)) > + except error.CmdError, e: > + logging.error("Problem plotting from commands file %s: %s", > + commands_file, str(e)) > + > + > + def plot_all(self): > + """ > + Plot all graphs that are to be plotted, provided that we have gnuplot. > + """ > + if self.active: > + self.plot_2d_graphs() > + self.plot_3d_graphs() > + > + > +class AnalyzerLoggingConfig(logging_config.LoggingConfig): > + def configure_logging(self, results_dir=None, verbose=False): > + super(AnalyzerLoggingConfig, self).configure_logging(use_console=True, > + verbose=verbose) > + > + > +if __name__ == "__main__": > + parser = optparse.OptionParser("usage: %prog [options] [filenames]") > + options, args = parser.parse_args() > + > + logging_manager.configure_logging(AnalyzerLoggingConfig()) > + > + if args: > + filenames = args > + else: > + parser.print_help() > + sys.exit(1) > + > + if len(args) > 2: > + parser.print_help() > + sys.exit(1) > + > + o = os.path.join(os.getcwd(), > + "iozone-graphs-%s" % time.strftime('%Y-%m-%d-%H.%M.%S')) > + if not os.path.isdir(o): > + os.makedirs(o) > + > + a = IOzoneAnalyzer(list_files=filenames, output_dir=o) > + a.analyze() > + p = IOzonePlotter(results_file=filenames[0], output_dir=o) > + p.plot_all() > -- > 1.7.0.1 > > _______________________________________________ > Autotest mailing list > Autotest@xxxxxxxxxxxxxxx > http://test.kernel.org/cgi-bin/mailman/listinfo/autotest > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html