#!/usr/bin/python
"""
Postprocessing module for IOzone. It is capable to pick results from an
IOzone run, calculate the geometric mean for all throughput results for
a given file size or record size, and then generate a series of 2D and 3D
graphs. The graph generation functionality depends on gnuplot, and if it
is not present, functionality degrades gracefully.

@copyright: Red Hat 2010
"""
import os, sys, optparse, logging, math, time
import common
from autotest_lib.client.common_lib import logging_config, logging_manager
from autotest_lib.client.common_lib import error
from autotest_lib.client.bin import utils, os_dep


# Column labels of a raw IOzone result line, in the exact order IOzone
# prints them (2 size columns followed by 13 throughput columns).
_LABELS = ('file_size', 'record_size', 'write', 'rewrite', 'read', 'reread',
           'randread', 'randwrite', 'bkwdread', 'recordrewrite', 'strideread',
           'fwrite', 'frewrite', 'fread', 'freread')


def unique(values):
    """
    Return a list of the elements in values, but without duplicates.

    Ordering of the returned elements is not guaranteed.

    @param values: List with (hashable) values.
    @return: List with non duplicate elements, or None if any element is
            unhashable.
    """
    if not values:
        return []
    seen = {}
    try:
        for item in values:
            seen[item] = 1
    except TypeError:
        # Unhashable element present; keep the original None contract.
        return None
    return list(seen.keys())


def geometric_mean(values):
    """
    Evaluates the geometric mean for a list of numeric values.

    @param values: List with values convertible to int.
    @return: Single value representing the geometric mean for the list
            values, or None if the list is empty or holds non numeric
            values.
    @see: http://en.wikipedia.org/wiki/Geometric_mean
    """
    try:
        values = [int(value) for value in values]
    except ValueError:
        return None
    n = len(values)
    if n == 0:
        return None
    # exp(mean(log(x))) avoids the overflow a plain nth-root-of-product
    # evaluation would hit with large throughput numbers.
    return math.exp(sum([math.log(x) for x in values]) / n)


def compare_matrices(matrix1, matrix2, treshold=0.05):
    """
    Compare 2 matrices nxm and return a matrix nxm with comparison data.

    Cells differing by more than the threshold are rendered as a percent
    difference (improvements prefixed with "+"); cells considered equal
    are collapsed to ".", except the first column, which carries the
    file/record size label and is always preserved.

    @param matrix1: Reference Matrix with numeric data.
    @param matrix2: Matrix that will be compared.
    @param treshold: Any difference bigger than this percent threshold will
            be reported. (Parameter name kept for backward compatibility.)
    @return: Tuple (comparison matrix, improvements, regressions, total
            number of cells compared).
    """
    improvements = 0
    regressions = 0
    same = 0

    new_matrix = []
    for line1, line2 in zip(matrix1, matrix2):
        new_line = []
        for column, (element1, element2) in enumerate(zip(line1, line2)):
            ratio = float(element2) / float(element1)
            # Percent difference relative to the reference value.
            diff = 100 * (ratio - 1)
            if ratio < (1 - treshold):
                regressions += 1
                new_line.append(diff)
            elif ratio > (1 + treshold):
                improvements += 1
                new_line.append("+" + str(diff))
            else:
                same += 1
                if column == 0:
                    # Keep the size label so the table stays readable.
                    new_line.append(element1)
                else:
                    new_line.append(".")
        new_matrix.append(new_line)

    total = improvements + regressions + same

    return (new_matrix, improvements, regressions, total)


class IOzoneAnalyzer(object):
    """
    Analyze an unprocessed IOzone file, and generate the following types of
    report:

    * Summary of throughput for all file and record sizes combined
    * Summary of throughput for all file sizes
    * Summary of throughput for all record sizes

    If more than one file is provided to the analyzer object, a comparison
    between the two runs is made, searching for regressions in performance.
    """
    def __init__(self, list_files, output_dir):
        """
        Initializes the analyzer, creating the output dir if needed.

        @param list_files: List of paths to unprocessed IOzone result files.
        @param output_dir: Directory where reports will be written.
        """
        self.list_files = list_files
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        self.output_dir = output_dir
        logging.info("Results will be stored in %s", output_dir)


    def average_performance(self, results, size=None):
        """
        Flattens a list containing performance results.

        @param results: List of n lists containing data from performance
                runs.
        @param size: Numerical value of a size (say, file_size) that was
                used to filter the original results list.
        @return: List with 1 list containing average data from the
                performance run.
        """
        average_line = []
        if size is not None:
            average_line.append(size)
        # Columns 2..14 are the 13 throughput columns (see _LABELS);
        # IOzone reports KB/s, so divide by 1024 to get MB/s.
        for i in range(2, 15):
            average = geometric_mean([line[i] for line in results]) / 1024.0
            average = int(average)
            average_line.append(average)
        return average_line


    def process_results(self, results, label=None):
        """
        Process a list of IOzone results according to label.

        @param label: IOzone column label that we'll use to filter and
                compute geometric mean results, in practical terms either
                'file_size' or 'record_size'.
        @param results: A list of n x m columns with original iozone
                results.
        @return: A list of n-? x (m-1) columns with geometric averages for
                values of each label (ex, average for all file_sizes).
        """
        performance = []
        if label is not None:
            index = _LABELS.index(label)
            sizes = unique([line[index] for line in results])
            sizes.sort()
            for size in sizes:
                r_results = [line for line in results if line[index] == size]
                performance.append(self.average_performance(r_results, size))
        else:
            performance.append(self.average_performance(results))

        return performance


    def parse_file(self, file):
        """
        Parse an IOzone results file.

        Only lines with exactly 15 all-numeric fields (a complete result
        row) are kept; headers and partial lines are silently skipped.

        @param file: File object that will be parsed.
        @return: Matrix containing IOzone results extracted from the file.
        """
        lines = []
        for line in file.readlines():
            fields = line.split()
            if len(fields) != 15:
                continue
            try:
                lines.append([int(i) for i in fields])
            except ValueError:
                continue
        return lines


    def report(self, overall_results, record_size_results, file_size_results):
        """
        Generates analysis data for IOZone run.

        Generates a report to both logs (where it goes with nice headers)
        and output files for further processing (graph generation).

        @param overall_results: 1x15 Matrix containing IOzone results for
                all file sizes.
        @param record_size_results: nx15 Matrix containing IOzone results
                for each record size tested.
        @param file_size_results: nx15 Matrix containing file size results
                for each file size tested.
        """
        # Here we'll use the logging system to put the output of our
        # analysis to files: a bare FileHandler with an empty formatter is
        # attached while each table is logged, then removed again.
        logger = logging.getLogger()
        formatter = logging.Formatter("")

        logging.info("")
        logging.info("TABLE: SUMMARY of ALL FILE and RECORD SIZES Results in MB/sec")
        logging.info("")
        logging.info("FILE & RECORD  INIT    RE              RE    RANDOM  RANDOM  BACKWD   RECRE   STRIDE  F       FRE     F       FRE")
        logging.info("SIZES (KB)     WRITE   WRITE   READ    READ  READ    WRITE   READ     WRITE   READ    WRITE   WRITE   READ    READ")
        logging.info("-------------------------------------------------------------------------------------------------------------------")
        for result_line in overall_results:
            logging.info("ALL            %-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s" % tuple(result_line))
        logging.info("")

        logging.info("DRILLED DATA:")

        logging.info("")
        logging.info("TABLE: RECORD Size against all FILE Sizes Results in MB/sec")
        logging.info("")
        logging.info("RECORD    INIT    RE              RE    RANDOM  RANDOM  BACKWD   RECRE   STRIDE  F       FRE     F       FRE ")
        logging.info("SIZE (KB) WRITE   WRITE   READ    READ  READ    WRITE   READ     WRITE   READ    WRITE   WRITE   READ    READ")
        logging.info("--------------------------------------------------------------------------------------------------------------")

        foutput_path = os.path.join(self.output_dir, '2d-datasource-file')
        if os.path.isfile(foutput_path):
            os.unlink(foutput_path)
        foutput = logging.FileHandler(foutput_path)
        foutput.setFormatter(formatter)
        logger.addHandler(foutput)
        for result_line in record_size_results:
            logging.info("%-10s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s" % tuple(result_line))
        logger.removeHandler(foutput)

        logging.info("")

        logging.info("")
        logging.info("TABLE: FILE Size against all RECORD Sizes Results in MB/sec")
        logging.info("")
        logging.info("RECORD    INIT    RE              RE    RANDOM  RANDOM  BACKWD   RECRE   STRIDE  F       FRE     F       FRE ")
        logging.info("SIZE (KB) WRITE   WRITE   READ    READ  READ    WRITE   READ     WRITE   READ    WRITE   WRITE   READ    READ")
        logging.info("--------------------------------------------------------------------------------------------------------------")

        routput_path = os.path.join(self.output_dir, '2d-datasource-record')
        if os.path.isfile(routput_path):
            os.unlink(routput_path)
        routput = logging.FileHandler(routput_path)
        routput.setFormatter(formatter)
        logger.addHandler(routput)
        for result_line in file_size_results:
            logging.info("%-10s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s" % tuple(result_line))
        logger.removeHandler(routput)

        logging.info("")


    def report_comparison(self, record, file):
        """
        Generates comparison data for 2 IOZone runs.

        It compares 2 sets of nxm results and outputs a table with
        differences. If a difference higher or smaller than 5% is found, a
        warning is triggered.

        @param record: Tuple with 4 elements containing results for record
                size.
        @param file: Tuple with 4 elements containing results for file
                size.
        """
        (record_size, record_improvements, record_regressions,
         record_total) = record
        (file_size, file_improvements, file_regressions,
         file_total) = file
        logging.info("ANALYSIS of DRILLED DATA:")

        logging.info("")
        logging.info("TABLE: RECsize Difference between runs Results are % DIFF")
        logging.info("")
        logging.info("RECORD    INIT    RE              RE    RANDOM  RANDOM  BACKWD   RECRE   STRIDE  F       FRE     F       FRE ")
        logging.info("SIZE (KB) WRITE   WRITE   READ    READ  READ    WRITE   READ     WRITE   READ    WRITE   WRITE   READ    READ")
        logging.info("--------------------------------------------------------------------------------------------------------------")
        for result_line in record_size:
            logging.info("%-10s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s" % tuple(result_line))
        logging.info("REGRESSIONS: %d (%.2f%%)    Improvements: %d (%.2f%%)",
                     record_regressions,
                     (100 * record_regressions/float(record_total)),
                     record_improvements,
                     (100 * record_improvements/float(record_total)))
        logging.info("")

        logging.info("")
        logging.info("TABLE: FILEsize Difference between runs Results are % DIFF")
        logging.info("")
        logging.info("RECORD    INIT    RE              RE    RANDOM  RANDOM  BACKWD   RECRE   STRIDE  F       FRE     F       FRE ")
        logging.info("SIZE (KB) WRITE   WRITE   READ    READ  READ    WRITE   READ     WRITE   READ    WRITE   WRITE   READ    READ")
        logging.info("--------------------------------------------------------------------------------------------------------------")
        for result_line in file_size:
            logging.info("%-10s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s" % tuple(result_line))
        logging.info("REGRESSIONS: %d (%.2f%%)    Improvements: %d (%.2f%%)",
                     file_regressions,
                     (100 * file_regressions/float(file_total)),
                     file_improvements,
                     (100 * file_improvements/float(file_total)))
        logging.info("")


    def analyze(self):
        """
        Analyzes and eventually compares sets of IOzone data.

        When exactly 2 result files were provided, the per-record-size and
        per-file-size tables of both runs are compared and a regression
        report is produced.
        """
        overall = []
        record_size = []
        file_size = []
        for path in self.list_files:
            results_file = open(path, 'r')
            logging.info('FILE: %s', path)

            try:
                results = self.parse_file(results_file)
            finally:
                results_file.close()

            overall_results = self.process_results(results)
            record_size_results = self.process_results(results, 'record_size')
            file_size_results = self.process_results(results, 'file_size')
            self.report(overall_results, record_size_results, file_size_results)

            if len(self.list_files) == 2:
                overall.append(overall_results)
                record_size.append(record_size_results)
                file_size.append(file_size_results)

        if len(self.list_files) == 2:
            record_comparison = compare_matrices(*record_size)
            file_comparison = compare_matrices(*file_size)
            self.report_comparison(record_comparison, file_comparison)


class IOzonePlotter(object):
    """
    Plots graphs based on the results of an IOzone run.

    Plots graphs based on the results of an IOzone run. Uses gnuplot to
    generate the graphs.
    """
    def __init__(self, results_file, output_dir):
        """
        Verifies gnuplot availability and prepares the data source.

        If gnuplot is missing or the results file is invalid, plotting is
        disabled (self.active = False) rather than raising.

        @param results_file: Path to an unprocessed IOzone results file.
        @param output_dir: Directory where graphs will be written.
        """
        self.active = True
        self.gnuplot = None
        try:
            self.gnuplot = os_dep.command("gnuplot")
        except Exception:
            logging.error("Command gnuplot not found, disabling graph "
                          "generation")
            self.active = False

        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        self.output_dir = output_dir

        if not os.path.isfile(results_file):
            logging.error("Invalid file %s provided, disabling graph "
                          "generation", results_file)
            self.active = False
            self.results_file = None
        else:
            self.results_file = results_file
            self.generate_data_source()


    def generate_data_source(self):
        """
        Creates data file without headers for gnuplot consumption.

        Only complete, fully numeric 15-field result lines are copied.
        """
        results_file = open(self.results_file, 'r')
        self.datasource = os.path.join(self.output_dir, '3d-datasource')
        datasource = open(self.datasource, 'w')
        for line in results_file.readlines():
            fields = line.split()
            if len(fields) != 15:
                continue
            try:
                # Conversion is only a validity check for the line.
                [int(i) for i in fields]
            except ValueError:
                continue
            datasource.write(line)
        datasource.close()
        results_file.close()


    def plot_2d_graphs(self):
        """
        For each one of the throughput parameters, generate a set of
        gnuplot commands that will create a 2D graph with file size vs.
        throughput.
        """
        datasource_2d = os.path.join(self.output_dir, '2d-datasource-file')
        # Column 1 of the 2D datasource is the size; throughput values
        # live in columns 2..14, matching _LABELS[2:].
        for index, label in zip(range(2, 15), _LABELS[2:]):
            commands_path = os.path.join(self.output_dir, '2d-%s.do' % label)
            commands = ""
            commands += "set title 'Iozone performance: %s'\n" % label
            commands += "set logscale x\n"
            commands += "set xlabel 'File size (KB)'\n"
            commands += "set ylabel 'Througput (MB/s)'\n"
            commands += "set terminal png small size 450 350\n"
            commands += "set output '%s'\n" % os.path.join(self.output_dir,
                                                           '2d-%s.png' % label)
            commands += ("plot '%s' using 1:%s title '%s' with lines \n" %
                         (datasource_2d, index, label))
            commands_file = open(commands_path, 'w')
            commands_file.write(commands)
            commands_file.close()
            try:
                utils.run("%s %s" % (self.gnuplot, commands_path))
            except error.CmdError as e:
                logging.error("Problem plotting from commands file %s: %s",
                              commands_path, str(e))


    def plot_3d_graphs(self):
        """
        For each one of the throughput parameters, generate a set of
        gnuplot commands that will create a parametric surface with file
        size vs. record size vs. throughput.
        """
        # Raw IOzone lines have file size in column 1, record size in
        # column 2, and the 13 throughput values in columns 3..15.
        for index, label in zip(range(3, 16), _LABELS[2:]):
            commands_path = os.path.join(self.output_dir, '%s.do' % label)
            commands = ""
            commands += "set title 'Iozone performance: %s'\n" % label
            commands += "set grid lt 2 lw 1\n"
            commands += "set surface\n"
            commands += "set parametric\n"
            commands += "set xtics\n"
            commands += "set ytics\n"
            commands += "set logscale x 2\n"
            commands += "set logscale y 2\n"
            commands += "set logscale z\n"
            commands += "set xrange [2.**5:2.**24]\n"
            commands += "set xlabel 'File size (KB)'\n"
            commands += "set ylabel 'Record size (KB)'\n"
            commands += "set zlabel 'Througput (KB/s)'\n"
            commands += "set data style lines\n"
            commands += "set dgrid3d 80,80, 3\n"
            commands += "set terminal png small size 900 700\n"
            commands += "set output '%s'\n" % os.path.join(self.output_dir,
                                                           '%s.png' % label)
            commands += ("splot '%s' using 1:2:%s title '%s'\n" %
                         (self.datasource, index, label))
            commands_file = open(commands_path, 'w')
            commands_file.write(commands)
            commands_file.close()
            try:
                utils.run("%s %s" % (self.gnuplot, commands_path))
            except error.CmdError as e:
                logging.error("Problem plotting from commands file %s: %s",
                              commands_path, str(e))


    def plot_all(self):
        """
        Plot all graphs that are to be plotted, provided that we have
        gnuplot.
        """
        if self.active:
            self.plot_2d_graphs()
            self.plot_3d_graphs()


class AnalyzerLoggingConfig(logging_config.LoggingConfig):
    # Simple console-only logging setup for stand alone execution.
    def configure_logging(self, results_dir=None, verbose=False):
        super(AnalyzerLoggingConfig, self).configure_logging(use_console=True,
                                                             verbose=verbose)


if __name__ == "__main__":
    parser = optparse.OptionParser("usage: %prog [options] [filenames]")
    options, args = parser.parse_args()

    logging_manager.configure_logging(AnalyzerLoggingConfig())

    if args:
        filenames = args
    else:
        parser.print_help()
        sys.exit(1)

    # At most 2 files are supported (one run, or a comparison of 2 runs).
    if len(args) > 2:
        parser.print_help()
        sys.exit(1)

    o = os.path.join(os.getcwd(),
                     "iozone-graphs-%s" % time.strftime('%Y-%m-%d-%H.%M.%S'))
    if not os.path.isdir(o):
        os.makedirs(o)

    a = IOzoneAnalyzer(list_files=filenames, output_dir=o)
    a.analyze()
    p = IOzonePlotter(results_file=filenames[0], output_dir=o)
    p.plot_all()