RE: fiologparser_hist.py script patch and enhancements?

Other than an internal business comment, this hasn't generated any discussion.
If nothing more: here is the patch for both the modified fiologparser_hist.py script and the newly added non-weighted fiologparser_hist_nw.py script, generated from a git diff operation:

Thanks
Kris Davis


diff --git a/tools/hist/fiologparser_hist.py b/tools/hist/fiologparser_hist.py
index 2e05b92..fe9a951 100755
--- a/tools/hist/fiologparser_hist.py
+++ b/tools/hist/fiologparser_hist.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python2.7
+#!/usr/bin/python
 """ 
     Utility for converting *_clat_hist* files generated by fio into latency statistics.
     
@@ -16,8 +16,16 @@
 import os
 import sys
 import pandas
+import re
 import numpy as np
 
+runascmd = False
+
+if (sys.version_info < (2, 7)):
+    err("ERROR: Python version = %s; version 2.7 or greater is required.\n")
+    exit(1)
+
+
 err = sys.stderr.write
 
 def weighted_percentile(percs, vs, ws):
@@ -64,8 +72,20 @@ def weights(start_ts, end_ts, start, end):
 def weighted_average(vs, ws):
     return np.sum(vs * ws) / np.sum(ws)
 
-columns = ["end-time", "samples", "min", "avg", "median", "90%", "95%", "99%", "max"]
-percs   = [50, 90, 95, 99]
+
+percs = None
+columns = None
+
+def gen_output_columns(percentiles):
+    global percs,columns
+    strpercs = re.split('[,:]', percentiles)
+    percs = [50.0]  # always print 50% in 'median' column
+    percs.extend(list(map(float,strpercs)))
+    columns = ["end-time", "samples", "min", "avg", "median"]
+    columns.extend(list(map(lambda x: x+'%', strpercs)))
+    columns.append("max")
+        
+
 
 def fmt_float_list(ctx, num=1):
   """ Return a comma separated list of float formatters to the required number
@@ -118,6 +138,7 @@ def histogram_generator(ctx, fps, sz):
 
     # Initial histograms from disk:
     arrs = {fp: read_chunk(rdr, sz) for fp,rdr in rdrs.items()}
+    
     while True:
 
         try:
@@ -177,8 +198,12 @@ def print_all_stats(ctx, end, mn, ss_cnt, vs, ws, mx):
 
     avg = weighted_average(vs, ws)
     values = [mn, avg] + list(ps) + [mx]
-    row = [end, ss_cnt] + map(lambda x: float(x) / ctx.divisor, values)
-    fmt = "%d, %d, %d, " + fmt_float_list(ctx, 5) + ", %d"
+    row = [end, ss_cnt] + list(map(lambda x: float(x) / ctx.divisor, values))
+    if ctx.divisor > 1:
+        fmt = "%d, %d, " + fmt_float_list(ctx, len(percs)+3)
+    else:
+        # max and min are decimal values if no divisor
+        fmt = "%d, %d, %d, " + fmt_float_list(ctx, len(percs)+1) + ", %d"
     print (fmt % tuple(row))
 
 def update_extreme(val, fncn, new_val):
@@ -207,7 +232,7 @@ def process_interval(ctx, samples, iStart, iEnd):
             
         # Only look at bins of the current histogram sample which
         # started before the end of the current time interval [start,end]
-        start_times = (end_time - 0.5 * ctx.interval) - bin_vals / 1000.0
+        start_times = (end_time - 0.5 * ctx.interval) - bin_vals / ctx.time_divisor
         idx = np.where(start_times < iEnd)
         s_ts, l_bvs, u_bvs, hs = start_times[idx], lower_bin_vals[idx], upper_bin_vals[idx], hist[idx]
 
@@ -241,7 +266,7 @@ def guess_max_from_bins(ctx, hist_cols):
     idx = np.where(arr == hist_cols)
     if len(idx[1]) == 0:
         table = repr(arr.astype(int)).replace('-10', 'N/A').replace('array','     ')
-        err("Unable to determine bin values from input clat_hist files. Namely \n"
+        errmsg = ("Unable to determine bin values from input clat_hist files. Namely \n"
             "the first line of file '%s' " % ctx.FILE[0] + "has %d \n" % (__TOTAL_COLUMNS,) +
             "columns of which we assume %d " % (hist_cols,) + "correspond to histogram bins. \n"
             "This number needs to be equal to one of the following numbers:\n\n"
@@ -250,7 +275,12 @@ def guess_max_from_bins(ctx, hist_cols):
             "  - Input file(s) does not contain histograms.\n"
             "  - You recompiled fio with a different GROUP_NR. If so please specify this\n"
             "    new GROUP_NR on the command line with --group_nr\n")
-        exit(1)
+        if runascmd:
+            err(errmsg)
+            exit(1)
+        else:
+            raise RuntimeError(errmsg) 
+        
     return bins[idx[1][0]]
 
 def main(ctx):
@@ -274,10 +304,19 @@ def main(ctx):
                         ctx.interval = int(hist_msec)
                 except NoOptionError:
                     pass
+    
+    if not hasattr(ctx, 'percentiles'):
+        ctx.percentiles = "90,95,99"
+    gen_output_columns(ctx.percentiles)
 
     if ctx.interval is None:
         ctx.interval = 1000
 
+    if ctx.usbin:
+        ctx.time_divisor = 1000.0        # bins are in us
+    else:
+        ctx.time_divisor = 1000000.0     # bins are in ns
+
     # Automatically detect how many columns are in the input files,
     # calculate the corresponding 'coarseness' parameter used to generate
     # those files, and calculate the appropriate bin latency values:
@@ -288,9 +327,9 @@ def main(ctx):
 
         max_cols = guess_max_from_bins(ctx, __HIST_COLUMNS)
         coarseness = int(np.log2(float(max_cols) / __HIST_COLUMNS))
-        bin_vals = np.array(map(lambda x: plat_idx_to_val_coarse(x, coarseness), np.arange(__HIST_COLUMNS)), dtype=float)
-        lower_bin_vals = np.array(map(lambda x: plat_idx_to_val_coarse(x, coarseness, 0.0), np.arange(__HIST_COLUMNS)), dtype=float)
-        upper_bin_vals = np.array(map(lambda x: plat_idx_to_val_coarse(x, coarseness, 1.0), np.arange(__HIST_COLUMNS)), dtype=float)
+        bin_vals = np.array(list(map(lambda x: plat_idx_to_val_coarse(x, coarseness), np.arange(__HIST_COLUMNS))), dtype=float)
+        lower_bin_vals = np.array(list(map(lambda x: plat_idx_to_val_coarse(x, coarseness, 0.0), np.arange(__HIST_COLUMNS))), dtype=float)
+        upper_bin_vals = np.array(list(map(lambda x: plat_idx_to_val_coarse(x, coarseness, 1.0), np.arange(__HIST_COLUMNS))), dtype=float)
 
     fps = [open(f, 'r') for f in ctx.FILE]
     gen = histogram_generator(ctx, fps, ctx.buff_size)
@@ -304,7 +343,7 @@ def main(ctx):
         while more_data or len(arr) > 0:
             
             # Read up to ctx.max_latency (default 20 seconds) of data from end of current interval.
-            while len(arr) == 0 or arr[-1][0] < ctx.max_latency * 1000 + end:
+            while len(arr) == 0 or arr[-1][0] < ctx.max_latency + end:
                 try:
                     new_arr = next(gen)
                 except StopIteration:
@@ -338,6 +377,7 @@ def main(ctx):
 
 if __name__ == '__main__':
     import argparse
+    runascmd = True
     p = argparse.ArgumentParser()
     arg = p.add_argument
     arg("FILE", help='space separated list of latency log filenames', nargs='+')
@@ -384,5 +424,18 @@ if __name__ == '__main__':
              'given histogram files. Useful for auto-detecting --log_hist_msec and '
              '--log_unix_epoch (in fio) values.')
 
+    arg('--percentiles',
+        default="90,95,99",
+        type=str,
+        help='Optional argument of comma or colon separated percentiles to print. '
+             'The default is "90,95,99".  min, median (50%%) and max percentiles are always printed')
+    
+    arg('--usbin',
+        default=False,
+        action='store_true',
+        help='histogram bin latencies are in us (fio versions < 2.99; fio uses ns for versions >= 2.99)')
+    
+    
+
     main(p.parse_args())
 
diff --git a/tools/hist/fiologparser_hist_nw.py b/tools/hist/fiologparser_hist_nw.py
new file mode 100644
index 0000000..c8c6e87
--- /dev/null
+++ b/tools/hist/fiologparser_hist_nw.py
@@ -0,0 +1,384 @@
+#!/usr/bin/python
+""" 
+    Utility for converting *_clat_hist* files generated by fio into latency statistics.
+    
+    Example usage:
+    
+            $ fiologparser_hist_nw.py *_clat_hist*
+            end-time, samples, min, avg, median, 90%, 95%, 99%, max
+            1000, 15, 192, 1678.107, 1788.859, 1856.076, 1880.040, 1899.208, 1888.000
+            2000, 43, 152, 1642.368, 1714.099, 1816.659, 1845.552, 1888.131, 1888.000
+            4000, 39, 1152, 1546.962, 1545.785, 1627.192, 1640.019, 1691.204, 1744
+            ...
+    
+    @author Karl Cronburg <karl.cronburg@xxxxxxxxx>
+    modification by Kris Davis <shimrot@xxxxxxxxx>
+"""
+import os
+import sys
+import re
+import numpy as np
+
+runascmd = False
+
+if (sys.version_info < (2, 7)):
+    err("ERROR: Python version = %s; version 2.7 or greater is required.\n")
+    exit(1)
+
+
+err = sys.stderr.write
+
+class HistFileRdr():
+    """ Class to read a hist file line by line, buffering 
+        a value array for the latest line, and allowing a preview
+        of the next timestamp in next line
+        Note: this does not follow a generator pattern, but must explicitly
+        get next bin array.
+    """
+    def __init__(self, file):
+        self.fp = open(file, 'r')
+        self.data = self.nextData()
+        
+    def close(self):
+        self.fp.close()
+        self.fp = None
+        
+    def nextData(self):
+        self.data = None
+        if self.fp: 
+            line = self.fp.readline()
+            if line == "":
+                self.close()
+            else:
+                self.data = [int(x) for x in line.replace(' ', '').rstrip().split(',')]
+                
+        return self.data
+ 
+    @property
+    def curTS(self):
+        ts = None
+        if self.data:
+            ts = self.data[0]
+        return ts
+             
+    @property
+    def curBins(self):
+        return self.data[3:]
+                
+    
+
+def weighted_percentile(percs, vs, ws):
+    """ Use linear interpolation to calculate the weighted percentile.
+        
+        Value and weight arrays are first sorted by value. The cumulative
+        distribution function (cdf) is then computed, after which np.interp
+        finds the two values closest to our desired weighted percentile(s)
+        and linearly interpolates them.
+        
+        percs  :: List of percentiles we want to calculate
+        vs     :: Array of values we are computing the percentile of
+        ws     :: Array of weights for our corresponding values
+        return :: Array of percentiles
+    """
+    idx = np.argsort(vs)
+    vs, ws = vs[idx], ws[idx] # weights and values sorted by value
+    cdf = 100 * (ws.cumsum() - ws / 2.0) / ws.sum()
+    return np.interp(percs, cdf, vs) # linear interpolation
+
+def weighted_average(vs, ws):
+    return np.sum(vs * ws) / np.sum(ws)
+
+
+percs = None
+columns = None
+
+def gen_output_columns(percentiles):
+    global percs,columns
+    strpercs = re.split('[,:]', percentiles)
+    percs = [50.0]  # always print 50% in 'median' column
+    percs.extend(list(map(float,strpercs)))
+    columns = ["end-time", "samples", "min", "avg", "median"]
+    columns.extend(list(map(lambda x: x+'%', strpercs)))
+    columns.append("max")
+        
+
+
+def fmt_float_list(ctx, num=1):
+  """ Return a comma separated list of float formatters to the required number
+      of decimal places. For instance:
+
+        fmt_float_list(ctx.decimals=4, num=3) == "%.4f, %.4f, %.4f"
+  """
+  return ', '.join(["%%.%df" % ctx.decimals] * num)
+
+# Default values - see beginning of main() for how we detect number columns in
+# the input files:
+__HIST_COLUMNS = 1216
+__NON_HIST_COLUMNS = 3
+__TOTAL_COLUMNS = __HIST_COLUMNS + __NON_HIST_COLUMNS
+    
+def get_min(fps, arrs):
+    """ Find the file with the current first row with the smallest start time """
+    return min([fp for fp in fps if not arrs[fp] is None], key=lambda fp: arrs.get(fp)[0][0])
+
+def _plat_idx_to_val(idx, edge=0.5, FIO_IO_U_PLAT_BITS=6, FIO_IO_U_PLAT_VAL=64):
+    """ Taken from fio's stat.c for calculating the latency value of a bin
+        from that bin's index.
+        
+            idx  : the value of the index into the histogram bins
+            edge : fractional value in the range [0,1]** indicating how far into
+            the bin we wish to compute the latency value of.
+        
+        ** edge = 0.0 and 1.0 computes the lower and upper latency bounds
+           respectively of the given bin index. """
+
+    # MSB <= (FIO_IO_U_PLAT_BITS-1), cannot be rounded off. Use
+    # all bits of the sample as index
+    if (idx < (FIO_IO_U_PLAT_VAL << 1)):
+        return idx 
+
+    # Find the group and compute the minimum value of that group
+    error_bits = (idx >> FIO_IO_U_PLAT_BITS) - 1 
+    base = 1 << (error_bits + FIO_IO_U_PLAT_BITS)
+
+    # Find its bucket number of the group
+    k = idx % FIO_IO_U_PLAT_VAL
+
+    # Return the mean (if edge=0.5) of the range of the bucket
+    return base + ((k + edge) * (1 << error_bits))
+    
+def plat_idx_to_val_coarse(idx, coarseness, edge=0.5):
+    """ Converts the given *coarse* index into a non-coarse index as used by fio
+        in stat.h:plat_idx_to_val(), subsequently computing the appropriate
+        latency value for that bin.
+        """
+
+    # Multiply the index by the power of 2 coarseness to get the bin
+    # bin index with a max of 1536 bins (FIO_IO_U_PLAT_GROUP_NR = 24 in stat.h)
+    stride = 1 << coarseness
+    idx = idx * stride
+    lower = _plat_idx_to_val(idx, edge=0.0)
+    upper = _plat_idx_to_val(idx + stride, edge=1.0)
+    return lower + (upper - lower) * edge
+
+def print_all_stats(ctx, end, mn, ss_cnt, vs, ws, mx):
+    ps = weighted_percentile(percs, vs, ws)
+
+    avg = weighted_average(vs, ws)
+    values = [mn, avg] + list(ps) + [mx]
+    row = [end, ss_cnt] + list(map(lambda x: float(x) / ctx.divisor, values))
+    if ctx.divisor > 1:
+        fmt = "%d, %d, " + fmt_float_list(ctx, len(percs)+3)
+    else:
+        # max and min are decimal values if no divisor
+        fmt = "%d, %d, %d, " + fmt_float_list(ctx, len(percs)+1) + ", %d"
+    print (fmt % tuple(row))
+
+def update_extreme(val, fncn, new_val):
+    """ Calculate min / max in the presence of None values """
+    if val is None: return new_val
+    else: return fncn(val, new_val)
+
+# See beginning of main() for how bin_vals are computed
+bin_vals = []
+lower_bin_vals = [] # lower edge of each bin
+upper_bin_vals = [] # upper edge of each bin 
+
+def process_interval(ctx, iHist, iEnd):
+    """ print estimated percentages for the given merged sample
+    """
+    ss_cnt = 0 # number of samples affecting this interval
+    mn_bin_val, mx_bin_val = None, None
+   
+    # Update total number of samples affecting current interval histogram:
+    ss_cnt += np.sum(iHist)
+        
+    # Update min and max bin values
+    idxs = np.nonzero(iHist != 0)[0]
+    if idxs.size > 0:
+        mn_bin_val = bin_vals[idxs[0]]
+        mx_bin_val = bin_vals[idxs[-1]]
+
+    if ss_cnt > 0: print_all_stats(ctx, iEnd, mn_bin_val, ss_cnt, bin_vals, iHist, mx_bin_val)
+
+def guess_max_from_bins(ctx, hist_cols):
+    """ Try to guess the GROUP_NR from given # of histogram
+        columns seen in an input file """
+    max_coarse = 8
+    if ctx.group_nr < 19 or ctx.group_nr > 26:
+        bins = [ctx.group_nr * (1 << 6)]
+    else:
+        bins = [1216,1280,1344,1408,1472,1536,1600,1664]
+    coarses = range(max_coarse + 1)
+    fncn = lambda z: list(map(lambda x: z/2**x if z % 2**x == 0 else -10, coarses))
+    
+    arr = np.transpose(list(map(fncn, bins)))
+    idx = np.where(arr == hist_cols)
+    if len(idx[1]) == 0:
+        table = repr(arr.astype(int)).replace('-10', 'N/A').replace('array','     ')
+        errmsg = ("Unable to determine bin values from input clat_hist files. Namely \n"
+            "the first line of file '%s' " % ctx.FILE[0] + "has %d \n" % (__TOTAL_COLUMNS,) +
+            "columns of which we assume %d " % (hist_cols,) + "correspond to histogram bins. \n"
+            "This number needs to be equal to one of the following numbers:\n\n"
+            + table + "\n\n"
+            "Possible reasons and corresponding solutions:\n"
+            "  - Input file(s) does not contain histograms.\n"
+            "  - You recompiled fio with a different GROUP_NR. If so please specify this\n"
+            "    new GROUP_NR on the command line with --group_nr\n")
+        if runascmd:
+            err(errmsg)
+            exit(1)
+        else:
+            raise RuntimeError(errmsg) 
+        
+    return bins[idx[1][0]]
+
+def main(ctx):
+
+    if ctx.job_file:
+        try:
+            from configparser import SafeConfigParser, NoOptionError
+        except ImportError:
+            from ConfigParser import SafeConfigParser, NoOptionError
+
+        cp = SafeConfigParser(allow_no_value=True)
+        with open(ctx.job_file, 'r') as fp:
+            cp.readfp(fp)
+
+        if ctx.interval is None:
+            # Auto detect --interval value
+            for s in cp.sections():
+                try:
+                    hist_msec = cp.get(s, 'log_hist_msec')
+                    if hist_msec is not None:
+                        ctx.interval = int(hist_msec)
+                except NoOptionError:
+                    pass
+    
+    if not hasattr(ctx, 'percentiles'):
+        ctx.percentiles = "90,95,99"
+    gen_output_columns(ctx.percentiles)
+
+    if ctx.interval is None:
+        ctx.interval = 1000
+        
+    # Automatically detect how many columns are in the input files,
+    # calculate the corresponding 'coarseness' parameter used to generate
+    # those files, and calculate the appropriate bin latency values:
+    with open(ctx.FILE[0], 'r') as fp:
+        global bin_vals,lower_bin_vals,upper_bin_vals,__HIST_COLUMNS,__TOTAL_COLUMNS
+        __TOTAL_COLUMNS = len(fp.readline().split(','))
+        __HIST_COLUMNS = __TOTAL_COLUMNS - __NON_HIST_COLUMNS
+
+        max_cols = guess_max_from_bins(ctx, __HIST_COLUMNS)
+        coarseness = int(np.log2(float(max_cols) / __HIST_COLUMNS))
+        bin_vals = np.array(list(map(lambda x: plat_idx_to_val_coarse(x, coarseness), np.arange(__HIST_COLUMNS))), dtype=float)
+        lower_bin_vals = np.array(list(map(lambda x: plat_idx_to_val_coarse(x, coarseness, 0.0), np.arange(__HIST_COLUMNS))), dtype=float)
+        upper_bin_vals = np.array(list(map(lambda x: plat_idx_to_val_coarse(x, coarseness, 1.0), np.arange(__HIST_COLUMNS))), dtype=float)
+
+    fps = [HistFileRdr(f) for f in ctx.FILE]
+
+    print(', '.join(columns))
+
+    start = 0
+    end = ctx.interval
+    while True:
+        
+        more_data = False
+        
+        # add bins from all files in target intervals
+        arr = None
+        numSamples = 0
+        while True:
+            foundSamples = False
+            for fp in fps:
+                ts = fp.curTS
+                if ts and ts+10 < end:  # shift sample time when very close to an end time                 
+                    numSamples += 1
+                    foundSamples = True
+                    if arr is None: 
+                        arr = np.zeros(shape=(__HIST_COLUMNS), dtype=int)
+                    arr = np.add(arr, fp.curBins)
+                    more_data = True
+                    fp.nextData()
+                elif ts:
+                    more_data = True
+            
+            # reached end of all files
+            # or gone through all files without finding sample in interval 
+            if not more_data or not foundSamples:
+                break
+        
+        if arr is not None:
+            #print("{} size({}) samples({}) nonzero({}):".format(end, arr.size, numSamples, np.count_nonzero(arr)), str(arr), )
+            process_interval(ctx, arr, end)         
+        
+        # reach end of all files
+        if not more_data:
+            break
+            
+        start += ctx.interval
+        end = start + ctx.interval
+        
+        #if end > 20000: break
+
+
+if __name__ == '__main__':
+    import argparse
+    runascmd = True
+    p = argparse.ArgumentParser()
+    arg = p.add_argument
+    arg("FILE", help='space separated list of latency log filenames', nargs='+')
+    arg('--buff_size',
+        default=10000,
+        type=int,
+        help='number of samples to buffer into numpy at a time')
+
+    arg('--max_latency',
+        default=20,
+        type=float,
+        help='number of seconds of data to process at a time')
+
+    arg('-i', '--interval',
+        type=int,
+        help='interval width (ms), default 1000 ms '
+        '(no weighting between samples performed, results represent sample period only)')
+
+    arg('-d', '--divisor',
+        required=False,
+        type=int,
+        default=1,
+        help='divide the results by this value.')
+
+    arg('--decimals',
+        default=3,
+        type=int,
+        help='number of decimal places to print floats to')
+
+    arg('--warn',
+        dest='warn',
+        action='store_true',
+        default=False,
+        help='print warning messages to stderr')
+
+    arg('--group_nr',
+        default=29,
+        type=int,
+        help='FIO_IO_U_PLAT_GROUP_NR as defined in stat.h')
+
+    arg('--job-file',
+        default=None,
+        type=str,
+        help='Optional argument pointing to the job file used to create the '
+             'given histogram files. Useful for auto-detecting --log_hist_msec and '
+             '--log_unix_epoch (in fio) values.')
+
+    arg('--percentiles',
+        default="90,95,99",
+        type=str,
+        help='Optional argument of comma or colon separated percentiles to print. '
+             'The default is "90,95,99".  min, median (50%%) and max percentiles are always printed')
+        
+
+    main(p.parse_args())
+



-----Original Message-----
From: fio-owner@xxxxxxxxxxxxxxx [mailto:fio-owner@xxxxxxxxxxxxxxx] On Behalf Of Kris Davis
Sent: Tuesday, January 30, 2018 1:13 PM
To: fio@xxxxxxxxxxxxxxx
Cc: karl.cronburg@xxxxxxxxx
Subject: fiologparser_hist.py script patch and enhancements?

Greetings all, 

While working with the clat histogram log data, I ran into a few difficulties with the "fiologparser_hist.py" script.  
I've created a patch to address these, but of course it needs some discussion and review.

The issues: 

1) The fiologparser_hist script didn't support the new nanosecond bin values.  So I changed the script to assume nanosecond histogram bins, and added a new "--usbin" option to let the user override this so the same script can still process older-version histogram logs.
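
For reference, the unit handling in the patch boils down to a single divisor; here is a minimal sketch of that logic (bin_val_ns and latency_ms are illustrative names, not from the patch):

    usbin = False                                  # True for logs from fio < 2.99
    time_divisor = 1000.0 if usbin else 1000000.0  # us -> ms, or ns -> ms

    # Bin latencies are converted to milliseconds before being compared
    # against the interval boundaries, which are in ms:
    bin_val_ns = 1500000.0
    latency_ms = bin_val_ns / time_divisor         # -> 1.5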

2) The script appeared hardcoded to only return 50% (median), 90%, 95%, and 99% values (along with min and max). 
I added a "--percentiles" option to allow requesting additional values ('median' is always printed, even if a duplicate 50% column is requested, for backward compatibility).

3) I use Python 3.4+ almost exclusively in my environment, and it was relatively simple to alter the script so it runs with either Python 2.7 or 3.x. 
I added a check to make sure the Python version is at least 2.7, but only actually tested with Python 2.7.9 and 3.4.4. I understand there are some minor Python differences between 3.0 and 3.4 that might be an issue, but I haven't tried those versions.
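
The main incompatibility is that map() returns a list in Python 2 but a one-shot iterator in Python 3, so the patch wraps such calls in list() wherever numpy needs a real sequence, as in this condensed example:

    import numpy as np

    # np.array() over a bare map iterator yields a useless 0-d object
    # array under Python 3; list() makes it behave the same as Python 2:
    arr = np.array(list(map(lambda x: x * 2, np.arange(4))), dtype=float)
    print(arr)   # [0. 2. 4. 6.]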

4) The process can be slow for large log files or when combining many of them.  I have some automation which generically processes many log files, and found I cut the processing time in half by loading the script as a module rather than invoking it as a command.  So I changed it so main() can be called directly from a script, but needed to slightly alter the end of "guess_max_from_bins" to throw an exception on error rather than exit when called as a module.  
Someone might know of a better, more conventional Pythonic design pattern to use, but it works.
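
Loading it as a module then looks roughly like this (a sketch; the import and the log filename are assumptions, and the Namespace attributes mirror the script's argparse options):

    import argparse
    import fiologparser_hist as flph   # assumes the script is on sys.path

    ctx = argparse.Namespace(
        FILE=['job_clat_hist.1.log'],  # hypothetical histogram log
        buff_size=10000, max_latency=20, interval=None,
        divisor=1, decimals=3, warn=False, group_nr=29,
        job_file=None, percentiles="90,95,99", usbin=False)

    try:
        flph.main(ctx)                 # prints the csv rows to stdout
    except RuntimeError as e:          # raised instead of exit(1) as a module
        print("histogram parse failed: %s" % e)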

5) The script appears to assume that the log is never missing samples.  That is, it weights samples across the requested intervals, I think with the assumption that the log samples are at longer intervals than, or at least the same interval length as, the "--interval" value.  If the workload actually contains "thinktime" gaps (with samples missing when there is no data), the script cannot know this, and assumes the logged operations should still be spread across all the intervals.  

In my case, I'm mostly interested in results at the same interval as was used during logging, so I created an alternate version named 'fiologparser_hist_nw.py' which doesn't perform any weighting of samples.  It has the added advantage of much better performance: for example, fiologparser_hist took about half an hour to combine about 350 logs, but fiologparser_hist_nw took 45 seconds, which is far better for my automation.
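
The non-weighted merge is essentially just a per-interval sum of histogram bins, with no spreading of a sample across neighboring intervals; a condensed sketch of the merge loop in fiologparser_hist_nw.py (HIST_COLUMNS and the sample list are illustrative):

    import numpy as np

    HIST_COLUMNS = 1216          # auto-detected from the log in the real script
    interval = 1000              # ms
    samples = [(900,  np.ones(HIST_COLUMNS, dtype=int)),   # (timestamp_ms, bins)
               (1900, np.ones(HIST_COLUMNS, dtype=int))]

    end = interval
    merged = np.zeros(HIST_COLUMNS, dtype=int)
    for ts, bins in samples:
        if ts + 10 < end:        # same end-of-interval shift as the script
            merged = np.add(merged, bins)
    # merged now holds the summed histogram for the first interval only;
    # the 1900 ms sample will be counted in the next interval instead.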

Of course, higher "number of nines" percentiles will have additional inaccuracies when there are not enough operations in a sample period, but that is a user-beware situation.

I've included a patch for fiologparser_hist.py.  I'm not sure whether I can actually attach files here, or I would have included zipped copies of both fiologparser_hist.py and fiologparser_hist_nw.py.  But I could include them in a github issue.

Thanks

Kris


