The following changes since commit 108fea772db5f1dd91e2fb67737e3e0d36827b76: client/server: fixup "All clients" reporting (2012-11-14 13:09:45 -0700) are available in the git repository at: git://git.kernel.dk/fio.git master Jens Axboe (6): server: properly reset stat_number in fio_reset_state() Get rid of -fno-omit-frame-pointer genzipf: add help/usage screen (-h) Disable random map automatically if a non-uniform random distribution is given Document random distribution Fio 2.0.11 Vincent Kang Fu (1): t/genzfip: add -c option for csv output FIO-VERSION-GEN | 2 +- HOWTO | 19 ++++++ Makefile | 2 +- fio.1 | 26 ++++++++ init.c | 7 ++ libfio.c | 1 + os/windows/install.wxs | 2 +- server.c | 2 - t/genzipf.c | 163 +++++++++++++++++++++++++++++------------------- 9 files changed, 155 insertions(+), 69 deletions(-) --- Diff of recent changes: diff --git a/FIO-VERSION-GEN b/FIO-VERSION-GEN index cc7eb83..035ddaf 100755 --- a/FIO-VERSION-GEN +++ b/FIO-VERSION-GEN @@ -1,7 +1,7 @@ #!/bin/sh GVF=FIO-VERSION-FILE -DEF_VER=fio-2.0.10 +DEF_VER=fio-2.0.11 LF=' ' diff --git a/HOWTO b/HOWTO index 40fe65f..6391b82 100644 --- a/HOWTO +++ b/HOWTO @@ -716,6 +716,25 @@ rwmixwrite=int How large a percentage of the mix should be writes. If both if fio is asked to limit reads or writes to a certain rate. If that is the case, then the distribution may be skewed. +random_distribution=str:float By default, fio will use a completely uniform + random distribution when asked to perform random IO. Sometimes + it is useful to skew the distribution in specific ways, + ensuring that some parts of the data is more hot than others. + fio includes the following distribution models: + + random Uniform random distribution + zipf Zipf distribution + pareto Pareto distribution + + When using a zipf or pareto distribution, an input value + is also needed to define the access pattern. For zipf, this + is the zipf theta. For pareto, it's the pareto power. 
Fio + includes a test program, genzipf, that can be used to visualize + what the given input values will yield in terms of hit rates. + If you wanted to use zipf with a theta of 1.2, you would use + random_distribution=zipf:1.2 as the option. If a non-uniform + model is used, fio will disable use of the random map. + norandommap Normally fio will cover every block of the file when doing random IO. If this option is given, fio will just get a new random offset without looking at past io history. This diff --git a/Makefile b/Makefile index 3589770..8473238 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ endif DEBUGFLAGS = -D_FORTIFY_SOURCE=2 -DFIO_INC_DEBUG CPPFLAGS= -D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 \ $(DEBUGFLAGS) -OPTFLAGS= -O3 -fno-omit-frame-pointer -g $(EXTFLAGS) +OPTFLAGS= -O3 -g $(EXTFLAGS) CFLAGS = -std=gnu99 -Wwrite-strings -Wall $(OPTFLAGS) LIBS = -lm $(EXTLIBS) PROGS = fio diff --git a/fio.1 b/fio.1 index 08d6c0f..2db9793 100644 --- a/fio.1 +++ b/fio.1 @@ -580,6 +580,32 @@ overrides the first. This may interfere with a given rate setting, if fio is asked to limit reads or writes to a certain rate. If that is the case, then the distribution may be skewed. Default: 50. .TP +.BI random_distribution \fR=\fPstr:float +By default, fio will use a completely uniform random distribution when asked +to perform random IO. Sometimes it is useful to skew the distribution in +specific ways, ensuring that some parts of the data is more hot than others. +Fio includes the following distribution models: +.RS +.TP +.B random +Uniform random distribution +.TP +.B zipf +Zipf distribution +.TP +.B pareto +Pareto distribution +.TP +.RE +.P +When using a zipf or pareto distribution, an input value is also needed to +define the access pattern. For zipf, this is the zipf theta. For pareto, +it's the pareto power. Fio includes a test program, genzipf, that can be +used to visualize what the given input values will yield in terms of hit rates. 
+If you wanted to use zipf with a theta of 1.2, you would use +random_distribution=zipf:1.2 as the option. If a non-uniform model is used, +fio will disable use of the random map. +.TP .B norandommap Normally \fBfio\fR will cover every block of the file when doing random I/O. If this parameter is given, a new offset will be chosen without looking at past diff --git a/init.c b/init.c index a682423..7021741 100644 --- a/init.c +++ b/init.c @@ -596,6 +596,13 @@ static int fixup_options(struct thread_data *td) td->o.compress_percentage = 0; } + /* + * Using a non-uniform random distribution excludes usage of + * a random map + */ + if (td->o.random_distribution != FIO_RAND_DIST_RANDOM) + td->o.norandommap = 1; + return ret; } diff --git a/libfio.c b/libfio.c index 43e1a61..0cfd7f6 100644 --- a/libfio.c +++ b/libfio.c @@ -127,6 +127,7 @@ void reset_fio_state(void) { groupid = 0; thread_number = 0; + stat_number = 0; nr_process = 0; nr_thread = 0; done_secs = 0; diff --git a/os/windows/install.wxs b/os/windows/install.wxs index e02347e..dbf0e21 100755 --- a/os/windows/install.wxs +++ b/os/windows/install.wxs @@ -10,7 +10,7 @@ <Product Id="2BA394F9-0D9E-4597-BB9D-6B18097D64BB" Codepage="1252" Language="1033" Manufacturer="fio" Name="fio" - UpgradeCode="2338A332-5511-43cf-b9BD-5C60496CCFCC" Version="2.0.10"> + UpgradeCode="2338A332-5511-43cf-b9BD-5C60496CCFCC" Version="2.0.11"> <Package Comments="Contact: Your local administrator" Description="Flexible IO Tester" diff --git a/server.c b/server.c index 33b80d6..f8c3635 100644 --- a/server.c +++ b/server.c @@ -336,8 +336,6 @@ static int handle_job_cmd(struct fio_net_cmd *cmd) struct cmd_end_pdu epdu; int ret; - stat_number = 0; - if (parse_jobs_ini(buf, 1, 0)) { fio_server_send_quit_cmd(); return -1; diff --git a/t/genzipf.c b/t/genzipf.c index 2d1b107..c5f098c 100644 --- a/t/genzipf.c +++ b/t/genzipf.c @@ -21,7 +21,6 @@ #include "../lib/zipf.h" #include "../flist.h" #include "../hash.h" -#include "../rbtree.h" #define 
DEF_NR 1000000 #define DEF_NR_OUTPUT 23 @@ -49,6 +48,7 @@ static unsigned long block_size = 4096; static unsigned long output_nranges = DEF_NR_OUTPUT; static double percentage; static double dist_val; +static int output_csv = 0; #define DEF_ZIPF_VAL 1.2 #define DEF_PARETO_VAL 0.3 @@ -78,13 +78,29 @@ static struct node *hash_insert(struct node *n, unsigned long long val) return n; } +static void usage(void) +{ + printf("genzipf: test zipf/pareto values for fio input\n"); + printf("\t-h\tThis help screen\n"); + printf("\t-p\tGenerate size of data set that are hit by this percentage\n"); + printf("\t-t\tDistribution type (zipf or pareto)\n"); + printf("\t-i\tDistribution algorithm input (zipf theta or pareto power)\n"); + printf("\t-b\tBlock size of a given range (in bytes)\n"); + printf("\t-g\tSize of data set (in gigabytes)\n"); + printf("\t-o\tNumber of output columns\n"); + printf("\t-c\tOutput ranges in CSV format\n"); +} + static int parse_options(int argc, char *argv[]) { - const char *optstring = "t:g:i:o:b:p:"; + const char *optstring = "t:g:i:o:b:p:ch"; int c, dist_val_set = 0; while ((c = getopt(argc, argv, optstring)) != -1) { switch (c) { + case 'h': + usage(); + return 1; case 'p': percentage = atof(optarg); break; @@ -111,6 +127,9 @@ static int parse_options(int argc, char *argv[]) case 'o': output_nranges = strtoul(optarg, NULL, 10); break; + case 'c': + output_csv = 1; + break; default: printf("bad option %c\n", c); return 1; @@ -162,7 +181,8 @@ int main(int argc, char *argv[]) if (parse_options(argc, argv)) return 1; - printf("Generating %s distribution with %f input and %lu GB size and %lu block_size.\n", dist_types[dist_type], dist_val, gb_size, block_size); + if( !output_csv ) + printf("Generating %s distribution with %f input and %lu GB size and %lu block_size.\n", dist_types[dist_type], dist_val, gb_size, block_size); nranges = gb_size * 1024 * 1024 * 1024ULL; nranges /= block_size; @@ -208,80 +228,95 @@ int main(int argc, char *argv[]) nnodes = 
j; nr_vals = nnodes; - interval = (nr_vals + output_nranges - 1) / output_nranges; - - output_sums = malloc(output_nranges * sizeof(struct output_sum)); - for (i = 0; i < output_nranges; i++) { - output_sums[i].output = 0.0; - output_sums[i].nranges = 1; - } - - total_vals = i = j = cur_vals = 0; - - for (k = 0; k < nnodes; k++) { - struct output_sum *os = &output_sums[j]; - struct node *node = &nodes[k]; - - if (i >= interval) { - os->output = (double) (cur_vals + 1) / (double) nranges; - os->output *= 100.0; - j++; - cur_vals = node->hits; - interval += (nr_vals + output_nranges - 1) / output_nranges; - } else { - cur_vals += node->hits; - os->nranges += node->hits; + if (output_csv) { + printf("rank, count\n"); + for (k = 0; k < nnodes; k++) + printf("%lu, %lu\n", k, nodes[k].hits); + } else { + interval = (nr_vals + output_nranges - 1) / output_nranges; + + output_sums = malloc(output_nranges * sizeof(struct output_sum)); + for (i = 0; i < output_nranges; i++) { + output_sums[i].output = 0.0; + output_sums[i].nranges = 1; } - i++; - total_vals += node->hits; - - if (percentage) { - unsigned long blocks = percentage * nranges / 100; - - if (total_vals >= blocks) { - double cs = i * block_size / (1024 * 1024); - char p = 'M'; + total_vals = i = j = cur_vals = 0; + + for (k = 0; k < nnodes; k++) { + struct output_sum *os = &output_sums[j]; + struct node *node = &nodes[k]; + + if (i >= interval) { + os->output = + (double)(cur_vals + 1) / (double)nranges; + os->output *= 100.0; + j++; + cur_vals = node->hits; + interval += + (nr_vals + output_nranges - + 1) / output_nranges; + } else { + cur_vals += node->hits; + os->nranges += node->hits; + } - if (cs > 1024.0) { - cs /= 1024.0; - p = 'G'; + i++; + total_vals += node->hits; + + if (percentage) { + unsigned long blocks = + percentage * nranges / 100; + + if (total_vals >= blocks) { + double cs = + i * block_size / (1024 * 1024); + char p = 'M'; + + if (cs > 1024.0) { + cs /= 1024.0; + p = 'G'; + } + if (cs > 
1024.0) { + cs /= 1024.0; + p = 'T'; + } + + printf("%.2f%% of hits satisfied in %.3f%cB of cache\n", percentage, cs, p); + percentage = 0.0; } - if (cs > 1024.0) { - cs /= 1024.0; - p = 'T'; - } - - printf("%.2f%% of hits satisfied in %.3f%cB of cache\n", percentage, cs, p); - percentage = 0.0; } } - } - perc_i = 100.0 / (double) output_nranges; - perc = 0.0; + perc_i = 100.0 / (double)output_nranges; + perc = 0.0; - printf("\n Rows Hits No Hits Size\n"); - printf("--------------------------------------------------------\n"); - for (i = 0; i < j; i++) { - struct output_sum *os = &output_sums[i]; - double gb = (double) os->nranges * block_size / 1024.0; - char p = 'K'; + printf("\n Rows Hits No Hits Size\n"); + printf("--------------------------------------------------------\n"); + for (i = 0; i < j; i++) { + struct output_sum *os = &output_sums[i]; + double gb = (double)os->nranges * block_size / 1024.0; + char p = 'K'; - if (gb > 1024.0) { - p = 'M'; - gb /= 1024.0; - } - if (gb > 1024.0) { - p = 'G'; - gb /= 1024.0; + if (gb > 1024.0) { + p = 'M'; + gb /= 1024.0; + } + if (gb > 1024.0) { + p = 'G'; + gb /= 1024.0; + } + + perc += perc_i; + printf("%s %6.2f%%\t%6.2f%%\t\t%8u\t%6.2f%c\n", + i ? "|->" : "Top", perc, os->output, os->nranges, + gb, p); } - perc += perc_i; - printf("%s %6.2f%%\t%6.2f%%\t\t%8u\t%6.2f%c\n", i ? "|->" : "Top", perc, os->output, os->nranges, gb, p); + free(output_sums); } - free(output_sums); free(hash); + free(nodes); return 0; } -- To unsubscribe from this list: send the line "unsubscribe fio" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html