Recent changes (master)

Jens Axboe <jaxboe@xxxxxxxxxxxx> · Fri, 16 Nov 2012 06:00:02 +0100 (CET)

The following changes since commit 108fea772db5f1dd91e2fb67737e3e0d36827b76:

  client/server: fixup "All clients" reporting (2012-11-14 13:09:45 -0700)

are available in the git repository at:
  git://git.kernel.dk/fio.git master

Jens Axboe (6):
      server: properly reset stat_number in fio_reset_state()
      Get rid of -fno-omit-frame-pointer
      genzipf: add help/usage screen (-h)
      Disable random map automatically if a non-uniform random distribution is given
      Document random distribution
      Fio 2.0.11

Vincent Kang Fu (1):
      t/genzfip: add -c option for csv output

 FIO-VERSION-GEN        |    2 +-
 HOWTO                  |   19 ++++++
 Makefile               |    2 +-
 fio.1                  |   26 ++++++++
 init.c                 |    7 ++
 libfio.c               |    1 +
 os/windows/install.wxs |    2 +-
 server.c               |    2 -
 t/genzipf.c            |  163 +++++++++++++++++++++++++++++-------------------
 9 files changed, 155 insertions(+), 69 deletions(-)

---

Diff of recent changes:

diff --git a/FIO-VERSION-GEN b/FIO-VERSION-GEN
index cc7eb83..035ddaf 100755
--- a/FIO-VERSION-GEN
+++ b/FIO-VERSION-GEN
@@ -1,7 +1,7 @@
 #!/bin/sh
 
 GVF=FIO-VERSION-FILE
-DEF_VER=fio-2.0.10
+DEF_VER=fio-2.0.11
 
 LF='
 '
diff --git a/HOWTO b/HOWTO
index 40fe65f..6391b82 100644
--- a/HOWTO
+++ b/HOWTO
@@ -716,6 +716,25 @@ rwmixwrite=int	How large a percentage of the mix should be writes. If both
 		if fio is asked to limit reads or writes to a certain rate.
 		If that is the case, then the distribution may be skewed.
 
+random_distribution=str:float	By default, fio will use a completely uniform
+		random distribution when asked to perform random IO. Sometimes
+		it is useful to skew the distribution in specific ways,
+		ensuring that some parts of the data are hotter than others.
+		fio includes the following distribution models:
+
+		random		Uniform random distribution
+		zipf		Zipf distribution
+		pareto		Pareto distribution
+
+		When using a zipf or pareto distribution, an input value
+		is also needed to define the access pattern. For zipf, this
+		is the zipf theta. For pareto, it's the pareto power. Fio
+		includes a test program, genzipf, that can be used to visualize
+		what the given input values will yield in terms of hit rates.
+		If you wanted to use zipf with a theta of 1.2, you would use
+		random_distribution=zipf:1.2 as the option. If a non-uniform
+		model is used, fio will disable use of the random map.
+
 norandommap	Normally fio will cover every block of the file when doing
 		random IO. If this option is given, fio will just get a
 		new random offset without looking at past io history. This
diff --git a/Makefile b/Makefile
index 3589770..8473238 100644
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,7 @@ endif
 DEBUGFLAGS = -D_FORTIFY_SOURCE=2 -DFIO_INC_DEBUG
 CPPFLAGS= -D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 \
 	$(DEBUGFLAGS)
-OPTFLAGS= -O3 -fno-omit-frame-pointer -g $(EXTFLAGS)
+OPTFLAGS= -O3 -g $(EXTFLAGS)
 CFLAGS	= -std=gnu99 -Wwrite-strings -Wall $(OPTFLAGS)
 LIBS	= -lm $(EXTLIBS)
 PROGS	= fio
diff --git a/fio.1 b/fio.1
index 08d6c0f..2db9793 100644
--- a/fio.1
+++ b/fio.1
@@ -580,6 +580,32 @@ overrides the first. This may interfere with a given rate setting, if fio is
 asked to limit reads or writes to a certain rate. If that is the case, then
 the distribution may be skewed. Default: 50.
 .TP
+.BI random_distribution \fR=\fPstr:float
+By default, fio will use a completely uniform random distribution when asked
+to perform random IO. Sometimes it is useful to skew the distribution in
+specific ways, ensuring that some parts of the data are hotter than others.
+Fio includes the following distribution models:
+.RS
+.TP
+.B random
+Uniform random distribution
+.TP
+.B zipf
+Zipf distribution
+.TP
+.B pareto
+Pareto distribution
+.TP
+.RE
+.P
+When using a zipf or pareto distribution, an input value is also needed to
+define the access pattern. For zipf, this is the zipf theta. For pareto,
+it's the pareto power. Fio includes a test program, genzipf, that can be
+used to visualize what the given input values will yield in terms of hit rates.
+If you wanted to use zipf with a theta of 1.2, you would use
+random_distribution=zipf:1.2 as the option. If a non-uniform model is used,
+fio will disable use of the random map.
+.TP
 .B norandommap
 Normally \fBfio\fR will cover every block of the file when doing random I/O. If
 this parameter is given, a new offset will be chosen without looking at past
diff --git a/init.c b/init.c
index a682423..7021741 100644
--- a/init.c
+++ b/init.c
@@ -596,6 +596,13 @@ static int fixup_options(struct thread_data *td)
 		td->o.compress_percentage = 0;
 	}
 
+	/*
+	 * Using a non-uniform random distribution excludes usage of
+	 * a random map
+	 */
+	if (td->o.random_distribution != FIO_RAND_DIST_RANDOM)
+		td->o.norandommap = 1;
+
 	return ret;
 }
 
diff --git a/libfio.c b/libfio.c
index 43e1a61..0cfd7f6 100644
--- a/libfio.c
+++ b/libfio.c
@@ -127,6 +127,7 @@ void reset_fio_state(void)
 {
 	groupid = 0;
 	thread_number = 0;
+	stat_number = 0;
 	nr_process = 0;
 	nr_thread = 0;
 	done_secs = 0;
diff --git a/os/windows/install.wxs b/os/windows/install.wxs
index e02347e..dbf0e21 100755
--- a/os/windows/install.wxs
+++ b/os/windows/install.wxs
@@ -10,7 +10,7 @@
 	<Product Id="2BA394F9-0D9E-4597-BB9D-6B18097D64BB"
 	  Codepage="1252" Language="1033"
 	  Manufacturer="fio" Name="fio"
-	  UpgradeCode="2338A332-5511-43cf-b9BD-5C60496CCFCC" Version="2.0.10">
+	  UpgradeCode="2338A332-5511-43cf-b9BD-5C60496CCFCC" Version="2.0.11">
 		<Package 
 		  Comments="Contact: Your local administrator"
 		  Description="Flexible IO Tester"
diff --git a/server.c b/server.c
index 33b80d6..f8c3635 100644
--- a/server.c
+++ b/server.c
@@ -336,8 +336,6 @@ static int handle_job_cmd(struct fio_net_cmd *cmd)
 	struct cmd_end_pdu epdu;
 	int ret;
 
-	stat_number = 0;
-
 	if (parse_jobs_ini(buf, 1, 0)) {
 		fio_server_send_quit_cmd();
 		return -1;
diff --git a/t/genzipf.c b/t/genzipf.c
index 2d1b107..c5f098c 100644
--- a/t/genzipf.c
+++ b/t/genzipf.c
@@ -21,7 +21,6 @@
 #include "../lib/zipf.h"
 #include "../flist.h"
 #include "../hash.h"
-#include "../rbtree.h"
 
 #define DEF_NR		1000000
 #define DEF_NR_OUTPUT	23
@@ -49,6 +48,7 @@ static unsigned long block_size = 4096;
 static unsigned long output_nranges = DEF_NR_OUTPUT;
 static double percentage;
 static double dist_val;
+static int output_csv = 0;
 
 #define DEF_ZIPF_VAL	1.2
 #define DEF_PARETO_VAL	0.3
@@ -78,13 +78,29 @@ static struct node *hash_insert(struct node *n, unsigned long long val)
 	return n;
 }
 
+static void usage(void)
+{
+	printf("genzipf: test zipf/pareto values for fio input\n");
+	printf("\t-h\tThis help screen\n");
+	printf("\t-p\tGenerate size of data set that are hit by this percentage\n");
+	printf("\t-t\tDistribution type (zipf or pareto)\n");
+	printf("\t-i\tDistribution algorithm input (zipf theta or pareto power)\n");
+	printf("\t-b\tBlock size of a given range (in bytes)\n");
+	printf("\t-g\tSize of data set (in gigabytes)\n");
+	printf("\t-o\tNumber of output columns\n");
+	printf("\t-c\tOutput ranges in CSV format\n");
+}
+
 static int parse_options(int argc, char *argv[])
 {
-	const char *optstring = "t:g:i:o:b:p:";
+	const char *optstring = "t:g:i:o:b:p:ch";
 	int c, dist_val_set = 0;
 
 	while ((c = getopt(argc, argv, optstring)) != -1) {
 		switch (c) {
+		case 'h':
+			usage();
+			return 1;
 		case 'p':
 			percentage = atof(optarg);
 			break;
@@ -111,6 +127,9 @@ static int parse_options(int argc, char *argv[])
 		case 'o':
 			output_nranges = strtoul(optarg, NULL, 10);
 			break;
+		case 'c':
+			output_csv = 1;
+			break;
 		default:
 			printf("bad option %c\n", c);
 			return 1;
@@ -162,7 +181,8 @@ int main(int argc, char *argv[])
 	if (parse_options(argc, argv))
 		return 1;
 
-	printf("Generating %s distribution with %f input and %lu GB size and %lu block_size.\n", dist_types[dist_type], dist_val, gb_size, block_size);
+	if( !output_csv )
+		printf("Generating %s distribution with %f input and %lu GB size and %lu block_size.\n", dist_types[dist_type], dist_val, gb_size, block_size);
 
 	nranges = gb_size * 1024 * 1024 * 1024ULL;
 	nranges /= block_size;
@@ -208,80 +228,95 @@ int main(int argc, char *argv[])
 	nnodes = j;
 	nr_vals = nnodes;
 
-	interval = (nr_vals + output_nranges - 1) / output_nranges;
-
-	output_sums = malloc(output_nranges * sizeof(struct output_sum));
-	for (i = 0; i < output_nranges; i++) {
-		output_sums[i].output = 0.0;
-		output_sums[i].nranges = 1;
-	}
-
-	total_vals = i = j = cur_vals = 0;
-	
-	for (k = 0; k < nnodes; k++) {
-		struct output_sum *os = &output_sums[j];
-		struct node *node = &nodes[k];
-
-		if (i >= interval) {
-			os->output = (double) (cur_vals + 1) / (double) nranges;
-			os->output *= 100.0;
-			j++;
-			cur_vals = node->hits;
-			interval += (nr_vals + output_nranges - 1) / output_nranges;
-		} else {
-			cur_vals += node->hits;
-			os->nranges += node->hits;
+	if (output_csv) {
+		printf("rank, count\n");
+		for (k = 0; k < nnodes; k++)
+			printf("%lu, %lu\n", k, nodes[k].hits);
+	} else {
+		interval = (nr_vals + output_nranges - 1) / output_nranges;
+
+		output_sums = malloc(output_nranges * sizeof(struct output_sum));
+		for (i = 0; i < output_nranges; i++) {
+			output_sums[i].output = 0.0;
+			output_sums[i].nranges = 1;
 		}
 
-		i++;
-		total_vals += node->hits;
-
-		if (percentage) {
-			unsigned long blocks = percentage * nranges / 100;
-
-			if (total_vals >= blocks) {
-				double cs = i * block_size / (1024 * 1024);
-				char p = 'M';
+		total_vals = i = j = cur_vals = 0;
+
+		for (k = 0; k < nnodes; k++) {
+			struct output_sum *os = &output_sums[j];
+			struct node *node = &nodes[k];
+
+			if (i >= interval) {
+				os->output =
+				    (double)(cur_vals + 1) / (double)nranges;
+				os->output *= 100.0;
+				j++;
+				cur_vals = node->hits;
+				interval +=
+				    (nr_vals + output_nranges -
+				     1) / output_nranges;
+			} else {
+				cur_vals += node->hits;
+				os->nranges += node->hits;
+			}
 
-				if (cs > 1024.0) {
-					cs /= 1024.0;
-					p = 'G';
+			i++;
+			total_vals += node->hits;
+
+			if (percentage) {
+				unsigned long blocks =
+				    percentage * nranges / 100;
+
+				if (total_vals >= blocks) {
+					double cs =
+					    i * block_size / (1024 * 1024);
+					char p = 'M';
+
+					if (cs > 1024.0) {
+						cs /= 1024.0;
+						p = 'G';
+					}
+					if (cs > 1024.0) {
+						cs /= 1024.0;
+						p = 'T';
+					}
+
+					printf("%.2f%% of hits satisfied in %.3f%cB of cache\n", percentage, cs, p);
+					percentage = 0.0;
 				}
-				if (cs > 1024.0) {
-					cs /= 1024.0;
-					p = 'T';
-				}
-
-				printf("%.2f%% of hits satisfied in %.3f%cB of cache\n", percentage, cs, p);
-				percentage = 0.0;
 			}
 		}
-	}
 
-	perc_i = 100.0 / (double) output_nranges;
-	perc = 0.0;
+		perc_i = 100.0 / (double)output_nranges;
+		perc = 0.0;
 
-	printf("\n   Rows           Hits           No Hits         Size\n");
-	printf("--------------------------------------------------------\n");
-	for (i = 0; i < j; i++) {
-		struct output_sum *os = &output_sums[i];
-		double gb = (double) os->nranges * block_size / 1024.0;
-		char p = 'K';
+		printf("\n   Rows           Hits           No Hits         Size\n");
+		printf("--------------------------------------------------------\n");
+		for (i = 0; i < j; i++) {
+			struct output_sum *os = &output_sums[i];
+			double gb = (double)os->nranges * block_size / 1024.0;
+			char p = 'K';
 
-		if (gb > 1024.0) {
-			p = 'M';
-			gb /= 1024.0;
-		}
-		if (gb > 1024.0) {
-			p = 'G';
-			gb /= 1024.0;
+			if (gb > 1024.0) {
+				p = 'M';
+				gb /= 1024.0;
+			}
+			if (gb > 1024.0) {
+				p = 'G';
+				gb /= 1024.0;
+			}
+
+			perc += perc_i;
+			printf("%s %6.2f%%\t%6.2f%%\t\t%8u\t%6.2f%c\n",
+			       i ? "|->" : "Top", perc, os->output, os->nranges,
+			       gb, p);
 		}
 
-		perc += perc_i;
-		printf("%s %6.2f%%\t%6.2f%%\t\t%8u\t%6.2f%c\n", i ? "|->" : "Top", perc, os->output, os->nranges, gb, p);
+		free(output_sums);
 	}
 
-	free(output_sums);
 	free(hash);
+	free(nodes);
 	return 0;
 }
--
To unsubscribe from this list: send the line "unsubscribe fio" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html