Recent changes (master)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The following changes since commit 39ab7da23768081db50b0026e0c2a8e38752e7a4:

  Move code around to satisfy t/stest linkage (2012-11-06 22:10:43 +0100)

are available in the git repository at:
  git://git.kernel.dk/fio.git master

Jens Axboe (11):
      Make the zipf/pareto state per file
      zipf/pareto: mix blocks with hashing
      rbtree: add rb_next()
      genzipf: improve accuracy
      genzipf: fix off-by-one in output array calculation
      Merge branch 'master' of ssh://brick.kernel.dk/data/git/fio
      Makefile: typo
      Add safe checks for valid pareto input value
      zipf/pareto: ensure that 0 isn't always the hottest block
      zipf: cleanup
      zipf: seed zipf/pareto rand with filename hash and job id

 HOWTO       |    2 +-
 Makefile    |    4 +-
 eta.c       |    5 ++-
 file.h      |    6 ++
 filesetup.c |   39 ++++++++++++++
 fio.h       |    7 +--
 hash.h      |    9 +++-
 init.c      |   20 -------
 io_u.c      |    4 +-
 lib/zipf.c  |   35 +++++++------
 lib/zipf.h  |    5 +-
 options.c   |   10 +++-
 rbtree.c    |   31 +++++++++++
 rbtree.h    |    1 +
 t/genzipf.c |  165 +++++++++++++++++++++++++++++++++++++++++++++++-----------
 15 files changed, 256 insertions(+), 87 deletions(-)

---

Diff of recent changes:

diff --git a/HOWTO b/HOWTO
index a7bd1fb..40fe65f 100644
--- a/HOWTO
+++ b/HOWTO
@@ -1385,7 +1385,7 @@ Idle	Run
 ----    ---
 P		Thread setup, but not started.
 C		Thread created.
-I		Thread initialized, waiting.
+I		Thread initialized, waiting or generating necessary data.
 	p	Thread running pre-reading file(s).
 	R	Running, doing sequential reads.
 	r	Running, doing random reads.
diff --git a/Makefile b/Makefile
index 88d397b..7e877fe 100644
--- a/Makefile
+++ b/Makefile
@@ -77,8 +77,8 @@ T_IEEE_OBJS += lib/ieee754.o
 T_IEEE_PROGS = t/ieee754
 
 T_ZIPF_OBS = t/genzipf.o
-T_ZIPF_OBJS += t/log.o lib/ieee754.o lib/rand.o lib/zipf.o t/genzipf.o
-T_ZIPF_PROGS = t/genzip
+T_ZIPF_OBJS += rbtree.o t/log.o lib/ieee754.o lib/rand.o lib/zipf.o t/genzipf.o
+T_ZIPF_PROGS = t/genzipf
 
 T_OBJS = $(T_SMALLOC_OBJS)
 T_OBJS += $(T_IEEE_OBJS)
diff --git a/eta.c b/eta.c
index 075ce8c..1f67301 100644
--- a/eta.c
+++ b/eta.c
@@ -78,6 +78,7 @@ static void check_str_update(struct thread_data *td)
 		c = 'C';
 		break;
 	case TD_INITIALIZED:
+	case TD_SETTING_UP:
 		c = 'I';
 		break;
 	case TD_NOT_CREATED:
@@ -318,7 +319,9 @@ int calc_thread_status(struct jobs_eta *je, int force)
 		} else if (td->runstate == TD_RAMP) {
 			je->nr_running++;
 			je->nr_ramp++;
-		} else if (td->runstate < TD_RUNNING)
+		} else if (td->runstate == TD_SETTING_UP)
+			je->nr_running++;
+		else if (td->runstate < TD_RUNNING)
 			je->nr_pending++;
 
 		if (je->elapsed_sec >= 3)
diff --git a/file.h b/file.h
index 42fd58c..38e9d0d 100644
--- a/file.h
+++ b/file.h
@@ -5,6 +5,7 @@
 #include "compiler/compiler.h"
 #include "io_ddir.h"
 #include "flist.h"
+#include "lib/zipf.h"
 
 /*
  * The type of object we are working on
@@ -112,6 +113,11 @@ struct fio_file {
 	unsigned long last_free_lookup;
 	unsigned failed_rands;
 
+	/*
+	 * Used for zipf random distribution
+	 */
+	struct zipf_state zipf;
+
 	int references;
 	enum fio_file_flags flags;
 
diff --git a/filesetup.c b/filesetup.c
index 9679c88..8636e16 100644
--- a/filesetup.c
+++ b/filesetup.c
@@ -12,6 +12,7 @@
 #include "smalloc.h"
 #include "filehash.h"
 #include "os/os.h"
+#include "hash.h"
 
 #ifdef FIO_HAVE_LINUX_FALLOCATE
 #include <linux/falloc.h>
@@ -862,12 +863,50 @@ int pre_read_files(struct thread_data *td)
 	return 1;
 }
 
+static int __init_rand_distribution(struct thread_data *td, struct fio_file *f)
+{
+	unsigned int range_size, seed;
+	unsigned long nranges;
+
+	range_size = min(td->o.min_bs[DDIR_READ], td->o.min_bs[DDIR_WRITE]);
+
+	nranges = (f->real_file_size + range_size - 1) / range_size;
+
+	seed = jhash(f->file_name, strlen(f->file_name), 0) * td->thread_number;
+	if (td->o.random_distribution == FIO_RAND_DIST_ZIPF)
+		zipf_init(&f->zipf, nranges, td->o.zipf_theta, seed);
+	else
+		pareto_init(&f->zipf, nranges, td->o.pareto_h, seed);
+
+	return 1;
+}
+
+static int init_rand_distribution(struct thread_data *td)
+{
+	struct fio_file *f;
+	unsigned int i;
+	int state;
+
+	if (td->o.random_distribution == FIO_RAND_DIST_RANDOM)
+		return 0;
+
+	state = td->runstate;
+	td_set_runstate(td, TD_SETTING_UP);
+	for_each_file(td, f, i)
+		__init_rand_distribution(td, f);
+	td_set_runstate(td, state);
+
+	return 1;
+}
+
 int init_random_map(struct thread_data *td)
 {
 	unsigned long long blocks, num_maps;
 	struct fio_file *f;
 	unsigned int i;
 
+	if (init_rand_distribution(td))
+		return 0;
 	if (td->o.norandommap || !td_random(td))
 		return 0;
 
diff --git a/fio.h b/fio.h
index 7eb0abb..1526d19 100644
--- a/fio.h
+++ b/fio.h
@@ -39,7 +39,6 @@ struct thread_data;
 #include "server.h"
 #include "stat.h"
 #include "flow.h"
-#include "lib/zipf.h"
 
 #ifdef FIO_HAVE_GUASI
 #include <guasi.h>
@@ -457,11 +456,6 @@ struct thread_data {
 		struct frand_state __random_state;
 	};
 
-	/*
-	 * Used for zipf random distribution
-	 */
-	struct zipf_state zipf;
-
 	struct timeval start;	/* start of this loop */
 	struct timeval epoch;	/* time job was started */
 	struct timeval last_issue;
@@ -691,6 +685,7 @@ enum {
 	TD_NOT_CREATED = 0,
 	TD_CREATED,
 	TD_INITIALIZED,
+	TD_SETTING_UP,
 	TD_RAMP,
 	TD_RUNNING,
 	TD_PRE_READING,
diff --git a/hash.h b/hash.h
index 4b8c6bf..93dd831 100644
--- a/hash.h
+++ b/hash.h
@@ -28,7 +28,7 @@
 #error Define GOLDEN_RATIO_PRIME for your wordsize.
 #endif
 
-static inline unsigned long hash_long(unsigned long val, unsigned int bits)
+static inline unsigned long __hash_long(unsigned long val)
 {
 	unsigned long hash = val;
 
@@ -52,8 +52,13 @@ static inline unsigned long hash_long(unsigned long val, unsigned int bits)
 	hash *= GOLDEN_RATIO_PRIME;
 #endif
 
+	return hash;
+}
+
+static inline unsigned long hash_long(unsigned long val, unsigned int bits)
+{
 	/* High bits are more random, so use them. */
-	return hash >> (BITS_PER_LONG - bits);
+	return __hash_long(val) >> (BITS_PER_LONG - bits);
 }
 	
 static inline unsigned long hash_ptr(void *ptr, unsigned int bits)
diff --git a/init.c b/init.c
index bf4aa03..23be863 100644
--- a/init.c
+++ b/init.c
@@ -382,24 +382,6 @@ static int fixed_block_size(struct thread_options *o)
 		o->min_bs[DDIR_READ] == o->min_bs[DDIR_TRIM];
 }
 
-static void init_rand_distribution(struct thread_data *td)
-{
-	unsigned int range_size;
-	unsigned long nranges;
-
-	if (td->o.random_distribution == FIO_RAND_DIST_RANDOM)
-		return;
-
-	range_size = min(td->o.min_bs[DDIR_READ], td->o.min_bs[DDIR_WRITE]);
-
-	nranges = (td->o.size + range_size - 1) / range_size;
-
-	if (td->o.random_distribution == FIO_RAND_DIST_ZIPF)
-		zipf_init(&td->zipf, nranges, td->o.zipf_theta);
-	else
-		pareto_init(&td->zipf, nranges, td->o.pareto_h);
-}
-
 /*
  * Lazy way of fixing up options that depend on each other. We could also
  * define option callback handlers, but this is easier.
@@ -610,8 +592,6 @@ static int fixup_options(struct thread_data *td)
 		td->o.compress_percentage = 0;
 	}
 
-	init_rand_distribution(td);
-
 	return ret;
 }
 
diff --git a/io_u.c b/io_u.c
index 688249b..551c5ff 100644
--- a/io_u.c
+++ b/io_u.c
@@ -238,7 +238,7 @@ static int __get_next_rand_offset_zipf(struct thread_data *td,
 				       struct fio_file *f, enum fio_ddir ddir,
 				       unsigned long long *b)
 {
-	*b = zipf_next(&td->zipf);
+	*b = zipf_next(&f->zipf);
 	return 0;
 }
 
@@ -246,7 +246,7 @@ static int __get_next_rand_offset_pareto(struct thread_data *td,
 					 struct fio_file *f, enum fio_ddir ddir,
 					 unsigned long long *b)
 {
-	*b = pareto_next(&td->zipf);
+	*b = pareto_next(&f->zipf);
 	return 0;
 }
 
diff --git a/lib/zipf.c b/lib/zipf.c
index 28e8d77..c7bb8a8 100644
--- a/lib/zipf.c
+++ b/lib/zipf.c
@@ -9,6 +9,7 @@
 #include "../log.h"
 #include "zipf.h"
 #include "../minmax.h"
+#include "../hash.h"
 #include "../os/os.h"
 
 struct fio_zipf_disk {
@@ -87,26 +88,29 @@ punt:
 	zs->nranges = f.nranges;
 }
 
-void zipf_init(struct zipf_state *zs, unsigned long nranges, double theta)
+static void shared_rand_init(struct zipf_state *zs, unsigned long nranges,
+			     unsigned int seed)
 {
-	unsigned int i;
-
 	memset(zs, 0, sizeof(*zs));
-
 	zs->nranges = nranges;
-	zs->theta = theta;
 
-	for (i = 1; i <= 2; i++)
-		zs->zeta2 += pow(1.0 / (double) i, zs->theta);
+	init_rand_seed(&zs->rand, seed);
+	zs->rand_off = __rand(&zs->rand);
+}
 
-	init_rand(&zs->rand);
+void zipf_init(struct zipf_state *zs, unsigned long nranges, double theta,
+	       unsigned int seed)
+{
+	shared_rand_init(zs, nranges, seed);
+
+	zs->theta = theta;
+	zs->zeta2 = pow(1.0, zs->theta) + pow(0.5, zs->theta);
 
 	zipf_load_gen_zeta(zs);
 }
 
 unsigned long long zipf_next(struct zipf_state *zs)
 {
-
 	double alpha, eta, rand_uni, rand_z;
 	unsigned long long n = zs->nranges;
 	unsigned long long val;
@@ -124,17 +128,14 @@ unsigned long long zipf_next(struct zipf_state *zs)
 	else
 		val = 1 + (unsigned long long)(n * pow(eta*rand_uni - eta + 1.0, alpha));
 
-	return val - 1;
+	return (__hash_long(val - 1) + zs->rand_off) % zs->nranges;
 }
 
-void pareto_init(struct zipf_state *zs, unsigned long nranges, double h)
+void pareto_init(struct zipf_state *zs, unsigned long nranges, double h,
+		 unsigned int seed)
 {
-	memset(zs, 0, sizeof(*zs));
-
-	zs->nranges = nranges;
+	shared_rand_init(zs, nranges, seed);
 	zs->pareto_pow = log(h) / log(1.0 - h);
-
-	init_rand(&zs->rand);
 }
 
 unsigned long long pareto_next(struct zipf_state *zs)
@@ -142,5 +143,5 @@ unsigned long long pareto_next(struct zipf_state *zs)
 	double rand = (double) __rand(&zs->rand) / (double) FRAND_MAX;
 	unsigned long long n = zs->nranges - 1;
 
-	return n * pow(rand, zs->pareto_pow);
+	return (__hash_long(n * pow(rand, zs->pareto_pow)) + zs->rand_off) % zs->nranges;
 }
diff --git a/lib/zipf.h b/lib/zipf.h
index c66b064..dbcaffb 100644
--- a/lib/zipf.h
+++ b/lib/zipf.h
@@ -11,12 +11,13 @@ struct zipf_state {
 	double zetan;
 	double pareto_pow;
 	struct frand_state rand;
+	unsigned long rand_off;
 };
 
-void zipf_init(struct zipf_state *zs, unsigned long nranges, double theta);
+void zipf_init(struct zipf_state *zs, unsigned long nranges, double theta, unsigned int seed);
 unsigned long long zipf_next(struct zipf_state *zs);
 
-void pareto_init(struct zipf_state *zs, unsigned long nranges, double h);
+void pareto_init(struct zipf_state *zs, unsigned long nranges, double h, unsigned int seed);
 unsigned long long pareto_next(struct zipf_state *zs);
 
 #endif
diff --git a/options.c b/options.c
index ea1ffb1..c240692 100644
--- a/options.c
+++ b/options.c
@@ -748,12 +748,18 @@ static int str_random_distribution_cb(void *data, const char *str)
 		return 1;
 	}
 
+	free(nr);
+
 	if (td->o.random_distribution == FIO_RAND_DIST_ZIPF)
 		td->o.zipf_theta = val;
-	else
+	else {
+		if (val <= 0.00 || val >= 1.00) {
+			log_err("fio: pareto input out of range (0 < input < 1.0)\n");
+			return 1;
+		}
 		td->o.pareto_h = val;
+	}
 
-	free(nr);
 	return 0;
 }
 
diff --git a/rbtree.c b/rbtree.c
index 7cff649..883bc72 100644
--- a/rbtree.c
+++ b/rbtree.c
@@ -300,3 +300,34 @@ struct rb_node *rb_first(struct rb_root *root)
 		n = n->rb_left;
 	return n;
 }
+
+struct rb_node *rb_next(const struct rb_node *node)
+{
+	struct rb_node *parent;
+
+	if (RB_EMPTY_NODE(node))
+		return NULL;
+
+	/*
+	 * If we have a right-hand child, go down and then left as far
+	 * as we can.
+	 */
+	if (node->rb_right) {
+		node = node->rb_right; 
+		while (node->rb_left)
+			node=node->rb_left;
+		return (struct rb_node *)node;
+	}
+
+	/*
+	 * No right-hand children. Everything down and left is smaller than us,
+	 * so any 'next' node must be in the general direction of our parent.
+	 * Go up the tree; any time the ancestor is a right-hand child of its
+	 * parent, keep going up. First time it's a left-hand child of its
+	 * parent, said parent is our 'next' node.
+	 */
+	while ((parent = rb_parent(node)) && node == parent->rb_right)
+		node = parent;
+
+	return parent;
+}
diff --git a/rbtree.h b/rbtree.h
index 7563725..c6cfe4a 100644
--- a/rbtree.h
+++ b/rbtree.h
@@ -141,6 +141,7 @@ extern void rb_erase(struct rb_node *, struct rb_root *);
 
 /* Find logical next and previous nodes in a tree */
 extern struct rb_node *rb_first(struct rb_root *);
+extern struct rb_node *rb_next(const struct rb_node *);
 
 static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
 				struct rb_node ** rb_link)
diff --git a/t/genzipf.c b/t/genzipf.c
index f286744..dfb8992 100644
--- a/t/genzipf.c
+++ b/t/genzipf.c
@@ -18,25 +18,107 @@
 #include <string.h>
 
 #include "../lib/zipf.h"
+#include "../flist.h"
+#include "../hash.h"
+#include "../rbtree.h"
 
 #define DEF_NR		1000000
 #define DEF_NR_OUTPUT	23
 
-static int val_cmp(const void *p1, const void *p2)
+struct node {
+	struct flist_head list;
+	struct rb_node rb;
+	unsigned long long val;
+	unsigned long hits;
+};
+
+static struct flist_head *hash;
+static unsigned long hash_bits = 24;
+static unsigned long hash_size = 1 << 24;
+static struct rb_root rb;
+
+static struct node *hash_lookup(unsigned long long val)
+{
+	struct flist_head *l = &hash[hash_long(val, hash_bits)];
+	struct flist_head *entry;
+	struct node *n;
+
+	flist_for_each(entry, l) {
+		n = flist_entry(entry, struct node, list);
+		if (n->val == val)
+			return n;
+	}
+
+	return NULL;
+}
+
+static void hash_insert(unsigned long long val)
+{
+	struct flist_head *l = &hash[hash_long(val, hash_bits)];
+	struct node *n = malloc(sizeof(*n));
+
+	n->val = val;
+	n->hits = 1;
+	flist_add_tail(&n->list, l);
+}
+
+static void rb_insert(struct node *n)
+{
+	struct rb_node **p, *parent;
+
+	memset(&n->rb, 0, sizeof(n->rb));
+	p = &rb.rb_node;
+	parent = NULL;
+	while (*p) {
+		struct node *__n;
+
+		parent = *p;
+		__n = rb_entry(parent, struct node, rb);
+		if (n->hits > __n->hits)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&n->rb, parent, p);
+	rb_insert_color(&n->rb, &rb);
+}
+
+static unsigned long rb_add(struct flist_head *list)
 {
-	const unsigned long *v1 = p1;
-	const unsigned long *v2 = p2;
+	struct flist_head *entry;
+	unsigned long ret = 0;
+	struct node *n;
 
-	return *v1 - *v2;
+	flist_for_each(entry, list) {
+		n = flist_entry(entry, struct node, list);
+
+		rb_insert(n);
+		ret++;
+	}
+
+	return ret;
+}
+
+static unsigned long rb_gen(void)
+{
+	unsigned long ret = 0;
+	unsigned int i;
+
+	for (i = 0; i < hash_size; i++)
+		ret += rb_add(&hash[i]);
+
+	return ret;
 }
 
 int main(int argc, char *argv[])
 {
 	unsigned long nranges, output_nranges;
-	unsigned long *vals;
-	unsigned long i, j, nr_vals, cur_vals, max_val, interval;
+	unsigned long offset;
+	unsigned long i, j, nr_vals, cur_vals, interval;
 	double *output, perc, perc_i;
 	struct zipf_state zs;
+	struct rb_node *n;
 	int use_zipf;
 	double val;
 
@@ -55,6 +137,10 @@ int main(int argc, char *argv[])
 	}
 
 	val = atof(argv[2]);
+	if ((val >= 1.00 || val < 0.00) && !use_zipf) {
+		printf("pareto input must be > 0.00 and < 1.00\n");
+		return 1;
+	}
 
 	nranges = DEF_NR;
 	output_nranges = DEF_NR_OUTPUT;
@@ -67,56 +153,71 @@ int main(int argc, char *argv[])
 	printf("Generating %s distribution with %f input and %lu ranges.\n", use_zipf ? "zipf" : "pareto", val, nranges);
 
 	if (use_zipf)
-		zipf_init(&zs, nranges, val);
+		zipf_init(&zs, nranges, val, 1);
 	else
-		pareto_init(&zs, nranges, val);
+		pareto_init(&zs, nranges, val, 1);
+
+	hash_bits = 0;
+	hash_size = nranges;
+	while ((hash_size >>= 1) != 0)
+		hash_bits++;
+
+	hash_size = 1 << hash_bits;
+
+	hash = malloc(hash_size * sizeof(struct flist_head));
+	for (i = 0; i < hash_size; i++)
+		INIT_FLIST_HEAD(&hash[i]);
 
-	vals = malloc(nranges * sizeof(unsigned long));
+	for (nr_vals = 0, i = 0; i < nranges; i++) {
+		struct node *n;
 
-	max_val = nr_vals = 0;
-	for (i = 0; i < nranges; i++) {
 		if (use_zipf)
-			vals[nr_vals] = zipf_next(&zs);
+			offset = zipf_next(&zs);
 		else
-			vals[nr_vals] = pareto_next(&zs);
+			offset = pareto_next(&zs);
+
+		n = hash_lookup(offset);
+		if (n)
+			n->hits++;
+		else
+			hash_insert(offset);
 
-		if (vals[nr_vals] > max_val)
-			max_val = vals[nr_vals];
 		nr_vals++;
 	}
 
-	qsort(vals, nr_vals, sizeof(unsigned long), val_cmp);
+	nr_vals = rb_gen();
 
-	interval = (max_val + output_nranges - 1) / output_nranges;
+	interval = (nr_vals + output_nranges - 1) / output_nranges;
 
 	output = malloc(output_nranges * sizeof(double));
 
-	for (i = j = 0, cur_vals = 0; i < nr_vals; i++) {
-		if (vals[i] > interval) {
-			output[j] = (double) (cur_vals + 1) / (double) nr_vals;
+	i = j = cur_vals = 0;
+	
+	n = rb_first(&rb);
+	while (n) {
+		struct node *node = rb_entry(n, struct node, rb);
+
+		if (i >= interval) {
+			output[j] = (double) (cur_vals + 1) / (double) nranges;
 			output[j] *= 100.0;
 			j++;
-			cur_vals = 0;
-			interval += (max_val + output_nranges - 1) / output_nranges;
-			continue;
-		}
-		cur_vals++;
-	}
+			cur_vals = node->hits;
+			interval += (nr_vals + output_nranges - 1) / output_nranges;
+		} else
+			cur_vals += node->hits;
 
-	if (cur_vals) {
-		output[j] = (double) (cur_vals + 1) / (double) nr_vals;
-		output[j] *= 100.0;
-		j++;
+		n = rb_next(n);
+		i++;
 	}
 
 	perc_i = 100.0 / (double) output_nranges;
 	perc = 0.0;
 	for (i = 0; i < j; i++) {
-		printf("%6.2f%% -> %6.2f%%:\t%6.2f%%\n",  perc, perc + perc_i, output[i]);
 		perc += perc_i;
+		printf("%s %6.2f%%:\t%6.2f%% of hits\n", i ? "|->" : "Top", perc, output[i]);
 	}
 
 	free(output);
-	free(vals);
+	free(hash);
 	return 0;
 }
--
To unsubscribe from this list: send the line "unsubscribe fio" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel]     [Linux SCSI]     [Linux IDE]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]

  Powered by Linux