The following changes since commit 39ab7da23768081db50b0026e0c2a8e38752e7a4: Move code around to satisfy t/stest linkage (2012-11-06 22:10:43 +0100) are available in the git repository at: git://git.kernel.dk/fio.git master Jens Axboe (11): Make the zipf/pareto state per file zipf/pareto: mix blocks with hashing rbtree: add rb_next() genzipf: improve accuracy genzipf: fix off-by-one in output array calculation Merge branch 'master' of ssh://brick.kernel.dk/data/git/fio Makefile: typo Add safe checks for valid pareto input value zipf/pareto: ensure that 0 isn't always the hottest block zipf: cleanup zipf: seed zipf/pareto rand with filename hash and job id HOWTO | 2 +- Makefile | 4 +- eta.c | 5 ++- file.h | 6 ++ filesetup.c | 39 ++++++++++++++ fio.h | 7 +-- hash.h | 9 +++- init.c | 20 ------- io_u.c | 4 +- lib/zipf.c | 35 +++++++------ lib/zipf.h | 5 +- options.c | 10 +++- rbtree.c | 31 +++++++++++ rbtree.h | 1 + t/genzipf.c | 165 +++++++++++++++++++++++++++++++++++++++++++++++----------- 15 files changed, 256 insertions(+), 87 deletions(-) --- Diff of recent changes: diff --git a/HOWTO b/HOWTO index a7bd1fb..40fe65f 100644 --- a/HOWTO +++ b/HOWTO @@ -1385,7 +1385,7 @@ Idle Run ---- --- P Thread setup, but not started. C Thread created. -I Thread initialized, waiting. +I Thread initialized, waiting or generating necessary data. p Thread running pre-reading file(s). R Running, doing sequential reads. r Running, doing random reads. diff --git a/Makefile b/Makefile index 88d397b..7e877fe 100644 --- a/Makefile +++ b/Makefile @@ -77,8 +77,8 @@ T_IEEE_OBJS += lib/ieee754.o T_IEEE_PROGS = t/ieee754 T_ZIPF_OBS = t/genzipf.o -T_ZIPF_OBJS += t/log.o lib/ieee754.o lib/rand.o lib/zipf.o t/genzipf.o -T_ZIPF_PROGS = t/genzip +T_ZIPF_OBJS += rbtree.o t/log.o lib/ieee754.o lib/rand.o lib/zipf.o t/genzipf.o +T_ZIPF_PROGS = t/genzipf T_OBJS = $(T_SMALLOC_OBJS) T_OBJS += $(T_IEEE_OBJS) diff --git a/eta.c b/eta.c index 075ce8c..1f67301 100644 --- a/eta.c +++ b/eta.c @@ -78,6 +78,7 @@ static void check_str_update(struct thread_data *td) c = 'C'; break; case TD_INITIALIZED: + case TD_SETTING_UP: c = 'I'; break; case TD_NOT_CREATED: @@ -318,7 +319,9 @@ int calc_thread_status(struct jobs_eta *je, int force) } else if (td->runstate == TD_RAMP) { je->nr_running++; je->nr_ramp++; - } else if (td->runstate < TD_RUNNING) + } else if (td->runstate == TD_SETTING_UP) + je->nr_running++; + else if (td->runstate < TD_RUNNING) je->nr_pending++; if (je->elapsed_sec >= 3) diff --git a/file.h b/file.h index 42fd58c..38e9d0d 100644 --- a/file.h +++ b/file.h @@ -5,6 +5,7 @@ #include "compiler/compiler.h" #include "io_ddir.h" #include "flist.h" +#include "lib/zipf.h" /* * The type of object we are working on @@ -112,6 +113,11 @@ struct fio_file { unsigned long last_free_lookup; unsigned failed_rands; + /* + * Used for zipf random distribution + */ + struct zipf_state zipf; + int references; enum fio_file_flags flags; diff --git a/filesetup.c b/filesetup.c index 9679c88..8636e16 100644 --- a/filesetup.c +++ b/filesetup.c @@ -12,6 +12,7 @@ #include "smalloc.h" #include "filehash.h" #include "os/os.h" +#include "hash.h" #ifdef FIO_HAVE_LINUX_FALLOCATE #include <linux/falloc.h> @@ -862,12 +863,50 @@ int pre_read_files(struct thread_data *td) return 1; } +static int __init_rand_distribution(struct thread_data *td, struct fio_file *f) +{ + unsigned int range_size, seed; + unsigned long nranges; + + range_size = min(td->o.min_bs[DDIR_READ], td->o.min_bs[DDIR_WRITE]); + + nranges = (f->real_file_size + range_size - 1) / range_size; + + seed = jhash(f->file_name, strlen(f->file_name), 0) * td->thread_number; + if (td->o.random_distribution == FIO_RAND_DIST_ZIPF) + zipf_init(&f->zipf, nranges, td->o.zipf_theta, seed); + else + pareto_init(&f->zipf, nranges, td->o.pareto_h, seed); + + return 1; +} + +static int init_rand_distribution(struct thread_data *td) +{ + struct fio_file *f; + unsigned int i; + int state; + + if (td->o.random_distribution == FIO_RAND_DIST_RANDOM) + return 0; + + state = td->runstate; + td_set_runstate(td, TD_SETTING_UP); + for_each_file(td, f, i) + __init_rand_distribution(td, f); + td_set_runstate(td, state); + + return 1; +} + int init_random_map(struct thread_data *td) { unsigned long long blocks, num_maps; struct fio_file *f; unsigned int i; + if (init_rand_distribution(td)) + return 0; if (td->o.norandommap || !td_random(td)) return 0; diff --git a/fio.h b/fio.h index 7eb0abb..1526d19 100644 --- a/fio.h +++ b/fio.h @@ -39,7 +39,6 @@ struct thread_data; #include "server.h" #include "stat.h" #include "flow.h" -#include "lib/zipf.h" #ifdef FIO_HAVE_GUASI #include <guasi.h> @@ -457,11 +456,6 @@ struct thread_data { struct frand_state __random_state; }; - /* - * Used for zipf random distribution - */ - struct zipf_state zipf; - struct timeval start; /* start of this loop */ struct timeval epoch; /* time job was started */ struct timeval last_issue; @@ -691,6 +685,7 @@ enum { TD_NOT_CREATED = 0, TD_CREATED, TD_INITIALIZED, + TD_SETTING_UP, TD_RAMP, TD_RUNNING, TD_PRE_READING, diff --git a/hash.h b/hash.h index 4b8c6bf..93dd831 100644 --- a/hash.h +++ b/hash.h @@ -28,7 +28,7 @@ #error Define GOLDEN_RATIO_PRIME for your wordsize. #endif -static inline unsigned long hash_long(unsigned long val, unsigned int bits) +static inline unsigned long __hash_long(unsigned long val) { unsigned long hash = val; @@ -52,8 +52,13 @@ static inline unsigned long hash_long(unsigned long val, unsigned int bits) hash *= GOLDEN_RATIO_PRIME; #endif + return hash; +} + +static inline unsigned long hash_long(unsigned long val, unsigned int bits) +{ /* High bits are more random, so use them. */ - return hash >> (BITS_PER_LONG - bits); + return __hash_long(val) >> (BITS_PER_LONG - bits); } static inline unsigned long hash_ptr(void *ptr, unsigned int bits) diff --git a/init.c b/init.c index bf4aa03..23be863 100644 --- a/init.c +++ b/init.c @@ -382,24 +382,6 @@ static int fixed_block_size(struct thread_options *o) o->min_bs[DDIR_READ] == o->min_bs[DDIR_TRIM]; } -static void init_rand_distribution(struct thread_data *td) -{ - unsigned int range_size; - unsigned long nranges; - - if (td->o.random_distribution == FIO_RAND_DIST_RANDOM) - return; - - range_size = min(td->o.min_bs[DDIR_READ], td->o.min_bs[DDIR_WRITE]); - - nranges = (td->o.size + range_size - 1) / range_size; - - if (td->o.random_distribution == FIO_RAND_DIST_ZIPF) - zipf_init(&td->zipf, nranges, td->o.zipf_theta); - else - pareto_init(&td->zipf, nranges, td->o.pareto_h); -} - /* * Lazy way of fixing up options that depend on each other. We could also * define option callback handlers, but this is easier. @@ -610,8 +592,6 @@ static int fixup_options(struct thread_data *td) td->o.compress_percentage = 0; } - init_rand_distribution(td); - return ret; } diff --git a/io_u.c b/io_u.c index 688249b..551c5ff 100644 --- a/io_u.c +++ b/io_u.c @@ -238,7 +238,7 @@ static int __get_next_rand_offset_zipf(struct thread_data *td, struct fio_file *f, enum fio_ddir ddir, unsigned long long *b) { - *b = zipf_next(&td->zipf); + *b = zipf_next(&f->zipf); return 0; } @@ -246,7 +246,7 @@ static int __get_next_rand_offset_pareto(struct thread_data *td, struct fio_file *f, enum fio_ddir ddir, unsigned long long *b) { - *b = pareto_next(&td->zipf); + *b = pareto_next(&f->zipf); return 0; } diff --git a/lib/zipf.c b/lib/zipf.c index 28e8d77..c7bb8a8 100644 --- a/lib/zipf.c +++ b/lib/zipf.c @@ -9,6 +9,7 @@ #include "../log.h" #include "zipf.h" #include "../minmax.h" +#include "../hash.h" #include "../os/os.h" struct fio_zipf_disk { @@ -87,26 +88,29 @@ punt: zs->nranges = f.nranges; } -void zipf_init(struct zipf_state *zs, unsigned long nranges, double theta) +static void shared_rand_init(struct zipf_state *zs, unsigned long nranges, + unsigned int seed) { - unsigned int i; - memset(zs, 0, sizeof(*zs)); - zs->nranges = nranges; - zs->theta = theta; - for (i = 1; i <= 2; i++) - zs->zeta2 += pow(1.0 / (double) i, zs->theta); + init_rand_seed(&zs->rand, seed); + zs->rand_off = __rand(&zs->rand); +} - init_rand(&zs->rand); +void zipf_init(struct zipf_state *zs, unsigned long nranges, double theta, + unsigned int seed) +{ + shared_rand_init(zs, nranges, seed); + + zs->theta = theta; + zs->zeta2 = pow(1.0, zs->theta) + pow(0.5, zs->theta); zipf_load_gen_zeta(zs); } unsigned long long zipf_next(struct zipf_state *zs) { - double alpha, eta, rand_uni, rand_z; unsigned long long n = zs->nranges; unsigned long long val; @@ -124,17 +128,14 @@ unsigned long long zipf_next(struct zipf_state *zs) else val = 1 + (unsigned long long)(n * pow(eta*rand_uni - eta + 1.0, alpha)); - return val - 1; + return (__hash_long(val - 1) + zs->rand_off) % zs->nranges; } -void pareto_init(struct zipf_state *zs, unsigned long nranges, double h) +void pareto_init(struct zipf_state *zs, unsigned long nranges, double h, + unsigned int seed) { - memset(zs, 0, sizeof(*zs)); - - zs->nranges = nranges; + shared_rand_init(zs, nranges, seed); zs->pareto_pow = log(h) / log(1.0 - h); - - init_rand(&zs->rand); } unsigned long long pareto_next(struct zipf_state *zs) @@ -142,5 +143,5 @@ unsigned long long pareto_next(struct zipf_state *zs) double rand = (double) __rand(&zs->rand) / (double) FRAND_MAX; unsigned long long n = zs->nranges - 1; - return n * pow(rand, zs->pareto_pow); + return (__hash_long(n * pow(rand, zs->pareto_pow)) + zs->rand_off) % zs->nranges; } diff --git a/lib/zipf.h b/lib/zipf.h index c66b064..dbcaffb 100644 --- a/lib/zipf.h +++ b/lib/zipf.h @@ -11,12 +11,13 @@ struct zipf_state { double zetan; double pareto_pow; struct frand_state rand; + unsigned long rand_off; }; -void zipf_init(struct zipf_state *zs, unsigned long nranges, double theta); +void zipf_init(struct zipf_state *zs, unsigned long nranges, double theta, unsigned int seed); unsigned long long zipf_next(struct zipf_state *zs); -void pareto_init(struct zipf_state *zs, unsigned long nranges, double h); +void pareto_init(struct zipf_state *zs, unsigned long nranges, double h, unsigned int seed); unsigned long long pareto_next(struct zipf_state *zs); #endif diff --git a/options.c b/options.c index ea1ffb1..c240692 100644 --- a/options.c +++ b/options.c @@ -748,12 +748,18 @@ static int str_random_distribution_cb(void *data, const char *str) return 1; } + free(nr); + if (td->o.random_distribution == FIO_RAND_DIST_ZIPF) td->o.zipf_theta = val; - else + else { + if (val <= 0.00 || val >= 1.00) { + log_err("fio: pareto input out of range (0 < input < 1.0)\n"); + return 1; + } td->o.pareto_h = val; + } - free(nr); return 0; } diff --git a/rbtree.c b/rbtree.c index 7cff649..883bc72 100644 --- a/rbtree.c +++ b/rbtree.c @@ -300,3 +300,34 @@ struct rb_node *rb_first(struct rb_root *root) n = n->rb_left; return n; } + +struct rb_node *rb_next(const struct rb_node *node) +{ + struct rb_node *parent; + + if (RB_EMPTY_NODE(node)) + return NULL; + + /* + * If we have a right-hand child, go down and then left as far + * as we can. + */ + if (node->rb_right) { + node = node->rb_right; + while (node->rb_left) + node=node->rb_left; + return (struct rb_node *)node; + } + + /* + * No right-hand children. Everything down and left is smaller than us, + * so any 'next' node must be in the general direction of our parent. + * Go up the tree; any time the ancestor is a right-hand child of its + * parent, keep going up. First time it's a left-hand child of its + * parent, said parent is our 'next' node. + */ + while ((parent = rb_parent(node)) && node == parent->rb_right) + node = parent; + + return parent; +} diff --git a/rbtree.h b/rbtree.h index 7563725..c6cfe4a 100644 --- a/rbtree.h +++ b/rbtree.h @@ -141,6 +141,7 @@ extern void rb_erase(struct rb_node *, struct rb_root *); /* Find logical next and previous nodes in a tree */ extern struct rb_node *rb_first(struct rb_root *); +extern struct rb_node *rb_next(const struct rb_node *); static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, struct rb_node ** rb_link) diff --git a/t/genzipf.c b/t/genzipf.c index f286744..dfb8992 100644 --- a/t/genzipf.c +++ b/t/genzipf.c @@ -18,25 +18,107 @@ #include <string.h> #include "../lib/zipf.h" +#include "../flist.h" +#include "../hash.h" +#include "../rbtree.h" #define DEF_NR 1000000 #define DEF_NR_OUTPUT 23 -static int val_cmp(const void *p1, const void *p2) +struct node { + struct flist_head list; + struct rb_node rb; + unsigned long long val; + unsigned long hits; +}; + +static struct flist_head *hash; +static unsigned long hash_bits = 24; +static unsigned long hash_size = 1 << 24; +static struct rb_root rb; + +static struct node *hash_lookup(unsigned long long val) +{ + struct flist_head *l = &hash[hash_long(val, hash_bits)]; + struct flist_head *entry; + struct node *n; + + flist_for_each(entry, l) { + n = flist_entry(entry, struct node, list); + if (n->val == val) + return n; + } + + return NULL; +} + +static void hash_insert(unsigned long long val) +{ + struct flist_head *l = &hash[hash_long(val, hash_bits)]; + struct node *n = malloc(sizeof(*n)); + + n->val = val; + n->hits = 1; + flist_add_tail(&n->list, l); +} + +static void rb_insert(struct node *n) +{ + struct rb_node **p, *parent; + + memset(&n->rb, 0, sizeof(n->rb)); + p = &rb.rb_node; + parent = NULL; + while (*p) { + struct node *__n; + + parent = *p; + __n = rb_entry(parent, struct node, rb); + if (n->hits > __n->hits) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(&n->rb, parent, p); + rb_insert_color(&n->rb, &rb); +} + +static unsigned long rb_add(struct flist_head *list) { - const unsigned long *v1 = p1; - const unsigned long *v2 = p2; + struct flist_head *entry; + unsigned long ret = 0; + struct node *n; - return *v1 - *v2; + flist_for_each(entry, list) { + n = flist_entry(entry, struct node, list); + + rb_insert(n); + ret++; + } + + return ret; +} + +static unsigned long rb_gen(void) +{ + unsigned long ret = 0; + unsigned int i; + + for (i = 0; i < hash_size; i++) + ret += rb_add(&hash[i]); + + return ret; } int main(int argc, char *argv[]) { unsigned long nranges, output_nranges; - unsigned long *vals; - unsigned long i, j, nr_vals, cur_vals, max_val, interval; + unsigned long offset; + unsigned long i, j, nr_vals, cur_vals, interval; double *output, perc, perc_i; struct zipf_state zs; + struct rb_node *n; int use_zipf; double val; @@ -55,6 +137,10 @@ int main(int argc, char *argv[]) } val = atof(argv[2]); + if ((val >= 1.00 || val < 0.00) && !use_zipf) { + printf("pareto input must be > 0.00 and < 1.00\n"); + return 1; + } nranges = DEF_NR; output_nranges = DEF_NR_OUTPUT; @@ -67,56 +153,71 @@ int main(int argc, char *argv[]) printf("Generating %s distribution with %f input and %lu ranges.\n", use_zipf ? "zipf" : "pareto", val, nranges); if (use_zipf) - zipf_init(&zs, nranges, val); + zipf_init(&zs, nranges, val, 1); else - pareto_init(&zs, nranges, val); + pareto_init(&zs, nranges, val, 1); + + hash_bits = 0; + hash_size = nranges; + while ((hash_size >>= 1) != 0) + hash_bits++; + + hash_size = 1 << hash_bits; + + hash = malloc(hash_size * sizeof(struct flist_head)); + for (i = 0; i < hash_size; i++) + INIT_FLIST_HEAD(&hash[i]); - vals = malloc(nranges * sizeof(unsigned long)); + for (nr_vals = 0, i = 0; i < nranges; i++) { + struct node *n; - max_val = nr_vals = 0; - for (i = 0; i < nranges; i++) { if (use_zipf) - vals[nr_vals] = zipf_next(&zs); + offset = zipf_next(&zs); else - vals[nr_vals] = pareto_next(&zs); + offset = pareto_next(&zs); + + n = hash_lookup(offset); + if (n) + n->hits++; + else + hash_insert(offset); - if (vals[nr_vals] > max_val) - max_val = vals[nr_vals]; nr_vals++; } - qsort(vals, nr_vals, sizeof(unsigned long), val_cmp); + nr_vals = rb_gen(); - interval = (max_val + output_nranges - 1) / output_nranges; + interval = (nr_vals + output_nranges - 1) / output_nranges; output = malloc(output_nranges * sizeof(double)); - for (i = j = 0, cur_vals = 0; i < nr_vals; i++) { - if (vals[i] > interval) { - output[j] = (double) (cur_vals + 1) / (double) nr_vals; + i = j = cur_vals = 0; + + n = rb_first(&rb); + while (n) { + struct node *node = rb_entry(n, struct node, rb); + + if (i >= interval) { + output[j] = (double) (cur_vals + 1) / (double) nranges; output[j] *= 100.0; j++; - cur_vals = 0; - interval += (max_val + output_nranges - 1) / output_nranges; - continue; - } - cur_vals++; - } + cur_vals = node->hits; + interval += (nr_vals + output_nranges - 1) / output_nranges; + } else + cur_vals += node->hits; - if (cur_vals) { - output[j] = (double) (cur_vals + 1) / (double) nr_vals; - output[j] *= 100.0; - j++; + n = rb_next(n); + i++; } perc_i = 100.0 / (double) output_nranges; perc = 0.0; for (i = 0; i < j; i++) { - printf("%6.2f%% -> %6.2f%%:\t%6.2f%%\n", perc, perc + perc_i, output[i]); perc += perc_i; + printf("%s %6.2f%%:\t%6.2f%% of hits\n", i ? "|->" : "Top", perc, output[i]); } free(output); - free(vals); + free(hash); return 0; } -- To unsubscribe from this list: send the line "unsubscribe fio" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html