The following changes since commit 7477673323a943b99ea203bb9434661d13a0159c: lfsr: ensure we don't generate an offset + buflen that exceeds the max size (2013-01-11 14:03:25 +0100) are available in the git repository at: git://git.kernel.dk/fio.git master Jens Axboe (2): Pre-load and sort random blocks for pure read verify workloads configure: enable e4defrag engine regardless of MOVE_EXTENT compile test Makefile | 2 +- backend.c | 1 + configure | 7 ++- fio.h | 3 + flist.h | 3 + io_u.c | 100 ++++++++++++++++++++++++++++++-------- lib/flist_sort.c | 140 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ options.c | 10 ++++ 8 files changed, 243 insertions(+), 23 deletions(-) create mode 100644 lib/flist_sort.c --- Diff of recent changes: diff --git a/Makefile b/Makefile index 96819c8..299e5e9 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ SOURCE := gettime.c fio.c ioengines.c init.c stat.c log.c time.c filesetup.c \ engines/mmap.c engines/sync.c engines/null.c engines/net.c \ memalign.c server.c client.c iolog.c backend.c libfio.c flow.c \ json.c lib/zipf.c lib/axmap.c lib/lfsr.c gettime-thread.c \ - helpers.c + helpers.c lib/flist_sort.c ifdef CONFIG_64BIT CFLAGS += -DBITS_PER_LONG=64 diff --git a/backend.c b/backend.c index 8e96fb0..7cebf4d 100644 --- a/backend.c +++ b/backend.c @@ -1025,6 +1025,7 @@ static void *thread_main(void *data) INIT_FLIST_HEAD(&td->io_hist_list); INIT_FLIST_HEAD(&td->verify_list); INIT_FLIST_HEAD(&td->trim_list); + INIT_FLIST_HEAD(&td->next_rand_list); pthread_mutex_init(&td->io_u_lock, NULL); td->io_hist_tree = RB_ROOT; diff --git a/configure b/configure index 9cdfc49..3558bf4 100755 --- a/configure +++ b/configure @@ -585,7 +585,12 @@ int main(int argc, char **argv) return ioctl(0, EXT4_IOC_MOVE_EXT, &me); } EOF -if compile_prog "" "" "ext4 move extent"; then +if compile_prog "" "" "ext4 move extent" ; then + ext4_me="yes" +elif test $targetos = "Linux" ; then + # On Linux, just default to it on and let it error at runtime if we really + # don't have it. None of my updated systems have it defined, but it does + # work. Takes a while to bubble back. ext4_me="yes" fi echo "EXT4 move extent $ext4_me" diff --git a/fio.h b/fio.h index a528f6a..c5e2bf1 100644 --- a/fio.h +++ b/fio.h @@ -149,6 +149,7 @@ struct thread_options { unsigned int verify; unsigned int do_verify; unsigned int verifysort; + unsigned int verifysort_nr; unsigned int verify_interval; unsigned int verify_offset; char verify_pattern[MAX_PATTERN_SIZE]; @@ -515,6 +516,8 @@ struct thread_data { struct flist_head trim_list; unsigned long trim_entries; + struct flist_head next_rand_list; + /* * for fileservice, how often to switch to a new file */ diff --git a/flist.h b/flist.h index 7aca973..8e13041 100644 --- a/flist.h +++ b/flist.h @@ -176,4 +176,7 @@ static inline void flist_splice_init(struct flist_head *list, for (pos = (head)->next, n = pos->next; pos != (head); \ pos = n, n = pos->next) +extern void flist_sort(void *priv, struct flist_head *head, + int (*cmp)(void *priv, struct flist_head *a, struct flist_head *b)); + #endif diff --git a/io_u.c b/io_u.c index 94fd123..87b8019 100644 --- a/io_u.c +++ b/io_u.c @@ -24,7 +24,7 @@ struct io_completion_data { * The ->io_axmap contains a map of blocks we have or have not done io * to yet. Used to make sure we cover the entire range in a fair fashion. */ -static int random_map_free(struct fio_file *f, const unsigned long long block) +static int random_map_free(struct fio_file *f, const uint64_t block) { return !axmap_isset(f->io_axmap, block); } @@ -36,10 +36,10 @@ static void mark_random_map(struct thread_data *td, struct io_u *io_u) { unsigned int min_bs = td->o.rw_min_bs; struct fio_file *f = io_u->file; - unsigned long long block; unsigned int nr_blocks; + uint64_t block; - block = (io_u->offset - f->file_offset) / (unsigned long long) min_bs; + block = (io_u->offset - f->file_offset) / (uint64_t) min_bs; nr_blocks = (io_u->buflen + min_bs - 1) / min_bs; if (!(io_u->flags & IO_U_F_BUSY_OK)) @@ -67,24 +67,29 @@ static uint64_t last_block(struct thread_data *td, struct fio_file *f, if (td->o.zone_range) max_size = td->o.zone_range; - max_blocks = max_size / (unsigned long long) td->o.ba[ddir]; + max_blocks = max_size / (uint64_t) td->o.ba[ddir]; if (!max_blocks) return 0; return max_blocks; } +struct rand_off { + struct flist_head list; + uint64_t off; +}; + static int __get_next_rand_offset(struct thread_data *td, struct fio_file *f, - enum fio_ddir ddir, unsigned long long *b) + enum fio_ddir ddir, uint64_t *b) { - unsigned long long r, lastb; + uint64_t r, lastb; lastb = last_block(td, f, ddir); if (!lastb) return 1; if (td->o.random_generator == FIO_RAND_GEN_TAUSWORTHE) { - unsigned long long rmax; + uint64_t rmax; rmax = td->o.use_os_rand ? OS_RAND_MAX : FRAND_MAX; @@ -98,7 +103,7 @@ static int __get_next_rand_offset(struct thread_data *td, struct fio_file *f, dprint(FD_RANDOM, "off rand %llu\n", r); - *b = (lastb - 1) * (r / ((unsigned long long) rmax + 1.0)); + *b = (lastb - 1) * (r / ((uint64_t) rmax + 1.0)); } else { uint64_t off = 0; @@ -131,7 +136,7 @@ ret: static int __get_next_rand_offset_zipf(struct thread_data *td, struct fio_file *f, enum fio_ddir ddir, - unsigned long long *b) + uint64_t *b) { *b = zipf_next(&f->zipf); return 0; @@ -139,14 +144,22 @@ static int __get_next_rand_offset_zipf(struct thread_data *td, static int __get_next_rand_offset_pareto(struct thread_data *td, struct fio_file *f, enum fio_ddir ddir, - unsigned long long *b) + uint64_t *b) { *b = pareto_next(&f->zipf); return 0; } -static int get_next_rand_offset(struct thread_data *td, struct fio_file *f, - enum fio_ddir ddir, unsigned long long *b) +static int flist_cmp(void *data, struct flist_head *a, struct flist_head *b) +{ + struct rand_off *r1 = flist_entry(a, struct rand_off, list); + struct rand_off *r2 = flist_entry(b, struct rand_off, list); + + return r1->off - r2->off; +} + +static int get_off_from_method(struct thread_data *td, struct fio_file *f, + enum fio_ddir ddir, uint64_t *b) { if (td->o.random_distribution == FIO_RAND_DIST_RANDOM) return __get_next_rand_offset(td, f, ddir, b); @@ -159,8 +172,52 @@ static int get_next_rand_offset(struct thread_data *td, struct fio_file *f, return 1; } +static int get_next_rand_offset(struct thread_data *td, struct fio_file *f, + enum fio_ddir ddir, uint64_t *b) +{ + struct rand_off *r; + int i, ret = 1; + + /* + * If sort not enabled, or not a pure random read workload without + * any stored write metadata, just return a random offset + */ + if (!td->o.verifysort_nr || !(ddir == READ && td->o.do_verify && + td->o.verify != VERIFY_NONE && td_random(td))) + return get_off_from_method(td, f, ddir, b); + + if (!flist_empty(&td->next_rand_list)) { + struct rand_off *r; +fetch: + r = flist_entry(td->next_rand_list.next, struct rand_off, list); + flist_del(&r->list); + *b = r->off; + free(r); + return 0; + } + + for (i = 0; i < td->o.verifysort_nr; i++) { + r = malloc(sizeof(*r)); + + ret = get_off_from_method(td, f, ddir, &r->off); + if (ret) { + free(r); + break; + } + + flist_add(&r->list, &td->next_rand_list); + } + + if (ret && !i) + return ret; + + assert(!flist_empty(&td->next_rand_list)); + flist_sort(NULL, &td->next_rand_list, flist_cmp); + goto fetch; +} + static int get_next_rand_block(struct thread_data *td, struct fio_file *f, - enum fio_ddir ddir, unsigned long long *b) + enum fio_ddir ddir, uint64_t *b) { if (!get_next_rand_offset(td, f, ddir, b)) return 0; @@ -177,7 +234,7 @@ static int get_next_rand_block(struct thread_data *td, struct fio_file *f, } static int get_next_seq_offset(struct thread_data *td, struct fio_file *f, - enum fio_ddir ddir, unsigned long long *offset) + enum fio_ddir ddir, uint64_t *offset) { assert(ddir_rw(ddir)); @@ -185,7 +242,7 @@ static int get_next_seq_offset(struct thread_data *td, struct fio_file *f, f->last_pos = f->last_pos - f->io_size; if (f->last_pos < f->real_file_size) { - unsigned long long pos; + uint64_t pos; if (f->last_pos == f->file_offset && td->o.ddir_seq_add < 0) f->last_pos = f->real_file_size; @@ -205,7 +262,7 @@ static int get_next_block(struct thread_data *td, struct io_u *io_u, enum fio_ddir ddir, int rw_seq) { struct fio_file *f = io_u->file; - unsigned long long b, offset; + uint64_t b, offset; int ret; assert(ddir_rw(ddir)); @@ -1106,7 +1163,7 @@ static int check_get_verify(struct thread_data *td, struct io_u *io_u) static void small_content_scramble(struct io_u *io_u) { unsigned int i, nr_blocks = io_u->buflen / 512; - unsigned long long boffset; + uint64_t boffset; unsigned int offset; void *p, *end; @@ -1124,9 +1181,9 @@ static void small_content_scramble(struct io_u *io_u) * and the actual offset. */ offset = (io_u->start_time.tv_usec ^ boffset) & 511; - offset &= ~(sizeof(unsigned long long) - 1); - if (offset >= 512 - sizeof(unsigned long long)) - offset -= sizeof(unsigned long long); + offset &= ~(sizeof(uint64_t) - 1); + if (offset >= 512 - sizeof(uint64_t)) + offset -= sizeof(uint64_t); memcpy(p + offset, &boffset, sizeof(boffset)); end = p + 512 - sizeof(io_u->start_time); @@ -1285,7 +1342,8 @@ static void account_io_completion(struct thread_data *td, struct io_u *io_u, static long long usec_for_io(struct thread_data *td, enum fio_ddir ddir) { - unsigned long long secs, remainder, bps, bytes; + uint64_t secs, remainder, bps, bytes; + bytes = td->this_io_bytes[ddir]; bps = td->rate_bps[ddir]; secs = bytes / bps; diff --git a/lib/flist_sort.c b/lib/flist_sort.c new file mode 100644 index 0000000..1c91cc4 --- /dev/null +++ b/lib/flist_sort.c @@ -0,0 +1,140 @@ +#include <stdio.h> +#include <string.h> +#include "../flist.h" +#include "../log.h" + +#define MAX_LIST_LENGTH_BITS 20 + +/* + * Returns a list organized in an intermediate format suited + * to chaining of merge() calls: null-terminated, no reserved or + * sentinel head node, "prev" links not maintained. + */ +static struct flist_head *merge(void *priv, + int (*cmp)(void *priv, struct flist_head *a, + struct flist_head *b), + struct flist_head *a, struct flist_head *b) +{ + struct flist_head head, *tail = &head; + + while (a && b) { + /* if equal, take 'a' -- important for sort stability */ + if ((*cmp)(priv, a, b) <= 0) { + tail->next = a; + a = a->next; + } else { + tail->next = b; + b = b->next; + } + tail = tail->next; + } + tail->next = a?:b; + return head.next; +} + +/* + * Combine final list merge with restoration of standard doubly-linked + * list structure. This approach duplicates code from merge(), but + * runs faster than the tidier alternatives of either a separate final + * prev-link restoration pass, or maintaining the prev links + * throughout. + */ +static void merge_and_restore_back_links(void *priv, + int (*cmp)(void *priv, struct flist_head *a, + struct flist_head *b), + struct flist_head *head, + struct flist_head *a, struct flist_head *b) +{ + struct flist_head *tail = head; + + while (a && b) { + /* if equal, take 'a' -- important for sort stability */ + if ((*cmp)(priv, a, b) <= 0) { + tail->next = a; + a->prev = tail; + a = a->next; + } else { + tail->next = b; + b->prev = tail; + b = b->next; + } + tail = tail->next; + } + tail->next = a ? : b; + + do { + /* + * In worst cases this loop may run many iterations. + * Continue callbacks to the client even though no + * element comparison is needed, so the client's cmp() + * routine can invoke cond_resched() periodically. + */ + (*cmp)(priv, tail->next, tail->next); + + tail->next->prev = tail; + tail = tail->next; + } while (tail->next); + + tail->next = head; + head->prev = tail; +} + +/** + * list_sort - sort a list + * @priv: private data, opaque to list_sort(), passed to @cmp + * @head: the list to sort + * @cmp: the elements comparison function + * + * This function implements "merge sort", which has O(nlog(n)) + * complexity. + * + * The comparison function @cmp must return a negative value if @a + * should sort before @b, and a positive value if @a should sort after + * @b. If @a and @b are equivalent, and their original relative + * ordering is to be preserved, @cmp must return 0. + */ +void flist_sort(void *priv, struct flist_head *head, + int (*cmp)(void *priv, struct flist_head *a, + struct flist_head *b)) +{ + struct flist_head *part[MAX_LIST_LENGTH_BITS+1]; /* sorted partial lists + -- last slot is a sentinel */ + int lev; /* index into part[] */ + int max_lev = 0; + struct flist_head *list; + + if (flist_empty(head)) + return; + + memset(part, 0, sizeof(part)); + + head->prev->next = NULL; + list = head->next; + + while (list) { + struct flist_head *cur = list; + list = list->next; + cur->next = NULL; + + for (lev = 0; part[lev]; lev++) { + cur = merge(priv, cmp, part[lev], cur); + part[lev] = NULL; + } + if (lev > max_lev) { + if (lev >= MAX_LIST_LENGTH_BITS) { + log_err("fio: list passed to" + " list_sort() too long for" + " efficiency\n"); + lev--; + } + max_lev = lev; + } + part[lev] = cur; + } + + for (lev = 0; lev < max_lev; lev++) + if (part[lev]) + list = merge(priv, cmp, part[lev], list); + + merge_and_restore_back_links(priv, cmp, head, part[max_lev], list); +} diff --git a/options.c b/options.c index 3a406fa..1760762 100644 --- a/options.c +++ b/options.c @@ -1887,6 +1887,16 @@ static struct fio_option options[FIO_MAX_OPTS] = { .parent = "verify", }, { + .name = "verifysort_nr", + .type = FIO_OPT_INT, + .off1 = td_var_offset(verifysort_nr), + .help = "Pre-load and sort verify blocks for a read workload", + .minval = 0, + .maxval = 131072, + .def = "1024", + .parent = "verify", + }, + { .name = "verify_interval", .type = FIO_OPT_INT, .off1 = td_var_offset(verify_interval), -- To unsubscribe from this list: send the line "unsubscribe fio" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html