The following changes since commit 5f739e0e2912b809635fac635a8c1140155577e6: Fix compile on environment of SuperH (2011-08-28 10:00:17 +0200) are available in the git repository at: git://git.kernel.dk/fio.git master Dan Ehrenberg (1): Adding userspace_libaio_reap option Jens Axboe (2): Fix bad latency reporting for rated IO jobs Add sub-option support (sort-of) and convert libaio_userspace_reap HOWTO | 10 ++++++++++ engines/libaio.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++- fio.1 | 8 +++++++- fio.h | 2 ++ io_u.c | 10 ++++++++++ options.c | 16 ++++++++++++++++ parse.c | 47 ++++++++++++++++++++++++++++++++++------------- parse.h | 1 + 8 files changed, 131 insertions(+), 15 deletions(-) --- Diff of recent changes: diff --git a/HOWTO b/HOWTO index a1b2e8c..724b604 100644 --- a/HOWTO +++ b/HOWTO @@ -517,6 +517,16 @@ ioengine=str Defines how the job issues io to the file. The following libaio Linux native asynchronous io. Note that Linux may only support queued behaviour with non-buffered IO (set direct=1 or buffered=0). + This engine also has a sub-option, + userspace_reap. To set it, use + ioengine=libaio:userspace_reap. Normally, with + the libaio engine in use, fio will use the + io_getevents system call to reap newly returned + events. With this flag turned on, the AIO ring + will be read directly from user-space to reap + events. The reaping mode is only enabled when + polling for a minimum of 0 events (eg when + iodepth_batch_complete=0). posixaio glibc posix asynchronous io. diff --git a/engines/libaio.c b/engines/libaio.c index c837ab6..ad34d06 100644 --- a/engines/libaio.c +++ b/engines/libaio.c @@ -58,6 +58,47 @@ static struct io_u *fio_libaio_event(struct thread_data *td, int event) return io_u; } +struct aio_ring { + unsigned id; /** kernel internal index number */ + unsigned nr; /** number of io_events */ + unsigned head; + unsigned tail; + + unsigned magic; + unsigned compat_features; + unsigned incompat_features; + unsigned header_length; /** size of aio_ring */ + + struct io_event events[0]; +}; + +#define AIO_RING_MAGIC 0xa10a10a1 + +static int user_io_getevents(io_context_t aio_ctx, unsigned int max, + struct io_event *events) +{ + long i = 0; + unsigned head; + struct aio_ring *ring = (struct aio_ring*) aio_ctx; + + while (i < max) { + head = ring->head; + + if (head == ring->tail) { + /* There are no more completions */ + break; + } else { + /* There is another completion to reap */ + events[i] = ring->events[head]; + read_barrier(); + ring->head = (head + 1) % ring->nr; + i++; + } + } + + return i; +} + static int fio_libaio_getevents(struct thread_data *td, unsigned int min, unsigned int max, struct timespec *t) { @@ -66,7 +107,16 @@ static int fio_libaio_getevents(struct thread_data *td, unsigned int min, int r, events = 0; do { - r = io_getevents(ld->aio_ctx, actual_min, max, ld->aio_events + events, t); + if (td->o.userspace_libaio_reap == 1 + && actual_min == 0 + && ((struct aio_ring *)(ld->aio_ctx))->magic + == AIO_RING_MAGIC) { + r = user_io_getevents(ld->aio_ctx, max, + ld->aio_events + events); + } else { + r = io_getevents(ld->aio_ctx, actual_min, + max, ld->aio_events + events, t); + } if (r >= 0) events += r; else if (r == -EAGAIN) diff --git a/fio.1 b/fio.1 index 488896c..ffc97c9 100644 --- a/fio.1 +++ b/fio.1 @@ -367,7 +367,13 @@ Basic \fIreadv\fR\|(2) or \fIwritev\fR\|(2) I/O. Will emulate queuing by coalescing adjacents IOs into a single submission. .TP .B libaio -Linux native asynchronous I/O. +Linux native asynchronous I/O. This engine also has a sub-option, +\fBuserspace_reap\fR. To set it, use \fBioengine=libaio:userspace_reap\fR. +Normally, with the libaio engine in use, fio will use the +\fIio_getevents\fR\|(3) system call to reap newly returned events. With this +flag turned on, the AIO ring will be read directly from user-space to reap +events. The reaping mode is only enabled when polling for a minimum of \fB0\fR +events (eg when \fBiodepth_batch_complete=0\fR). .TP .B posixaio POSIX asynchronous I/O using \fIaio_read\fR\|(3) and \fIaio_write\fR\|(3). diff --git a/fio.h b/fio.h index 9d2a61c..0c86f28 100644 --- a/fio.h +++ b/fio.h @@ -413,6 +413,8 @@ struct thread_options { unsigned int gid; unsigned int sync_file_range; + + unsigned int userspace_libaio_reap; }; #define FIO_VERROR_SIZE 128 diff --git a/io_u.c b/io_u.c index 6a53bda..7709df5 100644 --- a/io_u.c +++ b/io_u.c @@ -486,6 +486,16 @@ static enum fio_ddir rate_ddir(struct thread_data *td, enum fio_ddir ddir) } else usec = td->rate_pending_usleep[ddir]; + /* + * We are going to sleep, ensure that we flush anything pending as + * not to skew our latency numbers + */ + if (td->cur_depth) { + int fio_unused ret; + + ret = io_u_queued_complete(td, td->cur_depth, NULL); + } + fio_gettime(&t, NULL); usec_sleep(td, usec); usec = utime_since_now(&t); diff --git a/options.c b/options.c index 6a87e98..74c24d0 100644 --- a/options.c +++ b/options.c @@ -226,6 +226,21 @@ static int str_rw_cb(void *data, const char *str) return 0; } +#ifdef FIO_HAVE_LIBAIO +static int str_libaio_cb(void *data, const char *str) +{ + struct thread_data *td = data; + + if (!strcmp(str, "userspace_reap")) { + td->o.userspace_libaio_reap = 1; + return 0; + } + + log_err("fio: bad libaio sub-option: %s\n", str); + return 1; +} +#endif + static int str_mem_cb(void *data, const char *mem) { struct thread_data *td = data; @@ -961,6 +976,7 @@ static struct fio_option options[FIO_MAX_OPTS] = { #ifdef FIO_HAVE_LIBAIO { .ival = "libaio", .help = "Linux native asynchronous IO", + .cb = str_libaio_cb, }, #endif #ifdef FIO_HAVE_POSIXAIO diff --git a/parse.c b/parse.c index c2c5bf2..4458121 100644 --- a/parse.c +++ b/parse.c @@ -346,6 +346,9 @@ static int __handle_option(struct fio_option *o, const char *ptr, void *data, double uf; char **cp; int ret = 0, is_time = 0; + const struct value_pair *vp; + struct value_pair posval[PARSE_MAX_VP]; + int i, all_skipped = 1; dprint(FD_PARSE, "__handle_option=%s, type=%d, ptr=%s\n", o->name, o->type, ptr); @@ -359,9 +362,6 @@ static int __handle_option(struct fio_option *o, const char *ptr, void *data, case FIO_OPT_STR: case FIO_OPT_STR_MULTI: { fio_opt_str_fn *fn = o->cb; - const struct value_pair *vp; - struct value_pair posval[PARSE_MAX_VP]; - int i, all_skipped = 1; posval_sort(o, posval); @@ -487,19 +487,40 @@ static int __handle_option(struct fio_option *o, const char *ptr, void *data, case FIO_OPT_STR_STORE: { fio_opt_str_fn *fn = o->cb; - if (o->roff1) - cp = (char **) o->roff1; - else - cp = td_var(data, o->off1); + posval_sort(o, posval); - *cp = strdup(ptr); - if (fn) { - ret = fn(data, ptr); - if (ret) { - free(*cp); - *cp = NULL; + ret = 1; + for (i = 0; i < PARSE_MAX_VP; i++) { + vp = &posval[i]; + if (!vp->ival || vp->ival[0] == '\0') + continue; + all_skipped = 0; + if (!strncmp(vp->ival, ptr, opt_len(ptr))) { + char *rest; + + ret = 0; + if (vp->cb) + fn = vp->cb; + if (o->roff1) + cp = (char **) o->roff1; + else + cp = td_var(data, o->off1); + *cp = strdup(ptr); + rest = strstr(*cp, ":"); + if (rest) { + *rest = '\0'; + ptr = rest + 1; + } else + ptr = NULL; + break; } } + + if (ret && !all_skipped) + show_option_values(o); + else if (fn && ptr) + ret = fn(data, ptr); + break; } case FIO_OPT_RANGE: { diff --git a/parse.h b/parse.h index c5a7417..f2265a4 100644 --- a/parse.h +++ b/parse.h @@ -29,6 +29,7 @@ struct value_pair { unsigned int oval; /* output value */ const char *help; /* help text for sub option */ int or; /* OR value */ + void *cb; /* sub-option callback */ }; #define OPT_LEN_MAX 4096 -- To unsubscribe from this list: send the line "unsubscribe fio" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html