The following changes since commit d708ac5e21c97a92c9c7b6b7593d9f2b0d7c9d3f: Fix assert (2010-09-02 13:29:55 +0200) are available in the git repository at: git://git.kernel.dk/fio.git master Jens Axboe (2): Align memory buffers for RAW io engines Add binject IO engine Makefile | 1 + engines/binject.c | 295 +++++++++++++++++++++++++++++++++++++++++++++++++++++ fio.c | 3 +- ioengine.h | 3 + options.c | 5 + os/binject.h | 65 ++++++++++++ os/os-linux.h | 2 + 7 files changed, 373 insertions(+), 1 deletions(-) create mode 100644 engines/binject.c create mode 100644 os/binject.h --- Diff of recent changes: diff --git a/Makefile b/Makefile index 9fec137..13baee8 100644 --- a/Makefile +++ b/Makefile @@ -34,6 +34,7 @@ OBJS += engines/null.o OBJS += engines/net.o OBJS += engines/syslet-rw.o OBJS += engines/guasi.o +OBJS += engines/binject.o OBJS += profiles/tiobench.o diff --git a/engines/binject.c b/engines/binject.c new file mode 100644 index 0000000..3f663bd --- /dev/null +++ b/engines/binject.c @@ -0,0 +1,295 @@ +/* + * binject engine + * + * IO engine that uses the Linux binject interface to directly inject + * bio's to block devices. 
+ * + */ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <assert.h> +#include <string.h> +#include <sys/poll.h> + +#include "../fio.h" + +#ifdef FIO_HAVE_BINJECT + +struct binject_data { + struct b_user_cmd *cmds; + struct io_u **events; + struct pollfd *pfds; + int *fd_flags; + unsigned int bs; +}; + +static void binject_buc_init(struct binject_data *bd, struct io_u *io_u) +{ + struct b_user_cmd *buc = &io_u->buc; + + memset(buc, 0, sizeof(*buc)); + binject_buc_set_magic(buc); + + buc->buf = (unsigned long) io_u->xfer_buf; + buc->len = io_u->xfer_buflen; + buc->offset = io_u->offset; + buc->usr_ptr = (unsigned long) io_u; + + buc->flags = B_FLAG_NOIDLE | B_FLAG_UNPLUG; + assert(buc->buf); +} + +static int pollin_events(struct pollfd *pfds, int fds) +{ + int i; + + for (i = 0; i < fds; i++) + if (pfds[i].revents & POLLIN) + return 1; + + return 0; +} + +static int fio_binject_getevents(struct thread_data *td, unsigned int min, + unsigned int max, struct timespec fio_unused *t) +{ + struct binject_data *bd = td->io_ops->data; + int left = max, ret, r = 0, ev_index = 0; + void *buf = bd->cmds; + unsigned int i, events; + struct fio_file *f; + + /* + * Fill in the file descriptors + */ + for_each_file(td, f, i) { + /* + * don't block for min events == 0 + */ + if (!min) { + bd->fd_flags[i] = fcntl(f->fd, F_GETFL); + fcntl(f->fd, F_SETFL, bd->fd_flags[i] | O_NONBLOCK); + } + bd->pfds[i].fd = f->fd; + bd->pfds[i].events = POLLIN; + } + + while (left) { + void *p; + + do { + if (!min) + break; + + ret = poll(bd->pfds, td->o.nr_files, -1); + if (ret < 0) { + if (!r) + r = -errno; + td_verror(td, errno, "poll"); + break; + } else if (!ret) + continue; + + if (pollin_events(bd->pfds, td->o.nr_files)) + break; + } while (1); + + if (r < 0) + break; + +re_read: + p = buf; + events = 0; + for_each_file(td, f, i) { + ret = read(f->fd, p, left * sizeof(struct b_user_cmd)); + if (ret < 0) { + if (errno == EAGAIN) + continue; + r = 
-errno; + td_verror(td, errno, "read"); + break; + } else if (ret) { + p += ret; + events += ret / sizeof(struct b_user_cmd); + } + } + + if (r < 0) + break; + if (!events) { + usleep(1000); + goto re_read; + } + + left -= events; + r += events; + + for (i = 0; i < events; i++) { + struct b_user_cmd *buc = (struct b_user_cmd *) buf + i; + + bd->events[ev_index] = (struct io_u *) buc->usr_ptr; + ev_index++; + } + } + + if (!min) { + for_each_file(td, f, i) + fcntl(f->fd, F_SETFL, bd->fd_flags[i]); + } + + if (r > 0) + assert(ev_index == r); + + return r; +} + +static int fio_binject_doio(struct thread_data *td, struct io_u *io_u) +{ + struct b_user_cmd *buc = &io_u->buc; + struct fio_file *f = io_u->file; + int ret; + + ret = write(f->fd, buc, sizeof(*buc)); + if (ret < 0) + return ret; + + return FIO_Q_QUEUED; +} + +static int fio_binject_prep(struct thread_data *td, struct io_u *io_u) +{ + struct binject_data *bd = td->io_ops->data; + struct b_user_cmd *buc = &io_u->buc; + + bd->bs = 512; + + if (io_u->xfer_buflen & (bd->bs - 1)) { + log_err("read/write not sector aligned\n"); + return EINVAL; + } + + if (io_u->ddir == DDIR_READ) { + binject_buc_init(bd, io_u); + buc->type = B_TYPE_READ; + } else if (io_u->ddir == DDIR_WRITE) { + binject_buc_init(bd, io_u); + buc->type = B_TYPE_WRITEZERO; + } else if (io_u->ddir == DDIR_TRIM) { + binject_buc_init(bd, io_u); + buc->type = B_TYPE_DISCARD; + } else { + assert(0); + } + + return 0; +} + +static int fio_binject_queue(struct thread_data *td, struct io_u *io_u) +{ + int ret; + + fio_ro_check(td, io_u); + + ret = fio_binject_doio(td, io_u); + + if (ret < 0) + io_u->error = errno; + + if (io_u->error) { + td_verror(td, io_u->error, "xfer"); + return FIO_Q_COMPLETED; + } + + return ret; +} + +static struct io_u *fio_binject_event(struct thread_data *td, int event) +{ + struct binject_data *bd = td->io_ops->data; + + return bd->events[event]; +} + +static void fio_binject_cleanup(struct thread_data *td) +{ + struct 
binject_data *bd = td->io_ops->data; + + if (bd) { + free(bd->events); + free(bd->cmds); + free(bd->fd_flags); + free(bd->pfds); + free(bd); + } +} + +static int fio_binject_init(struct thread_data *td) +{ + struct binject_data *bd; + + bd = malloc(sizeof(*bd)); + memset(bd, 0, sizeof(*bd)); + + bd->cmds = malloc(td->o.iodepth * sizeof(struct b_user_cmd)); + memset(bd->cmds, 0, td->o.iodepth * sizeof(struct b_user_cmd)); + + bd->events = malloc(td->o.iodepth * sizeof(struct io_u *)); + memset(bd->events, 0, td->o.iodepth * sizeof(struct io_u *)); + + bd->pfds = malloc(sizeof(struct pollfd) * td->o.nr_files); + memset(bd->pfds, 0, sizeof(struct pollfd) * td->o.nr_files); + + bd->fd_flags = malloc(sizeof(int) * td->o.nr_files); + memset(bd->fd_flags, 0, sizeof(int) * td->o.nr_files); + + td->io_ops->data = bd; + return 0; +} + +static struct ioengine_ops ioengine = { + .name = "binject", + .version = FIO_IOOPS_VERSION, + .init = fio_binject_init, + .prep = fio_binject_prep, + .queue = fio_binject_queue, + .getevents = fio_binject_getevents, + .event = fio_binject_event, + .cleanup = fio_binject_cleanup, + .open_file = generic_open_file, + .close_file = generic_close_file, + .get_file_size = generic_get_file_size, + .flags = FIO_RAWIO, +}; + +#else /* FIO_HAVE_BINJECT */ + +/* + * When we have a proper configure system in place, we simply wont build + * and install this io engine. For now install a crippled version that + * just complains and fails to load. 
+ */ +static int fio_binject_init(struct thread_data fio_unused *td) +{ + fprintf(stderr, "fio: ioengine binject not available\n"); + return 1; +} +/* Ops table for builds without FIO_HAVE_BINJECT: only name, version and the failing init hook are set. */ +static struct ioengine_ops ioengine = { + .name = "binject", + .version = FIO_IOOPS_VERSION, + .init = fio_binject_init, +}; + +#endif +/* Hands whichever ioengine table was compiled above to register_ioengine(); fio_init is presumably a load-time constructor attribute (confirm in fio.h). */ +static void fio_init fio_binject_register(void) +{ + register_ioengine(&ioengine); +} +/* Counterpart of fio_binject_register(): passes the same table to unregister_ioengine(). */ +static void fio_exit fio_binject_unregister(void) +{ + unregister_ioengine(&ioengine); +} diff --git a/fio.c b/fio.c index 7b03250..d20fc24 100644 --- a/fio.c +++ b/fio.c @@ -811,7 +811,8 @@ static int init_io_u(struct thread_data *td) if (allocate_io_mem(td)) return 1; - if (td->o.odirect || td->o.mem_align) + if (td->o.odirect || td->o.mem_align || + (td->io_ops->flags & FIO_RAWIO)) /* engines flagged FIO_RAWIO now take the page-aligned path too */ p = PAGE_ALIGN(td->orig_buffer) + td->o.mem_align; else p = td->orig_buffer; diff --git a/ioengine.h b/ioengine.h index f6238f8..344cdbf 100644 --- a/ioengine.h +++ b/ioengine.h @@ -32,6 +32,9 @@ struct io_u { #ifdef FIO_HAVE_SOLARISAIO aio_result_t resultp; #endif +#ifdef FIO_HAVE_BINJECT + struct b_user_cmd buc; /* per-io_u binject command, filled by the engine's prep hook */ +#endif void *mmap_data; }; struct timeval start_time; diff --git a/options.c b/options.c index e255e94..bdf3582 100644 --- a/options.c +++ b/options.c @@ -974,6 +974,11 @@ static struct fio_option options[FIO_MAX_OPTS] = { .help = "GUASI IO engine", }, #endif +#ifdef FIO_HAVE_BINJECT + { .ival = "binject", + .help = "binject direct inject block engine", + }, +#endif { .ival = "external", .help = "Load external engine (append name)", }, diff --git a/os/binject.h b/os/binject.h new file mode 100644 index 0000000..1f358f3 --- /dev/null +++ b/os/binject.h @@ -0,0 +1,65 @@ +#ifndef BINJECT_H +#define BINJECT_H + +#include <linux/types.h> +/* b_user_cmd.magic layout: BINJECT_MAGIC in the bits above BINJECT_MAGIC_SHIFT, BINJECT_VER in the low bits. */ +#define BINJECT_MAGIC 0x89 +#define BINJECT_VER 0x01 +#define BINJECT_MAGIC_SHIFT 8 +#define BINJECT_VER_MASK ((1 << BINJECT_MAGIC_SHIFT) - 1) +/* Command record the engine write()s to a device fd to submit an I/O and read()s back when reaping events. */ +struct b_user_cmd { + __u16 magic; /* INPUT: set by binject_buc_set_magic() */ + __u16 type; /* INPUT: a B_TYPE_* value */ + __u32 error; /* OUTPUT */ + __u32 flags; /* INPUT: OR of B_FLAG_* bits */ 
+ __u32 len; /* INPUT: transfer length (engine passes io_u->xfer_buflen) */ + __u64 offset; /* INPUT: engine passes io_u->offset */ + __u64 buf; /* INPUT: buffer address (engine passes io_u->xfer_buf) */ + __u64 usr_ptr; /* PASSED THROUGH: engine stores the io_u pointer here */ + __u64 nsec; /* OUTPUT */ +}; +/* fd and minor pair; not referenced by the engine code in this patch. */ +struct b_ioctl_cmd { + int fd; + int minor; +}; +/* Values for b_user_cmd.type; B_TYPE_NR, being last, equals the number of types. */ +enum { + B_TYPE_READ = 0, + B_TYPE_WRITE, + B_TYPE_DISCARD, + B_TYPE_READVOID, + B_TYPE_WRITEZERO, + B_TYPE_NR +}; +/* __B_FLAG_* are bit numbers; each B_FLAG_* below is the single-bit mask 1 << __B_FLAG_*. */ +enum { + __B_FLAG_SYNC = 0, + __B_FLAG_UNPLUG, + __B_FLAG_NOIDLE, + __B_FLAG_BARRIER, + __B_FLAG_META, + __B_FLAG_RAHEAD, + __B_FLAG_FAILFAST_DEV, + __B_FLAG_FAILFAST_TRANSPORT, + __B_FLAG_FAILFAST_DRIVER, + __B_FLAG_NR, + + B_FLAG_SYNC = 1 << __B_FLAG_SYNC, + B_FLAG_UNPLUG = 1 << __B_FLAG_UNPLUG, + B_FLAG_NOIDLE = 1 << __B_FLAG_NOIDLE, + B_FLAG_BARRIER = 1 << __B_FLAG_BARRIER, + B_FLAG_META = 1 << __B_FLAG_META, + B_FLAG_RAHEAD = 1 << __B_FLAG_RAHEAD, + B_FLAG_FAILFAST_DEV = 1 << __B_FLAG_FAILFAST_DEV, + B_FLAG_FAILFAST_TRANSPORT = 1 << __B_FLAG_FAILFAST_TRANSPORT, + B_FLAG_FAILFAST_DRIVER = 1 << __B_FLAG_FAILFAST_DRIVER, +}; +/* Writes (BINJECT_MAGIC << BINJECT_MAGIC_SHIFT) | BINJECT_VER into buc->magic. */ +static inline void binject_buc_set_magic(struct b_user_cmd *buc) +{ + buc->magic = (BINJECT_MAGIC << BINJECT_MAGIC_SHIFT) | BINJECT_VER; +} + +#endif diff --git a/os/os-linux.h b/os/os-linux.h index f7154a4..20f2a94 100644 --- a/os/os-linux.h +++ b/os/os-linux.h @@ -14,6 +14,7 @@ #include <linux/major.h> #include "indirect.h" +#include "binject.h" #define FIO_HAVE_LIBAIO #define FIO_HAVE_POSIXAIO @@ -37,6 +38,7 @@ #define FIO_HAVE_FDATASYNC #define FIO_HAVE_FS_STAT #define FIO_HAVE_TRIM +#define FIO_HAVE_BINJECT #ifdef SYNC_FILE_RANGE_WAIT_BEFORE #define FIO_HAVE_SYNC_FILE_RANGE -- To unsubscribe from this list: send the line "unsubscribe fio" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html