The following changes since commit d175907a7bd0b78cdd18bf47882428368a448057: Fio 1.44 (2010-10-07 10:31:16 +0200) are available in the git repository at: git://git.kernel.dk/fio.git master Jens Axboe (4): binject: setup and teardown mappings internally binject updates Add missing file.h f->file_data change Initial support for explicit write barriers engines/binject.c | 212 ++++++++++++++++++++++++++++++++++++++++++---------- file.h | 9 +-- filesetup.c | 1 + fio.h | 1 + io_u.c | 15 +++- ioengine.h | 2 + options.c | 7 ++ os/binject.h | 2 + 8 files changed, 199 insertions(+), 50 deletions(-) --- Diff of recent changes: diff --git a/engines/binject.c b/engines/binject.c index 68b6353..44a3796 100644 --- a/engines/binject.c +++ b/engines/binject.c @@ -24,6 +24,12 @@ struct binject_data { int *fd_flags; }; +struct binject_file { + unsigned int bs; + int minor; + int fd; +}; + static void binject_buc_init(struct binject_data *bd, struct io_u *io_u) { struct b_user_cmd *buc = &io_u->buc; @@ -51,6 +57,37 @@ static int pollin_events(struct pollfd *pfds, int fds) return 0; } +static unsigned int binject_read_commands(struct thread_data *td, void *p, + int left, int *err) +{ + struct binject_file *bf; + struct fio_file *f; + int i, ret, events; + +one_more: + events = 0; + for_each_file(td, f, i) { + bf = f->file_data; + ret = read(bf->fd, p, left * sizeof(struct b_user_cmd)); + if (ret < 0) { + if (errno == EAGAIN) + continue; + *err = -errno; + td_verror(td, errno, "read"); + break; + } else if (ret) { + p += ret; + events += ret / sizeof(struct b_user_cmd); + } + } + + if (*err || events) + return events; + + usleep(1000); + goto one_more; +} + static int fio_binject_getevents(struct thread_data *td, unsigned int min, unsigned int max, struct timespec fio_unused *t) { @@ -59,29 +96,27 @@ static int fio_binject_getevents(struct thread_data *td, unsigned int min, void *buf = bd->cmds; unsigned int i, events; struct fio_file *f; + struct binject_file *bf; /* * Fill in the file descriptors */ for_each_file(td, f, i) { + bf = f->file_data; + /* * don't block for min events == 0 */ if (!min) { - bd->fd_flags[i] = fcntl(f->fd, F_GETFL); - fcntl(f->fd, F_SETFL, bd->fd_flags[i] | O_NONBLOCK); + bd->fd_flags[i] = fcntl(bf->fd, F_GETFL); + fcntl(bf->fd, F_SETFL, bd->fd_flags[i] | O_NONBLOCK); } - bd->pfds[i].fd = f->fd; + bd->pfds[i].fd = bf->fd; bd->pfds[i].events = POLLIN; } while (left) { - void *p; - - do { - if (!min) - break; - + while (!min) { ret = poll(bd->pfds, td->o.nr_files, -1); if (ret < 0) { if (!r) @@ -93,34 +128,15 @@ static int fio_binject_getevents(struct thread_data *td, unsigned int min, if (pollin_events(bd->pfds, td->o.nr_files)) break; - } while (1); + } if (r < 0) break; -re_read: - p = buf; - events = 0; - for_each_file(td, f, i) { - ret = read(f->fd, p, left * sizeof(struct b_user_cmd)); - if (ret < 0) { - if (errno == EAGAIN) - continue; - r = -errno; - td_verror(td, errno, "read"); - break; - } else if (ret) { - p += ret; - events += ret / sizeof(struct b_user_cmd); - } - } + events = binject_read_commands(td, buf, left, &r); if (r < 0) break; - if (!events) { - usleep(1000); - goto re_read; - } left -= events; r += events; @@ -134,8 +150,10 @@ re_read: } if (!min) { - for_each_file(td, f, i) - fcntl(f->fd, F_SETFL, bd->fd_flags[i]); + for_each_file(td, f, i) { + bf = f->file_data; + fcntl(bf->fd, F_SETFL, bd->fd_flags[i]); + } } if (r > 0) @@ -147,10 +165,10 @@ re_read: static int fio_binject_doio(struct thread_data *td, struct io_u *io_u) { struct b_user_cmd *buc = &io_u->buc; - struct fio_file *f = io_u->file; + struct binject_file *bf = io_u->file->file_data; int ret; - ret = write(f->fd, buc, sizeof(*buc)); + ret = write(bf->fd, buc, sizeof(*buc)); if (ret < 0) return ret; @@ -160,10 +178,10 @@ static int fio_binject_doio(struct thread_data *td, struct io_u *io_u) static int fio_binject_prep(struct thread_data *td, struct io_u *io_u) { struct binject_data *bd = td->io_ops->data; - unsigned int bs = io_u->file->file_data; struct b_user_cmd *buc = &io_u->buc; + struct binject_file *bf = io_u->file->file_data; - if (io_u->xfer_buflen & (bs - 1)) { + if (io_u->xfer_buflen & (bf->bs - 1)) { log_err("read/write not sector aligned\n"); return EINVAL; } @@ -173,7 +191,10 @@ static int fio_binject_prep(struct thread_data *td, struct io_u *io_u) buc->type = B_TYPE_READ; } else if (io_u->ddir == DDIR_WRITE) { binject_buc_init(bd, io_u); - buc->type = B_TYPE_WRITE; + if (io_u->flags & IO_U_F_BARRIER) + buc->type = B_TYPE_WRITEBARRIER; + else + buc->type = B_TYPE_WRITE; } else if (io_u->ddir == DDIR_TRIM) { binject_buc_init(bd, io_u); buc->type = B_TYPE_DISCARD; @@ -210,8 +231,107 @@ static struct io_u *fio_binject_event(struct thread_data *td, int event) return bd->events[event]; } +static void binject_unmap_dev(struct thread_data *td, struct binject_file *bf) +{ + struct b_ioctl_cmd bic; + int fdb; + + if (bf->fd >= 0) { + close(bf->fd); + bf->fd = -1; + } + + fdb = open("/dev/binject-ctl", O_RDWR); + if (fdb < 0) { + td_verror(td, errno, "open binject-ctl"); + return; + } + + bic.minor = bf->minor; + + if (ioctl(fdb, 1, &bic) < 0) { + td_verror(td, errno, "binject dev unmap"); + close(fdb); + return; + } + + close(fdb); +} + +static int binject_map_dev(struct thread_data *td, struct binject_file *bf, + int fd) +{ + struct b_ioctl_cmd bic; + char name[80]; + struct stat sb; + int fdb, dev_there, loops; + + fdb = open("/dev/binject-ctl", O_RDWR); + if (fdb < 0) { + td_verror(td, errno, "binject ctl open"); + return 1; + } + + bic.fd = fd; + + if (ioctl(fdb, 0, &bic) < 0) { + td_verror(td, errno, "binject dev map"); + close(fdb); + return 1; + } + + bf->minor = bic.minor; + + sprintf(name, "/dev/binject%u", bf->minor); + + /* + * Wait for udev to create the node... + */ + dev_there = loops = 0; + do { + if (!stat(name, &sb)) { + dev_there = 1; + break; + } + + usleep(10000); + } while (++loops < 100); + + close(fdb); + + if (!dev_there) { + log_err("fio: timed out waiting for binject dev\n"); + goto err_unmap; + } + + bf->fd = open(name, O_RDWR); + if (bf->fd < 0) { + td_verror(td, errno, "binject dev open"); +err_unmap: + binject_unmap_dev(td, bf); + return 1; + } + + return 0; +} + +static int fio_binject_close_file(struct thread_data *td, struct fio_file *f) +{ + struct binject_file *bf = f->file_data; + + if (bf) { + binject_unmap_dev(td, bf); + free(bf); + f->file_data = NULL; + return generic_close_file(td, f); + } + + return 0; +} + static int fio_binject_open_file(struct thread_data *td, struct fio_file *f) { + struct binject_file *bf; unsigned int bs; int ret; @@ -221,14 +341,24 @@ static int fio_binject_open_file(struct thread_data *td, struct fio_file *f) if (f->filetype != FIO_TYPE_BD) { log_err("fio: binject only works with block devices\n"); - return 1; + goto err_close; } if (ioctl(f->fd, BLKSSZGET, &bs) < 0) { td_verror(td, errno, "BLKSSZGET"); + goto err_close; + } + + bf = malloc(sizeof(*bf)); + bf->bs = bs; + bf->minor = bf->fd = -1; + f->file_data = bf; + + if (binject_map_dev(td, bf, f->fd)) { +err_close: + ret = generic_close_file(td, f); return 1; } - f->file_data = bs; return 0; } @@ -278,9 +408,9 @@ static struct ioengine_ops ioengine = { .event = fio_binject_event, .cleanup = fio_binject_cleanup, .open_file = fio_binject_open_file, - .close_file = generic_close_file, + .close_file = fio_binject_close_file, .get_file_size = generic_get_file_size, - .flags = FIO_RAWIO, + .flags = FIO_RAWIO | FIO_BARRIER, }; #else /* FIO_HAVE_BINJECT */ diff --git a/file.h b/file.h index 6349465..fa8c1d2 100644 --- a/file.h +++ b/file.h @@ -47,13 +47,8 @@ struct fio_file { struct flist_head hash_list; enum fio_filetype filetype; - /* - * A file may not be a file descriptor, let the io engine decide - */ - union { - unsigned long file_data; - int fd; - }; + void *file_data; + int fd; /* * filename and possible memory mapping diff --git a/filesetup.c b/filesetup.c index 5a8105a..ae893c7 100644 --- a/filesetup.c +++ b/filesetup.c @@ -244,6 +244,7 @@ static int bdev_size(struct thread_data *td, struct fio_file *f) r = blockdev_size(f->fd, &bytes); if (r) { td_verror(td, r, "blockdev_size"); + printf("fd is %d\n", f->fd); goto err; } diff --git a/fio.h b/fio.h index c2a0d4d..4ed8cb1 100644 --- a/fio.h +++ b/fio.h @@ -217,6 +217,7 @@ struct thread_options { unsigned int thinktime_blocks; unsigned int fsync_blocks; unsigned int fdatasync_blocks; + unsigned int barrier_blocks; unsigned long start_delay; unsigned long long timeout; unsigned long long ramp_time; diff --git a/io_u.c b/io_u.c index baa961b..6d539a0 100644 --- a/io_u.c +++ b/io_u.c @@ -501,6 +501,17 @@ static enum fio_ddir get_rw_ddir(struct thread_data *td) return td->rwmix_ddir; } +static void set_rw_ddir(struct thread_data *td, struct io_u *io_u) +{ + io_u->ddir = get_rw_ddir(td); + + if (io_u->ddir == DDIR_WRITE && (td->io_ops->flags & FIO_BARRIER) && + td->o.barrier_blocks && + !(td->io_issues[DDIR_WRITE] % td->o.barrier_blocks) && + td->io_issues[DDIR_WRITE]) + io_u->flags |= IO_U_F_BARRIER; +} + void put_file_log(struct thread_data *td, struct fio_file *f) { int ret = put_file(td, f); @@ -560,7 +571,7 @@ static int fill_io_u(struct thread_data *td, struct io_u *io_u) if (td->io_ops->flags & FIO_NOIO) goto out; - io_u->ddir = get_rw_ddir(td); + set_rw_ddir(td, io_u); /* * fsync() or fdatasync() or trim etc, we are done @@ -963,7 +974,7 @@ again: if (io_u) { assert(io_u->flags & IO_U_F_FREE); io_u->flags &= ~(IO_U_F_FREE | IO_U_F_FREE_DEF); - io_u->flags &= ~IO_U_F_TRIMMED; + io_u->flags &= ~(IO_U_F_TRIMMED | IO_U_F_BARRIER); io_u->error = 0; flist_del(&io_u->list); diff --git a/ioengine.h b/ioengine.h index 344cdbf..7a3c08f 100644 --- a/ioengine.h +++ b/ioengine.h @@ -10,6 +10,7 @@ enum { IO_U_F_IN_CUR_DEPTH = 1 << 3, IO_U_F_BUSY_OK = 1 << 4, IO_U_F_TRIMMED = 1 << 5, + IO_U_F_BARRIER = 1 << 6, }; /* @@ -126,6 +127,7 @@ enum fio_ioengine_flags { FIO_NOIO = 1 << 6, /* thread does only pseudo IO */ FIO_SIGQUIT = 1 << 7, /* needs SIGQUIT to exit */ FIO_PIPEIO = 1 << 8, /* input/output no seekable */ + FIO_BARRIER = 1 << 9, /* engine supports barriers */ }; /* diff --git a/options.c b/options.c index bdf3582..d111018 100644 --- a/options.c +++ b/options.c @@ -1185,6 +1185,13 @@ static struct fio_option options[FIO_MAX_OPTS] = { .help = "Issue fdatasync for writes every given number of blocks", .def = "0", }, + { + .name = "write_barrier", + .type = FIO_OPT_INT, + .off1 = td_var_offset(barrier_blocks), + .help = "Make every Nth write a barrier write", + .def = "0", + }, #ifdef FIO_HAVE_SYNC_FILE_RANGE { .name = "sync_file_range", diff --git a/os/binject.h b/os/binject.h index 1f358f3..24eee0d 100644 --- a/os/binject.h +++ b/os/binject.h @@ -31,6 +31,8 @@ enum { B_TYPE_DISCARD, B_TYPE_READVOID, B_TYPE_WRITEZERO, + B_TYPE_READBARRIER, + B_TYPE_WRITEBARRIER, B_TYPE_NR }; -- To unsubscribe from this list: send the line "unsubscribe fio" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html