The following changes since commit 7eb36574da703ebdfac414d7428712320f552a96: Make profile io op overrides a dedicated structure (2010-03-08 13:58:49 +0100) are available in the git repository at: git://git.kernel.dk/fio.git master Jens Axboe (9): Add profile td init/exit with stored data Remove debug printf() Initial support for sync_file_range() Allow OR'able option values Make sure we handle multiple arguments to sync_file_range Add documentation for 'sync_file_range' Abstract out generic sync helper Assign io_u->error directly in do_io_u_sync() Update libaio/posixaio/splice for sync updates HOWTO | 16 +++++++++++++- engines/libaio.c | 12 +--------- engines/posixaio.c | 4 +-- engines/splice.c | 2 +- engines/sync.c | 20 ++++------------- file.h | 3 ++ fio.1 | 21 +++++++++++++++++++ fio.h | 5 ++++ init.c | 5 ++++ io_ddir.h | 4 ++- io_u.c | 26 ++++++++++++++++++++++++ ioengine.h | 4 ++- ioengines.c | 39 ++++++++++++++++++++++++++++++++++++ log.c | 5 ++- options.c | 41 ++++++++++++++++++++++++++++++++++++++ os/os-linux.h | 1 + parse.c | 56 ++++++++++++++++++++++++++++----------------------- parse.h | 2 + profile.c | 18 ++++++++++++++++ profile.h | 6 +++++ 20 files changed, 231 insertions(+), 59 deletions(-) --- Diff of recent changes: diff --git a/HOWTO b/HOWTO index 886515b..6c7f05b 100644 --- a/HOWTO +++ b/HOWTO @@ -566,11 +566,25 @@ fsync=int If writing to a file, issue a sync of the dirty data not sync the file. The exception is the sg io engine, which synchronizes the disk cache anyway. -fsyncdata=int Like fsync= but uses fdatasync() to only sync data and not +fdatasync=int Like fsync= but uses fdatasync() to only sync data and not metadata blocks. In FreeBSD there is no fdatasync(), this falls back to using fsync() +sync_file_range=str:val Use sync_file_range() for every 'val' number of + write operations. Fio will track range of writes that + have happened since the last sync_file_range() call.
'str' + can currently be one or more of: + + wait_before SYNC_FILE_RANGE_WAIT_BEFORE + write SYNC_FILE_RANGE_WRITE + wait_after SYNC_FILE_RANGE_WAIT_AFTER + + So if you do sync_file_range=wait_before,write:8, fio would + use SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE for + every 8 writes. Also see the sync_file_range(2) man page. + This option is Linux specific. + overwrite=bool If true, writes to a file will always overwrite existing data. If the file doesn't already exist, it will be created before the write phase begins. If the file exists diff --git a/engines/libaio.c b/engines/libaio.c index bd8ebb8..ff5709b 100644 --- a/engines/libaio.c +++ b/engines/libaio.c @@ -97,19 +97,11 @@ static int fio_libaio_queue(struct thread_data *td, struct io_u *io_u) * support aio fsync yet. So return busy for the case where we * have pending io, to let fio complete those first. */ - if (io_u->ddir == DDIR_SYNC) { + if (ddir_sync(io_u->ddir)) { if (ld->iocbs_nr) return FIO_Q_BUSY; - if (fsync(io_u->file->fd) < 0) - io_u->error = errno; - - return FIO_Q_COMPLETED; - } else if (io_u->ddir == DDIR_DATASYNC) { - if (ld->iocbs_nr) - return FIO_Q_BUSY; - if (fdatasync(io_u->file->fd) < 0) - io_u->error = errno; + do_io_u_sync(td, io_u); return FIO_Q_COMPLETED; } diff --git a/engines/posixaio.c b/engines/posixaio.c index 3ffdcb6..70282f2 100644 --- a/engines/posixaio.c +++ b/engines/posixaio.c @@ -175,9 +175,7 @@ static int fio_posixaio_queue(struct thread_data fio_unused *td, if (pd->queued) return FIO_Q_BUSY; - if (fsync(io_u->file->fd) < 0) - io_u->error = errno; - + do_io_u_sync(td, io_u); return FIO_Q_COMPLETED; #endif } diff --git a/engines/splice.c b/engines/splice.c index ca43e43..e9b6aad 100644 --- a/engines/splice.c +++ b/engines/splice.c @@ -224,7 +224,7 @@ static int fio_spliceio_queue(struct thread_data *td, struct io_u *io_u) } else if (io_u->ddir == DDIR_WRITE) ret = fio_splice_write(td, io_u); else - ret = fsync(io_u->file->fd); + ret = do_io_u_sync(td, io_u); 
if (ret != (int) io_u->xfer_buflen) { if (ret >= 0) { diff --git a/engines/sync.c b/engines/sync.c index 12b85f6..5194f0f 100644 --- a/engines/sync.c +++ b/engines/sync.c @@ -76,7 +76,7 @@ static int fio_psyncio_queue(struct thread_data *td, struct io_u *io_u) else if (io_u->ddir == DDIR_WRITE) ret = pwrite(f->fd, io_u->xfer_buf, io_u->xfer_buflen, io_u->offset); else - ret = fsync(f->fd); + ret = do_io_u_sync(td, io_u); return fio_io_end(td, io_u, ret); } @@ -93,7 +93,7 @@ static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u) else if (io_u->ddir == DDIR_WRITE) ret = write(f->fd, io_u->xfer_buf, io_u->xfer_buflen); else - ret = fsync(f->fd); + ret = do_io_u_sync(td, io_u); return fio_io_end(td, io_u, ret); } @@ -163,22 +163,12 @@ static int fio_vsyncio_queue(struct thread_data *td, struct io_u *io_u) */ if (sd->queued) return FIO_Q_BUSY; - if (io_u->ddir == DDIR_SYNC) { - int ret = fsync(io_u->file->fd); + if (ddir_sync(io_u->ddir)) { + int ret = do_io_u_sync(td, io_u); return fio_io_end(td, io_u, ret); - } else if (io_u->ddir == DDIR_DATASYNC) { - int ret; -#ifdef FIO_HAVE_FDATASYNC - ret = fdatasync(io_u->file->fd); -#else - ret = io_u->xfer_buflen; - io_u->error = EINVAL; -#endif - return fio_io_end(td, io_u, ret); - } - + sd->queued = 0; sd->queued_bytes = 0; fio_vsyncio_set_iov(sd, io_u, 0); diff --git a/file.h b/file.h index 2abe3ba..30293fc 100644 --- a/file.h +++ b/file.h @@ -74,6 +74,9 @@ struct fio_file { unsigned long long last_pos; + unsigned long long first_write; + unsigned long long last_write; + /* * For use by the io engine */ diff --git a/fio.1 b/fio.1 index 60f787a..5d0988b 100644 --- a/fio.1 +++ b/fio.1 @@ -380,6 +380,27 @@ How many I/Os to perform before issuing an \fBfsync\fR\|(2) of dirty data. If Like \fBfsync\fR, but uses \fBfdatasync\fR\|(2) instead to only sync the data parts of the file. Default: 0. .TP +.BI sync_file_range \fR=\fPstr:int +Use sync_file_range() for every \fRval\fP number of write operations. 
Fio will +track range of writes that have happened since the last sync_file_range() call. +\fRstr\fP can currently be one or more of: +.RS +.TP +.B wait_before +SYNC_FILE_RANGE_WAIT_BEFORE +.TP +.B write +SYNC_FILE_RANGE_WRITE +.TP +.B wait_after +SYNC_FILE_RANGE_WAIT_AFTER +.TP +.RE +.P +So if you do sync_file_range=wait_before,write:8, fio would use +\fBSYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE\fP for every 8 writes. +Also see the sync_file_range(2) man page. This option is Linux specific. +.TP .BI overwrite \fR=\fPbool If writing, setup the file first and do overwrites. Default: false. .TP diff --git a/fio.h b/fio.h index 09cd01c..05911c0 100644 --- a/fio.h +++ b/fio.h @@ -282,6 +282,8 @@ struct thread_options { unsigned int uid; unsigned int gid; + + unsigned int sync_file_range; }; #define FIO_VERROR_SIZE 128 @@ -417,6 +419,8 @@ struct thread_data { unsigned int file_service_left; struct fio_file *file_service_file; + unsigned int sync_file_range_nr; + /* * For generating file sizes */ @@ -432,6 +436,7 @@ struct thread_data { * Can be overloaded by profiles */ struct prof_io_ops prof_io_ops; + void *prof_data; }; /* diff --git a/init.c b/init.c index a79bd1a..5d185fe 100644 --- a/init.c +++ b/init.c @@ -178,6 +178,8 @@ static void put_job(struct thread_data *td) { if (td == &def_thread) return; + + profile_td_exit(td); if (td->error) log_info("fio: %s\n", td->verror); @@ -502,6 +504,9 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num) return 0; } + if (profile_td_init(td)) + return 1; + engine = get_engine_name(td->o.ioengine); td->io_ops = load_ioengine(td, engine); if (!td->io_ops) { diff --git a/io_ddir.h b/io_ddir.h index 03eefdb..87cded4 100644 --- a/io_ddir.h +++ b/io_ddir.h @@ -6,6 +6,7 @@ enum fio_ddir { DDIR_WRITE, DDIR_SYNC, DDIR_DATASYNC, + DDIR_SYNC_FILE_RANGE, DDIR_WAIT, DDIR_INVAL = -1, }; @@ -28,7 +29,8 @@ enum td_ddir { static inline int ddir_sync(enum fio_ddir ddir) { - return ddir == DDIR_SYNC || ddir ==
DDIR_DATASYNC; + return ddir == DDIR_SYNC || ddir == DDIR_DATASYNC || + ddir == DDIR_SYNC_FILE_RANGE; } #endif diff --git a/io_u.c b/io_u.c index 9b9570e..a4bf0c0 100644 --- a/io_u.c +++ b/io_u.c @@ -394,6 +394,14 @@ static enum fio_ddir get_rw_ddir(struct thread_data *td) td->io_issues[DDIR_WRITE] && should_fsync(td)) return DDIR_DATASYNC; + /* + * see if it's time to sync_file_range + */ + if (td->sync_file_range_nr && + !(td->io_issues[DDIR_WRITE] % td->sync_file_range_nr) && + td->io_issues[DDIR_WRITE] && should_fsync(td)) + return DDIR_SYNC_FILE_RANGE; + if (td_rw(td)) { /* * Check if it's time to seed a new data direction. @@ -996,6 +1004,7 @@ static void io_completed(struct thread_data *td, struct io_u *io_u, * initialized, silence that warning. */ unsigned long uninitialized_var(usec); + struct fio_file *f; dprint_io_u(io_u, "io complete"); @@ -1006,6 +1015,11 @@ static void io_completed(struct thread_data *td, struct io_u *io_u, if (ddir_sync(io_u->ddir)) { td->last_was_sync = 1; + f = io_u->file; + if (f) { + f->first_write = -1ULL; + f->last_write = -1ULL; + } return; } @@ -1021,6 +1035,18 @@ static void io_completed(struct thread_data *td, struct io_u *io_u, td->io_bytes[idx] += bytes; td->this_io_bytes[idx] += bytes; + if (idx == DDIR_WRITE) { + f = io_u->file; + if (f) { + if (f->first_write == -1ULL || + io_u->offset < f->first_write) + f->first_write = io_u->offset; + if (f->last_write == -1ULL || + ((io_u->offset + bytes) > f->last_write)) + f->last_write = io_u->offset + bytes; + } + } + if (ramp_time_over(td)) { unsigned long uninitialized_var(lusec); diff --git a/ioengine.h b/ioengine.h index eb6655d..91dd429 100644 --- a/ioengine.h +++ b/ioengine.h @@ -1,7 +1,7 @@ #ifndef FIO_IOENGINE_H #define FIO_IOENGINE_H -#define FIO_IOOPS_VERSION 10 +#define FIO_IOOPS_VERSION 11 enum { IO_U_F_FREE = 1 << 0, @@ -153,6 +153,8 @@ extern void io_u_fill_buffer(struct thread_data *td, struct io_u *, unsigned int void io_u_mark_complete(struct thread_data *, 
unsigned int); void io_u_mark_submit(struct thread_data *, unsigned int); +int do_io_u_sync(struct thread_data *, struct io_u *); + #ifdef FIO_INC_DEBUG static inline void dprint_io_u(struct io_u *io_u, const char *p) { diff --git a/ioengines.c b/ioengines.c index 7f0a5c4..366382e 100644 --- a/ioengines.c +++ b/ioengines.c @@ -431,3 +431,42 @@ int td_io_get_file_size(struct thread_data *td, struct fio_file *f) return td->io_ops->get_file_size(td, f); } + +static int do_sync_file_range(struct thread_data *td, struct fio_file *f) +{ + off64_t offset, nbytes; + + offset = f->first_write; + nbytes = f->last_write - f->first_write; + + if (!nbytes) + return 0; + + return sync_file_range(f->fd, offset, nbytes, td->o.sync_file_range); +} + +int do_io_u_sync(struct thread_data *td, struct io_u *io_u) +{ + int ret; + + if (io_u->ddir == DDIR_SYNC) { + ret = fsync(io_u->file->fd); + } else if (io_u->ddir == DDIR_DATASYNC) { +#ifdef FIO_HAVE_FDATASYNC + ret = fdatasync(io_u->file->fd); +#else + ret = io_u->xfer_buflen; + io_u->error = EINVAL; +#endif + } else if (io_u->ddir == DDIR_SYNC_FILE_RANGE) + ret = do_sync_file_range(td, io_u->file); + else { + ret = io_u->xfer_buflen; + io_u->error = EINVAL; + } + + if (ret < 0) + io_u->error = errno; + + return ret; +} diff --git a/log.c b/log.c index ba52f07..99f20b5 100644 --- a/log.c +++ b/log.c @@ -20,9 +20,10 @@ void queue_io_piece(struct thread_data *td, struct io_piece *ipo) void log_io_u(struct thread_data *td, struct io_u *io_u) { - const char *act[] = { "read", "write", "sync", "datasync" }; + const char *act[] = { "read", "write", "sync", "datasync", + "sync_file_range" }; - assert(io_u->ddir < 3); + assert(io_u->ddir <= 4); if (!td->o.write_iolog_file) return; diff --git a/options.c b/options.c index e2daf37..dda7cba 100644 --- a/options.c +++ b/options.c @@ -429,6 +429,20 @@ static int str_fst_cb(void *data, const char *str) return 0; } +static int str_sfr_cb(void *data, const char *str) +{ + struct thread_data *td = 
data; + char *nr = get_opt_postfix(str); + + td->sync_file_range_nr = 1; + if (nr) { + td->sync_file_range_nr = atoi(nr); + free(nr); + } + + return 0; +} + static int check_dir(struct thread_data *td, char *fname) { char file[PATH_MAX], *dir; @@ -1110,6 +1124,33 @@ static struct fio_option options[FIO_MAX_OPTS] = { .help = "Issue fdatasync for writes every given number of blocks", .def = "0", }, +#ifdef FIO_HAVE_SYNC_FILE_RANGE + { + .name = "sync_file_range", + .posval = { + { .ival = "wait_before", + .oval = SYNC_FILE_RANGE_WAIT_BEFORE, + .help = "SYNC_FILE_RANGE_WAIT_BEFORE", + .or = 1, + }, + { .ival = "write", + .oval = SYNC_FILE_RANGE_WRITE, + .help = "SYNC_FILE_RANGE_WRITE", + .or = 1, + }, + { + .ival = "wait_after", + .oval = SYNC_FILE_RANGE_WAIT_AFTER, + .help = "SYNC_FILE_RANGE_WAIT_AFTER", + .or = 1, + }, + }, + .type = FIO_OPT_STR_MULTI, + .cb = str_sfr_cb, + .off1 = td_var_offset(sync_file_range), + .help = "Use sync_file_range()", + }, +#endif { .name = "direct", .type = FIO_OPT_BOOL, diff --git a/os/os-linux.h b/os/os-linux.h index 01140e0..8c61cc0 100644 --- a/os/os-linux.h +++ b/os/os-linux.h @@ -34,6 +34,7 @@ #define FIO_HAVE_CL_SIZE #define FIO_HAVE_CGROUPS #define FIO_HAVE_FDATASYNC +#define FIO_HAVE_SYNC_FILE_RANGE #define OS_MAP_ANON MAP_ANONYMOUS diff --git a/parse.c b/parse.c index ff6a873..a5aa9f4 100644 --- a/parse.c +++ b/parse.c @@ -257,10 +257,13 @@ static int check_int(const char *p, int *val) return 1; } -#define val_store(ptr, val, off, data) \ +#define val_store(ptr, val, off, or, data) \ do { \ ptr = td_var((data), (off)); \ - *ptr = (val); \ + if ((or)) \ + *ptr |= (val); \ + else \ + *ptr = (val); \ } while (0) static int __handle_option(struct fio_option *o, const char *ptr, void *data, @@ -281,7 +284,8 @@ static int __handle_option(struct fio_option *o, const char *ptr, void *data, } switch (o->type) { - case FIO_OPT_STR: { + case FIO_OPT_STR: + case FIO_OPT_STR_MULTI: { fio_opt_str_fn *fn = o->cb; const struct value_pair 
*vp; struct value_pair posval[PARSE_MAX_VP]; @@ -289,21 +293,24 @@ static int __handle_option(struct fio_option *o, const char *ptr, void *data, posval_sort(o, posval); + ret = 1; for (i = 0; i < PARSE_MAX_VP; i++) { vp = &posval[i]; if (!vp->ival || vp->ival[0] == '\0') continue; - ret = 1; if (!strncmp(vp->ival, ptr, strlen(vp->ival))) { ret = 0; - if (o->roff1) - *(unsigned int *) o->roff1 = vp->oval; - else { + if (o->roff1) { + if (vp->or) + *(unsigned int *) o->roff1 |= vp->oval; + else + *(unsigned int *) o->roff1 = vp->oval; + } else { if (!o->off1) - break; - val_store(ilp, vp->oval, o->off1, data); + continue; + val_store(ilp, vp->oval, o->off1, vp->or, data); } - break; + continue; } } @@ -346,26 +353,26 @@ static int __handle_option(struct fio_option *o, const char *ptr, void *data, if (o->roff1) *(unsigned long long *) o->roff1 = ull; else - val_store(ilp, ull, o->off1, data); + val_store(ilp, ull, o->off1, 0, data); } if (!more) { if (o->roff2) *(unsigned long long *) o->roff2 = ull; else if (o->off2) - val_store(ilp, ull, o->off2, data); + val_store(ilp, ull, o->off2, 0, data); } } else { if (first) { if (o->roff1) *(unsigned long long *) o->roff1 = ull; else - val_store(ullp, ull, o->off1, data); + val_store(ullp, ull, o->off1, 0, data); } if (!more) { if (o->roff2) *(unsigned long long *) o->roff2 = ull; else if (o->off2) - val_store(ullp, ull, o->off2, data); + val_store(ullp, ull, o->off2, 0, data); } } } @@ -423,18 +430,18 @@ static int __handle_option(struct fio_option *o, const char *ptr, void *data, if (o->roff1) *(unsigned long *) o->roff1 = ul1; else - val_store(ilp, ul1, o->off1, data); + val_store(ilp, ul1, o->off1, 0, data); if (o->roff2) *(unsigned long *) o->roff2 = ul2; else - val_store(ilp, ul2, o->off2, data); + val_store(ilp, ul2, o->off2, 0, data); } if (o->roff3 && o->roff4) { *(unsigned long *) o->roff3 = ul1; *(unsigned long *) o->roff4 = ul2; } else if (o->off3 && o->off4) { - val_store(ilp, ul1, o->off3, data); - 
val_store(ilp, ul2, o->off4, data); + val_store(ilp, ul1, o->off3, 0, data); + val_store(ilp, ul2, o->off4, 0, data); } } @@ -468,13 +475,13 @@ static int __handle_option(struct fio_option *o, const char *ptr, void *data, if (o->roff1) *(unsigned int *)o->roff1 = il; else - val_store(ilp, il, o->off1, data); + val_store(ilp, il, o->off1, 0, data); } if (!more) { if (o->roff2) *(unsigned int *) o->roff2 = il; else if (o->off2) - val_store(ilp, il, o->off2, data); + val_store(ilp, il, o->off2, 0, data); } } break; @@ -489,13 +496,13 @@ static int __handle_option(struct fio_option *o, const char *ptr, void *data, if (o->roff1) *(unsigned int *) o->roff1 = 1; else - val_store(ilp, 1, o->off1, data); + val_store(ilp, 1, o->off1, 0, data); } if (!more) { if (o->roff2) *(unsigned int *) o->roff2 = 1; else if (o->off2) - val_store(ilp, 1, o->off2, data); + val_store(ilp, 1, o->off2, 0, data); } } break; @@ -825,8 +832,6 @@ int show_cmd_help(struct fio_option *options, const char *name) int found = 0; int show_all = 0; - printf("exec_profile=%s\n", exec_profile); - if (!name || !strcmp(name, "all")) show_all = 1; @@ -916,7 +921,8 @@ void option_init(struct fio_option *o) fprintf(stderr, "Option %s: neither cb nor offset given\n", o->name); } - if (o->type == FIO_OPT_STR || o->type == FIO_OPT_STR_STORE) + if (o->type == FIO_OPT_STR || o->type == FIO_OPT_STR_STORE || + o->type == FIO_OPT_STR_MULTI) return; if (o->cb && ((o->off1 || o->off2 || o->off3 || o->off4) || (o->roff1 || o->roff2 || o->roff3 || o->roff4))) { diff --git a/parse.h b/parse.h index 9cda559..41e3633 100644 --- a/parse.h +++ b/parse.h @@ -9,6 +9,7 @@ enum fio_opt_type { FIO_OPT_INVALID = 0, FIO_OPT_STR, + FIO_OPT_STR_MULTI, FIO_OPT_STR_VAL, FIO_OPT_STR_VAL_TIME, FIO_OPT_STR_STORE, @@ -26,6 +27,7 @@ struct value_pair { const char *ival; /* string option */ unsigned int oval; /* output value */ const char *help; /* help text for sub option */ + int or; /* OR value */ }; #define OPT_LEN_MAX 4096 diff --git 
a/profile.c b/profile.c index 3ed9127..855dde3 100644 --- a/profile.c +++ b/profile.c @@ -96,3 +96,21 @@ void profile_add_hooks(struct thread_data *td) if (ops->io_ops) td->prof_io_ops = *ops->io_ops; } + +int profile_td_init(struct thread_data *td) +{ + struct prof_io_ops *ops = &td->prof_io_ops; + + if (ops->td_init) + return ops->td_init(td); + + return 0; +} + +void profile_td_exit(struct thread_data *td) +{ + struct prof_io_ops *ops = &td->prof_io_ops; + + if (ops->td_exit) + ops->td_exit(td); +} diff --git a/profile.h b/profile.h index a54f072..673c5c4 100644 --- a/profile.h +++ b/profile.h @@ -7,6 +7,9 @@ * Functions for overriding internal fio io_u functions */ struct prof_io_ops { + int (*td_init)(struct thread_data *); + void (*td_exit)(struct thread_data *); + int (*fill_io_u_off)(struct thread_data *, struct io_u *); int (*fill_io_u_size)(struct thread_data *, struct io_u *); struct fio_file *(*get_next_file)(struct thread_data *); @@ -42,4 +45,7 @@ int load_profile(const char *); struct profile_ops *find_profile(const char *); void profile_add_hooks(struct thread_data *); +int profile_td_init(struct thread_data *); +void profile_td_exit(struct thread_data *); + #endif -- To unsubscribe from this list: send the line "unsubscribe fio" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html