The following changes since commit 481075983e3c71460b9f784ddfb14099857e444a: options: add category/group to random_generator/random_distribution (2012-12-04 09:43:11 +0100) are available in the git repository at: git://git.kernel.dk/fio.git gfio Bruce Cran (3): Free io_u related structures before killing IO engine Fix windows out-of-memory handling windowsaio: create a single completion port during init, associate files during open. Jens Axboe (15): Name the various random offsets we use lfsr: ensure that the cycle follows the randrepeat= setting lfsr: add HOWTO and man page documentation Fix man page indentation parser: always match the correct option length for posval options windowsaio: initialize and map windowsaio IO structure to io_u Document the ioengine=net pingpong= option Add check for invariant TSC on x86 and use TSC is default clock if reliable Increase CPU clock calibration accuracy gettime: calibration rounding error gettime: fix CPU calibration reported mean Define TSC arch_init() for PPC and IA64 cpu clock: round up when dividing by samples Merge branch 'master' of ssh://brick.kernel.dk/data/git/fio Merge branch 'master' into gfio HOWTO | 26 +++++++++ arch/arch-ia64.h | 7 +++ arch/arch-ppc.h | 7 +++ arch/arch-x86-common.h | 43 +++++++++++++++ arch/arch-x86.h | 2 + arch/arch-x86_64.h | 2 + backend.c | 16 +++++- crc/crc32c-intel.c | 16 ------ engines/windowsaio.c | 142 +++++++++++++++++++++++++----------------------- filesetup.c | 6 ++- fio.1 | 31 ++++++++++- fio.c | 2 + fio.h | 15 +++++- gettime.c | 35 +++++++++---- init.c | 34 ++++++------ ioengine.h | 2 + lib/lfsr.c | 4 +- lib/lfsr.h | 2 +- options.c | 2 +- os/os-windows.h | 2 + os/windows/posix.c | 5 ++ parse.c | 9 +++- 22 files changed, 290 insertions(+), 120 deletions(-) create mode 100644 arch/arch-x86-common.h --- Diff of recent changes: diff --git a/HOWTO b/HOWTO index a8a3d95..529c967 100644 --- a/HOWTO +++ b/HOWTO @@ -753,6 +753,23 @@ softrandommap=bool See norandommap. 
If fio runs with the random block map will not be as complete as with random maps, this option is disabled by default. +random_generator=str Fio supports the following engines for generating + IO offsets for random IO: + + tausworthe Strong 2^88 cycle random number generator + lfsr Linear feedback shift register generator + + Tausworthe is a strong random number generator, but it + requires tracking on the side if we want to ensure that + blocks are only read or written once. LFSR guarantees + that we never generate the same offset twice, and it's + also less computationally expensive. It's not a true + random generator, however, though for IO purposes it's + typically good enough. LFSR only works with single + block sizes, not with workloads that use multiple block + sizes. If used with such a workload, fio may read or write + some blocks multiple times. + nice=int Run the job with the given nice value. See man nice(2). prio=int Set the io priority value of this job. Linux limits us to @@ -1381,6 +1398,15 @@ that defines them is selected. [net] listen For TCP network connections, tell fio to listen for incoming connections rather than initiating an outgoing connection. The hostname must be omitted if this option is used. +[net] pingpong Normally a network writer will just continue writing data, and + a network reader will just consume packets. If pingpong=1 + is set, a writer will send its normal payload to the reader, + then wait for the reader to send the same payload back. This + allows fio to measure network latencies. The submission + and completion latencies then measure local time spent + sending or receiving, and the completion latency measures + how long it took for the other end to receive and send back. 
+ [e4defrag] donorname=str File will be used as a block donor(swap extents between files) [e4defrag] inplace=int diff --git a/arch/arch-ia64.h b/arch/arch-ia64.h index f4464c4..8ccbd86 100644 --- a/arch/arch-ia64.h +++ b/arch/arch-ia64.h @@ -42,6 +42,13 @@ static inline unsigned long get_cpu_clock(void) return ret; } +#define ARCH_HAVE_INIT +extern int tsc_reliable; +static inline int arch_init(char *envp[]) +{ + tsc_reliable = 1; +} + #define ARCH_HAVE_FFZ #define ARCH_HAVE_CPU_CLOCK diff --git a/arch/arch-ppc.h b/arch/arch-ppc.h index b790a55..0f20375 100644 --- a/arch/arch-ppc.h +++ b/arch/arch-ppc.h @@ -58,6 +58,13 @@ static inline unsigned long long get_cpu_clock(void) return ret; } +#define ARCH_HAVE_INIT +extern int tsc_reliable; +static inline int arch_init(char *envp[]) +{ + tsc_reliable = 1; +} + #define ARCH_HAVE_FFZ #define ARCH_HAVE_CPU_CLOCK diff --git a/arch/arch-x86-common.h b/arch/arch-x86-common.h new file mode 100644 index 0000000..1e62354 --- /dev/null +++ b/arch/arch-x86-common.h @@ -0,0 +1,43 @@ +#ifndef FIO_ARCH_X86_COMMON +#define FIO_ARCH_X86_COMMON + +static inline void do_cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + unsigned int id = *eax; + + asm("movl %4, %%eax;" + "cpuid;" + "movl %%eax, %0;" + "movl %%ebx, %1;" + "movl %%ecx, %2;" + "movl %%edx, %3;" + : "=r" (*eax), "=r" (*ebx), "=r" (*ecx), "=r" (*edx) + : "r" (id) + : "eax", "ebx", "ecx", "edx"); +} + +#define ARCH_HAVE_INIT +extern int tsc_reliable; +static inline int arch_init(char *envp[]) +{ + unsigned int eax, ebx, ecx, edx; + + /* + * Check for TSC + */ + eax = 1; + do_cpuid(&eax, &ebx, &ecx, &edx); + if (!(edx & (1U << 4))) + return 0; + + /* + * Check for constant rate and synced (across cores) TSC + */ + eax = 0x80000007; + do_cpuid(&eax, &ebx, &ecx, &edx); + tsc_reliable = edx & (1U << 8); + return 0; +} + +#endif diff --git a/arch/arch-x86.h b/arch/arch-x86.h index 1ededd8..4803006 100644 --- a/arch/arch-x86.h +++ 
b/arch/arch-x86.h @@ -1,6 +1,8 @@ #ifndef ARCH_X86_H #define ARCH_X86_H +#include "arch-x86-common.h" + #define FIO_ARCH (arch_i386) #ifndef __NR_ioprio_set diff --git a/arch/arch-x86_64.h b/arch/arch-x86_64.h index 29e681f..d8b0933 100644 --- a/arch/arch-x86_64.h +++ b/arch/arch-x86_64.h @@ -1,6 +1,8 @@ #ifndef ARCH_X86_64_h #define ARCH_X86_64_h +#include "arch-x86-common.h" + #define FIO_ARCH (arch_x86_64) #ifndef __NR_ioprio_set diff --git a/backend.c b/backend.c index d56c7d0..8bcb7a2 100644 --- a/backend.c +++ b/backend.c @@ -803,6 +803,10 @@ static void cleanup_io_u(struct thread_data *td) io_u = flist_entry(entry, struct io_u, list); flist_del(&io_u->list); + + if (td->io_ops->io_u_free) + td->io_ops->io_u_free(td, io_u); + fio_memfree(io_u, sizeof(*io_u)); } @@ -885,6 +889,16 @@ static int init_io_u(struct thread_data *td) io_u->index = i; io_u->flags = IO_U_F_FREE; flist_add(&io_u->list, &td->io_u_freelist); + + if (td->io_ops->io_u_init) { + int ret = td->io_ops->io_u_init(td, io_u); + + if (ret) { + log_err("fio: failed to init engine data: %d\n", ret); + return 1; + } + } + p += max_bs; } @@ -1288,8 +1302,8 @@ err: verify_async_exit(td); close_and_free_files(td); - close_ioengine(td); cleanup_io_u(td); + close_ioengine(td); cgroup_shutdown(td, &cgroup_mnt); if (o->cpumask_set) { diff --git a/crc/crc32c-intel.c b/crc/crc32c-intel.c index 8a6e6dc..8e1cd58 100644 --- a/crc/crc32c-intel.c +++ b/crc/crc32c-intel.c @@ -78,22 +78,6 @@ uint32_t crc32c_intel(unsigned char const *data, unsigned long length) return crc; } -static void do_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, - unsigned int *edx) -{ - int id = *eax; - - asm("movl %4, %%eax;" - "cpuid;" - "movl %%eax, %0;" - "movl %%ebx, %1;" - "movl %%ecx, %2;" - "movl %%edx, %3;" - : "=r" (*eax), "=r" (*ebx), "=r" (*ecx), "=r" (*edx) - : "r" (id) - : "eax", "ebx", "ecx", "edx"); -} - void crc32c_intel_probe(void) { if (!crc32c_probed) { diff --git a/engines/windowsaio.c 
b/engines/windowsaio.c index db75730..edc390c 100644 --- a/engines/windowsaio.c +++ b/engines/windowsaio.c @@ -20,12 +20,11 @@ struct fio_overlapped { OVERLAPPED o; struct io_u *io_u; BOOL io_complete; - BOOL io_free; }; struct windowsaio_data { - struct fio_overlapped *ovls; struct io_u **aio_events; + HANDLE iocp; HANDLE iothread; HANDLE iocomplete_event; CANCELIOEX pCancelIoEx; @@ -50,9 +49,9 @@ static DWORD WINAPI IoCompletionRoutine(LPVOID lpParameter); static int fio_windowsaio_init(struct thread_data *td); static int fio_windowsaio_open_file(struct thread_data *td, struct fio_file *f); static int fio_windowsaio_close_file(struct thread_data fio_unused *td, struct fio_file *f); -static int win_to_poxix_error(DWORD winerr); +static int win_to_posix_error(DWORD winerr); -static int win_to_poxix_error(DWORD winerr) +static int win_to_posix_error(DWORD winerr) { switch (winerr) { @@ -139,7 +138,6 @@ static int fio_windowsaio_init(struct thread_data *td) struct windowsaio_data *wd; HANDLE hKernel32Dll; int rc = 0; - int i; wd = malloc(sizeof(struct windowsaio_data)); if (wd != NULL) @@ -154,25 +152,6 @@ static int fio_windowsaio_init(struct thread_data *td) } if (!rc) { - wd->ovls = malloc(td->o.iodepth * sizeof(struct fio_overlapped)); - if (wd->ovls == NULL) - rc = 1; - } - - if (!rc) { - for (i = 0; i < td->o.iodepth; i++) { - wd->ovls[i].io_free = TRUE; - wd->ovls[i].io_complete = FALSE; - - wd->ovls[i].o.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL); - if (wd->ovls[i].o.hEvent == NULL) { - rc = 1; - break; - } - } - } - - if (!rc) { /* Create an auto-reset event */ wd->iocomplete_event = CreateEvent(NULL, FALSE, FALSE, NULL); if (wd->iocomplete_event == NULL) @@ -181,8 +160,6 @@ static int fio_windowsaio_init(struct thread_data *td) if (rc) { if (wd != NULL) { - if (wd->ovls != NULL) - free(wd->ovls); if (wd->aio_events != NULL) free(wd->aio_events); @@ -194,12 +171,46 @@ static int fio_windowsaio_init(struct thread_data *td) wd->pCancelIoEx = 
(CANCELIOEX)GetProcAddress(hKernel32Dll, "CancelIoEx"); td->io_ops->data = wd; + + if (!rc) { + struct thread_ctx *ctx; + struct windowsaio_data *wd; + HANDLE hFile; + + hFile = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0); + if (hFile == INVALID_HANDLE_VALUE) + rc = 1; + + wd = td->io_ops->data; + wd->iothread_running = TRUE; + wd->iocp = hFile; + + if (!rc) + ctx = malloc(sizeof(struct thread_ctx)); + + if (!rc && ctx == NULL) + { + log_err("fio: out of memory in windowsaio\n"); + CloseHandle(hFile); + rc = 1; + } + + if (!rc) + { + ctx->iocp = hFile; + ctx->wd = wd; + wd->iothread = CreateThread(NULL, 0, IoCompletionRoutine, ctx, 0, NULL); + } + + if (rc || wd->iothread == NULL) + rc = 1; + } + return rc; } static void fio_windowsaio_cleanup(struct thread_data *td) { - int i; struct windowsaio_data *wd; wd = td->io_ops->data; @@ -211,12 +222,7 @@ static void fio_windowsaio_cleanup(struct thread_data *td) CloseHandle(wd->iothread); CloseHandle(wd->iocomplete_event); - for (i = 0; i < td->o.iodepth; i++) { - CloseHandle(wd->ovls[i].o.hEvent); - } - free(wd->aio_events); - free(wd->ovls); free(wd); td->io_ops->data = NULL; @@ -227,7 +233,6 @@ static void fio_windowsaio_cleanup(struct thread_data *td) static int fio_windowsaio_open_file(struct thread_data *td, struct fio_file *f) { int rc = 0; - HANDLE hFile; DWORD flags = FILE_FLAG_POSIX_SEMANTICS | FILE_FLAG_OVERLAPPED; DWORD sharemode = FILE_SHARE_READ | FILE_SHARE_WRITE; DWORD openmode = OPEN_ALWAYS; @@ -279,23 +284,11 @@ static int fio_windowsaio_open_file(struct thread_data *td, struct fio_file *f) /* Only set up the completion port and thread if we're not just * querying the device size */ if (!rc && td->io_ops->data != NULL) { - struct thread_ctx *ctx; struct windowsaio_data *wd; - hFile = CreateIoCompletionPort(f->hFile, NULL, 0, 0); - wd = td->io_ops->data; - wd->iothread_running = TRUE; - if (!rc) { - ctx = malloc(sizeof(struct thread_ctx)); - ctx->iocp = hFile; - ctx->wd = wd; - - 
wd->iothread = CreateThread(NULL, 0, IoCompletionRoutine, ctx, 0, NULL); - } - - if (rc || wd->iothread == NULL) + if (CreateIoCompletionPort(f->hFile, wd->iocp, 0, 0) == NULL) rc = 1; } @@ -364,7 +357,6 @@ static int fio_windowsaio_getevents(struct thread_data *td, unsigned int min, if (fov->io_complete) { fov->io_complete = FALSE; - fov->io_free = TRUE; ResetEvent(fov->o.hEvent); wd->aio_events[dequeued] = io_u; dequeued++; @@ -389,32 +381,18 @@ static int fio_windowsaio_getevents(struct thread_data *td, unsigned int min, static int fio_windowsaio_queue(struct thread_data *td, struct io_u *io_u) { - LPOVERLAPPED lpOvl = NULL; - struct windowsaio_data *wd; + struct fio_overlapped *o = io_u->engine_data; + LPOVERLAPPED lpOvl = &o->o; DWORD iobytes; BOOL success = FALSE; - int index; int rc = FIO_Q_COMPLETED; fio_ro_check(td, io_u); - wd = td->io_ops->data; - - for (index = 0; index < td->o.iodepth; index++) { - if (wd->ovls[index].io_free) - break; - } - - assert(index < td->o.iodepth); - - wd->ovls[index].io_free = FALSE; - wd->ovls[index].io_u = io_u; - lpOvl = &wd->ovls[index].o; lpOvl->Internal = STATUS_PENDING; lpOvl->InternalHigh = 0; lpOvl->Offset = io_u->offset & 0xFFFFFFFF; lpOvl->OffsetHigh = io_u->offset >> 32; - io_u->engine_data = &wd->ovls[index]; switch (io_u->ddir) { case DDIR_WRITE: @@ -428,7 +406,7 @@ static int fio_windowsaio_queue(struct thread_data *td, struct io_u *io_u) case DDIR_SYNC_FILE_RANGE: success = FlushFileBuffers(io_u->file->hFile); if (!success) - io_u->error = win_to_poxix_error(GetLastError()); + io_u->error = win_to_posix_error(GetLastError()); return FIO_Q_COMPLETED; break; @@ -446,7 +424,7 @@ static int fio_windowsaio_queue(struct thread_data *td, struct io_u *io_u) if (success || GetLastError() == ERROR_IO_PENDING) rc = FIO_Q_QUEUED; else { - io_u->error = win_to_poxix_error(GetLastError()); + io_u->error = win_to_posix_error(GetLastError()); io_u->resid = io_u->xfer_buflen; } @@ -479,7 +457,7 @@ static DWORD WINAPI 
IoCompletionRoutine(LPVOID lpParameter) io_u->error = 0; } else { io_u->resid = io_u->xfer_buflen; - io_u->error = win_to_poxix_error(GetLastError()); + io_u->error = win_to_posix_error(GetLastError()); } fov->io_complete = TRUE; @@ -510,6 +488,34 @@ static int fio_windowsaio_cancel(struct thread_data *td, return rc; } +static void fio_windowsaio_io_u_free(struct thread_data *td, struct io_u *io_u) +{ + struct fio_overlapped *o = io_u->engine_data; + + if (o) { + CloseHandle(o->o.hEvent); + io_u->engine_data = NULL; + free(o); + } +} + +static int fio_windowsaio_io_u_init(struct thread_data *td, struct io_u *io_u) +{ + struct fio_overlapped *o; + + o = malloc(sizeof(*o)); + o->io_complete = FALSE: + o->io_u = io_u; + o->o.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL); + if (!o->o.hEvent) { + free(o); + return 1; + } + + io_u->engine_data = o; + return 0; +} + static struct ioengine_ops ioengine = { .name = "windowsaio", .version = FIO_IOOPS_VERSION, @@ -521,7 +527,9 @@ static struct ioengine_ops ioengine = { .cleanup = fio_windowsaio_cleanup, .open_file = fio_windowsaio_open_file, .close_file = fio_windowsaio_close_file, - .get_file_size = generic_get_file_size + .get_file_size = generic_get_file_size, + .io_u_init = fio_windowsaio_io_u_init, + .io_u_free = fio_windowsaio_io_u_free, }; static void fio_init fio_posixaio_register(void) diff --git a/filesetup.c b/filesetup.c index ee58a7b..3462a03 100644 --- a/filesetup.c +++ b/filesetup.c @@ -918,7 +918,11 @@ int init_random_map(struct thread_data *td) blocks = (f->real_file_size + td->o.rw_min_bs - 1) / (unsigned long long) td->o.rw_min_bs; if (td->o.random_generator == FIO_RAND_GEN_LFSR) { - if (!lfsr_init(&f->lfsr, blocks)) + unsigned long seed; + + seed = td->rand_seeds[FIO_RAND_BLOCK_OFF]; + + if (!lfsr_init(&f->lfsr, blocks, seed)) continue; } else if (!td->o.norandommap) { f->io_axmap = axmap_new(blocks); diff --git a/fio.1 b/fio.1 index 02eafae..62c42c6 100644 --- a/fio.1 +++ b/fio.1 @@ -486,8 +486,8 @@ 
transfer as fio ioengine .B e4defrag IO engine that does regular EXT4_IOC_MOVE_EXT ioctls to simulate defragment activity request to DDIR_WRITE event -.TP .RE +.P .RE .TP .BI iodepth \fR=\fPint @@ -617,6 +617,26 @@ fails to allocate the map, if this option is set it will continue without a random block map. As coverage will not be as complete as with random maps, this option is disabled by default. .TP +.BI random_generator \fR=\fPstr +Fio supports the following engines for generating IO offsets for random IO: +.RS +.TP +.B tausworthe +Strong 2^88 cycle random number generator +.TP +.B lfsr +Linear feedback shift register generator +.TP +.RE +.P +Tausworthe is a strong random number generator, but it requires tracking on the +side if we want to ensure that blocks are only read or written once. LFSR +guarantees that we never generate the same offset twice, and it's also less +computationally expensive. It's not a true random generator, however, though +for IO purposes it's typically good enough. LFSR only works with single block +sizes, not with workloads that use multiple block sizes. If used with such a +workload, fio may read or write some blocks multiple times. +.TP .BI nice \fR=\fPint Run job with given nice value. See \fInice\fR\|(2). .TP @@ -1148,6 +1168,15 @@ For TCP network connections, tell fio to listen for incoming connections rather than initiating an outgoing connection. The hostname must be omitted if this option is used. .TP +.BI (net, pingpong) \fR=\fPbool +Normally a network writer will just continue writing data, and a network reader +will just consume packets. If pingpong=1 is set, a writer will send its normal +payload to the reader, then wait for the reader to send the same payload back. +This allows fio to measure network latencies. The submission and completion +latencies then measure local time spent sending or receiving, and the +completion latency measures how long it took for the other end to receive and +send back. 
+.TP .BI (e4defrag,donorname) \fR=\fPstr File will be used as a block donor (swap extents between files) .TP diff --git a/fio.c b/fio.c index 9a6c31a..16e18b2 100644 --- a/fio.c +++ b/fio.c @@ -45,6 +45,8 @@ int main(int argc, char *argv[], char *envp[]) if (parse_options(argc, argv)) return 1; + fio_time_init(); + if (nr_clients) { if (fio_start_all_clients()) return 1; diff --git a/fio.h b/fio.h index 43ad765..82e2b62 100644 --- a/fio.h +++ b/fio.h @@ -79,6 +79,18 @@ enum { TD_F_PROFILE_OPS = 64, }; +enum { + FIO_RAND_BS_OFF = 0, + FIO_RAND_VER_OFF, + FIO_RAND_MIX_OFF, + FIO_RAND_FILE_OFF, + FIO_RAND_BLOCK_OFF, + FIO_RAND_FILE_SIZE_OFF, + FIO_RAND_TRIM_OFF, + FIO_RAND_BUF_OFF, + FIO_RAND_NR_OFFS, +}; + /* * This describes a single thread/process executing a fio job. */ @@ -138,7 +150,7 @@ struct thread_data { char *sysfs_root; - unsigned long rand_seeds[8]; + unsigned long rand_seeds[FIO_RAND_NR_OFFS]; union { os_random_state_t bsrange_state; @@ -345,6 +357,7 @@ extern char *job_section; extern int fio_gtod_offload; extern int fio_gtod_cpu; extern enum fio_cs fio_clock_source; +extern int fio_clock_source_set; extern int warnings_fatal; extern int terse_version; extern int is_backend; diff --git a/gettime.c b/gettime.c index 35d685e..1ba18e9 100644 --- a/gettime.c +++ b/gettime.c @@ -15,11 +15,13 @@ #ifdef ARCH_HAVE_CPU_CLOCK static unsigned long cycles_per_usec; static unsigned long last_cycles; +int tsc_reliable = 0; #endif static struct timeval last_tv; static int last_tv_valid; enum fio_cs fio_clock_source = FIO_PREFERRED_CLOCK_SOURCE; +int fio_clock_source_set = 0; #ifdef FIO_DEBUG_TIME @@ -199,24 +201,26 @@ static unsigned long get_cycles_per_usec(void) gettimeofday(&e, NULL); elapsed = utime_since(&s, &e); - if (elapsed >= 10) { + if (elapsed >= 1280) { c_e = get_cpu_clock(); break; } } while (1); - return c_e - c_s; + return (c_e - c_s + 127) >> 7; } +#define NR_TIME_ITERS 50 + static void calibrate_cpu_clock(void) { double delta, mean, S; - unsigned long 
avg, cycles[10]; + unsigned long avg, cycles[NR_TIME_ITERS]; int i, samples; cycles[0] = get_cycles_per_usec(); S = delta = mean = 0.0; - for (i = 0; i < 10; i++) { + for (i = 0; i < NR_TIME_ITERS; i++) { cycles[i] = get_cycles_per_usec(); delta = cycles[i] - mean; if (delta) { @@ -225,10 +229,10 @@ static void calibrate_cpu_clock(void) } } - S = sqrt(S / (10 - 1.0)); + S = sqrt(S / (NR_TIME_ITERS - 1.0)); samples = avg = 0; - for (i = 0; i < 10; i++) { + for (i = 0; i < NR_TIME_ITERS; i++) { double this = cycles[i]; if ((fmax(this, mean) - fmin(this, mean)) > S) @@ -237,18 +241,18 @@ static void calibrate_cpu_clock(void) avg += this; } - S /= 10.0; + S /= (double) NR_TIME_ITERS; mean /= 10.0; - for (i = 0; i < 10; i++) + for (i = 0; i < NR_TIME_ITERS; i++) dprint(FD_TIME, "cycles[%d]=%lu\n", i, cycles[i] / 10); - avg /= (samples * 10); + avg /= samples; + avg = (avg + 9) / 10; dprint(FD_TIME, "avg: %lu\n", avg); dprint(FD_TIME, "mean=%f, S=%f\n", mean, S); cycles_per_usec = avg; - } #else static void calibrate_cpu_clock(void) @@ -260,6 +264,17 @@ void fio_clock_init(void) { last_tv_valid = 0; calibrate_cpu_clock(); + + /* + * If the arch sets tsc_reliable != 0, then it must be good enough + * to use as THE clock source. For x86 CPUs, this means the TSC + * runs at a constant rate and is synced across CPU cores. 
+ */ + if (tsc_reliable) { + if (!fio_clock_source_set) + fio_clock_source = CS_CPUCLOCK; + } else if (fio_clock_source == CS_CPUCLOCK) + log_info("fio: clocksource=cpu may not be reliable\n"); } unsigned long long utime_since(struct timeval *s, struct timeval *e) diff --git a/init.c b/init.c index 59b4727..6878322 100644 --- a/init.c +++ b/init.c @@ -667,44 +667,44 @@ static int exists_and_not_file(const char *filename) static void td_fill_rand_seeds_os(struct thread_data *td) { - os_random_seed(td->rand_seeds[0], &td->bsrange_state); - os_random_seed(td->rand_seeds[1], &td->verify_state); - os_random_seed(td->rand_seeds[2], &td->rwmix_state); + os_random_seed(td->rand_seeds[FIO_RAND_BS_OFF], &td->bsrange_state); + os_random_seed(td->rand_seeds[FIO_RAND_VER_OFF], &td->verify_state); + os_random_seed(td->rand_seeds[FIO_RAND_MIX_OFF], &td->rwmix_state); if (td->o.file_service_type == FIO_FSERVICE_RANDOM) - os_random_seed(td->rand_seeds[3], &td->next_file_state); + os_random_seed(td->rand_seeds[FIO_RAND_FILE_OFF], &td->next_file_state); - os_random_seed(td->rand_seeds[5], &td->file_size_state); - os_random_seed(td->rand_seeds[6], &td->trim_state); + os_random_seed(td->rand_seeds[FIO_RAND_FILE_SIZE_OFF], &td->file_size_state); + os_random_seed(td->rand_seeds[FIO_RAND_TRIM_OFF], &td->trim_state); if (!td_random(td)) return; if (td->o.rand_repeatable) - td->rand_seeds[4] = FIO_RANDSEED * td->thread_number; + td->rand_seeds[FIO_RAND_BLOCK_OFF] = FIO_RANDSEED * td->thread_number; - os_random_seed(td->rand_seeds[4], &td->random_state); + os_random_seed(td->rand_seeds[FIO_RAND_BLOCK_OFF], &td->random_state); } static void td_fill_rand_seeds_internal(struct thread_data *td) { - init_rand_seed(&td->__bsrange_state, td->rand_seeds[0]); - init_rand_seed(&td->__verify_state, td->rand_seeds[1]); - init_rand_seed(&td->__rwmix_state, td->rand_seeds[2]); + init_rand_seed(&td->__bsrange_state, td->rand_seeds[FIO_RAND_BS_OFF]); + init_rand_seed(&td->__verify_state, 
td->rand_seeds[FIO_RAND_VER_OFF]); + init_rand_seed(&td->__rwmix_state, td->rand_seeds[FIO_RAND_MIX_OFF]); if (td->o.file_service_type == FIO_FSERVICE_RANDOM) - init_rand_seed(&td->__next_file_state, td->rand_seeds[3]); + init_rand_seed(&td->__next_file_state, td->rand_seeds[FIO_RAND_FILE_OFF]); - init_rand_seed(&td->__file_size_state, td->rand_seeds[5]); - init_rand_seed(&td->__trim_state, td->rand_seeds[6]); + init_rand_seed(&td->__file_size_state, td->rand_seeds[FIO_RAND_FILE_SIZE_OFF]); + init_rand_seed(&td->__trim_state, td->rand_seeds[FIO_RAND_TRIM_OFF]); if (!td_random(td)) return; if (td->o.rand_repeatable) - td->rand_seeds[4] = FIO_RANDSEED * td->thread_number; + td->rand_seeds[FIO_RAND_BLOCK_OFF] = FIO_RANDSEED * td->thread_number; - init_rand_seed(&td->__random_state, td->rand_seeds[4]); + init_rand_seed(&td->__random_state, td->rand_seeds[FIO_RAND_BLOCK_OFF]); } void td_fill_rand_seeds(struct thread_data *td) @@ -714,7 +714,7 @@ void td_fill_rand_seeds(struct thread_data *td) else td_fill_rand_seeds_internal(td); - init_rand_seed(&td->buf_state, td->rand_seeds[7]); + init_rand_seed(&td->buf_state, td->rand_seeds[FIO_RAND_BUF_OFF]); } diff --git a/ioengine.h b/ioengine.h index 1cd08af..df5f889 100644 --- a/ioengine.h +++ b/ioengine.h @@ -128,6 +128,8 @@ struct ioengine_ops { int (*close_file)(struct thread_data *, struct fio_file *); int (*get_file_size)(struct thread_data *, struct fio_file *); void (*terminate)(struct thread_data *); + int (*io_u_init)(struct thread_data *, struct io_u *); + void (*io_u_free)(struct thread_data *, struct io_u *); int option_struct_size; struct fio_option *options; void *data; diff --git a/lib/lfsr.c b/lib/lfsr.c index 01c97cb..8a70029 100644 --- a/lib/lfsr.c +++ b/lib/lfsr.c @@ -243,7 +243,7 @@ static struct lfsr_taps *find_lfsr(uint64_t size) return NULL; } -int lfsr_init(struct fio_lfsr *fl, uint64_t size) +int lfsr_init(struct fio_lfsr *fl, uint64_t size, unsigned long seed) { struct lfsr_taps *tap; int i; @@ -252,7 
+252,7 @@ int lfsr_init(struct fio_lfsr *fl, uint64_t size) if (!tap) return 1; - fl->last_val = 1; + fl->last_val = seed; fl->max_val = size - 1; fl->num_vals = 0; fl->taps.length = tap->length; diff --git a/lib/lfsr.h b/lib/lfsr.h index 0de9ea8..09f5ac0 100644 --- a/lib/lfsr.h +++ b/lib/lfsr.h @@ -19,6 +19,6 @@ struct fio_lfsr { }; int lfsr_next(struct fio_lfsr *fl, uint64_t *off); -int lfsr_init(struct fio_lfsr *fl, uint64_t size); +int lfsr_init(struct fio_lfsr *fl, uint64_t size, unsigned long seed); #endif diff --git a/options.c b/options.c index eab22a7..917dbf0 100644 --- a/options.c +++ b/options.c @@ -359,7 +359,7 @@ static int fio_clock_source_cb(void *data, const char *str) struct thread_data *td = data; fio_clock_source = td->o.clocksource; - fio_time_init(); + fio_clock_source_set = 1; return 0; } diff --git a/os/os-windows.h b/os/os-windows.h index 7b61d16..ba93195 100644 --- a/os/os-windows.h +++ b/os/os-windows.h @@ -23,6 +23,7 @@ #define FIO_HAVE_FALLOCATE #define FIO_HAVE_GETTID #define FIO_HAVE_CLOCK_MONOTONIC +#define FIO_HAVE_FADVISE #define FIO_USE_GENERIC_RAND #define FIO_PREFERRED_ENGINE "windowsaio" @@ -83,6 +84,7 @@ typedef DWORD_PTR os_cpu_mask_t; #define SIGCONT 0 #define SIGUSR1 1 +#define SIGUSR2 2 typedef int sigset_t; typedef int siginfo_t; diff --git a/os/windows/posix.c b/os/windows/posix.c index 11500e4..f616e87 100755 --- a/os/windows/posix.c +++ b/os/windows/posix.c @@ -535,6 +535,11 @@ int getrusage(int who, struct rusage *r_usage) return 0; } +int posix_fadvise(int fd, off_t offset, off_t len, int advice) +{ + return 0; +} + int posix_madvise(void *addr, size_t len, int advice) { log_err("%s is not implemented\n", __func__); diff --git a/parse.c b/parse.c index 92adbe5..f349744 100644 --- a/parse.c +++ b/parse.c @@ -343,6 +343,11 @@ static int opt_len(const char *str) return (int)(postfix - str); } +static int str_match_len(const struct value_pair *vp, const char *str) +{ + return max(strlen(vp->ival), opt_len(str)); +} + 
#define val_store(ptr, val, off, or, data) \ do { \ ptr = td_var((data), (off)); \ @@ -387,7 +392,7 @@ static int __handle_option(struct fio_option *o, const char *ptr, void *data, if (!vp->ival || vp->ival[0] == '\0') continue; all_skipped = 0; - if (!strncmp(vp->ival, ptr, opt_len(ptr))) { + if (!strncmp(vp->ival, ptr, str_match_len(vp, ptr))) { ret = 0; if (o->roff1) { if (vp->or) @@ -548,7 +553,7 @@ static int __handle_option(struct fio_option *o, const char *ptr, void *data, if (!vp->ival || vp->ival[0] == '\0') continue; all_skipped = 0; - if (!strncmp(vp->ival, ptr, opt_len(ptr))) { + if (!strncmp(vp->ival, ptr, str_match_len(vp, ptr))) { char *rest; ret = 0; -- To unsubscribe from this list: send the line "unsubscribe fio" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html