The following changes since commit 01423eae85a071ff6acee870edd696c7929723ba: clock: ensure that we re-init if the clocksource changes from the default (2012-12-14 20:37:06 +0100) are available in the git repository at: git://git.kernel.dk/fio.git master Jens Axboe (4): cpu clock: add independent test for monotonic/sane TSC gettime: locking fix and debug check for identical sequence gettime: include per-cpu clock calibration in cpu clock test Add --cpuclock-test and clocksource= option help HOWTO | 16 ++++++ README | 1 + fio.1 | 21 ++++++++ gettime.c | 172 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ gettime.h | 1 + init.c | 12 ++++- 6 files changed, 222 insertions(+), 1 deletions(-) --- Diff of recent changes: diff --git a/HOWTO b/HOWTO index b349620..0930f33 100644 --- a/HOWTO +++ b/HOWTO @@ -1262,6 +1262,22 @@ percentile_list=float_list Overwrite the default list of percentiles the values of completion latency below which 99.5% and 99.9% of the observed latencies fell, respectively. +clocksource=str Use the given clocksource as the base of timing. The + supported options are: + + gettimeofday gettimeofday(2) + + clock_gettime clock_gettime(2) + + cpu Internal CPU clock source + + cpu is the preferred clocksource if it is reliable, as it + is very fast (and fio is heavy on time calls). Fio will + automatically use this clocksource if it's supported and + considered reliable on the system it is running on, unless + another clocksource is specifically set. For x86/x86-64 CPUs, + this means supporting TSC Invariant. + gtod_reduce=bool Enable all of the gettimeofday() reducing options (disable_clat, disable_slat, disable_bw) plus reduce precision of the timeout somewhat to really shrink diff --git a/README b/README index ceac385..7c4552d 100644 --- a/README +++ b/README @@ -129,6 +129,7 @@ $ fio --terse-version=type Terse version output format (default 3, or 2 or 4). 
--version Print version info and exit --help Print this page + --cpuclock-test Perform test/validation of CPU clock --cmdhelp=cmd Print command help, "all" for all of them --enghelp=engine Print ioengine help, or list available ioengines --enghelp=engine,cmd Print help for an ioengine cmd diff --git a/fio.1 b/fio.1 index b7abc4b..22c32fd 100644 --- a/fio.1 +++ b/fio.1 @@ -1027,6 +1027,27 @@ given time in milliseconds. .BI disk_util \fR=\fPbool Generate disk utilization statistics if the platform supports it. Default: true. .TP +.BI clocksource \fR=\fPstr +Use the given clocksource as the base of timing. The supported options are: +.RS +.TP +.B gettimeofday +gettimeofday(2) +.TP +.B clock_gettime +clock_gettime(2) +.TP +.B cpu +Internal CPU clock source +.TP +.RE +.P +\fBcpu\fR is the preferred clocksource if it is reliable, as it is very fast +(and fio is heavy on time calls). Fio will automatically use this clocksource +if it's supported and considered reliable on the system it is running on, +unless another clocksource is specifically set. For x86/x86-64 CPUs, this +means supporting TSC Invariant. 
+.TP .BI gtod_reduce \fR=\fPbool Enable all of the gettimeofday() reducing options (disable_clat, disable_slat, disable_bw) plus reduce precision of the timeout somewhat to really shrink the diff --git a/gettime.c b/gettime.c index 9f23e3f..f5be6bd 100644 --- a/gettime.c +++ b/gettime.c @@ -11,6 +11,7 @@ #include "smalloc.h" #include "hash.h" +#include "os/os.h" #ifdef ARCH_HAVE_CPU_CLOCK static unsigned long cycles_per_usec; @@ -347,3 +348,174 @@ unsigned long time_since_now(struct timeval *s) { return mtime_since_now(s) / 1000; } + +#if defined(FIO_HAVE_CPU_AFFINITY) && defined(ARCH_HAVE_CPU_CLOCK) + +#define CLOCK_ENTRIES 100000 + +struct clock_entry { + unsigned long seq; + unsigned long tsc; + unsigned long cpu; +}; + +struct clock_thread { + pthread_t thread; + int cpu; + pthread_mutex_t lock; + pthread_mutex_t started; + uint64_t *seq; + struct clock_entry *entries; +}; + +static inline uint64_t atomic64_inc_return(uint64_t *seq) +{ + return 1 + __sync_fetch_and_add(seq, 1); +} + +static void *clock_thread_fn(void *data) +{ + struct clock_thread *t = data; + struct clock_entry *c; + os_cpu_mask_t cpu_mask; + int i; + + memset(&cpu_mask, 0, sizeof(cpu_mask)); + fio_cpu_set(&cpu_mask, t->cpu); + + if (fio_setaffinity(gettid(), cpu_mask) == -1) { + log_err("clock setaffinity failed\n"); + return (void *) 1; + } + + pthread_mutex_lock(&t->lock); + pthread_mutex_unlock(&t->started); + + c = &t->entries[0]; + for (i = 0; i < CLOCK_ENTRIES; i++, c++) { + uint64_t seq, tsc; + + c->cpu = t->cpu; + do { + seq = atomic64_inc_return(t->seq); + tsc = get_cpu_clock(); + } while (seq != *t->seq); + + c->seq = seq; + c->tsc = tsc; + } + + log_info("cs: cpu%3d: %lu clocks seen\n", t->cpu, t->entries[CLOCK_ENTRIES - 1].tsc - t->entries[0].tsc); + return NULL; +} + +static int clock_cmp(const void *p1, const void *p2) +{ + const struct clock_entry *c1 = p1; + const struct clock_entry *c2 = p2; + + if (c1->seq == c2->seq) + log_err("cs: bug in atomic sequence!\n"); + + return 
c1->seq - c2->seq; +} + +int fio_monotonic_clocktest(void) +{ + struct clock_thread *threads; + unsigned int nr_cpus = cpus_online(); + struct clock_entry *entries; + unsigned long tentries, failed; + uint64_t seq = 0; + int i; + + fio_debug |= 1U << FD_TIME; + calibrate_cpu_clock(); + fio_debug &= ~(1U << FD_TIME); + + threads = malloc(nr_cpus * sizeof(struct clock_thread)); + tentries = CLOCK_ENTRIES * nr_cpus; + entries = malloc(tentries * sizeof(struct clock_entry)); + + log_info("cs: Testing %u CPUs\n", nr_cpus); + + for (i = 0; i < nr_cpus; i++) { + struct clock_thread *t = &threads[i]; + + t->cpu = i; + t->seq = &seq; + t->entries = &entries[i * CLOCK_ENTRIES]; + pthread_mutex_init(&t->lock, NULL); + pthread_mutex_init(&t->started, NULL); + pthread_mutex_lock(&t->lock); + pthread_create(&t->thread, NULL, clock_thread_fn, t); + } + + for (i = 0; i < nr_cpus; i++) { + struct clock_thread *t = &threads[i]; + + pthread_mutex_lock(&t->started); + } + + for (i = 0; i < nr_cpus; i++) { + struct clock_thread *t = &threads[i]; + + pthread_mutex_unlock(&t->lock); + } + + for (failed = i = 0; i < nr_cpus; i++) { + struct clock_thread *t = &threads[i]; + void *ret; + + pthread_join(t->thread, &ret); + if (ret) + failed++; + } + free(threads); + + if (failed) { + log_err("Clocksource test: %u threads failed\n", failed); + goto err; + } + + qsort(entries, tentries, sizeof(struct clock_entry), clock_cmp); + + for (failed = i = 0; i < tentries; i++) { + struct clock_entry *prev, *this = &entries[i]; + + if (!i) { + prev = this; + continue; + } + + if (prev->tsc > this->tsc) { + uint64_t diff = prev->tsc - this->tsc; + + log_info("cs: CPU clock mismatch (diff=%lu):\n", diff); + log_info("\t CPU%3lu: TSC=%lu, SEQ=%lu\n", prev->cpu, prev->tsc, prev->seq); + log_info("\t CPU%3lu: TSC=%lu, SEQ=%lu\n", this->cpu, this->tsc, this->seq); + failed++; + } + + prev = this; + } + + if (failed) + log_info("cs: Failed: %lu\n", failed); + else + log_info("cs: Pass!\n"); + +err: + 
free(entries); + return !!failed; +} + +#else /* defined(FIO_HAVE_CPU_AFFINITY) && defined(ARCH_HAVE_CPU_CLOCK) */ + +int fio_monotonic_clocktest(void) +{ + log_info("cs: current platform does not support CPU clocks\n"); + return 0; +} + +#endif diff --git a/gettime.h b/gettime.h index 64651a1..8ca3791 100644 --- a/gettime.h +++ b/gettime.h @@ -15,6 +15,7 @@ extern void fio_gettime(struct timeval *, void *); extern void fio_gtod_init(void); extern void fio_clock_init(void); extern int fio_start_gtod_thread(void); +extern int fio_monotonic_clocktest(void); extern struct timeval *fio_tv; diff --git a/init.c b/init.c index e90d735..e7b3303 100644 --- a/init.c +++ b/init.c @@ -208,6 +208,11 @@ static struct option l_opts[FIO_NR_OPTIONS] = { .val = 'C', }, { + .name = (char *) "cpuclock-test", + .has_arg = no_argument, + .val = 'T', + }, + { .name = NULL, }, }; @@ -1255,6 +1260,7 @@ static void usage(const char *name) printf(" --terse-version=x\tSet terse version output format to 'x'\n"); printf(" --version\t\tPrint version info and exit\n"); printf(" --help\t\tPrint this page\n"); + printf(" --cpuclock-test\tPerform test/validation of CPU clock\n"); printf(" --cmdhelp=cmd\t\tPrint command help, \"all\" for all of" " them\n"); printf(" --enghelp=engine\tPrint ioengine help, or list" @@ -1275,7 +1281,7 @@ static void usage(const char *name) printf(" --daemonize=pidfile\tBackground fio server, write pid to file\n"); printf(" --client=hostname\tTalk to remote backend fio server at hostname\n"); printf("\nFio was written by Jens Axboe <jens.axboe@xxxxxxxxxx>"); - printf("\n Jens Axboe <jaxboe@xxxxxxxxxxxx>\n"); + printf("\n Jens Axboe <jaxboe@xxxxxxxxxxxx>\n"); } #ifdef FIO_INC_DEBUG @@ -1621,6 +1627,10 @@ int parse_cmd_line(int argc, char *argv[]) optind++; } break; + case 'T': + do_exit++; + exit_val = fio_monotonic_clocktest(); + break; default: do_exit++; exit_val = 1; -- To unsubscribe from this list: send the line "unsubscribe fio" in the body of a message to 
majordomo@xxxxxxxxxxxxxxx. More majordomo info at http://vger.kernel.org/majordomo-info.html