This microbenchmark simulates how the use of different futex types can affect the actual performance of userspace mutex locks. The usage is: perf bench futex mutex <options> Three sets of simple mutex lock and unlock functions are implemented using the wait-wake, PI and TP futexes respectively. This microbenchmark then runs the locking rate measurement tests using either one of those mutexes or all of them consecutively. Signed-off-by: Waiman Long <longman@xxxxxxxxxx> --- tools/perf/bench/Build | 1 + tools/perf/bench/bench.h | 1 + tools/perf/bench/futex-locks.c | 844 +++++++++++++++++++++++++++++++++++++++++ tools/perf/bench/futex.h | 23 ++ tools/perf/builtin-bench.c | 10 + tools/perf/check-headers.sh | 4 + 6 files changed, 883 insertions(+) create mode 100644 tools/perf/bench/futex-locks.c diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build index 60bf119..7ce1cd7 100644 --- a/tools/perf/bench/Build +++ b/tools/perf/bench/Build @@ -6,6 +6,7 @@ perf-y += futex-wake.o perf-y += futex-wake-parallel.o perf-y += futex-requeue.o perf-y += futex-lock-pi.o +perf-y += futex-locks.o perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 579a592..b0632df 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -36,6 +36,7 @@ int bench_futex_requeue(int argc, const char **argv, const char *prefix); /* pi futexes */ int bench_futex_lock_pi(int argc, const char **argv, const char *prefix); +int bench_futex_mutex(int argc, const char **argv, const char *prefix); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/futex-locks.c b/tools/perf/bench/futex-locks.c new file mode 100644 index 0000000..ec1e627 --- /dev/null +++ b/tools/perf/bench/futex-locks.c @@ -0,0 +1,844 @@ +/* + * Copyright (C) 2016 Waiman Long <longman@xxxxxxxxxx> + * + * This microbenchmark simulates how the use of different
futex types can + * affect the actual performance of userspace locking primitives like mutex. + * + * The raw throughput of the futex lock and unlock calls is not a good + * indication of actual throughput of the mutex code as it may not really + * need to call into the kernel. Therefore, 3 sets of simple mutex lock and + * unlock functions are written to implement a mutex lock using the + * wait-wake, PI and TP futexes respectively. These functions serve as the + * basis for measuring the locking throughput. + */ + +#include <pthread.h> + +#include <signal.h> +#include <string.h> +#include "../util/stat.h" +#include "../perf-sys.h" +#include <subcmd/parse-options.h> +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <errno.h> +#include "bench.h" +#include "futex.h" + +#include <err.h> +#include <stdlib.h> +#include <sys/time.h> + +#define CACHELINE_SIZE 64 +#define gettid() syscall(SYS_gettid) +#define __cacheline_aligned __attribute__((__aligned__(CACHELINE_SIZE))) + +typedef u32 futex_t; +typedef void (*lock_fn_t)(futex_t *futex, int tid); +typedef void (*unlock_fn_t)(futex_t *futex, int tid); + +/* + * Statistical count list + */ +enum { + STAT_OPS, /* # of exclusive locking operations */ + STAT_LOCKS, /* # of exclusive lock futex calls */ + STAT_UNLOCKS, /* # of exclusive unlock futex calls */ + STAT_SLEEPS, /* # of exclusive lock sleeps */ + STAT_EAGAINS, /* # of EAGAIN errors */ + STAT_WAKEUPS, /* # of wakeups (unlock return) */ + STAT_HANDOFFS, /* # of lock handoff (TP only) */ + STAT_STEALS, /* # of lock steals (TP only) */ + STAT_LOCKERRS, /* # of exclusive lock errors */ + STAT_UNLKERRS, /* # of exclusive unlock errors */ + STAT_NUM /* Total # of statistical count */ +}; + +/* + * Syscall time list + */ +enum { + TIME_LOCK, /* Total exclusive lock syscall time */ + TIME_UNLK, /* Total exclusive unlock syscall time */ + TIME_NUM, +}; + +struct worker { + futex_t *futex; + pthread_t thread; + + /* + * Per-thread operation statistics + */ +
u32 stats[STAT_NUM]; + + /* + * Lock/unlock times + */ + u64 times[TIME_NUM]; +} __cacheline_aligned; + +/* + * Global cache-aligned futex + */ +static futex_t __cacheline_aligned global_futex; +static futex_t *pfutex = &global_futex; + +static __thread futex_t thread_id; /* Thread ID */ +static __thread int counter; /* Sleep counter */ + +static struct worker *worker, *worker_alloc; +static unsigned int nsecs = 10; +static bool verbose, done, fshared, exit_now, timestat; +static unsigned int ncpus, nthreads; +static int flags; +static const char *ftype; +static int loadlat = 1; +static int locklat = 1; +static int wratio; +struct timeval start, end, runtime; +static unsigned int worker_start; +static unsigned int threads_starting; +static unsigned int threads_stopping; +static struct stats throughput_stats; +static lock_fn_t mutex_lock_fn; +static unlock_fn_t mutex_unlock_fn; + +/* + * Lock/unlock syscall time accounting helper + */ +static inline void systime_add(int tid, int item, struct timespec *begin, + struct timespec *_end) +{ + worker[tid].times[item] += (_end->tv_sec - begin->tv_sec)*1000000000 + + _end->tv_nsec - begin->tv_nsec; +} + +static inline double stat_percent(struct worker *w, int top, int bottom) +{ + return (double)w->stats[top] * 100 / w->stats[bottom]; +} + +/* + * Inline functions to update the statistical counts + * + * Enabling statistics collection may sometimes impact the locking rates + * to be measured. So we can specify the DISABLE_STAT macro to disable + * statistic counts collection for all except the core locking rate counts.
+ * + * #define DISABLE_STAT + */ +#ifndef DISABLE_STAT +static inline void stat_add(int tid, int item, int num) +{ + worker[tid].stats[item] += num; +} + +static inline void stat_inc(int tid, int item) +{ + stat_add(tid, item, 1); +} +#else +static inline void stat_add(int tid __maybe_unused, int item __maybe_unused, + int num __maybe_unused) +{ +} + +static inline void stat_inc(int tid __maybe_unused, int item __maybe_unused) +{ +} +#endif + +/* + * The latency value within a lock critical section (load) and between locking + * operations is in terms of the number of cpu_relax() calls that are being + * issued. + */ +static const struct option mutex_options[] = { + OPT_INTEGER ('d', "locklat", &locklat, "Specify inter-locking latency (default = 1)"), + OPT_STRING ('f', "ftype", &ftype, "type", "Specify futex type: WW, PI, TP, all (default)"), + OPT_INTEGER ('L', "loadlat", &loadlat, "Specify load latency (default = 1)"), + OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds, default = 10s)"), + OPT_BOOLEAN ('S', "shared", &fshared, "Use shared futexes instead of private ones"), + OPT_BOOLEAN ('T', "timestat", &timestat, "Track lock/unlock syscall times"), + OPT_UINTEGER('t', "threads", &nthreads, "Specify number of threads, default = # of CPUs"), + OPT_BOOLEAN ('v', "verbose", &verbose, "Verbose mode: display thread-level details"), + OPT_INTEGER ('w', "wait-ratio", &wratio, "Specify <n>/1024 of load is 1us sleep, default = 0"), + OPT_END() +}; + +static const char * const bench_futex_mutex_usage[] = { + "perf bench futex mutex <options>", + NULL +}; + +/** + * futex_cmpxchg() - atomic compare and exchange + * @uaddr: The address of the futex to be modified + * @old: The expected value of the futex + * @new: The new value to try and assign to the futex + * + * Implement cmpxchg using gcc atomic builtins. + * + * Return: the old futex value.
+ */ +static inline futex_t futex_cmpxchg(futex_t *uaddr, futex_t old, futex_t new) +{ + return __sync_val_compare_and_swap(uaddr, old, new); +} + +/** + * futex_xchg() - atomic exchange + * @uaddr: The address of the futex to be modified + * @new: The new value to assign to the futex + * + * Implement xchg using gcc atomic builtins. A sequentially consistent + * exchange is used because __sync_lock_test_and_set() is only an acquire + * barrier, which is too weak for the releasing store done by the unlock + * functions on weakly ordered architectures. + * + * Return: the old futex value. + */ +static inline futex_t futex_xchg(futex_t *uaddr, futex_t new) +{ + return __atomic_exchange_n(uaddr, new, __ATOMIC_SEQ_CST); +} + +/** + * atomic_dec_return - atomically decrement & return the new value + * @uaddr: The address of the futex to be decremented + * Return: The new value + */ +static inline int atomic_dec_return(futex_t *uaddr) +{ + return __sync_sub_and_fetch(uaddr, 1); +} + +/** + * atomic_inc_return - atomically increment & return the new value + * @uaddr: The address of the futex to be incremented + * Return: The new value + */ +static inline int atomic_inc_return(futex_t *uaddr) +{ + return __sync_add_and_fetch(uaddr, 1); +} + +/**********************[ MUTEX lock/unlock functions ]*********************/ + +/* + * Wait-wake futex lock/unlock functions (Glibc implementation) + * futex value: 0 - unlocked + * 1 - locked + * 2 - locked with waiters (contended) + */ +static void ww_mutex_lock(futex_t *futex, int tid) +{ + struct timespec stime, etime; + futex_t val = *futex; + int ret; + + if (!val) { + val = futex_cmpxchg(futex, 0, 1); + if (val == 0) + return; + } + + for (;;) { + if (val != 2) { + /* + * Force value to 2 to indicate waiter + */ + val = futex_xchg(futex, 2); + if (val == 0) + return; + } + if (timestat) { + clock_gettime(CLOCK_REALTIME, &stime); + ret = futex_wait(futex, 2, NULL, flags); + clock_gettime(CLOCK_REALTIME, &etime); + systime_add(tid, TIME_LOCK, &stime, &etime); + } else { + ret = futex_wait(futex, 2, NULL, flags); + } + + stat_inc(tid, STAT_LOCKS); + if (ret < 0) { + if (errno == EAGAIN) + stat_inc(tid, STAT_EAGAINS); + else + stat_inc(tid, STAT_LOCKERRS);
+ } + + val = *futex; + } +} + +static void ww_mutex_unlock(futex_t *futex, int tid) +{ + struct timespec stime, etime; + futex_t val; + int ret; + + val = futex_xchg(futex, 0); + + if (val == 2) { + stat_inc(tid, STAT_UNLOCKS); + if (timestat) { + clock_gettime(CLOCK_REALTIME, &stime); + ret = futex_wake(futex, 1, flags); + clock_gettime(CLOCK_REALTIME, &etime); + systime_add(tid, TIME_UNLK, &stime, &etime); + } else { + ret = futex_wake(futex, 1, flags); + } + if (ret < 0) + stat_inc(tid, STAT_UNLKERRS); + else + stat_add(tid, STAT_WAKEUPS, ret); + } +} + +/* + * Alternate wait-wake futex lock/unlock functions with thread_id lock word + */ +static void ww2_mutex_lock(futex_t *futex, int tid) +{ + struct timespec stime, etime; + futex_t val = *futex; + int ret; + + if (!val) { + val = futex_cmpxchg(futex, 0, thread_id); + if (val == 0) + return; + } + + for (;;) { + /* + * Set the FUTEX_WAITERS bit, if not set yet. + */ + while (!(val & FUTEX_WAITERS)) { + futex_t old; + + if (!val) { + val = futex_cmpxchg(futex, 0, thread_id); + if (val == 0) + return; + continue; + } + old = futex_cmpxchg(futex, val, val | FUTEX_WAITERS); + if (old == val) { + val |= FUTEX_WAITERS; + break; + } + val = old; + } + if (timestat) { + clock_gettime(CLOCK_REALTIME, &stime); + ret = futex_wait(futex, val, NULL, flags); + clock_gettime(CLOCK_REALTIME, &etime); + systime_add(tid, TIME_LOCK, &stime, &etime); + } else { + ret = futex_wait(futex, val, NULL, flags); + } + stat_inc(tid, STAT_LOCKS); + if (ret < 0) { + if (errno == EAGAIN) + stat_inc(tid, STAT_EAGAINS); + else + stat_inc(tid, STAT_LOCKERRS); + } + + val = *futex; + } +} + +static void ww2_mutex_unlock(futex_t *futex, int tid) +{ + struct timespec stime, etime; + futex_t val; + int ret; + + val = futex_xchg(futex, 0); + + if ((val & FUTEX_TID_MASK) != thread_id) + stat_inc(tid, STAT_UNLKERRS); + + if (val & FUTEX_WAITERS) { + stat_inc(tid, STAT_UNLOCKS); + if (timestat) { + clock_gettime(CLOCK_REALTIME, &stime); + ret = 
futex_wake(futex, 1, flags); + clock_gettime(CLOCK_REALTIME, &etime); + systime_add(tid, TIME_UNLK, &stime, &etime); + } else { + ret = futex_wake(futex, 1, flags); + } + if (ret < 0) + stat_inc(tid, STAT_UNLKERRS); + else + stat_add(tid, STAT_WAKEUPS, ret); + } +} + +/* + * PI futex lock/unlock functions + */ +static void pi_mutex_lock(futex_t *futex, int tid) +{ + struct timespec stime, etime; + futex_t val; + int ret; + + val = futex_cmpxchg(futex, 0, thread_id); + if (val == 0) + return; + + /* + * Retry if an error happens + */ + for (;;) { + if (timestat) { + clock_gettime(CLOCK_REALTIME, &stime); + ret = futex_lock_pi(futex, NULL, flags); + clock_gettime(CLOCK_REALTIME, &etime); + systime_add(tid, TIME_LOCK, &stime, &etime); + } else { + ret = futex_lock_pi(futex, NULL, flags); + } + stat_inc(tid, STAT_LOCKS); + if (ret >= 0) + break; + stat_inc(tid, STAT_LOCKERRS); + } +} + +static void pi_mutex_unlock(futex_t *futex, int tid) +{ + struct timespec stime, etime; + futex_t val; + int ret; + + val = futex_cmpxchg(futex, thread_id, 0); + if (val == thread_id) + return; + + if (timestat) { + clock_gettime(CLOCK_REALTIME, &stime); + ret = futex_unlock_pi(futex, flags); + clock_gettime(CLOCK_REALTIME, &etime); + systime_add(tid, TIME_UNLK, &stime, &etime); + } else { + ret = futex_unlock_pi(futex, flags); + } + if (ret < 0) + stat_inc(tid, STAT_UNLKERRS); + else + stat_add(tid, STAT_WAKEUPS, ret); + stat_inc(tid, STAT_UNLOCKS); +} + +/* + * TP futex lock/unlock functions + */ +static void tp_mutex_lock(futex_t *futex, int tid) +{ + struct timespec stime, etime; + futex_t val; + int ret; + + val = futex_cmpxchg(futex, 0, thread_id); + if (val == 0) + return; + + /* + * Retry if an error happens + */ + for (;;) { + if (timestat) { + clock_gettime(CLOCK_REALTIME, &stime); + ret = futex_lock(futex, NULL, flags); + clock_gettime(CLOCK_REALTIME, &etime); + systime_add(tid, TIME_LOCK, &stime, &etime); + } else { + ret = futex_lock(futex, NULL, flags); + } + stat_inc(tid, 
STAT_LOCKS); + if (ret >= 0) + break; + stat_inc(tid, STAT_LOCKERRS); + } + /* + * Get # of sleeps & locking method + */ + stat_add(tid, STAT_SLEEPS, ret >> 16); + ret &= 0xff; + if (!ret) + stat_inc(tid, STAT_STEALS); + else if (ret == 2) + stat_inc(tid, STAT_HANDOFFS); +} + +static void tp_mutex_unlock(futex_t *futex, int tid) +{ + struct timespec stime, etime; + futex_t val; + int ret; + + val = futex_cmpxchg(futex, thread_id, 0); + if (val == thread_id) + return; + + if (timestat) { + clock_gettime(CLOCK_REALTIME, &stime); + ret = futex_unlock(futex, flags); + clock_gettime(CLOCK_REALTIME, &etime); + systime_add(tid, TIME_UNLK, &stime, &etime); + } else { + ret = futex_unlock(futex, flags); + } + stat_inc(tid, STAT_UNLOCKS); + if (ret < 0) + stat_inc(tid, STAT_UNLKERRS); + else + stat_add(tid, STAT_WAKEUPS, ret); +} + +/**************************************************************************/ + +/* + * Load function + */ +static inline void load(int tid) +{ + int n = loadlat; + + /* + * Optionally does a 1us sleep instead if wratio is defined and + * is within bound. 
+ */ + if (wratio && (((counter++ + tid) & 0x3ff) < wratio)) { + usleep(1); + return; + } + + while (n-- > 0) + cpu_relax(); +} + +static inline void csdelay(void) +{ + int n = locklat; + + while (n-- > 0) + cpu_relax(); +} + +static void toggle_done(int sig __maybe_unused, + siginfo_t *info __maybe_unused, + void *uc __maybe_unused) +{ + /* inform all threads that we're done for the day */ + done = true; + gettimeofday(&end, NULL); + timersub(&end, &start, &runtime); + if (sig) + exit_now = true; +} + +static void *mutex_workerfn(void *arg) +{ + long tid = (long)arg; + struct worker *w = &worker[tid]; + lock_fn_t lock_fn = mutex_lock_fn; + unlock_fn_t unlock_fn = mutex_unlock_fn; + + thread_id = gettid(); + counter = 0; + + atomic_dec_return(&threads_starting); + + /* + * Busy wait until asked to start + */ + while (!worker_start) + cpu_relax(); + + do { + lock_fn(w->futex, tid); + load(tid); + unlock_fn(w->futex, tid); + w->stats[STAT_OPS]++; /* One more locking operation */ + csdelay(); + } while (!done); + + if (verbose) + printf("[thread %3ld (%d)] exited.\n", tid, thread_id); + atomic_inc_return(&threads_stopping); + return NULL; +} + +static void create_threads(struct worker *w, pthread_attr_t *thread_attr, + void *(*workerfn)(void *arg), long tid) +{ + cpu_set_t cpu; + + /* + * Bind each thread to a CPU + */ + CPU_ZERO(&cpu); + CPU_SET(tid % ncpus, &cpu); + w->futex = pfutex; + + if (pthread_attr_setaffinity_np(thread_attr, sizeof(cpu_set_t), &cpu)) + err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + + if (pthread_create(&w->thread, thread_attr, workerfn, (void *)tid)) + err(EXIT_FAILURE, "pthread_create"); +} + +static int futex_mutex_type(const char **ptype) +{ + const char *type = *ptype; + + if (!strcasecmp(type, "WW")) { + *ptype = "WW"; + mutex_lock_fn = ww_mutex_lock; + mutex_unlock_fn = ww_mutex_unlock; + } else if (!strcasecmp(type, "WW2")) { + *ptype = "WW2"; + mutex_lock_fn = ww2_mutex_lock; + mutex_unlock_fn = ww2_mutex_unlock; + } else if 
(!strcasecmp(type, "PI")) { + *ptype = "PI"; + mutex_lock_fn = pi_mutex_lock; + mutex_unlock_fn = pi_mutex_unlock; + } else if (!strcasecmp(type, "TP")) { + *ptype = "TP"; + mutex_lock_fn = tp_mutex_lock; + mutex_unlock_fn = tp_mutex_unlock; + + /* + * Check if TP futex is supported. Only trust errno when the + * probe call itself failed; a stale ENOSYS must not count. + */ + if (futex_unlock(&global_futex, 0) < 0 && + errno == ENOSYS) { + fprintf(stderr, + "\nTP futexes are not supported by the kernel!\n"); + return -1; + } + } else { + return -1; + } + return 0; +} + +/* + * Run one locking test: select the mutex implementation named by + * futex_type via proc_type(), run nthreads workers for nsecs seconds and + * print the collected statistics. + */ +static void futex_test_driver(const char *futex_type, + int (*proc_type)(const char **ptype), + void *(*workerfn)(void *arg)) +{ + u64 us; + int i, j; + struct worker total; + double avg, stddev; + pthread_attr_t thread_attr; + + /* + * There is an extra blank line before the error counts to highlight + * them. + */ + const char *desc[STAT_NUM] = { + [STAT_OPS] = "Total exclusive locking ops", + [STAT_LOCKS] = "Exclusive lock futex calls", + [STAT_UNLOCKS] = "Exclusive unlock futex calls", + [STAT_SLEEPS] = "Exclusive lock sleeps", + [STAT_WAKEUPS] = "Process wakeups", + [STAT_EAGAINS] = "EAGAIN lock errors", + [STAT_HANDOFFS] = "Lock handoffs", + [STAT_STEALS] = "Lock stealings", + [STAT_LOCKERRS] = "\nExclusive lock errors", + [STAT_UNLKERRS] = "\nExclusive unlock errors", + }; + + if (exit_now) + return; + + if (proc_type(&futex_type) < 0) { + fprintf(stderr, "Unknown futex type '%s'!\n", futex_type); + exit(1); + } + + printf("\n=====================================\n"); + printf("[PID %d]: %d threads doing %s futex lockings (load=%d) for %d secs.\n\n", + getpid(), nthreads, futex_type, loadlat, nsecs); + + init_stats(&throughput_stats); + + *pfutex = 0; + done = false; + threads_starting = nthreads; + /* + * worker_start and threads_stopping are global and survive a + * previous run; reset them so that the threads of this run wait + * for the start signal and the stop count starts from zero. + */ + worker_start = 0; + threads_stopping = 0; + pthread_attr_init(&thread_attr); + + for (i = 0; i < (int)nthreads; i++) + create_threads(&worker[i], &thread_attr, workerfn, i); + + while (threads_starting) + usleep(1); + + gettimeofday(&start, NULL); + + /* + * Start the test + * + * Unlike the other futex benchmarks, this one uses
busy waiting + * instead of pthread APIs to make sure that all the threads (except + * the one that shares CPU with the parent) will start more or less + * simultaneously. + */ + atomic_inc_return(&worker_start); + sleep(nsecs); + toggle_done(0, NULL, NULL); + + /* + * In verbose mode, we check if all the threads have been stopped + * after 1ms and report the status if some are still running. + */ + if (verbose) { + usleep(1000); + if (threads_stopping != nthreads) { + printf("%d threads still running 1ms after timeout" + " - futex = 0x%x\n", + nthreads - threads_stopping, *pfutex); + /* + * If the threads are still running after 10s, + * go directly to statistics printing and exit. + */ + for (i = 10; i > 0; i--) { + sleep(1); + if (threads_stopping == nthreads) + break; + } + if (!i) { + printf("*** Threads waiting ABORTED!! ***\n\n"); + goto print_stat; + } + } + } + + for (i = 0; i < (int)nthreads; i++) { + int ret = pthread_join(worker[i].thread, NULL); + + if (ret) + err(EXIT_FAILURE, "pthread_join"); + } + +print_stat: + pthread_attr_destroy(&thread_attr); + + us = runtime.tv_sec * 1000000 + runtime.tv_usec; + if (!us) + us = 1; /* a zero-length run must not divide by zero below */ + memset(&total, 0, sizeof(total)); + for (i = 0; i < (int)nthreads; i++) { + /* + * Get a rounded estimate of the # of locking ops/sec.
+ */ + u64 tp = (u64)worker[i].stats[STAT_OPS] * 1000000 / us; + + for (j = 0; j < STAT_NUM; j++) + total.stats[j] += worker[i].stats[j]; + + /* + * Sum the syscall times as well so that the syscall time + * report below has data to print. + */ + for (j = 0; j < TIME_NUM; j++) + total.times[j] += worker[i].times[j]; + + update_stats(&throughput_stats, tp); + if (verbose) + printf("[thread %3d] futex: %p [ %'ld ops/sec ]\n", + i, worker[i].futex, (long)tp); + } + + avg = avg_stats(&throughput_stats); + stddev = stddev_stats(&throughput_stats); + + printf("Locking statistics:\n"); + printf("%-28s = %'.2fs\n", "Test run time", (double)us/1000000); + for (i = 0; i < STAT_NUM; i++) + if (total.stats[i]) + printf("%-28s = %'d\n", desc[i], total.stats[i]); + + if (timestat && total.times[TIME_LOCK]) { + printf("\nSyscall times:\n"); + if (total.stats[STAT_LOCKS]) + printf("Avg exclusive lock syscall = %'ldns\n", + total.times[TIME_LOCK]/total.stats[STAT_LOCKS]); + if (total.stats[STAT_UNLOCKS]) + printf("Avg exclusive unlock syscall = %'ldns\n", + total.times[TIME_UNLK]/total.stats[STAT_UNLOCKS]); + } + + printf("\nPercentages:\n"); + if (total.stats[STAT_LOCKS]) + printf("Exclusive lock futex calls = %.1f%%\n", + stat_percent(&total, STAT_LOCKS, STAT_OPS)); + if (total.stats[STAT_UNLOCKS]) + printf("Exclusive unlock futex calls = %.1f%%\n", + stat_percent(&total, STAT_UNLOCKS, STAT_OPS)); + if (total.stats[STAT_EAGAINS]) + printf("EAGAIN lock errors = %.1f%%\n", + stat_percent(&total, STAT_EAGAINS, STAT_LOCKS)); + if (total.stats[STAT_WAKEUPS]) + printf("Process wakeups = %.1f%%\n", + stat_percent(&total, STAT_WAKEUPS, STAT_UNLOCKS)); + + printf("\nPer-thread Locking Rates:\n"); + printf("Avg = %'d ops/sec (+- %.2f%%)\n", (int)(avg + 0.5), + rel_stddev_stats(stddev, avg)); + printf("Min = %'d ops/sec\n", (int)throughput_stats.min); + printf("Max = %'d ops/sec\n", (int)throughput_stats.max); + + if (*pfutex != 0) + printf("\nResidual futex value = 0x%x\n", *pfutex); + + /* Clear the workers area */ + memset(worker, 0, sizeof(*worker) * nthreads); +} + +int bench_futex_mutex(int argc, const char **argv, + const char *prefix __maybe_unused) +{ +
struct sigaction act; + + argc = parse_options(argc, argv, mutex_options, + bench_futex_mutex_usage, 0); + if (argc) + goto err; + + ncpus = sysconf(_SC_NPROCESSORS_ONLN); + + /* + * Zero the whole structure first: sa_flags was otherwise left + * uninitialized. SA_SIGINFO is required for a sa_sigaction handler. + */ + memset(&act, 0, sizeof(act)); + sigfillset(&act.sa_mask); + act.sa_sigaction = toggle_done; + act.sa_flags = SA_SIGINFO; + sigaction(SIGINT, &act, NULL); + + if (!nthreads) + nthreads = ncpus; + + /* + * Since the allocated memory buffer may not be properly cacheline + * aligned, we need to allocate one more than needed and manually + * adjust array boundary to be cacheline aligned. + */ + worker_alloc = calloc(nthreads + 1, sizeof(*worker)); + if (!worker_alloc) + err(EXIT_FAILURE, "calloc"); + worker = (void *)((unsigned long)&worker_alloc[1] & + ~(CACHELINE_SIZE - 1)); + + if (!fshared) + flags = FUTEX_PRIVATE_FLAG; + + if (!ftype || !strcmp(ftype, "all")) { + futex_test_driver("WW", futex_mutex_type, mutex_workerfn); + futex_test_driver("PI", futex_mutex_type, mutex_workerfn); + futex_test_driver("TP", futex_mutex_type, mutex_workerfn); + } else { + futex_test_driver(ftype, futex_mutex_type, mutex_workerfn); + } + free(worker_alloc); + return 0; +err: + usage_with_options(bench_futex_mutex_usage, mutex_options); + exit(EXIT_FAILURE); +} diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index ba7c735..be1eb7a 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -87,6 +87,29 @@ val, opflags); } +#ifndef FUTEX_LOCK +#define FUTEX_LOCK 0xff /* Disable the use of TP futexes */ +#define FUTEX_UNLOCK 0xff +#endif /* FUTEX_LOCK */ + +/** + * futex_lock() - lock the TP futex + */ +static inline int +futex_lock(u_int32_t *uaddr, struct timespec *timeout, int opflags) +{ + return futex(uaddr, FUTEX_LOCK, 0, timeout, NULL, 0, opflags); +} + +/** + * futex_unlock() - unlock the TP futex + */ +static inline int +futex_unlock(u_int32_t *uaddr, int opflags) +{ + return futex(uaddr, FUTEX_UNLOCK, 0, NULL, NULL, 0, opflags); +} + #ifndef HAVE_PTHREAD_ATTR_SETAFFINITY_NP #include <pthread.h> static inline int
pthread_attr_setaffinity_np(pthread_attr_t *attr, diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index a1cddc6..bf4418d 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -20,6 +20,7 @@ #include "builtin.h" #include "bench/bench.h" +#include <locale.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -62,6 +63,7 @@ struct bench { { "requeue", "Benchmark for futex requeue calls", bench_futex_requeue }, /* pi-futexes */ { "lock-pi", "Benchmark for futex lock_pi calls", bench_futex_lock_pi }, + { "mutex", "Benchmark for mutex locks using futexes", bench_futex_mutex }, { "all", "Run all futex benchmarks", NULL }, { NULL, NULL, NULL } }; @@ -215,6 +217,7 @@ int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused) { struct collection *coll; int ret = 0; + char *locale; if (argc < 2) { /* No collection specified. */ @@ -222,6 +225,13 @@ int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused) goto end; } + /* + * Enable better number formatting. 
+ */ + locale = setlocale(LC_NUMERIC, ""); + if (!locale || !strcmp(locale, "C")) /* setlocale() returns NULL on failure */ + setlocale(LC_NUMERIC, "en_US"); + argc = parse_options(argc, argv, bench_options, bench_usage, PARSE_OPT_STOP_AT_NON_OPTION); diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index c747bfd..6ec5f2d 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -57,3 +57,7 @@ check arch/x86/lib/memcpy_64.S -B -I "^EXPORT_SYMBOL" -I "^#include <asm/ check arch/x86/lib/memset_64.S -B -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" check include/uapi/asm-generic/mman.h -B -I "^#include <\(uapi/\)*asm-generic/mman-common.h>" check include/uapi/linux/mman.h -B -I "^#include <\(uapi/\)*asm/mman.h>" + +# need to link to the latest futex.h header +[[ -f ../../include/uapi/linux/futex.h ]] && + ln -sf ../../../../../include/uapi/linux/futex.h util/include/linux -- 1.8.3.1 -- To unsubscribe from this list: send the line "unsubscribe linux-doc" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html