This microbenchmark simulates how the use of different futex types can affect the performance of userspace rwlocks. The usage is: perf bench futex rwlock <options> Three sets of simple rwlock lock and unlock functions are implemented using wait-wake futexes, TP futexes and the glibc rwlock respectively. This microbenchmark then runs the locking rate measurement tests using either one of those rwlocks or all of them consecutively. Signed-off-by: Waiman Long <longman@xxxxxxxxxx> --- tools/perf/bench/bench.h | 1 + tools/perf/bench/futex-locks.c | 842 ++++++++++++++++++++++++++++++++++++++++- tools/perf/bench/futex.h | 24 +- tools/perf/builtin-bench.c | 1 + 4 files changed, 850 insertions(+), 18 deletions(-) diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index b0632df..a7e2037 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -37,6 +37,7 @@ /* pi futexes */ int bench_futex_lock_pi(int argc, const char **argv, const char *prefix); int bench_futex_mutex(int argc, const char **argv, const char *prefix); +int bench_futex_rwlock(int argc, const char **argv, const char *prefix); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/futex-locks.c b/tools/perf/bench/futex-locks.c index ec1e627..4008691 100644 --- a/tools/perf/bench/futex-locks.c +++ b/tools/perf/bench/futex-locks.c @@ -10,6 +10,12 @@ * unlock functions are written to implement a mutex lock using the * wait-wake, PI and TP futexes respectively. These functions serve as the * basis for measuring the locking throughput. + * + * Three sets of simple reader/writer lock and unlock functions are also + * implemented using the wait-wake and TP futexes as well as the Glibc + * rwlock respectively for performance measurement purposes. The TP futex + * writer lock/unlock functions are the same as those of the mutex functions. + * That is not the case for the wait-wake futexes.
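+ * + * An illustrative invocation (the flags are defined in the rwlock_options + * table added below; the specific numbers are arbitrary): + * perf bench futex rwlock -t 8 -R 70 -f WW + * runs 8 threads, each mixing reader and writer operations at a 70% read + * percentage, against the wait-wake rwlock.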
*/ #include <pthread.h> @@ -21,11 +27,13 @@ #include <subcmd/parse-options.h> #include <linux/compiler.h> #include <linux/kernel.h> +#include <asm/byteorder.h> #include <errno.h> #include "bench.h" #include "futex.h" #include <err.h> +#include <limits.h> #include <stdlib.h> #include <sys/time.h> @@ -42,15 +50,25 @@ */ enum { STAT_OPS, /* # of exclusive locking operations */ + STAT_SOPS, /* # of shared locking operations */ STAT_LOCKS, /* # of exclusive lock futex calls */ STAT_UNLOCKS, /* # of exclusive unlock futex calls */ STAT_SLEEPS, /* # of exclusive lock sleeps */ + STAT_SLOCKS, /* # of shared lock futex calls */ + STAT_SUNLOCKS, /* # of shared unlock futex calls */ + STAT_SSLEEPS, /* # of shared lock sleeps */ STAT_EAGAINS, /* # of EAGAIN errors */ STAT_WAKEUPS, /* # of wakeups (unlock return) */ STAT_HANDOFFS, /* # of lock handoff (TP only) */ STAT_STEALS, /* # of lock steals (TP only) */ + STAT_SLRETRIES, /* # of shared lock retries */ + STAT_SURETRIES, /* # of shared unlock retries */ + STAT_SALONES, /* # of shared locking alone ops */ + STAT_SGROUPS, /* # of shared locking group ops */ STAT_LOCKERRS, /* # of exclusive lock errors */ STAT_UNLKERRS, /* # of exclusive unlock errors */ + STAT_SLOCKERRS, /* # of shared lock errors */ + STAT_SUNLKERRS, /* # of shared unlock errors */ STAT_NUM /* Total # of statistical count */ }; @@ -60,6 +78,8 @@ enum { enum { TIME_LOCK, /* Total exclusive lock syscall time */ TIME_UNLK, /* Total exclusive unlock syscall time */ + TIME_SLOCK, /* Total shared lock syscall time */ + TIME_SUNLK, /* Total shared unlock syscall time */ TIME_NUM, }; @@ -102,7 +122,11 @@ struct worker { static unsigned int threads_stopping; static struct stats throughput_stats; static lock_fn_t mutex_lock_fn; +static lock_fn_t read_lock_fn; +static lock_fn_t write_lock_fn; static unlock_fn_t mutex_unlock_fn; +static unlock_fn_t read_unlock_fn; +static unlock_fn_t write_unlock_fn; /* * Lock/unlock syscall time macro */ @@ -150,6 +174,26 @@ static inline void stat_inc(int tid __maybe_unused, int item __maybe_unused) #endif /* + * For rwlock, the default is to have each thread act as both reader and + * writer. The use of the -x option will force the use of separate threads + * for readers and writers. The numbers of reader and writer threads are + * determined by the reader percentage and the total number of threads used. + * So the actual ratio of reader to writer operations may not closely match + * the given reader percentage. + */ +static bool xthread; +static bool pwriter; /* Prefer writer flag */ +static int rthread_threshold = -1; /* (tid < threshold) => reader */ +static unsigned int rpercent = 50; /* Reader percentage */ + +/* + * Glibc rwlock + */ +static pthread_rwlock_t __cacheline_aligned rwlock; +static pthread_rwlockattr_t rwlock_attr; +static bool rwlock_inited, rwlock_attr_inited; + +/* * The latency value within a lock critical section (load) and between locking * operations is in terms of the number of cpu_relax() calls that are being * issued.
@@ -172,6 +216,27 @@ static inline void stat_inc(int tid __maybe_unused, int item __maybe_unused) NULL }; +static const struct option rwlock_options[] = { + OPT_INTEGER ('d', "locklat", &locklat, "Specify inter-locking latency (default = 1)"), + OPT_STRING ('f', "ftype", &ftype, "type", "Specify futex type: WW, TP, GC, all (default)"), + OPT_INTEGER ('L', "loadlat", &loadlat, "Specify load latency (default = 1)"), + OPT_UINTEGER('R', "read-%", &rpercent, "Specify reader percentage (default 50%)"), + OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds, default = 10s)"), + OPT_BOOLEAN ('S', "shared", &fshared, "Use shared futexes instead of private ones"), + OPT_BOOLEAN ('T', "timestat", &timestat, "Track lock/unlock syscall times"), + OPT_UINTEGER('t', "threads", &nthreads, "Specify number of threads, default = # of CPUs"), + OPT_BOOLEAN ('v', "verbose", &verbose, "Verbose mode: display thread-level details"), + OPT_BOOLEAN ('W', "prefer-wr", &pwriter, "Prefer writers instead of readers"), + OPT_INTEGER ('w', "wait-ratio", &wratio, "Specify <n>/1024 of load is 1us sleep, default = 0"), + OPT_BOOLEAN ('x', "xthread", &xthread, "Use separate reader/writer threads"), + OPT_END() +}; + +static const char * const bench_futex_rwlock_usage[] = { + "perf bench futex rwlock <options>", + NULL +}; + /** * futex_cmpxchg() - atomic compare and exchange * @uaddr: The address of the futex to be modified @@ -221,6 +286,17 @@ static inline int atomic_inc_return(futex_t *uaddr) return __sync_add_and_fetch(uaddr, 1); } +/** + * atomic_add_return() - atomically add a number & return the new value + * @uaddr: The address of the futex to be added + * @val : The integer value to be added + * Return: The new value + */ +static inline int atomic_add_return(futex_t *uaddr, int val) +{ + return __sync_add_and_fetch(uaddr, val); +} + /**********************[ MUTEX lock/unlock functions ]*********************/ /* @@ -502,6 +578,498 @@ static void tp_mutex_unlock(futex_t *futex, int tid) stat_add(tid, STAT_WAKEUPS, ret); } +/**********************[ RWLOCK lock/unlock functions ]********************/ + +/* + * Wait-wake futex reader/writer lock/unlock functions + * + * This implementation is based on the reader-preferring futex eventcount + * rwlocks posted on http://locklessinc.com/articles/sleeping_rwlocks with + * some modifications. + * + * It is assumed that the passed-in futex has sufficient trailing space to + * be used by the bigger reader/writer lock structure.
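+ * + * In the structure below, the writer-lock bit occupies the low byte of + * the lock word (RW_WLOCKED) and the reader count occupies the upper + * bits (stepped in units of RW_READER); the LSB() macro exposes just + * that low byte, so the writer bit can be cleared with a single byte + * store on both little- and big-endian machines.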
+ */ +#ifdef __LITTLE_ENDIAN +#define LSB(field) unsigned char field +#else +#define LSB(field) struct { \ + unsigned char __pad[3]; \ + unsigned char field; \ + } +#endif + +#define RW_WLOCKED (1U << 0) +#define RW_READER (1U << 8) +#define RW_EC_CONTEND (1U << 0) +#define RW_EC_INC (1U << 8) + +struct rwlock { + /* + * Bits 0-7 : writer lock + * Bits 8-31: reader count + */ + union { + futex_t val; + LSB(wlocked); + } lock; + + /* Writer event count */ + union { + futex_t val; + LSB(contend); + } write_ec; + + /* Reader event count */ + union { + futex_t val; + LSB(contend); + } read_ec; +}; + +static struct rwlock __cacheline_aligned rwfutex; + +/* + * Reader preferring rwlock functions + */ +static void ww_write_lock(futex_t *futex __maybe_unused, int tid) +{ + struct timespec stime, etime; + struct rwlock *rw = &rwfutex; + + for (;;) { + futex_t ec = rw->write_ec.val | RW_EC_CONTEND; + int ret; + + /* Set the write lock if there is no reader */ + if (!rw->lock.val && + (futex_cmpxchg(&rw->lock.val, 0, RW_WLOCKED) == 0)) + return; + + rw->write_ec.contend = 1; + + if (timestat) { + clock_gettime(CLOCK_REALTIME, &stime); + ret = futex_wait(&rw->write_ec.val, ec, NULL, flags); + clock_gettime(CLOCK_REALTIME, &etime); + systime_add(tid, TIME_LOCK, &stime, &etime); + } else { + ret = futex_wait(&rw->write_ec.val, ec, NULL, flags); + } + + stat_inc(tid, STAT_LOCKS); + if (ret < 0) { + if (errno == EAGAIN) + stat_inc(tid, STAT_EAGAINS); + else + stat_inc(tid, STAT_LOCKERRS); + } + + /* Other writers may exist */ + rw->write_ec.contend = 1; + } +} + +static void ww_write_unlock(futex_t *futex __maybe_unused, int tid) +{ + struct timespec stime, etime; + struct rwlock *rw = &rwfutex; + int ret; + + rw->lock.wlocked = 0; /* Unlock */ + + /* Wake all the readers */ + atomic_add_return(&rw->read_ec.val, RW_EC_INC); + if (rw->read_ec.contend) { + rw->read_ec.contend = 0; + if (timestat) { + clock_gettime(CLOCK_REALTIME, &stime); + ret = futex_wake(&rw->read_ec.val, INT_MAX, flags); + clock_gettime(CLOCK_REALTIME, &etime); + systime_add(tid, TIME_UNLK, &stime, &etime); + } else { + ret = futex_wake(&rw->read_ec.val, INT_MAX, flags); + } + stat_inc(tid, STAT_UNLOCKS); + if (ret < 0) + stat_inc(tid, STAT_UNLKERRS); + else + stat_add(tid, STAT_WAKEUPS, ret); + if (ret > 0) + return; + } + + /* Wake a writer */ + atomic_add_return(&rw->write_ec.val, RW_EC_INC); + if (rw->write_ec.contend) { + rw->write_ec.contend = 0; + if (timestat) { + clock_gettime(CLOCK_REALTIME, &stime); + ret = futex_wake(&rw->write_ec.val, 1, flags); + clock_gettime(CLOCK_REALTIME, &etime); + systime_add(tid, TIME_UNLK, &stime, &etime); + } else { + ret = futex_wake(&rw->write_ec.val, 1, flags); + } + stat_inc(tid, STAT_UNLOCKS); + if (ret < 0) + stat_inc(tid, STAT_UNLKERRS); + else + stat_add(tid, STAT_WAKEUPS, ret); + } +} + +static void ww_read_lock(futex_t *futex __maybe_unused, int tid) +{ + struct timespec stime, etime; + struct rwlock *rw = &rwfutex; + futex_t ec = rw->read_ec.val, state; + int ret; + + state = atomic_add_return(&rw->lock.val, RW_READER); + + while (state & RW_WLOCKED) { + ec |= RW_EC_CONTEND; + rw->read_ec.contend = 1; + + /* Sleep until no longer held by a writer */ + if (timestat) { + clock_gettime(CLOCK_REALTIME, &stime); + ret = futex_wait(&rw->read_ec.val, ec, NULL, flags); + clock_gettime(CLOCK_REALTIME, &etime); + systime_add(tid, TIME_SLOCK, &stime, &etime); + } else { + ret = futex_wait(&rw->read_ec.val, ec, NULL, flags); + } + stat_inc(tid, STAT_SLOCKS); + if (ret < 0) { + if (errno == EAGAIN) 
+ stat_inc(tid, STAT_EAGAINS); + else + stat_inc(tid, STAT_SLOCKERRS); + } + + ec = rw->read_ec.val; + barrier(); + state = rw->lock.val; + } +} + +static void ww_read_unlock(futex_t *futex __maybe_unused, int tid) +{ + struct timespec stime, etime; + struct rwlock *rw = &rwfutex; + futex_t state; + int ret; + + /* Read unlock */ + state = atomic_add_return(&rw->lock.val, -RW_READER); + + /* Other readers are still there, don't do anything */ + if (state >> 8) + return; + + /* We may need to wake up a writer */ + atomic_add_return(&rw->write_ec.val, RW_EC_INC); + if (rw->write_ec.contend) { + rw->write_ec.contend = 0; + if (timestat) { + clock_gettime(CLOCK_REALTIME, &stime); + ret = futex_wake(&rw->write_ec.val, 1, flags); + clock_gettime(CLOCK_REALTIME, &etime); + systime_add(tid, TIME_SUNLK, &stime, &etime); + } else { + ret = futex_wake(&rw->write_ec.val, 1, flags); + } + stat_inc(tid, STAT_SUNLOCKS); + if (ret < 0) + stat_inc(tid, STAT_SUNLKERRS); + else + stat_add(tid, STAT_WAKEUPS, ret); + } +} + +/* + * Writer preferring rwlock functions + */ +#define ww2_write_lock ww_write_lock +#define ww2_read_unlock ww_read_unlock + +static void ww2_write_unlock(futex_t *futex __maybe_unused, int tid) +{ + struct timespec stime, etime; + struct rwlock *rw = &rwfutex; + int ret; + + rw->lock.wlocked = 0; /* Unlock */ + + /* Wake a writer */ + atomic_add_return(&rw->write_ec.val, RW_EC_INC); + if (rw->write_ec.contend) { + rw->write_ec.contend = 0; + if (timestat) { + clock_gettime(CLOCK_REALTIME, &stime); + ret = futex_wake(&rw->write_ec.val, 1, flags); + clock_gettime(CLOCK_REALTIME, &etime); + systime_add(tid, TIME_UNLK, &stime, &etime); + } else { + ret = futex_wake(&rw->write_ec.val, 1, flags); + } + stat_inc(tid, STAT_UNLOCKS); + if (ret < 0) + stat_inc(tid, STAT_UNLKERRS); + else + stat_add(tid, STAT_WAKEUPS, ret); + if (ret > 0) + return; + } + + /* Wake all the readers */ + atomic_add_return(&rw->read_ec.val, RW_EC_INC); + if (rw->read_ec.contend) { + rw->read_ec.contend = 0; + if (timestat) { + clock_gettime(CLOCK_REALTIME, &stime); + ret = futex_wake(&rw->read_ec.val, INT_MAX, flags); + clock_gettime(CLOCK_REALTIME, &etime); + systime_add(tid, TIME_UNLK, &stime, &etime); + } else { + ret = futex_wake(&rw->read_ec.val, INT_MAX, flags); + } + stat_inc(tid, STAT_UNLOCKS); + if (ret < 0) + stat_inc(tid, STAT_UNLKERRS); + else + stat_add(tid, STAT_WAKEUPS, ret); + } +} + +static void ww2_read_lock(futex_t *futex __maybe_unused, int tid) +{ + struct timespec stime, etime; + struct rwlock *rw = &rwfutex; + + for (;;) { + futex_t ec = rw->read_ec.val | RW_EC_CONTEND; + futex_t state; + int ret; + + if (!rw->write_ec.contend) { + state = atomic_add_return(&rw->lock.val, RW_READER); + + if (!(state & RW_WLOCKED)) + return; + + /* Unlock */ + state = atomic_add_return(&rw->lock.val, -RW_READER); + } else { + atomic_add_return(&rw->write_ec.val, RW_EC_INC); + if (rw->write_ec.contend) { + rw->write_ec.contend = 0; + + /* Wake a writer, and then try again */ + if (timestat) { + clock_gettime(CLOCK_REALTIME, &stime); + ret = futex_wake(&rw->write_ec.val, 1, + flags); + clock_gettime(CLOCK_REALTIME, &etime); + systime_add(tid, TIME_SUNLK, &stime, + &etime); + } else { + ret = futex_wake(&rw->write_ec.val, 1, + flags); + } + stat_inc(tid, STAT_SUNLOCKS); + if (ret < 0) + stat_inc(tid, STAT_SUNLKERRS); + else + stat_add(tid, STAT_WAKEUPS, ret); + continue; + } + } + + rw->read_ec.contend = 1; + if (rw->read_ec.val != ec) + continue; + + /* Sleep until no longer held by a writer */ + if (timestat) { +
clock_gettime(CLOCK_REALTIME, &stime); + ret = futex_wait(&rw->read_ec.val, ec, NULL, flags); + clock_gettime(CLOCK_REALTIME, &etime); + systime_add(tid, TIME_SLOCK, &stime, &etime); + } else { + ret = futex_wait(&rw->read_ec.val, ec, NULL, flags); + } + stat_inc(tid, STAT_SLOCKS); + if (ret < 0) { + if (errno == EAGAIN) + stat_inc(tid, STAT_EAGAINS); + else + stat_inc(tid, STAT_SLOCKERRS); + } + } +} + +/* + * TP futex reader/writer lock/unlock functions + */ +#define tp_write_lock tp_mutex_lock +#define tp_write_unlock tp_mutex_unlock +#define FUTEX_FLAGS_MASK (3UL << 30) + +static void tp_read_lock(futex_t *futex, int tid) +{ + struct timespec stime, etime; + futex_t val; + int ret, retry = 0; + + val = futex_cmpxchg(futex, 0, FUTEX_SHARED + 1); + if (!val) + return; + + for (;;) { + futex_t old = val, new; + + /* + * Try to increment the reader count only if + * 1) the FUTEX_SHARED bit is set; and + * 2) none of the flags bits or FUTEX_SHARED_UNLOCK is set. + */ + if (!old) + new = FUTEX_SHARED + 1; + else if ((old & FUTEX_SHARED) && + !(old & (FUTEX_FLAGS_MASK|FUTEX_SHARED_UNLOCK))) + new = old + 1; + else + break; + val = futex_cmpxchg(futex, old, new); + if (val == old) + goto out; + retry++; + } + + for (;;) { + if (timestat) { + clock_gettime(CLOCK_REALTIME, &stime); + ret = futex_lock_shared(futex, NULL, flags); + clock_gettime(CLOCK_REALTIME, &etime); + systime_add(tid, TIME_SLOCK, &stime, &etime); + } else { + ret = futex_lock_shared(futex, NULL, flags); + } + stat_inc(tid, STAT_SLOCKS); + if (ret >= 0) + break; + stat_inc(tid, STAT_SLOCKERRS); + } + /* + * Get # of sleeps & locking method + */ + stat_add(tid, STAT_SSLEEPS, ret >> 16); + if (ret & 0x100) + stat_inc(tid, STAT_SALONES); /* In alone mode */ + if (ret & 0x200) + stat_inc(tid, STAT_SGROUPS); /* In group mode */ + + ret &= 0xff; + if (!ret) + stat_inc(tid, STAT_STEALS); + else if (ret == 2) + stat_inc(tid, STAT_HANDOFFS); +out: + if (unlikely(retry)) + stat_add(tid, STAT_SLRETRIES, retry); +} + +static void tp_read_unlock(futex_t *futex, int tid) +{ + struct timespec stime, etime; + futex_t old, val; + int ret, retry = 0; + + val = atomic_dec_return(futex); + if (!(val & FUTEX_SHARED)) { + fprintf(stderr, + "tp_read_unlock: Incorrect futex value = 0x%x\n", val); + exit(1); + } + + for (;;) { + /* + * Return if not the last reader, not in shared locking + * mode or the unlock bit has been set. + */ + if ((val & (FUTEX_SCNT_MASK|FUTEX_SHARED_UNLOCK)) || + !(val & FUTEX_SHARED)) + return; + + if (val & ~FUTEX_SHARED_TID_MASK) { + /* + * Only one task that can set the FUTEX_SHARED_UNLOCK + * bit will do the unlock. + */ + old = futex_cmpxchg(futex, val, + val|FUTEX_SHARED_UNLOCK); + if (old == val) + break; + } else { + /* + * Try to clear the futex. 
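+ * We are the last reader and no flag bits are set, so the + * lock can be released by atomically zeroing the whole futex + * word, skipping the unlock syscall.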
+ */ + old = futex_cmpxchg(futex, val, 0); + if (old == val) + goto out; + } + val = old; + retry++; + } + + if (timestat) { + clock_gettime(CLOCK_REALTIME, &stime); + ret = futex_unlock_shared(futex, flags); + clock_gettime(CLOCK_REALTIME, &etime); + systime_add(tid, TIME_SUNLK, &stime, &etime); + } else { + ret = futex_unlock_shared(futex, flags); + } + stat_inc(tid, STAT_SUNLOCKS); + if (ret < 0) + stat_inc(tid, STAT_SUNLKERRS); + else + stat_add(tid, STAT_WAKEUPS, ret); +out: + if (unlikely(retry)) + stat_add(tid, STAT_SURETRIES, retry); +} + +/* + * Glibc read/write lock + */ +static void gc_write_lock(futex_t *futex __maybe_unused, + int tid __maybe_unused) +{ + pthread_rwlock_wrlock(&rwlock); +} + +static void gc_write_unlock(futex_t *futex __maybe_unused, + int tid __maybe_unused) +{ + pthread_rwlock_unlock(&rwlock); +} + +static void gc_read_lock(futex_t *futex __maybe_unused, + int tid __maybe_unused) +{ + pthread_rwlock_rdlock(&rwlock); +} + +static void gc_read_unlock(futex_t *futex __maybe_unused, + int tid __maybe_unused) +{ + pthread_rwlock_unlock(&rwlock); +} + /**************************************************************************/ /* @@ -576,6 +1144,77 @@ static void *mutex_workerfn(void *arg) return NULL; } +static void *rwlock_workerfn(void *arg) +{ + long tid = (long)arg; + struct worker *w = &worker[tid]; + lock_fn_t rlock_fn = read_lock_fn; + lock_fn_t wlock_fn = write_lock_fn; + unlock_fn_t runlock_fn = read_unlock_fn; + unlock_fn_t wunlock_fn = write_unlock_fn; + + thread_id = gettid(); + counter = 0; + + atomic_dec_return(&threads_starting); + + /* + * Busy wait until asked to start + */ + while (!worker_start) + cpu_relax(); + + if (rthread_threshold >= 0) { + if (tid < rthread_threshold) { + do { + rlock_fn(w->futex, tid); + load(tid); + runlock_fn(w->futex, tid); + w->stats[STAT_SOPS]++; + csdelay(); + } while (!done); + } else { + do { + wlock_fn(w->futex, tid); + load(tid); + wunlock_fn(w->futex, tid); + w->stats[STAT_OPS]++; + csdelay(); + } while (!done); + } + goto out; + } + + while (!done) { + int rcnt = rpercent; + int wcnt = 100 - rcnt; + + do { + if (wcnt) { + wlock_fn(w->futex, tid); + load(tid); + wunlock_fn(w->futex, tid); + w->stats[STAT_OPS]++; + wcnt--; + csdelay(); + } + if (rcnt) { + rlock_fn(w->futex, tid); + load(tid); + runlock_fn(w->futex, tid); + w->stats[STAT_SOPS]++; + rcnt--; + csdelay(); + } + } while (!done && (rcnt + wcnt)); + } +out: + if (verbose) + printf("[thread %3ld (%d)] exited.\n", tid, thread_id); + atomic_inc_return(&threads_stopping); + return NULL; +} + static void create_threads(struct worker *w, pthread_attr_t *thread_attr, void *(*workerfn)(void *arg), long tid) { @@ -631,6 +1270,66 @@ static int futex_mutex_type(const char **ptype) return 0; } +static int futex_rwlock_type(const char **ptype) +{ + const char *type = *ptype; + + if (!strcasecmp(type, "WW")) { + *ptype = "WW"; + pfutex = &rwfutex.lock.val; + if (pwriter) { + read_lock_fn = ww2_read_lock; + read_unlock_fn = ww2_read_unlock; + write_lock_fn = ww2_write_lock; + write_unlock_fn = ww2_write_unlock; + } else { + read_lock_fn = ww_read_lock; + read_unlock_fn = ww_read_unlock; + write_lock_fn = ww_write_lock; + write_unlock_fn = ww_write_unlock; + } + } else if (!strcasecmp(type, "TP")) { + *ptype = "TP"; + read_lock_fn = tp_read_lock; + read_unlock_fn = tp_read_unlock; + write_lock_fn = tp_write_lock; + write_unlock_fn = tp_write_unlock; + + /* + * Check if TP futex is supported. 
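+ * A FUTEX_UNLOCK call is issued on an unowned futex; on + * kernels without TP futex support the opcode is invalid and + * the call fails with ENOSYS.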
+ */ + futex_unlock(&global_futex, 0); + if (errno == ENOSYS) { + fprintf(stderr, + "\nTP futexes are not supported by the kernel!\n"); + return -1; + } + } else if (!strcasecmp(type, "GC")) { + pthread_rwlockattr_t *attr = NULL; + + *ptype = "GC"; + read_lock_fn = gc_read_lock; + read_unlock_fn = gc_read_unlock; + write_lock_fn = gc_write_lock; + write_unlock_fn = gc_write_unlock; + if (pwriter || fshared) { + pthread_rwlockattr_init(&rwlock_attr); + attr = &rwlock_attr; + rwlock_attr_inited = true; + if (fshared) + pthread_rwlockattr_setpshared(attr, true); + if (pwriter) + pthread_rwlockattr_setkind_np(attr, + PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP); + } + pthread_rwlock_init(&rwlock, attr); + rwlock_inited = true; + } else { + return -1; + } + return 0; +} + static void futex_test_driver(const char *futex_type, int (*proc_type)(const char **ptype), void *(*workerfn)(void *arg)) @@ -647,15 +1346,25 @@ static void futex_test_driver(const char *futex_type, */ const char *desc[STAT_NUM] = { [STAT_OPS] = "Total exclusive locking ops", + [STAT_SOPS] = "Total shared locking ops", [STAT_LOCKS] = "Exclusive lock futex calls", [STAT_UNLOCKS] = "Exclusive unlock futex calls", [STAT_SLEEPS] = "Exclusive lock sleeps", + [STAT_SLOCKS] = "Shared lock futex calls", + [STAT_SUNLOCKS] = "Shared unlock futex calls", + [STAT_SSLEEPS] = "Shared lock sleeps", [STAT_WAKEUPS] = "Process wakeups", [STAT_EAGAINS] = "EAGAIN lock errors", [STAT_HANDOFFS] = "Lock handoffs", [STAT_STEALS] = "Lock stealings", + [STAT_SLRETRIES] = "Shared lock retries", + [STAT_SURETRIES] = "Shared unlock retries", + [STAT_SALONES] = "Shared lock ops (alone mode)", + [STAT_SGROUPS] = "Shared lock ops (group mode)", [STAT_LOCKERRS] = "\nExclusive lock errors", [STAT_UNLKERRS] = "\nExclusive unlock errors", + [STAT_SLOCKERRS] = "\nShared lock errors", + [STAT_SUNLKERRS] = "\nShared unlock errors", }; if (exit_now) @@ -667,9 +1376,18 @@ static void futex_test_driver(const char *futex_type, } printf("\n=====================================\n"); - printf("[PID %d]: %d threads doing %s futex lockings (load=%d) for %d secs.\n\n", + printf("[PID %d]: %d threads doing %s futex lockings (load=%d) for %d secs.\n", getpid(), nthreads, futex_type, loadlat, nsecs); + if (xthread) { + /* + * Compute numbers of reader and writer threads. + */ + rthread_threshold = (rpercent * nthreads + 50)/100; + printf("\t\t{%d reader threads, %d writer threads}\n", + rthread_threshold, nthreads - rthread_threshold); + } + printf("\n"); init_stats(&throughput_stats); *pfutex = 0; @@ -739,11 +1457,15 @@ static void futex_test_driver(const char *futex_type, /* * Get a rounded estimate of the # of locking ops/sec. 
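+ * Shared (reader) operations are folded into the per-thread total + * below so that rwlock throughput accounts for both lock types.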
*/ - u64 tp = (u64)worker[i].stats[STAT_OPS] * 1000000 / us; + u64 tp = (u64)(worker[i].stats[STAT_OPS] + + worker[i].stats[STAT_SOPS]) * 1000000 / us; for (j = 0; j < STAT_NUM; j++) total.stats[j] += worker[i].stats[j]; + for (j = 0; j < TIME_NUM; j++) + total.times[j] += worker[i].times[j]; + update_stats(&throughput_stats, tp); if (verbose) printf("[thread %3d] futex: %p [ %'ld ops/sec ]\n", @@ -767,21 +1489,41 @@ if (total.stats[STAT_UNLOCKS]) printf("Avg exclusive unlock syscall = %'ldns\n", total.times[TIME_UNLK]/total.stats[STAT_UNLOCKS]); + if (total.stats[STAT_SLOCKS]) + printf("Avg shared lock syscall = %'ldns\n", + total.times[TIME_SLOCK]/total.stats[STAT_SLOCKS]); + if (total.stats[STAT_SUNLOCKS]) + printf("Avg shared unlock syscall = %'ldns\n", + total.times[TIME_SUNLK]/total.stats[STAT_SUNLOCKS]); } printf("\nPercentages:\n"); if (total.stats[STAT_LOCKS]) printf("Exclusive lock futex calls = %.1f%%\n", stat_percent(&total, STAT_LOCKS, STAT_OPS)); + if (total.stats[STAT_SLOCKS]) + printf("Shared lock futex calls = %.1f%%\n", + stat_percent(&total, STAT_SLOCKS, STAT_SOPS)); if (total.stats[STAT_UNLOCKS]) printf("Exclusive unlock futex calls = %.1f%%\n", stat_percent(&total, STAT_UNLOCKS, STAT_OPS)); + if (total.stats[STAT_SUNLOCKS]) + printf("Shared unlock futex calls = %.1f%%\n", + stat_percent(&total, STAT_SUNLOCKS, STAT_SOPS)); if (total.stats[STAT_EAGAINS]) printf("EAGAIN lock errors = %.1f%%\n", - stat_percent(&total, STAT_EAGAINS, STAT_LOCKS)); + (double)total.stats[STAT_EAGAINS] * 100 / + (total.stats[STAT_LOCKS] + total.stats[STAT_SLOCKS])); if (total.stats[STAT_WAKEUPS]) printf("Process wakeups = %.1f%%\n", - stat_percent(&total, STAT_WAKEUPS, STAT_UNLOCKS)); + (double)total.stats[STAT_WAKEUPS] * 100 / + (total.stats[STAT_UNLOCKS] + + total.stats[STAT_SUNLOCKS])); + if (xthread) + printf("Reader operations = %.1f%%\n", + (double)total.stats[STAT_SOPS] * 100 / + (total.stats[STAT_OPS] + total.stats[STAT_SOPS])); + printf("\nPer-thread Locking Rates:\n"); printf("Avg = %'d ops/sec (+- %.2f%%)\n", (int)(avg + 0.5), @@ -789,28 +1531,55 @@ printf("Min = %'d ops/sec\n", (int)throughput_stats.min); printf("Max = %'d ops/sec\n", (int)throughput_stats.max); + /* + * Compute the average reader and writer locking operation rates + * with separate reader and writer threads.
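+ * With the -x option, threads [0, rthread_threshold) are the readers + * and the remaining threads are the writers, so the two index ranges + * can be averaged independently.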
+ */ + if (xthread) { + u64 tp; + + /* Reader stats */ + memset(&throughput_stats, 0, sizeof(throughput_stats)); + for (i = 0, tp = 0; i < rthread_threshold; i++) { + tp = (u64)worker[i].stats[STAT_SOPS] * 1000000 / us; + update_stats(&throughput_stats, tp); + } + avg = avg_stats(&throughput_stats); + stddev = stddev_stats(&throughput_stats); + printf("\nReader avg = %'d ops/sec (+- %.2f%%)\n", + (int)(avg + 0.5), rel_stddev_stats(stddev, avg)); + + /* Writer stats */ + memset(&throughput_stats, 0, sizeof(throughput_stats)); + for (tp = 0; i < (int)nthreads; i++) { + tp = (u64)worker[i].stats[STAT_OPS] * 1000000 / us; + update_stats(&throughput_stats, tp); + } + avg = avg_stats(&throughput_stats); + stddev = stddev_stats(&throughput_stats); + printf("Writer avg = %'d ops/sec (+- %.2f%%)\n", + (int)(avg + 0.5), rel_stddev_stats(stddev, avg)); + } + if (*pfutex != 0) printf("\nResidual futex value = 0x%x\n", *pfutex); /* Clear the workers area */ memset(worker, 0, sizeof(*worker) * nthreads); + + if (rwlock_inited) + pthread_rwlock_destroy(&rwlock); + if (rwlock_attr_inited) + pthread_rwlockattr_destroy(&rwlock_attr); } -int bench_futex_mutex(int argc, const char **argv, - const char *prefix __maybe_unused) +static void bench_futex_common(struct sigaction *act) { - struct sigaction act; - - argc = parse_options(argc, argv, mutex_options, - bench_futex_mutex_usage, 0); - if (argc) - goto err; - ncpus = sysconf(_SC_NPROCESSORS_ONLN); - sigfillset(&act.sa_mask); - act.sa_sigaction = toggle_done; - sigaction(SIGINT, &act, NULL); + sigfillset(&act->sa_mask); + act->sa_sigaction = toggle_done; + sigaction(SIGINT, act, NULL); if (!nthreads) nthreads = ncpus; @@ -828,6 +1597,19 @@ int bench_futex_mutex(int argc, const char **argv, if (!fshared) flags = FUTEX_PRIVATE_FLAG; +} + +int bench_futex_mutex(int argc, const char **argv, + const char *prefix __maybe_unused) +{ + struct sigaction act; + + argc = parse_options(argc, argv, mutex_options, + bench_futex_mutex_usage, 0); + if (argc) + goto err; + + bench_futex_common(&act); if (!ftype || !strcmp(ftype, "all")) { futex_test_driver("WW", futex_mutex_type, mutex_workerfn); @@ -842,3 +1624,31 @@ int bench_futex_mutex(int argc, const char **argv, usage_with_options(bench_futex_mutex_usage, mutex_options); exit(EXIT_FAILURE); } + +int bench_futex_rwlock(int argc, const char **argv, + const char *prefix __maybe_unused) +{ + struct sigaction act; + + argc = parse_options(argc, argv, rwlock_options, + bench_futex_rwlock_usage, 0); + if (argc) + goto err; + + ncpus = sysconf(_SC_NPROCESSORS_ONLN); + + bench_futex_common(&act); + + if (!ftype || !strcmp(ftype, "all")) { + futex_test_driver("WW", futex_rwlock_type, rwlock_workerfn); + futex_test_driver("TP", futex_rwlock_type, rwlock_workerfn); + futex_test_driver("GC", futex_rwlock_type, rwlock_workerfn); + } else { + futex_test_driver(ftype, futex_rwlock_type, rwlock_workerfn); + } + free(worker_alloc); + return 0; +err: + usage_with_options(bench_futex_rwlock_usage, rwlock_options); + exit(EXIT_FAILURE); +} diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index be1eb7a..199cc77 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -88,8 +88,10 @@ } #ifndef FUTEX_LOCK -#define FUTEX_LOCK 0xff /* Disable the use of TP futexes */ -#define FUTEX_UNLOCK 0xff +#define FUTEX_LOCK 0xff /* Disable the use of TP futexes */ +#define FUTEX_UNLOCK 0xff +#define FUTEX_LOCK_SHARED 0xff +#define FUTEX_UNLOCK_SHARED 0xff #endif /* FUTEX_LOCK */ /** @@ -110,6 +112,24 @@ return futex(uaddr, 
FUTEX_UNLOCK, 0, NULL, NULL, 0, opflags); } +/** + * futex_lock_shared() - shared locking of TP futex + */ +static inline int +futex_lock_shared(u_int32_t *uaddr, struct timespec *timeout, int opflags) +{ + return futex(uaddr, FUTEX_LOCK_SHARED, 0, timeout, NULL, 0, opflags); +} + +/** + * futex_unlock_shared() - shared unlocking of TP futex + */ +static inline int +futex_unlock_shared(u_int32_t *uaddr, int opflags) +{ + return futex(uaddr, FUTEX_UNLOCK_SHARED, 0, NULL, NULL, 0, opflags); +} + #ifndef HAVE_PTHREAD_ATTR_SETAFFINITY_NP #include <pthread.h> static inline int pthread_attr_setaffinity_np(pthread_attr_t *attr, diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index bf4418d..424a014 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -64,6 +64,7 @@ struct bench { /* pi-futexes */ { "lock-pi", "Benchmark for futex lock_pi calls", bench_futex_lock_pi }, { "mutex", "Benchmark for mutex locks using futexes", bench_futex_mutex }, + { "rwlock", "Benchmark for rwlocks using futexes", bench_futex_rwlock }, { "all", "Run all futex benchmarks", NULL }, { NULL, NULL, NULL } }; -- 1.8.3.1