From: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Because the qspinlock needs to touch a second cacheline; add a pending bit and allow a single in-word spinner before we punt to the second cacheline. Signed-off-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Signed-off-by: Waiman Long <Waiman.Long@xxxxxx> --- include/asm-generic/qspinlock_types.h | 12 +++- kernel/locking/qspinlock.c | 121 +++++++++++++++++++++++++++------ 2 files changed, 110 insertions(+), 23 deletions(-) diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h index f66f845..bd25081 100644 --- a/include/asm-generic/qspinlock_types.h +++ b/include/asm-generic/qspinlock_types.h @@ -39,8 +39,9 @@ typedef struct qspinlock { * Bitfields in the atomic value: * * 0- 7: locked byte - * 8- 9: tail index - * 10-31: tail cpu (+1) + * 8: pending + * 9-10: tail index + * 11-31: tail cpu (+1) */ #define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\ << _Q_ ## type ## _OFFSET) @@ -48,7 +49,11 @@ typedef struct qspinlock { #define _Q_LOCKED_BITS 8 #define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED) -#define _Q_TAIL_IDX_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS) +#define _Q_PENDING_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS) +#define _Q_PENDING_BITS 1 +#define _Q_PENDING_MASK _Q_SET_MASK(PENDING) + +#define _Q_TAIL_IDX_OFFSET (_Q_PENDING_OFFSET + _Q_PENDING_BITS) #define _Q_TAIL_IDX_BITS 2 #define _Q_TAIL_IDX_MASK _Q_SET_MASK(TAIL_IDX) @@ -57,5 +62,6 @@ typedef struct qspinlock { #define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU) #define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET) +#define _Q_PENDING_VAL (1U << _Q_PENDING_OFFSET) #endif /* __ASM_GENERIC_QSPINLOCK_TYPES_H */ diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index b97a1ad..6467bfc 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -83,23 +83,97 @@ static inline struct mcs_spinlock *decode_tail(u32 tail) return per_cpu_ptr(&mcs_nodes[idx], cpu); } +#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK) + +/** + * trylock_pending - try to acquire queue spinlock using the pending bit + * @lock : Pointer to queue spinlock structure + * @pval : Pointer to value of the queue spinlock 32-bit word + * Return: 1 if lock acquired, 0 otherwise + */ +static inline int trylock_pending(struct qspinlock *lock, u32 *pval) +{ + u32 old, new, val = *pval; + + /* + * trylock || pending + * + * 0,0,0 -> 0,0,1 ; trylock + * 0,0,1 -> 0,1,1 ; pending + */ + for (;;) { + /* + * If we observe any contention; queue. + */ + if (val & ~_Q_LOCKED_MASK) + return 0; + + new = _Q_LOCKED_VAL; + if (val == new) + new |= _Q_PENDING_VAL; + + old = atomic_cmpxchg(&lock->val, val, new); + if (old == val) + break; + + *pval = val = old; + } + + /* + * we won the trylock + */ + if (new == _Q_LOCKED_VAL) + return 1; + + /* + * we're pending, wait for the owner to go away. + * + * *,1,1 -> *,1,0 + */ + while ((val = atomic_read(&lock->val)) & _Q_LOCKED_MASK) + arch_mutex_cpu_relax(); + + /* + * take ownership and clear the pending bit. + * + * *,1,0 -> *,0,1 + */ + for (;;) { + new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL; + + old = atomic_cmpxchg(&lock->val, val, new); + if (old == val) + break; + + val = old; + } + return 1; +} + /** * queue_spin_lock_slowpath - acquire the queue spinlock * @lock: Pointer to queue spinlock structure * @val: Current value of the queue spinlock 32-bit word * - * (queue tail, lock bit) + * (queue tail, pending bit, lock bit) + * + * fast : slow : unlock + * : : + * uncontended (0,0,0) -:--> (0,0,1) ------------------------------:--> (*,*,0) + * : | ^--------.------. / : + * : v \ \ | : + * pending : (0,1,1) +--> (0,1,0) \ | : + * : | ^--' | | : + * : v | | : + * uncontended : (n,x,y) +--> (n,0,0) --' | : + * queue : | ^--' | : + * : v | : + * contended : (*,x,y) +--> (*,0,0) ---> (*,0,1) -' : + * queue : ^--' : * - * fast : slow : unlock - * : : - * uncontended (0,0) --:--> (0,1) --------------------------------:--> (*,0) - * : | ^--------. / : - * : v \ | : - * uncontended : (n,x) --+--> (n,0) | : - * queue : | ^--' | : - * : v | : - * contended : (*,x) --+--> (*,0) -----> (*,1) ---' : - * queue : ^--' : + * The pending bit processing is in the trylock_pending() function + * whereas the uncontended and contended queue processing is in the + * queue_spin_lock_slowpath() function. * */ void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val) @@ -110,6 +184,9 @@ void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val) BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); + if (trylock_pending(lock, &val)) + return; /* Lock acquired */ + node = this_cpu_ptr(&mcs_nodes[0]); idx = node->count++; tail = encode_tail(smp_processor_id(), idx); @@ -119,15 +196,18 @@ void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val) node->next = NULL; /* + * we already touched the queueing cacheline; don't bother with pending + * stuff. + * * trylock || xchg(lock, node) * - * 0,0 -> 0,1 ; trylock - * p,x -> n,x ; prev = xchg(lock, node) + * 0,0,0 -> 0,0,1 ; trylock + * p,y,x -> n,y,x ; prev = xchg(lock, node) */ for (;;) { new = _Q_LOCKED_VAL; if (val) - new = tail | (val & _Q_LOCKED_MASK); + new = tail | (val & _Q_LOCKED_PENDING_MASK); old = atomic_cmpxchg(&lock->val, val, new); if (old == val) @@ -145,7 +225,7 @@ void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val) /* * if there was a previous node; link it and wait. */ - if (old & ~_Q_LOCKED_MASK) { + if (old & ~_Q_LOCKED_PENDING_MASK) { prev = decode_tail(old); ACCESS_ONCE(prev->next) = node; @@ -153,18 +233,19 @@ void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val) } /* - * we're at the head of the waitqueue, wait for the owner to go away. + * we're at the head of the waitqueue, wait for the owner & pending to + * go away. * - * *,x -> *,0 + * *,x,y -> *,0,0 */ - while ((val = atomic_read(&lock->val)) & _Q_LOCKED_MASK) + while ((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK) arch_mutex_cpu_relax(); /* * claim the lock: * - * n,0 -> 0,1 : lock, uncontended - * *,0 -> *,1 : lock, contended + * n,0,0 -> 0,0,1 : lock, uncontended + * *,0,0 -> *,0,1 : lock, contended */ for (;;) { new = _Q_LOCKED_VAL; -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html