From: Guo Ren <guoren@xxxxxxxxxxxxxxxxx> Separate ticket-lock into tspinlock.h and let generic spinlock support qspinlock or ticket-lock selected by CONFIG_ARCH_USE_QUEUED_SPINLOCKS config. Signed-off-by: Guo Ren <guoren@xxxxxxxxxxxxxxxxx> Signed-off-by: Guo Ren <guoren@xxxxxxxxxx> Cc: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> Cc: Arnd Bergmann <arnd@xxxxxxxx> --- include/asm-generic/spinlock.h | 90 ++------------------------ include/asm-generic/spinlock_types.h | 14 ++-- include/asm-generic/tspinlock.h | 92 +++++++++++++++++++++++++++ include/asm-generic/tspinlock_types.h | 17 +++++ 4 files changed, 119 insertions(+), 94 deletions(-) create mode 100644 include/asm-generic/tspinlock.h create mode 100644 include/asm-generic/tspinlock_types.h diff --git a/include/asm-generic/spinlock.h b/include/asm-generic/spinlock.h index fdfebcb050f4..4eca2488af38 100644 --- a/include/asm-generic/spinlock.h +++ b/include/asm-generic/spinlock.h @@ -1,92 +1,12 @@ /* SPDX-License-Identifier: GPL-2.0 */ - -/* - * 'Generic' ticket-lock implementation. - * - * It relies on atomic_fetch_add() having well defined forward progress - * guarantees under contention. If your architecture cannot provide this, stick - * to a test-and-set lock. - * - * It also relies on atomic_fetch_add() being safe vs smp_store_release() on a - * sub-word of the value. This is generally true for anything LL/SC although - * you'd be hard pressed to find anything useful in architecture specifications - * about this. If your architecture cannot do this you might be better off with - * a test-and-set. - * - * It further assumes atomic_*_release() + atomic_*_acquire() is RCpc and hence - * uses atomic_fetch_add() which is RCsc to create an RCsc hot path, along with - * a full fence after the spin to upgrade the otherwise-RCpc - * atomic_cond_read_acquire(). - * - * The implementation uses smp_cond_load_acquire() to spin, so if the - * architecture has WFE like instructions to sleep instead of poll for word - * modifications be sure to implement that (see ARM64 for example). - * - */ - #ifndef __ASM_GENERIC_SPINLOCK_H #define __ASM_GENERIC_SPINLOCK_H -#include <linux/atomic.h> -#include <asm-generic/spinlock_types.h> - -static __always_inline void arch_spin_lock(arch_spinlock_t *lock) -{ - u32 val = atomic_fetch_add(1<<16, lock); - u16 ticket = val >> 16; - - if (ticket == (u16)val) - return; - - /* - * atomic_cond_read_acquire() is RCpc, but rather than defining a - * custom cond_read_rcsc() here we just emit a full fence. We only - * need the prior reads before subsequent writes ordering from - * smb_mb(), but as atomic_cond_read_acquire() just emits reads and we - * have no outstanding writes due to the atomic_fetch_add() the extra - * orderings are free. - */ - atomic_cond_read_acquire(lock, ticket == (u16)VAL); - smp_mb(); -} - -static __always_inline bool arch_spin_trylock(arch_spinlock_t *lock) -{ - u32 old = atomic_read(lock); - - if ((old >> 16) != (old & 0xffff)) - return false; - - return atomic_try_cmpxchg(lock, &old, old + (1<<16)); /* SC, for RCsc */ -} - -static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) -{ - u16 *ptr = (u16 *)lock + IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); - u32 val = atomic_read(lock); - - smp_store_release(ptr, (u16)val + 1); -} - -static __always_inline int arch_spin_is_locked(arch_spinlock_t *lock) -{ - u32 val = atomic_read(lock); - - return ((val >> 16) != (val & 0xffff)); -} - -static __always_inline int arch_spin_is_contended(arch_spinlock_t *lock) -{ - u32 val = atomic_read(lock); - - return (s16)((val >> 16) - (val & 0xffff)) > 1; -} - -static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) -{ - return !arch_spin_is_locked(&lock); -} - +#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS +#include <asm/qspinlock.h> #include <asm/qrwlock.h> +#else +#include <asm-generic/tspinlock.h> +#endif #endif /* __ASM_GENERIC_SPINLOCK_H */ diff --git a/include/asm-generic/spinlock_types.h b/include/asm-generic/spinlock_types.h index 8962bb730945..9875c1d058b3 100644 --- a/include/asm-generic/spinlock_types.h +++ b/include/asm-generic/spinlock_types.h @@ -3,15 +3,11 @@ #ifndef __ASM_GENERIC_SPINLOCK_TYPES_H #define __ASM_GENERIC_SPINLOCK_TYPES_H -#include <linux/types.h> -typedef atomic_t arch_spinlock_t; - -/* - * qrwlock_types depends on arch_spinlock_t, so we must typedef that before the - * include. - */ +#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS +#include <asm-generic/qspinlock_types.h> #include <asm/qrwlock_types.h> - -#define __ARCH_SPIN_LOCK_UNLOCKED ATOMIC_INIT(0) +#else +#include <asm-generic/tspinlock_types.h> +#endif #endif /* __ASM_GENERIC_SPINLOCK_TYPES_H */ diff --git a/include/asm-generic/tspinlock.h b/include/asm-generic/tspinlock.h new file mode 100644 index 000000000000..def7b8f0f4f4 --- /dev/null +++ b/include/asm-generic/tspinlock.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * 'Generic' ticket-lock implementation. + * + * It relies on atomic_fetch_add() having well defined forward progress + * guarantees under contention. If your architecture cannot provide this, stick + * to a test-and-set lock. + * + * It also relies on atomic_fetch_add() being safe vs smp_store_release() on a + * sub-word of the value. This is generally true for anything LL/SC although + * you'd be hard pressed to find anything useful in architecture specifications + * about this. If your architecture cannot do this you might be better off with + * a test-and-set. + * + * It further assumes atomic_*_release() + atomic_*_acquire() is RCpc and hence + * uses atomic_fetch_add() which is RCsc to create an RCsc hot path, along with + * a full fence after the spin to upgrade the otherwise-RCpc + * atomic_cond_read_acquire(). + * + * The implementation uses smp_cond_load_acquire() to spin, so if the + * architecture has WFE like instructions to sleep instead of poll for word + * modifications be sure to implement that (see ARM64 for example). + * + */ + +#ifndef __ASM_GENERIC_TSPINLOCK_H +#define __ASM_GENERIC_TSPINLOCK_H + +#include <linux/atomic.h> +#include <asm-generic/tspinlock_types.h> + +static __always_inline void arch_spin_lock(arch_spinlock_t *lock) +{ + u32 val = atomic_fetch_add(1<<16, lock); + u16 ticket = val >> 16; + + if (ticket == (u16)val) + return; + + /* + * atomic_cond_read_acquire() is RCpc, but rather than defining a + * custom cond_read_rcsc() here we just emit a full fence. We only + * need the prior reads before subsequent writes ordering from + * smb_mb(), but as atomic_cond_read_acquire() just emits reads and we + * have no outstanding writes due to the atomic_fetch_add() the extra + * orderings are free. + */ + atomic_cond_read_acquire(lock, ticket == (u16)VAL); + smp_mb(); +} + +static __always_inline bool arch_spin_trylock(arch_spinlock_t *lock) +{ + u32 old = atomic_read(lock); + + if ((old >> 16) != (old & 0xffff)) + return false; + + return atomic_try_cmpxchg(lock, &old, old + (1<<16)); /* SC, for RCsc */ +} + +static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) +{ + u16 *ptr = (u16 *)lock + IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); + u32 val = atomic_read(lock); + + smp_store_release(ptr, (u16)val + 1); +} + +static __always_inline int arch_spin_is_locked(arch_spinlock_t *lock) +{ + u32 val = atomic_read(lock); + + return ((val >> 16) != (val & 0xffff)); +} + +static __always_inline int arch_spin_is_contended(arch_spinlock_t *lock) +{ + u32 val = atomic_read(lock); + + return (s16)((val >> 16) - (val & 0xffff)) > 1; +} + +static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) +{ + return !arch_spin_is_locked(&lock); +} + +#include <asm/qrwlock.h> + +#endif /* __ASM_GENERIC_TSPINLOCK_H */ diff --git a/include/asm-generic/tspinlock_types.h b/include/asm-generic/tspinlock_types.h new file mode 100644 index 000000000000..ca3ea5acd172 --- /dev/null +++ b/include/asm-generic/tspinlock_types.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_GENERIC_TSPINLOCK_TYPES_H +#define __ASM_GENERIC_TSPINLOCK_TYPES_H + +#include <linux/types.h> +typedef atomic_t arch_spinlock_t; + +/* + * qrwlock_types depends on arch_spinlock_t, so we must typedef that before the + * include. + */ +#include <asm/qrwlock_types.h> + +#define __ARCH_SPIN_LOCK_UNLOCKED ATOMIC_INIT(0) + +#endif /* __ASM_GENERIC_TSPINLOCK_TYPES_H */ -- 2.36.1