On Sun, Sep 10, 2023 at 04:28:56AM -0400, guoren@xxxxxxxxxx wrote:
> From: Guo Ren <guoren@xxxxxxxxxxxxxxxxx>
>
> Move ticket-lock definition into an independent file. This is the
> preparation for the next combo spinlock of riscv.
>
> Signed-off-by: Guo Ren <guoren@xxxxxxxxxxxxxxxxx>
> Signed-off-by: Guo Ren <guoren@xxxxxxxxxx>
> ---
>  include/asm-generic/spinlock.h        |  87 +---------------------
>  include/asm-generic/ticket_spinlock.h | 103 ++++++++++++++++++++++++++
>  2 files changed, 104 insertions(+), 86 deletions(-)
>  create mode 100644 include/asm-generic/ticket_spinlock.h
>
> diff --git a/include/asm-generic/spinlock.h b/include/asm-generic/spinlock.h
> index 4773334ee638..970590baf61b 100644
> --- a/include/asm-generic/spinlock.h
> +++ b/include/asm-generic/spinlock.h
> @@ -1,94 +1,9 @@
>  /* SPDX-License-Identifier: GPL-2.0 */
>
> -/*
> - * 'Generic' ticket-lock implementation.
> - *
> - * It relies on atomic_fetch_add() having well defined forward progress
> - * guarantees under contention. If your architecture cannot provide this, stick
> - * to a test-and-set lock.
> - *
> - * It also relies on atomic_fetch_add() being safe vs smp_store_release() on a
> - * sub-word of the value. This is generally true for anything LL/SC although
> - * you'd be hard pressed to find anything useful in architecture specifications
> - * about this. If your architecture cannot do this you might be better off with
> - * a test-and-set.
> - *
> - * It further assumes atomic_*_release() + atomic_*_acquire() is RCpc and hence
> - * uses atomic_fetch_add() which is RCsc to create an RCsc hot path, along with
> - * a full fence after the spin to upgrade the otherwise-RCpc
> - * atomic_cond_read_acquire().
> - *
> - * The implementation uses smp_cond_load_acquire() to spin, so if the
> - * architecture has WFE like instructions to sleep instead of poll for word
> - * modifications be sure to implement that (see ARM64 for example).
> - *
> - */
> -
>  #ifndef __ASM_GENERIC_SPINLOCK_H
>  #define __ASM_GENERIC_SPINLOCK_H
>
> -#include <linux/atomic.h>
> -#include <asm-generic/spinlock_types.h>
> -
> -static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
> -{
> -	u32 val = atomic_fetch_add(1<<16, &lock->val);
> -	u16 ticket = val >> 16;
> -
> -	if (ticket == (u16)val)
> -		return;
> -
> -	/*
> -	 * atomic_cond_read_acquire() is RCpc, but rather than defining a
> -	 * custom cond_read_rcsc() here we just emit a full fence. We only
> -	 * need the prior reads before subsequent writes ordering from
> -	 * smb_mb(), but as atomic_cond_read_acquire() just emits reads and we
> -	 * have no outstanding writes due to the atomic_fetch_add() the extra
> -	 * orderings are free.
> -	 */
> -	atomic_cond_read_acquire(&lock->val, ticket == (u16)VAL);
> -	smp_mb();
> -}
> -
> -static __always_inline bool arch_spin_trylock(arch_spinlock_t *lock)
> -{
> -	u32 old = atomic_read(&lock->val);
> -
> -	if ((old >> 16) != (old & 0xffff))
> -		return false;
> -
> -	return atomic_try_cmpxchg(&lock->val, &old, old + (1<<16)); /* SC, for RCsc */
> -}
> -
> -static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
> -{
> -	u16 *ptr = (u16 *)lock + IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
> -	u32 val = atomic_read(&lock->val);
> -
> -	smp_store_release(ptr, (u16)val + 1);
> -}
> -
> -static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
> -{
> -	u32 val = lock.val.counter;
> -
> -	return ((val >> 16) == (val & 0xffff));
> -}
> -
> -static __always_inline int arch_spin_is_locked(arch_spinlock_t *lock)
> -{
> -	arch_spinlock_t val = READ_ONCE(*lock);
> -
> -	return !arch_spin_value_unlocked(val);
> -}
> -
> -static __always_inline int arch_spin_is_contended(arch_spinlock_t *lock)
> -{
> -	u32 val = atomic_read(&lock->val);
> -
> -	return (s16)((val >> 16) - (val & 0xffff)) > 1;
> -}
> -
> +#include <asm-generic/ticket_spinlock.h>
>  #include <asm/qrwlock.h>
>
>  #endif /* __ASM_GENERIC_SPINLOCK_H */
> diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> new file mode 100644
> index 000000000000..cfcff22b37b3
> --- /dev/null
> +++ b/include/asm-generic/ticket_spinlock.h
> @@ -0,0 +1,103 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +/*
> + * 'Generic' ticket-lock implementation.
> + *
> + * It relies on atomic_fetch_add() having well defined forward progress
> + * guarantees under contention. If your architecture cannot provide this, stick
> + * to a test-and-set lock.
> + *
> + * It also relies on atomic_fetch_add() being safe vs smp_store_release() on a
> + * sub-word of the value. This is generally true for anything LL/SC although
> + * you'd be hard pressed to find anything useful in architecture specifications
> + * about this. If your architecture cannot do this you might be better off with
> + * a test-and-set.
> + *
> + * It further assumes atomic_*_release() + atomic_*_acquire() is RCpc and hence
> + * uses atomic_fetch_add() which is RCsc to create an RCsc hot path, along with
> + * a full fence after the spin to upgrade the otherwise-RCpc
> + * atomic_cond_read_acquire().
> + *
> + * The implementation uses smp_cond_load_acquire() to spin, so if the
> + * architecture has WFE like instructions to sleep instead of poll for word
> + * modifications be sure to implement that (see ARM64 for example).
> + *
> + */
> +
> +#ifndef __ASM_GENERIC_TICKET_SPINLOCK_H
> +#define __ASM_GENERIC_TICKET_SPINLOCK_H
> +
> +#include <linux/atomic.h>
> +#include <asm-generic/spinlock_types.h>
> +
> +static __always_inline void ticket_spin_lock(arch_spinlock_t *lock)
> +{
> +	u32 val = atomic_fetch_add(1<<16, &lock->val);
> +	u16 ticket = val >> 16;
> +
> +	if (ticket == (u16)val)
> +		return;
> +
> +	/*
> +	 * atomic_cond_read_acquire() is RCpc, but rather than defining a
> +	 * custom cond_read_rcsc() here we just emit a full fence. We only
> +	 * need the prior reads before subsequent writes ordering from
> +	 * smb_mb(), but as atomic_cond_read_acquire() just emits reads and we
> +	 * have no outstanding writes due to the atomic_fetch_add() the extra
> +	 * orderings are free.
> +	 */
> +	atomic_cond_read_acquire(&lock->val, ticket == (u16)VAL);
> +	smp_mb();
> +}
> +
> +static __always_inline bool ticket_spin_trylock(arch_spinlock_t *lock)
> +{
> +	u32 old = atomic_read(&lock->val);
> +
> +	if ((old >> 16) != (old & 0xffff))
> +		return false;
> +
> +	return atomic_try_cmpxchg(&lock->val, &old, old + (1<<16)); /* SC, for RCsc */
> +}
> +
> +static __always_inline void ticket_spin_unlock(arch_spinlock_t *lock)
> +{
> +	u16 *ptr = (u16 *)lock + IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
> +	u32 val = atomic_read(&lock->val);
> +
> +	smp_store_release(ptr, (u16)val + 1);
> +}
> +
> +static __always_inline int ticket_spin_value_unlocked(arch_spinlock_t lock)
> +{
> +	u32 val = lock.val.counter;
> +
> +	return ((val >> 16) == (val & 0xffff));
> +}
> +
> +static __always_inline int ticket_spin_is_locked(arch_spinlock_t *lock)
> +{
> +	arch_spinlock_t val = READ_ONCE(*lock);
> +
> +	return !ticket_spin_value_unlocked(val);
> +}
> +
> +static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> +{
> +	u32 val = atomic_read(&lock->val);
> +
> +	return (s16)((val >> 16) - (val & 0xffff)) > 1;
> +}
> +
> +/*
> + * Remapping spinlock architecture specific functions to the corresponding
> + * ticket spinlock functions.
> + */
> +#define arch_spin_is_locked(l)		ticket_spin_is_locked(l)
> +#define arch_spin_is_contended(l)	ticket_spin_is_contended(l)
> +#define arch_spin_value_unlocked(l)	ticket_spin_value_unlocked(l)
> +#define arch_spin_lock(l)		ticket_spin_lock(l)
> +#define arch_spin_trylock(l)		ticket_spin_trylock(l)
> +#define arch_spin_unlock(l)		ticket_spin_unlock(l)
> +
> +#endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */

IIUC most of the file was simply moved here, and the above defines are newly
introduced.

I understand this pattern of creating the defines at the end of the file is
the same as the one used in "asm-generic/qspinlock.h", but I don't actually
think it is a good way of doing this.

Instead of having those defines here (and similarly in
"asm-generic/qspinlock.h"), I think it would be better to have them in the
arch-specific header that includes this file. That would allow the arch to
include multiple spinlock versions and decide (at compile time, or even at
run time) which version to use, as in the rough sketch at the end of this
mail. It gives the decision power to the arch code. (And it would remove the
need to undefine them in a later patch.)

There are only 3 archs which use this asm-generic qspinlock, so it should not
be a huge deal to have the defines copied there:

# git grep asm-generic/qspinlock.h
arch/loongarch/include/asm/qspinlock.h:16:#include <asm-generic/qspinlock.h>
arch/sparc/include/asm/qspinlock.h:6:#include <asm-generic/qspinlock.h>
arch/x86/include/asm/qspinlock.h:107:#include <asm-generic/qspinlock.h>

Other than that:

Reviewed-by: Leonardo Bras <leobras@xxxxxxxxxx>
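
To make that concrete, here is a rough, untested sketch of what such an arch
header could look like. The file path, the use of CONFIG_QUEUED_SPINLOCKS as
the switch, and the assumption that neither generic header defines
arch_spin_*() anymore are mine, purely for illustration; a run-time combo
would instead make each define test a static key and dispatch to one of the
two implementations:

/* arch/riscv/include/asm/spinlock.h -- illustrative sketch only */
/* SPDX-License-Identifier: GPL-2.0 */

#ifndef __ASM_RISCV_SPINLOCK_H
#define __ASM_RISCV_SPINLOCK_H

#ifdef CONFIG_QUEUED_SPINLOCKS
#include <asm/qspinlock.h>			/* queued_spin_*() helpers */
#else
#include <asm-generic/ticket_spinlock.h>	/* ticket_spin_*() helpers */
#endif

/*
 * The arch header, not the generic one, decides which implementation
 * backs the arch_spin_*() API. Compile-time selection is shown here.
 */
#ifdef CONFIG_QUEUED_SPINLOCKS
#define arch_spin_lock(l)		queued_spin_lock(l)
#define arch_spin_trylock(l)		queued_spin_trylock(l)
#define arch_spin_unlock(l)		queued_spin_unlock(l)
#define arch_spin_is_locked(l)		queued_spin_is_locked(l)
#define arch_spin_is_contended(l)	queued_spin_is_contended(l)
#define arch_spin_value_unlocked(l)	queued_spin_value_unlocked(l)
#else
#define arch_spin_lock(l)		ticket_spin_lock(l)
#define arch_spin_trylock(l)		ticket_spin_trylock(l)
#define arch_spin_unlock(l)		ticket_spin_unlock(l)
#define arch_spin_is_locked(l)		ticket_spin_is_locked(l)
#define arch_spin_is_contended(l)	ticket_spin_is_contended(l)
#define arch_spin_value_unlocked(l)	ticket_spin_value_unlocked(l)
#endif

#include <asm/qrwlock.h>

#endif /* __ASM_RISCV_SPINLOCK_H */

The same set of defines would then also need to be copied into the three
arch qspinlock.h headers listed above.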