Hi Christoph, On Thu, Sep 12, 2024 at 03:44:08PM -0700, Christoph Lameter via B4 Relay wrote: > diff --git a/arch/Kconfig b/arch/Kconfig > index 975dd22a2dbd..3c270f496231 100644 > --- a/arch/Kconfig > +++ b/arch/Kconfig > @@ -1600,6 +1600,14 @@ config ARCH_HAS_KERNEL_FPU_SUPPORT > Architectures that select this option can run floating-point code in > the kernel, as described in Documentation/core-api/floating-point.rst. > > +config ARCH_HAS_ACQUIRE_RELEASE > + bool > + help > + Setting ARCH_HAS_ACQUIRE_RELEASE indicates that the architecture > + supports load acquire and release. Typically these are more effective > + than memory barriers. Code will prefer the use of load acquire and > + store release over memory barriers if this option is enabled. > + Unsurprisingly, I'd be in favour of making this unconditional rather than adding a new Kconfig option. Would that actually hurt any architectures where we care about the last few shreds of performance? > source "kernel/gcov/Kconfig" > > source "scripts/gcc-plugins/Kconfig" > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig > index a2f8ff354ca6..19e34fff145f 100644 > --- a/arch/arm64/Kconfig > +++ b/arch/arm64/Kconfig > @@ -39,6 +39,7 @@ config ARM64 > select ARCH_HAS_PTE_DEVMAP > select ARCH_HAS_PTE_SPECIAL > select ARCH_HAS_HW_PTE_YOUNG > + select ARCH_HAS_ACQUIRE_RELEASE > select ARCH_HAS_SETUP_DMA_OPS > select ARCH_HAS_SET_DIRECT_MAP > select ARCH_HAS_SET_MEMORY > diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h > index d90d8ee29d81..a3fe9ee8edef 100644 > --- a/include/linux/seqlock.h > +++ b/include/linux/seqlock.h > @@ -23,6 +23,13 @@ > > #include <asm/processor.h> > > +#ifdef CONFIG_ARCH_HAS_ACQUIRE_RELEASE > +# define USE_LOAD_ACQUIRE true > +# define USE_COND_LOAD_ACQUIRE !IS_ENABLED(CONFIG_PREEMPT_RT) > +#else > +# define USE_LOAD_ACQUIRE false > +# define USE_COND_LOAD_ACQUIRE false > +#endif > /* > * The seqlock seqcount_t interface does not prescribe a precise sequence of > * read begin/retry/end. For readers, typically there is a call to > @@ -132,6 +139,17 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) > #define seqcount_rwlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, rwlock) > #define seqcount_mutex_init(s, lock) seqcount_LOCKNAME_init(s, lock, mutex) > > +static __always_inline unsigned __seqprop_load_sequence(const seqcount_t *s, bool acquire) > +{ > + if (!acquire || !USE_LOAD_ACQUIRE) > + return READ_ONCE(s->sequence); > + > + if (USE_COND_LOAD_ACQUIRE) > + return smp_cond_load_acquire((unsigned int *)&s->sequence, (s->sequence & 1) == 0); This looks wrong to me. The conditional expression passed to smp_cond_load_acquire() should be written in terms of 'VAL', otherwise you're introducing an additional non-atomic access to the sequence counter. Will