On Wed, Jul 17, 2024 at 08:19:51AM GMT, Alexandre Ghiti wrote:
> Now that Zacas is supported in the kernel, let's use the double word
> atomic version of amocas to improve the SLUB allocator.
>
> Note that we have to select fixed registers, otherwise gcc fails to pick
> even registers and then produces a reserved encoding which fails to
> assemble.

Oh, that's quite unfortunate... I guess we should try to get some new
RISC-V inline assembly register constraints added to support register
pairs.

>
> Signed-off-by: Alexandre Ghiti <alexghiti@xxxxxxxxxxxx>
> ---
>  arch/riscv/Kconfig               |  1 +
>  arch/riscv/include/asm/cmpxchg.h | 39 ++++++++++++++++++++++++++++++++
>  2 files changed, 40 insertions(+)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index d3b0f92f92da..0bbaec0444d0 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -104,6 +104,7 @@ config RISCV
>  	select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO
>  	select HARDIRQS_SW_RESEND
>  	select HAS_IOPORT if MMU
> +	select HAVE_ALIGNED_STRUCT_PAGE
>  	select HAVE_ARCH_AUDITSYSCALL
>  	select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP
>  	select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT
> diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> index 97b24da38897..608d98522557 100644
> --- a/arch/riscv/include/asm/cmpxchg.h
> +++ b/arch/riscv/include/asm/cmpxchg.h
> @@ -289,4 +289,43 @@ end:;						\
>  	arch_cmpxchg_release((ptr), (o), (n));			\
>  })
>
> +#ifdef CONFIG_RISCV_ISA_ZACAS

This is also 64-bit only, so needs a CONFIG_64BIT check too.

> +
> +#define system_has_cmpxchg128()				\
> +		riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)

nit: let's let this stick out since we have 100 chars

> +
> +union __u128_halves {
> +	u128 full;
> +	struct {
> +		u64 low, high;

Should we consider big endian too?

> +	};
> +};
> +
> +#define __arch_cmpxchg128(p, o, n, cas_sfx)			\
> +({								\
> +	__typeof__(*(p)) __o = (o);				\
> +	union __u128_halves __hn = { .full = (n) };		\
> +	union __u128_halves __ho = { .full = (__o) };		\
> +	register unsigned long x6 asm ("x6") = __hn.low;	\
> +	register unsigned long x7 asm ("x7") = __hn.high;	\
> +	register unsigned long x28 asm ("x28") = __ho.low;	\
> +	register unsigned long x29 asm ("x29") = __ho.high;	\

Can we use t1,t2,t3,t4 rather than the x names?

> +								\
> +	__asm__ __volatile__ (					\
> +		"	amocas.q" cas_sfx " %0, %z3, %2"	\
> +		: "+&r" (x28), "+&r" (x29), "+A" (*(p))		\
> +		: "rJ" (x6), "rJ" (x7)				\
> +		: "memory");					\
> +								\
> +	((u128)x29 << 64) | x28;				\
> +})
> +
> +#define arch_cmpxchg128(ptr, o, n)				\
> +	__arch_cmpxchg128((ptr), (o), (n), ".aqrl")
> +
> +#define arch_cmpxchg128_local(ptr, o, n)			\
> +	__arch_cmpxchg128((ptr), (o), (n), "")
> +
> +#endif /* CONFIG_RISCV_ISA_ZACAS */
> +
>  #endif /* _ASM_RISCV_CMPXCHG_H */
> --
> 2.39.2

Thanks,
drew
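
P.S. Completely untested and only a sketch of what I mean above, but with
the 64-bit guard, the single-line system_has_cmpxchg128() and the ABI
register names it could look roughly like this (I'm assuming gcc accepts
t1/t2/t3/t4 in explicit register variables on riscv; they map to
x6/x7/x28/x29, so the even/odd pairing stays the same):

/* Untested sketch only: same logic as the patch, different guard and register spelling. */
#if defined(CONFIG_64BIT) && defined(CONFIG_RISCV_ISA_ZACAS)

#define system_has_cmpxchg128()	riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)

#define __arch_cmpxchg128(p, o, n, cas_sfx)			\
({								\
	__typeof__(*(p)) __o = (o);				\
	union __u128_halves __hn = { .full = (n) };		\
	union __u128_halves __ho = { .full = (__o) };		\
	/* amocas.q needs even/odd register pairs for rd/rs2 */\
	register unsigned long t1 asm ("t1") = __hn.low;	\
	register unsigned long t2 asm ("t2") = __hn.high;	\
	register unsigned long t3 asm ("t3") = __ho.low;	\
	register unsigned long t4 asm ("t4") = __ho.high;	\
								\
	__asm__ __volatile__ (					\
		"	amocas.q" cas_sfx " %0, %z3, %2"	\
		: "+&r" (t3), "+&r" (t4), "+A" (*(p))		\
		: "rJ" (t1), "rJ" (t2)				\
		: "memory");					\
								\
	((u128)t4 << 64) | t3;					\
})

#endif /* CONFIG_64BIT && CONFIG_RISCV_ISA_ZACAS */

The constraints and operands are unchanged from your patch, only the
guard and the register names differ.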
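
And for the big endian question: if we ever care about such a config, the
union could order the halves based on <asm/byteorder.h>, roughly like the
below (again just a sketch, and riscv Linux is little-endian only today,
so feel free to ignore this one):

/* Sketch: pick the half order from the kernel byte order. */
union __u128_halves {
	u128 full;
	struct {
#if defined(__LITTLE_ENDIAN)
		u64 low, high;
#else
		u64 high, low;
#endif
	};
};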