On Wed, Jun 26, 2024 at 03:03:44PM +0200, Alexandre Ghiti wrote: > xchg() uses amoswap.X instructions from Zabha but still uses > the LR/SC acquire/release semantics which require barriers. > > Let's improve that by using proper amoswap acquire/release semantics in > order to avoid any of those barriers. > > Suggested-by: Andrea Parri <andrea@xxxxxxxxxxxx> > Signed-off-by: Alexandre Ghiti <alexghiti@xxxxxxxxxxxx> > --- > arch/riscv/include/asm/cmpxchg.h | 35 +++++++++++++------------------- > 1 file changed, 14 insertions(+), 21 deletions(-) > > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h > index eb35e2d30a97..0e57d5fbf227 100644 > --- a/arch/riscv/include/asm/cmpxchg.h > +++ b/arch/riscv/include/asm/cmpxchg.h > @@ -11,8 +11,8 @@ > #include <asm/fence.h> > #include <asm/alternative.h> > > -#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append, \ > - swap_append, r, p, n) \ > +#define __arch_xchg_masked(sc_sfx, swap_sfx, sc_prepend, sc_append, \ > + r, p, n) \ > ({ \ > __label__ zabha, end; \ > \ > @@ -31,7 +31,7 @@ > ulong __rc; \ > \ > __asm__ __volatile__ ( \ > - prepend \ > + sc_prepend \ > "0: lr.w %0, %2\n" \ > " and %1, %0, %z4\n" \ > " or %1, %1, %z3\n" \ > @@ -48,9 +48,7 @@ > zabha: \ > if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA)) { \ > __asm__ __volatile__ ( \ > - prepend \ > " amoswap" swap_sfx " %0, %z2, %1\n" \ > - swap_append \ > : "=&r" (r), "+A" (*(p)) \ > : "rJ" (n) \ > : "memory"); \ > @@ -58,19 +56,17 @@ zabha: \ > end:; \ > }) > > -#define __arch_xchg(sfx, prepend, append, r, p, n) \ > +#define __arch_xchg(sfx, r, p, n) \ > ({ \ > __asm__ __volatile__ ( \ > - prepend \ > " amoswap" sfx " %0, %2, %1\n" \ > - append \ > : "=r" (r), "+A" (*(p)) \ > : "r" (n) \ > : "memory"); \ > }) > > -#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend, \ > - sc_append, swap_append) \ > +#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, \ > + sc_prepend, sc_append) \ > ({ \ > __typeof__(ptr) __ptr = (ptr); \ > __typeof__(*(__ptr)) __new = (new); \ > @@ -79,21 +75,19 @@ end:; \ > switch (sizeof(*__ptr)) { \ > case 1: \ > __arch_xchg_masked(sc_sfx, ".b" swap_sfx, \ > - prepend, sc_append, swap_append, \ > + sc_prepend, sc_append, \ > __ret, __ptr, __new); \ > break; \ > case 2: \ > __arch_xchg_masked(sc_sfx, ".h" swap_sfx, \ > - prepend, sc_append, swap_append, \ > + sc_prepend, sc_append, \ > __ret, __ptr, __new); \ > break; \ > case 4: \ > - __arch_xchg(".w" swap_sfx, prepend, swap_append, \ > - __ret, __ptr, __new); \ > + __arch_xchg(".w" swap_sfx, __ret, __ptr, __new); \ > break; \ > case 8: \ > - __arch_xchg(".d" swap_sfx, prepend, swap_append, \ > - __ret, __ptr, __new); \ > + __arch_xchg(".d" swap_sfx, __ret, __ptr, __new); \ > break; \ > default: \ > BUILD_BUG(); \ > @@ -102,17 +96,16 @@ end:; \ > }) > > #define arch_xchg_relaxed(ptr, x) \ > - _arch_xchg(ptr, x, "", "", "", "", "") > + _arch_xchg(ptr, x, "", "", "", "") > > #define arch_xchg_acquire(ptr, x) \ > - _arch_xchg(ptr, x, "", "", "", \ > - RISCV_ACQUIRE_BARRIER, RISCV_ACQUIRE_BARRIER) > + _arch_xchg(ptr, x, "", ".aq", "", RISCV_ACQUIRE_BARRIER) > > #define arch_xchg_release(ptr, x) \ > - _arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "", "") > + _arch_xchg(ptr, x, "", ".rl", RISCV_RELEASE_BARRIER, "") > > #define arch_xchg(ptr, x) \ > - _arch_xchg(ptr, x, ".rl", ".aqrl", "", RISCV_FULL_BARRIER, "") > + _arch_xchg(ptr, x, ".rl", ".aqrl", "", RISCV_FULL_BARRIER) I actually see no reason for this patch, please see also my remarks /question on patch #4. Andrea