> + asm goto(ALTERNATIVE("nop", "j %[zacas]", 0, \
> + RISCV_ISA_EXT_ZACAS, 1) \
> + : : : : zacas); \
> + \
> __asm__ __volatile__ ( \
> prepend \
> "0: lr" lr_sfx " %0, %2\n" \
> " bne %0, %z3, 1f\n" \
> - " sc" sc_sfx " %1, %z4, %2\n" \
> + " sc" sc_cas_sfx " %1, %z4, %2\n" \
> " bnez %1, 0b\n" \
> append \
> "1:\n" \
> : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \
> : "rJ" (co o), "rJ" (n) \
> : "memory"); \
> + goto end; \
> + \
> +zacas: \
> + __asm__ __volatile__ ( \
> + prepend \
> + " amocas" sc_cas_sfx " %0, %z2, %1\n" \
> + append \
> + : "+&r" (r), "+A" (*(p)) \
> + : "rJ" (n) \
> + : "memory"); \

With this, a cmpxchg32() will result in something like

    amocas.w.rl a5,a4,(s1)
    fence rw,rw

(cf. my remarks in patch #4); this will/should provide enough sync, but you might want to try the alternative and currently more common mapping for "fully-ordered AMO sequences", aka

    amocas.w.aqrl a5,a4,(s1)

Similarly for cmpxchg64 and other sizes.

  Andrea