The following commit has been merged into the locking/core branch of tip: Commit-ID: aef95dac9ce4f271cc43195ffc175114ed934cbe Gitweb: https://git.kernel.org/tip/aef95dac9ce4f271cc43195ffc175114ed934cbe Author: Uros Bizjak <ubizjak@xxxxxxxxx> AuthorDate: Mon, 08 Apr 2024 11:13:58 +02:00 Committer: Ingo Molnar <mingo@xxxxxxxxxx> CommitterDate: Tue, 09 Apr 2024 09:51:03 +02:00 locking/atomic/x86: Introduce arch_try_cmpxchg64() for !CONFIG_X86_CMPXCHG64 Commit: 6d12c8d308e68 ("percpu: Wire up cmpxchg128") improved emulated cmpxchg8b_emu() library function to return success/failure in a ZF flag. Define arch_try_cmpxchg64() for !CONFIG_X86_CMPXCHG64 targets to override the generic archy_try_cmpxchg() with an optimized target specific implementation that handles ZF flag. The assembly code at the call sites improves from: bf56d: e8 fc ff ff ff call cmpxchg8b_emu bf572: 8b 74 24 28 mov 0x28(%esp),%esi bf576: 89 c3 mov %eax,%ebx bf578: 89 d1 mov %edx,%ecx bf57a: 8b 7c 24 2c mov 0x2c(%esp),%edi bf57e: 89 f0 mov %esi,%eax bf580: 89 fa mov %edi,%edx bf582: 31 d8 xor %ebx,%eax bf584: 31 ca xor %ecx,%edx bf586: 09 d0 or %edx,%eax bf588: 0f 84 e3 01 00 00 je bf771 <...> to: bf572: e8 fc ff ff ff call cmpxchg8b_emu bf577: 0f 84 b6 01 00 00 je bf733 <...> Signed-off-by: Uros Bizjak <ubizjak@xxxxxxxxx> Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx> Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> Cc: "H. Peter Anvin" <hpa@xxxxxxxxx> Link: https://lore.kernel.org/r/20240408091547.90111-4-ubizjak@xxxxxxxxx --- arch/x86/include/asm/cmpxchg_32.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index fe40d06..9e0d330 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -122,6 +122,34 @@ static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64 } #define arch_cmpxchg64_local arch_cmpxchg64_local +#define __arch_try_cmpxchg64_emu(_ptr, _oldp, _new) \ +({ \ + union __u64_halves o = { .full = *(_oldp), }, \ + n = { .full = (_new), }; \ + bool ret; \ + \ + asm volatile(ALTERNATIVE(LOCK_PREFIX_HERE \ + "call cmpxchg8b_emu", \ + "lock; cmpxchg8b %[ptr]", X86_FEATURE_CX8) \ + CC_SET(e) \ + : CC_OUT(e) (ret), \ + [ptr] "+m" (*(_ptr)), \ + "+a" (o.low), "+d" (o.high) \ + : "b" (n.low), "c" (n.high), "S" (_ptr) \ + : "memory"); \ + \ + if (unlikely(!ret)) \ + *(_oldp) = o.full; \ + \ + likely(ret); \ +}) + +static __always_inline bool arch_try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 new) +{ + return __arch_try_cmpxchg64_emu(ptr, oldp, new); +} +#define arch_try_cmpxchg64 arch_try_cmpxchg64 + #endif #define system_has_cmpxchg64() boot_cpu_has(X86_FEATURE_CX8)