The following commit has been merged into the locking/core branch of tip:

Commit-ID:     d26e46f6bf329cfcc469878709baa41d3bfc7cc3
Gitweb:        https://git.kernel.org/tip/d26e46f6bf329cfcc469878709baa41d3bfc7cc3
Author:        Uros Bizjak <ubizjak@xxxxxxxxx>
AuthorDate:    Sun, 14 Apr 2024 18:12:43 +02:00
Committer:     Ingo Molnar <mingo@xxxxxxxxxx>
CommitterDate: Sun, 14 Apr 2024 22:40:54 +02:00

locking/atomic/x86: Introduce arch_try_cmpxchg64_local()

Introduce arch_try_cmpxchg64_local() for 64-bit and 32-bit targets to
improve code using cmpxchg64_local().

On 64-bit targets, the generated assembly improves from:

    3e28:	31 c0                	xor    %eax,%eax
    3e2a:	4d 0f b1 7d 00       	cmpxchg %r15,0x0(%r13)
    3e2f:	48 85 c0             	test   %rax,%rax
    3e32:	0f 85 9f 00 00 00    	jne    3ed7 <...>

to:

    3e28:	31 c0                	xor    %eax,%eax
    3e2a:	4d 0f b1 7d 00       	cmpxchg %r15,0x0(%r13)
    3e2f:	0f 85 9f 00 00 00    	jne    3ed4 <...>

where a TEST instruction after CMPXCHG is saved. The improvements for
32-bit targets are even more noticeable, because double-word compare
after CMPXCHG8B gets eliminated.

Signed-off-by: Uros Bizjak <ubizjak@xxxxxxxxx>
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Waiman Long <longman@xxxxxxxxxx>
Link: https://lore.kernel.org/r/20240414161257.49145-1-ubizjak@xxxxxxxxx
---
 arch/x86/include/asm/cmpxchg_32.h | 34 ++++++++++++++++++++++++++++++-
 arch/x86/include/asm/cmpxchg_64.h |  6 +++++-
 2 files changed, 40 insertions(+)

diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 9e0d330..9dedc13 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -64,6 +64,11 @@ static __always_inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 ne
 	return __arch_try_cmpxchg64(ptr, oldp, new, LOCK_PREFIX);
 }
 
+static __always_inline bool __try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, u64 new)
+{
+	return __arch_try_cmpxchg64(ptr, oldp, new,);
+}
+
 #ifdef CONFIG_X86_CMPXCHG64
 
 #define arch_cmpxchg64 __cmpxchg64
@@ -72,6 +77,8 @@ static __always_inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 ne
 
 #define arch_try_cmpxchg64 __try_cmpxchg64
 
+#define arch_try_cmpxchg64_local __try_cmpxchg64_local
+
 #else
 
 /*
@@ -150,6 +157,33 @@ static __always_inline bool arch_try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64
 }
 #define arch_try_cmpxchg64 arch_try_cmpxchg64
 
+#define __arch_try_cmpxchg64_emu_local(_ptr, _oldp, _new)	\
+({								\
+	union __u64_halves o = { .full = *(_oldp), },		\
+			   n = { .full = (_new), };		\
+	bool ret;						\
+								\
+	asm volatile(ALTERNATIVE("call cmpxchg8b_emu",		\
+				 "cmpxchg8b %[ptr]", X86_FEATURE_CX8) \
+		     CC_SET(e)					\
+		     : CC_OUT(e) (ret),				\
+		       [ptr] "+m" (*(_ptr)),			\
+		       "+a" (o.low), "+d" (o.high)		\
+		     : "b" (n.low), "c" (n.high), "S" (_ptr)	\
+		     : "memory");				\
+								\
+	if (unlikely(!ret))					\
+		*(_oldp) = o.full;				\
+								\
+	likely(ret);						\
+})
+
+static __always_inline bool arch_try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, u64 new)
+{
+	return __arch_try_cmpxchg64_emu_local(ptr, oldp, new);
+}
+#define arch_try_cmpxchg64_local arch_try_cmpxchg64_local
+
 #endif
 
 #define system_has_cmpxchg64()		boot_cpu_has(X86_FEATURE_CX8)
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index c1d6cd5..5e24130 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -20,6 +20,12 @@
 	arch_try_cmpxchg((ptr), (po), (n));		\
 })
 
+#define arch_try_cmpxchg64_local(ptr, po, n)		\
+({							\
+	BUILD_BUG_ON(sizeof(*(ptr)) != 8);		\
+	arch_try_cmpxchg_local((ptr), (po), (n));	\
+})
+
 union __u128_halves {
 	u128 full;
 	struct {
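
A usage note on where the saved instruction comes from (an illustrative
sketch only, not part of the patch): with cmpxchg64_local() the caller has
to compare the returned old value itself, which is what emits the extra
TEST on 64-bit (and the double-word compare on 32-bit); the try_ variant
returns the success flag directly and updates the expected value on
failure. Assuming the generic cmpxchg64_local()/try_cmpxchg64_local()
wrappers and hypothetical names ('counter', 'inc'), a typical update loop
changes roughly like this:

	/*
	 * Before: the caller compares the returned old value, so the
	 * compiler emits CMPXCHG + compare/TEST + JNE.
	 */
	static void add_old(u64 *counter, u64 inc)
	{
		u64 old = READ_ONCE(*counter);

		for (;;) {
			u64 prev = cmpxchg64_local(counter, old, old + inc);

			if (prev == old)
				break;
			old = prev;
		}
	}

	/*
	 * After: try_cmpxchg64_local() hands back the ZF result of
	 * CMPXCHG and updates 'old' with the current value on failure,
	 * so the loop compiles to CMPXCHG + JNE with no separate compare.
	 */
	static void add_new(u64 *counter, u64 inc)
	{
		u64 old = READ_ONCE(*counter);

		while (!try_cmpxchg64_local(counter, &old, old + inc))
			;
	}

The try_ form also mirrors the existing arch_try_cmpxchg64() helpers above,
where CC_SET(e)/CC_OUT(e) pass the flag set by CMPXCHG(8B) straight to the
compiler as the boolean result.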