On Mon, May 15, 2023 at 09:57:04AM +0200, Peter Zijlstra wrote: > In order to replace cmpxchg_double() with the newly minted > cmpxchg128() family of functions, wire it up in this_cpu_cmpxchg(). > > Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> > --- > --- a/arch/x86/include/asm/percpu.h > +++ b/arch/x86/include/asm/percpu.h > @@ -210,6 +210,65 @@ do { \ > (typeof(_var))(unsigned long) pco_old__; \ > }) > > +#if defined(CONFIG_X86_32) && defined(CONFIG_X86_CMPXCHG64) > +#define percpu_cmpxchg64_op(size, qual, _var, _oval, _nval) \ > +({ \ > + union { \ > + u64 var; \ > + struct { \ > + u32 low, high; \ > + }; \ > + } old__, new__; \ > + \ > + old__.var = _oval; \ > + new__.var = _nval; \ > + \ > + asm qual ("cmpxchg8b " __percpu_arg([var]) \ > + : [var] "+m" (_var), \ > + "+a" (old__.low), \ > + "+d" (old__.high) \ > + : "b" (new__.low), \ > + "c" (new__.high) \ > + : "memory"); \ > + \ > + old__.var; \ > +}) > + > +#define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, , pcp, oval, nval) > +#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, volatile, pcp, oval, nval) > +#endif > + > +#ifdef CONFIG_X86_64 > +#define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval); > +#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval); > + > +#define percpu_cmpxchg128_op(size, qual, _var, _oval, _nval) \ > +({ \ > + union { \ > + u128 var; \ > + struct { \ > + u64 low, high; \ > + }; \ > + } old__, new__; \ > + \ > + old__.var = _oval; \ > + new__.var = _nval; \ > + \ > + asm qual ("cmpxchg16b " __percpu_arg([var]) \ > + : [var] "+m" (_var), \ > + "+a" (old__.low), \ > + "+d" (old__.high) \ > + : "b" (new__.low), \ > + "c" (new__.high) \ > + : "memory"); \ > + \ > + old__.var; \ > +}) > + > +#define raw_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, , pcp, oval, nval) > +#define this_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, volatile, pcp, oval, nval) > +#endif > + > /* > * this_cpu_read() makes gcc load the percpu variable every time it is > * accessed while this_cpu_read_stable() allows the value to be cached. Since this_cpu_cmpxchg*() is assumed to always be present (their usage is not guarded with system_has_cmpxchg*() it needs a fallback for then the instruction is not present. The below has been tested with clearcpuid=cx16; adding an obvious defect in the fallback implemention crashes the kernel, with the code as presented it boots. Build tested i386-defconfig. (the things we do for museum pieces :/) --- include/asm/percpu.h | 25 +++++++++++--------- lib/Makefile | 3 +- lib/cmpxchg16b_emu.S | 43 ++++++++++++++++++++-------------- lib/cmpxchg8b_emu.S | 63 +++++++++++++++++++++++++++++++++++++++------------ 4 files changed, 90 insertions(+), 44 deletions(-) --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -210,7 +210,7 @@ do { \ (typeof(_var))(unsigned long) pco_old__; \ }) -#if defined(CONFIG_X86_32) && defined(CONFIG_X86_CMPXCHG64) +#ifdef CONFIG_X86_32 #define percpu_cmpxchg64_op(size, qual, _var, _oval, _nval) \ ({ \ union { \ @@ -223,13 +223,14 @@ do { \ old__.var = _oval; \ new__.var = _nval; \ \ - asm qual ("cmpxchg8b " __percpu_arg([var]) \ + asm qual (ALTERNATIVE("leal %P[var], %%esi; call this_cpu_cmpxchg8b_emu", \ + "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \ : [var] "+m" (_var), \ "+a" (old__.low), \ "+d" (old__.high) \ : "b" (new__.low), \ "c" (new__.high) \ - : "memory"); \ + : "memory", "esi"); \ \ old__.var; \ }) @@ -254,13 +255,14 @@ do { \ old__.var = _oval; \ new__.var = _nval; \ \ - asm qual ("cmpxchg16b " __percpu_arg([var]) \ + asm qual (ALTERNATIVE("leaq %P[var], %%rsi; call this_cpu_cmpxchg16b_emu", \ + "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \ : [var] "+m" (_var), \ "+a" (old__.low), \ "+d" (old__.high) \ : "b" (new__.low), \ "c" (new__.high) \ - : "memory"); \ + : "memory", "rsi"); \ \ old__.var; \ }) @@ -400,12 +402,13 @@ do { \ bool __ret; \ typeof(pcp1) __o1 = (o1), __n1 = (n1); \ typeof(pcp2) __o2 = (o2), __n2 = (n2); \ - alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \ - "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t", \ - X86_FEATURE_CX16, \ - ASM_OUTPUT2("=a" (__ret), "+m" (pcp1), \ - "+m" (pcp2), "+d" (__o2)), \ - "b" (__n1), "c" (__n2), "a" (__o1) : "rsi"); \ + asm volatile (ALTERNATIVE("leaq %P1, %%rsi; call this_cpu_cmpxchg16b_emu", \ + "cmpxchg16b " __percpu_arg(1), X86_FEATURE_CX16) \ + "setz %0" \ + : "=a" (__ret), "+m" (pcp1) \ + : "b" (__n1), "c" (__n2), \ + "a" (__o1), "d" (__o2) \ + : "memory", "rsi"); \ __ret; \ }) --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -61,8 +61,9 @@ ifeq ($(CONFIG_X86_32),y) lib-y += strstr_32.o lib-y += string_32.o lib-y += memmove_32.o + lib-y += cmpxchg8b_emu.o ifneq ($(CONFIG_X86_CMPXCHG64),y) - lib-y += cmpxchg8b_emu.o atomic64_386_32.o + lib-y += atomic64_386_32.o endif else obj-y += iomap_copy_64.o --- a/arch/x86/lib/cmpxchg16b_emu.S +++ b/arch/x86/lib/cmpxchg16b_emu.S @@ -1,47 +1,54 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include <linux/linkage.h> #include <asm/percpu.h> +#include <asm/processor-flags.h> .text /* + * Emulate 'cmpxchg16b %gs:(%rsi)' + * * Inputs: * %rsi : memory location to compare * %rax : low 64 bits of old value * %rdx : high 64 bits of old value * %rbx : low 64 bits of new value * %rcx : high 64 bits of new value - * %al : Operation successful + * + * Notably this is not LOCK prefixed and is not safe against NMIs */ SYM_FUNC_START(this_cpu_cmpxchg16b_emu) -# -# Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not -# via the ZF. Caller will access %al to get result. -# -# Note that this is only useful for a cpuops operation. Meaning that we -# do *not* have a fully atomic operation but just an operation that is -# *atomic* on a single cpu (as provided by the this_cpu_xx class of -# macros). -# pushfq cli - cmpq PER_CPU_VAR((%rsi)), %rax - jne .Lnot_same - cmpq PER_CPU_VAR(8(%rsi)), %rdx - jne .Lnot_same + /* if (*ptr == old) */ + cmpq PER_CPU_VAR(0(%rsi)), %rax + jne .Lnot_same + cmpq PER_CPU_VAR(8(%rsi)), %rdx + jne .Lnot_same + + /* *ptr = new */ + movq %rbx, PER_CPU_VAR(0(%rsi)) + movq %rcx, PER_CPU_VAR(8(%rsi)) - movq %rbx, PER_CPU_VAR((%rsi)) - movq %rcx, PER_CPU_VAR(8(%rsi)) + /* set ZF in EFLAGS to indicate success */ + orl $X86_EFLAGS_ZF, (%rsp) popfq - mov $1, %al RET .Lnot_same: + /* *ptr != old */ + + /* old = *ptr */ + movq PER_CPU_VAR(0(%rsi)), %rax + movq PER_CPU_VAR(8(%rsi)), %rdx + + /* clear ZF in EFLAGS to indicate failure */ + andl $(~X86_EFLAGS_ZF), (%rsp) + popfq - xor %al,%al RET SYM_FUNC_END(this_cpu_cmpxchg16b_emu) --- a/arch/x86/lib/cmpxchg8b_emu.S +++ b/arch/x86/lib/cmpxchg8b_emu.S @@ -2,10 +2,16 @@ #include <linux/linkage.h> #include <asm/export.h> +#include <asm/percpu.h> +#include <asm/processor-flags.h> .text +#ifndef CONFIG_X86_CMPXCHG64 + /* + * Emulate 'cmpxchg8b (%esi)' on UP + * * Inputs: * %esi : memory location to compare * %eax : low 32 bits of old value @@ -15,32 +21,61 @@ */ SYM_FUNC_START(cmpxchg8b_emu) -# -# Emulate 'cmpxchg8b (%esi)' on UP except we don't -# set the whole ZF thing (caller will just compare -# eax:edx with the expected value) -# pushfl cli - cmpl (%esi), %eax - jne .Lnot_same - cmpl 4(%esi), %edx - jne .Lhalf_same + cmpl 0(%esi), %eax + jne .Lnot_same + cmpl 4(%esi), %edx + jne .Lnot_same + + movl %ebx, 0(%esi) + movl %ecx, 4(%esi) - movl %ebx, (%esi) - movl %ecx, 4(%esi) + orl $X86_EFLAGS_ZF, (%esp) popfl RET .Lnot_same: - movl (%esi), %eax -.Lhalf_same: - movl 4(%esi), %edx + movl 0(%esi), %eax + movl 4(%esi), %edx + + andl $(~X86_EFLAGS_ZF), (%esp) popfl RET SYM_FUNC_END(cmpxchg8b_emu) EXPORT_SYMBOL(cmpxchg8b_emu) + +#endif + +SYM_FUNC_START(this_cpu_cmpxchg8b_emu) + + pushfl + cli + + cmpl PER_CPU_VAR(0(%esi)), %eax + jne .Lnot_same2 + cmpl PER_CPU_VAR(4(%esi)), %edx + jne .Lnot_same2 + + movl %ebx, PER_CPU_VAR(0(%esi)) + movl %ecx, PER_CPU_VAR(4(%esi)) + + orl $X86_EFLAGS_ZF, (%esp) + + popfl + RET + +.Lnot_same2: + movl PER_CPU_VAR(0(%esi)), %eax + movl PER_CPU_VAR(4(%esi)), %edx + + andl $(~X86_EFLAGS_ZF), (%esp) + + popfl + RET + +SYM_FUNC_END(this_cpu_cmpxchg8b_emu)