On Sat, Apr 16, 2022 at 11:15:16PM +0200, Borislav Petkov wrote:
> > but wouldn't it be lovely if we could start moving towards a model
> > where we can just inline 'memset' and 'memcpy' like this?
>
> Yeah, inlined insns without even a CALL insn would be the most optimal
> thing to do.

Here's a diff on top of Mark's patch; it inlines clear_user() everywhere
and it seems to patch properly.

vmlinux has:

ffffffff812ba53e:       0f 87 f8 fd ff ff       ja     ffffffff812ba33c <load_elf_binary+0x60c>
ffffffff812ba544:       90                      nop
ffffffff812ba545:       90                      nop
ffffffff812ba546:       90                      nop
ffffffff812ba547:       4c 89 e7                mov    %r12,%rdi
ffffffff812ba54a:       f3 aa                   rep stos %al,%es:(%rdi)
ffffffff812ba54c:       90                      nop
ffffffff812ba54d:       90                      nop
ffffffff812ba54e:       90                      nop
ffffffff812ba54f:       90                      nop
ffffffff812ba550:       90                      nop
ffffffff812ba551:       90                      nop

which is the rep; stosb, and when I boot my guest, it has been patched
to:

   0xffffffff812ba544 <+2068>:  0f 01 cb        stac
   0xffffffff812ba547 <+2071>:  4c 89 e7        mov    %r12,%rdi
   0xffffffff812ba54a <+2074>:  e8 71 9a 15 00  call   0xffffffff81413fc0 <clear_user_rep_good>
   0xffffffff812ba54f <+2079>:  0f 01 ca        clac

(stac and clac are also alternative-patched.) And there's the call to
rep_good because the guest advertises neither FSRM nor ERMS.

Anyway, more playing with this later to make sure it really does what
it should.
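In case the selection done by the ALTERNATIVE_3() chain in the patch
below isn't obvious: the alternatives are applied in sequence at patch
time, so the last entry whose feature condition holds determines the
final code. Here's a little user-space sketch of that resolution logic
(illustration only, not kernel code; pick_clear_user() is a made-up
name):

#include <stdio.h>
#include <stdbool.h>

/*
 * Mirror how the ALTERNATIVE_3() in __clear_user() resolves:
 * start from the build-time default and let each ALT_NOT()
 * entry, in order, override it when its feature bit is clear.
 */
static const char *pick_clear_user(bool fsrm, bool erms, bool rep_good)
{
        const char *insn = "rep stosb";         /* build-time default */

        if (!fsrm)
                insn = "call clear_user_erms";
        if (!erms)
                insn = "call clear_user_rep_good";
        if (!rep_good)
                insn = "call clear_user_original";

        return insn;
}

int main(void)
{
        /* The guest above: REP_GOOD set, FSRM and ERMS clear. */
        printf("%s\n", pick_clear_user(false, false, true));
        return 0;
}

This prints "call clear_user_rep_good", which is exactly what got
patched into load_elf_binary() above.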
---

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index f78e2b3501a1..335d571e8a79 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -405,9 +405,6 @@ strncpy_from_user(char *dst, const char __user *src, long count);
 
 extern __must_check long strnlen_user(const char __user *str, long n);
 
-unsigned long __must_check clear_user(void __user *mem, unsigned long len);
-unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
-
 #ifdef CONFIG_ARCH_HAS_COPY_MC
 unsigned long __must_check
 copy_mc_to_kernel(void *to, const void *from, unsigned len);
@@ -429,6 +426,8 @@ extern struct movsl_mask {
 #define ARCH_HAS_NOCACHE_UACCESS 1
 
 #ifdef CONFIG_X86_32
+unsigned long __must_check clear_user(void __user *mem, unsigned long len);
+unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
 # include <asm/uaccess_32.h>
 #else
 # include <asm/uaccess_64.h>
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 6a4995e4cfae..dc0f2bfc80a4 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -91,28 +91,34 @@ clear_user_rep_good(void __user *addr, unsigned long len);
 __must_check unsigned long
 clear_user_erms(void __user *addr, unsigned long len);
 
-static __always_inline __must_check unsigned long
-___clear_user(void __user *addr, unsigned long len)
+static __always_inline __must_check unsigned long __clear_user(void __user *addr, unsigned long size)
 {
-        unsigned long ret;
-
         /*
          * No memory constraint because it doesn't change any memory gcc
          * knows about.
          */
         might_fault();
-        alternative_call_2(
-                clear_user_original,
-                clear_user_rep_good,
-                X86_FEATURE_REP_GOOD,
-                clear_user_erms,
-                X86_FEATURE_ERMS,
-                ASM_OUTPUT2("=a" (ret), "=D" (addr), "=c" (len)),
-                "1" (addr), "2" (len)
-                : "%rdx", "cc");
-        return ret;
+        stac();
+        asm_inline volatile(
+                "1:"
+                ALTERNATIVE_3("rep stosb",
+                              "call clear_user_erms",     ALT_NOT(X86_FEATURE_FSRM),
+                              "call clear_user_rep_good", ALT_NOT(X86_FEATURE_ERMS),
+                              "call clear_user_original", ALT_NOT(X86_FEATURE_REP_GOOD))
+                "2:"
+                _ASM_EXTABLE_UA(1b, 2b)
+                : "+&c" (size), "+&D" (addr), ASM_CALL_CONSTRAINT
+                : "a" (0));
+
+        clac();
+        return size;
 }
-#define __clear_user(d, n)        ___clear_user(d, n)
+
+static __always_inline unsigned long clear_user(void __user *to, unsigned long n)
+{
+        if (access_ok(to, n))
+                return __clear_user(to, n);
+        return n;
+}
 
 #endif /* _ASM_X86_UACCESS_64_H */
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index abe1f44ea422..b80f710a7f30 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -62,9 +62,7 @@ EXPORT_SYMBOL_GPL(clear_page_erms)
  * Output:
  * rax uncopied bytes or 0 if successful.
  */
-
 SYM_FUNC_START(clear_user_original)
-        ASM_STAC
         movq %rcx,%rax
         shrq $3,%rcx
         andq $7,%rax
@@ -86,7 +84,7 @@ SYM_FUNC_START(clear_user_original)
         decl %ecx
         jnz  2b
 
-3:        ASM_CLAC
+3:
         movq %rcx,%rax
         RET
 
@@ -105,11 +103,8 @@ EXPORT_SYMBOL(clear_user_original)
  * Output:
  * rax uncopied bytes or 0 if successful.
  */
-
 SYM_FUNC_START(clear_user_rep_good)
-        ASM_STAC
         movq %rcx,%rdx
-        xorq %rax,%rax
         shrq $3,%rcx
         andq $7,%rdx
 
@@ -118,7 +113,7 @@ SYM_FUNC_START(clear_user_rep_good)
 
 1:        rep stosb
 
-3:        ASM_CLAC
+3:
         movq %rcx,%rax
         RET
 
@@ -135,15 +130,13 @@ EXPORT_SYMBOL(clear_user_rep_good)
  *
  * Output:
  * rax uncopied bytes or 0 if successful.
+ *
+ * XXX: check for small sizes and call the original version.
+ * Benchmark it first though.
  */
-
 SYM_FUNC_START(clear_user_erms)
-        xorq %rax,%rax
-        ASM_STAC
-
 0:        rep stosb
-
-3:        ASM_CLAC
+3:
         movq %rcx,%rax
         RET
 
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 3a2872c9c4a9..8bf04d95dc04 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -14,14 +14,6 @@
  * Zero Userspace
  */
 
-unsigned long clear_user(void __user *to, unsigned long n)
-{
-        if (access_ok(to, n))
-                return __clear_user(to, n);
-        return n;
-}
-EXPORT_SYMBOL(clear_user);
-
 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
 /**
  * clean_cache_range - write back a cache range with CLWB
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index bd0c2c828940..ea48ee11521f 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1019,6 +1019,9 @@ static const char *uaccess_safe_builtin[] = {
         "copy_mc_fragile_handle_tail",
         "copy_mc_enhanced_fast_string",
         "ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */
+        "clear_user_erms",
+        "clear_user_rep_good",
+        "clear_user_original",
         NULL
 };

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette