Side note: if you want to play around with the user copy routines (or maybe Borislav wants to), I have a patch that handles a couple of common cases statically.

It requires that we inline copy_to/from_user() in order to get constant size information. Almost all other architectures do that anyway, and it's not as horrid as it used to be, now that the current access_ok() no longer needs to do that nasty dynamic task size check. In particular, it should help with copying structures, notably the 'stat' structure in cp_new_stat().

The attached patch is entirely untested, except for me checking the code generation for some superficial sanity in a couple of places.

I'm not convinced that "len >= 64 && !(len & 7)" is necessarily the "correct" condition, but I resurrected an older patch for this and decided to use that as the "this is what rep_movs_alternative would do anyway" test. And obviously I expect that FSRM also does ok with "rep movsq", even if technically "movsb" is the simpler case (it doesn't have the alignment issues that "rep movsq" has).

            Linus
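As a concrete illustration of why the inlining matters here: __builtin_constant_p() is evaluated after inlining, so a constant sizeof() at the call site folds the whole statically_true() test at compile time. A minimal userspace sketch of the same trick (the names below are made up for illustration, this is not the kernel code, and it only folds at -O1 or higher):

	#include <stdio.h>
	#include <string.h>

	#define statically_true(x) (__builtin_constant_p(x) && (x))

	/* Mimics the inlined copy_user_generic(): take the "wide" path
	 * only when the compiler can prove len >= 64 && !(len & 7). */
	static inline __attribute__((always_inline))
	void copy_demo(void *to, const void *from, unsigned long len)
	{
		if (statically_true(len >= 64 && !(len & 7)))
			puts("constant qword-sized copy: rep movsq path");
		else
			puts("variable or odd size: rep movsb / call out");
		memcpy(to, from, len);
	}

	struct stat_like { long words[18]; };	/* 144 bytes, like 'struct stat' */

	int main(void)
	{
		struct stat_like a = { { 0 } }, b;
		char src[7] = "abcdef", dst[7];

		copy_demo(&b, &a, sizeof(a));		/* 144: wide path */
		copy_demo(dst, src, sizeof(src));	/* 7: fallback */
		return 0;
	}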
 arch/x86/include/asm/uaccess_64.h | 41 +++++++++++++++++++++++++++------------
 1 file changed, 29 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index f2c02e4469cc..017665052036 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -12,6 +12,9 @@
 #include <asm/cpufeatures.h>
 #include <asm/page.h>
 
+#define INLINE_COPY_FROM_USER
+#define INLINE_COPY_TO_USER
+
 #ifdef CONFIG_ADDRESS_MASKING
 /*
  * Mask out tag bits from the address.
@@ -101,22 +104,36 @@ static inline bool __access_ok(const void __user *ptr, unsigned long size)
 __must_check unsigned long
 rep_movs_alternative(void *to, const void *from, unsigned len);
 
+#define statically_true(x) (__builtin_constant_p(x) && (x))
+
 static __always_inline __must_check unsigned long
 copy_user_generic(void *to, const void *from, unsigned long len)
 {
 	stac();
-	/*
-	 * If CPU has FSRM feature, use 'rep movs'.
-	 * Otherwise, use rep_movs_alternative.
-	 */
-	asm volatile(
-		"1:\n\t"
-		ALTERNATIVE("rep movsb",
-			    "call rep_movs_alternative", ALT_NOT(X86_FEATURE_FSRM))
-		"2:\n"
-		_ASM_EXTABLE_UA(1b, 2b)
-		:"+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT
-		: : "memory", "rax");
+	if (statically_true(len >= 64 && !(len & 7))) {
+		len >>= 3;
+		asm volatile(
+			"\n1:\t"
+			"rep movsq"
+			"\n2:\n"
+			_ASM_EXTABLE_UA(1b, 2b)
+			:"+c" (len), "+D" (to), "+S" (from)
+			: :"memory");
+		len <<= 3;
+	} else {
+		/*
+		 * If CPU has FSRM feature, use 'rep movs'.
+		 * Otherwise, use rep_movs_alternative.
+		 */
+		asm volatile(
+			"1:\n\t"
+			ALTERNATIVE("rep movsb",
+				    "call rep_movs_alternative", ALT_NOT(X86_FEATURE_FSRM))
+			"2:\n"
+			_ASM_EXTABLE_UA(1b, 2b)
+			:"+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT
+			: : "memory", "rax");
+	}
 	clac();
 	return len;
 }
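For reference, the calling convention is unchanged by the patch: copy_user_generic() still returns the number of bytes not copied (the residual count left in len, scaled back to bytes on the "rep movsq" path by the final "len <<= 3"), so callers keep the usual pattern. The tail of cp_new_stat() stays as something like:

	/* typical constant-size caller: 'tmp' is a struct stat on the stack,
	 * so sizeof(tmp) is a compile-time constant once the copy is inlined */
	return copy_to_user(statbuf, &tmp, sizeof(tmp)) ? -EFAULT : 0;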