Re: [patch 02/14] tmpfs: fix regressions from wider use of ZERO_PAGE

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sat, Apr 16, 2022 at 03:07:47PM +0100, Mark Hemment wrote:
> I've done a skeleton implementation of alternative __clear_user() based on 
> CPU features.

Cool!

Just a couple of quick notes - more indepth look next week.

> It has three versions of __clear_user();
> o __clear_user_original() - similar to the 'standard' __clear_user()
> o __clear_user_rep_good() - using resp stos{qb} when CPU has 'rep_good'
> o __clear_user_erms() - using 'resp stosb' when CPU has 'erms'

you also need a _fsrm() one which checks X86_FEATURE_FSRM. That one
should simply do rep; stosb regardless of the size. For that you can
define an alternative_call_3 similar to how the _2 variant is defined.

> Not claiming the implementation is ideal, but might be a useful starting
> point for someone.
> Patch is against 5.18.0-rc2.
> Only basic sanity testing done.
> 
> Simple performance testing done for large sizes, on a system (Intel E8400) 
> which has rep_good but not erms;
> # dd if=/dev/zero of=/dev/null bs=16384 count=10000
> o *_original() - ~14.2GB/s.  Same as the 'standard' __clear_user().
> o *_rep_good() - same throughput as *_original().
> o *_erms()     - ~12.2GB/s (expected on a system without erms).

Right.

I have a couple of boxes too - I can run the benchmarks on them too so
we should have enough perf data points eventually.

> diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
> index fbcfec4dc4cc..373ed6be7a8d 100644
> --- a/arch/x86/include/asm/asm.h
> +++ b/arch/x86/include/asm/asm.h
> @@ -132,6 +132,35 @@
>  /* Exception table entry */
>  #ifdef __ASSEMBLY__
>  
> +# define UNDEFINE_EXTABLE_TYPE_REG \
> +	.purgem extable_type_reg ;
> +
> +# define DEFINE_EXTABLE_TYPE_REG \
> +	.macro extable_type_reg type:req reg:req ;			\

I love that macro PeterZ!</sarcasm>

> +	.set .Lfound, 0	;						\
> +	.set .Lregnr, 0 ;						\
> +	.irp rs,rax,rcx,rdx,rbx,rsp,rbp,rsi,rdi,r8,r9,r10,r11,r12,r13,	\
> +	     r14,r15 ;							\
> +	.ifc \reg, %\rs ;						\
> +	.set .Lfound, .Lfound+1 ;					\
> +	.long \type + (.Lregnr << 8) ;					\
> +	.endif ;							\
> +	.set .Lregnr, .Lregnr+1 ;					\
> +	.endr ;								\
> +	.set .Lregnr, 0 ;						\
> +	.irp rs,eax,ecx,edx,ebx,esp,ebp,esi,edi,r8d,r9d,r10d,r11d,r12d, \
> +	     r13d,r14d,r15d ;						\
> +	.ifc \reg, %\rs ;						\
> +	.set .Lfound, .Lfound+1 ;					\
> +	.long \type + (.Lregnr << 8) ;					\
> +	.endif ;							\
> +	.set .Lregnr, .Lregnr+1 ;					\
> +	.endr ;								\
> +	.if (.Lfound != 1) ;						\
> +	.error "extable_type_reg: bad register argument" ;		\
> +	.endif ;							\
> +	.endm ;
> +
>  # define _ASM_EXTABLE_TYPE(from, to, type)			\
>  	.pushsection "__ex_table","a" ;				\
>  	.balign 4 ;						\
> @@ -140,6 +169,16 @@
>  	.long type ;						\
>  	.popsection
>  
> +# define _ASM_EXTABLE_TYPE_REG(from, to, type1, reg1)		\
> +	.pushsection "__ex_table","a" ;				\
> +	.balign 4 ;						\
> +	.long (from) - . ;					\
> +	.long (to) - . ;					\
> +	DEFINE_EXTABLE_TYPE_REG					\
> +	extable_type_reg reg=reg1, type=type1 ;			\
> +	UNDEFINE_EXTABLE_TYPE_REG				\
> +	.popsection
> +
>  # ifdef CONFIG_KPROBES
>  #  define _ASM_NOKPROBE(entry)					\
>  	.pushsection "_kprobe_blacklist","aw" ;			\
> diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
> index 45697e04d771..6a4995e4cfae 100644
> --- a/arch/x86/include/asm/uaccess_64.h
> +++ b/arch/x86/include/asm/uaccess_64.h
> @@ -79,4 +79,40 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
>  	kasan_check_write(dst, size);
>  	return __copy_user_flushcache(dst, src, size);
>  }
> +
> +/*
> + * Zero Userspace.
> + */
> +
> +__must_check unsigned long
> +clear_user_original(void __user *addr, unsigned long len);
> +__must_check unsigned long
> +clear_user_rep_good(void __user *addr, unsigned long len);
> +__must_check unsigned long
> +clear_user_erms(void __user *addr, unsigned long len);
> +
> +static __always_inline __must_check unsigned long
> +___clear_user(void __user *addr, unsigned long len)
> +{
> +	unsigned long	ret;
> +
> +	/*
> +	 * No memory constraint because it doesn't change any memory gcc
> +	 * knows about.
> +	 */
> +
> +	might_fault();

I think you could do the stac(); clac() sandwich around the
alternative_call_2 here and remove the respective calls from the asm.

> +	alternative_call_2(
> +		clear_user_original,
> +		clear_user_rep_good,
> +		X86_FEATURE_REP_GOOD,
> +		clear_user_erms,
> +		X86_FEATURE_ERMS,
> +		ASM_OUTPUT2("=a" (ret), "=D" (addr), "=c" (len)),

You can do here:

                           : "+&c" (size), "+&D" (addr)
                           :: "eax");

because size and addr need to be earlyclobbers:

  e0a96129db57 ("x86: use early clobbers in usercopy*.c")

and then simply return size;

The asm modifies it anyway - no need for ret.

In any case, thanks for doing that!

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette



[Index of Archives]     [Kernel Archive]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux