Peter Zijlstra <peterz@xxxxxxxxxxxxx> writes: > On Sun, Apr 02, 2023 at 10:22:28PM -0700, Ankur Arora wrote: >> Change clear_page*() to take a length parameter. >> Rename to clear_pages_*(). >> >> Signed-off-by: Ankur Arora <ankur.a.arora@xxxxxxxxxx> > >> diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S >> index ecbfb4dd3b01..6069acf6072f 100644 >> --- a/arch/x86/lib/clear_page_64.S >> +++ b/arch/x86/lib/clear_page_64.S > So it seems to me that clear_user_*() and clear_page_*() are now very > similar; is there really no way to de-duplicate all that? I'm not sure I see a good way to do that. The clear_page_*() variants are way simpler because they don't do alignment or uacess exception handling. The innermost loop in them could be unified -- but both clear_pages_rep() and clear_pages_erms() are pretty trivial. One place where it might be worth doing is clear_user_original() and clear_pages_orig(). These two have also diverged some (clear_pages_orig unrolls its loop and uses a register mov): inner loop in clear_pages_orig: .Lloop: decl %ecx #define PUT(x) movq %rax,x*8(%rdi) movq %rax,(%rdi) PUT(1) PUT(2) PUT(3) PUT(4) PUT(5) PUT(6) PUT(7) leaq 64(%rdi),%rdi jnz .Lloop nop inner loop in clear_user_original: .Lqwords: movq $0,(%rdi) lea 8(%rdi),%rdi dec %rcx jnz .Lqwords Maybe something like this? --- arch/x86/lib/clear_page_64.S | 35 +++++------------------------------ 1 file changed, 5 insertions(+), 30 deletions(-) diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index 6069acf6072f..795a82214d99 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S @@ -28,36 +28,6 @@ SYM_FUNC_START(clear_pages_rep) SYM_FUNC_END(clear_pages_rep) EXPORT_SYMBOL_GPL(clear_pages_rep) -/* - * Original page zeroing loop. - * %rdi - page - * %esi - page-length - * - * Clobbers: %rax, %rcx, %rflags - */ -SYM_FUNC_START(clear_pages_orig) - movl %esi, %ecx - xorl %eax,%eax - shrl $6,%ecx - .p2align 4 -.Lloop: - decl %ecx -#define PUT(x) movq %rax,x*8(%rdi) - movq %rax,(%rdi) - PUT(1) - PUT(2) - PUT(3) - PUT(4) - PUT(5) - PUT(6) - PUT(7) - leaq 64(%rdi),%rdi - jnz .Lloop - nop - RET -SYM_FUNC_END(clear_pages_orig) -EXPORT_SYMBOL_GPL(clear_pages_orig) - /* * Zero a page. * %rdi - page @@ -92,6 +62,9 @@ SYM_FUNC_START(clear_user_original) jz .Lrest_bytes # do the qwords first +SYM_INNER_LABEL(clear_pages_orig, SYM_L_GLOBAL) + movl %esi, %ecx + shr $3,%rcx .p2align 4 .Lqwords: movq $0,(%rdi) @@ -135,6 +108,8 @@ SYM_FUNC_START(clear_user_original) SYM_FUNC_END(clear_user_original) EXPORT_SYMBOL(clear_user_original) +EXPORT_SYMBOL_GPL(clear_pages_orig) + /* * Alternative clear user-space when CPU feature X86_FEATURE_REP_GOOD is * present.