Change clear_page*() to take a length parameter. Rename to
clear_pages_*().

Signed-off-by: Ankur Arora <ankur.a.arora@xxxxxxxxxx>
---
clear_page() is now defined in terms of clear_pages(). This means
that all clear_page() callsites -- which are more numerous -- would
now use an additional register.

Opinions on whether that's worth optimizing?
---
 arch/x86/include/asm/page_64.h | 18 ++++++++------
 arch/x86/lib/clear_page_64.S   | 45 +++++++++++++++++++++++++---------
 2 files changed, 43 insertions(+), 20 deletions(-)

diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index cc6b8e087192..7ca3bd2448c1 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -39,22 +39,24 @@ extern unsigned long __phys_addr_symbol(unsigned long);
 
 #define __phys_reloc_hide(x)	(x)
 
-void clear_page_orig(void *page);
-void clear_page_rep(void *page);
-void clear_page_erms(void *page);
+void clear_pages_orig(void *page, unsigned int length);
+void clear_pages_rep(void *page, unsigned int length);
+void clear_pages_erms(void *page, unsigned int length);
 
 static inline void clear_page(void *page)
 {
+	unsigned long length = PAGE_SIZE;
 	/*
 	 * Clean up KMSAN metadata for the page being cleared. The assembly call
 	 * below clobbers @page, so we perform unpoisoning before it.
 	 */
-	kmsan_unpoison_memory(page, PAGE_SIZE);
-	alternative_call_2(clear_page_orig,
-			   clear_page_rep, X86_FEATURE_REP_GOOD,
-			   clear_page_erms, X86_FEATURE_ERMS,
+	kmsan_unpoison_memory(page, length);
+
+	alternative_call_2(clear_pages_orig,
+			   clear_pages_rep, X86_FEATURE_REP_GOOD,
+			   clear_pages_erms, X86_FEATURE_ERMS,
 			   "=D" (page),
-			   "0" (page)
+			   "0" (page), "S" (length)
 			   : "cc", "memory", "rax", "rcx");
 }
 
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index ecbfb4dd3b01..6069acf6072f 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -2,6 +2,8 @@
 #include <linux/linkage.h>
 #include <asm/asm.h>
 #include <asm/export.h>
+#include <asm/cpufeatures.h>
+#include <asm/alternative.h>
 
 /*
  * Most CPUs support enhanced REP MOVSB/STOSB instructions. It is
@@ -13,18 +15,30 @@
 /*
  * Zero a page.
  * %rdi	- page
+ * %esi	- page-length
+ *
+ * Clobbers: %rax, %rcx
  */
-SYM_FUNC_START(clear_page_rep)
-	movl $4096/8,%ecx
+SYM_FUNC_START(clear_pages_rep)
+	movl %esi, %ecx
 	xorl %eax,%eax
+	shrl $3,%ecx
 	rep stosq
 	RET
-SYM_FUNC_END(clear_page_rep)
-EXPORT_SYMBOL_GPL(clear_page_rep)
+SYM_FUNC_END(clear_pages_rep)
+EXPORT_SYMBOL_GPL(clear_pages_rep)
 
-SYM_FUNC_START(clear_page_orig)
+/*
+ * Original page zeroing loop.
+ * %rdi	- page
+ * %esi	- page-length
+ *
+ * Clobbers: %rax, %rcx, %rflags
+ */
+SYM_FUNC_START(clear_pages_orig)
+	movl %esi, %ecx
 	xorl %eax,%eax
-	movl $4096/64,%ecx
+	shrl $6,%ecx
 	.p2align 4
 .Lloop:
 	decl %ecx
@@ -41,16 +55,23 @@ SYM_FUNC_START(clear_page_orig)
 	jnz	.Lloop
 	nop
 	RET
-SYM_FUNC_END(clear_page_orig)
-EXPORT_SYMBOL_GPL(clear_page_orig)
+SYM_FUNC_END(clear_pages_orig)
+EXPORT_SYMBOL_GPL(clear_pages_orig)
 
-SYM_FUNC_START(clear_page_erms)
-	movl $4096,%ecx
+/*
+ * Zero a page.
+ * %rdi	- page
+ * %esi	- page-length
+ *
+ * Clobbers: %rax, %rcx
+ */
+SYM_FUNC_START(clear_pages_erms)
+	movl %esi, %ecx
 	xorl %eax,%eax
 	rep stosb
 	RET
-SYM_FUNC_END(clear_page_erms)
-EXPORT_SYMBOL_GPL(clear_page_erms)
+SYM_FUNC_END(clear_pages_erms)
+EXPORT_SYMBOL_GPL(clear_pages_erms)
 
 /*
  * Default clear user-space.
-- 
2.31.1
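
P.S.: for illustration only, a multi-page wrapper mirroring the
clear_page() definition above might look like the sketch below. This
clear_pages() helper is not part of this patch; it only shows how the
new interface composes. Note that clear_pages_orig() consumes the
length in 64-byte chunks (shrl $6) and clear_pages_rep() in 8-byte
stosq units (shrl $3), so the sketch assumes @length is a multiple of
64 -- in practice a multiple of PAGE_SIZE.

	/*
	 * Illustrative sketch only (not part of this patch): clear a
	 * @length-byte extent starting at @page. Assumes @length is a
	 * multiple of 64 so that none of the three variants drops a
	 * remainder.
	 */
	static inline void clear_pages(void *page, unsigned int length)
	{
		/* Unpoison before the asm call clobbers @page. */
		kmsan_unpoison_memory(page, length);

		alternative_call_2(clear_pages_orig,
				   clear_pages_rep, X86_FEATURE_REP_GOOD,
				   clear_pages_erms, X86_FEATURE_ERMS,
				   "=D" (page),
				   "0" (page), "S" (length)
				   : "cc", "memory", "rax", "rcx");
	}

A caller zeroing a contiguous order-n allocation would then use,
e.g., clear_pages(page_address(page), PAGE_SIZE << n).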