Reimplement clear_gigantic_page() to clear gigantic (1GiB) pages using the non-temporal streaming store instruction (movnti), which bypasses the cache, since an entire 1GiB region will not fit in the cache anyway.

Doing an mlock() on a 512GiB 1G-hugetlb region previously took 134 seconds on average, about 260ms/GiB, which is quite slow. Using `movnti` and optimizing the control flow over the constituent small pages improves this by roughly a factor of 3-4x, with the 512GiB mlock() taking only 34 seconds on average, or 67ms/GiB.

The assembly code for the clear_page_nt routine is taken more or less directly from the output of gcc with -O3 for the function below, with some tweaks to support arbitrary sizes and to move the memory barrier out of the routine and into its caller:

    void clear_page_nt_64i (void *page)
    {
      for (int i = 0; i < GiB / sizeof(long long int); ++i)
        {
          _mm_stream_si64 (((long long int*)page) + i, 0);
        }
      sfence();
    }

Tested: Time to `mlock()` a 512GiB region on a Broadwell CPU

                       AVG time (s)    % imp.    ms/page
    clear_page_erms    133.584         -         261
    clear_page_nt      34.154          74.43%    67

An earlier version of this code was sent as an RFC patch in ~July 2018 (https://patchwork.kernel.org/patch/10543193/) but was never merged.

Signed-off-by: Cannon Matthews <cannonmatthews@xxxxxxxxxx> --- MAINTAINERS | 1 + arch/x86/Kconfig | 4 ++++ arch/x86/include/asm/page_64.h | 1 + arch/x86/lib/Makefile | 2 +- arch/x86/lib/clear_gigantic_page.c | 28 ++++++++++++++++++++++++++++ arch/x86/lib/clear_page_64.S | 19 +++++++++++++++++++ include/linux/mm.h | 2 ++ mm/memory.c | 2 ++ 8 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 arch/x86/lib/clear_gigantic_page.c diff --git a/MAINTAINERS b/MAINTAINERS index 68eebf3650ac..efe84f085404 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7702,6 +7702,7 @@ S: Maintained F: fs/hugetlbfs/ F: mm/hugetlb.c F: include/linux/hugetlb.h +F: arch/x86/lib/clear_gigantic_page.c F: Documentation/admin-guide/mm/hugetlbpage.rst F: Documentation/vm/hugetlbfs_reserv.rst F: Documentation/ABI/testing/sysfs-kernel-mm-hugepages diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index beea77046f9b..f49e7b6f6851 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -70,6 +70,7 @@ config X86 select ARCH_HAS_KCOV if X86_64 select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_MEMBARRIER_SYNC_CORE + select ARCH_HAS_CLEAR_GIGANTIC_PAGE if X86_64 select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_PTE_DEVMAP if X86_64 select ARCH_HAS_PTE_SPECIAL @@ -290,6 +291,9 @@ config ARCH_MAY_HAVE_PC_FDC config GENERIC_CALIBRATE_DELAY def_bool y +config ARCH_HAS_CLEAR_GIGANTIC_PAGE + bool + config ARCH_HAS_CPU_RELAX def_bool y diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 939b1cff4a7b..6ea60883b6d6 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -55,6 +55,7 @@ static inline void clear_page(void *page) } void copy_page(void *to, void *from); +void clear_page_nt(void *page, u64 page_size); #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 5246db42de45..a620c6636210 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -56,7 +56,7 @@ endif else obj-y += iomap_copy_64.o lib-y 
+= csum-partial_64.o csum-copy_64.o csum-wrappers_64.o - lib-y += clear_page_64.o copy_page_64.o + lib-y += clear_page_64.o copy_page_64.o clear_gigantic_page.o lib-y += memmove_64.o memset_64.o lib-y += copy_user_64.o lib-y += cmpxchg16b_emu.o diff --git a/arch/x86/lib/clear_gigantic_page.c b/arch/x86/lib/clear_gigantic_page.c new file mode 100644 index 000000000000..6fcb494ec9bc --- /dev/null +++ b/arch/x86/lib/clear_gigantic_page.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <asm/page.h> + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/sched.h> + +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS) + +void clear_gigantic_page(struct page *page, unsigned long addr, + unsigned int pages) +{ + int i; + void *dest = page_to_virt(page); + + /* + * cond_resched() every 2M. Hypothetical page sizes not divisible by + * this are not supported. + */ + BUG_ON(pages % HPAGE_PMD_NR != 0); + for (i = 0; i < pages; i += HPAGE_PMD_NR) { + clear_page_nt(dest + (i * PAGE_SIZE), HPAGE_PMD_NR * PAGE_SIZE); + cond_resched(); + } + /* clear_page_nt requires an `sfence` barrier. */ + wmb(); +} +#endif /* defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS) */ diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index c4c7dd115953..1224094fd863 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S @@ -50,3 +50,22 @@ SYM_FUNC_START(clear_page_erms) ret SYM_FUNC_END(clear_page_erms) EXPORT_SYMBOL_GPL(clear_page_erms) + +/* + * Zero memory using non-temporal stores, bypassing the cache. + * Requires an `sfence` (wmb()) afterwards. + * %rdi - destination. + * %rsi - size in bytes to clear. Must be a non-zero multiple of 8. 
+*/ +SYM_FUNC_START(clear_page_nt) + leaq (%rdi,%rsi), %rdx + xorl %eax, %eax + .p2align 4,,10 + .p2align 3 +.L2: + movnti %rax, (%rdi) + addq $8, %rdi + cmpq %rdx, %rdi + jne .L2 + ret +SYM_FUNC_END(clear_page_nt) diff --git a/include/linux/mm.h b/include/linux/mm.h index c54fb96cb1e6..a57f9007374b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2856,6 +2856,8 @@ enum mf_action_page_type { }; #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS) +extern void clear_gigantic_page(struct page *page, unsigned long addr, + unsigned int pages); extern void clear_huge_page(struct page *page, unsigned long addr_hint, unsigned int pages_per_huge_page); diff --git a/mm/memory.c b/mm/memory.c index e8bfdf0d9d1d..2a13bf102890 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4706,6 +4706,7 @@ static inline void process_huge_page( } } +#ifndef CONFIG_ARCH_HAS_CLEAR_GIGANTIC_PAGE static void clear_gigantic_page(struct page *page, unsigned long addr, unsigned int pages_per_huge_page) @@ -4720,6 +4721,7 @@ static void clear_gigantic_page(struct page *page, clear_user_highpage(p, addr + i * PAGE_SIZE); } } +#endif /* CONFIG_ARCH_HAS_CLEAR_GIGANTIC_PAGE */ static void clear_subpage(unsigned long addr, int idx, void *arg) { -- 2.25.1.481.gfbce0eb801-goog