On Fri, Dec 17, 2021 at 11:19:10AM +0300, Nikita Yushchenko wrote: > When batched page table freeing via struct mmu_table_batch is used, the > final freeing in __tlb_remove_table_free() executes a loop, calling > arch hook __tlb_remove_table() to free each table individually. > > Shift that loop down to archs. This allows archs to optimize it, by > freeing multiple tables in a single release_pages() call. This is > faster than individual put_page() calls, especially with memcg > accounting enabled. > > Signed-off-by: Andrey Ryabinin <aryabinin@xxxxxxxxxxxxx> > Signed-off-by: Nikita Yushchenko <nikita.yushchenko@xxxxxxxxxxxxx> > --- > arch/arm/include/asm/tlb.h | 5 ++++ > arch/arm64/include/asm/tlb.h | 5 ++++ > arch/powerpc/include/asm/book3s/32/pgalloc.h | 8 +++++++ > arch/powerpc/include/asm/book3s/64/pgalloc.h | 1 + > arch/powerpc/include/asm/nohash/pgalloc.h | 8 +++++++ > arch/powerpc/mm/book3s64/pgtable.c | 8 +++++++ > arch/s390/include/asm/tlb.h | 1 + > arch/s390/mm/pgalloc.c | 8 +++++++ > arch/sparc/include/asm/pgalloc_64.h | 8 +++++++ > arch/x86/include/asm/tlb.h | 5 ++++ > include/asm-generic/tlb.h | 2 +- > include/linux/swap.h | 5 +++- > mm/mmu_gather.c | 6 +---- > mm/swap_state.c | 24 +++++++++++++++----- > 14 files changed, 81 insertions(+), 13 deletions(-) Oh gawd, that's terrible. Never, ever duplicate code like that. I'm thinking the below does the same? But yes, please do as Dave said, give us actual numbers that show this is worth it. --- arch/Kconfig | 4 ++++ arch/arm/Kconfig | 1 + arch/arm/include/asm/tlb.h | 5 ----- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/tlb.h | 5 ----- arch/x86/Kconfig | 1 + arch/x86/include/asm/tlb.h | 4 ---- mm/mmu_gather.c | 22 +++++++++++++++++++--- 8 files changed, 26 insertions(+), 17 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 26b8ed11639d..f2bd3f5af2b1 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -415,6 +415,10 @@ config HAVE_ARCH_JUMP_LABEL_RELATIVE config MMU_GATHER_TABLE_FREE bool +config MMU_GATHER_TABLE_PAGE + bool + depends on MMU_GATHER_TABLE_FREE + config MMU_GATHER_RCU_TABLE_FREE bool select MMU_GATHER_TABLE_FREE diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index f0f9e8bec83a..11baaa5719c2 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -110,6 +110,7 @@ config ARM select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE + select MMU_GATHER_TABLE_PAGE if MMU select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RSEQ select HAVE_STACKPROTECTOR diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index b8cbe03ad260..9d9b21649ca0 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -29,11 +29,6 @@ #include <linux/swap.h> #include <asm/tlbflush.h> -static inline void __tlb_remove_table(void *_table) -{ - free_page_and_swap_cache((struct page *)_table); -} - #include <asm-generic/tlb.h> static inline void diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c4207cf9bb17..4aa28fb03f4f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -196,6 +196,7 @@ config ARM64 select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_FUTEX_CMPXCHG if FUTEX select MMU_GATHER_RCU_TABLE_FREE + select MMU_GATHER_TABLE_PAGE select HAVE_RSEQ select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index c995d1f4594f..401826260a5c 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -11,11 +11,6 @@ #include <linux/pagemap.h> #include <linux/swap.h> -static inline void __tlb_remove_table(void *_table) -{ - free_page_and_swap_cache((struct page *)_table); -} - #define tlb_flush tlb_flush static void tlb_flush(struct mmu_gather *tlb); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b9281fab4e3e..a22e653f4d0e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -235,6 +235,7 @@ config X86 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT + select MMU_GATHER_TABLE_PAGE select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h index 1bfe979bb9bc..dec5ffa3042a 100644 --- a/arch/x86/include/asm/tlb.h +++ b/arch/x86/include/asm/tlb.h @@ -32,9 +32,5 @@ static inline void tlb_flush(struct mmu_gather *tlb) * below 'ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE' in include/asm-generic/tlb.h * for more details. */ -static inline void __tlb_remove_table(void *table) -{ - free_page_and_swap_cache(table); -} #endif /* _ASM_X86_TLB_H */ diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 1b9837419bf9..0195d0f13ed3 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -93,13 +93,29 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_ #ifdef CONFIG_MMU_GATHER_TABLE_FREE -static void __tlb_remove_table_free(struct mmu_table_batch *batch) +#ifdef CONFIG_MMU_GATHER_TABLE_PAGE +static inline void __tlb_remove_table(void *table) +{ + free_page_and_swap_cache(table); +} + +static inline void __tlb_remove_tables(void **tables, int nr) +{ + free_pages_and_swap_cache_nolru((struct page **)tables, nr); +} +#else +static inline void __tlb_remove_tables(void **tables, int nr) { int i; - for (i = 0; i < batch->nr; i++) - __tlb_remove_table(batch->tables[i]); + for (i = 0; i < nr; i++) + __tlb_remove_table(tables[i]); +} +#endif +static void __tlb_remove_table_free(struct mmu_table_batch *batch) +{ + __tlb_remove_tables(batch->tables, batch->nr); free_page((unsigned long)batch); }