Not all architectures support hardware atomic updates of access bits. On such an arch, we don't use a page table walk to classify pages into generations. Add a kernel config option and avoid building all the page table walk code on such architectures. This avoids calling lru_gen related code (lru_gen_add/remove/migrate_mm) in fork/exit/context switch. Also, with this change we don't build different components like the Bloom filter and all the page table walk code (walk_mm and related code) on unsupported architectures. No performance change observed with mongodb ycsb test: Patch details Throughput(Ops/sec) without patch 91252 With patch 91488 Without patch: $ size mm/vmscan.o text data bss dec hex filename 116016 36857 40 152913 25551 mm/vmscan.o With patch $ size mm/vmscan.o text data bss dec hex filename 112864 36437 40 149341 2475d mm/vmscan.o Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxxxxx> --- arch/Kconfig | 3 +++ arch/arm64/Kconfig | 1 + arch/x86/Kconfig | 1 + include/linux/memcontrol.h | 2 +- include/linux/mm_types.h | 10 ++++----- include/linux/mmzone.h | 12 +++++++++- kernel/fork.c | 2 +- mm/memcontrol.c | 2 +- mm/vmscan.c | 45 ++++++++++++++++++++++++++++++++++++++ 9 files changed, 69 insertions(+), 9 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index aff2746c8af2..ec8662e2f3cb 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1471,6 +1471,9 @@ config DYNAMIC_SIGFRAME config HAVE_ARCH_NODE_DEV_GROUP bool +config LRU_TASK_PAGE_AGING + bool + config ARCH_HAS_NONLEAF_PMD_YOUNG bool help diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7856c3a3e35a..d6b5d1647baa 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -233,6 +233,7 @@ config ARM64 select IRQ_FORCED_THREADING select KASAN_VMALLOC if KASAN select LOCK_MM_AND_FIND_VMA + select LRU_TASK_PAGE_AGING if LRU_GEN select MODULES_USE_ELF_RELA select NEED_DMA_MAP_STATE select NEED_SG_DMA_LENGTH diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7422db409770..940d86a0a566 
100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -281,6 +281,7 @@ config X86 select HOTPLUG_SPLIT_STARTUP if SMP && X86_32 select IRQ_FORCED_THREADING select LOCK_MM_AND_FIND_VMA + select LRU_TASK_PAGE_AGING if LRU_GEN select NEED_PER_CPU_EMBED_FIRST_CHUNK select NEED_PER_CPU_PAGE_FIRST_CHUNK select NEED_SG_DMA_LENGTH diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0ab426a5696b..5ddc1abe95ae 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -321,7 +321,7 @@ struct mem_cgroup { struct deferred_split deferred_split_queue; #endif -#ifdef CONFIG_LRU_GEN +#ifdef CONFIG_LRU_TASK_PAGE_AGING /* per-memcg mm_struct list */ struct lru_gen_mm_list mm_list; #endif diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index de10fc797c8e..9089762aa8e2 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -793,7 +793,7 @@ struct mm_struct { */ unsigned long ksm_rmap_items; #endif -#ifdef CONFIG_LRU_GEN +#ifdef CONFIG_LRU_TASK_PAGE_AGING struct { /* this mm_struct is on lru_gen_mm_list */ struct list_head list; @@ -808,7 +808,7 @@ struct mm_struct { struct mem_cgroup *memcg; #endif } lru_gen; -#endif /* CONFIG_LRU_GEN */ +#endif /* CONFIG_LRU_TASK_PAGE_AGING */ } __randomize_layout; /* @@ -837,7 +837,7 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) return (struct cpumask *)&mm->cpu_bitmap; } -#ifdef CONFIG_LRU_GEN +#ifdef CONFIG_LRU_TASK_PAGE_AGING struct lru_gen_mm_list { /* mm_struct list for page table walkers */ @@ -871,7 +871,7 @@ static inline void lru_gen_use_mm(struct mm_struct *mm) WRITE_ONCE(mm->lru_gen.bitmap, -1); } -#else /* !CONFIG_LRU_GEN */ +#else /* !CONFIG_LRU_TASK_PAGE_AGING */ static inline void lru_gen_add_mm(struct mm_struct *mm) { @@ -895,7 +895,7 @@ static inline void lru_gen_use_mm(struct mm_struct *mm) { } -#endif /* CONFIG_LRU_GEN */ +#endif /* CONFIG_LRU_TASK_PAGE_AGING */ struct vma_iterator { struct ma_state mas; diff --git a/include/linux/mmzone.h 
b/include/linux/mmzone.h index 5e50b78d58ea..5300696d7c2c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -461,6 +461,7 @@ enum { struct lru_gen_mm_state { /* set to max_seq after each iteration */ unsigned long seq; +#ifdef CONFIG_LRU_TASK_PAGE_AGING /* where the current iteration continues after */ struct list_head *head; /* where the last iteration ended before */ @@ -469,6 +470,11 @@ struct lru_gen_mm_state { unsigned long *filters[NR_BLOOM_FILTERS]; /* the mm stats for debugging */ unsigned long stats[NR_HIST_GENS][NR_MM_STATS]; +#else + /* protect the seq update above */ + /* May be we can use lruvec->lock? */ + spinlock_t lock; +#endif }; struct lru_gen_mm_walk { @@ -546,9 +552,13 @@ struct lru_gen_memcg { }; void lru_gen_init_pgdat(struct pglist_data *pgdat); - +#ifdef CONFIG_LRU_TASK_PAGE_AGING void lru_gen_init_memcg(struct mem_cgroup *memcg); void lru_gen_exit_memcg(struct mem_cgroup *memcg); +#else +static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) {} +static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg) {} +#endif void lru_gen_online_memcg(struct mem_cgroup *memcg); void lru_gen_offline_memcg(struct mem_cgroup *memcg); void lru_gen_release_memcg(struct mem_cgroup *memcg); diff --git a/kernel/fork.c b/kernel/fork.c index b85814e614a5..c7e8f65a72c8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2932,7 +2932,7 @@ pid_t kernel_clone(struct kernel_clone_args *args) get_task_struct(p); } - if (IS_ENABLED(CONFIG_LRU_GEN) && !(clone_flags & CLONE_VM)) { + if (IS_ENABLED(CONFIG_LRU_TASK_PAGE_AGING) && !(clone_flags & CLONE_VM)) { /* lock the task to synchronize with memcg migration */ task_lock(p); lru_gen_add_mm(p->mm); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 92898e99e8a5..cdcf1b6baf3e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6357,7 +6357,7 @@ static void mem_cgroup_move_task(void) } #endif -#ifdef CONFIG_LRU_GEN +#ifdef CONFIG_LRU_TASK_PAGE_AGING static void mem_cgroup_attach(struct 
cgroup_taskset *tset) { struct task_struct *task; diff --git a/mm/vmscan.c b/mm/vmscan.c index 0ea7a07990d3..3c9f24d8a4a6 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3244,10 +3244,17 @@ DEFINE_STATIC_KEY_ARRAY_FALSE(lru_gen_caps, NR_LRU_GEN_CAPS); #define get_cap(cap) static_branch_unlikely(&lru_gen_caps[cap]) #endif +#ifdef CONFIG_LRU_TASK_PAGE_AGING static bool should_walk_mmu(void) { return arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK); } +#else +static bool should_walk_mmu(void) +{ + return false; +} +#endif static bool should_clear_pmd_young(void) { @@ -3588,6 +3595,8 @@ static void clear_mm_walk(void) kfree(walk); } +#ifdef CONFIG_LRU_TASK_PAGE_AGING + /****************************************************************************** * Bloom filters ******************************************************************************/ @@ -4382,6 +4391,33 @@ static bool iterate_mm_list_walk(struct lruvec *lruvec, unsigned long max_seq, return success; } +#else + +static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq) +{ + bool success = false; + struct lru_gen_mm_state *mm_state = &lruvec->mm_state; + + spin_lock(&mm_state->lock); + + VM_WARN_ON_ONCE(mm_state->seq + 1 < max_seq); + + if (max_seq > mm_state->seq) { + WRITE_ONCE(mm_state->seq, mm_state->seq + 1); + success = true; + } + + spin_unlock(&mm_state->lock); + + return success; +} + +static bool iterate_mm_list_walk(struct lruvec *lruvec, unsigned long max_seq, + bool can_swap, bool force_scan) +{ + return false; +} +#endif static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap) { @@ -4744,9 +4780,11 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) arch_leave_lazy_mmu_mode(); mem_cgroup_unlock_pages(); +#ifdef CONFIG_LRU_TASK_PAGE_AGING /* feedback from rmap walkers to page table walkers */ if (suitable_to_scan(i, young)) update_bloom_filter(lruvec, max_seq, pvmw->pmd); +#endif } 
/****************************************************************************** @@ -5896,6 +5934,7 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec, seq_putc(m, '\n'); } +#ifdef CONFIG_LRU_TASK_PAGE_AGING seq_puts(m, " "); for (i = 0; i < NR_MM_STATS; i++) { const char *s = " "; @@ -5912,6 +5951,7 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec, seq_printf(m, " %10lu%c", n, s[i]); } seq_putc(m, '\n'); +#endif } /* see Documentation/admin-guide/mm/multigen_lru.rst for details */ @@ -6186,6 +6226,9 @@ void lru_gen_init_lruvec(struct lruvec *lruvec) INIT_LIST_HEAD(&lrugen->folios[gen][type][zone]); lruvec->mm_state.seq = MIN_NR_GENS; +#ifndef CONFIG_LRU_TASK_PAGE_AGING + spin_lock_init(&lruvec->mm_state.lock); +#endif } #ifdef CONFIG_MEMCG @@ -6202,6 +6245,7 @@ void lru_gen_init_pgdat(struct pglist_data *pgdat) } } +#ifdef CONFIG_LRU_TASK_PAGE_AGING void lru_gen_init_memcg(struct mem_cgroup *memcg) { INIT_LIST_HEAD(&memcg->mm_list.fifo); @@ -6229,6 +6273,7 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg) } } } +#endif #endif /* CONFIG_MEMCG */ -- 2.41.0