The patch titled Subject: mm: remove vmacache has been added to the -mm mm-unstable branch. Its filename is mm-remove-vmacache.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-remove-vmacache.patch This patch will later appear in the mm-unstable branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: "Liam R. Howlett" <Liam.Howlett@xxxxxxxxxx> Subject: mm: remove vmacache Date: Mon, 22 Aug 2022 15:05:11 +0000 By using the maple tree and the maple tree state, the vmacache is no longer beneficial and is complicating the VMA code. Remove the vmacache to reduce the work in keeping it up to date and code complexity. Link: https://lkml.kernel.org/r/20220822150128.1562046-26-Liam.Howlett@xxxxxxxxxx Signed-off-by: Liam R. Howlett <Liam.Howlett@xxxxxxxxxx> Acked-by: Vlastimil Babka <vbabka@xxxxxxx> Cc: Catalin Marinas <catalin.marinas@xxxxxxx> Cc: David Hildenbrand <david@xxxxxxxxxx> Cc: David Howells <dhowells@xxxxxxxxxx> Cc: "Matthew Wilcox (Oracle)" <willy@xxxxxxxxxxxxx> Cc: SeongJae Park <sj@xxxxxxxxxx> Cc: Sven Schnelle <svens@xxxxxxxxxxxxx> Cc: Will Deacon <will@xxxxxxxxxx> Cc: Yu Zhao <yuzhao@xxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- fs/exec.c | 3 fs/proc/task_mmu.c | 1 include/linux/mm_types.h | 1 include/linux/mm_types_task.h | 12 --- include/linux/sched.h | 1 include/linux/vm_event_item.h | 4 - include/linux/vmacache.h | 28 ------- include/linux/vmstat.h | 6 - kernel/debug/debug_core.c | 12 --- kernel/fork.c | 5 - lib/Kconfig.debug | 8 -- mm/Makefile | 2 mm/debug.c | 4 - mm/mmap.c | 31 -------- mm/nommu.c | 37 +--------- mm/vmacache.c | 117 -------------------------------- mm/vmstat.c | 4 - 17 files changed, 9 insertions(+), 267 deletions(-) --- a/fs/exec.c~mm-remove-vmacache +++ a/fs/exec.c @@ -28,7 +28,6 @@ #include <linux/file.h> #include <linux/fdtable.h> #include <linux/mm.h> -#include <linux/vmacache.h> #include <linux/stat.h> #include <linux/fcntl.h> #include <linux/swap.h> @@ -1027,8 +1026,6 @@ static int exec_mmap(struct mm_struct *m activate_mm(active_mm, mm); if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) local_irq_enable(); - tsk->mm->vmacache_seqnum = 0; - vmacache_flush(tsk); task_unlock(tsk); lru_gen_use_mm(mm); --- a/fs/proc/task_mmu.c~mm-remove-vmacache +++ a/fs/proc/task_mmu.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/pagewalk.h> -#include <linux/vmacache.h> #include <linux/mm_inline.h> #include <linux/hugetlb.h> #include <linux/huge_mm.h> --- a/include/linux/mm_types.h~mm-remove-vmacache +++ a/include/linux/mm_types.h @@ -477,7 +477,6 @@ struct mm_struct { struct { struct vm_area_struct *mmap; /* list of VMAs */ struct maple_tree mm_mt; - u64 vmacache_seqnum; /* per-thread vmacache */ #ifdef CONFIG_MMU unsigned long (*get_unmapped_area) (struct file *filp, unsigned long addr, unsigned long len, --- a/include/linux/mm_types_task.h~mm-remove-vmacache +++ a/include/linux/mm_types_task.h @@ -25,18 +25,6 @@ #define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8) /* - * The per task VMA cache array: - */ -#define VMACACHE_BITS 2 -#define VMACACHE_SIZE (1U << VMACACHE_BITS) -#define VMACACHE_MASK (VMACACHE_SIZE - 1) - -struct vmacache { - u64 seqnum; - struct vm_area_struct *vmas[VMACACHE_SIZE]; -}; - -/* * When updating this, please also update struct resident_page_types[] in * kernel/fork.c */ --- a/include/linux/sched.h~mm-remove-vmacache +++ a/include/linux/sched.h @@ -861,7 +861,6 @@ struct task_struct { struct mm_struct *active_mm; /* Per-thread vma caching: */ - struct vmacache vmacache; #ifdef SPLIT_RSS_COUNTING struct task_rss_stat rss_stat; --- a/include/linux/vmacache.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_VMACACHE_H -#define __LINUX_VMACACHE_H - -#include <linux/sched.h> -#include <linux/mm.h> - -static inline void vmacache_flush(struct task_struct *tsk) -{ - memset(tsk->vmacache.vmas, 0, sizeof(tsk->vmacache.vmas)); -} - -extern void vmacache_update(unsigned long addr, struct vm_area_struct *newvma); -extern struct vm_area_struct *vmacache_find(struct mm_struct *mm, - unsigned long addr); - -#ifndef CONFIG_MMU -extern struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm, - unsigned long start, - unsigned long end); -#endif - -static inline void vmacache_invalidate(struct mm_struct *mm) -{ - mm->vmacache_seqnum++; -} - -#endif /* __LINUX_VMACACHE_H */ --- a/include/linux/vm_event_item.h~mm-remove-vmacache +++ a/include/linux/vm_event_item.h @@ -129,10 +129,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PS NR_TLB_LOCAL_FLUSH_ALL, NR_TLB_LOCAL_FLUSH_ONE, #endif /* CONFIG_DEBUG_TLBFLUSH */ -#ifdef CONFIG_DEBUG_VM_VMACACHE - VMACACHE_FIND_CALLS, - VMACACHE_FIND_HITS, -#endif #ifdef CONFIG_SWAP SWAP_RA, SWAP_RA_HIT, --- a/include/linux/vmstat.h~mm-remove-vmacache +++ a/include/linux/vmstat.h @@ -125,12 +125,6 @@ static inline void vm_events_fold_cpu(in #define count_vm_tlb_events(x, y) do { (void)(y); } while (0) #endif -#ifdef CONFIG_DEBUG_VM_VMACACHE -#define count_vm_vmacache_event(x) count_vm_event(x) -#else -#define count_vm_vmacache_event(x) do {} while (0) -#endif - #define __count_zid_vm_events(item, zid, delta) \ __count_vm_events(item##_NORMAL - ZONE_NORMAL + zid, delta) --- a/kernel/debug/debug_core.c~mm-remove-vmacache +++ a/kernel/debug/debug_core.c @@ -50,7 +50,6 @@ #include <linux/pid.h> #include <linux/smp.h> #include <linux/mm.h> -#include <linux/vmacache.h> #include <linux/rcupdate.h> #include <linux/irq.h> #include <linux/security.h> @@ -283,17 +282,6 @@ static void kgdb_flush_swbreak_addr(unsi if (!CACHE_FLUSH_IS_SAFE) return; - if (current->mm) { - int i; - - for (i = 0; i < VMACACHE_SIZE; i++) { - if (!current->vmacache.vmas[i]) - continue; - flush_cache_range(current->vmacache.vmas[i], - addr, addr + BREAK_INSTR_SIZE); - } - } - /* Force flush instruction cache if it was outside the mm */ flush_icache_range(addr, addr + BREAK_INSTR_SIZE); } --- a/kernel/fork.c~mm-remove-vmacache +++ a/kernel/fork.c @@ -43,7 +43,6 @@ #include <linux/fs.h> #include <linux/mm.h> #include <linux/mm_inline.h> -#include <linux/vmacache.h> #include <linux/nsproxy.h> #include <linux/capability.h> #include <linux/cpu.h> @@ -1128,7 +1127,6 @@ static struct mm_struct *mm_init(struct mm->mmap = NULL; mt_init_flags(&mm->mm_mt, MM_MT_FLAGS); mt_set_external_lock(&mm->mm_mt, &mm->mmap_lock); - mm->vmacache_seqnum = 0; atomic_set(&mm->mm_users, 1); atomic_set(&mm->mm_count, 1); seqcount_init(&mm->write_protect_seq); @@ -1585,9 +1583,6 @@ static int copy_mm(unsigned long clone_f if (!oldmm) return 0; - /* initialize the new vmacache entries */ - vmacache_flush(tsk); - if (clone_flags & CLONE_VM) { mmget(oldmm); mm = oldmm; --- a/lib/Kconfig.debug~mm-remove-vmacache +++ a/lib/Kconfig.debug @@ -812,14 +812,6 @@ config DEBUG_VM If unsure, say N. -config DEBUG_VM_VMACACHE - bool "Debug VMA caching" - depends on DEBUG_VM - help - Enable this to turn on VMA caching debug information. Doing so - can cause significant overhead, so only enable it in non-production - environments. - config DEBUG_VM_MAPLE_TREE bool "Debug VM maple trees" depends on DEBUG_VM --- a/mm/debug.c~mm-remove-vmacache +++ a/mm/debug.c @@ -155,7 +155,7 @@ EXPORT_SYMBOL(dump_vma); void dump_mm(const struct mm_struct *mm) { - pr_emerg("mm %px mmap %px seqnum %llu task_size %lu\n" + pr_emerg("mm %px mmap %px task_size %lu\n" #ifdef CONFIG_MMU "get_unmapped_area %px\n" #endif @@ -183,7 +183,7 @@ void dump_mm(const struct mm_struct *mm) "tlb_flush_pending %d\n" "def_flags: %#lx(%pGv)\n", - mm, mm->mmap, (long long) mm->vmacache_seqnum, mm->task_size, + mm, mm->mmap, mm->task_size, #ifdef CONFIG_MMU mm->get_unmapped_area, #endif --- a/mm/Makefile~mm-remove-vmacache +++ a/mm/Makefile @@ -52,7 +52,7 @@ obj-y := filemap.o mempool.o oom_kill. readahead.o swap.o truncate.o vmscan.o shmem.o \ util.o mmzone.o vmstat.o backing-dev.o \ mm_init.o percpu.o slab_common.o \ - compaction.o vmacache.o \ + compaction.o \ interval_tree.o list_lru.o workingset.o \ debug.o gup.o mmap_lock.o $(mmu-y) --- a/mm/mmap.c~mm-remove-vmacache +++ a/mm/mmap.c @@ -14,7 +14,6 @@ #include <linux/backing-dev.h> #include <linux/mm.h> #include <linux/mm_inline.h> -#include <linux/vmacache.h> #include <linux/shm.h> #include <linux/mman.h> #include <linux/pagemap.h> @@ -680,9 +679,6 @@ inline int vma_expand(struct ma_state *m /* Remove from mm linked list - also updates highest_vm_end */ __vma_unlink_list(mm, next); - /* Kill the cache */ - vmacache_invalidate(mm); - if (file) __remove_shared_vm_struct(next, file, mapping); @@ -923,8 +919,6 @@ int __vma_adjust(struct vm_area_struct * __vma_unlink_list(mm, next); if (remove_next == 2) __vma_unlink_list(mm, next_next); - /* Kill the cache */ - vmacache_invalidate(mm); if (file) { __remove_shared_vm_struct(next, file, mapping); @@ -2233,19 +2227,10 @@ struct vm_area_struct *find_vma_intersec unsigned long start_addr, unsigned long end_addr) { - struct vm_area_struct *vma; unsigned long index = start_addr; mmap_assert_locked(mm); - /* Check the cache first. */ - vma = vmacache_find(mm, start_addr); - if (likely(vma)) - return vma; - - vma = mt_find(&mm->mm_mt, &index, end_addr - 1); - if (vma) - vmacache_update(start_addr, vma); - return vma; + return mt_find(&mm->mm_mt, &index, end_addr - 1); } EXPORT_SYMBOL(find_vma_intersection); @@ -2259,19 +2244,10 @@ EXPORT_SYMBOL(find_vma_intersection); */ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) { - struct vm_area_struct *vma; unsigned long index = addr; mmap_assert_locked(mm); - /* Check the cache first. */ - vma = vmacache_find(mm, addr); - if (likely(vma)) - return vma; - - vma = mt_find(&mm->mm_mt, &index, ULONG_MAX); - if (vma) - vmacache_update(addr, vma); - return vma; + return mt_find(&mm->mm_mt, &index, ULONG_MAX); } EXPORT_SYMBOL(find_vma); @@ -2660,9 +2636,6 @@ detach_vmas_to_be_unmapped(struct mm_str mm->highest_vm_end = prev ? vm_end_gap(prev) : 0; tail_vma->vm_next = NULL; - /* Kill the cache */ - vmacache_invalidate(mm); - /* * Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or * VM_GROWSUP VMA. Such VMAs can change their size under --- a/mm/nommu.c~mm-remove-vmacache +++ a/mm/nommu.c @@ -19,7 +19,6 @@ #include <linux/export.h> #include <linux/mm.h> #include <linux/sched/mm.h> -#include <linux/vmacache.h> #include <linux/mman.h> #include <linux/swap.h> #include <linux/file.h> @@ -598,23 +597,12 @@ static void add_vma_to_mm(struct mm_stru */ static void delete_vma_from_mm(struct vm_area_struct *vma) { - int i; - struct address_space *mapping; - struct mm_struct *mm = vma->vm_mm; - struct task_struct *curr = current; MA_STATE(mas, &vma->vm_mm->mm_mt, 0, 0); - mm->map_count--; - for (i = 0; i < VMACACHE_SIZE; i++) { - /* if the vma is cached, invalidate the entire cache */ - if (curr->vmacache.vmas[i] == vma) { - vmacache_invalidate(mm); - break; - } - } - + vma->vm_mm->map_count--; /* remove the VMA from the mapping */ if (vma->vm_file) { + struct address_space *mapping; mapping = vma->vm_file->f_mapping; i_mmap_lock_write(mapping); @@ -626,7 +614,7 @@ static void delete_vma_from_mm(struct vm /* remove from the MM's tree and list */ vma_mas_remove(vma, &mas); - __vma_unlink_list(mm, vma); + __vma_unlink_list(vma->vm_mm, vma); } /* @@ -659,20 +647,9 @@ EXPORT_SYMBOL(find_vma_intersection); */ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) { - struct vm_area_struct *vma; MA_STATE(mas, &mm->mm_mt, addr, addr); - /* check the cache first */ - vma = vmacache_find(mm, addr); - if (likely(vma)) - return vma; - - vma = mas_walk(&mas); - - if (vma) - vmacache_update(addr, vma); - - return vma; + return mas_walk(&mas); } EXPORT_SYMBOL(find_vma); @@ -706,11 +683,6 @@ static struct vm_area_struct *find_vma_e unsigned long end = addr + len; MA_STATE(mas, &mm->mm_mt, addr, addr); - /* check the cache first */ - vma = vmacache_find_exact(mm, addr, end); - if (vma) - return vma; - vma = mas_walk(&mas); if (!vma) return NULL; @@ -719,7 +691,6 @@ static struct vm_area_struct *find_vma_e if (vma->vm_end != end) return NULL; - vmacache_update(addr, vma); return vma; } --- a/mm/vmacache.c +++ /dev/null @@ -1,117 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2014 Davidlohr Bueso. - */ -#include <linux/sched/signal.h> -#include <linux/sched/task.h> -#include <linux/mm.h> -#include <linux/vmacache.h> - -/* - * Hash based on the pmd of addr if configured with MMU, which provides a good - * hit rate for workloads with spatial locality. Otherwise, use pages. - */ -#ifdef CONFIG_MMU -#define VMACACHE_SHIFT PMD_SHIFT -#else -#define VMACACHE_SHIFT PAGE_SHIFT -#endif -#define VMACACHE_HASH(addr) ((addr >> VMACACHE_SHIFT) & VMACACHE_MASK) - -/* - * This task may be accessing a foreign mm via (for example) - * get_user_pages()->find_vma(). The vmacache is task-local and this - * task's vmacache pertains to a different mm (ie, its own). There is - * nothing we can do here. - * - * Also handle the case where a kernel thread has adopted this mm via - * kthread_use_mm(). That kernel thread's vmacache is not applicable to this mm. - */ -static inline bool vmacache_valid_mm(struct mm_struct *mm) -{ - return current->mm == mm && !(current->flags & PF_KTHREAD); -} - -void vmacache_update(unsigned long addr, struct vm_area_struct *newvma) -{ - if (vmacache_valid_mm(newvma->vm_mm)) - current->vmacache.vmas[VMACACHE_HASH(addr)] = newvma; -} - -static bool vmacache_valid(struct mm_struct *mm) -{ - struct task_struct *curr; - - if (!vmacache_valid_mm(mm)) - return false; - - curr = current; - if (mm->vmacache_seqnum != curr->vmacache.seqnum) { - /* - * First attempt will always be invalid, initialize - * the new cache for this task here. - */ - curr->vmacache.seqnum = mm->vmacache_seqnum; - vmacache_flush(curr); - return false; - } - return true; -} - -struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr) -{ - int idx = VMACACHE_HASH(addr); - int i; - - count_vm_vmacache_event(VMACACHE_FIND_CALLS); - - if (!vmacache_valid(mm)) - return NULL; - - for (i = 0; i < VMACACHE_SIZE; i++) { - struct vm_area_struct *vma = current->vmacache.vmas[idx]; - - if (vma) { -#ifdef CONFIG_DEBUG_VM_VMACACHE - if (WARN_ON_ONCE(vma->vm_mm != mm)) - break; -#endif - if (vma->vm_start <= addr && vma->vm_end > addr) { - count_vm_vmacache_event(VMACACHE_FIND_HITS); - return vma; - } - } - if (++idx == VMACACHE_SIZE) - idx = 0; - } - - return NULL; -} - -#ifndef CONFIG_MMU -struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm, - unsigned long start, - unsigned long end) -{ - int idx = VMACACHE_HASH(start); - int i; - - count_vm_vmacache_event(VMACACHE_FIND_CALLS); - - if (!vmacache_valid(mm)) - return NULL; - - for (i = 0; i < VMACACHE_SIZE; i++) { - struct vm_area_struct *vma = current->vmacache.vmas[idx]; - - if (vma && vma->vm_start == start && vma->vm_end == end) { - count_vm_vmacache_event(VMACACHE_FIND_HITS); - return vma; - } - if (++idx == VMACACHE_SIZE) - idx = 0; - } - - return NULL; -} -#endif --- a/mm/vmstat.c~mm-remove-vmacache +++ a/mm/vmstat.c @@ -1389,10 +1389,6 @@ const char * const vmstat_text[] = { "nr_tlb_local_flush_one", #endif /* CONFIG_DEBUG_TLBFLUSH */ -#ifdef CONFIG_DEBUG_VM_VMACACHE - "vmacache_find_calls", - "vmacache_find_hits", -#endif #ifdef CONFIG_SWAP "swap_ra", "swap_ra_hit", _ Patches currently in -mm which might be from Liam.Howlett@xxxxxxxxxx are maple-tree-add-new-data-structure.patch radix-tree-test-suite-add-pr_err-define.patch radix-tree-test-suite-add-kmem_cache_set_non_kernel.patch radix-tree-test-suite-add-allocation-counts-and-size-to-kmem_cache.patch radix-tree-test-suite-add-support-for-slab-bulk-apis.patch radix-tree-test-suite-add-lockdep_is_held-to-header.patch lib-test_maple_tree-add-testing-for-maple-tree.patch mm-start-tracking-vmas-with-maple-tree.patch mm-mmap-use-the-maple-tree-in-find_vma-instead-of-the-rbtree.patch mm-mmap-use-the-maple-tree-for-find_vma_prev-instead-of-the-rbtree.patch mm-mmap-use-maple-tree-for-unmapped_area_topdown.patch kernel-fork-use-maple-tree-for-dup_mmap-during-forking.patch damon-convert-__damon_va_three_regions-to-use-the-vma-iterator.patch mm-remove-rb-tree.patch mmap-change-zeroing-of-maple-tree-in-__vma_adjust.patch xen-use-vma_lookup-in-privcmd_ioctl_mmap.patch mm-optimize-find_exact_vma-to-use-vma_lookup.patch mm-khugepaged-optimize-collapse_pte_mapped_thp-by-using-vma_lookup.patch mm-mmap-change-do_brk_flags-to-expand-existing-vma-and-add-do_brk_munmap.patch mm-use-maple-tree-operations-for-find_vma_intersection.patch mm-mmap-use-advanced-maple-tree-api-for-mmap_region.patch mm-remove-vmacache.patch mm-convert-vma_lookup-to-use-mtree_load.patch mm-mmap-move-mmap_region-below-do_munmap.patch mm-mmap-reorganize-munmap-to-use-maple-states.patch mm-mmap-change-do_brk_munmap-to-use-do_mas_align_munmap.patch arm64-change-elfcore-for_each_mte_vma-to-use-vma-iterator.patch fs-proc-base-use-maple-tree-iterators-in-place-of-linked-list.patch userfaultfd-use-maple-tree-iterator-to-iterate-vmas.patch ipc-shm-use-vma-iterator-instead-of-linked-list.patch bpf-remove-vma-linked-list.patch mm-gup-use-maple-tree-navigation-instead-of-linked-list.patch mm-madvise-use-vma_find-instead-of-vma-linked-list.patch mm-memcontrol-stop-using-mm-highest_vm_end.patch mm-mempolicy-use-vma-iterator-maple-state-instead-of-vma-linked-list.patch mm-mprotect-use-maple-tree-navigation-instead-of-vma-linked-list.patch mm-mremap-use-vma_find_intersection-instead-of-vma-linked-list.patch mm-msync-use-vma_find-instead-of-vma-linked-list.patch mm-oom_kill-use-maple-tree-iterators-instead-of-vma-linked-list.patch mm-swapfile-use-vma-iterator-instead-of-vma-linked-list.patch riscv-use-vma-iterator-for-vdso.patch mm-remove-the-vma-linked-list.patch mm-mmap-drop-range_has_overlap-function.patch mm-mmapc-pass-in-mapping-to-__vma_link_file.patch