The patch titled
     Subject: mm/memory.c: add apply_to_existing_page_range() helper
has been added to the -mm tree.  Its filename is
     mm-add-apply_to_existing_pages-helper.patch

This patch should soon appear at
    http://ozlabs.org/~akpm/mmots/broken-out/mm-add-apply_to_existing_pages-helper.patch
and later at
    http://ozlabs.org/~akpm/mmotm/broken-out/mm-add-apply_to_existing_pages-helper.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next and is updated
there every 3-4 working days

------------------------------------------------------
From: Daniel Axtens <dja@xxxxxxxxxx>
Subject: mm/memory.c: add apply_to_existing_page_range() helper

apply_to_page_range() takes an address range, and if any parts of it are
not covered by the existing page table hierarchy, it allocates memory to
fill them in.

In some use cases, this is not what we want - we want to be able to
operate exclusively on PTEs that are already in the tables.

Add apply_to_existing_page_range() for this.  Adjust the walker functions
for apply_to_page_range to take 'create', which switches them between the
old and new modes.

This will be used in KASAN vmalloc.

Link: http://lkml.kernel.org/r/20191205140407.1874-1-dja@xxxxxxxxxx Signed-off-by: Daniel Axtens <dja@xxxxxxxxxx> Cc: Dmitry Vyukov <dvyukov@xxxxxxxxxx> Cc: Uladzislau Rezki (Sony) <urezki@xxxxxxxxx> Cc: Alexander Potapenko <glider@xxxxxxxxxx> Cc: Daniel Axtens <dja@xxxxxxxxxx> Cc: Qian Cai <cai@xxxxxx> Cc: Andrey Ryabinin <aryabinin@xxxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- include/linux/mm.h | 3 mm/memory.c | 131 +++++++++++++++++++++++++++++++------------ 2 files changed, 99 insertions(+), 35 deletions(-) --- a/include/linux/mm.h~mm-add-apply_to_existing_pages-helper +++ a/include/linux/mm.h @@ -2621,6 +2621,9 @@ static inline int vm_fault_to_errno(vm_f typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data); extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); +extern int apply_to_existing_pages(struct mm_struct *mm, unsigned long address, + unsigned long size, pte_fn_t fn, + void *data); #ifdef CONFIG_PAGE_POISONING extern bool page_poisoning_enabled(void); --- a/mm/memory.c~mm-add-apply_to_existing_pages-helper +++ a/mm/memory.c @@ -2021,26 +2021,34 @@ EXPORT_SYMBOL(vm_iomap_memory); static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) + pte_fn_t fn, void *data, bool create) { pte_t *pte; - int err; + int err = 0; spinlock_t *uninitialized_var(ptl); - pte = (mm == &init_mm) ? - pte_alloc_kernel(pmd, addr) : - pte_alloc_map_lock(mm, pmd, addr, &ptl); - if (!pte) - return -ENOMEM; + if (create) { + pte = (mm == &init_mm) ? + pte_alloc_kernel(pmd, addr) : + pte_alloc_map_lock(mm, pmd, addr, &ptl); + if (!pte) + return -ENOMEM; + } else { + pte = (mm == &init_mm) ? 
+ pte_offset_kernel(pmd, addr) : + pte_offset_map_lock(mm, pmd, addr, &ptl); + } BUG_ON(pmd_huge(*pmd)); arch_enter_lazy_mmu_mode(); do { - err = fn(pte++, addr, data); - if (err) - break; + if (create || !pte_none(*pte)) { + err = fn(pte++, addr, data); + if (err) + break; + } } while (addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); @@ -2052,62 +2060,83 @@ static int apply_to_pte_range(struct mm_ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) + pte_fn_t fn, void *data, bool create) { pmd_t *pmd; unsigned long next; - int err; + int err = 0; BUG_ON(pud_huge(*pud)); - pmd = pmd_alloc(mm, pud, addr); - if (!pmd) - return -ENOMEM; + if (create) { + pmd = pmd_alloc(mm, pud, addr); + if (!pmd) + return -ENOMEM; + } else { + pmd = pmd_offset(pud, addr); + } do { next = pmd_addr_end(addr, end); - err = apply_to_pte_range(mm, pmd, addr, next, fn, data); - if (err) - break; + if (create || !pmd_none_or_clear_bad(pmd)) { + err = apply_to_pte_range(mm, pmd, addr, next, fn, data, + create); + if (err) + break; + } } while (pmd++, addr = next, addr != end); return err; } static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) + pte_fn_t fn, void *data, bool create) { pud_t *pud; unsigned long next; - int err; + int err = 0; - pud = pud_alloc(mm, p4d, addr); - if (!pud) - return -ENOMEM; + if (create) { + pud = pud_alloc(mm, p4d, addr); + if (!pud) + return -ENOMEM; + } else { + pud = pud_offset(p4d, addr); + } do { next = pud_addr_end(addr, end); - err = apply_to_pmd_range(mm, pud, addr, next, fn, data); - if (err) - break; + if (create || !pud_none_or_clear_bad(pud)) { + err = apply_to_pmd_range(mm, pud, addr, next, fn, data, + create); + if (err) + break; + } } while (pud++, addr = next, addr != end); return err; } static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, unsigned 
long end, - pte_fn_t fn, void *data) + pte_fn_t fn, void *data, bool create) { p4d_t *p4d; unsigned long next; - int err; + int err = 0; - p4d = p4d_alloc(mm, pgd, addr); - if (!p4d) - return -ENOMEM; + if (create) { + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return -ENOMEM; + } else { + p4d = p4d_offset(pgd, addr); + } do { next = p4d_addr_end(addr, end); - err = apply_to_pud_range(mm, p4d, addr, next, fn, data); - if (err) - break; + if (create || !p4d_none_or_clear_bad(p4d)) { + err = apply_to_pud_range(mm, p4d, addr, next, fn, data, + create); + if (err) + break; + } } while (p4d++, addr = next, addr != end); return err; } @@ -2130,7 +2159,7 @@ int apply_to_page_range(struct mm_struct pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, end); - err = apply_to_p4d_range(mm, pgd, addr, next, fn, data); + err = apply_to_p4d_range(mm, pgd, addr, next, fn, data, true); if (err) break; } while (pgd++, addr = next, addr != end); @@ -2140,6 +2169,38 @@ int apply_to_page_range(struct mm_struct EXPORT_SYMBOL_GPL(apply_to_page_range); /* + * Scan a region of virtual memory, calling a provided function on + * each leaf page table where it exists. + * + * Unlike apply_to_page_range, this does _not_ fill in page tables + * where they are absent. + */ +int apply_to_existing_pages(struct mm_struct *mm, unsigned long addr, + unsigned long size, pte_fn_t fn, void *data) +{ + pgd_t *pgd; + unsigned long next; + unsigned long end = addr + size; + int err = 0; + + if (WARN_ON(addr >= end)) + return -EINVAL; + + pgd = pgd_offset(mm, addr); + do { + next = pgd_addr_end(addr, end); + if (pgd_none_or_clear_bad(pgd)) + continue; + err = apply_to_p4d_range(mm, pgd, addr, next, fn, data, false); + if (err) + break; + } while (pgd++, addr = next, addr != end); + + return err; +} +EXPORT_SYMBOL_GPL(apply_to_existing_pages); + +/* * handle_pte_fault chooses page fault handler according to an entry which was * read non-atomically. 
Before making any commitment, on those architectures * or configurations (e.g. i386 with PAE) which might give a mix of unmatched _ Patches currently in -mm which might be from dja@xxxxxxxxxx are mm-add-apply_to_existing_pages-helper.patch kasan-use-apply_to_existing_pages-for-releasing-vmalloc-shadow.patch kasan-dont-assume-percpu-shadow-allocations-will-succeed.patch