The patch titled Subject: mm/gup: remove vmas array from internal GUP functions has been added to the -mm mm-unstable branch. Its filename is mm-gup-remove-vmas-array-from-internal-gup-functions.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-gup-remove-vmas-array-from-internal-gup-functions.patch This patch will later appear in the mm-unstable branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: Lorenzo Stoakes <lstoakes@xxxxxxxxx> Subject: mm/gup: remove vmas array from internal GUP functions Date: Sun, 14 May 2023 22:27:01 +0100 Now we have eliminated all callers to GUP APIs which use the vmas parameter, eliminate it altogether. This eliminates a class of bugs where vmas might have been kept around longer than the mmap_lock and thus we need not be concerned about locks being dropped during this operation leaving behind dangling pointers. This simplifies the GUP API and makes it considerably clearer as to its purpose - follow flags are applied and if pinning, an array of pages is returned. Link: https://lkml.kernel.org/r/23f5b6e75ccf31565355cd37bf1253d20f2e6388.1684097002.git.lstoakes@xxxxxxxxx Signed-off-by: Lorenzo Stoakes <lstoakes@xxxxxxxxx> Acked-by: David Hildenbrand <david@xxxxxxxxxx> Reviewed-by: Christoph Hellwig <hch@xxxxxx> Cc: Alexei Starovoitov <ast@xxxxxxxxxx> Cc: Bernard Metzler <bmt@xxxxxxxxxxxxxx> Cc: Björn Töpel <bjorn@xxxxxxxxxx> Cc: Christian Benvenuti <benve@xxxxxxxxx> Cc: Christophe Leroy <christophe.leroy@xxxxxxxxxx> Cc: Daniel Borkmann <daniel@xxxxxxxxxxxxx> Cc: David S. Miller <davem@xxxxxxxxxxxxx> Cc: Dennis Dalessandro <dennis.dalessandro@xxxxxxxxxxxxxxxxxxxx> Cc: Eric Dumazet <edumazet@xxxxxxxxxx> Cc: Jakub Kicinski <kuba@xxxxxxxxxx> Cc: Jason Gunthorpe <jgg@xxxxxxxx> Cc: Jason Wang <jasowang@xxxxxxxxxx> Cc: Jens Axboe <axboe@xxxxxxxxx> Cc: Jesper Dangaard Brouer <hawk@xxxxxxxxxx> Cc: John Fastabend <john.fastabend@xxxxxxxxx> Cc: John Hubbard <jhubbard@xxxxxxxxxx> Cc: Jonathan Lemon <jonathan.lemon@xxxxxxxxx> Cc: Leon Romanovsky <leon@xxxxxxxxxx> Cc: Maciej Fijalkowski <maciej.fijalkowski@xxxxxxxxx> Cc: Magnus Karlsson <magnus.karlsson@xxxxxxxxx> Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx> Cc: Mauro Carvalho Chehab <mchehab@xxxxxxxxxx> Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx> Cc: Michael S. Tsirkin <mst@xxxxxxxxxx> Cc: Nelson Escobar <neescoba@xxxxxxxxx> Cc: Nicholas Piggin <npiggin@xxxxxxxxx> Cc: Paolo Abeni <pabeni@xxxxxxxxxx> Cc: Pavel Begunkov <asml.silence@xxxxxxxxx> Cc: Catalin Marinas <catalin.marinas@xxxxxxx> Cc: Christian König <christian.koenig@xxxxxxx> Cc: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> Cc: Janosch Frank <frankja@xxxxxxxxxxxxx> Cc: Jarkko Sakkinen <jarkko@xxxxxxxxxx> Cc: Jason Gunthorpe <jgg@xxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- include/linux/hugetlb.h | 10 +--- mm/gup.c | 83 ++++++++++++++------------------------ mm/hugetlb.c | 24 ++++------ 3 files changed, 45 insertions(+), 72 deletions(-) --- a/include/linux/hugetlb.h~mm-gup-remove-vmas-array-from-internal-gup-functions +++ a/include/linux/hugetlb.h @@ -133,9 +133,8 @@ int copy_hugetlb_page_range(struct mm_st struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma, unsigned long address, unsigned int flags); long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, - struct page **, struct vm_area_struct **, - unsigned long *, unsigned long *, long, unsigned int, - int *); + struct page **, unsigned long *, unsigned long *, + long, unsigned int, int *); void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long, struct page *, zap_flags_t); @@ -306,9 +305,8 @@ static inline struct page *hugetlb_follo static inline long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, - struct vm_area_struct **vmas, unsigned long *position, - unsigned long *nr_pages, long i, unsigned int flags, - int *nonblocking) + unsigned long *position, unsigned long *nr_pages, + long i, unsigned int flags, int *nonblocking) { BUG(); return 0; --- a/mm/gup.c~mm-gup-remove-vmas-array-from-internal-gup-functions +++ a/mm/gup.c @@ -1067,8 +1067,6 @@ static int check_vma_flags(struct vm_are * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. Or NULL, if caller * only intends to ensure the pages are faulted in. - * @vmas: array of pointers to vmas corresponding to each page. - * Or NULL if the caller does not require them. * @locked: whether we're still with the mmap_lock held * * Returns either number of pages pinned (which may be less than the @@ -1082,8 +1080,6 @@ static int check_vma_flags(struct vm_are * * The caller is responsible for releasing returned @pages, via put_page(). * - * @vmas are valid only as long as mmap_lock is held. - * * Must be called with mmap_lock held. It may be released. See below. * * __get_user_pages walks a process's page tables and takes a reference to @@ -1119,7 +1115,7 @@ static int check_vma_flags(struct vm_are static long __get_user_pages(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, - struct vm_area_struct **vmas, int *locked) + int *locked) { long ret = 0, i = 0; struct vm_area_struct *vma = NULL; @@ -1159,9 +1155,9 @@ static long __get_user_pages(struct mm_s goto out; if (is_vm_hugetlb_page(vma)) { - i = follow_hugetlb_page(mm, vma, pages, vmas, - &start, &nr_pages, i, - gup_flags, locked); + i = follow_hugetlb_page(mm, vma, pages, + &start, &nr_pages, i, + gup_flags, locked); if (!*locked) { /* * We've got a VM_FAULT_RETRY @@ -1226,10 +1222,6 @@ retry: ctx.page_mask = 0; } next_page: - if (vmas) { - vmas[i] = vma; - ctx.page_mask = 0; - } page_increm = 1 + (~(start >> PAGE_SHIFT) & ctx.page_mask); if (page_increm > nr_pages) page_increm = nr_pages; @@ -1384,7 +1376,6 @@ static __always_inline long __get_user_p unsigned long start, unsigned long nr_pages, struct page **pages, - struct vm_area_struct **vmas, int *locked, unsigned int flags) { @@ -1422,7 +1413,7 @@ static __always_inline long __get_user_p pages_done = 0; for (;;) { ret = __get_user_pages(mm, start, nr_pages, flags, pages, - vmas, locked); + locked); if (!(flags & FOLL_UNLOCKABLE)) { /* VM_FAULT_RETRY couldn't trigger, bypass */ pages_done = ret; @@ -1486,7 +1477,7 @@ retry: *locked = 1; ret = __get_user_pages(mm, start, 1, flags | FOLL_TRIED, - pages, NULL, locked); + pages, locked); if (!*locked) { /* Continue to retry until we succeeded */ BUG_ON(ret != 0); @@ -1584,7 +1575,7 @@ long populate_vma_page_range(struct vm_a * not result in a stack expansion that recurses back here. */ ret = __get_user_pages(mm, start, nr_pages, gup_flags, - NULL, NULL, locked ? locked : &local_locked); + NULL, locked ? locked : &local_locked); lru_add_drain(); return ret; } @@ -1642,7 +1633,7 @@ long faultin_vma_page_range(struct vm_ar return -EINVAL; ret = __get_user_pages(mm, start, nr_pages, gup_flags, - NULL, NULL, locked); + NULL, locked); lru_add_drain(); return ret; } @@ -1710,8 +1701,7 @@ int __mm_populate(unsigned long start, u #else /* CONFIG_MMU */ static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, - struct vm_area_struct **vmas, int *locked, - unsigned int foll_flags) + int *locked, unsigned int foll_flags) { struct vm_area_struct *vma; bool must_unlock = false; @@ -1755,8 +1745,7 @@ static long __get_user_pages_locked(stru if (pages[i]) get_page(pages[i]); } - if (vmas) - vmas[i] = vma; + start = (start + PAGE_SIZE) & PAGE_MASK; } @@ -1937,8 +1926,7 @@ struct page *get_dump_page(unsigned long int locked = 0; int ret; - ret = __get_user_pages_locked(current->mm, addr, 1, &page, NULL, - &locked, + ret = __get_user_pages_locked(current->mm, addr, 1, &page, &locked, FOLL_FORCE | FOLL_DUMP | FOLL_GET); return (ret == 1) ? page : NULL; } @@ -2111,7 +2099,6 @@ static long __gup_longterm_locked(struct unsigned long start, unsigned long nr_pages, struct page **pages, - struct vm_area_struct **vmas, int *locked, unsigned int gup_flags) { @@ -2119,13 +2106,13 @@ static long __gup_longterm_locked(struct long rc, nr_pinned_pages; if (!(gup_flags & FOLL_LONGTERM)) - return __get_user_pages_locked(mm, start, nr_pages, pages, vmas, + return __get_user_pages_locked(mm, start, nr_pages, pages, locked, gup_flags); flags = memalloc_pin_save(); do { nr_pinned_pages = __get_user_pages_locked(mm, start, nr_pages, - pages, vmas, locked, + pages, locked, gup_flags); if (nr_pinned_pages <= 0) { rc = nr_pinned_pages; @@ -2143,9 +2130,8 @@ static long __gup_longterm_locked(struct * Check that the given flags are valid for the exported gup/pup interface, and * update them with the required flags that the caller must have set. */ -static bool is_valid_gup_args(struct page **pages, struct vm_area_struct **vmas, - int *locked, unsigned int *gup_flags_p, - unsigned int to_set) +static bool is_valid_gup_args(struct page **pages, int *locked, + unsigned int *gup_flags_p, unsigned int to_set) { unsigned int gup_flags = *gup_flags_p; @@ -2187,13 +2173,6 @@ static bool is_valid_gup_args(struct pag (gup_flags & FOLL_PCI_P2PDMA))) return false; - /* - * Can't use VMAs with locked, as locked allows GUP to unlock - * which invalidates the vmas array - */ - if (WARN_ON_ONCE(vmas && (gup_flags & FOLL_UNLOCKABLE))) - return false; - *gup_flags_p = gup_flags; return true; } @@ -2262,11 +2241,11 @@ long get_user_pages_remote(struct mm_str { int local_locked = 1; - if (!is_valid_gup_args(pages, NULL, locked, &gup_flags, + if (!is_valid_gup_args(pages, locked, &gup_flags, FOLL_TOUCH | FOLL_REMOTE)) return -EINVAL; - return __get_user_pages_locked(mm, start, nr_pages, pages, NULL, + return __get_user_pages_locked(mm, start, nr_pages, pages, locked ? locked : &local_locked, gup_flags); } @@ -2301,11 +2280,11 @@ long get_user_pages(unsigned long start, { int locked = 1; - if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags, FOLL_TOUCH)) + if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_TOUCH)) return -EINVAL; return __get_user_pages_locked(current->mm, start, nr_pages, pages, - NULL, &locked, gup_flags); + &locked, gup_flags); } EXPORT_SYMBOL(get_user_pages); @@ -2329,12 +2308,12 @@ long get_user_pages_unlocked(unsigned lo { int locked = 0; - if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags, + if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_TOUCH | FOLL_UNLOCKABLE)) return -EINVAL; return __get_user_pages_locked(current->mm, start, nr_pages, pages, - NULL, &locked, gup_flags); + &locked, gup_flags); } EXPORT_SYMBOL(get_user_pages_unlocked); @@ -3124,7 +3103,7 @@ static int internal_get_user_pages_fast( start += nr_pinned << PAGE_SHIFT; pages += nr_pinned; ret = __gup_longterm_locked(current->mm, start, nr_pages - nr_pinned, - pages, NULL, &locked, + pages, &locked, gup_flags | FOLL_TOUCH | FOLL_UNLOCKABLE); if (ret < 0) { /* @@ -3166,7 +3145,7 @@ int get_user_pages_fast_only(unsigned lo * FOLL_FAST_ONLY is required in order to match the API description of * this routine: no fall back to regular ("slow") GUP. */ - if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags, + if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_GET | FOLL_FAST_ONLY)) return -EINVAL; @@ -3199,7 +3178,7 @@ int get_user_pages_fast(unsigned long st * FOLL_GET, because gup fast is always a "pin with a +1 page refcount" * request. */ - if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags, FOLL_GET)) + if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_GET)) return -EINVAL; return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages); } @@ -3224,7 +3203,7 @@ EXPORT_SYMBOL_GPL(get_user_pages_fast); int pin_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages) { - if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags, FOLL_PIN)) + if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_PIN)) return -EINVAL; return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages); } @@ -3257,10 +3236,10 @@ long pin_user_pages_remote(struct mm_str { int local_locked = 1; - if (!is_valid_gup_args(pages, NULL, locked, &gup_flags, + if (!is_valid_gup_args(pages, locked, &gup_flags, FOLL_PIN | FOLL_TOUCH | FOLL_REMOTE)) return 0; - return __gup_longterm_locked(mm, start, nr_pages, pages, NULL, + return __gup_longterm_locked(mm, start, nr_pages, pages, locked ? locked : &local_locked, gup_flags); } @@ -3286,10 +3265,10 @@ long pin_user_pages(unsigned long start, { int locked = 1; - if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags, FOLL_PIN)) + if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_PIN)) return 0; return __gup_longterm_locked(current->mm, start, nr_pages, - pages, NULL, &locked, gup_flags); + pages, &locked, gup_flags); } EXPORT_SYMBOL(pin_user_pages); @@ -3303,11 +3282,11 @@ long pin_user_pages_unlocked(unsigned lo { int locked = 0; - if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags, + if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_PIN | FOLL_TOUCH | FOLL_UNLOCKABLE)) return 0; - return __gup_longterm_locked(current->mm, start, nr_pages, pages, NULL, + return __gup_longterm_locked(current->mm, start, nr_pages, pages, &locked, gup_flags); } EXPORT_SYMBOL(pin_user_pages_unlocked); --- a/mm/hugetlb.c~mm-gup-remove-vmas-array-from-internal-gup-functions +++ a/mm/hugetlb.c @@ -6425,17 +6425,14 @@ out_release_nounlock: } #endif /* CONFIG_USERFAULTFD */ -static void record_subpages_vmas(struct page *page, struct vm_area_struct *vma, - int refs, struct page **pages, - struct vm_area_struct **vmas) +static void record_subpages(struct page *page, struct vm_area_struct *vma, + int refs, struct page **pages) { int nr; for (nr = 0; nr < refs; nr++) { if (likely(pages)) pages[nr] = nth_page(page, nr); - if (vmas) - vmas[nr] = vma; } } @@ -6508,9 +6505,9 @@ out_unlock: } long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, - struct page **pages, struct vm_area_struct **vmas, - unsigned long *position, unsigned long *nr_pages, - long i, unsigned int flags, int *locked) + struct page **pages, unsigned long *position, + unsigned long *nr_pages, long i, unsigned int flags, + int *locked) { unsigned long pfn_offset; unsigned long vaddr = *position; @@ -6638,7 +6635,7 @@ long follow_hugetlb_page(struct mm_struc * If subpage information not requested, update counters * and skip the same_page loop below. */ - if (!pages && !vmas && !pfn_offset && + if (!pages && !pfn_offset && (vaddr + huge_page_size(h) < vma->vm_end) && (remainder >= pages_per_huge_page(h))) { vaddr += huge_page_size(h); @@ -6653,11 +6650,10 @@ long follow_hugetlb_page(struct mm_struc refs = min3(pages_per_huge_page(h) - pfn_offset, remainder, (vma->vm_end - ALIGN_DOWN(vaddr, PAGE_SIZE)) >> PAGE_SHIFT); - if (pages || vmas) - record_subpages_vmas(nth_page(page, pfn_offset), - vma, refs, - likely(pages) ? pages + i : NULL, - vmas ? vmas + i : NULL); + if (pages) + record_subpages(nth_page(page, pfn_offset), + vma, refs, + likely(pages) ? pages + i : NULL); if (pages) { /* _ Patches currently in -mm which might be from lstoakes@xxxxxxxxx are mm-mmap-separate-writenotify-and-dirty-tracking-logic.patch mm-gup-disallow-foll_longterm-gup-nonfast-writing-to-file-backed-mappings.patch mm-gup-disallow-foll_longterm-gup-fast-writing-to-file-backed-mappings.patch mm-gup-add-missing-gup_must_unshare-check-to-gup_huge_pgd.patch mm-gup-remove-unused-vmas-parameter-from-get_user_pages.patch mm-gup-remove-unused-vmas-parameter-from-pin_user_pages_remote.patch mm-gup-remove-vmas-parameter-from-get_user_pages_remote.patch io_uring-rsrc-delegate-vma-file-backed-check-to-gup.patch mm-gup-remove-vmas-parameter-from-pin_user_pages.patch mm-gup-remove-vmas-array-from-internal-gup-functions.patch