From: John Hubbard <jhubbard@xxxxxxxxxx>

This is the vaddr_pin_pages() counterpart to get_user_pages_remote(),
but with FOLL_PIN semantics: the implementation sets FOLL_PIN. That,
in turn, means that the pages must ultimately be released by
put_user_page*(), typically via vaddr_unpin_pages*().

Note that the put_user_page*() requirement won't actually be enforced
until all of the call sites have been converted and the tracking of
pages is activated.

Also introduce vaddr_unpin_pages(), in order to have a simpler call
for the error handling cases.

Use both of these new calls in the Infiniband driver, replacing
get_user_pages_remote() and put_user_pages().

Signed-off-by: John Hubbard <jhubbard@xxxxxxxxxx>
---
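For reviewers: here is a minimal sketch of the intended calling
pattern, modeled on the umem_odp caller below. It is illustrative only
and not part of this patch; example_pin_user_range() and
example_use_pages() are hypothetical names, and error handling is
abbreviated.

	/*
	 * Hypothetical example (not part of this patch): pin a remote
	 * task's pages with FOLL_PIN semantics, then release them with
	 * the matching vaddr_unpin_pages*() call.
	 */
	static long example_pin_user_range(struct task_struct *tsk,
					   struct mm_struct *mm,
					   unsigned long start,
					   unsigned long nr_pages,
					   struct page **pages,
					   struct vaddr_pin *vaddr_pin)
	{
		long npages;

		/*
		 * Like the umem_odp caller, hold mmap_sem for read
		 * across the pin; locked is passed as NULL.
		 */
		down_read(&mm->mmap_sem);
		npages = vaddr_pin_pages_remote(tsk, mm, start, nr_pages,
						FOLL_WRITE, pages, NULL,
						NULL, vaddr_pin);
		up_read(&mm->mmap_sem);
		if (npages < 0)
			return npages;

		if (example_use_pages(pages, npages) < 0) {
			/* Error path: the pages were never made dirty. */
			vaddr_unpin_pages(pages, npages, vaddr_pin);
			return -EFAULT;
		}

		/* Success path: unpin, marking the pages dirty. */
		vaddr_unpin_pages_dirty_lock(pages, npages, vaddr_pin, true);
		return 0;
	}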
 drivers/infiniband/core/umem_odp.c | 15 +++++----
 include/linux/mm.h                 |  7 +++++
 mm/gup.c                           | 50 ++++++++++++++++++++++++++++++
 3 files changed, 66 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 53085896d718..fdff034a8a30 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -534,7 +534,7 @@ static int ib_umem_odp_map_dma_single_page(
 	}
 
 out:
-	put_user_page(page);
+	vaddr_unpin_pages(&page, 1, &umem_odp->umem.vaddr_pin);
 
 	if (remove_existing_mapping) {
 		ib_umem_notifier_start_account(umem_odp);
@@ -635,9 +635,10 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
		 * complex (and doesn't gain us much performance in most use
		 * cases).
		 */
-		npages = get_user_pages_remote(owning_process, owning_mm,
+		npages = vaddr_pin_pages_remote(owning_process, owning_mm,
				user_virt, gup_num_pages,
-				flags, local_page_list, NULL, NULL);
+				flags, local_page_list, NULL, NULL,
+				&umem_odp->umem.vaddr_pin);
		up_read(&owning_mm->mmap_sem);
 
		if (npages < 0) {
@@ -657,7 +658,8 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
				ret = -EFAULT;
				break;
			}
-			put_user_page(local_page_list[j]);
+			vaddr_unpin_pages(&local_page_list[j], 1,
+					  &umem_odp->umem.vaddr_pin);
			continue;
		}
 
@@ -684,8 +686,9 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
				 * ib_umem_odp_map_dma_single_page().
				 */
				if (npages - (j + 1) > 0)
-					put_user_pages(&local_page_list[j+1],
-						       npages - (j + 1));
+					vaddr_unpin_pages(&local_page_list[j+1],
+							  npages - (j + 1),
+							  &umem_odp->umem.vaddr_pin);
				break;
			}
		}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 61b616cd9243..2bd76ad8787e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1606,6 +1606,13 @@ int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc,
 long vaddr_pin_pages(unsigned long addr, unsigned long nr_pages,
		     unsigned int gup_flags, struct page **pages,
		     struct vaddr_pin *vaddr_pin);
+long vaddr_pin_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
+			    unsigned long start, unsigned long nr_pages,
+			    unsigned int gup_flags, struct page **pages,
+			    struct vm_area_struct **vmas, int *locked,
+			    struct vaddr_pin *vaddr_pin);
+void vaddr_unpin_pages(struct page **pages, unsigned long nr_pages,
+		       struct vaddr_pin *vaddr_pin);
 void vaddr_unpin_pages_dirty_lock(struct page **pages, unsigned long nr_pages,
				  struct vaddr_pin *vaddr_pin, bool make_dirty);
 bool mapping_inode_has_layout(struct vaddr_pin *vaddr_pin, struct page *page);
diff --git a/mm/gup.c b/mm/gup.c
index 85f09958fbdc..bb95adfaf9b6 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2518,6 +2518,38 @@ long vaddr_pin_pages(unsigned long addr, unsigned long nr_pages,
 }
 EXPORT_SYMBOL(vaddr_pin_pages);
 
+/**
+ * vaddr_pin_pages_remote - pin pages by virtual address and return the
+ * pages to the user.
+ *
+ * @tsk:	the task_struct to use for page fault accounting, or
+ *		NULL if faults are not to be recorded.
+ * @mm:		mm_struct of target mm
+ * @start:	start address
+ * @nr_pages:	number of pages to pin
+ * @gup_flags:	flags to use for the pin
+ * @pages:	array of pages returned
+ * @vaddr_pin:	initialized meta information this pin is to be associated
+ *		with.
+ *
+ * This is the vaddr_pin_pages() counterpart to get_user_pages_remote(),
+ * but with FOLL_PIN semantics: the implementation sets FOLL_PIN. That,
+ * in turn, means that the pages must ultimately be released by
+ * put_user_page*().
+ */
+long vaddr_pin_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
+			    unsigned long start, unsigned long nr_pages,
+			    unsigned int gup_flags, struct page **pages,
+			    struct vm_area_struct **vmas, int *locked,
+			    struct vaddr_pin *vaddr_pin)
+{
+	gup_flags |= FOLL_TOUCH | FOLL_REMOTE | FOLL_PIN;
+
+	return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
+				       locked, gup_flags, vaddr_pin);
+}
+EXPORT_SYMBOL(vaddr_pin_pages_remote);
+
 /**
  * vaddr_unpin_pages_dirty_lock - counterpart to vaddr_pin_pages
  *
@@ -2536,3 +2568,21 @@ void vaddr_unpin_pages_dirty_lock(struct page **pages, unsigned long nr_pages,
	__put_user_pages_dirty_lock(vaddr_pin, pages, nr_pages, make_dirty);
 }
 EXPORT_SYMBOL(vaddr_unpin_pages_dirty_lock);
+
+/**
+ * vaddr_unpin_pages - simple, non-dirtying counterpart to vaddr_pin_pages
+ *
+ * @pages:	array of pages returned
+ * @nr_pages:	number of pages in pages
+ * @vaddr_pin:	same information passed to vaddr_pin_pages
+ *
+ * Like vaddr_unpin_pages_dirty_lock(), but for non-dirty pages. Useful
+ * for putting back pages in an error case: they were never made dirty.
+ */
+void vaddr_unpin_pages(struct page **pages, unsigned long nr_pages,
+		       struct vaddr_pin *vaddr_pin)
+{
+	__put_user_pages_dirty_lock(vaddr_pin, pages, nr_pages, false);
+}
+EXPORT_SYMBOL(vaddr_unpin_pages);
+
-- 
2.22.0