On Fri, Dec 04, 2020 at 03:39:52PM -0400, Jason Gunthorpe wrote: > Long ago there wasn't a FOLL_LONGTERM flag so this DAX check was done by > post-processing the VMA list. > > These days it is trivial to just check each VMA to see if it is DAX before > processing it inside __get_user_pages() and return failure if a DAX VMA is > encountered with FOLL_LONGTERM. > > Removing the allocation of the VMA list is a significant speed up for many > call sites. > > Add an IS_ENABLED to vma_is_fsdax so that code generation is unchanged > when DAX is compiled out. > > Remove the dummy version of __gup_longterm_locked() as !CONFIG_CMA already > makes memalloc_nocma_save(), check_and_migrate_cma_pages(), and > memalloc_nocma_restore() into a NOP. > > Cc: Dan Williams <dan.j.williams@xxxxxxxxx> > Cc: Ira Weiny <ira.weiny@xxxxxxxxx> > Cc: John Hubbard <jhubbard@xxxxxxxxxx> > Cc: Pavel Tatashin <pasha.tatashin@xxxxxxxxxx> > Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx> > --- > include/linux/fs.h | 2 +- > mm/gup.c | 83 +++++++++------------------------------------- > 2 files changed, 16 insertions(+), 69 deletions(-) > > This was tested using the fake nvdimm stuff and RDMA's FOLL_LONGTERM pin > continues to correctly reject DAX vmas and returns EOPNOTSUPP > > Pavel, this accomplishes the same #ifdef clean up as your patch series for CMA > by just deleting all the code that justified the ifdefs. > > FWIW, this is probably going to be the start of a longer trickle of patches to > make pin_user_pages()/unpin_user_pages() faster. This flow is offensively slow > right now. > > Ira, I investigated streamlining the callers from here, and you are right. > The distinction that FOLL_LONGTERM means locked == NULL is no longer required > now that the vma list isn't used, and with some adjusting of the CMA path we > can purge out a lot of other complexity too. > > I have some drafts, but I want to tackle this separately. > > diff --git a/include/linux/fs.h b/include/linux/fs.h > index 8667d0cdc71e76..1fcc2b00582b22 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -3230,7 +3230,7 @@ static inline bool vma_is_fsdax(struct vm_area_struct *vma) > { > struct inode *inode; > > - if (!vma->vm_file) > + if (!IS_ENABLED(CONFIG_FS_DAX) || !vma->vm_file) > return false; Just a minor nit on this change. Is this still called in frame_vector.c? I thought I saw a patch from Daniel Vetter which removed that? Ah yea... https://lore.kernel.org/lkml/20201127164131.2244124-6-daniel.vetter@xxxxxxxx/ Regardless even if vma_is_fsdax() is still called there I don't think this will change anything there, other than making that code faster when FS_DAX is not configured. So perfect... Reviewed-by: Ira Weiny <ira.weiny@xxxxxxxxx> > if (!vma_is_dax(vma)) > return false; > diff --git a/mm/gup.c b/mm/gup.c > index 9c6a2f5001c5c2..311a44ff41ff42 100644 > --- a/mm/gup.c > +++ b/mm/gup.c > @@ -923,6 +923,9 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) > if (gup_flags & FOLL_ANON && !vma_is_anonymous(vma)) > return -EFAULT; > > + if ((gup_flags & FOLL_LONGTERM) && vma_is_fsdax(vma)) > + return -EOPNOTSUPP; > + > if (write) { > if (!(vm_flags & VM_WRITE)) { > if (!(gup_flags & FOLL_FORCE)) > @@ -1060,10 +1063,14 @@ static long __get_user_pages(struct mm_struct *mm, > goto next_page; > } > > - if (!vma || check_vma_flags(vma, gup_flags)) { > + if (!vma) { > ret = -EFAULT; > goto out; > } > + ret = check_vma_flags(vma, gup_flags); > + if (ret) > + goto out; > + > if (is_vm_hugetlb_page(vma)) { > i = follow_hugetlb_page(mm, vma, pages, vmas, > &start, &nr_pages, i, > @@ -1567,26 +1574,6 @@ struct page *get_dump_page(unsigned long addr) > } > #endif /* CONFIG_ELF_CORE */ > > -#if defined(CONFIG_FS_DAX) || defined (CONFIG_CMA) > -static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages) > -{ > - long i; > - struct vm_area_struct *vma_prev = NULL; > - > - for (i = 0; i < nr_pages; i++) { > - struct vm_area_struct *vma = vmas[i]; > - > - if (vma == vma_prev) > - continue; > - > - vma_prev = vma; > - > - if (vma_is_fsdax(vma)) > - return true; > - } > - return false; > -} > - > #ifdef CONFIG_CMA > static long check_and_migrate_cma_pages(struct mm_struct *mm, > unsigned long start, > @@ -1705,63 +1692,23 @@ static long __gup_longterm_locked(struct mm_struct *mm, > struct vm_area_struct **vmas, > unsigned int gup_flags) > { > - struct vm_area_struct **vmas_tmp = vmas; > unsigned long flags = 0; > - long rc, i; > + long rc; > > - if (gup_flags & FOLL_LONGTERM) { > - if (!pages) > - return -EINVAL; > - > - if (!vmas_tmp) { > - vmas_tmp = kcalloc(nr_pages, > - sizeof(struct vm_area_struct *), > - GFP_KERNEL); > - if (!vmas_tmp) > - return -ENOMEM; > - } > + if (gup_flags & FOLL_LONGTERM) > flags = memalloc_nocma_save(); > - } > > - rc = __get_user_pages_locked(mm, start, nr_pages, pages, > - vmas_tmp, NULL, gup_flags); > + rc = __get_user_pages_locked(mm, start, nr_pages, pages, vmas, NULL, > + gup_flags); > > if (gup_flags & FOLL_LONGTERM) { > - if (rc < 0) > - goto out; > - > - if (check_dax_vmas(vmas_tmp, rc)) { > - if (gup_flags & FOLL_PIN) > - unpin_user_pages(pages, rc); > - else > - for (i = 0; i < rc; i++) > - put_page(pages[i]); > - rc = -EOPNOTSUPP; > - goto out; > - } > - > - rc = check_and_migrate_cma_pages(mm, start, rc, pages, > - vmas_tmp, gup_flags); > -out: > + if (rc > 0) > + rc = check_and_migrate_cma_pages(mm, start, rc, pages, > + vmas, gup_flags); > memalloc_nocma_restore(flags); > } > - > - if (vmas_tmp != vmas) > - kfree(vmas_tmp); > return rc; > } > -#else /* !CONFIG_FS_DAX && !CONFIG_CMA */ > -static __always_inline long __gup_longterm_locked(struct mm_struct *mm, > - unsigned long start, > - unsigned long nr_pages, > - struct page **pages, > - struct vm_area_struct **vmas, > - unsigned int flags) > -{ > - return __get_user_pages_locked(mm, start, nr_pages, pages, vmas, > - NULL, flags); > -} > -#endif /* CONFIG_FS_DAX || CONFIG_CMA */ > > static bool is_valid_gup_flags(unsigned int gup_flags) > { > -- > 2.29.2 >