Re: [PATCH v3 5/5] try_to_unmap_one: batched remove rmap, update folio refcount

"Yin, Fengwei" <fengwei.yin@xxxxxxxxx> · Tue, 7 Mar 2023 02:45:05 +0000



On Mon, 2023-03-06 at 20:39 +0800, haoxin wrote:
> 
> 在 2023/3/6 下午5:22, Yin Fengwei 写道:
> > If unmapping one page fails, or the vma walk will skip the next pte,
> > or the vma walk will end at the next pte, batch-remove the rmap and
> > update the folio refcount.
> > 
> > Signed-off-by: Yin Fengwei <fengwei.yin@xxxxxxxxx>
> > ---
> >   include/linux/rmap.h |  1 +
> >   mm/page_vma_mapped.c | 30 +++++++++++++++++++++++++++
> >   mm/rmap.c            | 48 ++++++++++++++++++++++++++++++++++-----
> > -----
> >   3 files changed, 68 insertions(+), 11 deletions(-)
> > 
> > diff --git a/include/linux/rmap.h b/include/linux/rmap.h
> > index d2569b42e21a..18193d1d5a8e 100644
> > --- a/include/linux/rmap.h
> > +++ b/include/linux/rmap.h
> > @@ -424,6 +424,7 @@ static inline void
> > page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
> >   }
> >   
> >   bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw);
> > +bool pvmw_walk_skip_or_end_on_next(struct page_vma_mapped_walk
> > *pvmw);
> >   
> >   /*
> >    * Used by swapoff to help locate where page is expected in vma.
> > diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
> > index 4e448cfbc6ef..19e997dfb5c6 100644
> > --- a/mm/page_vma_mapped.c
> > +++ b/mm/page_vma_mapped.c
> > @@ -291,6 +291,36 @@ bool page_vma_mapped_walk(struct
> > page_vma_mapped_walk *pvmw)
> >         return false;
> >   }
> >   
> > +/**
> > + * pvmw_walk_skip_or_end_on_next - check if next pte will be
> > skipped or
> > + *                                 end the walk
> > + * @pvmw: pointer to struct page_vma_mapped_walk.
> > + *
> > + * This function can only be called with the correct pte lock held.
> > + */
> > +bool pvmw_walk_skip_or_end_on_next(struct page_vma_mapped_walk
> > *pvmw)
> > +{
> > +       unsigned long address = pvmw->address + PAGE_SIZE;
> > +
> > +       if (address >= vma_address_end(pvmw))
> > +               return true;
> 
> If vma_address_end is exactly equal to the next address (pvmw->address +
> PAGE_SIZE), does this mean that we skip unmapping the last page here?
> If so, this could just use '>':
>
>     if (address > vma_address_end(pvmw))
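This check is done after the current PTE has already been handled, so
when the next address reaches vma_address_end the last page has been
unmapped already; returning true here only triggers the batched rmap
removal. Thanks.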


Regards
Yin, Fengwei

> 
> I may have misunderstood, please correct me.
> 
> > +
> > +       if ((address & (PMD_SIZE - PAGE_SIZE)) == 0)
> > +               return true;
> > +
> > +       if (pte_none(*pvmw->pte))
> > +               return true;
> > +
> > +       pvmw->pte++;
> > +       if (!check_pte(pvmw)) {
> > +               pvmw->pte--;
> > +               return true;
> > +       }
> > +       pvmw->pte--;
> > +
> > +       return false;
> > +}
> > +
> >   /**
> >    * page_mapped_in_vma - check whether a page is really mapped in
> > a VMA
> >    * @page: the page to test
> > diff --git a/mm/rmap.c b/mm/rmap.c
> > index bb3fcb8df579..a64e9cbb52dd 100644
> > --- a/mm/rmap.c
> > +++ b/mm/rmap.c
> > @@ -1741,6 +1741,26 @@ static bool try_to_unmap_one_page(struct
> > folio *folio,
> >         return false;
> >   }
> >   
> > +static void folio_remove_rmap_and_update_count(struct folio
> > *folio,
> > +               struct page *start, struct vm_area_struct *vma, int
> > count)
> > +{
> > +       if (count == 0)
> > +               return;
> > +
> > +       /*
> > +        * No need to call mmu_notifier_invalidate_range(): it has
> > +        * been done above for all cases requiring it to happen under
> > +        * page table lock before mmu_notifier_invalidate_range_end().
> > +        *
> > +        * See Documentation/mm/mmu_notifier.rst
> > +        */
> > +       folio_remove_rmap_range(folio, start, count, vma,
> > +                                       folio_test_hugetlb(folio));
> > +       if (vma->vm_flags & VM_LOCKED)
> > +               mlock_drain_local();
> > +       folio_ref_sub(folio, count);
> > +}
> > +
> >   /*
> >    * @arg: enum ttu_flags will be passed to this argument
> >    */
> > @@ -1748,10 +1768,11 @@ static bool try_to_unmap_one(struct folio
> > *folio, struct vm_area_struct *vma,
> >                      unsigned long address, void *arg)
> >   {
> >         DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
> > -       struct page *subpage;
> > +       struct page *start = NULL;
> >         bool ret = true;
> >         struct mmu_notifier_range range;
> >         enum ttu_flags flags = (enum ttu_flags)(long)arg;
> > +       int count = 0;
> >   
> >         /*
> >          * When racing against e.g. zap_pte_range() on another cpu,
> > @@ -1812,26 +1833,31 @@ static bool try_to_unmap_one(struct folio
> > *folio, struct vm_area_struct *vma,
> >                         break;
> >                 }
> >   
> > -               subpage = folio_page(folio,
> > +               if (!start)
> > +                       start = folio_page(folio,
> >                                         pte_pfn(*pvmw.pte) -
> > folio_pfn(folio));
> >                 ret = try_to_unmap_one_page(folio, vma,
> >                                                 range, pvmw,
> > address, flags);
> >                 if (!ret) {
> > +                       folio_remove_rmap_and_update_count(folio,
> > +                                                       start, vma,
> > count);
> >                         page_vma_mapped_walk_done(&pvmw);
> >                         break;
> >                 }
> > +               count++;
> >   
> >                 /*
> > -                * No need to call mmu_notifier_invalidate_range()
> > it has be
> > -                * done above for all cases requiring it to happen
> > under page
> > -                * table lock before
> > mmu_notifier_invalidate_range_end()
> > -                *
> > -                * See Documentation/mm/mmu_notifier.rst
> > +                * If the next pte will be skipped in page_vma_mapped_walk()
> > +                * or the walk will end at it, batch-remove the rmap and
> > +                * update the folio refcount. We can't do it after
> > +                * page_vma_mapped_walk() returns false because the pte
> > +                * lock will no longer be held.
> >                  */
> > -               page_remove_rmap(subpage, vma, false);
> > -               if (vma->vm_flags & VM_LOCKED)
> > -                       mlock_drain_local();
> > -               folio_put(folio);
> > +               if (pvmw_walk_skip_or_end_on_next(&pvmw)) {
> > +                       folio_remove_rmap_and_update_count(folio,
> > +                                                       start, vma,
> > count);
> > +                       count = 0;
> > +                       start = NULL;
> > +               }
> >         }
> >   
> >         mmu_notifier_invalidate_range_end(&range);