On Mon, 2023-03-06 at 20:39 +0800, haoxin wrote: > > 在 2023/3/6 下午5:22, Yin Fengwei 写道: > > If unmapping one page fails, or the vma walk will skip the next pte, > > or the vma walk will end on the next pte, batch-remove the rmap and > > update the folio refcount. > > > > Signed-off-by: Yin Fengwei <fengwei.yin@xxxxxxxxx> > > --- > > include/linux/rmap.h | 1 + > > mm/page_vma_mapped.c | 30 +++++++++++++++++++++++++++ > > mm/rmap.c | 48 ++++++++++++++++++++++++++++++++++----- > > ----- > > 3 files changed, 68 insertions(+), 11 deletions(-) > > > > diff --git a/include/linux/rmap.h b/include/linux/rmap.h > > index d2569b42e21a..18193d1d5a8e 100644 > > --- a/include/linux/rmap.h > > +++ b/include/linux/rmap.h > > @@ -424,6 +424,7 @@ static inline void > > page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw) > > } > > > > bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw); > > +bool pvmw_walk_skip_or_end_on_next(struct page_vma_mapped_walk > > *pvmw); > > > > /* > > * Used by swapoff to help locate where page is expected in vma. > > diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c > > index 4e448cfbc6ef..19e997dfb5c6 100644 > > --- a/mm/page_vma_mapped.c > > +++ b/mm/page_vma_mapped.c > > @@ -291,6 +291,36 @@ bool page_vma_mapped_walk(struct > > page_vma_mapped_walk *pvmw) > > return false; > > } > > > > +/** > > + * pvmw_walk_skip_or_end_on_next - check if next pte will be > > skipped or > > + * end the walk > > + * @pvmw: pointer to struct page_vma_mapped_walk. > > + * > > + * This function can only be called with correct pte lock hold > > + */ > > +bool pvmw_walk_skip_or_end_on_next(struct page_vma_mapped_walk > > *pvmw) > > +{ > > + unsigned long address = pvmw->address + PAGE_SIZE; > > + > > + if (address >= vma_address_end(pvmw)) > > + return true; > > If vma_address_end is exactly equal to the next address (pvmw->address + PAGE_SIZE), does this mean that we skip unmapping the last page here? 
so > > we could just use '>' there, i.e. 'if (address > vma_address_end(pvmw))'. This check is only done after the last PTE has already been handled, so the last page is not skipped. Thanks. Regards Yin, Fengwei > > I may have misunderstood, please correct me. > > > + > > + if ((address & (PMD_SIZE - PAGE_SIZE)) == 0) > > + return true; > > + > > + if (pte_none(*pvmw->pte)) > > + return true; > > + > > + pvmw->pte++; > > + if (!check_pte(pvmw)) { > > + pvmw->pte--; > > + return true; > > + } > > + pvmw->pte--; > > + > > + return false; > > +} > > + > > /** > > * page_mapped_in_vma - check whether a page is really mapped in > > a VMA > > * @page: the page to test > > diff --git a/mm/rmap.c b/mm/rmap.c > > index bb3fcb8df579..a64e9cbb52dd 100644 > > --- a/mm/rmap.c > > +++ b/mm/rmap.c > > @@ -1741,6 +1741,26 @@ static bool try_to_unmap_one_page(struct > > folio *folio, > > return false; > > } > > > > +static void folio_remove_rmap_and_update_count(struct folio > > *folio, > > + struct page *start, struct vm_area_struct *vma, int > > count) > > +{ > > + if (count == 0) > > + return; > > + > > + /* > > + * No need to call mmu_notifier_invalidate_range() it has > > be > > + * done above for all cases requiring it to happen under > > page > > + * table lock before mmu_notifier_invalidate_range_end() > > + * > > + * See Documentation/mm/mmu_notifier.rst > > + */ > > + folio_remove_rmap_range(folio, start, count, vma, > > + folio_test_hugetlb(folio)); > > + if (vma->vm_flags & VM_LOCKED) > > + mlock_drain_local(); > > + folio_ref_sub(folio, count); > > +} > > + > > /* > > * @arg: enum ttu_flags will be passed to this argument > > */ > > @@ -1748,10 +1768,11 @@ static bool try_to_unmap_one(struct folio > > *folio, struct vm_area_struct *vma, > > unsigned long address, void *arg) > > { > > DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0); > > - struct page *subpage; > > + struct page *start = NULL; > > bool ret = true; > > struct mmu_notifier_range range; > > enum ttu_flags flags = (enum ttu_flags)(long)arg; > > + int 
count = 0; > > > > /* > > * When racing against e.g. zap_pte_range() on another cpu, > > @@ -1812,26 +1833,31 @@ static bool try_to_unmap_one(struct folio > > *folio, struct vm_area_struct *vma, > > break; > > } > > > > - subpage = folio_page(folio, > > + if (!start) > > + start = folio_page(folio, > > pte_pfn(*pvmw.pte) - > > folio_pfn(folio)); > > ret = try_to_unmap_one_page(folio, vma, > > range, pvmw, > > address, flags); > > if (!ret) { > > + folio_remove_rmap_and_update_count(folio, > > + start, vma, > > count); > > page_vma_mapped_walk_done(&pvmw); > > break; > > } > > + count++; > > > > /* > > - * No need to call mmu_notifier_invalidate_range() > > it has be > > - * done above for all cases requiring it to happen > > under page > > - * table lock before > > mmu_notifier_invalidate_range_end() > > - * > > - * See Documentation/mm/mmu_notifier.rst > > + * If next pte will be skipped in > > page_vma_mapped_walk() or > > + * the walk will end at it, batched remove rmap and > > update > > + * page refcount. We can't do it after > > page_vma_mapped_walk() > > + * return false because the pte lock will not be > > hold. > > */ > > - page_remove_rmap(subpage, vma, false); > > - if (vma->vm_flags & VM_LOCKED) > > - mlock_drain_local(); > > - folio_put(folio); > > + if (pvmw_walk_skip_or_end_on_next(&pvmw)) { > > + folio_remove_rmap_and_update_count(folio, > > + start, vma, > > count); > > + count = 0; > > + start = NULL; > > + } > > } > > > > mmu_notifier_invalidate_range_end(&range);