On 20/12/2023 22:44, David Hildenbrand wrote:
> Let's convert zap_pte_range() and closely-related
> tlb_flush_rmap_batch(). While at it, perform some more folio conversion
> in zap_pte_range().
> 
> Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>
> ---
>  mm/memory.c     | 23 +++++++++++++----------
>  mm/mmu_gather.c |  2 +-
>  2 files changed, 14 insertions(+), 11 deletions(-)
> 
> diff --git a/mm/memory.c b/mm/memory.c
> index 6552ea27b0bfa..eda2181275d9b 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -1434,6 +1434,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
>  	arch_enter_lazy_mmu_mode();
>  	do {
>  		pte_t ptent = ptep_get(pte);
> +		struct folio *folio;
>  		struct page *page;
>  
>  		if (pte_none(ptent))
> @@ -1459,21 +1460,22 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
>  				continue;
>  			}
>  
> +			folio = page_folio(page);
>  			delay_rmap = 0;
> -			if (!PageAnon(page)) {
> +			if (!folio_test_anon(folio)) {
>  				if (pte_dirty(ptent)) {
> -					set_page_dirty(page);
> +					folio_set_dirty(folio);

Is this foliation change definitely correct? I note that set_page_dirty()
is defined as this:

bool set_page_dirty(struct page *page)
{
	return folio_mark_dirty(page_folio(page));
}

And folio_mark_dirty() does more than just set the PG_dirty bit (I've put a
rough sketch of what it does below the quoted diff, for reference).

In my equivalent change, as part of the contpte series, I've swapped
set_page_dirty() for folio_mark_dirty().

>  					if (tlb_delay_rmap(tlb)) {
>  						delay_rmap = 1;
>  						force_flush = 1;
>  					}
>  				}
>  				if (pte_young(ptent) && likely(vma_has_recency(vma)))
> -					mark_page_accessed(page);
> +					folio_mark_accessed(folio);
>  			}
>  			rss[mm_counter(page)]--;
>  			if (!delay_rmap) {
> -				page_remove_rmap(page, vma, false);
> +				folio_remove_rmap_pte(folio, page, vma);
>  				if (unlikely(page_mapcount(page) < 0))
>  					print_bad_pte(vma, addr, ptent, page);
>  			}
> @@ -1489,6 +1491,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
>  		if (is_device_private_entry(entry) ||
>  		    is_device_exclusive_entry(entry)) {
>  			page = pfn_swap_entry_to_page(entry);
> +			folio = page_folio(page);
>  			if (unlikely(!should_zap_page(details, page)))
>  				continue;
>  			/*
> @@ -1500,8 +1503,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
>  			WARN_ON_ONCE(!vma_is_anonymous(vma));
>  			rss[mm_counter(page)]--;
>  			if (is_device_private_entry(entry))
> -				page_remove_rmap(page, vma, false);
> -			put_page(page);
> +				folio_remove_rmap_pte(folio, page, vma);
> +			folio_put(folio);
>  		} else if (!non_swap_entry(entry)) {
>  			/* Genuine swap entry, hence a private anon page */
>  			if (!should_zap_cows(details))
> @@ -3220,10 +3223,10 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
>  			 * threads.
>  			 *
>  			 * The critical issue is to order this
> -			 * page_remove_rmap with the ptp_clear_flush above.
> -			 * Those stores are ordered by (if nothing else,)
> +			 * folio_remove_rmap_pte() with the ptp_clear_flush
> +			 * above. Those stores are ordered by (if nothing else,)
>  			 * the barrier present in the atomic_add_negative
> -			 * in page_remove_rmap.
> +			 * in folio_remove_rmap_pte();
>  			 *
>  			 * Then the TLB flush in ptep_clear_flush ensures that
>  			 * no process can access the old page before the
> @@ -3232,7 +3235,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
>  			 * mapcount is visible. So transitively, TLBs to
>  			 * old page will be flushed before it can be reused.
>  			 */
> -			page_remove_rmap(vmf->page, vma, false);
> +			folio_remove_rmap_pte(old_folio, vmf->page, vma);
>  		}
>  
>  		/* Free the old page.. */
> diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
> index 4f559f4ddd217..604ddf08affed 100644
> --- a/mm/mmu_gather.c
> +++ b/mm/mmu_gather.c
> @@ -55,7 +55,7 @@ static void tlb_flush_rmap_batch(struct mmu_gather_batch *batch, struct vm_area_
>  
>  		if (encoded_page_flags(enc)) {
>  			struct page *page = encoded_page_ptr(enc);
> -			page_remove_rmap(page, vma, false);
> +			folio_remove_rmap_pte(page_folio(page), page, vma);
>  		}
>  	}
>  }
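
For reference, here is roughly what folio_mark_dirty() does (paraphrased from
memory from mm/page-writeback.c, so take the details with a pinch of salt),
as opposed to folio_set_dirty(), which IIUC is just a plain page-flag setter
generated by the PAGEFLAG() machinery and only sets PG_dirty:

bool folio_mark_dirty(struct folio *folio)
{
	struct address_space *mapping = folio_mapping(folio);

	if (likely(mapping)) {
		/*
		 * Clear PG_reclaim possibly left behind by a racing
		 * folio_end_writeback(), then let the filesystem's
		 * dirty_folio() op do the dirty accounting and tagging.
		 */
		if (folio_test_reclaim(folio))
			folio_clear_reclaim(folio);
		return mapping->a_ops->dirty_folio(mapping, folio);
	}

	/* No mapping: just set PG_dirty. */
	return noop_dirty_folio(mapping, folio);
}

So for a non-anon folio, which is what we have here given the
!folio_test_anon() check, folio_set_dirty() would skip the dirty_folio()
callback and its accounting, which is why I'm unsure the conversion is
equivalent.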