On Fri, Oct 20, 2023 at 11:34 AM Vishal Moola (Oracle)
<vishal.moola@xxxxxxxxx> wrote:
>
> This removes 2 calls to compound_head() and helps convert khugepaged to
> use folios throughout.
>
> Previously, if the address passed to collapse_pte_mapped_thp()
> corresponded to a tail page, the scan would fail immediately. Using
> filemap_lock_folio() we get the corresponding folio back and try to
> operate on the folio instead.
>
> Signed-off-by: Vishal Moola (Oracle) <vishal.moola@xxxxxxxxx>

Reviewed-by: Yang Shi <shy828301@xxxxxxxxx>

> ---
>  mm/khugepaged.c | 45 ++++++++++++++++++++-------------------------
>  1 file changed, 20 insertions(+), 25 deletions(-)
>
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 6a7184cd291b..bc2d8ff269c7 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -1477,7 +1477,7 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
>         bool notified = false;
>         unsigned long haddr = addr & HPAGE_PMD_MASK;
>         struct vm_area_struct *vma = vma_lookup(mm, haddr);
> -       struct page *hpage;
> +       struct folio *folio;
>         pte_t *start_pte, *pte;
>         pmd_t *pmd, pgt_pmd;
>         spinlock_t *pml = NULL, *ptl;
> @@ -1510,19 +1510,14 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
>         if (userfaultfd_wp(vma))
>                 return SCAN_PTE_UFFD_WP;
>
> -       hpage = find_lock_page(vma->vm_file->f_mapping,
> +       folio = filemap_lock_folio(vma->vm_file->f_mapping,
>                                linear_page_index(vma, haddr));
> -       if (!hpage)
> +       if (IS_ERR(folio))
>                 return SCAN_PAGE_NULL;
>
> -       if (!PageHead(hpage)) {
> -               result = SCAN_FAIL;
> -               goto drop_hpage;
> -       }
> -
> -       if (compound_order(hpage) != HPAGE_PMD_ORDER) {
> +       if (folio_order(folio) != HPAGE_PMD_ORDER) {
>                 result = SCAN_PAGE_COMPOUND;
> -               goto drop_hpage;
> +               goto drop_folio;
>         }
>
>         result = find_pmd_or_thp_or_none(mm, haddr, &pmd);
> @@ -1536,13 +1531,13 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
>                  */
>                 goto maybe_install_pmd;
>         default:
> -               goto drop_hpage;
> +               goto drop_folio;
>         }
>
>         result = SCAN_FAIL;
>         start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl);
>         if (!start_pte)         /* mmap_lock + page lock should prevent this */
> -               goto drop_hpage;
> +               goto drop_folio;
>
>         /* step 1: check all mapped PTEs are to the right huge page */
>         for (i = 0, addr = haddr, pte = start_pte;
> @@ -1567,7 +1562,7 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
>                  * Note that uprobe, debugger, or MAP_PRIVATE may change the
>                  * page table, but the new page will not be a subpage of hpage.
>                  */
> -               if (hpage + i != page)
> +               if (folio_page(folio, i) != page)
>                         goto abort;
>         }
>
> @@ -1582,7 +1577,7 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
>          * page_table_lock) ptl nests inside pml. The less time we hold pml,
>          * the better; but userfaultfd's mfill_atomic_pte() on a private VMA
>          * inserts a valid as-if-COWed PTE without even looking up page cache.
> -        * So page lock of hpage does not protect from it, so we must not drop
> +        * So page lock of folio does not protect from it, so we must not drop
>          * ptl before pgt_pmd is removed, so uffd private needs pml taken now.
>          */
>         if (userfaultfd_armed(vma) && !(vma->vm_flags & VM_SHARED))
> @@ -1606,7 +1601,7 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
>                         continue;
>                 /*
>                  * We dropped ptl after the first scan, to do the mmu_notifier:
> -                * page lock stops more PTEs of the hpage being faulted in, but
> +                * page lock stops more PTEs of the folio being faulted in, but
>                  * does not stop write faults COWing anon copies from existing
>                  * PTEs; and does not stop those being swapped out or migrated.
>                  */
> @@ -1615,7 +1610,7 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
>                         goto abort;
>                 }
>                 page = vm_normal_page(vma, addr, ptent);
> -               if (hpage + i != page)
> +               if (folio_page(folio, i) != page)
>                         goto abort;
>
>                 /*
> @@ -1634,8 +1629,8 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
>
>         /* step 3: set proper refcount and mm_counters. */
>         if (nr_ptes) {
> -               page_ref_sub(hpage, nr_ptes);
> -               add_mm_counter(mm, mm_counter_file(hpage), -nr_ptes);
> +               folio_ref_sub(folio, nr_ptes);
> +               add_mm_counter(mm, mm_counter_file(&folio->page), -nr_ptes);
>         }
>
>         /* step 4: remove empty page table */
> @@ -1659,14 +1654,14 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
> maybe_install_pmd:
>         /* step 5: install pmd entry */
>         result = install_pmd
> -               ? set_huge_pmd(vma, haddr, pmd, hpage)
> +               ? set_huge_pmd(vma, haddr, pmd, &folio->page)
>                 : SCAN_SUCCEED;
> -       goto drop_hpage;
> +       goto drop_folio;
> abort:
>         if (nr_ptes) {
>                 flush_tlb_mm(mm);
> -               page_ref_sub(hpage, nr_ptes);
> -               add_mm_counter(mm, mm_counter_file(hpage), -nr_ptes);
> +               folio_ref_sub(folio, nr_ptes);
> +               add_mm_counter(mm, mm_counter_file(&folio->page), -nr_ptes);
>         }
>         if (start_pte)
>                 pte_unmap_unlock(start_pte, ptl);
> @@ -1674,9 +1669,9 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
>                 spin_unlock(pml);
>         if (notified)
>                 mmu_notifier_invalidate_range_end(&range);
> -drop_hpage:
> -       unlock_page(hpage);
> -       put_page(hpage);
> +drop_folio:
> +       folio_unlock(folio);
> +       folio_put(folio);
>         return result;
> }
>
> --
> 2.40.1
>
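For readers less familiar with the folio API, the one behavioural subtlety in the lookup change above is the error convention: find_lock_page() returns the exact page at the given index (NULL if absent), which may be a tail page of a compound page, whereas filemap_lock_folio() returns the folio covering the index and signals absence with an ERR_PTR rather than NULL, hence the IS_ERR() check in the patch. Below is a minimal illustrative sketch of the two patterns; it is not part of the patch, and lookup_old()/lookup_new() with their generic mapping/index arguments are placeholder helpers:

#include <linux/err.h>
#include <linux/pagemap.h>

/* Old pattern: the precise page for the index; a tail page made callers bail. */
static int lookup_old(struct address_space *mapping, pgoff_t index)
{
        struct page *page = find_lock_page(mapping, index);

        if (!page)              /* failure is reported as NULL */
                return -ENOENT;
        unlock_page(page);
        put_page(page);
        return 0;
}

/* New pattern: the folio covering the index; failure is reported as ERR_PTR. */
static int lookup_new(struct address_space *mapping, pgoff_t index)
{
        struct folio *folio = filemap_lock_folio(mapping, index);

        if (IS_ERR(folio))
                return PTR_ERR(folio);
        folio_unlock(folio);
        folio_put(folio);
        return 0;
}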