On 16/12/2024 16:50, Dev Jain wrote:
> Scale down the scan range and the sysfs tunables according to the scan order,
> and isolate the folios.
>
> Signed-off-by: Dev Jain <dev.jain@xxxxxxx>
> ---
>  mm/khugepaged.c | 19 +++++++++++--------
>  1 file changed, 11 insertions(+), 8 deletions(-)
>
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index f52dae7d5179..de044b1f83d4 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -564,15 +564,18 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>  					unsigned long address,
>  					pte_t *pte,
>  					struct collapse_control *cc,
> -					struct list_head *compound_pagelist)
> +					struct list_head *compound_pagelist, int order)
>  {
> -	struct page *page = NULL;
> -	struct folio *folio = NULL;
> -	pte_t *_pte;
> +	unsigned int max_ptes_shared = khugepaged_max_ptes_shared >> (HPAGE_PMD_ORDER - order);
> +	unsigned int max_ptes_none = khugepaged_max_ptes_none >> (HPAGE_PMD_ORDER - order);

This is implicitly rounding down. I think that's the right thing to do; it's
better to be conservative. (I've sketched the resulting numbers at the end of
this mail.)

>  	int none_or_zero = 0, shared = 0, result = SCAN_FAIL, referenced = 0;
> +	struct folio *folio = NULL;
> +	struct page *page = NULL;
>  	bool writable = false;
> +	pte_t *_pte;
>
> -	for (_pte = pte; _pte < pte + HPAGE_PMD_NR;
> +
> +	for (_pte = pte; _pte < pte + (1UL << order);
>  	     _pte++, address += PAGE_SIZE) {
>  		pte_t pteval = ptep_get(_pte);
>  		if (pte_none(pteval) || (pte_present(pteval) &&
> @@ -580,7 +583,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>  			++none_or_zero;
>  			if (!userfaultfd_armed(vma) &&
>  			    (!cc->is_khugepaged ||
> -			     none_or_zero <= khugepaged_max_ptes_none)) {
> +			     none_or_zero <= max_ptes_none)) {
>  				continue;
>  			} else {
>  				result = SCAN_EXCEED_NONE_PTE;
> @@ -609,7 +612,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>  		if (folio_likely_mapped_shared(folio)) {
>  			++shared;
>  			if (cc->is_khugepaged &&
> -			    shared > khugepaged_max_ptes_shared) {
> +			    shared > max_ptes_shared) {
>  				result = SCAN_EXCEED_SHARED_PTE;
>  				count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
>  				goto out;
> @@ -1200,7 +1203,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
>  	pte = pte_offset_map_lock(mm, &_pmd, address, &pte_ptl);
>  	if (pte) {
>  		result = __collapse_huge_page_isolate(vma, address, pte, cc,
> -						      &compound_pagelist);
> +						      &compound_pagelist, order);
>  		spin_unlock(pte_ptl);
>  	} else {
>  		result = SCAN_PMD_NULL;
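
As an aside, here's a quick userspace sketch of what the scaled limits come
out to at each order. This is my own illustration, not kernel code; it assumes
4K base pages (so HPAGE_PMD_ORDER == 9) and the current khugepaged defaults,
max_ptes_none == HPAGE_PMD_NR - 1 == 511 and max_ptes_shared ==
HPAGE_PMD_NR / 2 == 256:

/*
 * Userspace illustration only: how the right shift scales the
 * PMD-order tunables down to a smaller collapse order, flooring
 * the result, alongside the scaled scan range (1UL << order).
 */
#include <stdio.h>

#define HPAGE_PMD_ORDER	9
#define HPAGE_PMD_NR	(1u << HPAGE_PMD_ORDER)

int main(void)
{
	unsigned int khugepaged_max_ptes_none = HPAGE_PMD_NR - 1;   /* 511 */
	unsigned int khugepaged_max_ptes_shared = HPAGE_PMD_NR / 2; /* 256 */

	for (int order = 2; order <= HPAGE_PMD_ORDER; order++) {
		/* Same expressions as in the patch; >> rounds down. */
		unsigned int max_ptes_none =
			khugepaged_max_ptes_none >> (HPAGE_PMD_ORDER - order);
		unsigned int max_ptes_shared =
			khugepaged_max_ptes_shared >> (HPAGE_PMD_ORDER - order);

		printf("order %d: scan %3u ptes, max_ptes_none=%3u, max_ptes_shared=%3u\n",
		       order, 1u << order, max_ptes_none, max_ptes_shared);
	}
	return 0;
}

Worth noting: with the defaults the shift is actually exact. 511 is 2^9 - 1,
so 511 >> k == 2^(9-k) - 1, i.e. "all but one PTE may be none" is preserved at
every order (e.g. order 4: 511 >> 5 == 15 == 16 - 1), and 256 >> k stays
exactly half the scan range. The flooring only makes a difference for values a
user has written to the sysfs tunables, and there rounding down is indeed the
conservative choice.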