On Fri, Jun 3, 2022 at 5:40 PM Zach O'Keefe <zokeefe@xxxxxxxxxx> wrote:
>
> Add enforce_page_heuristics flag to struct collapse_control that allows
> context to ignore heuristics originally designed to guide khugepaged:
>
> 1) sysfs-controlled knobs khugepaged_max_ptes_[none|swap|shared]
> 2) requirement that some pages in region being collapsed be young or
>    referenced
>
> This flag is set in khugepaged collapse context to preserve existing
> khugepaged behavior.
>
> This flag will be used (unset) when introducing madvise collapse
> context since here, the user presumably has reason to believe the
> collapse will be beneficial and khugepaged heuristics shouldn't tell
> the user they are wrong.
>
> Signed-off-by: Zach O'Keefe <zokeefe@xxxxxxxxxx>

Reviewed-by: Yang Shi <shy828301@xxxxxxxxx>

> ---
>  mm/khugepaged.c | 55 +++++++++++++++++++++++++++++++++----------------
>  1 file changed, 37 insertions(+), 18 deletions(-)
>
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 03e0da0008f1..c3589b3e238d 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -87,6 +87,13 @@ static struct kmem_cache *mm_slot_cache __read_mostly;
>  #define MAX_PTE_MAPPED_THP 8
>
>  struct collapse_control {
> +	/*
> +	 * Heuristics:
> +	 * - khugepaged_max_ptes_[none|swap|shared]
> +	 * - require memory to be young / referenced
> +	 */
> +	bool enforce_page_heuristics;
> +
>  	/* Num pages scanned per node */
>  	int node_load[MAX_NUMNODES];
>
> @@ -604,6 +611,7 @@ static bool is_refcount_suitable(struct page *page)
>  static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>  					unsigned long address,
>  					pte_t *pte,
> +					struct collapse_control *cc,
>  					struct list_head *compound_pagelist)
>  {
>  	struct page *page = NULL;
> @@ -617,7 +625,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>  		if (pte_none(pteval) || (pte_present(pteval) &&
>  				is_zero_pfn(pte_pfn(pteval)))) {
>  			if (!userfaultfd_armed(vma) &&
> -			    ++none_or_zero <= khugepaged_max_ptes_none) {
> +			    (++none_or_zero <= khugepaged_max_ptes_none ||
> +			     !cc->enforce_page_heuristics)) {
>  				continue;
>  			} else {
>  				result = SCAN_EXCEED_NONE_PTE;
> @@ -637,8 +646,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>
>  		VM_BUG_ON_PAGE(!PageAnon(page), page);
>
> -		if (page_mapcount(page) > 1 &&
> -				++shared > khugepaged_max_ptes_shared) {
> +		if (cc->enforce_page_heuristics && page_mapcount(page) > 1 &&
> +		    ++shared > khugepaged_max_ptes_shared) {
>  			result = SCAN_EXCEED_SHARED_PTE;
>  			count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
>  			goto out;
> @@ -705,9 +714,10 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>  		list_add_tail(&page->lru, compound_pagelist);
>  next:
>  		/* There should be enough young pte to collapse the page */
> -		if (pte_young(pteval) ||
> -		    page_is_young(page) || PageReferenced(page) ||
> -		    mmu_notifier_test_young(vma->vm_mm, address))
> +		if (cc->enforce_page_heuristics &&
> +		    (pte_young(pteval) || page_is_young(page) ||
> +		     PageReferenced(page) || mmu_notifier_test_young(vma->vm_mm,
> +								     address)))
>  			referenced++;
>
>  		if (pte_write(pteval))
> @@ -716,7 +726,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>
>  	if (unlikely(!writable)) {
>  		result = SCAN_PAGE_RO;
> -	} else if (unlikely(!referenced)) {
> +	} else if (unlikely(cc->enforce_page_heuristics && !referenced)) {
>  		result = SCAN_LACK_REFERENCED_PAGE;
>  	} else {
>  		result = SCAN_SUCCEED;
> @@ -1096,7 +1106,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
>  	mmu_notifier_invalidate_range_end(&range);
>
>  	spin_lock(pte_ptl);
> -	result = __collapse_huge_page_isolate(vma, address, pte,
> +	result = __collapse_huge_page_isolate(vma, address, pte, cc,
>  					      &compound_pagelist);
>  	spin_unlock(pte_ptl);
>
> @@ -1185,7 +1195,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
>  	     _pte++, _address += PAGE_SIZE) {
>  		pte_t pteval = *_pte;
>  		if (is_swap_pte(pteval)) {
> -			if (++unmapped <= khugepaged_max_ptes_swap) {
> +			if (++unmapped <= khugepaged_max_ptes_swap ||
> +			    !cc->enforce_page_heuristics) {
>  				/*
>  				 * Always be strict with uffd-wp
>  				 * enabled swap entries.  Please see
> @@ -1204,7 +1215,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
>  		}
>  		if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
>  			if (!userfaultfd_armed(vma) &&
> -			    ++none_or_zero <= khugepaged_max_ptes_none) {
> +			    (++none_or_zero <= khugepaged_max_ptes_none ||
> +			     !cc->enforce_page_heuristics)) {
>  				continue;
>  			} else {
>  				result = SCAN_EXCEED_NONE_PTE;
> @@ -1234,8 +1246,9 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
>  			goto out_unmap;
>  		}
>
> -		if (page_mapcount(page) > 1 &&
> -				++shared > khugepaged_max_ptes_shared) {
> +		if (cc->enforce_page_heuristics &&
> +		    page_mapcount(page) > 1 &&
> +		    ++shared > khugepaged_max_ptes_shared) {
>  			result = SCAN_EXCEED_SHARED_PTE;
>  			count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
>  			goto out_unmap;
> @@ -1289,14 +1302,17 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
>  			result = SCAN_PAGE_COUNT;
>  			goto out_unmap;
>  		}
> -		if (pte_young(pteval) ||
> -		    page_is_young(page) || PageReferenced(page) ||
> -		    mmu_notifier_test_young(vma->vm_mm, address))
> +		if (cc->enforce_page_heuristics &&
> +		    (pte_young(pteval) || page_is_young(page) ||
> +		     PageReferenced(page) || mmu_notifier_test_young(vma->vm_mm,
> +								     address)))
>  			referenced++;
>  	}
>  	if (!writable) {
>  		result = SCAN_PAGE_RO;
> -	} else if (!referenced || (unmapped && referenced < HPAGE_PMD_NR/2)) {
> +	} else if (cc->enforce_page_heuristics &&
> +		   (!referenced ||
> +		    (unmapped && referenced < HPAGE_PMD_NR / 2))) {
>  		result = SCAN_LACK_REFERENCED_PAGE;
>  	} else {
>  		result = SCAN_SUCCEED;
> @@ -1966,7 +1982,8 @@ static int khugepaged_scan_file(struct mm_struct *mm, struct file *file,
>  			continue;
>
>  		if (xa_is_value(page)) {
> -			if (++swap > khugepaged_max_ptes_swap) {
> +			if (cc->enforce_page_heuristics &&
> +			    ++swap > khugepaged_max_ptes_swap) {
>  				result = SCAN_EXCEED_SWAP_PTE;
>  				count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
>  				break;
> @@ -2017,7 +2034,8 @@ static int khugepaged_scan_file(struct mm_struct *mm, struct file *file,
>  	rcu_read_unlock();
>
>  	if (result == SCAN_SUCCEED) {
> -		if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none) {
> +		if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none &&
> +		    cc->enforce_page_heuristics) {
>  			result = SCAN_EXCEED_NONE_PTE;
>  			count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
>  		} else {
> @@ -2258,6 +2276,7 @@ static int khugepaged(void *none)
>  {
>  	struct mm_slot *mm_slot;
>  	struct collapse_control cc = {
> +		.enforce_page_heuristics = true,
>  		.last_target_node = NUMA_NO_NODE,
>  		/* .gfp set later */
>  	};
> --
> 2.36.1.255.ge46751e96f-goog
>
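
For readers following the series: the flag only gates the khugepaged-specific limits; nothing else in the scan paths changes. As a point of reference, a minimal sketch of how a later madvise collapse caller might initialize struct collapse_control to opt out of those heuristics -- the exact initializer is not part of this patch and is only an assumption here:

	/*
	 * Illustrative only (not from this patch): a hypothetical madvise
	 * collapse context would leave enforce_page_heuristics clear, so the
	 * max_ptes_* knobs and the young/referenced requirement are skipped.
	 */
	struct collapse_control cc = {
		.enforce_page_heuristics = false,	/* user explicitly requested the collapse */
		.last_target_node = NUMA_NO_NODE,
		/* .gfp set later, as in the khugepaged path */
	};

With that initializer, every "|| !cc->enforce_page_heuristics" / "cc->enforce_page_heuristics && ..." site above degenerates to "always continue" or "never bail", which matches the commit message's intent.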