Re: [PATCH v6 07/15] mm/khugepaged: add flag to ignore khugepaged heuristics

On Fri, Jun 3, 2022 at 5:40 PM Zach O'Keefe <zokeefe@xxxxxxxxxx> wrote:
>
> Add an enforce_page_heuristics flag to struct collapse_control that
> allows a collapse context to ignore heuristics originally designed to
> guide khugepaged:
>
> 1) sysfs-controlled knobs khugepaged_max_ptes_[none|swap|shared]
> 2) requirement that some pages in region being collapsed be young or
>    referenced
>
> This flag is set in khugepaged collapse context to preserve existing
> khugepaged behavior.
>
> This flag will be cleared when the madvise collapse context is
> introduced later in the series, since in that case the user presumably
> has reason to believe the collapse will be beneficial, and khugepaged's
> heuristics shouldn't tell the user they are wrong.
>
> Signed-off-by: Zach O'Keefe <zokeefe@xxxxxxxxxx>

Reviewed-by: Yang Shi <shy828301@xxxxxxxxx>
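
For readers following the series, the intended use is easiest to see side
by side. A minimal sketch of the two initializations (the khugepaged side
matches the final hunk of this patch; the MADV_COLLAPSE side is
hypothetical, inferred from the commit message rather than taken from
this patch):

    /* khugepaged context: preserve existing behavior. */
    struct collapse_control khugepaged_cc = {
            .enforce_page_heuristics = true,
            .last_target_node = NUMA_NO_NODE,
            /* .gfp set later */
    };

    /* Hypothetical MADV_COLLAPSE context (a later patch in the
     * series): the user explicitly requested the collapse, so the
     * max_ptes_* limits and the young/referenced requirement are
     * skipped. */
    struct collapse_control madvise_cc = {
            .enforce_page_heuristics = false,
            .last_target_node = NUMA_NO_NODE,
    };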

> ---
>  mm/khugepaged.c | 55 +++++++++++++++++++++++++++++++++----------------
>  1 file changed, 37 insertions(+), 18 deletions(-)
>
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 03e0da0008f1..c3589b3e238d 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -87,6 +87,13 @@ static struct kmem_cache *mm_slot_cache __read_mostly;
>  #define MAX_PTE_MAPPED_THP 8
>
>  struct collapse_control {
> +       /*
> +        * Heuristics:
> +        * - khugepaged_max_ptes_[none|swap|shared]
> +        * - require memory to be young / referenced
> +        */
> +       bool enforce_page_heuristics;
> +
>         /* Num pages scanned per node */
>         int node_load[MAX_NUMNODES];
>
> @@ -604,6 +611,7 @@ static bool is_refcount_suitable(struct page *page)
>  static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>                                         unsigned long address,
>                                         pte_t *pte,
> +                                       struct collapse_control *cc,
>                                         struct list_head *compound_pagelist)
>  {
>         struct page *page = NULL;
> @@ -617,7 +625,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>                 if (pte_none(pteval) || (pte_present(pteval) &&
>                                 is_zero_pfn(pte_pfn(pteval)))) {
>                         if (!userfaultfd_armed(vma) &&
> -                           ++none_or_zero <= khugepaged_max_ptes_none) {
> +                           (++none_or_zero <= khugepaged_max_ptes_none ||
> +                            !cc->enforce_page_heuristics)) {
>                                 continue;
>                         } else {
>                                 result = SCAN_EXCEED_NONE_PTE;
> @@ -637,8 +646,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>
>                 VM_BUG_ON_PAGE(!PageAnon(page), page);
>
> -               if (page_mapcount(page) > 1 &&
> -                               ++shared > khugepaged_max_ptes_shared) {
> +               if (cc->enforce_page_heuristics && page_mapcount(page) > 1 &&
> +                   ++shared > khugepaged_max_ptes_shared) {
>                         result = SCAN_EXCEED_SHARED_PTE;
>                         count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
>                         goto out;
> @@ -705,9 +714,10 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>                         list_add_tail(&page->lru, compound_pagelist);
>  next:
>                 /* There should be enough young pte to collapse the page */
> -               if (pte_young(pteval) ||
> -                   page_is_young(page) || PageReferenced(page) ||
> -                   mmu_notifier_test_young(vma->vm_mm, address))
> +               if (cc->enforce_page_heuristics &&
> +                   (pte_young(pteval) || page_is_young(page) ||
> +                    PageReferenced(page) || mmu_notifier_test_young(vma->vm_mm,
> +                                                                    address)))
>                         referenced++;
>
>                 if (pte_write(pteval))
> @@ -716,7 +726,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>
>         if (unlikely(!writable)) {
>                 result = SCAN_PAGE_RO;
> -       } else if (unlikely(!referenced)) {
> +       } else if (unlikely(cc->enforce_page_heuristics && !referenced)) {
>                 result = SCAN_LACK_REFERENCED_PAGE;
>         } else {
>                 result = SCAN_SUCCEED;
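
One subtlety worth calling out: the patch uses two guard shapes, and they
differ in whether the counter still advances when enforcement is off. In
the none_or_zero and swap checks the increment is the left operand of ||,
so it always executes; in the shared-pte checks the flag is tested first,
so the counter never moves while the flag is clear. A standalone C sketch
(hypothetical names, not kernel code) of the short-circuit difference:

    #include <stdbool.h>
    #include <stdio.h>

    int main(void)
    {
            bool enforce = false;           /* heuristics waived */
            int limit = 1, count_a = 0, count_b = 0;
            int i;

            for (i = 0; i < 3; i++) {
                    /* Shape A (none_or_zero, unmapped): counter left of
                     * ||, so it increments even when the limit is
                     * waived. */
                    if (++count_a <= limit || !enforce)
                            continue;
            }

            for (i = 0; i < 3; i++) {
                    /* Shape B (shared, file swap): flag left of &&, so
                     * the counter never advances with enforcement off. */
                    if (enforce && ++count_b > limit)
                            break;
            }

            printf("count_a=%d count_b=%d\n", count_a, count_b); /* 3 0 */
            return 0;
    }

Probably intentional, but worth keeping in mind if the counters are ever
consumed elsewhere (e.g., by tracepoints).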
> @@ -1096,7 +1106,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
>         mmu_notifier_invalidate_range_end(&range);
>
>         spin_lock(pte_ptl);
> -       result =  __collapse_huge_page_isolate(vma, address, pte,
> +       result =  __collapse_huge_page_isolate(vma, address, pte, cc,
>                                                &compound_pagelist);
>         spin_unlock(pte_ptl);
>
> @@ -1185,7 +1195,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
>              _pte++, _address += PAGE_SIZE) {
>                 pte_t pteval = *_pte;
>                 if (is_swap_pte(pteval)) {
> -                       if (++unmapped <= khugepaged_max_ptes_swap) {
> +                       if (++unmapped <= khugepaged_max_ptes_swap ||
> +                           !cc->enforce_page_heuristics) {
>                                 /*
>                                  * Always be strict with uffd-wp
>                                  * enabled swap entries.  Please see
> @@ -1204,7 +1215,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
>                 }
>                 if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
>                         if (!userfaultfd_armed(vma) &&
> -                           ++none_or_zero <= khugepaged_max_ptes_none) {
> +                           (++none_or_zero <= khugepaged_max_ptes_none ||
> +                            !cc->enforce_page_heuristics)) {
>                                 continue;
>                         } else {
>                                 result = SCAN_EXCEED_NONE_PTE;
> @@ -1234,8 +1246,9 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
>                         goto out_unmap;
>                 }
>
> -               if (page_mapcount(page) > 1 &&
> -                               ++shared > khugepaged_max_ptes_shared) {
> +               if (cc->enforce_page_heuristics &&
> +                   page_mapcount(page) > 1 &&
> +                   ++shared > khugepaged_max_ptes_shared) {
>                         result = SCAN_EXCEED_SHARED_PTE;
>                         count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
>                         goto out_unmap;
> @@ -1289,14 +1302,17 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
>                         result = SCAN_PAGE_COUNT;
>                         goto out_unmap;
>                 }
> -               if (pte_young(pteval) ||
> -                   page_is_young(page) || PageReferenced(page) ||
> -                   mmu_notifier_test_young(vma->vm_mm, address))
> +               if (cc->enforce_page_heuristics &&
> +                   (pte_young(pteval) || page_is_young(page) ||
> +                    PageReferenced(page) || mmu_notifier_test_young(vma->vm_mm,
> +                                                                    address)))
>                         referenced++;
>         }
>         if (!writable) {
>                 result = SCAN_PAGE_RO;
> -       } else if (!referenced || (unmapped && referenced < HPAGE_PMD_NR/2)) {
> +       } else if (cc->enforce_page_heuristics &&
> +                  (!referenced ||
> +                   (unmapped && referenced < HPAGE_PMD_NR / 2))) {
>                 result = SCAN_LACK_REFERENCED_PAGE;
>         } else {
>                 result = SCAN_SUCCEED;
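
Also note that the final check has to be gated together with the
accumulation: once the young/referenced test above is skipped,
referenced stays 0, so an unguarded !referenced branch would fail every
scan with SCAN_LACK_REFERENCED_PAGE. Gating both sides, as done here and
in __collapse_huge_page_isolate(), keeps the two consistent.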
> @@ -1966,7 +1982,8 @@ static int khugepaged_scan_file(struct mm_struct *mm, struct file *file,
>                         continue;
>
>                 if (xa_is_value(page)) {
> -                       if (++swap > khugepaged_max_ptes_swap) {
> +                       if (cc->enforce_page_heuristics &&
> +                           ++swap > khugepaged_max_ptes_swap) {
>                                 result = SCAN_EXCEED_SWAP_PTE;
>                                 count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
>                                 break;
> @@ -2017,7 +2034,8 @@ static int khugepaged_scan_file(struct mm_struct *mm, struct file *file,
>         rcu_read_unlock();
>
>         if (result == SCAN_SUCCEED) {
> -               if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none) {
> +               if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none &&
> +                   cc->enforce_page_heuristics) {
>                         result = SCAN_EXCEED_NONE_PTE;
>                         count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
>                 } else {
> @@ -2258,6 +2276,7 @@ static int khugepaged(void *none)
>  {
>         struct mm_slot *mm_slot;
>         struct collapse_control cc = {
> +               .enforce_page_heuristics = true,
>                 .last_target_node = NUMA_NO_NODE,
>                 /* .gfp set later  */
>         };
> --
> 2.36.1.255.ge46751e96f-goog
>