Re: [mm-unstable v7 06/18] mm/khugepaged: add flag to predicate khugepaged-only behavior

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Jul 6, 2022 at 5:06 PM Zach O'Keefe <zokeefe@xxxxxxxxxx> wrote:
>
> Add .is_khugepaged flag to struct collapse_control so
> khugepaged-specific behavior can be elided by MADV_COLLAPSE context.
>
> Start by protecting khugepaged-specific heuristics by this flag. In
> MADV_COLLAPSE, the user presumably has reason to believe the collapse
> will be beneficial and khugepaged heuristics shouldn't prevent the user
> from doing so:
>
> 1) sysfs-controlled knobs khugepaged_max_ptes_[none|swap|shared]
>
> 2) requirement that some pages in region being collapsed be young or
>    referenced
>
> Signed-off-by: Zach O'Keefe <zokeefe@xxxxxxxxxx>
> ---
>
> v6 -> v7: There is no functional change here from v6, just a renaming of
>           flags to explicitly be predicated on khugepaged.

Reviewed-by: Yang Shi <shy828301@xxxxxxxxx>

Just a nit: some conditions check is_khugepaged first and some don't
(e.g. the last hunk in khugepaged_scan_file() checks it last). Why not
make them consistent by always checking is_khugepaged first?

> ---
>  mm/khugepaged.c | 62 ++++++++++++++++++++++++++++++++++---------------
>  1 file changed, 43 insertions(+), 19 deletions(-)
>
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 147f5828f052..d89056d8cbad 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -73,6 +73,8 @@ static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait);
>   * default collapse hugepages if there is at least one pte mapped like
>   * it would have happened if the vma was large enough during page
>   * fault.
> + *
> + * Note that these are only respected if collapse was initiated by khugepaged.
>   */
>  static unsigned int khugepaged_max_ptes_none __read_mostly;
>  static unsigned int khugepaged_max_ptes_swap __read_mostly;
> @@ -86,6 +88,8 @@ static struct kmem_cache *mm_slot_cache __read_mostly;
>  #define MAX_PTE_MAPPED_THP 8
>
>  struct collapse_control {
> +       bool is_khugepaged;
> +
>         /* Num pages scanned per node */
>         int node_load[MAX_NUMNODES];
>
> @@ -554,6 +558,7 @@ static bool is_refcount_suitable(struct page *page)
>  static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>                                         unsigned long address,
>                                         pte_t *pte,
> +                                       struct collapse_control *cc,
>                                         struct list_head *compound_pagelist)
>  {
>         struct page *page = NULL;
> @@ -567,7 +572,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>                 if (pte_none(pteval) || (pte_present(pteval) &&
>                                 is_zero_pfn(pte_pfn(pteval)))) {
>                         if (!userfaultfd_armed(vma) &&
> -                           ++none_or_zero <= khugepaged_max_ptes_none) {
> +                           (++none_or_zero <= khugepaged_max_ptes_none ||
> +                            !cc->is_khugepaged)) {
>                                 continue;
>                         } else {
>                                 result = SCAN_EXCEED_NONE_PTE;
> @@ -587,8 +593,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>
>                 VM_BUG_ON_PAGE(!PageAnon(page), page);
>
> -               if (page_mapcount(page) > 1 &&
> -                               ++shared > khugepaged_max_ptes_shared) {
> +               if (cc->is_khugepaged && page_mapcount(page) > 1 &&
> +                   ++shared > khugepaged_max_ptes_shared) {
>                         result = SCAN_EXCEED_SHARED_PTE;
>                         count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
>                         goto out;
> @@ -654,10 +660,14 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>                 if (PageCompound(page))
>                         list_add_tail(&page->lru, compound_pagelist);
>  next:
> -               /* There should be enough young pte to collapse the page */
> -               if (pte_young(pteval) ||
> -                   page_is_young(page) || PageReferenced(page) ||
> -                   mmu_notifier_test_young(vma->vm_mm, address))
> +               /*
> +                * If collapse was initiated by khugepaged, check that there is
> +                * enough young pte to justify collapsing the page
> +                */
> +               if (cc->is_khugepaged &&
> +                   (pte_young(pteval) || page_is_young(page) ||
> +                    PageReferenced(page) || mmu_notifier_test_young(vma->vm_mm,
> +                                                                    address)))
>                         referenced++;
>
>                 if (pte_write(pteval))
> @@ -666,7 +676,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>
>         if (unlikely(!writable)) {
>                 result = SCAN_PAGE_RO;
> -       } else if (unlikely(!referenced)) {
> +       } else if (unlikely(cc->is_khugepaged && !referenced)) {
>                 result = SCAN_LACK_REFERENCED_PAGE;
>         } else {
>                 result = SCAN_SUCCEED;
> @@ -745,6 +755,7 @@ static void khugepaged_alloc_sleep(void)
>
>
>  struct collapse_control khugepaged_collapse_control = {
> +       .is_khugepaged = true,
>         .last_target_node = NUMA_NO_NODE,
>  };
>
> @@ -1023,7 +1034,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
>         mmu_notifier_invalidate_range_end(&range);
>
>         spin_lock(pte_ptl);
> -       result =  __collapse_huge_page_isolate(vma, address, pte,
> +       result =  __collapse_huge_page_isolate(vma, address, pte, cc,
>                                                &compound_pagelist);
>         spin_unlock(pte_ptl);
>
> @@ -1114,7 +1125,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
>              _pte++, _address += PAGE_SIZE) {
>                 pte_t pteval = *_pte;
>                 if (is_swap_pte(pteval)) {
> -                       if (++unmapped <= khugepaged_max_ptes_swap) {
> +                       if (++unmapped <= khugepaged_max_ptes_swap ||
> +                           !cc->is_khugepaged) {
>                                 /*
>                                  * Always be strict with uffd-wp
>                                  * enabled swap entries.  Please see
> @@ -1133,7 +1145,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
>                 }
>                 if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
>                         if (!userfaultfd_armed(vma) &&
> -                           ++none_or_zero <= khugepaged_max_ptes_none) {
> +                           (++none_or_zero <= khugepaged_max_ptes_none ||
> +                            !cc->is_khugepaged)) {
>                                 continue;
>                         } else {
>                                 result = SCAN_EXCEED_NONE_PTE;
> @@ -1163,8 +1176,9 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
>                         goto out_unmap;
>                 }
>
> -               if (page_mapcount(page) > 1 &&
> -                               ++shared > khugepaged_max_ptes_shared) {
> +               if (cc->is_khugepaged &&
> +                   page_mapcount(page) > 1 &&
> +                   ++shared > khugepaged_max_ptes_shared) {
>                         result = SCAN_EXCEED_SHARED_PTE;
>                         count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
>                         goto out_unmap;
> @@ -1218,14 +1232,22 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
>                         result = SCAN_PAGE_COUNT;
>                         goto out_unmap;
>                 }
> -               if (pte_young(pteval) ||
> -                   page_is_young(page) || PageReferenced(page) ||
> -                   mmu_notifier_test_young(vma->vm_mm, address))
> +
> +               /*
> +                * If collapse was initiated by khugepaged, check that there is
> +                * enough young pte to justify collapsing the page
> +                */
> +               if (cc->is_khugepaged &&
> +                   (pte_young(pteval) || page_is_young(page) ||
> +                    PageReferenced(page) || mmu_notifier_test_young(vma->vm_mm,
> +                                                                    address)))
>                         referenced++;
>         }
>         if (!writable) {
>                 result = SCAN_PAGE_RO;
> -       } else if (!referenced || (unmapped && referenced < HPAGE_PMD_NR/2)) {
> +       } else if (cc->is_khugepaged &&
> +                  (!referenced ||
> +                   (unmapped && referenced < HPAGE_PMD_NR / 2))) {
>                 result = SCAN_LACK_REFERENCED_PAGE;
>         } else {
>                 result = SCAN_SUCCEED;
> @@ -1894,7 +1916,8 @@ static int khugepaged_scan_file(struct mm_struct *mm, struct file *file,
>                         continue;
>
>                 if (xa_is_value(page)) {
> -                       if (++swap > khugepaged_max_ptes_swap) {
> +                       if (cc->is_khugepaged &&
> +                           ++swap > khugepaged_max_ptes_swap) {
>                                 result = SCAN_EXCEED_SWAP_PTE;
>                                 count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
>                                 break;
> @@ -1945,7 +1968,8 @@ static int khugepaged_scan_file(struct mm_struct *mm, struct file *file,
>         rcu_read_unlock();
>
>         if (result == SCAN_SUCCEED) {
> -               if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none) {
> +               if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none &&
> +                   cc->is_khugepaged) {
>                         result = SCAN_EXCEED_NONE_PTE;
>                         count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
>                 } else {
> --
> 2.37.0.rc0.161.g10f37bed90-goog
>




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux