Re: [RFC PATCH 02/14] mm/khugepaged: add struct collapse_control

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Mar 8, 2022 at 1:34 PM Zach O'Keefe <zokeefe@xxxxxxxxxx> wrote:
>
> Modularize huge page collapse by introducing struct collapse_control.
> This structure serves to describe the properties of the requested
> collapse, as well as serve as a local scratch pad to use during the
> collapse itself.
>
> Later in the series when we introduce the madvise collapse context, we
> will want to be able to ignore khugepaged_max_ptes_[none|swap|shared]
> in said context, and so whether to enforce those limits is included
> here as a property of the requested collapse.
>
> Signed-off-by: Zach O'Keefe <zokeefe@xxxxxxxxxx>
> ---
>  mm/khugepaged.c | 120 ++++++++++++++++++++++++++++++------------------
>  1 file changed, 76 insertions(+), 44 deletions(-)
>
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index a4e5eaf3eb01..36fc0099c445 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -85,6 +85,24 @@ static struct kmem_cache *mm_slot_cache __read_mostly;
>
>  #define MAX_PTE_MAPPED_THP 8
>
> +struct collapse_control {
> +       /* Respect khugepaged_max_ptes_[none|swap|shared] */
> +       bool enforce_pte_scan_limits;

I'm fine with having the collapse_control struct, but it seems
enforce_pte_scan_limits is not actually used until a later patch. So,
as with patch #1, it'd be better to introduce new functions or new
variables in the same patch as their users.

> +
> +       /* Num pages scanned per node */
> +       int node_load[MAX_NUMNODES];
> +
> +       /* Last target selected in khugepaged_find_target_node() for this scan */
> +       int last_target_node;
> +};
> +
> +static void collapse_control_init(struct collapse_control *cc,
> +                                 bool enforce_pte_scan_limits)
> +{
> +       cc->enforce_pte_scan_limits = enforce_pte_scan_limits;
> +       cc->last_target_node = NUMA_NO_NODE;
> +}
> +
>  /**
>   * struct mm_slot - hash lookup from mm to mm_slot
>   * @hash: hash collision list
> @@ -601,6 +619,7 @@ static bool is_refcount_suitable(struct page *page)
>  static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>                                         unsigned long address,
>                                         pte_t *pte,
> +                                       bool enforce_pte_scan_limits,
>                                         struct list_head *compound_pagelist)
>  {
>         struct page *page = NULL;
> @@ -614,7 +633,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>                 if (pte_none(pteval) || (pte_present(pteval) &&
>                                 is_zero_pfn(pte_pfn(pteval)))) {
>                         if (!userfaultfd_armed(vma) &&
> -                           ++none_or_zero <= khugepaged_max_ptes_none) {
> +                           (++none_or_zero <= khugepaged_max_ptes_none ||
> +                            !enforce_pte_scan_limits)) {
>                                 continue;
>                         } else {
>                                 result = SCAN_EXCEED_NONE_PTE;
> @@ -634,8 +654,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>
>                 VM_BUG_ON_PAGE(!PageAnon(page), page);
>
> -               if (page_mapcount(page) > 1 &&
> -                               ++shared > khugepaged_max_ptes_shared) {
> +               if (page_mapcount(page) > 1 && enforce_pte_scan_limits &&
> +                   ++shared > khugepaged_max_ptes_shared) {
>                         result = SCAN_EXCEED_SHARED_PTE;
>                         count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
>                         goto out;
> @@ -785,9 +805,7 @@ static void khugepaged_alloc_sleep(void)
>         remove_wait_queue(&khugepaged_wait, &wait);
>  }
>
> -static int khugepaged_node_load[MAX_NUMNODES];
> -
> -static bool khugepaged_scan_abort(int nid)
> +static bool khugepaged_scan_abort(int nid, struct collapse_control *cc)
>  {
>         int i;
>
> @@ -799,11 +817,11 @@ static bool khugepaged_scan_abort(int nid)
>                 return false;
>
>         /* If there is a count for this node already, it must be acceptable */
> -       if (khugepaged_node_load[nid])
> +       if (cc->node_load[nid])
>                 return false;
>
>         for (i = 0; i < MAX_NUMNODES; i++) {
> -               if (!khugepaged_node_load[i])
> +               if (!cc->node_load[i])
>                         continue;
>                 if (node_distance(nid, i) > node_reclaim_distance)
>                         return true;
> @@ -818,28 +836,28 @@ static inline gfp_t alloc_hugepage_khugepaged_gfpmask(void)
>  }
>
>  #ifdef CONFIG_NUMA
> -static int khugepaged_find_target_node(void)
> +static int khugepaged_find_target_node(struct collapse_control *cc)
>  {
> -       static int last_khugepaged_target_node = NUMA_NO_NODE;
>         int nid, target_node = 0, max_value = 0;
>
>         /* find first node with max normal pages hit */
>         for (nid = 0; nid < MAX_NUMNODES; nid++)
> -               if (khugepaged_node_load[nid] > max_value) {
> -                       max_value = khugepaged_node_load[nid];
> +               if (cc->node_load[nid] > max_value) {
> +                       max_value = cc->node_load[nid];
>                         target_node = nid;
>                 }
>
>         /* do some balance if several nodes have the same hit record */
> -       if (target_node <= last_khugepaged_target_node)
> -               for (nid = last_khugepaged_target_node + 1; nid < MAX_NUMNODES;
> -                               nid++)
> -                       if (max_value == khugepaged_node_load[nid]) {
> +       if (target_node <= cc->last_target_node)
> +               for (nid = cc->last_target_node + 1; nid < MAX_NUMNODES;
> +                    nid++) {
> +                       if (max_value == cc->node_load[nid]) {
>                                 target_node = nid;
>                                 break;
>                         }
> +               }
>
> -       last_khugepaged_target_node = target_node;
> +       cc->last_target_node = target_node;
>         return target_node;
>  }
>
> @@ -877,7 +895,7 @@ khugepaged_alloc_page(struct page **hpage, gfp_t gfp, int node)
>         return *hpage;
>  }
>  #else
> -static int khugepaged_find_target_node(void)
> +static int khugepaged_find_target_node(struct collapse_control *cc)
>  {
>         return 0;
>  }
> @@ -1043,7 +1061,8 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
>  static void collapse_huge_page(struct mm_struct *mm,
>                                    unsigned long address,
>                                    struct page **hpage,
> -                                  int node, int referenced, int unmapped)
> +                                  int node, int referenced, int unmapped,
> +                                  int enforce_pte_scan_limits)
>  {
>         LIST_HEAD(compound_pagelist);
>         pmd_t *pmd, _pmd;
> @@ -1141,7 +1160,7 @@ static void collapse_huge_page(struct mm_struct *mm,
>
>         spin_lock(pte_ptl);
>         isolated = __collapse_huge_page_isolate(vma, address, pte,
> -                       &compound_pagelist);
> +                       enforce_pte_scan_limits, &compound_pagelist);
>         spin_unlock(pte_ptl);
>
>         if (unlikely(!isolated)) {
> @@ -1206,7 +1225,8 @@ static void collapse_huge_page(struct mm_struct *mm,
>  static int khugepaged_scan_pmd(struct mm_struct *mm,
>                                struct vm_area_struct *vma,
>                                unsigned long address,
> -                              struct page **hpage)
> +                              struct page **hpage,
> +                              struct collapse_control *cc)
>  {
>         pmd_t *pmd;
>         pte_t *pte, *_pte;
> @@ -1226,13 +1246,14 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
>                 goto out;
>         }
>
> -       memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load));
> +       memset(cc->node_load, 0, sizeof(cc->node_load));
>         pte = pte_offset_map_lock(mm, pmd, address, &ptl);
>         for (_address = address, _pte = pte; _pte < pte+HPAGE_PMD_NR;
>              _pte++, _address += PAGE_SIZE) {
>                 pte_t pteval = *_pte;
>                 if (is_swap_pte(pteval)) {
> -                       if (++unmapped <= khugepaged_max_ptes_swap) {
> +                       if (++unmapped <= khugepaged_max_ptes_swap ||
> +                           !cc->enforce_pte_scan_limits) {
>                                 /*
>                                  * Always be strict with uffd-wp
>                                  * enabled swap entries.  Please see
> @@ -1251,7 +1272,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
>                 }
>                 if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
>                         if (!userfaultfd_armed(vma) &&
> -                           ++none_or_zero <= khugepaged_max_ptes_none) {
> +                           (++none_or_zero <= khugepaged_max_ptes_none ||
> +                            !cc->enforce_pte_scan_limits)) {
>                                 continue;
>                         } else {
>                                 result = SCAN_EXCEED_NONE_PTE;
> @@ -1282,7 +1304,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
>                 }
>
>                 if (page_mapcount(page) > 1 &&
> -                               ++shared > khugepaged_max_ptes_shared) {
> +                               ++shared > khugepaged_max_ptes_shared &&
> +                               cc->enforce_pte_scan_limits) {
>                         result = SCAN_EXCEED_SHARED_PTE;
>                         count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
>                         goto out_unmap;
> @@ -1292,16 +1315,16 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
>
>                 /*
>                  * Record which node the original page is from and save this
> -                * information to khugepaged_node_load[].
> +                * information to cc->node_load[].
>                  * Khugepaged will allocate hugepage from the node has the max
>                  * hit record.
>                  */
>                 node = page_to_nid(page);
> -               if (khugepaged_scan_abort(node)) {
> +               if (khugepaged_scan_abort(node, cc)) {
>                         result = SCAN_SCAN_ABORT;
>                         goto out_unmap;
>                 }
> -               khugepaged_node_load[node]++;
> +               cc->node_load[node]++;
>                 if (!PageLRU(page)) {
>                         result = SCAN_PAGE_LRU;
>                         goto out_unmap;
> @@ -1352,10 +1375,11 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
>  out_unmap:
>         pte_unmap_unlock(pte, ptl);
>         if (ret) {
> -               node = khugepaged_find_target_node();
> +               node = khugepaged_find_target_node(cc);
>                 /* collapse_huge_page will return with the mmap_lock released */
>                 collapse_huge_page(mm, address, hpage, node,
> -                               referenced, unmapped);
> +                               referenced, unmapped,
> +                               cc->enforce_pte_scan_limits);
>         }
>  out:
>         trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced,
> @@ -1992,7 +2016,8 @@ static void collapse_file(struct mm_struct *mm,
>  }
>
>  static void khugepaged_scan_file(struct mm_struct *mm,
> -               struct file *file, pgoff_t start, struct page **hpage)
> +               struct file *file, pgoff_t start, struct page **hpage,
> +               struct collapse_control *cc)
>  {
>         struct page *page = NULL;
>         struct address_space *mapping = file->f_mapping;
> @@ -2003,14 +2028,15 @@ static void khugepaged_scan_file(struct mm_struct *mm,
>
>         present = 0;
>         swap = 0;
> -       memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load));
> +       memset(cc->node_load, 0, sizeof(cc->node_load));
>         rcu_read_lock();
>         xas_for_each(&xas, page, start + HPAGE_PMD_NR - 1) {
>                 if (xas_retry(&xas, page))
>                         continue;
>
>                 if (xa_is_value(page)) {
> -                       if (++swap > khugepaged_max_ptes_swap) {
> +                       if (cc->enforce_pte_scan_limits &&
> +                           ++swap > khugepaged_max_ptes_swap) {
>                                 result = SCAN_EXCEED_SWAP_PTE;
>                                 count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
>                                 break;
> @@ -2028,11 +2054,11 @@ static void khugepaged_scan_file(struct mm_struct *mm,
>                 }
>
>                 node = page_to_nid(page);
> -               if (khugepaged_scan_abort(node)) {
> +               if (khugepaged_scan_abort(node, cc)) {
>                         result = SCAN_SCAN_ABORT;
>                         break;
>                 }
> -               khugepaged_node_load[node]++;
> +               cc->node_load[node]++;
>
>                 if (!PageLRU(page)) {
>                         result = SCAN_PAGE_LRU;
> @@ -2061,11 +2087,12 @@ static void khugepaged_scan_file(struct mm_struct *mm,
>         rcu_read_unlock();
>
>         if (result == SCAN_SUCCEED) {
> -               if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none) {
> +               if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none &&
> +                   cc->enforce_pte_scan_limits) {
>                         result = SCAN_EXCEED_NONE_PTE;
>                         count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
>                 } else {
> -                       node = khugepaged_find_target_node();
> +                       node = khugepaged_find_target_node(cc);
>                         collapse_file(mm, file, start, hpage, node);
>                 }
>         }
> @@ -2074,7 +2101,8 @@ static void khugepaged_scan_file(struct mm_struct *mm,
>  }
>  #else
>  static void khugepaged_scan_file(struct mm_struct *mm,
> -               struct file *file, pgoff_t start, struct page **hpage)
> +               struct file *file, pgoff_t start, struct page **hpage,
> +               struct collapse_control *cc)
>  {
>         BUILD_BUG();
>  }
> @@ -2085,7 +2113,8 @@ static void khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
>  #endif
>
>  static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
> -                                           struct page **hpage)
> +                                           struct page **hpage,
> +                                           struct collapse_control *cc)
>         __releases(&khugepaged_mm_lock)
>         __acquires(&khugepaged_mm_lock)
>  {
> @@ -2161,12 +2190,12 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
>
>                                 mmap_read_unlock(mm);
>                                 ret = 1;
> -                               khugepaged_scan_file(mm, file, pgoff, hpage);
> +                               khugepaged_scan_file(mm, file, pgoff, hpage, cc);
>                                 fput(file);
>                         } else {
>                                 ret = khugepaged_scan_pmd(mm, vma,
>                                                 khugepaged_scan.address,
> -                                               hpage);
> +                                               hpage, cc);
>                         }
>                         /* move to next address */
>                         khugepaged_scan.address += HPAGE_PMD_SIZE;
> @@ -2222,7 +2251,7 @@ static int khugepaged_wait_event(void)
>                 kthread_should_stop();
>  }
>
> -static void khugepaged_do_scan(void)
> +static void khugepaged_do_scan(struct collapse_control *cc)
>  {
>         struct page *hpage = NULL;
>         unsigned int progress = 0, pass_through_head = 0;
> @@ -2246,7 +2275,7 @@ static void khugepaged_do_scan(void)
>                 if (khugepaged_has_work() &&
>                     pass_through_head < 2)
>                         progress += khugepaged_scan_mm_slot(pages - progress,
> -                                                           &hpage);
> +                                                           &hpage, cc);
>                 else
>                         progress = pages;
>                 spin_unlock(&khugepaged_mm_lock);
> @@ -2285,12 +2314,15 @@ static void khugepaged_wait_work(void)
>  static int khugepaged(void *none)
>  {
>         struct mm_slot *mm_slot;
> +       struct collapse_control cc;
> +
> +       collapse_control_init(&cc, /* enforce_pte_scan_limits= */ 1);
>
>         set_freezable();
>         set_user_nice(current, MAX_NICE);
>
>         while (!kthread_should_stop()) {
> -               khugepaged_do_scan();
> +               khugepaged_do_scan(&cc);
>                 khugepaged_wait_work();
>         }
>
> --
> 2.35.1.616.g0bdcbb4464-goog
>




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux