Re: [mm-unstable v7 06/18] mm/khugepaged: add flag to predicate khugepaged-only behavior

On Jul 11 13:43, Yang Shi wrote:
> On Wed, Jul 6, 2022 at 5:06 PM Zach O'Keefe <zokeefe@xxxxxxxxxx> wrote:
> >
> > Add .is_khugepaged flag to struct collapse_control so
> > khugepaged-specific behavior can be elided by MADV_COLLAPSE context.
> >
> > Start by protecting khugepaged-specific heuristics by this flag. In
> > MADV_COLLAPSE, the user presumably has reason to believe the collapse
> > will be beneficial and khugepaged heuristics shouldn't prevent the user
> > from doing so:
> >
> > 1) sysfs-controlled knobs khugepaged_max_ptes_[none|swap|shared]
> >
> > 2) requirement that some pages in region being collapsed be young or
> >    referenced
> >
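(Aside, for anyone following the series: the caller this is aimed at looks
roughly like the sketch below. This is illustrative userspace code, not part
of the patch; it assumes the MADV_COLLAPSE advice added earlier in this
series, and the fallback #define value is the one I recall from the series'
uapi change. Only one 4K page of the 2M region is populated, so khugepaged's
max_ptes_none limit would normally refuse the collapse, but with
cc->is_khugepaged false the max_ptes_* limits and the young/referenced
requirement are simply not applied.)

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/mman.h>

    #ifndef MADV_COLLAPSE
    #define MADV_COLLAPSE 25        /* from the series' uapi patch; adjust if it differs */
    #endif

    int main(void)
    {
            size_t len = 2UL << 20;                 /* one PMD-sized region (x86-64) */
            char *p = aligned_alloc(len, len);      /* PMD-aligned anonymous memory */

            if (!p)
                    return 1;
            memset(p, 1, 4096);                     /* populate a single 4K page */

            /* Ask for a collapse of the whole range.  max_ptes_none,
             * max_ptes_swap, max_ptes_shared and the young/referenced
             * heuristics are khugepaged-only and are not consulted here. */
            if (madvise(p, len, MADV_COLLAPSE))
                    perror("madvise(MADV_COLLAPSE)");
            return 0;
    }
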
> > Signed-off-by: Zach O'Keefe <zokeefe@xxxxxxxxxx>
> > ---
> >
> > v6 -> v7: There is no functional change here from v6, just a renaming of
> >           flags to explicitly be predicated on khugepaged.
> 
> Reviewed-by: Yang Shi <shy828301@xxxxxxxxx>
> 
> Just a nit: some conditions check is_khugepaged first and some don't. Why
> not make them consistent and check is_khugepaged first?
>

Again, thank you for taking the time to review. Agreed, the inconsistency is
ugly; I've updated the checks so is_khugepaged is consistently tested first.
Thanks for the suggestion.
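
In case it helps review, here is a tiny standalone model of the resulting
shape (userspace C with made-up names, not the kernel code itself): the cheap
is_khugepaged test now short-circuits first in every heuristic, so
MADV_COLLAPSE callers never even touch the max_ptes_* accounting.

    #include <stdbool.h>
    #include <stdio.h>

    /* Toy stand-ins for the kernel structure and sysfs knob; illustrative only. */
    struct collapse_control { bool is_khugepaged; };
    static unsigned int max_ptes_shared = 256;

    /* is_khugepaged is tested first, so the limit (and the ++ accounting)
     * only applies to collapses initiated by khugepaged. */
    static bool exceeds_shared_limit(struct collapse_control *cc,
                                     int mapcount, unsigned int *shared)
    {
            return cc->is_khugepaged && mapcount > 1 &&
                   ++*shared > max_ptes_shared;
    }

    int main(void)
    {
            struct collapse_control khugepaged = { .is_khugepaged = true };
            struct collapse_control madv = { .is_khugepaged = false };
            unsigned int shared = 300;

            printf("khugepaged path: %d\n", exceeds_shared_limit(&khugepaged, 2, &shared));
            shared = 300;
            printf("madvise path:    %d\n", exceeds_shared_limit(&madv, 2, &shared));
            return 0;
    }

(Names like exceeds_shared_limit are invented for the sketch; the real checks
live in __collapse_huge_page_isolate() and khugepaged_scan_pmd().)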

Zach

> > ---
> >  mm/khugepaged.c | 62 ++++++++++++++++++++++++++++++++++---------------
> >  1 file changed, 43 insertions(+), 19 deletions(-)
> >
> > diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> > index 147f5828f052..d89056d8cbad 100644
> > --- a/mm/khugepaged.c
> > +++ b/mm/khugepaged.c
> > @@ -73,6 +73,8 @@ static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait);
> >   * default collapse hugepages if there is at least one pte mapped like
> >   * it would have happened if the vma was large enough during page
> >   * fault.
> > + *
> > + * Note that these are only respected if collapse was initiated by khugepaged.
> >   */
> >  static unsigned int khugepaged_max_ptes_none __read_mostly;
> >  static unsigned int khugepaged_max_ptes_swap __read_mostly;
> > @@ -86,6 +88,8 @@ static struct kmem_cache *mm_slot_cache __read_mostly;
> >  #define MAX_PTE_MAPPED_THP 8
> >
> >  struct collapse_control {
> > +       bool is_khugepaged;
> > +
> >         /* Num pages scanned per node */
> >         int node_load[MAX_NUMNODES];
> >
> > @@ -554,6 +558,7 @@ static bool is_refcount_suitable(struct page *page)
> >  static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
> >                                         unsigned long address,
> >                                         pte_t *pte,
> > +                                       struct collapse_control *cc,
> >                                         struct list_head *compound_pagelist)
> >  {
> >         struct page *page = NULL;
> > @@ -567,7 +572,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
> >                 if (pte_none(pteval) || (pte_present(pteval) &&
> >                                 is_zero_pfn(pte_pfn(pteval)))) {
> >                         if (!userfaultfd_armed(vma) &&
> > -                           ++none_or_zero <= khugepaged_max_ptes_none) {
> > +                           (++none_or_zero <= khugepaged_max_ptes_none ||
> > +                            !cc->is_khugepaged)) {
> >                                 continue;
> >                         } else {
> >                                 result = SCAN_EXCEED_NONE_PTE;
> > @@ -587,8 +593,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
> >
> >                 VM_BUG_ON_PAGE(!PageAnon(page), page);
> >
> > -               if (page_mapcount(page) > 1 &&
> > -                               ++shared > khugepaged_max_ptes_shared) {
> > +               if (cc->is_khugepaged && page_mapcount(page) > 1 &&
> > +                   ++shared > khugepaged_max_ptes_shared) {
> >                         result = SCAN_EXCEED_SHARED_PTE;
> >                         count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
> >                         goto out;
> > @@ -654,10 +660,14 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
> >                 if (PageCompound(page))
> >                         list_add_tail(&page->lru, compound_pagelist);
> >  next:
> > -               /* There should be enough young pte to collapse the page */
> > -               if (pte_young(pteval) ||
> > -                   page_is_young(page) || PageReferenced(page) ||
> > -                   mmu_notifier_test_young(vma->vm_mm, address))
> > +               /*
> > +                * If collapse was initiated by khugepaged, check that there is
> > +                * enough young pte to justify collapsing the page
> > +                */
> > +               if (cc->is_khugepaged &&
> > +                   (pte_young(pteval) || page_is_young(page) ||
> > +                    PageReferenced(page) || mmu_notifier_test_young(vma->vm_mm,
> > +                                                                    address)))
> >                         referenced++;
> >
> >                 if (pte_write(pteval))
> > @@ -666,7 +676,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
> >
> >         if (unlikely(!writable)) {
> >                 result = SCAN_PAGE_RO;
> > -       } else if (unlikely(!referenced)) {
> > +       } else if (unlikely(cc->is_khugepaged && !referenced)) {
> >                 result = SCAN_LACK_REFERENCED_PAGE;
> >         } else {
> >                 result = SCAN_SUCCEED;
> > @@ -745,6 +755,7 @@ static void khugepaged_alloc_sleep(void)
> >
> >
> >  struct collapse_control khugepaged_collapse_control = {
> > +       .is_khugepaged = true,
> >         .last_target_node = NUMA_NO_NODE,
> >  };
> >
> > @@ -1023,7 +1034,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
> >         mmu_notifier_invalidate_range_end(&range);
> >
> >         spin_lock(pte_ptl);
> > -       result =  __collapse_huge_page_isolate(vma, address, pte,
> > +       result =  __collapse_huge_page_isolate(vma, address, pte, cc,
> >                                                &compound_pagelist);
> >         spin_unlock(pte_ptl);
> >
> > @@ -1114,7 +1125,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
> >              _pte++, _address += PAGE_SIZE) {
> >                 pte_t pteval = *_pte;
> >                 if (is_swap_pte(pteval)) {
> > -                       if (++unmapped <= khugepaged_max_ptes_swap) {
> > +                       if (++unmapped <= khugepaged_max_ptes_swap ||
> > +                           !cc->is_khugepaged) {
> >                                 /*
> >                                  * Always be strict with uffd-wp
> >                                  * enabled swap entries.  Please see
> > @@ -1133,7 +1145,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
> >                 }
> >                 if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
> >                         if (!userfaultfd_armed(vma) &&
> > -                           ++none_or_zero <= khugepaged_max_ptes_none) {
> > +                           (++none_or_zero <= khugepaged_max_ptes_none ||
> > +                            !cc->is_khugepaged)) {
> >                                 continue;
> >                         } else {
> >                                 result = SCAN_EXCEED_NONE_PTE;
> > @@ -1163,8 +1176,9 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
> >                         goto out_unmap;
> >                 }
> >
> > -               if (page_mapcount(page) > 1 &&
> > -                               ++shared > khugepaged_max_ptes_shared) {
> > +               if (cc->is_khugepaged &&
> > +                   page_mapcount(page) > 1 &&
> > +                   ++shared > khugepaged_max_ptes_shared) {
> >                         result = SCAN_EXCEED_SHARED_PTE;
> >                         count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
> >                         goto out_unmap;
> > @@ -1218,14 +1232,22 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
> >                         result = SCAN_PAGE_COUNT;
> >                         goto out_unmap;
> >                 }
> > -               if (pte_young(pteval) ||
> > -                   page_is_young(page) || PageReferenced(page) ||
> > -                   mmu_notifier_test_young(vma->vm_mm, address))
> > +
> > +               /*
> > +                * If collapse was initiated by khugepaged, check that there is
> > +                * enough young pte to justify collapsing the page
> > +                */
> > +               if (cc->is_khugepaged &&
> > +                   (pte_young(pteval) || page_is_young(page) ||
> > +                    PageReferenced(page) || mmu_notifier_test_young(vma->vm_mm,
> > +                                                                    address)))
> >                         referenced++;
> >         }
> >         if (!writable) {
> >                 result = SCAN_PAGE_RO;
> > -       } else if (!referenced || (unmapped && referenced < HPAGE_PMD_NR/2)) {
> > +       } else if (cc->is_khugepaged &&
> > +                  (!referenced ||
> > +                   (unmapped && referenced < HPAGE_PMD_NR / 2))) {
> >                 result = SCAN_LACK_REFERENCED_PAGE;
> >         } else {
> >                 result = SCAN_SUCCEED;
> > @@ -1894,7 +1916,8 @@ static int khugepaged_scan_file(struct mm_struct *mm, struct file *file,
> >                         continue;
> >
> >                 if (xa_is_value(page)) {
> > -                       if (++swap > khugepaged_max_ptes_swap) {
> > +                       if (cc->is_khugepaged &&
> > +                           ++swap > khugepaged_max_ptes_swap) {
> >                                 result = SCAN_EXCEED_SWAP_PTE;
> >                                 count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
> >                                 break;
> > @@ -1945,7 +1968,8 @@ static int khugepaged_scan_file(struct mm_struct *mm, struct file *file,
> >         rcu_read_unlock();
> >
> >         if (result == SCAN_SUCCEED) {
> > -               if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none) {
> > +               if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none &&
> > +                   cc->is_khugepaged) {
> >                         result = SCAN_EXCEED_NONE_PTE;
> >                         count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
> >                 } else {
> > --
> > 2.37.0.rc0.161.g10f37bed90-goog
> >



