Re: [PATCH v6 06/15] mm/khugepaged: pipe enum scan_result codes back to callers

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Jun 3, 2022 at 5:40 PM Zach O'Keefe <zokeefe@xxxxxxxxxx> wrote:
>
> Pipe enum scan_result codes back through return values of functions
> downstream of khugepaged_scan_file() and khugepaged_scan_pmd() to
> inform callers if the operation was successful, and if not, why.
>
> Since khugepaged_scan_pmd()'s return value already has a specific
> meaning (whether mmap_lock was unlocked or not), add a bool* argument
> to khugepaged_scan_pmd() to retrieve this information.
>
> Change khugepaged to take action based on the return values of
> khugepaged_scan_file() and khugepaged_scan_pmd() instead of acting
> deep within the collapsing functions themselves.
>
> Remove dependency on error pointers to communicate to khugepaged that
> allocation failed and it should sleep; instead just use the result of
> the scan (SCAN_ALLOC_HUGE_PAGE_FAIL if allocation fails).
>
> Signed-off-by: Zach O'Keefe <zokeefe@xxxxxxxxxx>

Reviewed-by: Yang Shi <shy828301@xxxxxxxxx>

A couple of minor nits below...

> ---
>  mm/khugepaged.c | 192 ++++++++++++++++++++++++------------------------
>  1 file changed, 96 insertions(+), 96 deletions(-)
>
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index ba722347bebd..03e0da0008f1 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -722,13 +722,13 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>                 result = SCAN_SUCCEED;
>                 trace_mm_collapse_huge_page_isolate(page, none_or_zero,
>                                                     referenced, writable, result);
> -               return 1;
> +               return SCAN_SUCCEED;

You could do "return result" too.

>         }
>  out:
>         release_pte_pages(pte, _pte, compound_pagelist);
>         trace_mm_collapse_huge_page_isolate(page, none_or_zero,
>                                             referenced, writable, result);
> -       return 0;
> +       return result;
>  }
>
>  static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
> @@ -850,14 +850,13 @@ static int khugepaged_find_target_node(struct collapse_control *cc)
>  #endif
>
>  /* Sleep for the first alloc fail, break the loop for the second fail */
> -static bool alloc_fail_should_sleep(struct page **hpage, bool *wait)
> +static bool alloc_fail_should_sleep(int result, bool *wait)
>  {
> -       if (IS_ERR(*hpage)) {
> +       if (result == SCAN_ALLOC_HUGE_PAGE_FAIL) {
>                 if (!*wait)
>                         return true;
>
>                 *wait = false;
> -               *hpage = NULL;
>                 khugepaged_alloc_sleep();
>         }
>         return false;
> @@ -868,7 +867,6 @@ static bool khugepaged_alloc_page(struct page **hpage, gfp_t gfp, int node)
>         *hpage = __alloc_pages_node(node, gfp, HPAGE_PMD_ORDER);
>         if (unlikely(!*hpage)) {
>                 count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
> -               *hpage = ERR_PTR(-ENOMEM);
>                 return false;
>         }
>
> @@ -1010,17 +1008,17 @@ static int alloc_charge_hpage(struct mm_struct *mm, struct page **hpage,
>         return SCAN_SUCCEED;
>  }
>
> -static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
> -                              struct page **hpage, int referenced,
> -                              int unmapped, struct collapse_control *cc)
> +static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
> +                             int referenced, int unmapped,
> +                             struct collapse_control *cc)
>  {
>         LIST_HEAD(compound_pagelist);
>         pmd_t *pmd, _pmd;
>         pte_t *pte;
>         pgtable_t pgtable;
> -       struct page *new_page;
> +       struct page *hpage;
>         spinlock_t *pmd_ptl, *pte_ptl;
> -       int isolated = 0, result = 0;
> +       int result = SCAN_FAIL;
>         struct vm_area_struct *vma;
>         struct mmu_notifier_range range;
>
> @@ -1034,12 +1032,10 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
>          */
>         mmap_read_unlock(mm);
>
> -       result = alloc_charge_hpage(mm, hpage, cc);
> +       result = alloc_charge_hpage(mm, &hpage, cc);
>         if (result != SCAN_SUCCEED)
>                 goto out_nolock;
>
> -       new_page = *hpage;
> -
>         mmap_read_lock(mm);
>         result = hugepage_vma_revalidate(mm, address, &vma);
>         if (result) {
> @@ -1100,11 +1096,11 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
>         mmu_notifier_invalidate_range_end(&range);
>
>         spin_lock(pte_ptl);
> -       isolated = __collapse_huge_page_isolate(vma, address, pte,
> -                       &compound_pagelist);
> +       result =  __collapse_huge_page_isolate(vma, address, pte,
> +                                              &compound_pagelist);
>         spin_unlock(pte_ptl);
>
> -       if (unlikely(!isolated)) {
> +       if (unlikely(result != SCAN_SUCCEED)) {
>                 pte_unmap(pte);
>                 spin_lock(pmd_ptl);
>                 BUG_ON(!pmd_none(*pmd));
> @@ -1116,7 +1112,6 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
>                 pmd_populate(mm, pmd, pmd_pgtable(_pmd));
>                 spin_unlock(pmd_ptl);
>                 anon_vma_unlock_write(vma->anon_vma);
> -               result = SCAN_FAIL;
>                 goto out_up_write;
>         }
>
> @@ -1126,8 +1121,8 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
>          */
>         anon_vma_unlock_write(vma->anon_vma);
>
> -       __collapse_huge_page_copy(pte, new_page, vma, address, pte_ptl,
> -                       &compound_pagelist);
> +       __collapse_huge_page_copy(pte, hpage, vma, address, pte_ptl,
> +                                 &compound_pagelist);
>         pte_unmap(pte);
>         /*
>          * spin_lock() below is not the equivalent of smp_wmb(), but
> @@ -1135,43 +1130,42 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
>          * avoid the copy_huge_page writes to become visible after
>          * the set_pmd_at() write.
>          */
> -       __SetPageUptodate(new_page);
> +       __SetPageUptodate(hpage);
>         pgtable = pmd_pgtable(_pmd);
>
> -       _pmd = mk_huge_pmd(new_page, vma->vm_page_prot);
> +       _pmd = mk_huge_pmd(hpage, vma->vm_page_prot);
>         _pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma);
>
>         spin_lock(pmd_ptl);
>         BUG_ON(!pmd_none(*pmd));
> -       page_add_new_anon_rmap(new_page, vma, address);
> -       lru_cache_add_inactive_or_unevictable(new_page, vma);
> +       page_add_new_anon_rmap(hpage, vma, address);
> +       lru_cache_add_inactive_or_unevictable(hpage, vma);
>         pgtable_trans_huge_deposit(mm, pmd, pgtable);
>         set_pmd_at(mm, address, pmd, _pmd);
>         update_mmu_cache_pmd(vma, address, pmd);
>         spin_unlock(pmd_ptl);
>
> -       *hpage = NULL;
> +       hpage = NULL;
>
> -       khugepaged_pages_collapsed++;
>         result = SCAN_SUCCEED;
>  out_up_write:
>         mmap_write_unlock(mm);
>  out_nolock:
> -       if (!IS_ERR_OR_NULL(*hpage)) {
> -               mem_cgroup_uncharge(page_folio(*hpage));
> -               put_page(*hpage);
> +       if (hpage) {
> +               mem_cgroup_uncharge(page_folio(hpage));
> +               put_page(hpage);
>         }
> -       trace_mm_collapse_huge_page(mm, isolated, result);
> -       return;
> +       trace_mm_collapse_huge_page(mm, result == SCAN_SUCCEED, result);
> +       return result;
>  }
>
>  static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
> -                              unsigned long address, struct page **hpage,
> +                              unsigned long address, bool *mmap_locked,
>                                struct collapse_control *cc)
>  {
>         pmd_t *pmd;
>         pte_t *pte, *_pte;
> -       int ret = 0, result = 0, referenced = 0;
> +       int result = SCAN_FAIL, referenced = 0;
>         int none_or_zero = 0, shared = 0;
>         struct page *page = NULL;
>         unsigned long _address;
> @@ -1306,19 +1300,19 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
>                 result = SCAN_LACK_REFERENCED_PAGE;
>         } else {
>                 result = SCAN_SUCCEED;
> -               ret = 1;
>         }
>  out_unmap:
>         pte_unmap_unlock(pte, ptl);
> -       if (ret) {
> +       if (result == SCAN_SUCCEED) {
>                 /* collapse_huge_page will return with the mmap_lock released */
> -               collapse_huge_page(mm, address, hpage, referenced, unmapped,
> -                                  cc);
> +               *mmap_locked = false;
> +               result = collapse_huge_page(mm, address, referenced,
> +                                           unmapped, cc);

Shall move "*mmap_locked = false" after collapse_huge_page() call? No
functional change, but seems more consistent.

>         }
>  out:
>         trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced,
>                                      none_or_zero, result, unmapped);
> -       return ret;
> +       return result;
>  }
>
>  static void collect_mm_slot(struct mm_slot *mm_slot)
> @@ -1581,7 +1575,6 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
>   * @mm: process address space where collapse happens
>   * @file: file that collapse on
>   * @start: collapse start address
> - * @hpage: new allocated huge page for collapse
>   * @cc: collapse context and scratchpad
>   *
>   * Basic scheme is simple, details are more complex:
> @@ -1599,12 +1592,11 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
>   *    + restore gaps in the page cache;
>   *    + unlock and free huge page;
>   */
> -static void collapse_file(struct mm_struct *mm, struct file *file,
> -                         pgoff_t start, struct page **hpage,
> -                         struct collapse_control *cc)
> +static int collapse_file(struct mm_struct *mm, struct file *file,
> +                        pgoff_t start, struct collapse_control *cc)
>  {
>         struct address_space *mapping = file->f_mapping;
> -       struct page *new_page;
> +       struct page *hpage;
>         pgoff_t index, end = start + HPAGE_PMD_NR;
>         LIST_HEAD(pagelist);
>         XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
> @@ -1615,12 +1607,10 @@ static void collapse_file(struct mm_struct *mm, struct file *file,
>         VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
>         VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
>
> -       result = alloc_charge_hpage(mm, hpage, cc);
> +       result = alloc_charge_hpage(mm, &hpage, cc);
>         if (result != SCAN_SUCCEED)
>                 goto out;
>
> -       new_page = *hpage;
> -
>         /*
>          * Ensure we have slots for all the pages in the range.  This is
>          * almost certainly a no-op because most of the pages must be present
> @@ -1637,14 +1627,14 @@ static void collapse_file(struct mm_struct *mm, struct file *file,
>                 }
>         } while (1);
>
> -       __SetPageLocked(new_page);
> +       __SetPageLocked(hpage);
>         if (is_shmem)
> -               __SetPageSwapBacked(new_page);
> -       new_page->index = start;
> -       new_page->mapping = mapping;
> +               __SetPageSwapBacked(hpage);
> +       hpage->index = start;
> +       hpage->mapping = mapping;
>
>         /*
> -        * At this point the new_page is locked and not up-to-date.
> +        * At this point the hpage is locked and not up-to-date.
>          * It's safe to insert it into the page cache, because nobody would
>          * be able to map it or use it in another way until we unlock it.
>          */
> @@ -1672,7 +1662,7 @@ static void collapse_file(struct mm_struct *mm, struct file *file,
>                                         result = SCAN_FAIL;
>                                         goto xa_locked;
>                                 }
> -                               xas_store(&xas, new_page);
> +                               xas_store(&xas, hpage);
>                                 nr_none++;
>                                 continue;
>                         }
> @@ -1814,19 +1804,19 @@ static void collapse_file(struct mm_struct *mm, struct file *file,
>                 list_add_tail(&page->lru, &pagelist);
>
>                 /* Finally, replace with the new page. */
> -               xas_store(&xas, new_page);
> +               xas_store(&xas, hpage);
>                 continue;
>  out_unlock:
>                 unlock_page(page);
>                 put_page(page);
>                 goto xa_unlocked;
>         }
> -       nr = thp_nr_pages(new_page);
> +       nr = thp_nr_pages(hpage);
>
>         if (is_shmem)
> -               __mod_lruvec_page_state(new_page, NR_SHMEM_THPS, nr);
> +               __mod_lruvec_page_state(hpage, NR_SHMEM_THPS, nr);
>         else {
> -               __mod_lruvec_page_state(new_page, NR_FILE_THPS, nr);
> +               __mod_lruvec_page_state(hpage, NR_FILE_THPS, nr);
>                 filemap_nr_thps_inc(mapping);
>                 /*
>                  * Paired with smp_mb() in do_dentry_open() to ensure
> @@ -1837,21 +1827,21 @@ static void collapse_file(struct mm_struct *mm, struct file *file,
>                 smp_mb();
>                 if (inode_is_open_for_write(mapping->host)) {
>                         result = SCAN_FAIL;
> -                       __mod_lruvec_page_state(new_page, NR_FILE_THPS, -nr);
> +                       __mod_lruvec_page_state(hpage, NR_FILE_THPS, -nr);
>                         filemap_nr_thps_dec(mapping);
>                         goto xa_locked;
>                 }
>         }
>
>         if (nr_none) {
> -               __mod_lruvec_page_state(new_page, NR_FILE_PAGES, nr_none);
> +               __mod_lruvec_page_state(hpage, NR_FILE_PAGES, nr_none);
>                 if (is_shmem)
> -                       __mod_lruvec_page_state(new_page, NR_SHMEM, nr_none);
> +                       __mod_lruvec_page_state(hpage, NR_SHMEM, nr_none);
>         }
>
>         /* Join all the small entries into a single multi-index entry */
>         xas_set_order(&xas, start, HPAGE_PMD_ORDER);
> -       xas_store(&xas, new_page);
> +       xas_store(&xas, hpage);
>  xa_locked:
>         xas_unlock_irq(&xas);
>  xa_unlocked:
> @@ -1873,11 +1863,11 @@ static void collapse_file(struct mm_struct *mm, struct file *file,
>                 index = start;
>                 list_for_each_entry_safe(page, tmp, &pagelist, lru) {
>                         while (index < page->index) {
> -                               clear_highpage(new_page + (index % HPAGE_PMD_NR));
> +                               clear_highpage(hpage + (index % HPAGE_PMD_NR));
>                                 index++;
>                         }
> -                       copy_highpage(new_page + (page->index % HPAGE_PMD_NR),
> -                                       page);
> +                       copy_highpage(hpage + (page->index % HPAGE_PMD_NR),
> +                                     page);
>                         list_del(&page->lru);
>                         page->mapping = NULL;
>                         page_ref_unfreeze(page, 1);
> @@ -1888,23 +1878,23 @@ static void collapse_file(struct mm_struct *mm, struct file *file,
>                         index++;
>                 }
>                 while (index < end) {
> -                       clear_highpage(new_page + (index % HPAGE_PMD_NR));
> +                       clear_highpage(hpage + (index % HPAGE_PMD_NR));
>                         index++;
>                 }
>
> -               SetPageUptodate(new_page);
> -               page_ref_add(new_page, HPAGE_PMD_NR - 1);
> +               SetPageUptodate(hpage);
> +               page_ref_add(hpage, HPAGE_PMD_NR - 1);
>                 if (is_shmem)
> -                       set_page_dirty(new_page);
> -               lru_cache_add(new_page);
> +                       set_page_dirty(hpage);
> +               lru_cache_add(hpage);
>
>                 /*
>                  * Remove pte page tables, so we can re-fault the page as huge.
>                  */
>                 retract_page_tables(mapping, start);
> -               *hpage = NULL;
> -
> -               khugepaged_pages_collapsed++;
> +               unlock_page(hpage);
> +               hpage = NULL;
> +               goto out;

Maybe just set hpage to NULL here, then later...

>         } else {
>                 struct page *page;
>
> @@ -1943,22 +1933,22 @@ static void collapse_file(struct mm_struct *mm, struct file *file,
>                 VM_BUG_ON(nr_none);
>                 xas_unlock_irq(&xas);
>
> -               new_page->mapping = NULL;
> +               hpage->mapping = NULL;
>         }
>
> -       unlock_page(new_page);
> +       unlock_page(hpage);

do
if (hpage)
    unlock_page(hpage);

>  out:
>         VM_BUG_ON(!list_empty(&pagelist));
> -       if (!IS_ERR_OR_NULL(*hpage)) {
> -               mem_cgroup_uncharge(page_folio(*hpage));
> -               put_page(*hpage);
> +       if (hpage) {
> +               mem_cgroup_uncharge(page_folio(hpage));
> +               put_page(hpage);
>         }
>         /* TODO: tracepoints */
> +       return result;
>  }
>
> -static void khugepaged_scan_file(struct mm_struct *mm, struct file *file,
> -                                pgoff_t start, struct page **hpage,
> -                                struct collapse_control *cc)
> +static int khugepaged_scan_file(struct mm_struct *mm, struct file *file,
> +                               pgoff_t start, struct collapse_control *cc)
>  {
>         struct page *page = NULL;
>         struct address_space *mapping = file->f_mapping;
> @@ -2031,15 +2021,16 @@ static void khugepaged_scan_file(struct mm_struct *mm, struct file *file,
>                         result = SCAN_EXCEED_NONE_PTE;
>                         count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
>                 } else {
> -                       collapse_file(mm, file, start, hpage, cc);
> +                       result = collapse_file(mm, file, start, cc);
>                 }
>         }
>
>         /* TODO: tracepoints */
> +       return result;
>  }
>  #else
> -static void khugepaged_scan_file(struct mm_struct *mm, struct file *file,
> -                                pgoff_t start, struct collapse_control *cc)
> +static int khugepaged_scan_file(struct mm_struct *mm, struct file *file, pgoff_t start,
> +                               struct collapse_control *cc)
>  {
>         BUILD_BUG();
>  }
> @@ -2049,8 +2040,7 @@ static void khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
>  }
>  #endif
>
> -static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
> -                                           struct page **hpage,
> +static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int *result,
>                                             struct collapse_control *cc)
>         __releases(&khugepaged_mm_lock)
>         __acquires(&khugepaged_mm_lock)
> @@ -2064,6 +2054,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
>
>         VM_BUG_ON(!pages);
>         lockdep_assert_held(&khugepaged_mm_lock);
> +       *result = SCAN_FAIL;
>
>         if (khugepaged_scan.mm_slot)
>                 mm_slot = khugepaged_scan.mm_slot;
> @@ -2117,7 +2108,8 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
>                         goto skip;
>
>                 while (khugepaged_scan.address < hend) {
> -                       int ret;
> +                       bool mmap_locked = true;
> +
>                         cond_resched();
>                         if (unlikely(khugepaged_test_exit(mm)))
>                                 goto breakouterloop;
> @@ -2134,20 +2126,28 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
>                                                 khugepaged_scan.address);
>
>                                 mmap_read_unlock(mm);
> -                               ret = 1;
> -                               khugepaged_scan_file(mm, file, pgoff, hpage,
> -                                                    cc);
> +                               mmap_locked = false;
> +                               *result = khugepaged_scan_file(mm, file, pgoff,
> +                                                              cc);
>                                 fput(file);
>                         } else {
> -                               ret = khugepaged_scan_pmd(mm, vma,
> -                                               khugepaged_scan.address,
> -                                               hpage, cc);
> +                               *result = khugepaged_scan_pmd(mm, vma,
> +                                                             khugepaged_scan.address,
> +                                                             &mmap_locked, cc);
>                         }
> +                       if (*result == SCAN_SUCCEED)
> +                               ++khugepaged_pages_collapsed;
>                         /* move to next address */
>                         khugepaged_scan.address += HPAGE_PMD_SIZE;
>                         progress += HPAGE_PMD_NR;
> -                       if (ret)
> -                               /* we released mmap_lock so break loop */
> +                       if (!mmap_locked)
> +                               /*
> +                                * We released mmap_lock so break loop.  Note
> +                                * that we drop mmap_lock before all hugepage
> +                                * allocations, so if allocation fails, we are
> +                                * guaranteed to break here and report the
> +                                * correct result back to caller.
> +                                */
>                                 goto breakouterloop_mmap_lock;
>                         if (progress >= pages)
>                                 goto breakouterloop;
> @@ -2199,15 +2199,15 @@ static int khugepaged_wait_event(void)
>
>  static void khugepaged_do_scan(struct collapse_control *cc)
>  {
> -       struct page *hpage = NULL;
>         unsigned int progress = 0, pass_through_head = 0;
>         unsigned int pages = READ_ONCE(khugepaged_pages_to_scan);
>         bool wait = true;
> +       int result = SCAN_SUCCEED;
>
>         lru_add_drain_all();
>
>         while (progress < pages) {
> -               if (alloc_fail_should_sleep(&hpage, &wait))
> +               if (alloc_fail_should_sleep(result, &wait))
>                         break;
>
>                 cond_resched();
> @@ -2221,7 +2221,7 @@ static void khugepaged_do_scan(struct collapse_control *cc)
>                 if (khugepaged_has_work() &&
>                     pass_through_head < 2)
>                         progress += khugepaged_scan_mm_slot(pages - progress,
> -                                                           &hpage, cc);
> +                                                           &result, cc);
>                 else
>                         progress = pages;
>                 spin_unlock(&khugepaged_mm_lock);
> --
> 2.36.1.255.ge46751e96f-goog
>




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux