Add struct collapse_result which aggregates data from a single khugepaged_scan_pmd() or khugepaged_scan_file() request. Change khugepaged to take action based on this returned data instead of deep within the collapsing functions themselves. Signed-off-by: Zach O'Keefe <zokeefe@xxxxxxxxxx> --- mm/khugepaged.c | 187 ++++++++++++++++++++++++++---------------------- 1 file changed, 101 insertions(+), 86 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 21c8436fa73c..e330a95a0479 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -98,6 +98,14 @@ struct collapse_control { int node); }; +/* Gather information from one khugepaged_scan_[pmd|file]() request */ +struct collapse_result { + enum scan_result result; + + /* Was mmap_lock dropped during request? */ + bool dropped_mmap_lock; +}; + /** * struct mm_slot - hash lookup from mm to mm_slot * @hash: hash collision list @@ -742,13 +750,13 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, result = SCAN_SUCCEED; trace_mm_collapse_huge_page_isolate(page, none_or_zero, referenced, writable, result); - return 1; + return SCAN_SUCCEED; } out: release_pte_pages(pte, _pte, compound_pagelist); trace_mm_collapse_huge_page_isolate(page, none_or_zero, referenced, writable, result); - return 0; + return result; } static void __collapse_huge_page_copy(pte_t *pte, struct page *page, @@ -1086,7 +1094,7 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm, static void collapse_huge_page(struct mm_struct *mm, unsigned long address, struct collapse_control *cc, int referenced, - int unmapped) + int unmapped, struct collapse_result *cr) { LIST_HEAD(compound_pagelist); pmd_t *pmd, _pmd; @@ -1094,7 +1102,6 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address, pgtable_t pgtable; struct page *new_page; spinlock_t *pmd_ptl, *pte_ptl; - int isolated = 0, result = 0; struct vm_area_struct *vma; struct mmu_notifier_range range; gfp_t gfp; @@ -1102,6 +1109,7 @@ static void
collapse_huge_page(struct mm_struct *mm, unsigned long address, int node; VM_BUG_ON(address & ~HPAGE_PMD_MASK); + cr->result = SCAN_FAIL; /* Only allocate from the target node */ gfp = alloc_hugepage_khugepaged_gfpmask() | __GFP_THISNODE; @@ -1113,6 +1121,7 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address, * that. We will recheck the vma after taking it again in write mode. */ mmap_read_unlock(mm); + cr->dropped_mmap_lock = true; node = khugepaged_find_target_node(cc); /* sched to specified node before huage page memory copy */ @@ -1123,26 +1132,26 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address, } new_page = cc->alloc_hpage(cc, gfp, node); if (!new_page) { - result = SCAN_ALLOC_HUGE_PAGE_FAIL; + cr->result = SCAN_ALLOC_HUGE_PAGE_FAIL; goto out_nolock; } if (unlikely(mem_cgroup_charge(page_folio(new_page), mm, gfp))) { - result = SCAN_CGROUP_CHARGE_FAIL; + cr->result = SCAN_CGROUP_CHARGE_FAIL; goto out_nolock; } count_memcg_page_event(new_page, THP_COLLAPSE_ALLOC); mmap_read_lock(mm); - result = hugepage_vma_revalidate(mm, address, &vma); - if (result) { + cr->result = hugepage_vma_revalidate(mm, address, &vma); + if (cr->result) { mmap_read_unlock(mm); goto out_nolock; } pmd = mm_find_pmd(mm, address); if (!pmd) { - result = SCAN_PMD_NULL; + cr->result = SCAN_PMD_NULL; mmap_read_unlock(mm); goto out_nolock; } @@ -1165,8 +1174,8 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address, * handled by the anon_vma lock + PG_lock. 
*/ mmap_write_lock(mm); - result = hugepage_vma_revalidate(mm, address, &vma); - if (result) + cr->result = hugepage_vma_revalidate(mm, address, &vma); + if (cr->result) goto out_up_write; /* check if the pmd is still valid */ if (mm_find_pmd(mm, address) != pmd) @@ -1193,11 +1202,11 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address, mmu_notifier_invalidate_range_end(&range); spin_lock(pte_ptl); - isolated = __collapse_huge_page_isolate(vma, address, pte, - &compound_pagelist); + cr->result = __collapse_huge_page_isolate(vma, address, pte, + &compound_pagelist); spin_unlock(pte_ptl); - if (unlikely(!isolated)) { + if (unlikely(cr->result != SCAN_SUCCEED)) { pte_unmap(pte); spin_lock(pmd_ptl); BUG_ON(!pmd_none(*pmd)); @@ -1209,7 +1218,7 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address, pmd_populate(mm, pmd, pmd_pgtable(_pmd)); spin_unlock(pmd_ptl); anon_vma_unlock_write(vma->anon_vma); - result = SCAN_FAIL; + cr->result = SCAN_FAIL; goto out_up_write; } @@ -1245,25 +1254,25 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address, cc->hpage = NULL; - khugepaged_pages_collapsed++; - result = SCAN_SUCCEED; + cr->result = SCAN_SUCCEED; out_up_write: mmap_write_unlock(mm); out_nolock: if (!IS_ERR_OR_NULL(cc->hpage)) mem_cgroup_uncharge(page_folio(cc->hpage)); - trace_mm_collapse_huge_page(mm, isolated, result); + trace_mm_collapse_huge_page(mm, cr->result == SCAN_SUCCEED, cr->result); return; } -static int khugepaged_scan_pmd(struct mm_struct *mm, - struct vm_area_struct *vma, - unsigned long address, - struct collapse_control *cc) +static void khugepaged_scan_pmd(struct mm_struct *mm, + struct vm_area_struct *vma, + unsigned long address, + struct collapse_control *cc, + struct collapse_result *cr) { pmd_t *pmd; pte_t *pte, *_pte; - int ret = 0, result = 0, referenced = 0; + int referenced = 0; int none_or_zero = 0, shared = 0; struct page *page = NULL; unsigned long _address; @@ -1272,9 
+1281,10 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, bool writable = false; VM_BUG_ON(address & ~HPAGE_PMD_MASK); + cr->result = SCAN_FAIL; - result = find_pmd_or_thp_or_none(mm, address, &pmd); - if (result != SCAN_SUCCEED) + cr->result = find_pmd_or_thp_or_none(mm, address, &pmd); + if (cr->result != SCAN_SUCCEED) goto out; memset(cc->node_load, 0, sizeof(cc->node_load)); @@ -1290,12 +1300,12 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, * comment below for pte_uffd_wp(). */ if (pte_swp_uffd_wp(pteval)) { - result = SCAN_PTE_UFFD_WP; + cr->result = SCAN_PTE_UFFD_WP; goto out_unmap; } continue; } else { - result = SCAN_EXCEED_SWAP_PTE; + cr->result = SCAN_EXCEED_SWAP_PTE; count_vm_event(THP_SCAN_EXCEED_SWAP_PTE); goto out_unmap; } @@ -1305,7 +1315,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, ++none_or_zero <= khugepaged_max_ptes_none) { continue; } else { - result = SCAN_EXCEED_NONE_PTE; + cr->result = SCAN_EXCEED_NONE_PTE; count_vm_event(THP_SCAN_EXCEED_NONE_PTE); goto out_unmap; } @@ -1320,7 +1330,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, * userfault messages that falls outside of * the registered range. So, just be simple. 
*/ - result = SCAN_PTE_UFFD_WP; + cr->result = SCAN_PTE_UFFD_WP; goto out_unmap; } if (pte_write(pteval)) @@ -1328,13 +1338,13 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, page = vm_normal_page(vma, _address, pteval); if (unlikely(!page)) { - result = SCAN_PAGE_NULL; + cr->result = SCAN_PAGE_NULL; goto out_unmap; } if (page_mapcount(page) > 1 && ++shared > khugepaged_max_ptes_shared) { - result = SCAN_EXCEED_SHARED_PTE; + cr->result = SCAN_EXCEED_SHARED_PTE; count_vm_event(THP_SCAN_EXCEED_SHARED_PTE); goto out_unmap; } @@ -1349,20 +1359,20 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, */ node = page_to_nid(page); if (khugepaged_scan_abort(node, cc)) { - result = SCAN_SCAN_ABORT; + cr->result = SCAN_SCAN_ABORT; goto out_unmap; } cc->node_load[node]++; if (!PageLRU(page)) { - result = SCAN_PAGE_LRU; + cr->result = SCAN_PAGE_LRU; goto out_unmap; } if (PageLocked(page)) { - result = SCAN_PAGE_LOCK; + cr->result = SCAN_PAGE_LOCK; goto out_unmap; } if (!PageAnon(page)) { - result = SCAN_PAGE_ANON; + cr->result = SCAN_PAGE_ANON; goto out_unmap; } @@ -1384,7 +1394,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, * will be done again later the risk seems low. 
*/ if (!is_refcount_suitable(page)) { - result = SCAN_PAGE_COUNT; + cr->result = SCAN_PAGE_COUNT; goto out_unmap; } if (pte_young(pteval) || @@ -1393,23 +1403,20 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, referenced++; } if (!writable) { - result = SCAN_PAGE_RO; + cr->result = SCAN_PAGE_RO; } else if (!referenced || (unmapped && referenced < HPAGE_PMD_NR/2)) { - result = SCAN_LACK_REFERENCED_PAGE; + cr->result = SCAN_LACK_REFERENCED_PAGE; } else { - result = SCAN_SUCCEED; - ret = 1; + cr->result = SCAN_SUCCEED; } out_unmap: pte_unmap_unlock(pte, ptl); - if (ret) { + if (cr->result == SCAN_SUCCEED) /* collapse_huge_page will return with the mmap_lock released */ - collapse_huge_page(mm, address, cc, referenced, unmapped); - } + collapse_huge_page(mm, address, cc, referenced, unmapped, cr); out: trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced, - none_or_zero, result, unmapped); - return ret; + none_or_zero, cr->result, unmapped); } static void collect_mm_slot(struct mm_slot *mm_slot) @@ -1670,6 +1677,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) * @file: file that collapse on * @start: collapse start address * @cc: collapse context and scratchpad + * @cr: aggregate result information of collapse * * Basic scheme is simple, details are more complex: * - allocate and lock a new huge page; @@ -1688,7 +1696,9 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) */ static void collapse_file(struct mm_struct *mm, struct file *file, pgoff_t start, - struct collapse_control *cc) + struct collapse_control *cc, + struct collapse_result *cr) + { struct address_space *mapping = file->f_mapping; gfp_t gfp; @@ -1696,25 +1706,27 @@ static void collapse_file(struct mm_struct *mm, pgoff_t index, end = start + HPAGE_PMD_NR; LIST_HEAD(pagelist); XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER); - int nr_none = 0, result = SCAN_SUCCEED; + int nr_none = 0; bool is_shmem = 
shmem_file(file); int nr, node; VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem); VM_BUG_ON(start & (HPAGE_PMD_NR - 1)); + cr->result = SCAN_SUCCEED; + /* Only allocate from the target node */ gfp = alloc_hugepage_khugepaged_gfpmask() | __GFP_THISNODE; node = khugepaged_find_target_node(cc); new_page = cc->alloc_hpage(cc, gfp, node); if (!new_page) { - result = SCAN_ALLOC_HUGE_PAGE_FAIL; + cr->result = SCAN_ALLOC_HUGE_PAGE_FAIL; goto out; } if (unlikely(mem_cgroup_charge(page_folio(new_page), mm, gfp))) { - result = SCAN_CGROUP_CHARGE_FAIL; + cr->result = SCAN_CGROUP_CHARGE_FAIL; goto out; } count_memcg_page_event(new_page, THP_COLLAPSE_ALLOC); @@ -1730,7 +1742,7 @@ static void collapse_file(struct mm_struct *mm, break; xas_unlock_irq(&xas); if (!xas_nomem(&xas, GFP_KERNEL)) { - result = SCAN_FAIL; + cr->result = SCAN_FAIL; goto out; } } while (1); @@ -1761,13 +1773,13 @@ static void collapse_file(struct mm_struct *mm, */ if (index == start) { if (!xas_next_entry(&xas, end - 1)) { - result = SCAN_TRUNCATED; + cr->result = SCAN_TRUNCATED; goto xa_locked; } xas_set(&xas, index); } if (!shmem_charge(mapping->host, 1)) { - result = SCAN_FAIL; + cr->result = SCAN_FAIL; goto xa_locked; } xas_store(&xas, new_page); @@ -1780,14 +1792,14 @@ static void collapse_file(struct mm_struct *mm, /* swap in or instantiate fallocated page */ if (shmem_getpage(mapping->host, index, &page, SGP_NOALLOC)) { - result = SCAN_FAIL; + cr->result = SCAN_FAIL; goto xa_unlocked; } } else if (trylock_page(page)) { get_page(page); xas_unlock_irq(&xas); } else { - result = SCAN_PAGE_LOCK; + cr->result = SCAN_PAGE_LOCK; goto xa_locked; } } else { /* !is_shmem */ @@ -1800,7 +1812,7 @@ static void collapse_file(struct mm_struct *mm, lru_add_drain(); page = find_lock_page(mapping, index); if (unlikely(page == NULL)) { - result = SCAN_FAIL; + cr->result = SCAN_FAIL; goto xa_unlocked; } } else if (PageDirty(page)) { @@ -1819,17 +1831,17 @@ static void collapse_file(struct mm_struct *mm, 
*/ xas_unlock_irq(&xas); filemap_flush(mapping); - result = SCAN_FAIL; + cr->result = SCAN_FAIL; goto xa_unlocked; } else if (PageWriteback(page)) { xas_unlock_irq(&xas); - result = SCAN_FAIL; + cr->result = SCAN_FAIL; goto xa_unlocked; } else if (trylock_page(page)) { get_page(page); xas_unlock_irq(&xas); } else { - result = SCAN_PAGE_LOCK; + cr->result = SCAN_PAGE_LOCK; goto xa_locked; } } @@ -1842,7 +1854,7 @@ static void collapse_file(struct mm_struct *mm, /* make sure the page is up to date */ if (unlikely(!PageUptodate(page))) { - result = SCAN_FAIL; + cr->result = SCAN_FAIL; goto out_unlock; } @@ -1851,12 +1863,12 @@ static void collapse_file(struct mm_struct *mm, * we locked the first page, then a THP might be there already. */ if (PageTransCompound(page)) { - result = SCAN_PAGE_COMPOUND; + cr->result = SCAN_PAGE_COMPOUND; goto out_unlock; } if (page_mapping(page) != mapping) { - result = SCAN_TRUNCATED; + cr->result = SCAN_TRUNCATED; goto out_unlock; } @@ -1867,18 +1879,18 @@ static void collapse_file(struct mm_struct *mm, * page is dirty because it hasn't been flushed * since first write. 
*/ - result = SCAN_FAIL; + cr->result = SCAN_FAIL; goto out_unlock; } if (isolate_lru_page(page)) { - result = SCAN_DEL_PAGE_LRU; + cr->result = SCAN_DEL_PAGE_LRU; goto out_unlock; } if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) { - result = SCAN_PAGE_HAS_PRIVATE; + cr->result = SCAN_PAGE_HAS_PRIVATE; putback_lru_page(page); goto out_unlock; } @@ -1899,7 +1911,7 @@ static void collapse_file(struct mm_struct *mm, * - one from isolate_lru_page; */ if (!page_ref_freeze(page, 3)) { - result = SCAN_PAGE_COUNT; + cr->result = SCAN_PAGE_COUNT; xas_unlock_irq(&xas); putback_lru_page(page); goto out_unlock; @@ -1934,7 +1946,7 @@ static void collapse_file(struct mm_struct *mm, */ smp_mb(); if (inode_is_open_for_write(mapping->host)) { - result = SCAN_FAIL; + cr->result = SCAN_FAIL; __mod_lruvec_page_state(new_page, NR_FILE_THPS, -nr); filemap_nr_thps_dec(mapping); goto xa_locked; @@ -1961,7 +1973,7 @@ static void collapse_file(struct mm_struct *mm, */ try_to_unmap_flush(); - if (result == SCAN_SUCCEED) { + if (cr->result == SCAN_SUCCEED) { struct page *page, *tmp; /* @@ -2001,8 +2013,6 @@ static void collapse_file(struct mm_struct *mm, */ retract_page_tables(mapping, start); cc->hpage = NULL; - - khugepaged_pages_collapsed++; } else { struct page *page; @@ -2054,15 +2064,16 @@ static void collapse_file(struct mm_struct *mm, static void khugepaged_scan_file(struct mm_struct *mm, struct file *file, pgoff_t start, - struct collapse_control *cc) + struct collapse_control *cc, + struct collapse_result *cr) { struct page *page = NULL; struct address_space *mapping = file->f_mapping; XA_STATE(xas, &mapping->i_pages, start); int present, swap; int node = NUMA_NO_NODE; - int result = SCAN_SUCCEED; + cr->result = SCAN_SUCCEED; present = 0; swap = 0; memset(cc->node_load, 0, sizeof(cc->node_load)); @@ -2073,7 +2084,7 @@ static void khugepaged_scan_file(struct mm_struct *mm, if (xa_is_value(page)) { if (++swap > khugepaged_max_ptes_swap) { - result = 
SCAN_EXCEED_SWAP_PTE; + cr->result = SCAN_EXCEED_SWAP_PTE; count_vm_event(THP_SCAN_EXCEED_SWAP_PTE); break; } @@ -2085,25 +2096,25 @@ static void khugepaged_scan_file(struct mm_struct *mm, * into a PMD sized page */ if (PageTransCompound(page)) { - result = SCAN_PAGE_COMPOUND; + cr->result = SCAN_PAGE_COMPOUND; break; } node = page_to_nid(page); if (khugepaged_scan_abort(node, cc)) { - result = SCAN_SCAN_ABORT; + cr->result = SCAN_SCAN_ABORT; break; } cc->node_load[node]++; if (!PageLRU(page)) { - result = SCAN_PAGE_LRU; + cr->result = SCAN_PAGE_LRU; break; } if (page_count(page) != 1 + page_mapcount(page) + page_has_private(page)) { - result = SCAN_PAGE_COUNT; + cr->result = SCAN_PAGE_COUNT; break; } @@ -2122,12 +2133,12 @@ static void khugepaged_scan_file(struct mm_struct *mm, } rcu_read_unlock(); - if (result == SCAN_SUCCEED) { + if (cr->result == SCAN_SUCCEED) { if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none) { - result = SCAN_EXCEED_NONE_PTE; + cr->result = SCAN_EXCEED_NONE_PTE; count_vm_event(THP_SCAN_EXCEED_NONE_PTE); } else { - collapse_file(mm, file, start, cc); + collapse_file(mm, file, start, cc, cr); } } @@ -2136,7 +2147,8 @@ static void khugepaged_scan_file(struct mm_struct *mm, #else static void khugepaged_scan_file(struct mm_struct *mm, struct file *file, pgoff_t start, - struct collapse_control *cc) + struct collapse_control *cc, + struct collapse_result *cr) { BUILD_BUG(); } @@ -2208,7 +2220,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, goto skip; while (khugepaged_scan.address < hend) { - int ret; + struct collapse_result cr = {0}; cond_resched(); if (unlikely(khugepaged_test_exit(mm))) goto breakouterloop; @@ -2222,17 +2234,20 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, khugepaged_scan.address); mmap_read_unlock(mm); - ret = 1; - khugepaged_scan_file(mm, file, pgoff, cc); + cr.dropped_mmap_lock = true; + khugepaged_scan_file(mm, file, pgoff, cc, &cr); fput(file); } else { - ret = 
khugepaged_scan_pmd(mm, vma, - khugepaged_scan.address, cc); + khugepaged_scan_pmd(mm, vma, + khugepaged_scan.address, + cc, &cr); } + if (cr.result == SCAN_SUCCEED) + ++khugepaged_pages_collapsed; /* move to next address */ khugepaged_scan.address += HPAGE_PMD_SIZE; progress += HPAGE_PMD_NR; - if (ret) + if (cr.dropped_mmap_lock) /* we released mmap_lock so break loop */ goto breakouterloop_mmap_lock; if (progress >= pages) -- 2.36.0.rc0.470.gd361397f0d-goog