The quilt patch titled
     Subject: mm/madvise: clean up pte_offset_map_lock() scans
has been removed from the -mm tree.  Its filename was
     mm-madvise-clean-up-pte_offset_map_lock-scans.patch

This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

------------------------------------------------------
From: Hugh Dickins <hughd@xxxxxxxxxx>
Subject: mm/madvise: clean up pte_offset_map_lock() scans
Date: Thu, 8 Jun 2023 18:34:03 -0700 (PDT)

Came here to make madvise's several pte_offset_map_lock() scans advance to
next extent on failure, and remove superfluous pmd_trans_unstable() and
pmd_none_or_trans_huge_or_clear_bad() calls.  But also did some nearby
cleanup.

swapin_walk_pmd_entry(): don't name an address "index"; don't drop the
lock after every pte, only when calling out to read_swap_cache_async().

madvise_cold_or_pageout_pte_range() and madvise_free_pte_range(): prefer
"start_pte" for pointer, orig_pte usually denotes a saved pte value; leave
lazy MMU mode before unlocking; merge the success and failure paths after
split_folio().

Link: https://lkml.kernel.org/r/cc4d9a88-9da6-362-50d9-6735c2b125c6@xxxxxxxxxx
Signed-off-by: Hugh Dickins <hughd@xxxxxxxxxx>
Cc: Alistair Popple <apopple@xxxxxxxxxx>
Cc: Anshuman Khandual <anshuman.khandual@xxxxxxx>
Cc: Axel Rasmussen <axelrasmussen@xxxxxxxxxx>
Cc: Christophe Leroy <christophe.leroy@xxxxxxxxxx>
Cc: Christoph Hellwig <hch@xxxxxxxxxxxxx>
Cc: David Hildenbrand <david@xxxxxxxxxx>
Cc: "Huang, Ying" <ying.huang@xxxxxxxxx>
Cc: Ira Weiny <ira.weiny@xxxxxxxxx>
Cc: Jason Gunthorpe <jgg@xxxxxxxx>
Cc: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
Cc: Lorenzo Stoakes <lstoakes@xxxxxxxxx>
Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx>
Cc: Miaohe Lin <linmiaohe@xxxxxxxxxx>
Cc: Mike Kravetz <mike.kravetz@xxxxxxxxxx>
Cc: Mike Rapoport (IBM) <rppt@xxxxxxxxxx>
Cc: Minchan Kim <minchan@xxxxxxxxxx>
Cc: Naoya Horiguchi <naoya.horiguchi@xxxxxxx>
Cc: Pavel Tatashin <pasha.tatashin@xxxxxxxxxx>
Cc: Peter Xu <peterx@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Qi Zheng <zhengqi.arch@xxxxxxxxxxxxx>
Cc: Ralph Campbell <rcampbell@xxxxxxxxxx>
Cc: Ryan Roberts <ryan.roberts@xxxxxxx>
Cc: SeongJae Park <sj@xxxxxxxxxx>
Cc: Song Liu <song@xxxxxxxxxx>
Cc: Steven Price <steven.price@xxxxxxx>
Cc: Suren Baghdasaryan <surenb@xxxxxxxxxx>
Cc: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx>
Cc: Will Deacon <will@xxxxxxxxxx>
Cc: Yang Shi <shy828301@xxxxxxxxx>
Cc: Yu Zhao <yuzhao@xxxxxxxxxx>
Cc: Zack Rusin <zackr@xxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 mm/madvise.c |  122 +++++++++++++++++++++++++++----------------------
 1 file changed, 68 insertions(+), 54 deletions(-)

--- a/mm/madvise.c~mm-madvise-clean-up-pte_offset_map_lock-scans
+++ a/mm/madvise.c
@@ -188,37 +188,43 @@ success:
 
 #ifdef CONFIG_SWAP
 static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
-	unsigned long end, struct mm_walk *walk)
+		unsigned long end, struct mm_walk *walk)
 {
 	struct vm_area_struct *vma = walk->private;
-	unsigned long index;
 	struct swap_iocb *splug = NULL;
+	pte_t *ptep = NULL;
+	spinlock_t *ptl;
+	unsigned long addr;
 
-	if (pmd_none_or_trans_huge_or_clear_bad(pmd))
-		return 0;
-
-	for (index = start; index != end; index += PAGE_SIZE) {
+	for (addr = start; addr < end; addr += PAGE_SIZE) {
 		pte_t pte;
 		swp_entry_t entry;
 		struct page *page;
-		spinlock_t *ptl;
-		pte_t *ptep;
 
-		ptep = pte_offset_map_lock(vma->vm_mm, pmd, index, &ptl);
-		pte = *ptep;
-		pte_unmap_unlock(ptep, ptl);
+		if (!ptep++) {
+			ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+			if (!ptep)
+				break;
+		}
 
+		pte = *ptep;
 		if (!is_swap_pte(pte))
 			continue;
 		entry = pte_to_swp_entry(pte);
 		if (unlikely(non_swap_entry(entry)))
 			continue;
 
+		pte_unmap_unlock(ptep, ptl);
+		ptep = NULL;
+
 		page = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE,
-					     vma, index, false, &splug);
+					     vma, addr, false, &splug);
 		if (page)
 			put_page(page);
 	}
+
+	if (ptep)
+		pte_unmap_unlock(ptep, ptl);
 	swap_read_unplug(splug);
 	cond_resched();
 
@@ -340,7 +346,7 @@ static int madvise_cold_or_pageout_pte_r
 	bool pageout = private->pageout;
 	struct mm_struct *mm = tlb->mm;
 	struct vm_area_struct *vma = walk->vma;
-	pte_t *orig_pte, *pte, ptent;
+	pte_t *start_pte, *pte, ptent;
 	spinlock_t *ptl;
 	struct folio *folio = NULL;
 	LIST_HEAD(folio_list);
@@ -422,11 +428,11 @@ huge_unlock:
 	}
 
 regular_folio:
-	if (pmd_trans_unstable(pmd))
-		return 0;
 #endif
 	tlb_change_page_size(tlb, PAGE_SIZE);
-	orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	if (!start_pte)
+		return 0;
 	flush_tlb_batched_pending(mm);
 	arch_enter_lazy_mmu_mode();
 	for (; addr < end; pte++, addr += PAGE_SIZE) {
@@ -447,25 +453,28 @@ regular_folio:
 		 * are sure it's worth. Split it if we are only owner.
 		 */
 		if (folio_test_large(folio)) {
+			int err;
+
 			if (folio_mapcount(folio) != 1)
 				break;
 			if (pageout_anon_only_filter && !folio_test_anon(folio))
 				break;
-			folio_get(folio);
-			if (!folio_trylock(folio)) {
-				folio_put(folio);
-				break;
-			}
-			pte_unmap_unlock(orig_pte, ptl);
-			if (split_folio(folio)) {
-				folio_unlock(folio);
-				folio_put(folio);
-				orig_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+			if (!folio_trylock(folio))
 				break;
-			}
+			folio_get(folio);
+			arch_leave_lazy_mmu_mode();
+			pte_unmap_unlock(start_pte, ptl);
+			start_pte = NULL;
+			err = split_folio(folio);
 			folio_unlock(folio);
 			folio_put(folio);
-			orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+			if (err)
+				break;
+			start_pte = pte =
+				pte_offset_map_lock(mm, pmd, addr, &ptl);
+			if (!start_pte)
+				break;
+			arch_enter_lazy_mmu_mode();
 			pte--;
 			addr -= PAGE_SIZE;
 			continue;
@@ -510,8 +519,10 @@ regular_folio:
 			folio_deactivate(folio);
 	}
 
-	arch_leave_lazy_mmu_mode();
-	pte_unmap_unlock(orig_pte, ptl);
+	if (start_pte) {
+		arch_leave_lazy_mmu_mode();
+		pte_unmap_unlock(start_pte, ptl);
+	}
 	if (pageout)
 		reclaim_pages(&folio_list);
 	cond_resched();
@@ -612,7 +623,7 @@ static int madvise_free_pte_range(pmd_t
 	struct mm_struct *mm = tlb->mm;
 	struct vm_area_struct *vma = walk->vma;
 	spinlock_t *ptl;
-	pte_t *orig_pte, *pte, ptent;
+	pte_t *start_pte, *pte, ptent;
 	struct folio *folio;
 	int nr_swap = 0;
 	unsigned long next;
@@ -620,13 +631,12 @@ static int madvise_free_pte_range(pmd_t
 	next = pmd_addr_end(addr, end);
 	if (pmd_trans_huge(*pmd))
 		if (madvise_free_huge_pmd(tlb, vma, pmd, addr, next))
-			goto next;
-
-	if (pmd_trans_unstable(pmd))
-		return 0;
+			return 0;
 
 	tlb_change_page_size(tlb, PAGE_SIZE);
-	orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	start_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	if (!start_pte)
+		return 0;
 	flush_tlb_batched_pending(mm);
 	arch_enter_lazy_mmu_mode();
 	for (; addr != end; pte++, addr += PAGE_SIZE) {
@@ -664,23 +674,26 @@ static int madvise_free_pte_range(pmd_t
 		 * deactivate all pages.
 		 */
 		if (folio_test_large(folio)) {
+			int err;
+
 			if (folio_mapcount(folio) != 1)
-				goto out;
+				break;
+			if (!folio_trylock(folio))
+				break;
 			folio_get(folio);
-			if (!folio_trylock(folio)) {
-				folio_put(folio);
-				goto out;
-			}
-			pte_unmap_unlock(orig_pte, ptl);
-			if (split_folio(folio)) {
-				folio_unlock(folio);
-				folio_put(folio);
-				orig_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
-				goto out;
-			}
+			arch_leave_lazy_mmu_mode();
+			pte_unmap_unlock(start_pte, ptl);
+			start_pte = NULL;
+			err = split_folio(folio);
 			folio_unlock(folio);
 			folio_put(folio);
-			orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+			if (err)
+				break;
+			start_pte = pte =
+				pte_offset_map_lock(mm, pmd, addr, &ptl);
+			if (!start_pte)
+				break;
+			arch_enter_lazy_mmu_mode();
 			pte--;
 			addr -= PAGE_SIZE;
 			continue;
@@ -725,17 +738,18 @@ static int madvise_free_pte_range(pmd_t
 		}
 		folio_mark_lazyfree(folio);
 	}
-out:
+
 	if (nr_swap) {
 		if (current->mm == mm)
 			sync_mm_rss(mm);
-
 		add_mm_counter(mm, MM_SWAPENTS, nr_swap);
 	}
-	arch_leave_lazy_mmu_mode();
-	pte_unmap_unlock(orig_pte, ptl);
+	if (start_pte) {
+		arch_leave_lazy_mmu_mode();
+		pte_unmap_unlock(start_pte, ptl);
+	}
 	cond_resched();
-next:
+
 	return 0;
 }
 
_

Patches currently in -mm which might be from hughd@xxxxxxxxxx are
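For reference, here is a minimal sketch (not part of the patch above; the
walker name example_pte_range() and the per-pte work placeholder are
hypothetical) of the scan shape these cleanups converge on: map and lock
the extent once, skip the extent if pte_offset_map_lock() fails, and leave
lazy MMU mode before unlocking.

/*
 * Illustrative sketch only: the general pte_offset_map_lock() scan shape
 * used by the madvise walkers after this cleanup.
 */
static int example_pte_range(pmd_t *pmd, unsigned long addr,
			     unsigned long end, struct mm_walk *walk)
{
	pte_t *start_pte, *pte;
	spinlock_t *ptl;

	start_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	if (!start_pte)
		return 0;	/* pmd changed under us: skip this extent */

	arch_enter_lazy_mmu_mode();
	for (; addr < end; pte++, addr += PAGE_SIZE) {
		pte_t ptent = *pte;

		if (pte_none(ptent))
			continue;
		/* per-pte work goes here */
	}
	arch_leave_lazy_mmu_mode();
	pte_unmap_unlock(start_pte, ptl);
	return 0;
}

Returning 0 when the map fails lets the pagewalk simply move on to the
next pmd extent, which is the behaviour the madvise scans above adopt.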