The patch titled Subject: mm/pagewalkers: ACTION_AGAIN if pte_offset_map_lock() fails has been added to the -mm mm-unstable branch. Its filename is mm-pagewalkers-action_again-if-pte_offset_map_lock-fails.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-pagewalkers-action_again-if-pte_offset_map_lock-fails.patch This patch will later appear in the mm-unstable branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: Hugh Dickins <hughd@xxxxxxxxxx> Subject: mm/pagewalkers: ACTION_AGAIN if pte_offset_map_lock() fails Date: Thu, 8 Jun 2023 18:17:26 -0700 (PDT) Simple walk_page_range() users should set ACTION_AGAIN to retry when pte_offset_map_lock() fails. No need to check pmd_trans_unstable(): that was precisely to avoid the possiblity of calling pte_offset_map() on a racily removed or inserted THP entry, but such cases are now safely handled inside it. Likewise there is no need to check pmd_none() or pmd_bad() before calling it. Link: https://lkml.kernel.org/r/c77d9d10-3aad-e3ce-4896-99e91c7947f3@xxxxxxxxxx Signed-off-by: Hugh Dickins <hughd@xxxxxxxxxx> Reviewed-by: SeongJae Park <sj@xxxxxxxxxx> for mm/damon part Cc: Alistair Popple <apopple@xxxxxxxxxx> Cc: Anshuman Khandual <anshuman.khandual@xxxxxxx> Cc: Axel Rasmussen <axelrasmussen@xxxxxxxxxx> Cc: Christophe Leroy <christophe.leroy@xxxxxxxxxx> Cc: Christoph Hellwig <hch@xxxxxxxxxxxxx> Cc: David Hildenbrand <david@xxxxxxxxxx> Cc: "Huang, Ying" <ying.huang@xxxxxxxxx> Cc: Ira Weiny <ira.weiny@xxxxxxxxx> Cc: Jason Gunthorpe <jgg@xxxxxxxx> Cc: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx> Cc: Lorenzo Stoakes <lstoakes@xxxxxxxxx> Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx> Cc: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx> Cc: Miaohe Lin <linmiaohe@xxxxxxxxxx> Cc: Mike Kravetz <mike.kravetz@xxxxxxxxxx> Cc: Mike Rapoport (IBM) <rppt@xxxxxxxxxx> Cc: Minchan Kim <minchan@xxxxxxxxxx> Cc: Naoya Horiguchi <naoya.horiguchi@xxxxxxx> Cc: Pavel Tatashin <pasha.tatashin@xxxxxxxxxx> Cc: Peter Xu <peterx@xxxxxxxxxx> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Cc: Qi Zheng <zhengqi.arch@xxxxxxxxxxxxx> Cc: Ralph Campbell <rcampbell@xxxxxxxxxx> Cc: Ryan Roberts <ryan.roberts@xxxxxxx> Cc: Song Liu <song@xxxxxxxxxx> Cc: Steven Price <steven.price@xxxxxxx> Cc: Suren Baghdasaryan <surenb@xxxxxxxxxx> Cc: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx> Cc: Will Deacon <will@xxxxxxxxxx> Cc: Yang Shi <shy828301@xxxxxxxxx> Cc: Yu Zhao <yuzhao@xxxxxxxxxx> Cc: Zack Rusin <zackr@xxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- fs/proc/task_mmu.c | 32 ++++++++++++++++---------------- mm/damon/vaddr.c | 12 ++++++++---- mm/mempolicy.c | 7 ++++--- mm/mincore.c | 9 ++++----- mm/mlock.c | 4 ++++ 5 files changed, 36 insertions(+), 28 deletions(-) --- a/fs/proc/task_mmu.c~mm-pagewalkers-action_again-if-pte_offset_map_lock-fails +++ a/fs/proc/task_mmu.c @@ -631,14 +631,11 @@ static int smaps_pte_range(pmd_t *pmd, u goto out; } - if (pmd_trans_unstable(pmd)) - goto out; - /* - * The mmap_lock held all the way back in m_start() is what - * keeps khugepaged out of here and from collapsing things - * in here. - */ pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); + if (!pte) { + walk->action = ACTION_AGAIN; + return 0; + } for (; addr != end; pte++, addr += PAGE_SIZE) smaps_pte_entry(pte, addr, walk); pte_unmap_unlock(pte - 1, ptl); @@ -1191,10 +1188,11 @@ out: return 0; } - if (pmd_trans_unstable(pmd)) - return 0; - pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); + if (!pte) { + walk->action = ACTION_AGAIN; + return 0; + } for (; addr != end; pte++, addr += PAGE_SIZE) { ptent = *pte; @@ -1538,9 +1536,6 @@ static int pagemap_pmd_range(pmd_t *pmdp spin_unlock(ptl); return err; } - - if (pmd_trans_unstable(pmdp)) - return 0; #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* @@ -1548,6 +1543,10 @@ static int pagemap_pmd_range(pmd_t *pmdp * goes beyond vma->vm_end. */ orig_pte = pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl); + if (!pte) { + walk->action = ACTION_AGAIN; + return err; + } for (; addr < end; pte++, addr += PAGE_SIZE) { pagemap_entry_t pme; @@ -1887,11 +1886,12 @@ static int gather_pte_stats(pmd_t *pmd, spin_unlock(ptl); return 0; } - - if (pmd_trans_unstable(pmd)) - return 0; #endif orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + if (!pte) { + walk->action = ACTION_AGAIN; + return 0; + } do { struct page *page = can_gather_numa_stats(*pte, vma, addr); if (!page) --- a/mm/damon/vaddr.c~mm-pagewalkers-action_again-if-pte_offset_map_lock-fails +++ a/mm/damon/vaddr.c @@ -318,9 +318,11 @@ static int damon_mkold_pmd_entry(pmd_t * spin_unlock(ptl); } - if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) - return 0; pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + if (!pte) { + walk->action = ACTION_AGAIN; + return 0; + } if (!pte_present(*pte)) goto out; damon_ptep_mkold(pte, walk->vma, addr); @@ -464,9 +466,11 @@ huge_out: regular_page: #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ - if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) - return -EINVAL; pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + if (!pte) { + walk->action = ACTION_AGAIN; + return 0; + } if (!pte_present(*pte)) goto out; folio = damon_get_folio(pte_pfn(*pte)); --- a/mm/mempolicy.c~mm-pagewalkers-action_again-if-pte_offset_map_lock-fails +++ a/mm/mempolicy.c @@ -514,10 +514,11 @@ static int queue_folios_pte_range(pmd_t if (ptl) return queue_folios_pmd(pmd, ptl, addr, end, walk); - if (pmd_trans_unstable(pmd)) - return 0; - mapped_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + if (!pte) { + walk->action = ACTION_AGAIN; + return 0; + } for (; addr != end; pte++, addr += PAGE_SIZE) { if (!pte_present(*pte)) continue; --- a/mm/mincore.c~mm-pagewalkers-action_again-if-pte_offset_map_lock-fails +++ a/mm/mincore.c @@ -113,12 +113,11 @@ static int mincore_pte_range(pmd_t *pmd, goto out; } - if (pmd_trans_unstable(pmd)) { - __mincore_unmapped_range(addr, end, vma, vec); - goto out; - } - ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + if (!ptep) { + walk->action = ACTION_AGAIN; + return 0; + } for (; addr != end; ptep++, addr += PAGE_SIZE) { pte_t pte = *ptep; --- a/mm/mlock.c~mm-pagewalkers-action_again-if-pte_offset_map_lock-fails +++ a/mm/mlock.c @@ -329,6 +329,10 @@ static int mlock_pte_range(pmd_t *pmd, u } start_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); + if (!start_pte) { + walk->action = ACTION_AGAIN; + return 0; + } for (pte = start_pte; addr != end; pte++, addr += PAGE_SIZE) { if (!pte_present(*pte)) continue; _ Patches currently in -mm which might be from hughd@xxxxxxxxxx are arm-allow-pte_offset_map-to-fail.patch arm64-allow-pte_offset_map-to-fail.patch arm64-hugetlb-pte_alloc_huge-pte_offset_huge.patch ia64-hugetlb-pte_alloc_huge-pte_offset_huge.patch m68k-allow-pte_offset_map-to-fail.patch microblaze-allow-pte_offset_map-to-fail.patch mips-update_mmu_cache-can-replace-__update_tlb.patch mips-update_mmu_cache-can-replace-__update_tlb-fix.patch parisc-add-pte_unmap-to-balance-get_ptep.patch parisc-unmap_uncached_pte-use-pte_offset_kernel.patch parisc-hugetlb-pte_alloc_huge-pte_offset_huge.patch powerpc-kvmppc_unmap_free_pmd-pte_offset_kernel.patch powerpc-allow-pte_offset_map-to-fail.patch powerpc-hugetlb-pte_alloc_huge.patch riscv-hugetlb-pte_alloc_huge-pte_offset_huge.patch s390-allow-pte_offset_map_lock-to-fail.patch s390-gmap-use-pte_unmap_unlock-not-spin_unlock.patch sh-hugetlb-pte_alloc_huge-pte_offset_huge.patch sparc-hugetlb-pte_alloc_huge-pte_offset_huge.patch sparc-allow-pte_offset_map-to-fail.patch sparc-iounit-and-iommu-use-pte_offset_kernel.patch x86-allow-get_locked_pte-to-fail.patch x86-sme_populate_pgd-use-pte_offset_kernel.patch xtensa-add-pte_unmap-to-balance-pte_offset_map.patch mm-use-pmdp_get_lockless-without-surplus-barrier.patch mm-migrate-remove-cruft-from-migration_entry_waits.patch mm-pgtable-kmap_local_page-instead-of-kmap_atomic.patch mm-pgtable-allow-pte_offset_map-to-fail.patch mm-filemap-allow-pte_offset_map_lock-to-fail.patch mm-page_vma_mapped-delete-bogosity-in-page_vma_mapped_walk.patch mm-page_vma_mapped-reformat-map_pte-with-less-indentation.patch mm-page_vma_mapped-pte_offset_map_nolock-not-pte_lockptr.patch mm-pagewalkers-action_again-if-pte_offset_map_lock-fails.patch mm-pagewalk-walk_pte_range-allow-for-pte_offset_map.patch mm-vmwgfx-simplify-pmd-pud-mapping-dirty-helpers.patch mm-vmalloc-vmalloc_to_page-use-pte_offset_kernel.patch mm-hmm-retry-if-pte_offset_map-fails.patch mm-userfaultfd-retry-if-pte_offset_map-fails.patch mm-userfaultfd-allow-pte_offset_map_lock-to-fail.patch mm-debug_vm_pgtablepage_table_check-warn-pte-map-fails.patch mm-various-give-up-if-pte_offset_map-fails.patch mm-mprotect-delete-pmd_none_or_clear_bad_unless_trans_huge.patch mm-mremap-retry-if-either-pte_offset_map_lock-fails.patch mm-madvise-clean-up-pte_offset_map_lock-scans.patch mm-madvise-clean-up-force_shm_swapin_readahead.patch mm-swapoff-allow-pte_offset_map-to-fail.patch mm-mglru-allow-pte_offset_map_nolock-to-fail.patch mm-migrate_device-allow-pte_offset_map_lock-to-fail.patch mm-gup-remove-foll_split_pmd-use-of-pmd_trans_unstable.patch mm-huge_memory-split-huge-pmd-under-one-pte_offset_map.patch mm-khugepaged-allow-pte_offset_map-to-fail.patch mm-memory-allow-pte_offset_map-to-fail.patch mm-memory-handle_pte_fault-use-pte_offset_map_nolock.patch mm-pgtable-delete-pmd_trans_unstable-and-friends.patch mm-swap-swap_vma_readahead-do-the-pte_offset_map.patch perf-core-allow-pte_offset_map-to-fail.patch