The patch titled
     Subject: arm: adjust_pte() use pte_offset_map_rw_nolock()
has been added to the -mm mm-unstable branch.  Its filename is
     arm-adjust_pte-use-pte_offset_map_rw_nolock.patch

This patch will shortly appear at
     https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/arm-adjust_pte-use-pte_offset_map_rw_nolock.patch

This patch will later appear in the mm-unstable branch at
    git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days

------------------------------------------------------
From: Qi Zheng <zhengqi.arch@xxxxxxxxxxxxx>
Subject: arm: adjust_pte() use pte_offset_map_rw_nolock()
Date: Thu, 26 Sep 2024 14:46:18 +0800

In do_adjust_pte(), we may modify the pte entry.  The corresponding pmd
entry may have been modified concurrently.  Therefore, in order to ensure
the stability of the pmd entry, use pte_offset_map_rw_nolock() to replace
pte_offset_map_nolock(), and do a pmd_same() check after holding the PTL.

All callers of update_mmu_cache_range() hold the vmf->ptl, so we can
determine whether split PTE locks are being used by doing the following,
just as we do elsewhere in the kernel.

	ptl != vmf->ptl

And then we can delete do_pte_lock() and do_pte_unlock().
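
For reference, the resulting locking pattern in adjust_pte() reduces to
the sketch below (condensed from the diff that follows; the page-table
walk and cleanup paths are omitted).  The pmdval snapshot taken at map
time is revalidated with pmd_same() once the PTL is actually held, and
the lookup is retried if the pmd entry changed in the meantime:

again:
	pte = pte_offset_map_rw_nolock(vma->vm_mm, pmd, address, &pmdval, &ptl);
	if (!pte)
		return 0;

	if (ptl != vmf->ptl) {
		/* split PTE locks: take the nested per-table lock */
		spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
		/* the pmd entry may have changed before we took the PTL */
		if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pmd)))) {
			pte_unmap_unlock(pte, ptl);
			goto again;
		}
	}

	ret = do_adjust_pte(vma, address, pfn, pte);

	if (ptl != vmf->ptl)
		spin_unlock(ptl);
	pte_unmap(pte);
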
Link: https://lkml.kernel.org/r/0eaf6b69aeb2fe35092a633fed12537efe645303.1727332572.git.zhengqi.arch@xxxxxxxxxxxxx
Signed-off-by: Qi Zheng <zhengqi.arch@xxxxxxxxxxxxx>
Acked-by: David Hildenbrand <david@xxxxxxxxxx>
Reviewed-by: Muchun Song <muchun.song@xxxxxxxxx>
Cc: Hugh Dickins <hughd@xxxxxxxxxx>
Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx>
Cc: Mike Rapoport (Microsoft) <rppt@xxxxxxxxxx>
Cc: Peter Xu <peterx@xxxxxxxxxx>
Cc: Ryan Roberts <ryan.roberts@xxxxxxx>
Cc: Vishal Moola (Oracle) <vishal.moola@xxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 arch/arm/mm/fault-armv.c |   53 +++++++++++++++----------------
 1 file changed, 22 insertions(+), 31 deletions(-)

--- a/arch/arm/mm/fault-armv.c~arm-adjust_pte-use-pte_offset_map_rw_nolock
+++ a/arch/arm/mm/fault-armv.c
@@ -61,32 +61,8 @@ static int do_adjust_pte(struct vm_area_
 	return ret;
 }
 
-#if defined(CONFIG_SPLIT_PTE_PTLOCKS)
-/*
- * If we are using split PTE locks, then we need to take the page
- * lock here.  Otherwise we are using shared mm->page_table_lock
- * which is already locked, thus cannot take it.
- */
-static inline void do_pte_lock(spinlock_t *ptl)
-{
-	/*
-	 * Use nested version here to indicate that we are already
-	 * holding one similar spinlock.
-	 */
-	spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
-}
-
-static inline void do_pte_unlock(spinlock_t *ptl)
-{
-	spin_unlock(ptl);
-}
-#else /* !defined(CONFIG_SPLIT_PTE_PTLOCKS) */
-static inline void do_pte_lock(spinlock_t *ptl) {}
-static inline void do_pte_unlock(spinlock_t *ptl) {}
-#endif /* defined(CONFIG_SPLIT_PTE_PTLOCKS) */
-
 static int adjust_pte(struct vm_area_struct *vma, unsigned long address,
-		      unsigned long pfn)
+		      unsigned long pfn, struct vm_fault *vmf)
 {
 	spinlock_t *ptl;
 	pgd_t *pgd;
@@ -94,6 +70,7 @@ static int adjust_pte(struct vm_area_str
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
+	pmd_t pmdval;
 	int ret;
 
 	pgd = pgd_offset(vma->vm_mm, address);
@@ -112,20 +89,33 @@ static int adjust_pte(struct vm_area_str
 	if (pmd_none_or_clear_bad(pmd))
 		return 0;
 
+again:
 	/*
 	 * This is called while another page table is mapped, so we
 	 * must use the nested version.  This also means we need to
 	 * open-code the spin-locking.
 	 */
-	pte = pte_offset_map_nolock(vma->vm_mm, pmd, address, &ptl);
+	pte = pte_offset_map_rw_nolock(vma->vm_mm, pmd, address, &pmdval, &ptl);
 	if (!pte)
 		return 0;
 
-	do_pte_lock(ptl);
+	/*
+	 * If we are using split PTE locks, then we need to take the page
+	 * lock here.  Otherwise we are using shared mm->page_table_lock
+	 * which is already locked, thus cannot take it.
+	 */
+	if (ptl != vmf->ptl) {
+		spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
+		if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pmd)))) {
+			pte_unmap_unlock(pte, ptl);
+			goto again;
+		}
+	}
 
 	ret = do_adjust_pte(vma, address, pfn, pte);
 
-	do_pte_unlock(ptl);
+	if (ptl != vmf->ptl)
+		spin_unlock(ptl);
 	pte_unmap(pte);
 
 	return ret;
@@ -133,7 +123,8 @@ static int adjust_pte(struct vm_area_str
 
 static void
 make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
-	      unsigned long addr, pte_t *ptep, unsigned long pfn)
+	      unsigned long addr, pte_t *ptep, unsigned long pfn,
+	      struct vm_fault *vmf)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct vm_area_struct *mpnt;
@@ -160,7 +151,7 @@ make_coherent(struct address_space *mapp
 		if (!(mpnt->vm_flags & VM_MAYSHARE))
 			continue;
 		offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
-		aliases += adjust_pte(mpnt, mpnt->vm_start + offset, pfn);
+		aliases += adjust_pte(mpnt, mpnt->vm_start + offset, pfn, vmf);
 	}
 	flush_dcache_mmap_unlock(mapping);
 	if (aliases)
@@ -203,7 +194,7 @@ void update_mmu_cache_range(struct vm_fa
 		__flush_dcache_folio(mapping, folio);
 	if (mapping) {
 		if (cache_is_vivt())
-			make_coherent(mapping, vma, addr, ptep, pfn);
+			make_coherent(mapping, vma, addr, ptep, pfn, vmf);
 		else if (vma->vm_flags & VM_EXEC)
 			__flush_icache_all();
 	}
_

Patches currently in -mm which might be from zhengqi.arch@xxxxxxxxxxxxx are

mm-pgtable-introduce-pte_offset_map_rorw_nolock.patch
powerpc-assert_pte_locked-use-pte_offset_map_ro_nolock.patch
mm-filemap-filemap_fault_recheck_pte_none-use-pte_offset_map_ro_nolock.patch
mm-khugepaged-__collapse_huge_page_swapin-use-pte_offset_map_ro_nolock.patch
arm-adjust_pte-use-pte_offset_map_rw_nolock.patch
mm-handle_pte_fault-use-pte_offset_map_rw_nolock.patch
mm-khugepaged-collapse_pte_mapped_thp-use-pte_offset_map_rw_nolock.patch
mm-copy_pte_range-use-pte_offset_map_rw_nolock.patch
mm-mremap-move_ptes-use-pte_offset_map_rw_nolock.patch
mm-page_vma_mapped_walk-map_pte-use-pte_offset_map_rw_nolock.patch
mm-userfaultfd-move_pages_pte-use-pte_offset_map_rw_nolock.patch
mm-multi-gen-lru-walk_pte_range-use-pte_offset_map_rw_nolock.patch
mm-pgtable-remove-pte_offset_map_nolock.patch