The patch titled Subject: mm/uffd: always wr-protect pte in pte|pmd_mkuffd_wp() has been added to the -mm mm-unstable branch. Its filename is mm-uffd-always-wr-protect-pte-in-ptepmd_mkuffd_wp.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-uffd-always-wr-protect-pte-in-ptepmd_mkuffd_wp.patch This patch will later appear in the mm-unstable branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: Peter Xu <peterx@xxxxxxxxxx> Subject: mm/uffd: always wr-protect pte in pte|pmd_mkuffd_wp() Date: Wed, 14 Dec 2022 15:15:33 -0500 This patch is a cleanup to always wr-protect pte/pmd in mkuffd_wp paths. The reasons I still think this patch is worthwhile, are: (1) It is a cleanup already; diffstat tells. (2) It just feels natural after I thought about this, if the pte is uffd protected, let's remove the write bit no matter what it was. (2) Since x86 is the only arch that supports uffd-wp, it also redefines pte|pmd_mkuffd_wp() in that it should always contain removals of write bits. It means any future arch that want to implement uffd-wp should naturally follow this rule too. It's good to make it a default, even if with vm_page_prot changes on VM_UFFD_WP. (3) It covers more than vm_page_prot. So no chance of any potential future "accident" (like pte_mkdirty() sparc64 or loongarch, even though it just got its pte_mkdirty fixed <1 month ago). It'll be fairly clear when reading the code too that we don't worry anything before a pte_mkuffd_wp() on uncertainty of the write bit. We may call pte_wrprotect() one more time in some paths (e.g. thp split), but that should be fully local bitop instruction so the overhead should be negligible. Although this patch should logically also fix all the known issues on uffd-wp too recently on page migration (not for numa hint recovery - that may need another explcit pte_wrprotect), but this is not the plan for that fix. So no fixes, and stable doesn't need this. Link: https://lkml.kernel.org/r/20221214201533.1774616-1-peterx@xxxxxxxxxx Signed-off-by: Peter Xu <peterx@xxxxxxxxxx> Acked-by: David Hildenbrand <david@xxxxxxxxxx> Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx> Cc: Hugh Dickins <hughd@xxxxxxxxxx> Cc: Ives van Hoorne <ives@xxxxxxxxxxxxxx> Cc: Mike Kravetz <mike.kravetz@xxxxxxxxxx> Cc: Nadav Amit <nadav.amit@xxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- arch/x86/include/asm/pgtable.h | 24 ++++++++++++------------ include/asm-generic/hugetlb.h | 16 ++++++++-------- mm/huge_memory.c | 8 +++----- mm/hugetlb.c | 4 ++-- mm/memory.c | 8 +++----- mm/mprotect.c | 6 ++---- mm/userfaultfd.c | 18 ++---------------- 7 files changed, 32 insertions(+), 52 deletions(-) --- a/arch/x86/include/asm/pgtable.h~mm-uffd-always-wr-protect-pte-in-ptepmd_mkuffd_wp +++ a/arch/x86/include/asm/pgtable.h @@ -289,6 +289,11 @@ static inline pte_t pte_clear_flags(pte_ return native_make_pte(v & ~clear); } +static inline pte_t pte_wrprotect(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_RW); +} + #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP static inline int pte_uffd_wp(pte_t pte) { @@ -313,7 +318,7 @@ static inline int pte_uffd_wp(pte_t pte) static inline pte_t pte_mkuffd_wp(pte_t pte) { - return pte_set_flags(pte, _PAGE_UFFD_WP); + return pte_wrprotect(pte_set_flags(pte, _PAGE_UFFD_WP)); } static inline pte_t pte_clear_uffd_wp(pte_t pte) @@ -332,11 +337,6 @@ static inline pte_t pte_mkold(pte_t pte) return pte_clear_flags(pte, _PAGE_ACCESSED); } -static inline pte_t pte_wrprotect(pte_t pte) -{ - return pte_clear_flags(pte, _PAGE_RW); -} - static inline pte_t pte_mkexec(pte_t pte) { return pte_clear_flags(pte, _PAGE_NX); @@ -401,6 +401,11 @@ static inline pmd_t pmd_clear_flags(pmd_ return native_make_pmd(v & ~clear); } +static inline pmd_t pmd_wrprotect(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_RW); +} + #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP static inline int pmd_uffd_wp(pmd_t pmd) { @@ -409,7 +414,7 @@ static inline int pmd_uffd_wp(pmd_t pmd) static inline pmd_t pmd_mkuffd_wp(pmd_t pmd) { - return pmd_set_flags(pmd, _PAGE_UFFD_WP); + return pmd_wrprotect(pmd_set_flags(pmd, _PAGE_UFFD_WP)); } static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd) @@ -428,11 +433,6 @@ static inline pmd_t pmd_mkclean(pmd_t pm return pmd_clear_flags(pmd, _PAGE_DIRTY); } -static inline pmd_t pmd_wrprotect(pmd_t pmd) -{ - return pmd_clear_flags(pmd, _PAGE_RW); -} - static inline pmd_t pmd_mkdirty(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); --- a/include/asm-generic/hugetlb.h~mm-uffd-always-wr-protect-pte-in-ptepmd_mkuffd_wp +++ a/include/asm-generic/hugetlb.h @@ -25,6 +25,13 @@ static inline pte_t huge_pte_mkwrite(pte return pte_mkwrite(pte); } +#ifndef __HAVE_ARCH_HUGE_PTE_WRPROTECT +static inline pte_t huge_pte_wrprotect(pte_t pte) +{ + return pte_wrprotect(pte); +} +#endif + static inline pte_t huge_pte_mkdirty(pte_t pte) { return pte_mkdirty(pte); @@ -37,7 +44,7 @@ static inline pte_t huge_pte_modify(pte_ static inline pte_t huge_pte_mkuffd_wp(pte_t pte) { - return pte_mkuffd_wp(pte); + return huge_pte_wrprotect(pte_mkuffd_wp(pte)); } static inline pte_t huge_pte_clear_uffd_wp(pte_t pte) @@ -104,13 +111,6 @@ static inline int huge_pte_none_mostly(p return huge_pte_none(pte) || is_pte_marker(pte); } -#ifndef __HAVE_ARCH_HUGE_PTE_WRPROTECT -static inline pte_t huge_pte_wrprotect(pte_t pte) -{ - return pte_wrprotect(pte); -} -#endif - #ifndef __HAVE_ARCH_PREPARE_HUGEPAGE_RANGE static inline int prepare_hugepage_range(struct file *file, unsigned long addr, unsigned long len) --- a/mm/huge_memory.c~mm-uffd-always-wr-protect-pte-in-ptepmd_mkuffd_wp +++ a/mm/huge_memory.c @@ -1920,17 +1920,15 @@ int change_huge_pmd(struct mmu_gather *t oldpmd = pmdp_invalidate_ad(vma, addr, pmd); entry = pmd_modify(oldpmd, newprot); - if (uffd_wp) { - entry = pmd_wrprotect(entry); + if (uffd_wp) entry = pmd_mkuffd_wp(entry); - } else if (uffd_wp_resolve) { + else if (uffd_wp_resolve) /* * Leave the write bit to be handled by PF interrupt * handler, then things like COW could be properly * handled. */ entry = pmd_clear_uffd_wp(entry); - } /* See change_pte_range(). */ if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) && !pmd_write(entry) && @@ -3275,7 +3273,7 @@ void remove_migration_pmd(struct page_vm if (is_writable_migration_entry(entry)) pmde = maybe_pmd_mkwrite(pmde, vma); if (pmd_swp_uffd_wp(*pvmw->pmd)) - pmde = pmd_wrprotect(pmd_mkuffd_wp(pmde)); + pmde = pmd_mkuffd_wp(pmde); if (!is_migration_entry_young(entry)) pmde = pmd_mkold(pmde); /* NOTE: this may contain setting soft-dirty on some archs */ --- a/mm/hugetlb.c~mm-uffd-always-wr-protect-pte-in-ptepmd_mkuffd_wp +++ a/mm/hugetlb.c @@ -5752,7 +5752,7 @@ static vm_fault_t hugetlb_no_page(struct * if populated. */ if (unlikely(pte_marker_uffd_wp(old_pte))) - new_pte = huge_pte_wrprotect(huge_pte_mkuffd_wp(new_pte)); + new_pte = huge_pte_mkuffd_wp(new_pte); set_huge_pte_at(mm, haddr, ptep, new_pte); hugetlb_count_add(pages_per_huge_page(h), mm); @@ -6558,7 +6558,7 @@ unsigned long hugetlb_change_protection( pte = huge_pte_modify(old_pte, newprot); pte = arch_make_huge_pte(pte, shift, vma->vm_flags); if (uffd_wp) - pte = huge_pte_mkuffd_wp(huge_pte_wrprotect(pte)); + pte = huge_pte_mkuffd_wp(pte); else if (uffd_wp_resolve) pte = huge_pte_clear_uffd_wp(pte); huge_ptep_modify_prot_commit(vma, address, ptep, old_pte, pte); --- a/mm/memory.c~mm-uffd-always-wr-protect-pte-in-ptepmd_mkuffd_wp +++ a/mm/memory.c @@ -878,7 +878,7 @@ copy_present_page(struct vm_area_struct pte = maybe_mkwrite(pte_mkdirty(pte), dst_vma); if (userfaultfd_pte_wp(dst_vma, *src_pte)) /* Uffd-wp needs to be delivered to dest pte as well */ - pte = pte_wrprotect(pte_mkuffd_wp(pte)); + pte = pte_mkuffd_wp(pte); set_pte_at(dst_vma->vm_mm, addr, dst_pte, pte); return 0; } @@ -3953,10 +3953,8 @@ vm_fault_t do_swap_page(struct vm_fault flush_icache_page(vma, page); if (pte_swp_soft_dirty(vmf->orig_pte)) pte = pte_mksoft_dirty(pte); - if (pte_swp_uffd_wp(vmf->orig_pte)) { + if (pte_swp_uffd_wp(vmf->orig_pte)) pte = pte_mkuffd_wp(pte); - pte = pte_wrprotect(pte); - } vmf->orig_pte = pte; /* ksm created a completely new copy */ @@ -4299,7 +4297,7 @@ void do_set_pte(struct vm_fault *vmf, st if (write) entry = maybe_mkwrite(pte_mkdirty(entry), vma); if (unlikely(uffd_wp)) - entry = pte_mkuffd_wp(pte_wrprotect(entry)); + entry = pte_mkuffd_wp(entry); /* copy-on-write page */ if (write && !(vma->vm_flags & VM_SHARED)) { inc_mm_counter(vma->vm_mm, MM_ANONPAGES); --- a/mm/mprotect.c~mm-uffd-always-wr-protect-pte-in-ptepmd_mkuffd_wp +++ a/mm/mprotect.c @@ -177,12 +177,10 @@ static unsigned long change_pte_range(st oldpte = ptep_modify_prot_start(vma, addr, pte); ptent = pte_modify(oldpte, newprot); - if (uffd_wp) { - ptent = pte_wrprotect(ptent); + if (uffd_wp) ptent = pte_mkuffd_wp(ptent); - } else if (uffd_wp_resolve) { + else if (uffd_wp_resolve) ptent = pte_clear_uffd_wp(ptent); - } /* * In some writable, shared mappings, we might want --- a/mm/userfaultfd.c~mm-uffd-always-wr-protect-pte-in-ptepmd_mkuffd_wp +++ a/mm/userfaultfd.c @@ -74,24 +74,10 @@ int mfill_atomic_install_pte(struct mm_s _dst_pte = pte_mkdirty(_dst_pte); if (page_in_cache && !vm_shared) writable = false; - - /* - * Always mark a PTE as write-protected when needed, regardless of - * VM_WRITE, which the user might change. - */ - if (wp_copy) { - _dst_pte = pte_mkuffd_wp(_dst_pte); - writable = false; - } - if (writable) _dst_pte = pte_mkwrite(_dst_pte); - else - /* - * We need this to make sure write bit removed; as mk_pte() - * could return a pte with write bit set. - */ - _dst_pte = pte_wrprotect(_dst_pte); + if (wp_copy) + _dst_pte = pte_mkuffd_wp(_dst_pte); dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl); _ Patches currently in -mm which might be from peterx@xxxxxxxxxx are mm-uffd-fix-pte-marker-when-fork-without-fork-event.patch mm-fix-a-few-rare-cases-of-using-swapin-error-pte-marker.patch mm-uffd-always-wr-protect-pte-in-ptepmd_mkuffd_wp.patch