The patch titled Subject: hugetlb/userfaultfd: unshare all pmds for hugetlbfs when register wp has been added to the -mm tree. Its filename is hugetlb-userfaultfd-unshare-all-pmds-for-hugetlbfs-when-register-wp.patch This patch should soon appear at https://ozlabs.org/~akpm/mmots/broken-out/hugetlb-userfaultfd-unshare-all-pmds-for-hugetlbfs-when-register-wp.patch and later at https://ozlabs.org/~akpm/mmotm/broken-out/hugetlb-userfaultfd-unshare-all-pmds-for-hugetlbfs-when-register-wp.patch Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next and is updated there every 3-4 working days ------------------------------------------------------ From: Peter Xu <peterx@xxxxxxxxxx> Subject: hugetlb/userfaultfd: unshare all pmds for hugetlbfs when register wp Huge pmd sharing for hugetlbfs is racy with userfaultfd-wp because userfaultfd-wp is always based on pgtable entries, so they cannot be shared. Walk the hugetlb range and unshare all such mappings if there is, right before UFFDIO_REGISTER will succeed and return to userspace. This will pair with want_pmd_share() in hugetlb code so that huge pmd sharing is completely disabled for userfaultfd-wp registered range. Link: https://lkml.kernel.org/r/20210218231206.15524-1-peterx@xxxxxxxxxx Signed-off-by: Peter Xu <peterx@xxxxxxxxxx> Reviewed-by: Mike Kravetz <mike.kravetz@xxxxxxxxxx> Cc: Peter Xu <peterx@xxxxxxxxxx> Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx> Cc: Axel Rasmussen <axelrasmussen@xxxxxxxxxx> Cc: Mike Rapoport <rppt@xxxxxxxxxxxxxxxxxx> Cc: Kirill A. Shutemov <kirill@xxxxxxxxxxxxx> Cc: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx> Cc: Adam Ruprecht <ruprecht@xxxxxxxxxx> Cc: Alexander Viro <viro@xxxxxxxxxxxxxxxxxx> Cc: Alexey Dobriyan <adobriyan@xxxxxxxxx> Cc: Anshuman Khandual <anshuman.khandual@xxxxxxx> Cc: Cannon Matthews <cannonmatthews@xxxxxxxxxx> Cc: Catalin Marinas <catalin.marinas@xxxxxxx> Cc: Chinwen Chang <chinwen.chang@xxxxxxxxxxxx> Cc: David Rientjes <rientjes@xxxxxxxxxx> Cc: "Dr . David Alan Gilbert" <dgilbert@xxxxxxxxxx> Cc: Huang Ying <ying.huang@xxxxxxxxx> Cc: Ingo Molnar <mingo@xxxxxxxxxx> Cc: Jann Horn <jannh@xxxxxxxxxx> Cc: Jerome Glisse <jglisse@xxxxxxxxxx> Cc: Lokesh Gidra <lokeshgidra@xxxxxxxxxx> Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx> Cc: "Michal Koutn" <mkoutny@xxxxxxxx> Cc: Michel Lespinasse <walken@xxxxxxxxxx> Cc: Mina Almasry <almasrymina@xxxxxxxxxx> Cc: Nicholas Piggin <npiggin@xxxxxxxxx> Cc: Oliver Upton <oupton@xxxxxxxxxx> Cc: Shaohua Li <shli@xxxxxx> Cc: Shawn Anastasio <shawn@xxxxxxxxxx> Cc: Steven Price <steven.price@xxxxxxx> Cc: Steven Rostedt <rostedt@xxxxxxxxxxx> Cc: Vlastimil Babka <vbabka@xxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- fs/userfaultfd.c | 4 ++ include/linux/hugetlb.h | 3 ++ mm/hugetlb.c | 51 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+) --- a/fs/userfaultfd.c~hugetlb-userfaultfd-unshare-all-pmds-for-hugetlbfs-when-register-wp +++ a/fs/userfaultfd.c @@ -15,6 +15,7 @@ #include <linux/sched/signal.h> #include <linux/sched/mm.h> #include <linux/mm.h> +#include <linux/mmu_notifier.h> #include <linux/poll.h> #include <linux/slab.h> #include <linux/seq_file.h> @@ -1449,6 +1450,9 @@ static int userfaultfd_register(struct u vma->vm_flags = new_flags; vma->vm_userfaultfd_ctx.ctx = ctx; + if (is_vm_hugetlb_page(vma) && uffd_disable_huge_pmd_share(vma)) + hugetlb_unshare_all_pmds(vma); + skip: prev = vma; start = vma->vm_end; --- a/include/linux/hugetlb.h~hugetlb-userfaultfd-unshare-all-pmds-for-hugetlbfs-when-register-wp +++ a/include/linux/hugetlb.h @@ -188,6 +188,7 @@ unsigned long hugetlb_change_protection( unsigned long address, unsigned long end, pgprot_t newprot); bool is_hugetlb_entry_migration(pte_t pte); +void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); #else /* !CONFIG_HUGETLB_PAGE */ @@ -369,6 +370,8 @@ static inline vm_fault_t hugetlb_fault(s return 0; } +static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { } + #endif /* !CONFIG_HUGETLB_PAGE */ /* * hugepages at page global directory. If arch support --- a/mm/hugetlb.c~hugetlb-userfaultfd-unshare-all-pmds-for-hugetlbfs-when-register-wp +++ a/mm/hugetlb.c @@ -5597,6 +5597,57 @@ void move_hugetlb_state(struct page *old } } +/* + * This function will unconditionally remove all the shared pmd pgtable entries + * within the specific vma for a hugetlbfs memory range. + */ +void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) +{ + struct hstate *h = hstate_vma(vma); + unsigned long sz = huge_page_size(h); + struct mm_struct *mm = vma->vm_mm; + struct mmu_notifier_range range; + unsigned long address, start, end; + spinlock_t *ptl; + pte_t *ptep; + + if (!(vma->vm_flags & VM_MAYSHARE)) + return; + + start = ALIGN(vma->vm_start, PUD_SIZE); + end = ALIGN_DOWN(vma->vm_end, PUD_SIZE); + + if (start >= end) + return; + + /* + * No need to call adjust_range_if_pmd_sharing_possible(), because + * we have already done the PUD_SIZE alignment. + */ + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, + start, end); + mmu_notifier_invalidate_range_start(&range); + i_mmap_lock_write(vma->vm_file->f_mapping); + for (address = start; address < end; address += PUD_SIZE) { + unsigned long tmp = address; + + ptep = huge_pte_offset(mm, address, sz); + if (!ptep) + continue; + ptl = huge_pte_lock(h, mm, ptep); + /* We don't want 'address' to be changed */ + huge_pmd_unshare(mm, vma, &tmp, ptep); + spin_unlock(ptl); + } + flush_hugetlb_tlb_range(vma, start, end); + i_mmap_unlock_write(vma->vm_file->f_mapping); + /* + * No need to call mmu_notifier_invalidate_range(), see + * Documentation/vm/mmu_notifier.rst. + */ + mmu_notifier_invalidate_range_end(&range); +} + #ifdef CONFIG_CMA static bool cma_reserve_called __initdata; _ Patches currently in -mm which might be from peterx@xxxxxxxxxx are hugetlb-pass-vma-into-huge_pte_alloc-and-huge_pmd_share.patch hugetlb-userfaultfd-forbid-huge-pmd-sharing-when-uffd-enabled.patch mm-hugetlb-move-flush_hugetlb_tlb_range-into-hugetlbh.patch hugetlb-userfaultfd-unshare-all-pmds-for-hugetlbfs-when-register-wp.patch