The patch titled
     Subject: hugetlb: do not update address in huge_pmd_unshare
has been added to the -mm mm-unstable branch.  Its filename is
     hugetlb-do-not-update-address-in-huge_pmd_unshare.patch

This patch will shortly appear at
     https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/hugetlb-do-not-update-address-in-huge_pmd_unshare.patch

This patch will later appear in the mm-unstable branch at
    git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days

------------------------------------------------------
From: Mike Kravetz <mike.kravetz@xxxxxxxxxx>
Subject: hugetlb: do not update address in huge_pmd_unshare
Date: Tue, 21 Jun 2022 16:56:19 -0700

As an optimization for loops sequentially processing hugetlb address
ranges, huge_pmd_unshare would update a passed address if it unshared a
pmd.  Updating a loop control variable outside the loop like this is
generally a bad idea.  These loops are now using hugetlb_mask_last_page
to optimize scanning when non-present ptes are discovered.  The same can
be done when huge_pmd_unshare returns 1 indicating a pmd was unshared.

Remove address update from huge_pmd_unshare.  Change the passed argument
type and update all callers.  In loops sequentially processing addresses
use hugetlb_mask_last_page to update address if pmd is unshared.
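To illustrate the caller pattern this change enables, here is a minimal
user-space sketch (not part of the patch; the PMD_SIZE/PUD_SIZE values and
the last_page_mask()/fake_huge_pmd_unshare() helpers are invented for the
example, assuming 2MB PMD-sized pages and 1GB PUDs):

/*
 * Illustrative only -- not kernel code.  Models how a caller loop can
 * skip an entire PUD_SIZE region after huge_pmd_unshare() reports that
 * it unshared a pmd, using the mask from hugetlb_mask_last_page().
 */
#include <stdio.h>

#define PMD_SIZE	(2UL << 20)	/* assumed 2MB PMD-sized huge pages */
#define PUD_SIZE	(1UL << 30)	/* assumed 1GB PUD */

/* stand-in for hugetlb_mask_last_page() when the page size is PMD_SIZE */
static unsigned long last_page_mask(void)
{
	return PUD_SIZE - PMD_SIZE;
}

/* pretend only the first PUD region in the range had a shared pmd */
static int fake_huge_pmd_unshare(unsigned long addr)
{
	return addr < PUD_SIZE;
}

int main(void)
{
	unsigned long last_addr_mask = last_page_mask();
	unsigned long address, end = 2 * PUD_SIZE;
	unsigned long visited = 0;

	for (address = 0; address < end; address += PMD_SIZE) {
		visited++;
		if (fake_huge_pmd_unshare(address)) {
			/*
			 * Jump to the last page of the cleared PUD_SIZE
			 * area; the loop increment then moves to the
			 * first page past it, as the updated callers do.
			 */
			address |= last_addr_mask;
			continue;
		}
		/* ... normal per-pte processing would go here ... */
	}
	printf("visited %lu of %lu ptes\n", visited, end / PMD_SIZE);
	return 0;
}

With these made-up sizes the loop visits 513 ptes instead of 1024,
mirroring the effect of the "address |= last_addr_mask" lines added in
the hunks below.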
Link: https://lkml.kernel.org/r/20220621235620.291305-4-mike.kravetz@xxxxxxxxxx
Signed-off-by: Mike Kravetz <mike.kravetz@xxxxxxxxxx>
Acked-by: Muchun Song <songmuchun@xxxxxxxxxxxxx>
Reviewed-by: Baolin Wang <baolin.wang@xxxxxxxxxxxxxxxxx>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@xxxxxxxxxxxxxxxxxx>
Cc: Anshuman Khandual <anshuman.khandual@xxxxxxx>
Cc: Catalin Marinas <catalin.marinas@xxxxxxx>
Cc: Christian Borntraeger <borntraeger@xxxxxxxxxxxxx>
Cc: David Hildenbrand <david@xxxxxxxxxx>
Cc: James Houghton <jthoughton@xxxxxxxxxx>
Cc: kernel test robot <lkp@xxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxx>
Cc: Mina Almasry <almasrymina@xxxxxxxxxx>
Cc: Naoya Horiguchi <naoya.horiguchi@xxxxxxxxx>
Cc: Paul Walmsley <paul.walmsley@xxxxxxxxxx>
Cc: Peter Xu <peterx@xxxxxxxxxx>
Cc: Rolf Eike Beer <eike-kernel@xxxxxxxxx>
Cc: Will Deacon <will@xxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 include/linux/hugetlb.h |    4 +--
 mm/hugetlb.c            |   46 +++++++++++++++-----------------
 mm/rmap.c               |    4 +--
 3 files changed, 23 insertions(+), 31 deletions(-)

--- a/include/linux/hugetlb.h~hugetlb-do-not-update-address-in-huge_pmd_unshare
+++ a/include/linux/hugetlb.h
@@ -199,7 +199,7 @@ pte_t *huge_pte_offset(struct mm_struct
 			unsigned long addr, unsigned long sz);
 unsigned long hugetlb_mask_last_page(struct hstate *h);
 int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
-				unsigned long *addr, pte_t *ptep);
+				unsigned long addr, pte_t *ptep);
 void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
 				unsigned long *start, unsigned long *end);
 struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
@@ -246,7 +246,7 @@ static inline struct address_space *huge
 
 static inline int huge_pmd_unshare(struct mm_struct *mm,
 					struct vm_area_struct *vma,
-					unsigned long *addr, pte_t *ptep)
+					unsigned long addr, pte_t *ptep)
 {
 	return 0;
 }
--- a/mm/hugetlb.c~hugetlb-do-not-update-address-in-huge_pmd_unshare
+++ a/mm/hugetlb.c
@@ -4945,7 +4945,6 @@ int move_hugetlb_page_tables(struct vm_a
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long old_end = old_addr + len;
 	unsigned long last_addr_mask;
-	unsigned long old_addr_copy;
 	pte_t *src_pte, *dst_pte;
 	struct mmu_notifier_range range;
 	bool shared_pmd = false;
@@ -4973,14 +4972,10 @@ int move_hugetlb_page_tables(struct vm_a
 		if (huge_pte_none(huge_ptep_get(src_pte)))
 			continue;
 
-		/* old_addr arg to huge_pmd_unshare() is a pointer and so the
-		 * arg may be modified. Pass a copy instead to preserve the
-		 * value in old_addr.
-		 */
-		old_addr_copy = old_addr;
-
-		if (huge_pmd_unshare(mm, vma, &old_addr_copy, src_pte)) {
+		if (huge_pmd_unshare(mm, vma, old_addr, src_pte)) {
 			shared_pmd = true;
+			old_addr |= last_addr_mask;
+			new_addr |= last_addr_mask;
 			continue;
 		}
 
@@ -5045,10 +5040,11 @@ static void __unmap_hugepage_range(struc
 		}
 
 		ptl = huge_pte_lock(h, mm, ptep);
-		if (huge_pmd_unshare(mm, vma, &address, ptep)) {
+		if (huge_pmd_unshare(mm, vma, address, ptep)) {
 			spin_unlock(ptl);
 			tlb_flush_pmd_range(tlb, address & PUD_MASK, PUD_SIZE);
 			force_flush = true;
+			address |= last_addr_mask;
 			continue;
 		}
 
@@ -6337,7 +6333,7 @@ unsigned long hugetlb_change_protection(
 			continue;
 		}
 		ptl = huge_pte_lock(h, mm, ptep);
-		if (huge_pmd_unshare(mm, vma, &address, ptep)) {
+		if (huge_pmd_unshare(mm, vma, address, ptep)) {
 			/*
 			 * When uffd-wp is enabled on the vma, unshare
 			 * shouldn't happen at all.  Warn about it if it
@@ -6347,6 +6343,7 @@ unsigned long hugetlb_change_protection(
 			pages++;
 			spin_unlock(ptl);
 			shared_pmd = true;
+			address |= last_addr_mask;
 			continue;
 		}
 		pte = huge_ptep_get(ptep);
@@ -6770,11 +6767,11 @@ out:
  *	    0 the underlying pte page is not shared, or it is the last user
  */
 int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
-					unsigned long *addr, pte_t *ptep)
+					unsigned long addr, pte_t *ptep)
 {
-	pgd_t *pgd = pgd_offset(mm, *addr);
-	p4d_t *p4d = p4d_offset(pgd, *addr);
-	pud_t *pud = pud_offset(p4d, *addr);
+	pgd_t *pgd = pgd_offset(mm, addr);
+	p4d_t *p4d = p4d_offset(pgd, addr);
+	pud_t *pud = pud_offset(p4d, addr);
 
 	i_mmap_assert_write_locked(vma->vm_file->f_mapping);
 	BUG_ON(page_count(virt_to_page(ptep)) == 0);
@@ -6784,14 +6781,6 @@ int huge_pmd_unshare(struct mm_struct *m
 	pud_clear(pud);
 	put_page(virt_to_page(ptep));
 	mm_dec_nr_pmds(mm);
-	/*
-	 * This update of passed address optimizes loops sequentially
-	 * processing addresses in increments of huge page size (PMD_SIZE
-	 * in this case).  By clearing the pud, a PUD_SIZE area is unmapped.
-	 * Update address to the 'last page' in the cleared area so that
-	 * calling loop can move to first page past this area.
-	 */
-	*addr |= PUD_SIZE - PMD_SIZE;
 	return 1;
 }
 
@@ -6803,7 +6792,7 @@ pte_t *huge_pmd_share(struct mm_struct *
 }
 
 int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
-					unsigned long *addr, pte_t *ptep)
+					unsigned long addr, pte_t *ptep)
 {
 	return 0;
 }
@@ -6910,6 +6899,12 @@ unsigned long hugetlb_mask_last_page(str
 /* See description above.  Architectures can provide their own version. */
 __weak unsigned long hugetlb_mask_last_page(struct hstate *h)
 {
+	unsigned long hp_size = huge_page_size(h);
+
+#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+	if (hp_size == PMD_SIZE)
+		return PUD_SIZE - PMD_SIZE;
+#endif
 	return 0UL;
 }
 
@@ -7136,14 +7131,11 @@ void hugetlb_unshare_all_pmds(struct vm_
 	mmu_notifier_invalidate_range_start(&range);
 	i_mmap_lock_write(vma->vm_file->f_mapping);
 	for (address = start; address < end; address += PUD_SIZE) {
-		unsigned long tmp = address;
-
 		ptep = huge_pte_offset(mm, address, sz);
 		if (!ptep)
 			continue;
 		ptl = huge_pte_lock(h, mm, ptep);
-		/* We don't want 'address' to be changed */
-		huge_pmd_unshare(mm, vma, &tmp, ptep);
+		huge_pmd_unshare(mm, vma, address, ptep);
 		spin_unlock(ptl);
 	}
 	flush_hugetlb_tlb_range(vma, start, end);
--- a/mm/rmap.c~hugetlb-do-not-update-address-in-huge_pmd_unshare
+++ a/mm/rmap.c
@@ -1559,7 +1559,7 @@ static bool try_to_unmap_one(struct foli
 			 * do this outside rmap routines.
 			 */
 			VM_BUG_ON(!anon && !(flags & TTU_RMAP_LOCKED));
-			if (!anon && huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
+			if (!anon && huge_pmd_unshare(mm, vma, address, pvmw.pte)) {
 				flush_tlb_range(vma, range.start, range.end);
 				mmu_notifier_invalidate_range(mm, range.start, range.end);
@@ -1922,7 +1922,7 @@ static bool try_to_migrate_one(struct fo
 			 * do this outside rmap routines.
 			 */
 			VM_BUG_ON(!anon && !(flags & TTU_RMAP_LOCKED));
-			if (!anon && huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
+			if (!anon && huge_pmd_unshare(mm, vma, address, pvmw.pte)) {
 				flush_tlb_range(vma, range.start, range.end);
 				mmu_notifier_invalidate_range(mm, range.start, range.end);
_

Patches currently in -mm which might be from mike.kravetz@xxxxxxxxxx are

hugetlb-skip-to-end-of-pt-page-mapping-when-pte-not-present.patch
hugetlb-do-not-update-address-in-huge_pmd_unshare.patch
hugetlb-lazy-page-table-copies-in-fork.patch