PMD sharing can only be done in PUD_SIZE-aligned pieces of VMAs; however, it is possible that HugeTLB VMAs are split without unsharing the PMDs first. In some (most?) cases, this is a non-issue, like userfaultfd_register and mprotect, where PMDs are unshared before anything is done. However, mbind() and madvise() (like MADV_DONTDUMP) can cause a split without unsharing first. It might seem ideal to unshare in hugetlb_vm_op_open, but that would only unshare PMDs in the new VMA. Signed-off-by: James Houghton <jthoughton@xxxxxxxxxx> --- mm/hugetlb.c | 42 +++++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index b39b74e0591a..bf7a1f628357 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -94,6 +94,8 @@ static int hugetlb_acct_memory(struct hstate *h, long delta); static void hugetlb_vma_lock_free(struct vm_area_struct *vma); static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma); static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma); +static void hugetlb_unshare_pmds(struct vm_area_struct *vma, + unsigned long start, unsigned long end); static inline bool subpool_is_free(struct hugepage_subpool *spool) { @@ -4828,6 +4830,23 @@ static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr) { if (addr & ~(huge_page_mask(hstate_vma(vma)))) return -EINVAL; + + /* We require PUD_SIZE VMA alignment for PMD sharing. */ + if (addr & ~PUD_MASK) { + /* + * hugetlb_vm_op_split is called right before we attempt to + * split the VMA. We will need to unshare PMDs in the old and + * new VMAs, so let's unshare before we split. + */ + unsigned long floor = addr & PUD_MASK; + unsigned long ceil = floor + PUD_SIZE; + + if (floor < vma->vm_start || ceil >= vma->vm_end) + /* PMD sharing is already impossible. */ + return 0; + hugetlb_unshare_pmds(vma, floor, ceil); + } + return 0; } @@ -7313,26 +7332,21 @@ void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int re } } -/* - * This function will unconditionally remove all the shared pmd pgtable entries - * within the specific vma for a hugetlbfs memory range. - */ -void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) +static void hugetlb_unshare_pmds(struct vm_area_struct *vma, + unsigned long start, + unsigned long end) { struct hstate *h = hstate_vma(vma); unsigned long sz = huge_page_size(h); struct mm_struct *mm = vma->vm_mm; struct mmu_notifier_range range; - unsigned long address, start, end; + unsigned long address; spinlock_t *ptl; pte_t *ptep; if (!(vma->vm_flags & VM_MAYSHARE)) return; - start = ALIGN(vma->vm_start, PUD_SIZE); - end = ALIGN_DOWN(vma->vm_end, PUD_SIZE); - if (start >= end) return; @@ -7364,6 +7378,16 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) mmu_notifier_invalidate_range_end(&range); } +/* + * This function will unconditionally remove all the shared pmd pgtable entries + * within the specific vma for a hugetlbfs memory range. + */ +void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) +{ + hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE), + ALIGN_DOWN(vma->vm_end, PUD_SIZE)); +} + #ifdef CONFIG_CMA static bool cma_reserve_called __initdata; -- 2.39.0.314.g84b9a713c41-goog