Currently, __split_huge_pmd_locked() relies on a page fault to handle
file-backed THP: splitting the PMD requires allocating a new pgtable,
so the PMD is simply cleared and small pages are faulted back in later.
This patch allows the callers of __split_huge_pmd_locked() and
split_huge_pmd_address() to preallocate the pgtable, so that the
refault is no longer required. This is useful when the caller of
split_huge_pmd_address() would like to use small pages before the
refault.

Signed-off-by: Song Liu <songliubraving@xxxxxx>
---
 include/linux/huge_mm.h |  5 +++--
 mm/huge_memory.c        | 33 +++++++++++++++++++++++----------
 mm/rmap.c               |  2 +-
 3 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 7cd5c150c21d..2d8a40fd06e4 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -161,7 +161,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 
 void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
-		bool freeze, struct page *page);
+		bool freeze, struct page *page, pgtable_t prealloc_pgtable);
 
 void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
 		unsigned long address);
@@ -299,7 +299,8 @@ static inline void deferred_split_huge_page(struct page *page) {}
 static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long address, bool freeze, struct page *page) {}
 static inline void split_huge_pmd_address(struct vm_area_struct *vma,
-		unsigned long address, bool freeze, struct page *page) {}
+		unsigned long address, bool freeze, struct page *page,
+		pgtable_t prealloc_pgtable) {}
 
 #define split_huge_pud(__vma, __pmd, __address)	\
 	do { } while (0)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9f8bce9a6b32..dcb0e30213af 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2118,7 +2118,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
 }
 
 static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
-		unsigned long haddr, bool freeze)
+		unsigned long haddr, bool freeze, pgtable_t prealloc_pgtable)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct page *page;
@@ -2133,10 +2133,15 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 	VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma);
 	VM_BUG_ON(!is_pmd_migration_entry(*pmd) && !pmd_trans_huge(*pmd)
 				&& !pmd_devmap(*pmd));
+	/* only file-backed VMAs need a preallocated pgtable */
+	VM_BUG_ON(vma_is_anonymous(vma) && prealloc_pgtable);
 
 	count_vm_event(THP_SPLIT_PMD);
 
-	if (!vma_is_anonymous(vma)) {
+	if (prealloc_pgtable) {
+		pgtable_trans_huge_deposit(mm, pmd, prealloc_pgtable);
+		mm_inc_nr_pmds(mm);
+	} else if (!vma_is_anonymous(vma)) {
 		_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
 		/*
 		 * We are going to unmap this huge page. So
@@ -2277,8 +2282,9 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 	}
 }
 
-void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-		unsigned long address, bool freeze, struct page *page)
+static void ____split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
+		unsigned long address, bool freeze, struct page *page,
+		pgtable_t prealloc_pgtable)
 {
 	spinlock_t *ptl;
 	struct mmu_notifier_range range;
@@ -2303,7 +2309,8 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			clear_page_mlock(page);
 	} else if (!(pmd_devmap(*pmd) || is_pmd_migration_entry(*pmd)))
 		goto out;
-	__split_huge_pmd_locked(vma, pmd, range.start, freeze);
+	__split_huge_pmd_locked(vma, pmd, range.start, freeze,
+			prealloc_pgtable);
 out:
 	spin_unlock(ptl);
 	/*
@@ -2322,8 +2329,14 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 	mmu_notifier_invalidate_range_only_end(&range);
 }
 
+void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
+		unsigned long address, bool freeze, struct page *page)
+{
+	____split_huge_pmd(vma, pmd, address, freeze, page, NULL);
+}
+
 void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
-		bool freeze, struct page *page)
+		bool freeze, struct page *page, pgtable_t prealloc_pgtable)
 {
 	pgd_t *pgd;
 	p4d_t *p4d;
@@ -2344,7 +2357,7 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
 
 	pmd = pmd_offset(pud, address);
 
-	__split_huge_pmd(vma, pmd, address, freeze, page);
+	____split_huge_pmd(vma, pmd, address, freeze, page, prealloc_pgtable);
 }
 
 void vma_adjust_trans_huge(struct vm_area_struct *vma,
@@ -2360,7 +2373,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
 	if (start & ~HPAGE_PMD_MASK &&
 	    (start & HPAGE_PMD_MASK) >= vma->vm_start &&
 	    (start & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= vma->vm_end)
-		split_huge_pmd_address(vma, start, false, NULL);
+		split_huge_pmd_address(vma, start, false, NULL, NULL);
 
 	/*
 	 * If the new end address isn't hpage aligned and it could
@@ -2370,7 +2383,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
 	if (end & ~HPAGE_PMD_MASK &&
 	    (end & HPAGE_PMD_MASK) >= vma->vm_start &&
 	    (end & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= vma->vm_end)
-		split_huge_pmd_address(vma, end, false, NULL);
+		split_huge_pmd_address(vma, end, false, NULL, NULL);
 
 	/*
 	 * If we're also updating the vma->vm_next->vm_start, if the new
@@ -2384,7 +2397,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
 		if (nstart & ~HPAGE_PMD_MASK &&
 		    (nstart & HPAGE_PMD_MASK) >= next->vm_start &&
 		    (nstart & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= next->vm_end)
-			split_huge_pmd_address(next, nstart, false, NULL);
+			split_huge_pmd_address(next, nstart, false, NULL, NULL);
 	}
 }
diff --git a/mm/rmap.c b/mm/rmap.c
index e5dfe2ae6b0d..6970d732507c 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1361,7 +1361,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 
 	if (flags & TTU_SPLIT_HUGE_PMD) {
 		split_huge_pmd_address(vma, address,
-				flags & TTU_SPLIT_FREEZE, page);
+				flags & TTU_SPLIT_FREEZE, page, NULL);
 	}
 
 	/*
-- 
2.17.1
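
For illustration only (not part of this patch): a caller could use the new
prealloc_pgtable argument roughly as sketched below. The helper name
split_file_pmd_prealloc(), the includes, and the error handling are
assumptions made for the example; only the split_huge_pmd_address()
signature comes from the patch above.

#include <linux/mm.h>
#include <linux/huge_mm.h>
#include <asm/pgalloc.h>

/*
 * Hypothetical caller: preallocate the pgtable before splitting, so
 * that splitting a file-backed PMD does not rely on a later refault.
 * The caller is expected to hold mmap_sem, as for other users of
 * split_huge_pmd_address().
 */
static int split_file_pmd_prealloc(struct vm_area_struct *vma,
				   unsigned long address)
{
	struct mm_struct *mm = vma->vm_mm;
	pgtable_t pgtable;

	/* Allocate the PTE page table up front, outside the PMD lock. */
	pgtable = pte_alloc_one(mm);
	if (!pgtable)
		return -ENOMEM;

	/*
	 * With this patch, a file-backed PMD-mapped THP at @address is
	 * split into PTEs backed by the deposited table, instead of
	 * being cleared and refaulted later.
	 */
	split_huge_pmd_address(vma, address, false, NULL, pgtable);

	return 0;
}

If no PMD-mapped THP is present at the address, the preallocated table is
not consumed by split_huge_pmd_address(); how the caller detects that and
frees the table (e.g. with pte_free()) is outside the scope of this sketch.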