From: Zi Yan <ziy@xxxxxxxxxx>

COW on 1GB THPs will fall back to 2MB THPs if 1GB THP is not available.

Signed-off-by: Zi Yan <ziy@xxxxxxxxxx>
---
 arch/x86/include/asm/pgalloc.h |  9 ++++++
 include/linux/huge_mm.h        |  5 ++++
 mm/huge_memory.c               | 54 ++++++++++++++++++++++++++++++++++
 mm/memory.c                    |  2 +-
 mm/swapfile.c                  |  4 ++-
 5 files changed, 72 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index fae13467d3e1..31221269c387 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -98,6 +98,15 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
+static inline void pud_populate_with_pgtable(struct mm_struct *mm, pud_t *pud,
+				struct page *pte)
+{
+	unsigned long pfn = page_to_pfn(pte);
+
+	paravirt_alloc_pmd(mm, pfn);
+	set_pud(pud, __pud(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE));
+}
+
 #if CONFIG_PGTABLE_LEVELS > 2
 static inline pmd_t *pmd_alloc_one_page_with_ptes(struct mm_struct *mm, unsigned long addr)
 {
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 7528652400e4..0c20a8ea6911 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -19,6 +19,7 @@ extern int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
 extern void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud);
 extern int do_huge_pud_anonymous_page(struct vm_fault *vmf);
+extern vm_fault_t do_huge_pud_wp_page(struct vm_fault *vmf, pud_t orig_pud);
 #else
 static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
 {
@@ -27,6 +28,10 @@ extern int do_huge_pud_anonymous_page(struct vm_fault *vmf)
 {
 	return VM_FAULT_FALLBACK;
 }
+extern vm_fault_t do_huge_pud_wp_page(struct vm_fault *vmf, pud_t orig_pud)
+{
+	return VM_FAULT_FALLBACK;
+}
 #endif
 
 extern vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index ec3847392208..6da9b02501b7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1334,6 +1334,60 @@ void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
 unlock:
 	spin_unlock(vmf->ptl);
 }
+
+vm_fault_t do_huge_pud_wp_page(struct vm_fault *vmf, pud_t orig_pud)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct page *page = NULL;
+	unsigned long haddr = vmf->address & HPAGE_PUD_MASK;
+
+	vmf->ptl = pud_lockptr(vma->vm_mm, vmf->pud);
+	VM_BUG_ON_VMA(!vma->anon_vma, vma);
+
+	if (is_huge_zero_pud(orig_pud))
+		goto fallback;
+
+	spin_lock(vmf->ptl);
+
+	if (unlikely(!pud_same(*vmf->pud, orig_pud))) {
+		spin_unlock(vmf->ptl);
+		return 0;
+	}
+
+	page = pud_page(orig_pud);
+	VM_BUG_ON_PAGE(!PageCompound(page) || !PageHead(page), page);
+
+	/* Lock page for reuse_swap_page() */
+	if (!trylock_page(page)) {
+		get_page(page);
+		spin_unlock(vmf->ptl);
+		lock_page(page);
+		spin_lock(vmf->ptl);
+		if (unlikely(!pud_same(*vmf->pud, orig_pud))) {
+			unlock_page(page);
+			put_page(page);
+			return 0;
+		}
+		put_page(page);
+	}
+	if (reuse_swap_page(page, NULL)) {
+		pud_t entry;
+
+		entry = pud_mkyoung(orig_pud);
+		entry = maybe_pud_mkwrite(pud_mkdirty(entry), vma);
+		if (pudp_set_access_flags(vma, haddr, vmf->pud, entry, 1))
+			update_mmu_cache_pud(vma, vmf->address, vmf->pud);
+		unlock_page(page);
+		spin_unlock(vmf->ptl);
+		return VM_FAULT_WRITE;
+	}
+	unlock_page(page);
+	spin_unlock(vmf->ptl);
+fallback:
+	__split_huge_pud(vma, vmf->pud, vmf->address);
+	return VM_FAULT_FALLBACK;
+}
+
 #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
 
 void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd)
diff --git a/mm/memory.c b/mm/memory.c
index 6f86294438fd..b88587256bc1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4165,7 +4165,7 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	/* No support for anonymous transparent PUD pages yet */
 	if (vma_is_anonymous(vmf->vma))
-		return VM_FAULT_FALLBACK;
+		return do_huge_pud_wp_page(vmf, orig_pud);
 	if (vmf->vma->vm_ops->huge_fault)
 		return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 20012c0c0252..e3f771c2ad83 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1635,7 +1635,9 @@ static int page_trans_huge_map_swapcount(struct page *page, int *total_mapcount,
 	/* hugetlbfs shouldn't call it */
 	VM_BUG_ON_PAGE(PageHuge(page), page);
 
-	if (!IS_ENABLED(CONFIG_THP_SWAP) || likely(!PageTransCompound(page))) {
+	if (!IS_ENABLED(CONFIG_THP_SWAP) ||
+		unlikely(compound_order(compound_head(page)) == HPAGE_PUD_ORDER) ||
+		likely(!PageTransCompound(page))) {
 		mapcount = page_trans_huge_mapcount(page, total_mapcount);
 		if (PageSwapCache(page))
 			swapcount = page_swapcount(page);
-- 
2.28.0
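Not part of the patch: a minimal userspace sketch of how the new COW path would
be exercised. A write after fork() on a huge anonymous mapping takes the
write-protect fault that wp_huge_pud() now forwards to do_huge_pud_wp_page();
whether the region is actually backed by a 1GB (PUD-level) THP depends on the
rest of this series and on the system's THP configuration, so the 1GB size and
the MADV_HUGEPAGE hint below are illustrative assumptions only.

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

#define ONE_GB (1UL << 30)

int main(void)
{
	/* Over-allocate so a 1GB-aligned window can be carved out. */
	size_t len = 2 * ONE_GB;
	char *raw = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (raw == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	char *buf = (char *)(((unsigned long)raw + ONE_GB - 1) & ~(ONE_GB - 1));

	madvise(buf, ONE_GB, MADV_HUGEPAGE);	/* hint THP for the window */
	memset(buf, 0xaa, ONE_GB);		/* fault the region in */

	if (fork() == 0) {
		/*
		 * The child's write to the now-shared mapping takes a
		 * write-protect fault; with this patch a shared PUD mapping
		 * that cannot be reused is split and the fault is retried at
		 * 2MB (or smaller) granularity.
		 */
		buf[0] = 0x55;
		_exit(0);
	}
	wait(NULL);
	printf("parent still sees 0x%02x\n", (unsigned char)buf[0]);
	return 0;
}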