From: Zi Yan <ziy@xxxxxxxxxx>

We deposit 512 PMD pages, each of which has 512 PTE pages deposited in its
->deposit_head, to mm->deposit_head_pud. They will be withdrawn and used
when a PUD THP is split into 512 PMD THPs. In this way, when any of the 512
PMD THPs is split further, we will use the existing code path to withdraw
PTE pages for use.

Signed-off-by: Zi Yan <ziy@xxxxxxxxxx>
---
 include/linux/mm.h       |  2 ++
 include/linux/mm_types.h |  3 +++
 include/linux/pgtable.h  |  3 +++
 kernel/fork.c            |  6 ++++++
 mm/pgtable-generic.c     | 23 +++++++++++++++++++++++
 5 files changed, 37 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 01b62da34794..8f54f06c8eb6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2321,6 +2321,8 @@ static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud)
 	return ptl;
 }
 
+#define huge_pud_deposit_head(mm, pud) ((mm)->deposit_head_pud)
+
 extern void __init pagecache_init(void);
 extern void __init free_area_init_memoryless_node(int nid);
 extern void free_initmem(void);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index be842926577a..5ff4dd6a3e32 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -515,6 +515,9 @@ struct mm_struct {
 		/* pgtable deposit list head, protected by page_table_lock */
 		struct llist_head deposit_head_pmd;
 #endif
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+		struct llist_head deposit_head_pud; /* protected by page_table_lock */
+#endif
 #ifdef CONFIG_NUMA_BALANCING
 		/*
 		 * numa_next_scan is the next time that the PTEs will be marked
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 177eab8e1c31..1f6d46465c54 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -465,10 +465,13 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
 #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
 extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
 				       pgtable_t pgtable);
+extern void pgtable_trans_huge_pud_deposit(struct mm_struct *mm, pud_t *pudp,
+				       pgtable_t pgtable);
 #endif
 
 #ifndef __HAVE_ARCH_PGTABLE_WITHDRAW
 extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+extern pgtable_t pgtable_trans_huge_pud_withdraw(struct mm_struct *mm, pud_t *pudp);
 #endif
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/kernel/fork.c b/kernel/fork.c
index 9c8e880538de..86fbeec751ef 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -663,6 +663,9 @@ static void check_mm(struct mm_struct *mm)
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
 	VM_BUG_ON_MM(!llist_empty(&mm->deposit_head_pmd), mm);
 #endif
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+	VM_BUG_ON_MM(!llist_empty(&mm->deposit_head_pud), mm);
+#endif
 }
 
 #define allocate_mm()	(kmem_cache_alloc(mm_cachep, GFP_KERNEL))
@@ -1023,6 +1026,9 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	init_tlb_flush_pending(mm);
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
 	init_llist_head(&mm->deposit_head_pmd);
+#endif
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+	init_llist_head(&mm->deposit_head_pud);
 #endif
 	mm_init_uprobes_state(mm);
 
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index dbb0154165f1..a014cf847067 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -166,6 +166,15 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
 	/* FIFO */
 	llist_add(&pgtable->deposit_node, &huge_pmd_deposit_head(mm, pmdp));
 }
+
+void pgtable_trans_huge_pud_deposit(struct mm_struct *mm, pud_t *pudp,
+				    pgtable_t pgtable)
+{
+	assert_spin_locked(pud_lockptr(mm, pudp));
+
+	/* LIFO: llist_add() inserts at the head of the PUD deposit list */
+	llist_add(&pgtable->deposit_node, &huge_pud_deposit_head(mm, pudp));
+}
 #endif
 
 #ifndef __HAVE_ARCH_PGTABLE_WITHDRAW
@@ -183,6 +192,20 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
 			      struct page, deposit_node);
 	return pgtable;
 }
+
+pgtable_t pgtable_trans_huge_pud_withdraw(struct mm_struct *mm, pud_t *pudp)
+{
+	pgtable_t pgtable;
+
+	assert_spin_locked(pud_lockptr(mm, pudp));
+
+	/* only withdraw from a non empty list */
+	VM_BUG_ON(llist_empty(&huge_pud_deposit_head(mm, pudp)));
+	/* LIFO: llist_del_first() removes the head of the PUD deposit list */
+	pgtable = llist_entry(llist_del_first(&huge_pud_deposit_head(mm, pudp)),
+			      struct page, deposit_node);
+	return pgtable;
+}
 #endif
 
 #ifndef __HAVE_ARCH_PMDP_INVALIDATE
-- 
2.28.0