From: Zi Yan <ziy@xxxxxxxxxx>

The old design uses the doubly linked list page->lru to chain all
deposited page table pages when creating a THP and page->pmd_huge_pte
to point to the first page of the list. Because the second pointer in
page->lru overlaps with page->pmd_huge_pte, the design prevents
multi-level page table page deposit, which is useful for PUD and
higher level THPs.

The new design uses a singly linked list, where deposit_head points to
a list of deposited pages and deposit_node is used to deposit the page
itself onto another page's list. For example, this allows one PUD page
to point to a list of PMD pages, each of which points to a list of PTE
pages, to support PUD level THP.

Signed-off-by: Zi Yan <ziy@xxxxxxxxxx>
---
 include/linux/mm.h       |  9 +++++----
 include/linux/mm_types.h |  8 +++++---
 kernel/fork.c            |  4 ++--
 mm/pgtable-generic.c     | 15 +++++----------
 4 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 17e712207d74..01b62da34794 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -10,6 +10,7 @@
 #include <linux/gfp.h>
 #include <linux/bug.h>
 #include <linux/list.h>
+#include <linux/llist.h>
 #include <linux/mmzone.h>
 #include <linux/rbtree.h>
 #include <linux/atomic.h>
@@ -2249,7 +2250,7 @@ static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
 static inline bool pmd_ptlock_init(struct page *page)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-	page->pmd_huge_pte = NULL;
+	init_llist_head(&page->deposit_head);
 #endif
 	return ptlock_init(page);
 }
@@ -2257,12 +2258,12 @@ static inline bool pmd_ptlock_init(struct page *page)
 static inline void pmd_ptlock_free(struct page *page)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-	VM_BUG_ON_PAGE(page->pmd_huge_pte, page);
+	VM_BUG_ON_PAGE(!llist_empty(&page->deposit_head), page);
 #endif
 	ptlock_free(page);
 }
 
-#define pmd_huge_pte(mm, pmd) (pmd_to_page(pmd)->pmd_huge_pte)
+#define huge_pmd_deposit_head(mm, pmd) (pmd_to_page(pmd)->deposit_head)
 
 #else
 
@@ -2274,7 +2275,7 @@ static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
 static inline bool pmd_ptlock_init(struct page *page) { return true; }
 static inline void pmd_ptlock_free(struct page *page) {}
 
-#define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte)
+#define huge_pmd_deposit_head(mm, pmd) ((mm)->deposit_head_pmd)
 
 #endif
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 496c3ff97cce..be842926577a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -6,6 +6,7 @@
 
 #include <linux/auxvec.h>
 #include <linux/list.h>
+#include <linux/llist.h>
 #include <linux/spinlock.h>
 #include <linux/rbtree.h>
 #include <linux/rwsem.h>
@@ -143,8 +144,8 @@ struct page {
 			struct list_head deferred_list;
 		};
 		struct {	/* Page table pages */
-			unsigned long _pt_pad_1;	/* compound_head */
-			pgtable_t pmd_huge_pte; /* protected by page->ptl */
+			struct llist_head deposit_head; /* pgtable deposit list head */
+			struct llist_node deposit_node; /* pgtable deposit list node */
 			unsigned long _pt_pad_2;	/* mapping */
 			union {
 				struct mm_struct *pt_mm; /* x86 pgds only */
@@ -511,7 +512,8 @@ struct mm_struct {
 		struct mmu_notifier_subscriptions *notifier_subscriptions;
 #endif
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
-		pgtable_t pmd_huge_pte; /* protected by page_table_lock */
+		/* pgtable deposit list head, protected by page_table_lock */
+		struct llist_head deposit_head_pmd;
 #endif
 #ifdef CONFIG_NUMA_BALANCING
 		/*
diff --git a/kernel/fork.c b/kernel/fork.c
index 138cd6ca50da..9c8e880538de 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -661,7 +661,7 @@ static void check_mm(struct mm_struct *mm)
 				mm_pgtables_bytes(mm));
 
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
-	VM_BUG_ON_MM(mm->pmd_huge_pte, mm);
+	VM_BUG_ON_MM(!llist_empty(&mm->deposit_head_pmd), mm);
 #endif
 }
 
@@ -1022,7 +1022,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	mmu_notifier_subscriptions_init(mm);
 	init_tlb_flush_pending(mm);
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
-	mm->pmd_huge_pte = NULL;
+	init_llist_head(&mm->deposit_head_pmd);
 #endif
 	mm_init_uprobes_state(mm);
 
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index 9578db83e312..dbb0154165f1 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -164,11 +164,7 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
 	assert_spin_locked(pmd_lockptr(mm, pmdp));
 
 	/* FIFO */
-	if (!pmd_huge_pte(mm, pmdp))
-		INIT_LIST_HEAD(&pgtable->lru);
-	else
-		list_add(&pgtable->lru, &pmd_huge_pte(mm, pmdp)->lru);
-	pmd_huge_pte(mm, pmdp) = pgtable;
+	llist_add(&pgtable->deposit_node, &huge_pmd_deposit_head(mm, pmdp));
 }
 #endif
 
@@ -180,12 +176,11 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
 
 	assert_spin_locked(pmd_lockptr(mm, pmdp));
 
+	/* only withdraw from a non empty list */
+	VM_BUG_ON(llist_empty(&huge_pmd_deposit_head(mm, pmdp)));
 	/* FIFO */
-	pgtable = pmd_huge_pte(mm, pmdp);
-	pmd_huge_pte(mm, pmdp) = list_first_entry_or_null(&pgtable->lru,
-							  struct page, lru);
-	if (pmd_huge_pte(mm, pmdp))
-		list_del(&pgtable->lru);
+	pgtable = llist_entry(llist_del_first(&huge_pmd_deposit_head(mm, pmdp)),
+			      struct page, deposit_node);
 	return pgtable;
 }
 #endif
-- 
2.28.0
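
[Editor's note, not part of the patch] For readers less familiar with llist, below is a
minimal user-space sketch of how a per-page deposit_head plus deposit_node enables
multi-level deposit: a PUD page chaining PMD pages that in turn chain PTE pages. The
llist helpers here are simplified, non-atomic stand-ins for the kernel's <linux/llist.h>
API of the same names (in the kernel the deposit list is serialized by the PMD ptlock,
as the assert_spin_locked() calls above show), and struct pt_page is a hypothetical
stand-in for the page table page fields this patch adds to struct page.

/*
 * Minimal user-space sketch (not kernel code) of the deposit scheme
 * described above.  The llist helpers are simplified, non-atomic
 * stand-ins for the kernel API of the same names, and struct pt_page
 * is a hypothetical stand-in for the page table page fields in struct page.
 */
#include <stdio.h>
#include <stddef.h>

struct llist_node { struct llist_node *next; };
struct llist_head { struct llist_node *first; };

static void init_llist_head(struct llist_head *h) { h->first = NULL; }
static int llist_empty(const struct llist_head *h) { return h->first == NULL; }

/* Push a node onto the front of a singly linked list. */
static void llist_add(struct llist_node *n, struct llist_head *h)
{
	n->next = h->first;
	h->first = n;
}

/* Pop the most recently added node, or return NULL if the list is empty. */
static struct llist_node *llist_del_first(struct llist_head *h)
{
	struct llist_node *n = h->first;

	if (n)
		h->first = n->next;
	return n;
}

#define llist_entry(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Stand-in for the page table page fields added to struct page. */
struct pt_page {
	struct llist_head deposit_head;	/* pages deposited under this one */
	struct llist_node deposit_node;	/* links this page into a parent's list */
	int id;
};

int main(void)
{
	struct pt_page pud = { .id = 0 };
	struct pt_page pmd[2], pte[4];
	int i;

	init_llist_head(&pud.deposit_head);

	/* Deposit two PMD pages under the PUD page, two PTE pages under each PMD page. */
	for (i = 0; i < 2; i++) {
		pmd[i].id = 10 + i;
		init_llist_head(&pmd[i].deposit_head);
		llist_add(&pmd[i].deposit_node, &pud.deposit_head);

		pte[2 * i].id = 100 + 2 * i;
		pte[2 * i + 1].id = 101 + 2 * i;
		init_llist_head(&pte[2 * i].deposit_head);
		init_llist_head(&pte[2 * i + 1].deposit_head);
		llist_add(&pte[2 * i].deposit_node, &pmd[i].deposit_head);
		llist_add(&pte[2 * i + 1].deposit_node, &pmd[i].deposit_head);
	}

	/* Withdraw: pop each PMD page from the PUD page's list, then its PTE pages. */
	while (!llist_empty(&pud.deposit_head)) {
		struct pt_page *p = llist_entry(llist_del_first(&pud.deposit_head),
						struct pt_page, deposit_node);

		printf("withdrew PMD page %d\n", p->id);
		while (!llist_empty(&p->deposit_head)) {
			struct pt_page *q = llist_entry(llist_del_first(&p->deposit_head),
							struct pt_page, deposit_node);

			printf("  withdrew PTE page %d\n", q->id);
		}
	}
	return 0;
}

The point the example illustrates is that each deposited page still owns its own
deposit_head, so a page linked into a parent's list via deposit_node can simultaneously
carry a list of lower-level pages, which the old page->lru scheme could not express.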