On Tue, 20 Jun 2023 00:51:19 -0700 (PDT) Hugh Dickins <hughd@xxxxxxxxxx> wrote:

[...]

> @@ -407,6 +429,77 @@ void __tlb_remove_table(void *_table)
>  		__free_page(page);
>  }
>
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +static void pte_free_now0(struct rcu_head *head);
> +static void pte_free_now1(struct rcu_head *head);
> +
> +static void pte_free_pgste(struct rcu_head *head)
> +{
> +	unsigned long *table;
> +	struct page *page;
> +
> +	page = container_of(head, struct page, rcu_head);
> +	table = (unsigned long *)page_to_virt(page);
> +	table = (unsigned long *)((unsigned long)table | 0x03U);
> +	__tlb_remove_table(table);
> +}
> +
> +static void pte_free_half(struct rcu_head *head, unsigned int bit)
> +{
> +	unsigned long *table;
> +	struct page *page;
> +	unsigned int mask;
> +
> +	page = container_of(head, struct page, rcu_head);
> +	mask = atomic_xor_bits(&page->_refcount, 0x04U << (bit + 24));
> +
> +	table = (unsigned long *)page_to_virt(page);
> +	table += bit * PTRS_PER_PTE;
> +	table = (unsigned long *)((unsigned long)table | (0x01U << bit));
> +	__tlb_remove_table(table);
> +
> +	/* If pte_free_defer() of the other half came in, queue it now */
> +	if (mask & 0x0CU)
> +		call_rcu(&page->rcu_head, bit ? pte_free_now0 : pte_free_now1);
> +}
> +
> +static void pte_free_now0(struct rcu_head *head)
> +{
> +	pte_free_half(head, 0);
> +}
> +
> +static void pte_free_now1(struct rcu_head *head)
> +{
> +	pte_free_half(head, 1);
> +}
> +
> +void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable)
> +{
> +	unsigned int bit, mask;
> +	struct page *page;
> +
> +	page = virt_to_page(pgtable);
> +	if (mm_alloc_pgste(mm)) {
> +		call_rcu(&page->rcu_head, pte_free_pgste);

So is this now going to be used to free page tables instead of
page_table_free_rcu()? Or will it be used instead of page_table_free()?

This is actually quite important for KVM on s390.

> +		return;
> +	}
> +	bit = ((unsigned long)pgtable & ~PAGE_MASK) /
> +			(PTRS_PER_PTE * sizeof(pte_t));
> +
> +	spin_lock_bh(&mm_pgtable_list_lock);
> +	mask = atomic_xor_bits(&page->_refcount, 0x15U << (bit + 24));
> +	mask >>= 24;
> +	/* Other half not allocated? Other half not already pending free? */
> +	if ((mask & 0x03U) == 0x00U && (mask & 0x30U) != 0x30U)
> +		list_del(&page->lru);
> +	spin_unlock_bh(&mm_pgtable_list_lock);
> +
> +	/* Do not relink on rcu_head if other half already linked on rcu_head */
> +	if ((mask & 0x0CU) != 0x0CU)
> +		call_rcu(&page->rcu_head, bit ? pte_free_now1 : pte_free_now0);
> +}
> +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
> +
>  /*
>   * Base infrastructure required to generate basic asces, region, segment,
>   * and page tables that do not make use of enhanced features like EDAT1.
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 306a3d1a0fa6..1667a1bdb8a8 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -146,7 +146,7 @@ struct page {
>  			pgtable_t pmd_huge_pte; /* protected by page->ptl */
>  			unsigned long _pt_pad_2;	/* mapping */
>  			union {
> -				struct mm_struct *pt_mm; /* x86 pgds only */
> +				struct mm_struct *pt_mm; /* x86 pgd, s390 */
>  				atomic_t pt_frag_refcount; /* powerpc */
>  			};
>  #if ALLOC_SPLIT_PTLOCKS
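
To make sure I follow the bit choreography here, I wrote a small
userspace model of the tracking byte (the high byte, bits 24..31, of
page->_refcount). This is only a sketch under my reading of the patch,
and the function names are mine: I am assuming atomic_xor_bits()
returns the post-xor value, that 0x01/0x02 mean "half allocated",
0x04/0x08 "half queued on rcu_head", 0x10/0x20 "half pending free",
and that the mask tests in pte_free_half() are meant on the tracking
byte (shifted down by 24), as they are in pte_free_defer():

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint refcount;    /* stands in for page->_refcount */

static unsigned int atomic_xor_bits(atomic_uint *v, unsigned int bits)
{
        /* model: return the new value, not the old one */
        return atomic_fetch_xor(v, bits) ^ bits;
}

/* the non-pgste pte_free_defer() path for one 2K half */
static void defer_half(unsigned int bit)
{
        unsigned int mask;

        mask = atomic_xor_bits(&refcount, 0x15U << (bit + 24)) >> 24;
        /* other half not allocated? not already pending free? */
        if ((mask & 0x03U) == 0x00U && (mask & 0x30U) != 0x30U)
                printf("half %u: would list_del() the page\n", bit);
        /* do not relink rcu_head if the other half already owns it */
        if ((mask & 0x0CU) != 0x0CU)
                printf("half %u: would call_rcu() now\n", bit);
        else
                printf("half %u: rcu_head busy, free deferred\n", bit);
}

/* the pte_free_half() rcu callback for one 2K half */
static void rcu_done_half(unsigned int bit)
{
        unsigned int mask;

        mask = atomic_xor_bits(&refcount, 0x04U << (bit + 24)) >> 24;
        printf("half %u freed", bit);
        /* pte_free_defer() of the other half came in meanwhile? */
        if (mask & 0x0CU)
                printf("; chaining call_rcu() for the other half");
        printf("\n");
}

int main(void)
{
        /* start with both 2K halves of the page in use */
        atomic_init(&refcount, 0x03U << 24);

        defer_half(0);          /* takes rcu_head, queues half 0 */
        defer_half(1);          /* rcu_head busy, waits for half 0 */
        rcu_done_half(0);       /* frees half 0, chains half 1 */
        rcu_done_half(1);       /* frees half 1 */
        return 0;
}

which prints:

half 0: would call_rcu() now
half 1: rcu_head busy, free deferred
half 0 freed; chaining call_rcu() for the other half
half 1 freed

So, if I read it right, the second half never relinks page->rcu_head
while the first half's callback is pending, and the callback itself
chains the other half's free. The model deliberately leaves out
mm_pgtable_list_lock, the list handling, and whatever
__tlb_remove_table() does with the bits encoded into the table
pointer to actually return the page.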