On Tue, Sep 18, 2018 at 02:51:51PM +0200, Martin Schwidefsky wrote:

> +#define pte_free_tlb pte_free_tlb
> +#define pmd_free_tlb pmd_free_tlb
> +#define p4d_free_tlb p4d_free_tlb
> +#define pud_free_tlb pud_free_tlb

> @@ -121,9 +62,18 @@ static inline void tlb_remove_page_size(struct mmu_gather *tlb,
>   * page table from the tlb.
>   */
>  static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
> +				unsigned long address)
>  {
> +	__tlb_adjust_range(tlb, address, PAGE_SIZE);
> +	tlb->mm->context.flush_mm = 1;
> +	tlb->freed_tables = 1;
> +	tlb->cleared_ptes = 1;
> +	/*
> +	 * page_table_free_rcu takes care of the allocation bit masks
> +	 * of the 2K table fragments in the 4K page table page,
> +	 * then calls tlb_remove_table.
> +	 */
> +	page_table_free_rcu(tlb, (unsigned long *) pte, address);

(whitespace damage, fixed)

Also, could you perhaps explain the need for that
page_table_alloc/page_table_free code? That is, I get the comment about
using 2K page-table fragments out of a 4K physical page, but why this
custom allocator instead of kmem_cache? It feels like there's a little
extra complication, but it's not immediately obvious what.

>  }

We _could_ use __pte_free_tlb() here I suppose, but...

>  /*
> @@ -139,6 +89,10 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
>  	if (tlb->mm->context.asce_limit <= _REGION3_SIZE)
>  		return;
>  	pgtable_pmd_page_dtor(virt_to_page(pmd));
> +	__tlb_adjust_range(tlb, address, PAGE_SIZE);
> +	tlb->mm->context.flush_mm = 1;
> +	tlb->freed_tables = 1;
> +	tlb->cleared_puds = 1;
>  	tlb_remove_table(tlb, pmd);
>  }
>
> @@ -154,6 +108,10 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
>  {
>  	if (tlb->mm->context.asce_limit <= _REGION1_SIZE)
>  		return;
> +	__tlb_adjust_range(tlb, address, PAGE_SIZE);
> +	tlb->mm->context.flush_mm = 1;
> +	tlb->freed_tables = 1;
> +	tlb->cleared_p4ds = 1;
>  	tlb_remove_table(tlb, p4d);
>  }
>
> @@ -169,19 +127,11 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
>  {
>  	if (tlb->mm->context.asce_limit <= _REGION2_SIZE)
>  		return;
> +	tlb->mm->context.flush_mm = 1;
> +	tlb->freed_tables = 1;
> +	tlb->cleared_puds = 1;
>  	tlb_remove_table(tlb, pud);
>  }

It's that ASCE limit that makes it impossible to use the generic
helpers, right?