Add s390-specific pte_free_defer(), to call pte_free() via call_rcu(). pte_free_defer() will be called inside khugepaged's retract_page_tables() loop, where allocating extra memory cannot be relied upon. This precedes the generic version to avoid build breakage from incompatible pgtable_t. This version is more complicated than others: because page_table_free() needs to know which fragment is being freed, and which mm to link it to. page_table_free()'s fragment handling is clever, but I could too easily break it: what's done here in pte_free_defer() and pte_free_now() might be better integrated with page_table_free()'s cleverness, but not by me! By the time that page_table_free() gets called via RCU, it's conceivable that mm would already have been freed: so mmgrab() in pte_free_defer() and mmdrop() in pte_free_now(). No, that is not a good context to call mmdrop() from, so make mmdrop_async() public and use that. Signed-off-by: Hugh Dickins <hughd@xxxxxxxxxx> --- arch/s390/include/asm/pgalloc.h | 4 ++++ arch/s390/mm/pgalloc.c | 34 +++++++++++++++++++++++++++++++++ include/linux/mm_types.h | 2 +- include/linux/sched/mm.h | 1 + kernel/fork.c | 2 +- 5 files changed, 41 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 17eb618f1348..89a9d5ef94f8 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -143,6 +143,10 @@ static inline void pmd_populate(struct mm_struct *mm, #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte) #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte) +/* arch use pte_free_defer() implementation in arch/s390/mm/pgalloc.c */ +#define pte_free_defer pte_free_defer +void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable); + void vmem_map_init(void); void *vmem_crst_alloc(unsigned long val); pte_t *vmem_pte_alloc(void); diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 66ab68db9842..0129de9addfd 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -346,6 +346,40 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) __free_page(page); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static void pte_free_now(struct rcu_head *head) +{ + struct page *page; + unsigned long mm_bit; + struct mm_struct *mm; + unsigned long *table; + + page = container_of(head, struct page, rcu_head); + table = (unsigned long *)page_to_virt(page); + mm_bit = (unsigned long)page->pt_mm; + /* 4K page has only two 2K fragments, but alignment allows eight */ + mm = (struct mm_struct *)(mm_bit & ~7); + table += PTRS_PER_PTE * (mm_bit & 7); + page_table_free(mm, table); + mmdrop_async(mm); +} + +void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable) +{ + struct page *page; + unsigned long mm_bit; + + mmgrab(mm); + page = virt_to_page(pgtable); + /* Which 2K page table fragment of a 4K page? */ + mm_bit = ((unsigned long)pgtable & ~PAGE_MASK) / + (PTRS_PER_PTE * sizeof(pte_t)); + mm_bit += (unsigned long)mm; + page->pt_mm = (struct mm_struct *)mm_bit; + call_rcu(&page->rcu_head, pte_free_now); +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, unsigned long vmaddr) { diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 306a3d1a0fa6..1667a1bdb8a8 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -146,7 +146,7 @@ struct page { pgtable_t pmd_huge_pte; /* protected by page->ptl */ unsigned long _pt_pad_2; /* mapping */ union { - struct mm_struct *pt_mm; /* x86 pgds only */ + struct mm_struct *pt_mm; /* x86 pgd, s390 */ atomic_t pt_frag_refcount; /* powerpc */ }; #if ALLOC_SPLIT_PTLOCKS diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 8d89c8c4fac1..a9043d1a0d55 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -41,6 +41,7 @@ static inline void smp_mb__after_mmgrab(void) smp_mb__after_atomic(); } +extern void mmdrop_async(struct mm_struct *mm); extern void __mmdrop(struct mm_struct *mm); static inline void mmdrop(struct mm_struct *mm) diff --git a/kernel/fork.c b/kernel/fork.c index ed4e01daccaa..fa4486b65c56 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -942,7 +942,7 @@ static void mmdrop_async_fn(struct work_struct *work) __mmdrop(mm); } -static void mmdrop_async(struct mm_struct *mm) +void mmdrop_async(struct mm_struct *mm) { if (unlikely(atomic_dec_and_test(&mm->mm_count))) { INIT_WORK(&mm->async_put_work, mmdrop_async_fn); -- 2.35.3