The patch titled Subject: s390: convert various pgalloc functions to use ptdescs has been added to the -mm mm-unstable branch. Its filename is s390-convert-various-pgalloc-functions-to-use-ptdescs.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/s390-convert-various-pgalloc-functions-to-use-ptdescs.patch This patch will later appear in the mm-unstable branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: "Vishal Moola (Oracle)" <vishal.moola@xxxxxxxxx> Subject: s390: convert various pgalloc functions to use ptdescs Date: Mon, 31 Jul 2023 10:03:15 -0700 As part of the conversions to replace pgtable constructor/destructors with ptdesc equivalents, convert various page table functions to use ptdescs. Some of the functions use the *get*page*() helper functions. Convert these to use pagetable_alloc() and ptdesc_address() instead to help standardize page tables further. Link: https://lkml.kernel.org/r/20230731170332.69404-15-vishal.moola@xxxxxxxxx Signed-off-by: Vishal Moola (Oracle) <vishal.moola@xxxxxxxxx> Acked-by: Mike Rapoport (IBM) <rppt@xxxxxxxxxx> Cc: Arnd Bergmann <arnd@xxxxxxxx> Cc: Catalin Marinas <catalin.marinas@xxxxxxx> Cc: Christophe Leroy <christophe.leroy@xxxxxxxxxx> Cc: Claudio Imbrenda <imbrenda@xxxxxxxxxxxxx> Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx> Cc: David Hildenbrand <david@xxxxxxxxxx> Cc: David S. Miller <davem@xxxxxxxxxxxxx> Cc: Dinh Nguyen <dinguyen@xxxxxxxxxx> Cc: Geert Uytterhoeven <geert@xxxxxxxxxxxxxx> Cc: Geert Uytterhoeven <geert+renesas@xxxxxxxxx> Cc: Guo Ren <guoren@xxxxxxxxxx> Cc: Huacai Chen <chenhuacai@xxxxxxxxxx> Cc: Hugh Dickins <hughd@xxxxxxxxxx> Cc: John Paul Adrian Glaubitz <glaubitz@xxxxxxxxxxxxxxxxxxx> Cc: Jonas Bonn <jonas@xxxxxxxxxxxx> Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx> Cc: Palmer Dabbelt <palmer@xxxxxxxxxxxx> Cc: Paul Walmsley <paul.walmsley@xxxxxxxxxx> Cc: Richard Weinberger <richard@xxxxxx> Cc: Thomas Bogendoerfer <tsbogend@xxxxxxxxxxxxxxxx> Cc: Yoshinori Sato <ysato@xxxxxxxxxxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- arch/s390/include/asm/pgalloc.h | 4 arch/s390/include/asm/tlb.h | 4 arch/s390/mm/pgalloc.c | 128 +++++++++++++++--------------- 3 files changed, 69 insertions(+), 67 deletions(-) --- a/arch/s390/include/asm/pgalloc.h~s390-convert-various-pgalloc-functions-to-use-ptdescs +++ a/arch/s390/include/asm/pgalloc.h @@ -86,7 +86,7 @@ static inline pmd_t *pmd_alloc_one(struc if (!table) return NULL; crst_table_init(table, _SEGMENT_ENTRY_EMPTY); - if (!pgtable_pmd_page_ctor(virt_to_page(table))) { + if (!pagetable_pmd_ctor(virt_to_ptdesc(table))) { crst_table_free(mm, table); return NULL; } @@ -97,7 +97,7 @@ static inline void pmd_free(struct mm_st { if (mm_pmd_folded(mm)) return; - pgtable_pmd_page_dtor(virt_to_page(pmd)); + pagetable_pmd_dtor(virt_to_ptdesc(pmd)); crst_table_free(mm, (unsigned long *) pmd); } --- a/arch/s390/include/asm/tlb.h~s390-convert-various-pgalloc-functions-to-use-ptdescs +++ a/arch/s390/include/asm/tlb.h @@ -89,12 +89,12 @@ static inline void pmd_free_tlb(struct m { if (mm_pmd_folded(tlb->mm)) return; - pgtable_pmd_page_dtor(virt_to_page(pmd)); + pagetable_pmd_dtor(virt_to_ptdesc(pmd)); __tlb_adjust_range(tlb, address, PAGE_SIZE); tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; tlb->cleared_puds = 1; - tlb_remove_table(tlb, pmd); + tlb_remove_ptdesc(tlb, pmd); } /* --- a/arch/s390/mm/pgalloc.c~s390-convert-various-pgalloc-functions-to-use-ptdescs +++ a/arch/s390/mm/pgalloc.c @@ -43,17 +43,17 @@ __initcall(page_table_register_sysctl); unsigned long *crst_table_alloc(struct mm_struct *mm) { - struct page *page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); + struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER); - if (!page) + if (!ptdesc) return NULL; - arch_set_page_dat(page, CRST_ALLOC_ORDER); - return (unsigned long *) page_to_virt(page); + arch_set_page_dat(ptdesc_page(ptdesc), CRST_ALLOC_ORDER); + return (unsigned long *) ptdesc_to_virt(ptdesc); } void crst_table_free(struct mm_struct *mm, unsigned long *table) { - free_pages((unsigned long)table, CRST_ALLOC_ORDER); + pagetable_free(virt_to_ptdesc(table)); } static void __crst_table_upgrade(void *arg) @@ -140,21 +140,21 @@ static inline unsigned int atomic_xor_bi struct page *page_table_alloc_pgste(struct mm_struct *mm) { - struct page *page; + struct ptdesc *ptdesc; u64 *table; - page = alloc_page(GFP_KERNEL); - if (page) { - table = (u64 *)page_to_virt(page); + ptdesc = pagetable_alloc(GFP_KERNEL, 0); + if (ptdesc) { + table = (u64 *)ptdesc_to_virt(ptdesc); memset64(table, _PAGE_INVALID, PTRS_PER_PTE); memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE); } - return page; + return ptdesc_page(ptdesc); } void page_table_free_pgste(struct page *page) { - __free_page(page); + pagetable_free(page_ptdesc(page)); } #endif /* CONFIG_PGSTE */ @@ -242,7 +242,7 @@ void page_table_free_pgste(struct page * unsigned long *page_table_alloc(struct mm_struct *mm) { unsigned long *table; - struct page *page; + struct ptdesc *ptdesc; unsigned int mask, bit; /* Try to get a fragment of a 4K page as a 2K page table */ @@ -250,9 +250,9 @@ unsigned long *page_table_alloc(struct m table = NULL; spin_lock_bh(&mm->context.lock); if (!list_empty(&mm->context.pgtable_list)) { - page = list_first_entry(&mm->context.pgtable_list, - struct page, lru); - mask = atomic_read(&page->_refcount) >> 24; + ptdesc = list_first_entry(&mm->context.pgtable_list, + struct ptdesc, pt_list); + mask = atomic_read(&ptdesc->_refcount) >> 24; /* * The pending removal bits must also be checked. * Failure to do so might lead to an impossible @@ -264,13 +264,13 @@ unsigned long *page_table_alloc(struct m */ mask = (mask | (mask >> 4)) & 0x03U; if (mask != 0x03U) { - table = (unsigned long *) page_to_virt(page); + table = (unsigned long *) ptdesc_to_virt(ptdesc); bit = mask & 1; /* =1 -> second 2K */ if (bit) table += PTRS_PER_PTE; - atomic_xor_bits(&page->_refcount, + atomic_xor_bits(&ptdesc->_refcount, 0x01U << (bit + 24)); - list_del_init(&page->lru); + list_del_init(&ptdesc->pt_list); } } spin_unlock_bh(&mm->context.lock); @@ -278,28 +278,28 @@ unsigned long *page_table_alloc(struct m return table; } /* Allocate a fresh page */ - page = alloc_page(GFP_KERNEL); - if (!page) + ptdesc = pagetable_alloc(GFP_KERNEL, 0); + if (!ptdesc) return NULL; - if (!pgtable_pte_page_ctor(page)) { - __free_page(page); + if (!pagetable_pte_ctor(ptdesc)) { + pagetable_free(ptdesc); return NULL; } - arch_set_page_dat(page, 0); + arch_set_page_dat(ptdesc_page(ptdesc), 0); /* Initialize page table */ - table = (unsigned long *) page_to_virt(page); + table = (unsigned long *) ptdesc_to_virt(ptdesc); if (mm_alloc_pgste(mm)) { /* Return 4K page table with PGSTEs */ - INIT_LIST_HEAD(&page->lru); - atomic_xor_bits(&page->_refcount, 0x03U << 24); + INIT_LIST_HEAD(&ptdesc->pt_list); + atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24); memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE); memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE); } else { /* Return the first 2K fragment of the page */ - atomic_xor_bits(&page->_refcount, 0x01U << 24); + atomic_xor_bits(&ptdesc->_refcount, 0x01U << 24); memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE); spin_lock_bh(&mm->context.lock); - list_add(&page->lru, &mm->context.pgtable_list); + list_add(&ptdesc->pt_list, &mm->context.pgtable_list); spin_unlock_bh(&mm->context.lock); } return table; @@ -322,19 +322,18 @@ static void page_table_release_check(str static void pte_free_now(struct rcu_head *head) { - struct page *page; + struct ptdesc *ptdesc; - page = container_of(head, struct page, rcu_head); - pgtable_pte_page_dtor(page); - __free_page(page); + ptdesc = container_of(head, struct ptdesc, pt_rcu_head); + pagetable_pte_dtor(ptdesc); + pagetable_free(ptdesc); } void page_table_free(struct mm_struct *mm, unsigned long *table) { unsigned int mask, bit, half; - struct page *page; + struct ptdesc *ptdesc = virt_to_ptdesc(table); - page = virt_to_page(table); if (!mm_alloc_pgste(mm)) { /* Free 2K page table fragment of a 4K page */ bit = ((unsigned long) table & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); @@ -344,51 +343,50 @@ void page_table_free(struct mm_struct *m * will happen outside of the critical section from this * function or from __tlb_remove_table() */ - mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24)); + mask = atomic_xor_bits(&ptdesc->_refcount, 0x11U << (bit + 24)); mask >>= 24; - if ((mask & 0x03U) && !PageActive(page)) { + if ((mask & 0x03U) && !folio_test_active(ptdesc_folio(ptdesc))) { /* * Other half is allocated, and neither half has had * its free deferred: add page to head of list, to make * this freed half available for immediate reuse. */ - list_add(&page->lru, &mm->context.pgtable_list); + list_add(&ptdesc->pt_list, &mm->context.pgtable_list); } else { /* If page is on list, now remove it. */ - list_del_init(&page->lru); + list_del_init(&ptdesc->pt_list); } spin_unlock_bh(&mm->context.lock); - mask = atomic_xor_bits(&page->_refcount, 0x10U << (bit + 24)); + mask = atomic_xor_bits(&ptdesc->_refcount, 0x10U << (bit + 24)); mask >>= 24; if (mask != 0x00U) return; half = 0x01U << bit; } else { half = 0x03U; - mask = atomic_xor_bits(&page->_refcount, 0x03U << 24); + mask = atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24); mask >>= 24; } - page_table_release_check(page, table, half, mask); - if (TestClearPageActive(page)) - call_rcu(&page->rcu_head, pte_free_now); + page_table_release_check(ptdesc_page(ptdesc), table, half, mask); + if (folio_test_clear_active(ptdesc_folio(ptdesc))) + call_rcu(&ptdesc->pt_rcu_head, pte_free_now); else - pte_free_now(&page->rcu_head); + pte_free_now(&ptdesc->pt_rcu_head); } void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, unsigned long vmaddr) { struct mm_struct *mm; - struct page *page; unsigned int bit, mask; + struct ptdesc *ptdesc = virt_to_ptdesc(table); mm = tlb->mm; - page = virt_to_page(table); if (mm_alloc_pgste(mm)) { gmap_unlink(mm, table, vmaddr); table = (unsigned long *) ((unsigned long)table | 0x03U); - tlb_remove_table(tlb, table); + tlb_remove_ptdesc(tlb, table); return; } bit = ((unsigned long) table & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)); @@ -398,19 +396,19 @@ void page_table_free_rcu(struct mmu_gath * outside of the critical section from __tlb_remove_table() or from * page_table_free() */ - mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24)); + mask = atomic_xor_bits(&ptdesc->_refcount, 0x11U << (bit + 24)); mask >>= 24; - if ((mask & 0x03U) && !PageActive(page)) { + if ((mask & 0x03U) && !folio_test_active(ptdesc_folio(ptdesc))) { /* * Other half is allocated, and neither half has had * its free deferred: add page to end of list, to make * this freed half available for reuse once its pending * bit has been cleared by __tlb_remove_table(). */ - list_add_tail(&page->lru, &mm->context.pgtable_list); + list_add_tail(&ptdesc->pt_list, &mm->context.pgtable_list); } else { /* If page is on list, now remove it. */ - list_del_init(&page->lru); + list_del_init(&ptdesc->pt_list); } spin_unlock_bh(&mm->context.lock); table = (unsigned long *) ((unsigned long) table | (0x01U << bit)); @@ -421,30 +419,30 @@ void __tlb_remove_table(void *_table) { unsigned int mask = (unsigned long) _table & 0x03U, half = mask; void *table = (void *)((unsigned long) _table ^ mask); - struct page *page = virt_to_page(table); + struct ptdesc *ptdesc = virt_to_ptdesc(table); switch (half) { case 0x00U: /* pmd, pud, or p4d */ - free_pages((unsigned long)table, CRST_ALLOC_ORDER); + pagetable_free(ptdesc); return; case 0x01U: /* lower 2K of a 4K page table */ case 0x02U: /* higher 2K of a 4K page table */ - mask = atomic_xor_bits(&page->_refcount, mask << (4 + 24)); + mask = atomic_xor_bits(&ptdesc->_refcount, mask << (4 + 24)); mask >>= 24; if (mask != 0x00U) return; break; case 0x03U: /* 4K page table with pgstes */ - mask = atomic_xor_bits(&page->_refcount, 0x03U << 24); + mask = atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24); mask >>= 24; break; } - page_table_release_check(page, table, half, mask); - if (TestClearPageActive(page)) - call_rcu(&page->rcu_head, pte_free_now); + page_table_release_check(ptdesc_page(ptdesc), table, half, mask); + if (folio_test_clear_active(ptdesc_folio(ptdesc))) + call_rcu(&ptdesc->pt_rcu_head, pte_free_now); else - pte_free_now(&page->rcu_head); + pte_free_now(&ptdesc->pt_rcu_head); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -488,16 +486,20 @@ static void base_pgt_free(unsigned long static unsigned long *base_crst_alloc(unsigned long val) { unsigned long *table; + struct ptdesc *ptdesc; - table = (unsigned long *)__get_free_pages(GFP_KERNEL, CRST_ALLOC_ORDER); - if (table) - crst_table_init(table, val); + ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, CRST_ALLOC_ORDER); + if (!ptdesc) + return NULL; + table = ptdesc_address(ptdesc); + + crst_table_init(table, val); return table; } static void base_crst_free(unsigned long *table) { - free_pages((unsigned long)table, CRST_ALLOC_ORDER); + pagetable_free(virt_to_ptdesc(table)); } #define BASE_ADDR_END_FUNC(NAME, SIZE) \ _ Patches currently in -mm which might be from vishal.moola@xxxxxxxxx are mm-add-page_type_op-folio-functions.patch pgtable-create-struct-ptdesc.patch mm-add-utility-functions-for-ptdesc.patch mm-convert-pmd_pgtable_page-callers-to-use-pmd_ptdesc.patch mm-convert-ptlock_alloc-to-use-ptdescs.patch mm-convert-ptlock_ptr-to-use-ptdescs.patch mm-convert-pmd_ptlock_init-to-use-ptdescs.patch mm-convert-ptlock_init-to-use-ptdescs.patch mm-convert-pmd_ptlock_free-to-use-ptdescs.patch mm-convert-ptlock_free-to-use-ptdescs.patch mm-create-ptdesc-equivalents-for-pgtable_ptepmd_page_ctordtor.patch powerpc-convert-various-functions-to-use-ptdescs.patch x86-convert-various-functions-to-use-ptdescs.patch s390-convert-various-pgalloc-functions-to-use-ptdescs.patch mm-remove-page-table-members-from-struct-page.patch pgalloc-convert-various-functions-to-use-ptdescs.patch arm-convert-various-functions-to-use-ptdescs.patch arm64-convert-various-functions-to-use-ptdescs.patch csky-convert-__pte_free_tlb-to-use-ptdescs.patch hexagon-convert-__pte_free_tlb-to-use-ptdescs.patch loongarch-convert-various-functions-to-use-ptdescs.patch m68k-convert-various-functions-to-use-ptdescs.patch mips-convert-various-functions-to-use-ptdescs.patch nios2-convert-__pte_free_tlb-to-use-ptdescs.patch openrisc-convert-__pte_free_tlb-to-use-ptdescs.patch riscv-convert-alloc_pmd-pte_late-to-use-ptdescs.patch sh-convert-pte_free_tlb-to-use-ptdescs.patch sparc64-convert-various-functions-to-use-ptdescs.patch sparc-convert-pgtable_pte_page_ctor-dtor-to-ptdesc-equivalents.patch um-convert-pmd-pte_free_tlb-to-use-ptdescs.patch mm-remove-pgtable_pmd-pte_page_ctor-dtor-wrappers.patch