In addition to the PGD/PMD table size (128*4) add a PTE table size (64*4) to the table allocator. This completely removes the pte-table overhead compared to the old code, even for dense tables. Notes: - the allocator gained __flush_page_to_ram(), since the old page-based allocator had that. - the allocator gained a list_empty() check to deal with there not being any pages at all. - the free mask is extended to cover more than the 8 bits required for the (512 byte) PGD/PMD tables. - NR_PAGETABLE accounting is restored. Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> --- arch/m68k/include/asm/motorola_pgalloc.h | 24 +++++----- arch/m68k/mm/init.c | 6 +- arch/m68k/mm/memory.c | 70 ++++++++++++++++++++----------- 3 files changed, 61 insertions(+), 39 deletions(-) --- a/arch/m68k/include/asm/motorola_pgalloc.h +++ b/arch/m68k/include/asm/motorola_pgalloc.h @@ -5,61 +5,61 @@ #include <asm/tlb.h> #include <asm/tlbflush.h> -extern pmd_t *get_pointer_table(void); -extern int free_pointer_table(pmd_t *); +extern void *get_pointer_table(int type); +extern int free_pointer_table(void *table, int type); static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) { - return (pte_t *)get_pointer_table(); + return get_pointer_table(1); } static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { - free_pointer_table((void *)pte); + free_pointer_table(pte, 1); } static inline pgtable_t pte_alloc_one(struct mm_struct *mm) { - return (pte_t *)get_pointer_table(); + return get_pointer_table(1); } static inline void pte_free(struct mm_struct *mm, pgtable_t pgtable) { - free_pointer_table((void *)pgtable); + free_pointer_table(pgtable, 1); } static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pgtable, unsigned long address) { - free_pointer_table((void *)pgtable); + free_pointer_table(pgtable, 1); } static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) { - return get_pointer_table(); + return get_pointer_table(0); } static inline int pmd_free(struct mm_struct *mm, pmd_t *pmd) { - return free_pointer_table(pmd); + return free_pointer_table(pmd, 0); } static inline int __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, unsigned long address) { - return free_pointer_table(pmd); + return free_pointer_table(pmd, 0); } static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { - pmd_free(mm, (pmd_t *)pgd); + free_pointer_table(pgd, 0); } static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - return (pgd_t *)get_pointer_table(); + return get_pointer_table(0); } --- a/arch/m68k/mm/init.c +++ b/arch/m68k/mm/init.c @@ -41,7 +41,7 @@ void *empty_zero_page; EXPORT_SYMBOL(empty_zero_page); #if !defined(CONFIG_SUN3) && !defined(CONFIG_COLDFIRE) -extern void init_pointer_table(unsigned long ptable); +extern void init_pointer_table(unsigned long ptable, int type); #endif #ifdef CONFIG_MMU @@ -127,12 +127,12 @@ static inline void init_pointer_tables(v int i; /* insert pointer tables allocated so far into the tablelist */ - init_pointer_table((unsigned long)kernel_pg_dir); + init_pointer_table((unsigned long)kernel_pg_dir, 0); for (i = 0; i < PTRS_PER_PGD; i++) { pud_t *pud = (pud_t *)(&kernel_pg_dir[i]); if (pud_present(*pud)) - init_pointer_table(pgd_page_vaddr(kernel_pg_dir[i])); + init_pointer_table(pgd_page_vaddr(kernel_pg_dir[i]), 0); } #endif } --- a/arch/m68k/mm/memory.c +++ b/arch/m68k/mm/memory.c @@ -27,24 +27,34 @@ arch/sparc/mm/srmmu.c ... */ typedef struct list_head ptable_desc; -static LIST_HEAD(ptable_list); + +static struct list_head ptable_list[2] = { + LIST_HEAD_INIT(ptable_list[0]), + LIST_HEAD_INIT(ptable_list[1]), +}; #define PD_PTABLE(page) ((ptable_desc *)&(virt_to_page(page)->lru)) #define PD_PAGE(ptable) (list_entry(ptable, struct page, lru)) -#define PD_MARKBITS(dp) (*(unsigned char *)&PD_PAGE(dp)->index) +#define PD_MARKBITS(dp) (*(unsigned int *)&PD_PAGE(dp)->index) + +static const int ptable_shift[2] = { + 7+2, /* PGD, PMD */ + 6+2, /* PTE */ +}; -#define PTABLE_SIZE (PTRS_PER_PMD * sizeof(pmd_t)) +#define ptable_size(type) (1U << ptable_shift[type]) +#define ptable_mask(type) ((1U << (PAGE_SIZE / ptable_size(type))) - 1) -void __init init_pointer_table(unsigned long ptable) +void __init init_pointer_table(unsigned long ptable, int type) { ptable_desc *dp; unsigned long page = ptable & PAGE_MASK; - unsigned char mask = 1 << ((ptable - page)/PTABLE_SIZE); + unsigned int mask = 1U << ((ptable - page)/ptable_size(type)); dp = PD_PTABLE(page); if (!(PD_MARKBITS(dp) & mask)) { - PD_MARKBITS(dp) = 0xff; - list_add(dp, &ptable_list); + PD_MARKBITS(dp) = ptable_mask(type); + list_add(dp, &ptable_list[type]); } PD_MARKBITS(dp) &= ~mask; @@ -57,12 +67,10 @@ void __init init_pointer_table(unsigned return; } -pmd_t *get_pointer_table (void) +void *get_pointer_table (int type) { - ptable_desc *dp = ptable_list.next; - unsigned char mask = PD_MARKBITS (dp); - unsigned char tmp; - unsigned int off; + ptable_desc *dp = ptable_list[type].next; + unsigned int mask, tmp, off; /* * For a pointer table for a user process address space, a @@ -70,38 +78,50 @@ pmd_t *get_pointer_table (void) * page can hold 8 pointer tables. The page is remapped in * virtual address space to be noncacheable. */ - if (mask == 0) { + if (list_empty(&ptable_list[type]) || PD_MARKBITS(dp) == 0) { void *page; ptable_desc *new; if (!(page = (void *)get_zeroed_page(GFP_KERNEL))) return NULL; + if (type) { + /* + * m68k doesn't have SPLIT_PTE_PTLOCKS for not having + * SMP. + */ + pgtable_pte_page_ctor(virt_to_page(page)); + } + + __flush_page_to_ram(page); flush_tlb_kernel_page(page); nocache_page(page); new = PD_PTABLE(page); - PD_MARKBITS(new) = 0xfe; + PD_MARKBITS(new) = ptable_mask(type) - 1; list_add_tail(new, dp); return (pmd_t *)page; } - for (tmp = 1, off = 0; (mask & tmp) == 0; tmp <<= 1, off += PTABLE_SIZE) + mask = PD_MARKBITS(dp); + for (tmp = 1, off = 0; (mask & tmp) == 0; tmp <<= 1, off += ptable_size(type)) ; - PD_MARKBITS(dp) = mask & ~tmp; - if (!PD_MARKBITS(dp)) { + mask &= ~tmp; + PD_MARKBITS(dp) = mask; + if (!mask) { /* move to end of list */ - list_move_tail(dp, &ptable_list); + list_move_tail(dp, &ptable_list[type]); } - return (pmd_t *) (page_address(PD_PAGE(dp)) + off); + + return page_address(PD_PAGE(dp)) + off; } -int free_pointer_table (pmd_t *ptable) +int free_pointer_table (void *ptable, int type) { ptable_desc *dp; unsigned long page = (unsigned long)ptable & PAGE_MASK; - unsigned char mask = 1 << (((unsigned long)ptable - page)/PTABLE_SIZE); + unsigned int mask = 1U << (((unsigned long)ptable - page)/ptable_size(type)); dp = PD_PTABLE(page); if (PD_MARKBITS (dp) & mask) @@ -109,18 +129,20 @@ int free_pointer_table (pmd_t *ptable) PD_MARKBITS (dp) |= mask; - if (PD_MARKBITS(dp) == 0xff) { + if (PD_MARKBITS(dp) == ptable_mask(type)) { /* all tables in page are free, free page */ list_del(dp); cache_page((void *)page); + if (type) + pgtable_pte_page_dtor(virt_to_page(page)); free_page (page); return 1; - } else if (ptable_list.next != dp) { + } else if (ptable_list[type].next != dp) { /* * move this descriptor to the front of the list, since * it has one or more free tables. */ - list_move(dp, &ptable_list); + list_move(dp, &ptable_list[type]); } return 0; }