From: Kefeng Wang <wangkefeng.wang@xxxxxxxxxx> Subject: mm: percpu: add generic pcpu_populate_pte() function With NEED_PER_CPU_PAGE_FIRST_CHUNK enabled, we need a function to populate pte, this patch adds a generic pcpu populate pte function, pcpu_populate_pte(), which is marked __weak and used on most architectures, but it is overridden on x86, which has its own implementation. Link: https://lkml.kernel.org/r/20211216112359.103822-5-wangkefeng.wang@xxxxxxxxxx Signed-off-by: Kefeng Wang <wangkefeng.wang@xxxxxxxxxx> Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx> Cc: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx> Cc: Paul Mackerras <paulus@xxxxxxxxx> Cc: "David S. Miller" <davem@xxxxxxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Cc: Ingo Molnar <mingo@xxxxxxxxxx> Cc: Borislav Petkov <bp@xxxxxxxxx> Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx> Cc: "H. Peter Anvin" <hpa@xxxxxxxxx> Cc: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> Cc: "Rafael J. Wysocki" <rafael@xxxxxxxxxx> Cc: Dennis Zhou <dennis@xxxxxxxxxx> Cc: Tejun Heo <tj@xxxxxxxxxx> Cc: Christoph Lameter <cl@xxxxxxxxx> Cc: Albert Ou <aou@xxxxxxxxxxxxxxxxx> Cc: Catalin Marinas <catalin.marinas@xxxxxxx> Cc: Palmer Dabbelt <palmer@xxxxxxxxxxx> Cc: Paul Walmsley <paul.walmsley@xxxxxxxxxx> Cc: Thomas Bogendoerfer <tsbogend@xxxxxxxxxxxxxxxx> Cc: Will Deacon <will@xxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- arch/powerpc/kernel/setup_64.c | 47 ------------------- arch/sparc/kernel/smp_64.c | 56 ---------------------- arch/x86/kernel/setup_percpu.c | 5 -- drivers/base/arch_numa.c | 51 -------------------- include/linux/percpu.h | 5 -- mm/percpu.c | 76 ++++++++++++++++++++++++++++--- 6 files changed, 78 insertions(+), 162 deletions(-) --- a/arch/powerpc/kernel/setup_64.c~mm-percpu-add-generic-pcpu_populate_pte-function +++ a/arch/powerpc/kernel/setup_64.c @@ -787,51 +787,6 @@ static __init int pcpu_cpu_to_node(int c unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); -static void __init pcpu_populate_pte(unsigned long addr) -{ - pgd_t *pgd = pgd_offset_k(addr); - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - - p4d = p4d_offset(pgd, addr); - if (p4d_none(*p4d)) { - pud_t *new; - - new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE); - if (!new) - goto err_alloc; - p4d_populate(&init_mm, p4d, new); - } - - pud = pud_offset(p4d, addr); - if (pud_none(*pud)) { - pmd_t *new; - - new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE); - if (!new) - goto err_alloc; - pud_populate(&init_mm, pud, new); - } - - pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) { - pte_t *new; - - new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE); - if (!new) - goto err_alloc; - pmd_populate_kernel(&init_mm, pmd, new); - } - - return; - -err_alloc: - panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n", - __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); -} - - void __init setup_per_cpu_areas(void) { const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; @@ -860,7 +815,7 @@ void __init setup_per_cpu_areas(void) } if (rc < 0) - rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node, pcpu_populate_pte); + rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node); if (rc < 0) panic("cannot initialize percpu area (err=%d)", rc); --- a/arch/sparc/kernel/smp_64.c~mm-percpu-add-generic-pcpu_populate_pte-function +++ a/arch/sparc/kernel/smp_64.c @@ -1539,59 +1539,6 @@ static int __init pcpu_cpu_to_node(int c return cpu_to_node(cpu); } -static void __init pcpu_populate_pte(unsigned long addr) -{ - pgd_t *pgd = pgd_offset_k(addr); - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - - if (pgd_none(*pgd)) { - pud_t *new; - - new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); - if (!new) - goto err_alloc; - pgd_populate(&init_mm, pgd, new); - } - - p4d = p4d_offset(pgd, addr); - if (p4d_none(*p4d)) { - pud_t *new; - - new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); - if (!new) - goto err_alloc; - p4d_populate(&init_mm, p4d, new); - } - - pud = pud_offset(p4d, addr); - if (pud_none(*pud)) { - pmd_t *new; - - new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); - if (!new) - goto err_alloc; - pud_populate(&init_mm, pud, new); - } - - pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) { - pte_t *new; - - new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); - if (!new) - goto err_alloc; - pmd_populate_kernel(&init_mm, pmd, new); - } - - return; - -err_alloc: - panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n", - __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); -} - void __init setup_per_cpu_areas(void) { unsigned long delta; @@ -1610,8 +1557,7 @@ void __init setup_per_cpu_areas(void) } if (rc < 0) rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, - pcpu_cpu_to_node, - pcpu_populate_pte); + pcpu_cpu_to_node); if (rc < 0) panic("cannot initialize percpu area (err=%d)", rc); --- a/arch/x86/kernel/setup_percpu.c~mm-percpu-add-generic-pcpu_populate_pte-function +++ a/arch/x86/kernel/setup_percpu.c @@ -101,7 +101,7 @@ static int __init pcpu_cpu_to_node(int c return early_cpu_to_node(cpu); } -static void __init pcpup_populate_pte(unsigned long addr) +void __init pcpu_populate_pte(unsigned long addr) { populate_extra_pte(addr); } @@ -163,8 +163,7 @@ void __init setup_per_cpu_areas(void) } if (rc < 0) rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, - pcpu_cpu_to_node, - pcpup_populate_pte); + pcpu_cpu_to_node); if (rc < 0) panic("cannot initialize percpu area (err=%d)", rc); --- a/drivers/base/arch_numa.c~mm-percpu-add-generic-pcpu_populate_pte-function +++ a/drivers/base/arch_numa.c @@ -14,7 +14,6 @@ #include <linux/of.h> #include <asm/sections.h> -#include <asm/pgalloc.h> struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); @@ -155,52 +154,6 @@ static int __init pcpu_cpu_distance(unsi return node_distance(early_cpu_to_node(from), early_cpu_to_node(to)); } -#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK -static void __init pcpu_populate_pte(unsigned long addr) -{ - pgd_t *pgd = pgd_offset_k(addr); - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - - p4d = p4d_offset(pgd, addr); - if (p4d_none(*p4d)) { - pud_t *new; - - new = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - if (!new) - goto err_alloc; - p4d_populate(&init_mm, p4d, new); - } - - pud = pud_offset(p4d, addr); - if (pud_none(*pud)) { - pmd_t *new; - - new = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - if (!new) - goto err_alloc; - pud_populate(&init_mm, pud, new); - } - - pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) { - pte_t *new; - - new = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - if (!new) - goto err_alloc; - pmd_populate_kernel(&init_mm, pmd, new); - } - - return; - -err_alloc: - panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n", - __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); -} -#endif - void __init setup_per_cpu_areas(void) { unsigned long delta; @@ -225,9 +178,7 @@ void __init setup_per_cpu_areas(void) #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK if (rc < 0) - rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, - early_cpu_to_node, - pcpu_populate_pte); + rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, early_cpu_to_node); #endif if (rc < 0) panic("Failed to initialize percpu areas (err=%d).", rc); --- a/include/linux/percpu.h~mm-percpu-add-generic-pcpu_populate_pte-function +++ a/include/linux/percpu.h @@ -95,7 +95,6 @@ extern const char * const pcpu_fc_names[ extern enum pcpu_fc pcpu_chosen_fc; typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu); -typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, @@ -113,9 +112,9 @@ extern int __init pcpu_embed_first_chunk #endif #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK +void __init pcpu_populate_pte(unsigned long addr); extern int __init pcpu_page_first_chunk(size_t reserved_size, - pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn, - pcpu_fc_populate_pte_fn_t populate_pte_fn); + pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn); #endif extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align) __alloc_size(1); --- a/mm/percpu.c~mm-percpu-add-generic-pcpu_populate_pte-function +++ a/mm/percpu.c @@ -3174,11 +3174,79 @@ out_free: #endif /* BUILD_EMBED_FIRST_CHUNK */ #ifdef BUILD_PAGE_FIRST_CHUNK +#include <asm/pgalloc.h> + +#ifndef P4D_TABLE_SIZE +#define P4D_TABLE_SIZE PAGE_SIZE +#endif + +#ifndef PUD_TABLE_SIZE +#define PUD_TABLE_SIZE PAGE_SIZE +#endif + +#ifndef PMD_TABLE_SIZE +#define PMD_TABLE_SIZE PAGE_SIZE +#endif + +#ifndef PTE_TABLE_SIZE +#define PTE_TABLE_SIZE PAGE_SIZE +#endif +void __init __weak pcpu_populate_pte(unsigned long addr) +{ + pgd_t *pgd = pgd_offset_k(addr); + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + if (pgd_none(*pgd)) { + p4d_t *new; + + new = memblock_alloc(P4D_TABLE_SIZE, P4D_TABLE_SIZE); + if (!new) + goto err_alloc; + pgd_populate(&init_mm, pgd, new); + } + + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) { + pud_t *new; + + new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE); + if (!new) + goto err_alloc; + p4d_populate(&init_mm, p4d, new); + } + + pud = pud_offset(p4d, addr); + if (pud_none(*pud)) { + pmd_t *new; + + new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE); + if (!new) + goto err_alloc; + pud_populate(&init_mm, pud, new); + } + + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) { + pte_t *new; + + new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE); + if (!new) + goto err_alloc; + pmd_populate_kernel(&init_mm, pmd, new); + } + + return; + +err_alloc: + panic("%s: Failed to allocate memory\n", __func__); +} + /** * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages * @reserved_size: the size of reserved percpu area in bytes * @cpu_to_nd_fn: callback to convert cpu to it's node, optional - * @populate_pte_fn: function to populate pte * * This is a helper to ease setting up page-remapped first percpu * chunk and can be called where pcpu_setup_first_chunk() is expected. @@ -3189,9 +3257,7 @@ out_free: * RETURNS: * 0 on success, -errno on failure. */ -int __init pcpu_page_first_chunk(size_t reserved_size, - pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn, - pcpu_fc_populate_pte_fn_t populate_pte_fn) +int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn) { static struct vm_struct vm; struct pcpu_alloc_info *ai; @@ -3255,7 +3321,7 @@ int __init pcpu_page_first_chunk(size_t (unsigned long)vm.addr + unit * ai->unit_size; for (i = 0; i < unit_pages; i++) - populate_pte_fn(unit_addr + (i << PAGE_SHIFT)); + pcpu_populate_pte(unit_addr + (i << PAGE_SHIFT)); /* pte already populated, the following shouldn't fail */ rc = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages], _