* Helge Deller <deller@xxxxxx>: > On 08.04.19 16:29, James Bottomley wrote: > > On Mon, 2019-04-08 at 10:52 +0100, Mel Gorman wrote: > >> First, if pa-risc is !NUMA then why are separate local ranges > >> represented as separate nodes? Is it because of DISCONTIGMEM or > >> something else? DISCONTIGMEM is before my time so I'm not familiar > >> with it and I consider it "essentially dead" but the arch init code > >> seems to setup pgdats for each physical contiguous range so it's a > >> possibility. The most likely explanation is pa-risc does not have > >> hardware with addressing limitations smaller than the CPUs physical > >> address limits and it's possible to have more ranges than available > >> zones but clarification would be nice. > > > > Let me try, since I remember the ancient history. In the early days, > > there had to be a single mem_map array covering all of physical memory. > > Some pa-risc systems had huge gaps in the physical memory; I think one > > gap was somewhere around 1GB, so this lead us to wasting huge amounts > > of space in mem_map on non-existent memory. What CONFIG_DISCONTIGMEM > > did was allow you to represent this discontinuity on a non-NUMA system > > using numa nodes, so we effectively got one node per discontiguous > > range. It's hacky, but it worked. I thought we finally got converted > > to sparsemem by the NUMA people, but I can't find the commit. > > James, you tried once: > https://patchwork.kernel.org/patch/729441/ > > It seems we better should move over to sparsemem now? Below is an updated patch to convert parisc from DISCONTIGMEM to SPARSEMEM. It builds and boots for me on 32- and 64-bit machines. Mikulas, could you try if you still see the the cache limited to 1GiB with this patch applied ? Helge --------------------- >From 2c30c3a61bbfb56850862a7f7127416325fe126f Mon Sep 17 00:00:00 2001 From: Helge Deller <deller@xxxxxx> Date: Tue, 9 Apr 2019 21:52:35 +0200 Subject: [PATCH] parisc: Switch from DISCONTIGMEM to SPARSEMEM Signed-off-by: Helge Deller <deller@xxxxxx> diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index c8e6212..4f1397f 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -36,6 +36,7 @@ config PARISC select GENERIC_STRNCPY_FROM_USER select SYSCTL_ARCH_UNALIGN_ALLOW select SYSCTL_EXCEPTION_TRACE + select ARCH_DISCARD_MEMBLOCK select HAVE_MOD_ARCH_SPECIFIC select VIRT_TO_BUS select MODULES_USE_ELF_RELA @@ -311,21 +312,16 @@ config ARCH_SELECT_MEMORY_MODEL def_bool y depends on 64BIT -config ARCH_DISCONTIGMEM_ENABLE +config ARCH_SPARSEMEM_ENABLE def_bool y depends on 64BIT config ARCH_FLATMEM_ENABLE def_bool y -config ARCH_DISCONTIGMEM_DEFAULT +config ARCH_SPARSEMEM_DEFAULT def_bool y - depends on ARCH_DISCONTIGMEM_ENABLE - -config NODES_SHIFT - int - default "3" - depends on NEED_MULTIPLE_NODES + depends on ARCH_SPARSEMEM_ENABLE source "kernel/Kconfig.hz" diff --git a/arch/parisc/include/asm/mmzone.h b/arch/parisc/include/asm/mmzone.h index fafa389..8d39040 100644 --- a/arch/parisc/include/asm/mmzone.h +++ b/arch/parisc/include/asm/mmzone.h @@ -2,62 +2,6 @@ #ifndef _PARISC_MMZONE_H #define _PARISC_MMZONE_H -#define MAX_PHYSMEM_RANGES 8 /* Fix the size for now (current known max is 3) */ +#define MAX_PHYSMEM_RANGES 4 /* Fix the size for now (current known max is 3) */ -#ifdef CONFIG_DISCONTIGMEM - -extern int npmem_ranges; - -struct node_map_data { - pg_data_t pg_data; -}; - -extern struct node_map_data node_data[]; - -#define NODE_DATA(nid) (&node_data[nid].pg_data) - -/* We have these possible memory map layouts: - * Astro: 0-3.75, 67.75-68, 4-64 - * zx1: 0-1, 257-260, 4-256 - * Stretch (N-class): 0-2, 4-32, 34-xxx - */ - -/* Since each 1GB can only belong to one region (node), we can create - * an index table for pfn to nid lookup; each entry in pfnnid_map - * represents 1GB, and contains the node that the memory belongs to. */ - -#define PFNNID_SHIFT (30 - PAGE_SHIFT) -#define PFNNID_MAP_MAX 512 /* support 512GB */ -extern signed char pfnnid_map[PFNNID_MAP_MAX]; - -#ifndef CONFIG_64BIT -#define pfn_is_io(pfn) ((pfn & (0xf0000000UL >> PAGE_SHIFT)) == (0xf0000000UL >> PAGE_SHIFT)) -#else -/* io can be 0xf0f0f0f0f0xxxxxx or 0xfffffffff0000000 */ -#define pfn_is_io(pfn) ((pfn & (0xf000000000000000UL >> PAGE_SHIFT)) == (0xf000000000000000UL >> PAGE_SHIFT)) -#endif - -static inline int pfn_to_nid(unsigned long pfn) -{ - unsigned int i; - - if (unlikely(pfn_is_io(pfn))) - return 0; - - i = pfn >> PFNNID_SHIFT; - BUG_ON(i >= ARRAY_SIZE(pfnnid_map)); - - return pfnnid_map[i]; -} - -static inline int pfn_valid(int pfn) -{ - int nid = pfn_to_nid(pfn); - - if (nid >= 0) - return (pfn < node_end_pfn(nid)); - return 0; -} - -#endif #endif /* _PARISC_MMZONE_H */ diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h index b77f49c..93caf17 100644 --- a/arch/parisc/include/asm/page.h +++ b/arch/parisc/include/asm/page.h @@ -147,9 +147,9 @@ extern int npmem_ranges; #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_SPARSEMEM #define pfn_valid(pfn) ((pfn) < max_mapnr) -#endif /* CONFIG_DISCONTIGMEM */ +#endif #ifdef CONFIG_HUGETLB_PAGE #define HPAGE_SHIFT PMD_SHIFT /* fixed for transparent huge pages */ diff --git a/arch/parisc/include/asm/sparsemem.h b/arch/parisc/include/asm/sparsemem.h new file mode 100644 index 0000000..b7d1dc9 --- /dev/null +++ b/arch/parisc/include/asm/sparsemem.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ASM_PARISC_SPARSEMEM_H +#define ASM_PARISC_SPARSEMEM_H + +/* We have these possible memory map layouts: + * Astro: 0-3.75, 67.75-68, 4-64 + * zx1: 0-1, 257-260, 4-256 + * Stretch (N-class): 0-2, 4-32, 34-xxx + */ + +#define MAX_PHYSMEM_BITS 42 +#define SECTION_SIZE_BITS 37 + +#endif diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c index 7baa226..174213b 100644 --- a/arch/parisc/kernel/parisc_ksyms.c +++ b/arch/parisc/kernel/parisc_ksyms.c @@ -138,12 +138,6 @@ extern void $$dyncall(void); EXPORT_SYMBOL($$dyncall); #endif -#ifdef CONFIG_DISCONTIGMEM -#include <asm/mmzone.h> -EXPORT_SYMBOL(node_data); -EXPORT_SYMBOL(pfnnid_map); -#endif - #ifdef CONFIG_FUNCTION_TRACER extern void _mcount(void); EXPORT_SYMBOL(_mcount); diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index d0b1662..9523394 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -48,11 +48,6 @@ pmd_t pmd0[PTRS_PER_PMD] __attribute__ ((__section__ (".data..vm0.pmd"), aligned pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__ ((__section__ (".data..vm0.pgd"), aligned(PAGE_SIZE))); pte_t pg0[PT_INITIAL * PTRS_PER_PTE] __attribute__ ((__section__ (".data..vm0.pte"), aligned(PAGE_SIZE))); -#ifdef CONFIG_DISCONTIGMEM -struct node_map_data node_data[MAX_NUMNODES] __read_mostly; -signed char pfnnid_map[PFNNID_MAP_MAX] __read_mostly; -#endif - static struct resource data_resource = { .name = "Kernel data", .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, @@ -76,11 +71,11 @@ static struct resource sysram_resources[MAX_PHYSMEM_RANGES] __read_mostly; * information retrieved in kernel/inventory.c. */ -physmem_range_t pmem_ranges[MAX_PHYSMEM_RANGES] __read_mostly; -int npmem_ranges __read_mostly; +physmem_range_t pmem_ranges[MAX_PHYSMEM_RANGES] __initdata; +int npmem_ranges __initdata; #ifdef CONFIG_64BIT -#define MAX_MEM (~0UL) +#define MAX_MEM (1UL << MAX_PHYSMEM_BITS) #else /* !CONFIG_64BIT */ #define MAX_MEM (3584U*1024U*1024U) #endif /* !CONFIG_64BIT */ @@ -119,7 +114,7 @@ static void __init mem_limit_func(void) static void __init setup_bootmem(void) { unsigned long mem_max; -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_SPARSEMEM physmem_range_t pmem_holes[MAX_PHYSMEM_RANGES - 1]; int npmem_holes; #endif @@ -137,23 +132,20 @@ static void __init setup_bootmem(void) int j; for (j = i; j > 0; j--) { - unsigned long tmp; + physmem_range_t tmp; if (pmem_ranges[j-1].start_pfn < pmem_ranges[j].start_pfn) { break; } - tmp = pmem_ranges[j-1].start_pfn; - pmem_ranges[j-1].start_pfn = pmem_ranges[j].start_pfn; - pmem_ranges[j].start_pfn = tmp; - tmp = pmem_ranges[j-1].pages; - pmem_ranges[j-1].pages = pmem_ranges[j].pages; - pmem_ranges[j].pages = tmp; + tmp = pmem_ranges[j-1]; + pmem_ranges[j-1] = pmem_ranges[j]; + pmem_ranges[j] = tmp; } } -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_SPARSEMEM /* * Throw out ranges that are too far apart (controlled by * MAX_GAP). @@ -165,7 +157,7 @@ static void __init setup_bootmem(void) pmem_ranges[i-1].pages) > MAX_GAP) { npmem_ranges = i; printk("Large gap in memory detected (%ld pages). " - "Consider turning on CONFIG_DISCONTIGMEM\n", + "Consider turning on CONFIG_SPARSEMEM\n", pmem_ranges[i].start_pfn - (pmem_ranges[i-1].start_pfn + pmem_ranges[i-1].pages)); @@ -230,9 +222,8 @@ static void __init setup_bootmem(void) printk(KERN_INFO "Total Memory: %ld MB\n",mem_max >> 20); -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_SPARSEMEM /* Merge the ranges, keeping track of the holes */ - { unsigned long end_pfn; unsigned long hole_pages; @@ -255,18 +246,6 @@ static void __init setup_bootmem(void) } #endif -#ifdef CONFIG_DISCONTIGMEM - for (i = 0; i < MAX_PHYSMEM_RANGES; i++) { - memset(NODE_DATA(i), 0, sizeof(pg_data_t)); - } - memset(pfnnid_map, 0xff, sizeof(pfnnid_map)); - - for (i = 0; i < npmem_ranges; i++) { - node_set_state(i, N_NORMAL_MEMORY); - node_set_online(i); - } -#endif - /* * Initialize and free the full range of memory in each range. */ @@ -314,7 +293,7 @@ static void __init setup_bootmem(void) memblock_reserve(__pa(KERNEL_BINARY_TEXT_START), (unsigned long)(_end - KERNEL_BINARY_TEXT_START)); -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_SPARSEMEM /* reserve the holes */ @@ -360,6 +339,9 @@ static void __init setup_bootmem(void) /* Initialize Page Deallocation Table (PDT) and check for bad memory. */ pdc_pdt_init(); + + memblock_allow_resize(); + memblock_dump_all(); } static int __init parisc_text_address(unsigned long vaddr) @@ -709,37 +691,46 @@ static void __init gateway_init(void) PAGE_SIZE, PAGE_GATEWAY, 1); } -void __init paging_init(void) +static void __init parisc_bootmem_free(void) { + unsigned long zones_size[MAX_NR_ZONES] = { 0, }; + unsigned long holes_size[MAX_NR_ZONES] = { 0, }; + unsigned long mem_start_pfn = ~0UL, mem_end_pfn = 0, mem_size_pfn = 0; int i; + for (i = 0; i < npmem_ranges; i++) { + unsigned long start = pmem_ranges[i].start_pfn; + unsigned long size = pmem_ranges[i].pages; + unsigned long end = start + size; + + if (mem_start_pfn > start) + mem_start_pfn = start; + if (mem_end_pfn < end) + mem_end_pfn = end; + mem_size_pfn += size; + } + + zones_size[0] = mem_end_pfn - mem_start_pfn; + holes_size[0] = zones_size[0] - mem_size_pfn; + + free_area_init_node(0, zones_size, mem_start_pfn, holes_size); +} + +void __init paging_init(void) +{ setup_bootmem(); pagetable_init(); gateway_init(); flush_cache_all_local(); /* start with known state */ flush_tlb_all_local(NULL); - for (i = 0; i < npmem_ranges; i++) { - unsigned long zones_size[MAX_NR_ZONES] = { 0, }; - - zones_size[ZONE_NORMAL] = pmem_ranges[i].pages; - -#ifdef CONFIG_DISCONTIGMEM - /* Need to initialize the pfnnid_map before we can initialize - the zone */ - { - int j; - for (j = (pmem_ranges[i].start_pfn >> PFNNID_SHIFT); - j <= ((pmem_ranges[i].start_pfn + pmem_ranges[i].pages) >> PFNNID_SHIFT); - j++) { - pfnnid_map[j] = i; - } - } -#endif - - free_area_init_node(i, zones_size, - pmem_ranges[i].start_pfn, NULL); - } + /* + * Mark all memblocks as present for sparsemem using + * memory_present() and then initialize sparsemem. + */ + memblocks_present(); + sparse_init(); + parisc_bootmem_free(); } #ifdef CONFIG_PA20