Re: Memory management broken by "mm: reclaim small amounts of memory when an external fragmentation event occurs"

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



* Helge Deller <deller@xxxxxx>:
> On 08.04.19 16:29, James Bottomley wrote:
> > On Mon, 2019-04-08 at 10:52 +0100, Mel Gorman wrote:
> >> First, if pa-risc is !NUMA then why are separate local ranges
> >> represented as separate nodes? Is it because of DISCONTIGMEM or
> >> something else? DISCONTIGMEM is before my time so I'm not familiar
> >> with it and I consider it "essentially dead" but the arch init code
> >> seems to setup pgdats for each physical contiguous range so it's a
> >> possibility. The most likely explanation is pa-risc does not have
> >> hardware with addressing limitations smaller than the CPUs physical
> >> address limits and it's possible to have more ranges than available
> >> zones but clarification would be nice.
> >
> > Let me try, since I remember the ancient history.  In the early days,
> > there had to be a single mem_map array covering all of physical memory.
> >  Some pa-risc systems had huge gaps in the physical memory; I think one
> > gap was somewhere around 1GB, so this lead us to wasting huge amounts
> > of space in mem_map on non-existent memory.  What CONFIG_DISCONTIGMEM
> > did was allow you to represent this discontinuity on a non-NUMA system
> > using numa nodes, so we effectively got one node per discontiguous
> > range.  It's hacky, but it worked.  I thought we finally got converted
> > to sparsemem by the NUMA people, but I can't find the commit.
>
> James, you tried once:
> https://patchwork.kernel.org/patch/729441/
>
> It seems we better should move over to sparsemem now?

Below is an updated patch to convert parisc from DISCONTIGMEM to
SPARSEMEM. It builds and boots for me on 32- and 64-bit machines.
Mikulas, could you try if you still see the the cache limited to 1GiB
with this patch applied ?

Helge

---------------------

>From 2c30c3a61bbfb56850862a7f7127416325fe126f Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@xxxxxx>
Date: Tue, 9 Apr 2019 21:52:35 +0200
Subject: [PATCH] parisc: Switch from DISCONTIGMEM to SPARSEMEM

Signed-off-by: Helge Deller <deller@xxxxxx>

diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index c8e6212..4f1397f 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -36,6 +36,7 @@ config PARISC
 	select GENERIC_STRNCPY_FROM_USER
 	select SYSCTL_ARCH_UNALIGN_ALLOW
 	select SYSCTL_EXCEPTION_TRACE
+	select ARCH_DISCARD_MEMBLOCK
 	select HAVE_MOD_ARCH_SPECIFIC
 	select VIRT_TO_BUS
 	select MODULES_USE_ELF_RELA
@@ -311,21 +312,16 @@ config ARCH_SELECT_MEMORY_MODEL
 	def_bool y
 	depends on 64BIT

-config ARCH_DISCONTIGMEM_ENABLE
+config ARCH_SPARSEMEM_ENABLE
 	def_bool y
 	depends on 64BIT

 config ARCH_FLATMEM_ENABLE
 	def_bool y

-config ARCH_DISCONTIGMEM_DEFAULT
+config ARCH_SPARSEMEM_DEFAULT
 	def_bool y
-	depends on ARCH_DISCONTIGMEM_ENABLE
-
-config NODES_SHIFT
-	int
-	default "3"
-	depends on NEED_MULTIPLE_NODES
+	depends on ARCH_SPARSEMEM_ENABLE

 source "kernel/Kconfig.hz"

diff --git a/arch/parisc/include/asm/mmzone.h b/arch/parisc/include/asm/mmzone.h
index fafa389..8d39040 100644
--- a/arch/parisc/include/asm/mmzone.h
+++ b/arch/parisc/include/asm/mmzone.h
@@ -2,62 +2,6 @@
 #ifndef _PARISC_MMZONE_H
 #define _PARISC_MMZONE_H

-#define MAX_PHYSMEM_RANGES 8 /* Fix the size for now (current known max is 3) */
+#define MAX_PHYSMEM_RANGES 4 /* Fix the size for now (current known max is 3) */

-#ifdef CONFIG_DISCONTIGMEM
-
-extern int npmem_ranges;
-
-struct node_map_data {
-    pg_data_t pg_data;
-};
-
-extern struct node_map_data node_data[];
-
-#define NODE_DATA(nid)          (&node_data[nid].pg_data)
-
-/* We have these possible memory map layouts:
- * Astro: 0-3.75, 67.75-68, 4-64
- * zx1: 0-1, 257-260, 4-256
- * Stretch (N-class): 0-2, 4-32, 34-xxx
- */
-
-/* Since each 1GB can only belong to one region (node), we can create
- * an index table for pfn to nid lookup; each entry in pfnnid_map
- * represents 1GB, and contains the node that the memory belongs to. */
-
-#define PFNNID_SHIFT (30 - PAGE_SHIFT)
-#define PFNNID_MAP_MAX  512     /* support 512GB */
-extern signed char pfnnid_map[PFNNID_MAP_MAX];
-
-#ifndef CONFIG_64BIT
-#define pfn_is_io(pfn) ((pfn & (0xf0000000UL >> PAGE_SHIFT)) == (0xf0000000UL >> PAGE_SHIFT))
-#else
-/* io can be 0xf0f0f0f0f0xxxxxx or 0xfffffffff0000000 */
-#define pfn_is_io(pfn) ((pfn & (0xf000000000000000UL >> PAGE_SHIFT)) == (0xf000000000000000UL >> PAGE_SHIFT))
-#endif
-
-static inline int pfn_to_nid(unsigned long pfn)
-{
-	unsigned int i;
-
-	if (unlikely(pfn_is_io(pfn)))
-		return 0;
-
-	i = pfn >> PFNNID_SHIFT;
-	BUG_ON(i >= ARRAY_SIZE(pfnnid_map));
-
-	return pfnnid_map[i];
-}
-
-static inline int pfn_valid(int pfn)
-{
-	int nid = pfn_to_nid(pfn);
-
-	if (nid >= 0)
-		return (pfn < node_end_pfn(nid));
-	return 0;
-}
-
-#endif
 #endif /* _PARISC_MMZONE_H */
diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h
index b77f49c..93caf17 100644
--- a/arch/parisc/include/asm/page.h
+++ b/arch/parisc/include/asm/page.h
@@ -147,9 +147,9 @@ extern int npmem_ranges;
 #define __pa(x)			((unsigned long)(x)-PAGE_OFFSET)
 #define __va(x)			((void *)((unsigned long)(x)+PAGE_OFFSET))

-#ifndef CONFIG_DISCONTIGMEM
+#ifndef CONFIG_SPARSEMEM
 #define pfn_valid(pfn)		((pfn) < max_mapnr)
-#endif /* CONFIG_DISCONTIGMEM */
+#endif

 #ifdef CONFIG_HUGETLB_PAGE
 #define HPAGE_SHIFT		PMD_SHIFT /* fixed for transparent huge pages */
diff --git a/arch/parisc/include/asm/sparsemem.h b/arch/parisc/include/asm/sparsemem.h
new file mode 100644
index 0000000..b7d1dc9
--- /dev/null
+++ b/arch/parisc/include/asm/sparsemem.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ASM_PARISC_SPARSEMEM_H
+#define ASM_PARISC_SPARSEMEM_H
+
+/* We have these possible memory map layouts:
+ * Astro: 0-3.75, 67.75-68, 4-64
+ * zx1: 0-1, 257-260, 4-256
+ * Stretch (N-class): 0-2, 4-32, 34-xxx
+ */
+
+#define MAX_PHYSMEM_BITS	42
+#define SECTION_SIZE_BITS	37
+
+#endif
diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c
index 7baa226..174213b 100644
--- a/arch/parisc/kernel/parisc_ksyms.c
+++ b/arch/parisc/kernel/parisc_ksyms.c
@@ -138,12 +138,6 @@ extern void $$dyncall(void);
 EXPORT_SYMBOL($$dyncall);
 #endif

-#ifdef CONFIG_DISCONTIGMEM
-#include <asm/mmzone.h>
-EXPORT_SYMBOL(node_data);
-EXPORT_SYMBOL(pfnnid_map);
-#endif
-
 #ifdef CONFIG_FUNCTION_TRACER
 extern void _mcount(void);
 EXPORT_SYMBOL(_mcount);
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index d0b1662..9523394 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -48,11 +48,6 @@ pmd_t pmd0[PTRS_PER_PMD] __attribute__ ((__section__ (".data..vm0.pmd"), aligned
 pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__ ((__section__ (".data..vm0.pgd"), aligned(PAGE_SIZE)));
 pte_t pg0[PT_INITIAL * PTRS_PER_PTE] __attribute__ ((__section__ (".data..vm0.pte"), aligned(PAGE_SIZE)));

-#ifdef CONFIG_DISCONTIGMEM
-struct node_map_data node_data[MAX_NUMNODES] __read_mostly;
-signed char pfnnid_map[PFNNID_MAP_MAX] __read_mostly;
-#endif
-
 static struct resource data_resource = {
 	.name	= "Kernel data",
 	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
@@ -76,11 +71,11 @@ static struct resource sysram_resources[MAX_PHYSMEM_RANGES] __read_mostly;
  * information retrieved in kernel/inventory.c.
  */

-physmem_range_t pmem_ranges[MAX_PHYSMEM_RANGES] __read_mostly;
-int npmem_ranges __read_mostly;
+physmem_range_t pmem_ranges[MAX_PHYSMEM_RANGES] __initdata;
+int npmem_ranges __initdata;

 #ifdef CONFIG_64BIT
-#define MAX_MEM         (~0UL)
+#define MAX_MEM         (1UL << MAX_PHYSMEM_BITS)
 #else /* !CONFIG_64BIT */
 #define MAX_MEM         (3584U*1024U*1024U)
 #endif /* !CONFIG_64BIT */
@@ -119,7 +114,7 @@ static void __init mem_limit_func(void)
 static void __init setup_bootmem(void)
 {
 	unsigned long mem_max;
-#ifndef CONFIG_DISCONTIGMEM
+#ifndef CONFIG_SPARSEMEM
 	physmem_range_t pmem_holes[MAX_PHYSMEM_RANGES - 1];
 	int npmem_holes;
 #endif
@@ -137,23 +132,20 @@ static void __init setup_bootmem(void)
 		int j;

 		for (j = i; j > 0; j--) {
-			unsigned long tmp;
+			physmem_range_t tmp;

 			if (pmem_ranges[j-1].start_pfn <
 			    pmem_ranges[j].start_pfn) {

 				break;
 			}
-			tmp = pmem_ranges[j-1].start_pfn;
-			pmem_ranges[j-1].start_pfn = pmem_ranges[j].start_pfn;
-			pmem_ranges[j].start_pfn = tmp;
-			tmp = pmem_ranges[j-1].pages;
-			pmem_ranges[j-1].pages = pmem_ranges[j].pages;
-			pmem_ranges[j].pages = tmp;
+			tmp = pmem_ranges[j-1];
+			pmem_ranges[j-1] = pmem_ranges[j];
+			pmem_ranges[j] = tmp;
 		}
 	}

-#ifndef CONFIG_DISCONTIGMEM
+#ifndef CONFIG_SPARSEMEM
 	/*
 	 * Throw out ranges that are too far apart (controlled by
 	 * MAX_GAP).
@@ -165,7 +157,7 @@ static void __init setup_bootmem(void)
 			 pmem_ranges[i-1].pages) > MAX_GAP) {
 			npmem_ranges = i;
 			printk("Large gap in memory detected (%ld pages). "
-			       "Consider turning on CONFIG_DISCONTIGMEM\n",
+			       "Consider turning on CONFIG_SPARSEMEM\n",
 			       pmem_ranges[i].start_pfn -
 			       (pmem_ranges[i-1].start_pfn +
 			        pmem_ranges[i-1].pages));
@@ -230,9 +222,8 @@ static void __init setup_bootmem(void)

 	printk(KERN_INFO "Total Memory: %ld MB\n",mem_max >> 20);

-#ifndef CONFIG_DISCONTIGMEM
+#ifndef CONFIG_SPARSEMEM
 	/* Merge the ranges, keeping track of the holes */
-
 	{
 		unsigned long end_pfn;
 		unsigned long hole_pages;
@@ -255,18 +246,6 @@ static void __init setup_bootmem(void)
 	}
 #endif

-#ifdef CONFIG_DISCONTIGMEM
-	for (i = 0; i < MAX_PHYSMEM_RANGES; i++) {
-		memset(NODE_DATA(i), 0, sizeof(pg_data_t));
-	}
-	memset(pfnnid_map, 0xff, sizeof(pfnnid_map));
-
-	for (i = 0; i < npmem_ranges; i++) {
-		node_set_state(i, N_NORMAL_MEMORY);
-		node_set_online(i);
-	}
-#endif
-
 	/*
 	 * Initialize and free the full range of memory in each range.
 	 */
@@ -314,7 +293,7 @@ static void __init setup_bootmem(void)
 	memblock_reserve(__pa(KERNEL_BINARY_TEXT_START),
 			(unsigned long)(_end - KERNEL_BINARY_TEXT_START));

-#ifndef CONFIG_DISCONTIGMEM
+#ifndef CONFIG_SPARSEMEM

 	/* reserve the holes */

@@ -360,6 +339,9 @@ static void __init setup_bootmem(void)

 	/* Initialize Page Deallocation Table (PDT) and check for bad memory. */
 	pdc_pdt_init();
+
+	memblock_allow_resize();
+	memblock_dump_all();
 }

 static int __init parisc_text_address(unsigned long vaddr)
@@ -709,37 +691,46 @@ static void __init gateway_init(void)
 		  PAGE_SIZE, PAGE_GATEWAY, 1);
 }

-void __init paging_init(void)
+static void __init parisc_bootmem_free(void)
 {
+	unsigned long zones_size[MAX_NR_ZONES] = { 0, };
+	unsigned long holes_size[MAX_NR_ZONES] = { 0, };
+	unsigned long mem_start_pfn = ~0UL, mem_end_pfn = 0, mem_size_pfn = 0;
 	int i;

+	for (i = 0; i < npmem_ranges; i++) {
+		unsigned long start = pmem_ranges[i].start_pfn;
+		unsigned long size = pmem_ranges[i].pages;
+		unsigned long end = start + size;
+
+		if (mem_start_pfn > start)
+			mem_start_pfn = start;
+		if (mem_end_pfn < end)
+			mem_end_pfn = end;
+		mem_size_pfn += size;
+	}
+
+	zones_size[0] = mem_end_pfn - mem_start_pfn;
+	holes_size[0] = zones_size[0] - mem_size_pfn;
+
+	free_area_init_node(0, zones_size, mem_start_pfn, holes_size);
+}
+
+void __init paging_init(void)
+{
 	setup_bootmem();
 	pagetable_init();
 	gateway_init();
 	flush_cache_all_local(); /* start with known state */
 	flush_tlb_all_local(NULL);

-	for (i = 0; i < npmem_ranges; i++) {
-		unsigned long zones_size[MAX_NR_ZONES] = { 0, };
-
-		zones_size[ZONE_NORMAL] = pmem_ranges[i].pages;
-
-#ifdef CONFIG_DISCONTIGMEM
-		/* Need to initialize the pfnnid_map before we can initialize
-		   the zone */
-		{
-		    int j;
-		    for (j = (pmem_ranges[i].start_pfn >> PFNNID_SHIFT);
-			 j <= ((pmem_ranges[i].start_pfn + pmem_ranges[i].pages) >> PFNNID_SHIFT);
-			 j++) {
-			pfnnid_map[j] = i;
-		    }
-		}
-#endif
-
-		free_area_init_node(i, zones_size,
-				pmem_ranges[i].start_pfn, NULL);
-	}
+	/*
+	 * Mark all memblocks as present for sparsemem using
+	 * memory_present() and then initialize sparsemem.
+	 */
+	memblocks_present();
+	sparse_init();
+	parisc_bootmem_free();
 }

 #ifdef CONFIG_PA20





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux