[PATCH] KHO: always have a lowmem scratch region

During initialization, some callers need to allocate low memory from
memblock. One such caller is swiotlb_memblock_alloc() on x86. In a kernel
booted via KHO kexec, early memblock allocations can only be satisfied
from the scratch regions, but the global and per-node scratch regions are
allocated without any constraint on their range. This can leave no scratch
region in lowmem. If that happens, lowmem allocations can fail, causing
failures during boot or later down the line.

Always ensure there is some scratch memory available in low memory by
having a separate scratch area for it, along with the global and
per-node ones, and allow specifying its size via the command line.
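For example (sizes are illustrative; they match the documentation update
below):

  kho_scratch=128M,512M,512M

reserves a 128 MiB lowmem scratch area, a 512 MiB global scratch area, and
a 512 MiB scratch area per NUMA node.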

To more accurately estimate suitable scratch sizes, add
memblock_reserved_kern_lowmem_size() and
memblock_reserved_kern_highmem_size(), which calculate how much kernel
memory memblock reserved in low and high memory respectively, along with
some helper functions to derive the scratch sizes from them.
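With the "nn%" form, the lowmem scratch size is then derived from the new
helper roughly as follows (a sketch mirroring scratch_size_low() in the
diff below):

  size = memblock_reserved_kern_lowmem_size() * scratch_scale / 100;
  size = round_up(size, CMA_MIN_ALIGNMENT_BYTES);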

Signed-off-by: Pratyush Yadav <ptyadav@xxxxxxxxx>
---
 .../admin-guide/kernel-parameters.txt         |  9 +-
 Documentation/kho/usage.rst                   | 10 +--
 include/linux/memblock.h                      |  2 +
 kernel/kexec_handover.c                       | 83 ++++++++++++++-----
 mm/memblock.c                                 | 28 +++++++
 5 files changed, 103 insertions(+), 29 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index ed656e2fb05ef..7c5afd45ad9dc 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2705,7 +2705,7 @@
 			"1" | "on" | "y" - kexec handover is enabled
 
 	kho_scratch=	[KEXEC,EARLY]
-			Format: nn[KMG],mm[KMG] | nn%
+			Format: ll[KMG],nn[KMG],mm[KMG] | nn%
 			Defines the size of the KHO scratch region. The KHO
 			scratch regions are physically contiguous memory
 			ranges that can only be used for non-kernel
@@ -2715,9 +2715,10 @@
 			bootstrap itself.
 
 			It is possible to specify the exact amount of
-			memory in the form of "nn[KMG],mm[KMG]" where the
-			first parameter defines the size of a global
-			scratch area and the second parameter defines the
+			memory in the form of "ll[KMG],nn[KMG],mm[KMG]" where the
+			first parameter defines the size of a low memory scratch
+			area, the second parameter defines the size of a global
+			scratch area and the third parameter defines the
 			size of additional per-node scratch areas.
 			The form "nn%" defines scale factor (in percents)
 			of memory that was used during boot.
diff --git a/Documentation/kho/usage.rst b/Documentation/kho/usage.rst
index e7300fbb309c1..6a6011809795d 100644
--- a/Documentation/kho/usage.rst
+++ b/Documentation/kho/usage.rst
@@ -19,11 +19,11 @@ at compile time. Every KHO producer may have its own config option that you
 need to enable if you would like to preserve their respective state across
 kexec.
 
-To use KHO, please boot the kernel with the ``kho=on`` command line
-parameter. You may use ``kho_scratch`` parameter to define size of the
-scratch regions. For example ``kho_scratch=512M,512M`` will reserve a 512
-MiB for a global scratch region and 512 MiB per NUMA node scratch regions
-on boot.
+To use KHO, please boot the kernel with the ``kho=on`` command line parameter.
+You may use the ``kho_scratch`` parameter to define the size of the scratch
+regions. For example, ``kho_scratch=128M,512M,512M`` will reserve a 128 MiB
+low memory scratch region, a 512 MiB global scratch region, and a 512 MiB
+scratch region per NUMA node on boot.
 
 Perform a KHO kexec
 -------------------
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 20887e199cdbd..9f5c5aec4b1d4 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -504,6 +504,8 @@ static inline __init_memblock bool memblock_bottom_up(void)
 phys_addr_t memblock_phys_mem_size(void);
 phys_addr_t memblock_reserved_size(void);
 phys_addr_t memblock_reserved_kern_size(int nid);
+phys_addr_t memblock_reserved_kern_lowmem_size(void);
+phys_addr_t memblock_reserved_kern_highmem_size(void);
 unsigned long memblock_estimated_nr_free_pages(void);
 phys_addr_t memblock_start_of_DRAM(void);
 phys_addr_t memblock_end_of_DRAM(void);
diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c
index c26753d613cbc..29eeed09ceb31 100644
--- a/kernel/kexec_handover.c
+++ b/kernel/kexec_handover.c
@@ -547,20 +547,21 @@ late_initcall(kho_init);
  *
  * kho_scratch=N%
  *
- * It is also possible to explicitly define size for a global and per-node
- * scratch areas:
+ * It is also possible to explicitly define the sizes of the lowmem, global,
+ * and per-node scratch areas:
  *
- * kho_scratch=n[KMG],m[KMG]
+ * kho_scratch=l[KMG],n[KMG],m[KMG]
  *
  * The explicit size definition takes precedence over scale definition.
  */
 static unsigned int scratch_scale __initdata = 200;
 static phys_addr_t scratch_size_global __initdata;
 static phys_addr_t scratch_size_pernode __initdata;
+static phys_addr_t scratch_size_lowmem __initdata;
 
 static int __init kho_parse_scratch_size(char *p)
 {
-	unsigned long size, size_pernode;
+	unsigned long size, size_pernode, size_global;
 	char *endptr, *oldp = p;
 
 	if (!p)
@@ -578,15 +579,25 @@ static int __init kho_parse_scratch_size(char *p)
 		if (*p != ',')
 			return -EINVAL;
 
+		oldp = p;
+		size_global = memparse(p + 1, &p);
+		if (!size_global || p == oldp)
+			return -EINVAL;
+
+		if (*p != ',')
+			return -EINVAL;
+
 		size_pernode = memparse(p + 1, &p);
 		if (!size_pernode)
 			return -EINVAL;
 
-		scratch_size_global = size;
+		scratch_size_lowmem = size;
+		scratch_size_global = size_global;
 		scratch_size_pernode = size_pernode;
 		scratch_scale = 0;
 
-		pr_notice("scratch areas: global: %lluMB pernode: %lldMB\n",
+		pr_notice("scratch areas: lowmem: %lluMB global: %lluMB pernode: %lldMB\n",
+			  (u64)(scratch_size_lowmem >> 20),
 			  (u64)(scratch_size_global >> 20),
 			  (u64)(scratch_size_pernode >> 20));
 	}
@@ -595,18 +606,38 @@ static int __init kho_parse_scratch_size(char *p)
 }
 early_param("kho_scratch", kho_parse_scratch_size);
 
-static phys_addr_t __init scratch_size(int nid)
+static phys_addr_t __init scratch_size_low(void)
+{
+	phys_addr_t size;
+
+	if (scratch_scale)
+		size = memblock_reserved_kern_lowmem_size() * scratch_scale / 100;
+	else
+		size = scratch_size_lowmem;
+
+	return round_up(size, CMA_MIN_ALIGNMENT_BYTES);
+}
+
+static phys_addr_t __init scratch_size_high(void)
+{
+	phys_addr_t size;
+
+	if (scratch_scale)
+		size = memblock_reserved_kern_highmem_size() * scratch_scale / 100;
+	else
+		size = scratch_size_global;
+
+	return round_up(size, CMA_MIN_ALIGNMENT_BYTES);
+}
+
+static phys_addr_t __init scratch_size_node(int nid)
 {
 	phys_addr_t size;
 
-	if (scratch_scale) {
+	if (scratch_scale)
 		size = memblock_reserved_kern_size(nid) * scratch_scale / 100;
-	} else {
-		if (numa_valid_node(nid))
-			size = scratch_size_pernode;
-		else
-			size = scratch_size_global;
-	}
+	else
+		size = scratch_size_pernode;
 
 	return round_up(size, CMA_MIN_ALIGNMENT_BYTES);
 }
@@ -623,29 +654,41 @@ static phys_addr_t __init scratch_size(int nid)
 static void kho_reserve_scratch(void)
 {
 	phys_addr_t addr, size;
-	int nid, i = 1;
+	int nid, i = 0;
 
 	if (!kho_enable)
 		return;
 
 	/* FIXME: deal with node hot-plug/remove */
-	kho_scratch_cnt = num_online_nodes() + 1;
+	kho_scratch_cnt = num_online_nodes() + 2;
 	size = kho_scratch_cnt * sizeof(*kho_scratch);
 	kho_scratch = memblock_alloc(size, PAGE_SIZE);
 	if (!kho_scratch)
 		goto err_disable_kho;
 
-	/* reserve large contiguous area for allocations without nid */
-	size = scratch_size(NUMA_NO_NODE);
-	addr = memblock_phys_alloc(size, CMA_MIN_ALIGNMENT_BYTES);
+	/* reserve area for lowmem allocations. */
+	size = scratch_size_low();
+	addr = memblock_phys_alloc_range(size, CMA_MIN_ALIGNMENT_BYTES, 0,
+					 ARCH_LOW_ADDRESS_LIMIT);
 	if (!addr)
 		goto err_free_scratch_desc;
 
 	kho_scratch[0].addr = addr;
 	kho_scratch[0].size = size;
+	i++;
+
+	/* reserve large contiguous area for allocations without nid */
+	size = scratch_size_high();
+	addr = memblock_phys_alloc(size, CMA_MIN_ALIGNMENT_BYTES);
+	if (!addr)
+		goto err_free_scratch_areas;
+
+	kho_scratch[1].addr = addr;
+	kho_scratch[1].size = size;
+	i++;
 
 	for_each_online_node(nid) {
-		size = scratch_size(nid);
+		size = scratch_size_node(nid);
 		addr = memblock_alloc_range_nid(size, CMA_MIN_ALIGNMENT_BYTES,
 						0, MEMBLOCK_ALLOC_ACCESSIBLE,
 						nid, true);
diff --git a/mm/memblock.c b/mm/memblock.c
index fdb08b60efc17..da7abf5e5e504 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1837,6 +1837,34 @@ phys_addr_t __init_memblock memblock_reserved_size(void)
 	return memblock.reserved.total_size;
 }
 
+phys_addr_t __init_memblock memblock_reserved_kern_lowmem_size(void)
+{
+	struct memblock_region *r;
+	phys_addr_t total = 0;
+
+	for_each_reserved_mem_region(r) {
+		if ((r->flags & MEMBLOCK_RSRV_KERN) &&
+		    (r->base + r->size <= ARCH_LOW_ADDRESS_LIMIT))
+			total += r->size;
+	}
+
+	return total;
+}
+
+phys_addr_t __init_memblock memblock_reserved_kern_highmem_size(void)
+{
+	struct memblock_region *r;
+	phys_addr_t total = 0;
+
+	for_each_reserved_mem_region(r) {
+		if ((r->flags & MEMBLOCK_RSRV_KERN) &&
+		    (r->base + r->size > ARCH_LOW_ADDRESS_LIMIT))
+			total += r->size;
+	}
+
+	return total;
+}
+
 phys_addr_t __init_memblock memblock_reserved_kern_size(int nid)
 {
 	struct memblock_region *r;
-- 
Regards,
Pratyush Yadav



