Re: [PATCH v2 2/3] arm64: Support page mapping percpu first chunk allocator

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 2021/8/1 23:53, Catalin Marinas wrote:
On Tue, Jul 20, 2021 at 10:51:04AM +0800, Kefeng Wang wrote:
Percpu embedded first chunk allocator is the firstly option, but it
could fails on ARM64, eg,
   "percpu: max_distance=0x5fcfdc640000 too large for vmalloc space 0x781fefff0000"
   "percpu: max_distance=0x600000540000 too large for vmalloc space 0x7dffb7ff0000"
   "percpu: max_distance=0x5fff9adb0000 too large for vmalloc space 0x5dffb7ff0000"

then we could meet "WARNING: CPU: 15 PID: 461 at vmalloc.c:3087 pcpu_get_vm_areas+0x488/0x838",
even the system could not boot successfully.

Let's implement page mapping percpu first chunk allocator as a fallback
to the embedding allocator to increase the robustness of the system.
It looks like x86, powerpc and sparc implement their own
setup_per_cpu_areas(). I had a quick look on finding some commonalities
but I think it's a lot more hassle to make a generic version out of them
(powerpc looks the simplest though). I think we could add a generic
variant with the arm64 support and later migrate other architectures to
it if possible.
Ok, let's do it later, I could try to make some cleanup after the patchset is merged ;)
The patch looks ok to me otherwise but I'd need an ack from Greg as it
touches drivers/.

the arch_numa is only used ARM64 and riscv, the NEED_PER_CPU_PAGE_FIRST_CHUNK

is not enabled on RISCV, so it's no bad effect.


BTW, do we need something similar for the non-NUMA
setup_per_cpu_areas()? I can see this patch only enables
NEED_PER_CPU_PAGE_FIRST_CHUNK if NUMA.

Leaving the rest of the patch below for Greg.

Signed-off-by: Kefeng Wang <wangkefeng.wang@xxxxxxxxxx>
---
  arch/arm64/Kconfig       |  4 ++
  drivers/base/arch_numa.c | 82 +++++++++++++++++++++++++++++++++++-----
  2 files changed, 76 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b5b13a932561..eacb5873ded1 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1045,6 +1045,10 @@ config NEED_PER_CPU_EMBED_FIRST_CHUNK
  	def_bool y
  	depends on NUMA
+config NEED_PER_CPU_PAGE_FIRST_CHUNK
+	def_bool y
+	depends on NUMA
+
  source "kernel/Kconfig.hz"
config ARCH_SPARSEMEM_ENABLE
diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c
index 4cc4e117727d..563b2013b75a 100644
--- a/drivers/base/arch_numa.c
+++ b/drivers/base/arch_numa.c
@@ -14,6 +14,7 @@
  #include <linux/of.h>
#include <asm/sections.h>
+#include <asm/pgalloc.h>
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
  EXPORT_SYMBOL(node_data);
@@ -168,22 +169,83 @@ static void __init pcpu_fc_free(void *ptr, size_t size)
  	memblock_free_early(__pa(ptr), size);
  }
+#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+static void __init pcpu_populate_pte(unsigned long addr)
+{
+	pgd_t *pgd = pgd_offset_k(addr);
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	p4d = p4d_offset(pgd, addr);
+	if (p4d_none(*p4d)) {
+		pud_t *new;
+
+		new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+		if (!new)
+			goto err_alloc;
+		p4d_populate(&init_mm, p4d, new);
+	}
+
+	pud = pud_offset(p4d, addr);
+	if (pud_none(*pud)) {
+		pmd_t *new;
+
+		new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+		if (!new)
+			goto err_alloc;
+		pud_populate(&init_mm, pud, new);
+	}
+
+	pmd = pmd_offset(pud, addr);
+	if (!pmd_present(*pmd)) {
+		pte_t *new;
+
+		new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+		if (!new)
+			goto err_alloc;
+		pmd_populate_kernel(&init_mm, pmd, new);
+	}
+
+	return;
+
+err_alloc:
+	panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
+	      __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+}
+#endif
+
  void __init setup_per_cpu_areas(void)
  {
  	unsigned long delta;
  	unsigned int cpu;
-	int rc;
+	int rc = -EINVAL;
+
+	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
+		/*
+		 * Always reserve area for module percpu variables.  That's
+		 * what the legacy allocator did.
+		 */
+		rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
+					    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
+					    pcpu_cpu_distance,
+					    pcpu_fc_alloc, pcpu_fc_free);
+#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+		if (rc < 0)
+			pr_warn("PERCPU: %s allocator failed (%d), falling back to page size\n",
+				   pcpu_fc_names[pcpu_chosen_fc], rc);
+#endif
+	}
- /*
-	 * Always reserve area for module percpu variables.  That's
-	 * what the legacy allocator did.
-	 */
-	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
-				    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
-				    pcpu_cpu_distance,
-				    pcpu_fc_alloc, pcpu_fc_free);
+#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+	if (rc < 0)
+		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
+					   pcpu_fc_alloc,
+					   pcpu_fc_free,
+					   pcpu_populate_pte);
+#endif
  	if (rc < 0)
-		panic("Failed to initialize percpu areas.");
+		panic("Failed to initialize percpu areas (err=%d).", rc);
delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
  	for_each_possible_cpu(cpu)
--
2.26.2
.





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux