Re: [PATCH V2 1/2] mm/sparsemem: Enable vmem_altmap support in vmemmap_populate_basepages()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2020-03-04 2:10 pm, Anshuman Khandual wrote:
vmemmap_populate_basepages() is used across platforms to allocate backing
memory for vmemmap mapping. This is used as a standard default choice or
as a fallback when intended huge pages allocation fails. This just creates
entire vmemmap mapping with base pages (PAGE_SIZE).

On arm64 platforms, vmemmap_populate_basepages() is called instead of the
platform specific vmemmap_populate() when ARM64_SWAPPER_USES_SECTION_MAPS
is not enabled as in case for ARM64_16K_PAGES and ARM64_64K_PAGES configs.

At present vmemmap_populate_basepages() does not support allocating from
driver defined struct vmem_altmap while trying to create vmemmap mapping
for a device memory range. It prevents ARM64_16K_PAGES and ARM64_64K_PAGES
configs on arm64 from supporting device memory with vmemap_altmap request.

This enables vmem_altmap support in vmemmap_populate_basepages() unlocking
device memory allocation for vmemap mapping on arm64 platforms with 16K or
64K base page configs.

Each architecture should evaluate and decide on subscribing device memory
based base page allocation through vmemmap_populate_basepages(). Hence lets
keep it disabled on all archs in order to preserve the existing semantics.
A subsequent patch enables it on arm64.

I guess buy-in for this change largely depends on whether any other architectures are likely to want to share it. The existing altmap users don't look like they would, so that's probably more a question for the likes of S390 and RISC-V.

Failing that, simply decoupling arm64 from vmemmap_populate_basepages() seems viable - I tried hacking up a quick proof-of-concept (attached at the end) and it doesn't come out looking *too* disgusting.

Cc: Catalin Marinas <catalin.marinas@xxxxxxx>
Cc: Will Deacon <will@xxxxxxxxxx>
Cc: Mark Rutland <mark.rutland@xxxxxxx>
Cc: Paul Walmsley <paul.walmsley@xxxxxxxxxx>
Cc: Palmer Dabbelt <palmer@xxxxxxxxxxx>
Cc: Tony Luck <tony.luck@xxxxxxxxx>
Cc: Fenghua Yu <fenghua.yu@xxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: David Hildenbrand <david@xxxxxxxxxx>
Cc: Mike Rapoport <rppt@xxxxxxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxx>
Cc: "Matthew Wilcox (Oracle)" <willy@xxxxxxxxxxxxx>
Cc: "Kirill A. Shutemov" <kirill.shutemov@xxxxxxxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
Cc: Pavel Tatashin <pasha.tatashin@xxxxxxxxxx>
Cc: linux-arm-kernel@xxxxxxxxxxxxxxxxxxx
Cc: linux-ia64@xxxxxxxxxxxxxxx
Cc: linux-riscv@xxxxxxxxxxxxxxxxxxx
Cc: x86@xxxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx

Acked-by: Will Deacon <will@xxxxxxxxxx>
Signed-off-by: Anshuman Khandual <anshuman.khandual@xxxxxxx>
---
  arch/arm64/mm/mmu.c      |  2 +-
  arch/ia64/mm/discontig.c |  2 +-
  arch/riscv/mm/init.c     |  2 +-
  arch/x86/mm/init_64.c    |  6 +++---
  include/linux/mm.h       |  5 +++--
  mm/sparse-vmemmap.c      | 16 +++++++++++-----
  6 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 9b08f7c7e6f0..27cb95c471eb 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1036,7 +1036,7 @@ static void free_empty_tables(unsigned long addr, unsigned long end,
  int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
  		struct vmem_altmap *altmap)
  {
-	return vmemmap_populate_basepages(start, end, node);
+	return vmemmap_populate_basepages(start, end, node, NULL);
  }
  #else	/* !ARM64_SWAPPER_USES_SECTION_MAPS */
  int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 4f33f6e7e206..20409f3afea8 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -656,7 +656,7 @@ void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
  int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
  		struct vmem_altmap *altmap)
  {
-	return vmemmap_populate_basepages(start, end, node);
+	return vmemmap_populate_basepages(start, end, node, NULL);
  }
void vmemmap_free(unsigned long start, unsigned long end,
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 965a8cf4829c..1d7451c91982 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -501,6 +501,6 @@ void __init paging_init(void)
  int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
  			       struct vmem_altmap *altmap)
  {
-	return vmemmap_populate_basepages(start, end, node);
+	return vmemmap_populate_basepages(start, end, node, NULL);
  }
  #endif
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index abbdecb75fad..3272fe0d844a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1471,7 +1471,7 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
  			vmemmap_verify((pte_t *)pmd, node, addr, next);
  			continue;
  		}
-		if (vmemmap_populate_basepages(addr, next, node))
+		if (vmemmap_populate_basepages(addr, next, node, NULL))
  			return -ENOMEM;
  	}
  	return 0;
@@ -1483,7 +1483,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
  	int err;
if (end - start < PAGES_PER_SECTION * sizeof(struct page))
-		err = vmemmap_populate_basepages(start, end, node);
+		err = vmemmap_populate_basepages(start, end, node, NULL);
  	else if (boot_cpu_has(X86_FEATURE_PSE))
  		err = vmemmap_populate_hugepages(start, end, node, altmap);
  	else if (altmap) {
@@ -1491,7 +1491,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
  				__func__);
  		err = -ENOMEM;
  	} else
-		err = vmemmap_populate_basepages(start, end, node);
+		err = vmemmap_populate_basepages(start, end, node, NULL);
  	if (!err)
  		sync_global_pgds(start, end - 1);
  	return err;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 52269e56c514..42f99c8d63c0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2780,14 +2780,15 @@ pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
  p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
  pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
  pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
-pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
+pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
+			    struct vmem_altmap *altmap);
  void *vmemmap_alloc_block(unsigned long size, int node);
  struct vmem_altmap;
  void *vmemmap_alloc_block_buf(unsigned long size, int node);
  void *altmap_alloc_block_buf(unsigned long size, struct vmem_altmap *altmap);
  void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
  int vmemmap_populate_basepages(unsigned long start, unsigned long end,
-			       int node);
+			       int node, struct vmem_altmap *altmap);
  int vmemmap_populate(unsigned long start, unsigned long end, int node,
  		struct vmem_altmap *altmap);
  void vmemmap_populate_print_last(void);
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 200aef686722..a407abc9b46c 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -140,12 +140,18 @@ void __meminit vmemmap_verify(pte_t *pte, int node,
  			start, end - 1);
  }
-pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
+pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
+				       struct vmem_altmap *altmap)
  {
  	pte_t *pte = pte_offset_kernel(pmd, addr);
  	if (pte_none(*pte)) {
  		pte_t entry;
-		void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
+		void *p;
+
+		if (altmap)
+			p = altmap_alloc_block_buf(PAGE_SIZE, altmap);
+		else
+			p = vmemmap_alloc_block_buf(PAGE_SIZE, node);

This pattern ends up appearing a number of times by the end - if we do go down the generic code route, might it be worth pushing it down into vmmemmap_alloc_block_buf() itself to make it automatic? (possibly even including the powerpc fallback behaviour too?)

Robin.

  		if (!p)
  			return NULL;
  		entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
@@ -213,8 +219,8 @@ pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
  	return pgd;
  }
-int __meminit vmemmap_populate_basepages(unsigned long start,
-					 unsigned long end, int node)
+int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
+					 int node, struct vmem_altmap *altmap)
  {
  	unsigned long addr = start;
  	pgd_t *pgd;
@@ -236,7 +242,7 @@ int __meminit vmemmap_populate_basepages(unsigned long start,
  		pmd = vmemmap_pmd_populate(pud, addr, node);
  		if (!pmd)
  			return -ENOMEM;
-		pte = vmemmap_pte_populate(pmd, addr, node);
+		pte = vmemmap_pte_populate(pmd, addr, node, altmap);
  		if (!pte)
  			return -ENOMEM;
  		vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);


----->8-----
From: Robin Murphy <robin.murphy@xxxxxxx>
Subject: [PATCH] arm64/mm: Consolidate vmemmap_populate()

Since we already have a custom vmemmap_populate() implementation, fold
the non-section-map case into that as well, so that we can easily add
altmap support for both cases without having to mess with core code.

Signed-off-by: Robin Murphy <robin.murphy@xxxxxxx>
---
 arch/arm64/mm/mmu.c | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 128f70852bf3..e250fd414b2b 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -725,13 +725,6 @@ int kern_addr_valid(unsigned long addr)
 	return pfn_valid(pte_pfn(pte));
 }
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-#if !ARM64_SWAPPER_USES_SECTION_MAPS
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
-		struct vmem_altmap *altmap)
-{
-	return vmemmap_populate_basepages(start, end, node);
-}
-#else	/* !ARM64_SWAPPER_USES_SECTION_MAPS */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 		struct vmem_altmap *altmap)
 {
@@ -740,6 +733,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 	pgd_t *pgdp;
 	pud_t *pudp;
 	pmd_t *pmdp;
+	pte_t *ptep;

 	do {
 		next = pmd_addr_end(addr, end);
@@ -752,22 +746,36 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 		if (!pudp)
 			return -ENOMEM;

+#if ARM64_SWAPPER_USES_SECTION_MAPS
 		pmdp = pmd_offset(pudp, addr);
 		if (pmd_none(READ_ONCE(*pmdp))) {
-			void *p = NULL;
-
-			p = vmemmap_alloc_block_buf(PMD_SIZE, node);
+			void *p = vmemmap_alloc_block_buf(PMD_SIZE, node);
 			if (!p)
 				return -ENOMEM;

 			pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
-		} else
-			vmemmap_verify((pte_t *)pmdp, node, addr, next);
+			continue;
+		}
+#else
+		pmdp = vmemmap_pmd_populate(pmdp, addr, node);
+		if (!pmdp)
+			return -ENOMEM;
+
+		ptep = pte_offset_kernel(pmdp, addr);
+		if (pte_none(READ_ONCE(*ptep))) {
+			void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
+			if (!p)
+				return -ENOMEM;
+
+			set_pte(ptep, pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL));
+		}
+#endif
+		vmemmap_verify((pte_t *)pmdp, node, addr, next);
 	} while (addr = next, addr != end);

 	return 0;
 }
-#endif	/* !ARM64_SWAPPER_USES_SECTION_MAPS */
+
 void vmemmap_free(unsigned long start, unsigned long end,
 		struct vmem_altmap *altmap)
 {




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux