Re: [PATCH v5 6/7] mm/hotplug: Embed vmem_altmap details in memory block

David Hildenbrand <david@xxxxxxxxxx> · Wed, 26 Jul 2023 18:43:44 +0200

On 26.07.23 12:31, Aneesh Kumar K.V wrote:
David Hildenbrand <david@xxxxxxxxxx> writes:

On 25.07.23 12:02, Aneesh Kumar K.V wrote:
With memmap on memory, some architecture needs more details w.r.t altmap
such as base_pfn, end_pfn, etc to unmap vmemmap memory. Instead of
computing them again when we remove a memory block, embed vmem_altmap
details in struct memory_block if we are using memmap on memory block
feature.

No functional change in this patch

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxxxxx>
---

[...]

   
   static int add_memory_block(unsigned long block_id, unsigned long state,
-			    unsigned long nr_vmemmap_pages,
+			    struct vmem_altmap *altmap,
   			    struct memory_group *group)
   {
   	struct memory_block *mem;
@@ -744,7 +751,14 @@ static int add_memory_block(unsigned long block_id, unsigned long state,
   	mem->start_section_nr = block_id * sections_per_block;
   	mem->state = state;
   	mem->nid = NUMA_NO_NODE;
-	mem->nr_vmemmap_pages = nr_vmemmap_pages;
+	if (altmap) {
+		mem->altmap = kmalloc(sizeof(struct vmem_altmap), GFP_KERNEL);
+		if (!mem->altmap) {
+			kfree(mem);
+			return -ENOMEM;
+		}
+		memcpy(mem->altmap, altmap, sizeof(*altmap));
+	}

I'm wondering if we should instead let the caller do the alloc/free. So we would alloc
int the caller and would only store the pointer.

Before removing the memory block, we would clear the pointer and free it in the caller.

IOW, when removing a memory block and we still have an altmap set, something would be wrong.

See below on try_remove_memory() handling.

[...]

-static int get_nr_vmemmap_pages_cb(struct memory_block *mem, void *arg)
+static int get_vmemmap_altmap_cb(struct memory_block *mem, void *arg)
   {
+	struct vmem_altmap *altmap = (struct vmem_altmap *)arg;
   	/*
-	 * If not set, continue with the next block.
+	 * If we have any pages allocated from altmap
+	 * return the altmap details and break callback.
   	 */
-	return mem->nr_vmemmap_pages;
+	if (mem->altmap) {
+		memcpy(altmap, mem->altmap, sizeof(struct vmem_altmap));
+		return 1;
+	}
+	return 0;
   }
   
   static int check_cpu_on_node(int nid)
@@ -2146,9 +2152,8 @@ EXPORT_SYMBOL(try_offline_node);
   
   static int __ref try_remove_memory(u64 start, u64 size)
   {
-	struct vmem_altmap mhp_altmap = {};
-	struct vmem_altmap *altmap = NULL;
-	unsigned long nr_vmemmap_pages;
+	int ret;
+	struct vmem_altmap mhp_altmap, *altmap = NULL;
   	int rc = 0, nid = NUMA_NO_NODE;
   
   	BUG_ON(check_hotplug_memory_range(start, size));
@@ -2171,24 +2176,15 @@ static int __ref try_remove_memory(u64 start, u64 size)
   	 * the same granularity it was added - a single memory block.
   	 */
   	if (mhp_memmap_on_memory()) {
-		nr_vmemmap_pages = walk_memory_blocks(start, size, NULL,
-						      get_nr_vmemmap_pages_cb);
-		if (nr_vmemmap_pages) {
+		ret = walk_memory_blocks(start, size, &mhp_altmap,
+					 get_vmemmap_altmap_cb);
+		if (ret) {
   			if (size != memory_block_size_bytes()) {
   				pr_warn("Refuse to remove %#llx - %#llx,"
   					"wrong granularity\n",
   					start, start + size);
   				return -EINVAL;
   			}
-
-			/*
-			 * Let remove_pmd_table->free_hugepage_table do the
-			 * right thing if we used vmem_altmap when hot-adding
-			 * the range.
-			 */
-			mhp_altmap.base_pfn = PHYS_PFN(start);
-			mhp_altmap.free = nr_vmemmap_pages;
-			mhp_altmap.alloc = nr_vmemmap_pages;
   			altmap = &mhp_altmap;
   		}


Instead of that, I suggest (whitespace damage expected):

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 3f231cf1b410..f6860df64549 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1956,12 +1956,19 @@ static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
          return 0;
   }
   
-static int get_nr_vmemmap_pages_cb(struct memory_block *mem, void *arg)
+static int test_has_altmap_cb(struct memory_block *mem, void *arg)
   {
-       /*
-        * If not set, continue with the next block.
-        */
-       return mem->nr_vmemmap_pages;
+       struct memory_block **mem_ptr = (struct memory_block **)arg;
+
+       if (mem->altmap) {
+               /*
+                * We're not taking a reference on the memory block; it
+                * it cannot vanish while we're about to that memory ourselves.
+                */
+               *mem_ptr = mem;
+               return 1;
+       }
+       return 0;
   }
   
   static int check_cpu_on_node(int nid)
@@ -2036,9 +2043,7 @@ EXPORT_SYMBOL(try_offline_node);
   
   static int __ref try_remove_memory(u64 start, u64 size)
   {
-       struct vmem_altmap mhp_altmap = {};
          struct vmem_altmap *altmap = NULL;
-       unsigned long nr_vmemmap_pages;
          int rc = 0, nid = NUMA_NO_NODE;
   
          BUG_ON(check_hotplug_memory_range(start, size));
@@ -2061,9 +2066,9 @@ static int __ref try_remove_memory(u64 start, u64 size)
           * the same granularity it was added - a single memory block.
           */
          if (mhp_memmap_on_memory()) {
-               nr_vmemmap_pages = walk_memory_blocks(start, size, NULL,
-                                                     get_nr_vmemmap_pages_cb);
-               if (nr_vmemmap_pages) {
+               struct memory_block *mem;
+
+               if (walk_memory_blocks(start, size, &mem, test_has_altmap_cb)) {
                          if (size != memory_block_size_bytes()) {
                                  pr_warn("Refuse to remove %#llx - %#llx,"
                                          "wrong granularity\n",
@@ -2072,12 +2077,11 @@ static int __ref try_remove_memory(u64 start, u64 size)
                          }
   
                          /*
-                        * Let remove_pmd_table->free_hugepage_table do the
-                        * right thing if we used vmem_altmap when hot-adding
-                        * the range.
+                        * Clear the altmap from the memory block before we
+                        * remove it; we'll take care of freeing the altmap.
                           */
-                       mhp_altmap.alloc = nr_vmemmap_pages;
-                       altmap = &mhp_altmap;
+                       altmap = mem->altmap;
+                       mem->altmap = NULL;
                  }
          }
   
@@ -2094,6 +2098,9 @@ static int __ref try_remove_memory(u64 start, u64 size)
   
          arch_remove_memory(start, size, altmap);
   
+       if (altmap)
+               kfree(altmap);
+
          if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) {
                  memblock_phys_free(start, size);
                  memblock_remove(start, size);


Is this any better. Any specific reason we want the alloc and free in
the caller?


IMHO if you only have a single instance you don't have to worry about 
any inconsistencies. At least to me it looks cleaner than this copying 
back and forth.

Below is better, but as you pointed out, your get_vmemmap_altmap_cb() 
change is problematic/insufficient.

--
Cheers,

David / dhildenb