David Hildenbrand <david@xxxxxxxxxx> writes: > On 25.07.23 12:02, Aneesh Kumar K.V wrote: >> With memmap on memory, some architecture needs more details w.r.t altmap >> such as base_pfn, end_pfn, etc to unmap vmemmap memory. Instead of >> computing them again when we remove a memory block, embed vmem_altmap >> details in struct memory_block if we are using memmap on memory block >> feature. >> >> No functional change in this patch >> >> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxxxxx> >> --- > > [...] > >> >> static int add_memory_block(unsigned long block_id, unsigned long state, >> - unsigned long nr_vmemmap_pages, >> + struct vmem_altmap *altmap, >> struct memory_group *group) >> { >> struct memory_block *mem; >> @@ -744,7 +751,14 @@ static int add_memory_block(unsigned long block_id, unsigned long state, >> mem->start_section_nr = block_id * sections_per_block; >> mem->state = state; >> mem->nid = NUMA_NO_NODE; >> - mem->nr_vmemmap_pages = nr_vmemmap_pages; >> + if (altmap) { >> + mem->altmap = kmalloc(sizeof(struct vmem_altmap), GFP_KERNEL); >> + if (!mem->altmap) { >> + kfree(mem); >> + return -ENOMEM; >> + } >> + memcpy(mem->altmap, altmap, sizeof(*altmap)); >> + } > > I'm wondering if we should instead let the caller do the alloc/free. So we would alloc > int the caller and would only store the pointer. > > Before removing the memory block, we would clear the pointer and free it in the caller. > > IOW, when removing a memory block and we still have an altmap set, something would be wrong. > > See below on try_remove_memory() handling. > > [...] > >> -static int get_nr_vmemmap_pages_cb(struct memory_block *mem, void *arg) >> +static int get_vmemmap_altmap_cb(struct memory_block *mem, void *arg) >> { >> + struct vmem_altmap *altmap = (struct vmem_altmap *)arg; >> /* >> - * If not set, continue with the next block. >> + * If we have any pages allocated from altmap >> + * return the altmap details and break callback. >> */ >> - return mem->nr_vmemmap_pages; >> + if (mem->altmap) { >> + memcpy(altmap, mem->altmap, sizeof(struct vmem_altmap)); >> + return 1; >> + } >> + return 0; >> } >> >> static int check_cpu_on_node(int nid) >> @@ -2146,9 +2152,8 @@ EXPORT_SYMBOL(try_offline_node); >> >> static int __ref try_remove_memory(u64 start, u64 size) >> { >> - struct vmem_altmap mhp_altmap = {}; >> - struct vmem_altmap *altmap = NULL; >> - unsigned long nr_vmemmap_pages; >> + int ret; >> + struct vmem_altmap mhp_altmap, *altmap = NULL; >> int rc = 0, nid = NUMA_NO_NODE; >> >> BUG_ON(check_hotplug_memory_range(start, size)); >> @@ -2171,24 +2176,15 @@ static int __ref try_remove_memory(u64 start, u64 size) >> * the same granularity it was added - a single memory block. >> */ >> if (mhp_memmap_on_memory()) { >> - nr_vmemmap_pages = walk_memory_blocks(start, size, NULL, >> - get_nr_vmemmap_pages_cb); >> - if (nr_vmemmap_pages) { >> + ret = walk_memory_blocks(start, size, &mhp_altmap, >> + get_vmemmap_altmap_cb); >> + if (ret) { >> if (size != memory_block_size_bytes()) { >> pr_warn("Refuse to remove %#llx - %#llx," >> "wrong granularity\n", >> start, start + size); >> return -EINVAL; >> } >> - >> - /* >> - * Let remove_pmd_table->free_hugepage_table do the >> - * right thing if we used vmem_altmap when hot-adding >> - * the range. >> - */ >> - mhp_altmap.base_pfn = PHYS_PFN(start); >> - mhp_altmap.free = nr_vmemmap_pages; >> - mhp_altmap.alloc = nr_vmemmap_pages; >> altmap = &mhp_altmap; >> } > > > Instead of that, I suggest (whitespace damage expected): > > diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c > index 3f231cf1b410..f6860df64549 100644 > --- a/mm/memory_hotplug.c > +++ b/mm/memory_hotplug.c > @@ -1956,12 +1956,19 @@ static int check_memblock_offlined_cb(struct memory_block *mem, void *arg) > return 0; > } > > -static int get_nr_vmemmap_pages_cb(struct memory_block *mem, void *arg) > +static int test_has_altmap_cb(struct memory_block *mem, void *arg) > { > - /* > - * If not set, continue with the next block. > - */ > - return mem->nr_vmemmap_pages; > + struct memory_block **mem_ptr = (struct memory_block **)arg; > + > + if (mem->altmap) { > + /* > + * We're not taking a reference on the memory block; it > + * it cannot vanish while we're about to that memory ourselves. > + */ > + *mem_ptr = mem; > + return 1; > + } > + return 0; > } > > static int check_cpu_on_node(int nid) > @@ -2036,9 +2043,7 @@ EXPORT_SYMBOL(try_offline_node); > > static int __ref try_remove_memory(u64 start, u64 size) > { > - struct vmem_altmap mhp_altmap = {}; > struct vmem_altmap *altmap = NULL; > - unsigned long nr_vmemmap_pages; > int rc = 0, nid = NUMA_NO_NODE; > > BUG_ON(check_hotplug_memory_range(start, size)); > @@ -2061,9 +2066,9 @@ static int __ref try_remove_memory(u64 start, u64 size) > * the same granularity it was added - a single memory block. > */ > if (mhp_memmap_on_memory()) { > - nr_vmemmap_pages = walk_memory_blocks(start, size, NULL, > - get_nr_vmemmap_pages_cb); > - if (nr_vmemmap_pages) { > + struct memory_block *mem; > + > + if (walk_memory_blocks(start, size, &mem, test_has_altmap_cb)) { > if (size != memory_block_size_bytes()) { > pr_warn("Refuse to remove %#llx - %#llx," > "wrong granularity\n", > @@ -2072,12 +2077,11 @@ static int __ref try_remove_memory(u64 start, u64 size) > } > > /* > - * Let remove_pmd_table->free_hugepage_table do the > - * right thing if we used vmem_altmap when hot-adding > - * the range. > + * Clear the altmap from the memory block before we > + * remove it; we'll take care of freeing the altmap. > */ > - mhp_altmap.alloc = nr_vmemmap_pages; > - altmap = &mhp_altmap; > + altmap = mem->altmap; > + mem->altmap = NULL; > } > } > > @@ -2094,6 +2098,9 @@ static int __ref try_remove_memory(u64 start, u64 size) > > arch_remove_memory(start, size, altmap); > > + if (altmap) > + kfree(altmap); > + > if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) { > memblock_phys_free(start, size); > memblock_remove(start, size); > Is this any better. Any specific reason we want the alloc and free in the caller? diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 0210ed7b7696..271cfdf8f6b6 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -106,7 +106,7 @@ static void memory_block_release(struct device *dev) { struct memory_block *mem = to_memory_block(dev); - kfree(mem->altmap); + WARN_ON(mem->altmap); kfree(mem); } @@ -751,14 +751,8 @@ static int add_memory_block(unsigned long block_id, unsigned long state, mem->start_section_nr = block_id * sections_per_block; mem->state = state; mem->nid = NUMA_NO_NODE; - if (altmap) { - mem->altmap = kmalloc(sizeof(struct vmem_altmap), GFP_KERNEL); - if (!mem->altmap) { - kfree(mem); - return -ENOMEM; - } - memcpy(mem->altmap, altmap, sizeof(*altmap)); - } + if (altmap) + mem->altmap = altmap; INIT_LIST_HEAD(&mem->group_next); #ifndef CONFIG_NUMA diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 2bad1bf0e9e3..1c7d88332e0e 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1445,8 +1445,13 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) */ if (mhp_flags & MHP_MEMMAP_ON_MEMORY) { if (mhp_supports_memmap_on_memory(size)) { + mhp_altmap.free = memory_block_memmap_on_memory_pages(); - params.altmap = &mhp_altmap; + params.altmap = kzalloc(sizeof(struct vmem_altmap), GFP_KERNEL); + if (!params.altmap) + goto error; + + memcpy(params.altmap, &mhp_altmap, sizeof(mhp_altmap)); } /* fallback to not using altmap */ } @@ -2067,13 +2072,14 @@ static int check_memblock_offlined_cb(struct memory_block *mem, void *arg) static int get_vmemmap_altmap_cb(struct memory_block *mem, void *arg) { - struct vmem_altmap *altmap = (struct vmem_altmap *)arg; + struct vmem_altmap **altmap = (struct vmem_altmap **)arg; /* * If we have any pages allocated from altmap * return the altmap details and break callback. */ if (mem->altmap) { - memcpy(altmap, mem->altmap, sizeof(struct vmem_altmap)); + *altmap = mem->altmap; + mem->altmap = NULL; return 1; } return 0; @@ -2152,7 +2158,7 @@ EXPORT_SYMBOL(try_offline_node); static int __ref try_remove_memory(u64 start, u64 size) { int ret; - struct vmem_altmap mhp_altmap, *altmap = NULL; + struct vmem_altmap *altmap = NULL; int rc = 0, nid = NUMA_NO_NODE; BUG_ON(check_hotplug_memory_range(start, size)); @@ -2174,7 +2180,7 @@ static int __ref try_remove_memory(u64 start, u64 size) * We only support removing memory added with MHP_MEMMAP_ON_MEMORY in * the same granularity it was added - a single memory block. */ - ret = walk_memory_blocks(start, size, &mhp_altmap, + ret = walk_memory_blocks(start, size, &altmap, get_vmemmap_altmap_cb); if (ret) { if (size != memory_block_size_bytes()) { @@ -2183,7 +2189,6 @@ static int __ref try_remove_memory(u64 start, u64 size) start, start + size); return -EINVAL; } - altmap = &mhp_altmap; } /* remove memmap entry */ @@ -2203,8 +2208,10 @@ static int __ref try_remove_memory(u64 start, u64 size) * Now that we are tracking alloc and free correctly * we can add check to verify altmap free pages. */ - if (altmap) + if (altmap) { WARN(altmap->alloc, "Altmap not fully unmapped"); + kfree(altmap); + } if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) { memblock_phys_free(start, size);