On Sat, Aug 03, 2024 at 11:58:13AM -0700, Andrew Morton wrote: > On Fri, 2 Aug 2024 10:49:22 +0100 Jonathan Cameron <Jonathan.Cameron@xxxxxxxxxx> wrote: > > > > --- a/mm/mm_init.c > > > +++ b/mm/mm_init.c > > > @@ -1838,11 +1838,10 @@ void __init free_area_init(unsigned long *max_zone_pfn) > > > > > > if (!node_online(nid)) { > > > /* Allocator not initialized yet */ > > > - pgdat = arch_alloc_nodedata(nid); > > > + pgdat = memblock_alloc(sizeof(*pgdat), SMP_CACHE_BYTES); > > > if (!pgdat) > > > panic("Cannot allocate %zuB for node %d.\n", > > > sizeof(*pgdat), nid); > > > - arch_refresh_nodedata(nid, pgdat); > > > > This allocates pgdat but never sets node_data[nid] to it > > and promptly leaks it on the line below. > > > > Just to sanity check this I spun up a qemu machine with no memory > > initially present on some nodes and it went boom as you'd expect. > > > > I tested with addition of > > NODE_DATA(nid) = pgdat; > > and it all seems to work as expected. > > Thanks, I added that. It blew up on x86_64 allnoconfig because > node_data[] (and hence NODE_DATA()) isn't an lvalue when CONFIG_NUMA=n. > > I'll put some #ifdef CONFIG_NUMAs in there for now but > > a) NODE_DATA() is upper-case. Implies "constant". Shouldn't be assigned to. > > b) NODE_DATA() should be non-lvalue when CONFIG_NUMA=y also. But no, > we insist on implementing things in cpp instead of in C. This looks like a candidate for a separate tree-wide cleanup. > c) In fact assigning to anything which ends in "()" is nuts. Please > clean up my tempfix. > > c) Mike, generally I'm wondering if there's a bunch of code here > which isn't needed on CONFIG_NUMA=n. Please check all of this for > unneeded bloatiness. I believe the patch addresses your concerns; with this change, though, the commit log needs an update. Instead of: Replace the call to arch_alloc_nodedata() in free_area_init() with memblock_alloc(), remove arch_refresh_nodedata() and cleanup include/linux/memory_hotplug.h from the associated ifdefery. 
it should be: Replace the call to arch_alloc_nodedata() in free_area_init() with a new helper alloc_offline_node_data(), remove arch_refresh_nodedata() and cleanup include/linux/memory_hotplug.h from the associated ifdefery. I can send an updated patch if you prefer. diff --git a/include/linux/numa.h b/include/linux/numa.h index 3b12d8ca0afd..5a749fd67f39 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -34,6 +34,7 @@ extern struct pglist_data *node_data[]; #define NODE_DATA(nid) (node_data[nid]) void __init alloc_node_data(int nid); +void __init alloc_offline_node_data(int nid); /* Generic implementation available */ int numa_nearest_node(int node, unsigned int state); @@ -62,6 +63,8 @@ static inline int phys_to_target_node(u64 start) { return 0; } + +static inline void alloc_offline_node_data(int nid) {} #endif #define numa_map_to_online_node(node) numa_nearest_node(node, N_ONLINE) diff --git a/mm/mm_init.c b/mm/mm_init.c index bcc2f2dd8021..2785be04e7bb 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1836,13 +1836,8 @@ void __init free_area_init(unsigned long *max_zone_pfn) for_each_node(nid) { pg_data_t *pgdat; - if (!node_online(nid)) { - /* Allocator not initialized yet */ - pgdat = memblock_alloc(sizeof(*pgdat), SMP_CACHE_BYTES); - if (!pgdat) - panic("Cannot allocate %zuB for node %d.\n", - sizeof(*pgdat), nid); - } + if (!node_online(nid)) + alloc_offline_node_data(nid); pgdat = NODE_DATA(nid); free_area_init_node(nid); diff --git a/mm/numa.c b/mm/numa.c index da27eb151dc5..07e486a977c7 100644 --- a/mm/numa.c +++ b/mm/numa.c @@ -34,6 +34,18 @@ void __init alloc_node_data(int nid) memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); } +void __init alloc_offline_node_data(int nid) +{ + pg_data_t *pgdat; + + pgdat = memblock_alloc(sizeof(*pgdat), SMP_CACHE_BYTES); + if (!pgdat) + panic("Cannot allocate %zuB for node %d.\n", + sizeof(*pgdat), nid); + + node_data[nid] = pgdat; +} + /* Stub functions: */ #ifndef memory_add_physaddr_to_nid -- Sincerely 
yours, Mike.