Alexey, this is still not finalized, but it would really help if you could give it a spin on your setup. I still have to think about how to transition from a memoryless node to a standard node (in the hotplug code). Also, there might be other surprises along the way. diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c5952749ad40..8ed8db2ccb13 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6382,7 +6382,11 @@ static void __build_all_zonelists(void *data) if (self && !node_online(self->node_id)) { build_zonelists(self); } else { - for_each_online_node(nid) { + /* + * All possible nodes have pgdat preallocated + * in free_area_init + */ + for_each_node(nid) { pg_data_t *pgdat = NODE_DATA(nid); build_zonelists(pgdat); @@ -8032,8 +8036,32 @@ void __init free_area_init(unsigned long *max_zone_pfn) /* Initialise every node */ mminit_verify_pageflags_layout(); setup_nr_node_ids(); - for_each_online_node(nid) { - pg_data_t *pgdat = NODE_DATA(nid); + for_each_node(nid) { + pg_data_t *pgdat; + + if (!node_online(nid)) { + pr_warn("Node %d uninitialized by the platform. Please report with boot dmesg.\n", nid); + + /* Allocator not initialized yet */ + pgdat = memblock_alloc(sizeof(*pgdat), SMP_CACHE_BYTES); + if (!pgdat) { + pr_err("Cannot allocate %zuB for node %d.\n", + sizeof(*pgdat), nid); + continue; + } + /* TODO do we need this for memoryless nodes */ + pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat); + arch_refresh_nodedata(nid, pgdat); + free_area_init_memoryless_node(nid); + /* + * not marking this node online because we do not want to + * confuse userspace by sysfs files/directories for node + * without any memory attached to it (see topology_init) + */ + continue; + } + + pgdat = NODE_DATA(nid); free_area_init_node(nid); /* Any memory on that node */ -- Michal Hocko SUSE Labs