Similar to how generic memory_add_physaddr_to_nid() interrogates memblock data for numa information, introduce get_reserved_pfn_range_from_nid() to enable the same operation for reserved memory ranges. Example memory ranges that are reserved, but still have associated numa-info are persistent memory or Soft Reserved (EFI_MEMORY_SP) memory. Cc: Mike Rapoport <rppt@xxxxxxxxxxxxx> Cc: Jia He <justin.he@xxxxxxx> Cc: Will Deacon <will@xxxxxxxxxx> Cc: David Hildenbrand <david@xxxxxxxxxx> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx> --- include/linux/memblock.h | 4 +++ include/linux/mm.h | 2 + include/linux/numa.h | 2 + mm/memblock.c | 22 ++++++++++++++-- mm/page_alloc.c | 63 +++++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 87 insertions(+), 6 deletions(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 017fae833d4a..0655e8376c72 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -234,6 +234,10 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \ i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid)) +#define for_each_reserved_pfn_range(i, nid, p_start, p_end, p_nid) \ + for (i = -1, __next_reserved_pfn_range(&i, nid, p_start, p_end, p_nid); \ + i >= 0; __next_reserved_pfn_range(&i, nid, p_start, p_end, p_nid)) + #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, unsigned long *out_spfn, diff --git a/include/linux/mm.h b/include/linux/mm.h index 1e76ee5da20b..82dac9f42c46 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2438,6 +2438,8 @@ extern unsigned long absent_pages_in_range(unsigned long start_pfn, extern void get_pfn_range_for_nid(unsigned int nid, unsigned long *start_pfn, unsigned long *end_pfn); +extern void get_reserved_pfn_range_for_nid(unsigned int nid, + unsigned long *start_pfn, unsigned long *end_pfn); extern unsigned long find_min_pfn_with_active_regions(void); extern void sparse_memory_present_with_active_regions(int nid); diff --git a/include/linux/numa.h b/include/linux/numa.h index 5d25c5de1322..52b2430bc759 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -19,7 +19,7 @@ int numa_map_to_online_node(int node); /* * Optional architecture specific implementation, users need a "depends - * on $ARCH" + * on $ARCH" or depends on CONFIG_MEMBLOCK_NUMA_INFO */ int phys_to_target_node(phys_addr_t addr); #else diff --git a/mm/memblock.c b/mm/memblock.c index 39aceafc57f6..43c3abab705e 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1200,11 +1200,11 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, /* * Common iterator interface used to define for_each_mem_pfn_range(). */ -void __init_memblock __next_mem_pfn_range(int *idx, int nid, +static void __init_memblock __next_memblock_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, - unsigned long *out_end_pfn, int *out_nid) + unsigned long *out_end_pfn, int *out_nid, + struct memblock_type *type) { - struct memblock_type *type = &memblock.memory; struct memblock_region *r; int r_nid; @@ -1230,6 +1230,22 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid, *out_nid = r_nid; } +void __init_memblock __next_mem_pfn_range(int *idx, int nid, + unsigned long *out_start_pfn, + unsigned long *out_end_pfn, int *out_nid) +{ + __next_memblock_pfn_range(idx, nid, out_start_pfn, out_end_pfn, out_nid, + &memblock.memory); +} + +void __init_memblock __next_reserved_pfn_range(int *idx, int nid, + unsigned long *out_start_pfn, + unsigned long *out_end_pfn, int *out_nid) +{ + __next_memblock_pfn_range(idx, nid, out_start_pfn, out_end_pfn, out_nid, + &memblock.reserved); +} + /** * memblock_set_node - set node ID on memblock regions * @base: base of area to set node ID for diff --git a/mm/page_alloc.c b/mm/page_alloc.c index df8bd169dbb4..94ad77c0c338 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6369,12 +6369,39 @@ void __init_or_memblock get_pfn_range_for_nid(unsigned int nid, *start_pfn = 0; } +/** + * get_reserved_pfn_range_for_nid - Return the start and end page frames for a node + * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned. + * @start_pfn: Passed by reference. On return, it will have the node start_pfn. + * @end_pfn: Passed by reference. On return, it will have the node end_pfn. + * + * Mostly identical to get_pfn_range_for_nid() except it operates on + * reserved ranges rather than online memory. + */ +void __init_or_memblock get_reserved_pfn_range_for_nid(unsigned int nid, + unsigned long *start_pfn, unsigned long *end_pfn) +{ + unsigned long this_start_pfn, this_end_pfn; + int i; + + *start_pfn = -1UL; + *end_pfn = 0; + + for_each_mem_pfn_range(i, nid, &this_start_pfn, &this_end_pfn, NULL) { + *start_pfn = min(*start_pfn, this_start_pfn); + *end_pfn = max(*end_pfn, this_end_pfn); + } + + if (*start_pfn == -1UL) + *start_pfn = 0; +} + /* * Generic implementation of memory_add_physaddr_to_nid() depends on * architecture using memblock data for numa information. */ #ifdef CONFIG_MEMBLOCK_NUMA_INFO -int __init_or_memblock memory_add_physaddr_to_nid(u64 addr) +static int __init_or_memblock __memory_add_physaddr_to_nid(u64 addr) { unsigned long start_pfn, end_pfn, pfn = PHYS_PFN(addr); int nid; @@ -6384,10 +6411,42 @@ int __init_or_memblock memory_add_physaddr_to_nid(u64 addr) if (pfn >= start_pfn && pfn <= end_pfn) return nid; } + return NUMA_NO_NODE; +} + +int __init_or_memblock memory_add_physaddr_to_nid(u64 addr) +{ + int nid = __memory_add_physaddr_to_nid(addr); + /* Default to node0 as not all callers are prepared for this to fail */ - return 0; + if (nid == NUMA_NO_NODE) + return 0; + return nid; } EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); + +int __init_or_memblock phys_to_target_node(u64 addr) +{ + unsigned long start_pfn, end_pfn, pfn = PHYS_PFN(addr); + int nid = __memory_add_physaddr_to_nid(addr); + + if (nid != NUMA_NO_NODE) + return nid; + + /* + * Search reserved memory ranges since the memory address does + * not appear to be online + */ + for_each_possible_node(nid) { + if (node_online(nid)) + continue; + get_reserved_pfn_range_for_nid(nid, &start_pfn, &end_pfn); + if (pfn >= start_pfn && pfn <= end_pfn) + return nid; + } + return NUMA_NO_NODE; +} +EXPORT_SYMBOL_GPL(phys_to_target_node); #endif /* CONFIG_MEMBLOCK_NUMA_INFO */ /*