On Tue, Mar 5, 2024 at 1:24 PM Sunil V L <sunilvl@xxxxxxxxxxxxxxxx> wrote: > > On Wed, Jan 31, 2024 at 10:32:00AM +0800, Haibo Xu wrote: > > Add acpi_numa.c file to enable parse NUMA information from > > ACPI SRAT and SLIT tables. SRAT table provide CPUs(Hart) and > > memory nodes to proximity domain mapping, while SLIT table > > provide the distance metrics between proximity domains. > > > > Signed-off-by: Haibo Xu <haibo1.xu@xxxxxxxxx> > > --- > > arch/riscv/include/asm/acpi.h | 15 +++- > > arch/riscv/kernel/Makefile | 1 + > > arch/riscv/kernel/acpi.c | 5 -- > > arch/riscv/kernel/acpi_numa.c | 133 ++++++++++++++++++++++++++++++++++ > > arch/riscv/kernel/setup.c | 4 +- > > arch/riscv/kernel/smpboot.c | 2 - > > drivers/acpi/numa/srat.c | 3 +- > > include/linux/acpi.h | 4 + > > 8 files changed, 156 insertions(+), 11 deletions(-) > > create mode 100644 arch/riscv/kernel/acpi_numa.c > > > > diff --git a/arch/riscv/include/asm/acpi.h b/arch/riscv/include/asm/acpi.h > > index 7dad0cf9d701..e0a1f84404f3 100644 > > --- a/arch/riscv/include/asm/acpi.h > > +++ b/arch/riscv/include/asm/acpi.h > > @@ -61,11 +61,14 @@ static inline void arch_fix_phys_package_id(int num, u32 slot) { } > > > > void acpi_init_rintc_map(void); > > struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu); > > -u32 get_acpi_id_for_cpu(int cpu); > > +static inline u32 get_acpi_id_for_cpu(int cpu) > > +{ > > + return acpi_cpu_get_madt_rintc(cpu)->uid; > > +} > > + > > int acpi_get_riscv_isa(struct acpi_table_header *table, > > unsigned int cpu, const char **isa); > > > > -static inline int acpi_numa_get_nid(unsigned int cpu) { return NUMA_NO_NODE; } > > void acpi_get_cbo_block_size(struct acpi_table_header *table, u32 *cbom_size, > > u32 *cboz_size, u32 *cbop_size); > > #else > > @@ -87,4 +90,12 @@ static inline void acpi_get_cbo_block_size(struct acpi_table_header *table, > > > > #endif /* CONFIG_ACPI */ > > > > +#ifdef CONFIG_ACPI_NUMA > > +int acpi_numa_get_nid(unsigned int cpu); > > +void 
acpi_map_cpus_to_nodes(void); > > +#else > > +static inline int acpi_numa_get_nid(unsigned int cpu) { return NUMA_NO_NODE; } > > +static inline void acpi_map_cpus_to_nodes(void) { } > > +#endif /* CONFIG_ACPI_NUMA */ > > + > > #endif /*_ASM_ACPI_H*/ > > diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile > > index f71910718053..5d3e9cf89b76 100644 > > --- a/arch/riscv/kernel/Makefile > > +++ b/arch/riscv/kernel/Makefile > > @@ -105,3 +105,4 @@ obj-$(CONFIG_COMPAT) += compat_vdso/ > > > > obj-$(CONFIG_64BIT) += pi/ > > obj-$(CONFIG_ACPI) += acpi.o > > +obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o > > diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c > > index e619edc8b0cc..040bdbfea2b4 100644 > > --- a/arch/riscv/kernel/acpi.c > > +++ b/arch/riscv/kernel/acpi.c > > @@ -191,11 +191,6 @@ struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu) > > return &cpu_madt_rintc[cpu]; > > } > > > > -u32 get_acpi_id_for_cpu(int cpu) > > -{ > > - return acpi_cpu_get_madt_rintc(cpu)->uid; > > -} > > - > > /* > > * __acpi_map_table() will be called before paging_init(), so early_ioremap() > > * or early_memremap() should be called here to for ACPI table mapping. > > diff --git a/arch/riscv/kernel/acpi_numa.c b/arch/riscv/kernel/acpi_numa.c > > new file mode 100644 > > index 000000000000..493642a61457 > > --- /dev/null > > +++ b/arch/riscv/kernel/acpi_numa.c > > @@ -0,0 +1,133 @@ > > +// SPDX-License-Identifier: GPL-2.0 > > +/* > > + * ACPI 6.6 based NUMA setup for RISCV > > + * Lots of code was borrowed from arch/arm64/kernel/acpi_numa.c > > + * > > + * Copyright 2004 Andi Kleen, SuSE Labs. > > + * Copyright (C) 2013-2016, Linaro Ltd. > > + * Author: Hanjun Guo <hanjun.guo@xxxxxxxxxx> > > + * Copyright (C) 2024 Intel Corporation. > > + * > > + * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs. > > + * > > + * Called from acpi_numa_init while reading the SRAT and SLIT tables. 
> > + * Assumes all memory regions belonging to a single proximity domain > > + * are in one chunk. Holes between them will be included in the node. > > + */ > > + > > +#define pr_fmt(fmt) "ACPI: NUMA: " fmt > > + > > +#include <linux/acpi.h> > > +#include <linux/bitmap.h> > > +#include <linux/kernel.h> > > +#include <linux/mm.h> > > +#include <linux/memblock.h> > > +#include <linux/mmzone.h> > > +#include <linux/module.h> > > +#include <linux/topology.h> > > + > > +#include <asm/numa.h> > > + > > +static int acpi_early_node_map[NR_CPUS] __initdata = { NUMA_NO_NODE }; > > + > > +int __init acpi_numa_get_nid(unsigned int cpu) > > +{ > > + return acpi_early_node_map[cpu]; > > +} > > + > > +static inline int get_cpu_for_acpi_id(u32 uid) > > +{ > > + int cpu; > > + > > + for (cpu = 0; cpu < nr_cpu_ids; cpu++) > > + if (uid == get_acpi_id_for_cpu(cpu)) > > + return cpu; > > + > > + return -EINVAL; > > +} > > + > > +static int __init acpi_parse_rintc_pxm(union acpi_subtable_headers *header, > > + const unsigned long end) > > Please check alignment. > Sure. > > +{ > > + struct acpi_srat_rintc_affinity *pa; > > + int cpu, pxm, node; > > + > > + if (srat_disabled()) > > + return -EINVAL; > > + > > + pa = (struct acpi_srat_rintc_affinity *)header; > > + if (!pa) > > + return -EINVAL; > > + > > + if (!(pa->flags & ACPI_SRAT_RINTC_ENABLED)) > > + return 0; > > + > > + pxm = pa->proximity_domain; > > + node = pxm_to_node(pxm); > > + > > + /* > > + * If we can't map the UID to a logical cpu this > > + * means that the UID is not part of possible cpus > > + * so we do not need a NUMA mapping for it, skip > > + * the SRAT entry and keep parsing. 
> > + */ > > + cpu = get_cpu_for_acpi_id(pa->acpi_processor_uid); > > + if (cpu < 0) > > + return 0; > > + > > + acpi_early_node_map[cpu] = node; > > + pr_info("SRAT: PXM %d -> HARTID 0x%lx -> Node %d\n", pxm, > > + cpuid_to_hartid_map(cpu), node); > > + > > + return 0; > > +} > > + > > +void __init acpi_map_cpus_to_nodes(void) > > +{ > > + int i; > > + > > + /* > > + * In ACPI, SMP and CPU NUMA information is provided in separate > > + * static tables, namely the MADT and the SRAT. > > + * > > + * Thus, it is simpler to first create the cpu logical map through > > + * an MADT walk and then map the logical cpus to their node ids > > + * as separate steps. > > + */ > > + acpi_table_parse_entries(ACPI_SIG_SRAT, sizeof(struct acpi_table_srat), > > + ACPI_SRAT_TYPE_RINTC_AFFINITY, > > + acpi_parse_rintc_pxm, 0); > > + > Alignment here as well. > Sure. > > + for (i = 0; i < nr_cpu_ids; i++) > > + early_map_cpu_to_node(i, acpi_numa_get_nid(i)); > > +} > > + > > +/* Callback for Proximity Domain -> logical node ID mapping */ > > +void __init acpi_numa_rintc_affinity_init(struct acpi_srat_rintc_affinity *pa) > > +{ > > + int pxm, node; > > + > > + if (srat_disabled()) > > + return; > > + > > + if (pa->header.length < sizeof(struct acpi_srat_rintc_affinity)) { > > + pr_err("SRAT: Invalid SRAT header length: %d\n", > > + pa->header.length); > Can we merge these into single line? 
> > > + bad_srat(); > > + return; > > + } > > + > > + if (!(pa->flags & ACPI_SRAT_RINTC_ENABLED)) > > + return; > > + > > + pxm = pa->proximity_domain; > > + node = acpi_map_pxm_to_node(pxm); > > + > > + if (node == NUMA_NO_NODE) { > > + pr_err("SRAT: Too many proximity domains %d\n", pxm); > > + bad_srat(); > > + return; > > + } > > + > > + node_set(node, numa_nodes_parsed); > > +} > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c > > index 4f73c0ae44b2..a2cde65b69e9 100644 > > --- a/arch/riscv/kernel/setup.c > > +++ b/arch/riscv/kernel/setup.c > > @@ -281,8 +281,10 @@ void __init setup_arch(char **cmdline_p) > > setup_smp(); > > #endif > > > > - if (!acpi_disabled) > > + if (!acpi_disabled) { > > acpi_init_rintc_map(); > > + acpi_map_cpus_to_nodes(); > Is it not possible to fill up both in single parsing of MADT? > I think it's not possible to fill both in a single MADT parse since the NUMA info is provided in a separate SRAT table. For RISC-V, we currently parse the MADT twice. The first pass is in the setup_smp()->acpi_parse_and_init_cpus() call, which builds up cpuid_to_hartid_map. The second pass is in the acpi_init_rintc_map() call, which builds the cpu_madt_rintc[] cache structure. Since the first one depends on CONFIG_SMP, I am not sure whether it's possible to combine these two parts into one. 
> > + } > > > > riscv_init_cbo_blocksizes(); > > riscv_fill_hwcap(); > > diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c > > index 519b6bd946e5..b188d83d1ec4 100644 > > --- a/arch/riscv/kernel/smpboot.c > > +++ b/arch/riscv/kernel/smpboot.c > > @@ -101,7 +101,6 @@ static int __init acpi_parse_rintc(union acpi_subtable_headers *header, const un > > if (hart == cpuid_to_hartid_map(0)) { > > BUG_ON(found_boot_cpu); > > found_boot_cpu = true; > > - early_map_cpu_to_node(0, acpi_numa_get_nid(cpu_count)); > > return 0; > > } > > > > @@ -111,7 +110,6 @@ static int __init acpi_parse_rintc(union acpi_subtable_headers *header, const un > > } > > > > cpuid_to_hartid_map(cpu_count) = hart; > > - early_map_cpu_to_node(cpu_count, acpi_numa_get_nid(cpu_count)); > > cpu_count++; > > > > return 0; > > diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c > > index 503abcf6125d..1f0462cef47c 100644 > > --- a/drivers/acpi/numa/srat.c > > +++ b/drivers/acpi/numa/srat.c > > @@ -219,7 +219,8 @@ int __init srat_disabled(void) > > return acpi_numa < 0; > > } > > > > -#if defined(CONFIG_X86) || defined(CONFIG_ARM64) || defined(CONFIG_LOONGARCH) > > +#if defined(CONFIG_X86) || defined(CONFIG_ARM64) || defined(CONFIG_LOONGARCH) || \ > > + defined(CONFIG_RISCV) > Please check alignment. Or make it single line if fits in 100 chars. > Also, it looks it covers most of the architectures now. Is it possible > to simplify / remove the condition? I hope IA64 is removed now? > Good catch! Since IA64 support was removed in commit cf8e8658100d4 (https://lwn.net/Articles/923376/), I think it's possible to remove the condition. Will fix it in v2. Thanks, Haibo > May be you need to update the comment at #endif too. > > Thanks > Sunil > > > /* > > * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for > > * I/O localities since SRAT does not list them. 
I/O localities are > > diff --git a/include/linux/acpi.h b/include/linux/acpi.h > > index a65273db55c6..be78a9d28927 100644 > > --- a/include/linux/acpi.h > > +++ b/include/linux/acpi.h > > @@ -269,8 +269,12 @@ acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) { } > > > > int acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma); > > > > +#ifdef CONFIG_RISCV > > +void acpi_numa_rintc_affinity_init(struct acpi_srat_rintc_affinity *pa); > > +#else > > static inline void > > acpi_numa_rintc_affinity_init(struct acpi_srat_rintc_affinity *pa) { } > > +#endif > > > > #ifndef PHYS_CPUID_INVALID > > typedef u32 phys_cpuid_t; > > -- > > 2.34.1 > >