On Tue, Nov 17, 2015 at 11:55:32PM +0530, Ganapatrao Kulkarni wrote: [...] > diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile > index 7987763..555c4a5 100644 > --- a/arch/arm64/kernel/Makefile > +++ b/arch/arm64/kernel/Makefile > @@ -42,6 +42,7 @@ arm64-obj-$(CONFIG_PCI) += pci.o > arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o > arm64-obj-$(CONFIG_ACPI) += acpi.o > arm64-obj-$(CONFIG_OF_NUMA) += of_numa.o > +arm64-obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o Isn't it better to merge ACPI and DT support in one file (a bit like what we did for smp.c) to remove some of this ifdeffery ? > > obj-y += $(arm64-obj-y) vdso/ > obj-m += $(arm64-obj-m) > diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c > new file mode 100644 > index 0000000..8aee205 > --- /dev/null > +++ b/arch/arm64/kernel/acpi_numa.c > @@ -0,0 +1,215 @@ > +/* > + * ACPI 5.1 based NUMA setup for ARM64 > + * Lots of code was borrowed from arch/x86/mm/srat.c > + * > + * Copyright 2004 Andi Kleen, SuSE Labs. > + * Copyright (C) 2013-2014, Linaro Ltd. > + * Author: Hanjun Guo <hanjun.guo@xxxxxxxxxx> > + * > + * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs. > + * > + * Called from acpi_numa_init while reading the SRAT and SLIT tables. > + * Assumes all memory regions belonging to a single proximity domain > + * are in one chunk. Holes between them will be included in the node. 
> + */ > + > +#define pr_fmt(fmt) "ACPI: NUMA: " fmt > + > +#include <linux/acpi.h> > +#include <linux/bitmap.h> > +#include <linux/bootmem.h> > +#include <linux/kernel.h> > +#include <linux/mm.h> > +#include <linux/memblock.h> > +#include <linux/mmzone.h> > +#include <linux/module.h> > +#include <linux/topology.h> > + > +#include <acpi/processor.h> > +#include <asm/numa.h> > + > +int acpi_numa __initdata; > + > +static __init int setup_node(int pxm) > +{ > + return acpi_map_pxm_to_node(pxm); > +} This function is not that useful given how it is used in the patch. > + > +static __init void bad_srat(void) > +{ > + pr_err("SRAT not used.\n"); > + acpi_numa = -1; > +} > + > +static __init inline int srat_disabled(void) > +{ > + return acpi_numa < 0; > +} > + > +/* > + * Callback for SLIT parsing. > + * It will get the distance information presented by SLIT > + * and init the distance matrix of numa nodes > + */ > +void __init acpi_numa_slit_init(struct acpi_table_slit *slit) > +{ > + int i, j; > + > + for (i = 0; i < slit->locality_count; i++) { > + const int from_node = pxm_to_node(i); > + > + if (from_node == NUMA_NO_NODE) > + continue; > + > + for (j = 0; j < slit->locality_count; j++) { > + const int to_node = pxm_to_node(j); > + > + if (to_node == NUMA_NO_NODE) > + continue; > + > + pr_info("SLIT: Distance[%d][%d] = %d\n", > + from_node, to_node, > + slit->entry[ > + slit->locality_count * i + j]); > + numa_set_distance(from_node, to_node, > + slit->entry[slit->locality_count * i + j]); > + } > + } > +} This function is an x86 copy'n'paste. ia64 just requires this callback to save a slit table pointer (that can be moved to generic code and it is initdata anyway), so my question is, do we really need to duplicate it ? > +static int __init get_mpidr_in_madt(int acpi_id, u64 *mpidr) > +{ Looks familiar. 
I guess you can't reuse the code in drivers/acpi (acpi_map_cpuid()) only because that implies permanent table mappings to be in place and you need to call this function before acpi_gbl_permanent_mmap is set ? > + unsigned long madt_end, entry; > + struct acpi_table_madt *madt; > + acpi_size tbl_size; > + > + if (ACPI_FAILURE(acpi_get_table_with_size(ACPI_SIG_MADT, 0, > + (struct acpi_table_header **)&madt, &tbl_size))) > + return -ENODEV; > + > + entry = (unsigned long)madt; > + madt_end = entry + madt->header.length; > + > + /* Parse all entries looking for a match. */ > + entry += sizeof(struct acpi_table_madt); > + while (entry + sizeof(struct acpi_subtable_header) < madt_end) { > + struct acpi_subtable_header *header = > + (struct acpi_subtable_header *)entry; > + > + if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT) { > + struct acpi_madt_generic_interrupt *gicc = > + container_of(header, > + struct acpi_madt_generic_interrupt, header); > + > + if ((gicc->flags & ACPI_MADT_ENABLED) && > + (gicc->uid == acpi_id)) { > + *mpidr = gicc->arm_mpidr; > + early_acpi_os_unmap_memory(madt, tbl_size); > + return 0; > + } > + } > + entry += header->length; > + } > + > + early_acpi_os_unmap_memory(madt, tbl_size); > + return -ENODEV; > +} > + > +/* Callback for Proximity Domain -> ACPI processor UID mapping */ > +void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) > +{ > + int pxm, node; > + u64 mpidr; > + static int cpus_in_srat; > + > + if (srat_disabled()) > + return; > + > + if (pa->header.length < sizeof(struct acpi_srat_gicc_affinity)) { > + bad_srat(); > + return; > + } > + > + if (!(pa->flags & ACPI_SRAT_GICC_ENABLED)) > + return; > + > + if (cpus_in_srat >= NR_CPUS) { > + pr_warn_once("SRAT: cpu_to_node_map[%ld] is too small, may not be able to use all cpus\n", > + NR_CPUS); > + return; > + } > + > + pxm = pa->proximity_domain; > + node = setup_node(pxm); > + > + if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) { > + pr_err("SRAT: Too 
many proximity domains %d\n", pxm); > + bad_srat(); > + return; > + } > + > + if (get_mpidr_in_madt(pa->acpi_processor_uid, &mpidr)) { > + pr_warn("SRAT: PXM %d with ACPI ID %d has no valid MPIDR in MADT\n", > + pxm, pa->acpi_processor_uid); > + bad_srat(); > + return; > + } > + > + cpu_to_node_map[cpus_in_srat] = node; This looks wrong. cpus_in_srat is a logical index, but I do not see why it has to be sequential. You retrieve the mpidr for a given SRAT entry and with that value you should retrieve the cpu_logical index that corresponds to it (get_logical_index()), or maybe I am missing something from the ACPI specs that enforces an ordering of SRAT entries, which we should not rely upon anyway. > + node_set(node, numa_nodes_parsed); > + acpi_numa = 1; What's the point if you are checking for < 0 in srat_disabled() ? > + cpus_in_srat++; > + pr_info("SRAT: PXM %d -> MPIDR 0x%Lx -> Node %d cpu %d\n", > + pxm, mpidr, node, cpus_in_srat); > +} > + > +/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ > +int __init acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) > +{ > + u64 start, end; > + int node, pxm; > + > + if (srat_disabled()) > + return -EINVAL; > + > + if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) { > + bad_srat(); > + return -EINVAL; > + } > + > + start = ma->base_address; > + end = start + ma->length; > + pxm = ma->proximity_domain; > + > + node = setup_node(pxm); > + > + if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) { > + pr_err("SRAT: Too many proximity domains.\n"); > + bad_srat(); > + return -EINVAL; > + } > + > + pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n", > + node, pxm, > + (unsigned long long) start, (unsigned long long) end - 1); > + > + if (numa_add_memblk(node, start, (end - start)) < 0) { > + bad_srat(); > + return -EINVAL; > + } > + > + return 0; > +} I do not see again any major changes compared to x86, numa_add_memblk() has a different interface (size vs end) but apart 
from that it would be nice to avoid rewriting the same code time and again. > + > +void __init acpi_numa_arch_fixup(void) { } Sigh. How many of these useless callbacks are we forced to define ? > + > +int __init arm64_acpi_numa_init(void) > +{ > + int ret; > + > + ret = acpi_numa_init(); > + if (ret < 0) > + return ret; > + > + return srat_disabled() ? -EINVAL : 0; > +} > diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c > index 209c7a9..c2950fc 100644 > --- a/arch/arm64/mm/numa.c > +++ b/arch/arm64/mm/numa.c > @@ -17,6 +17,7 @@ > * along with this program. If not, see <http://www.gnu.org/licenses/>. > */ > > +#include <linux/acpi.h> > #include <linux/bootmem.h> > #include <linux/ctype.h> > #include <linux/init.h> > @@ -383,9 +384,13 @@ void __init arm64_numa_init(void) > int ret = -ENODEV; > > #ifdef CONFIG_OF_NUMA > - if (!numa_off) > + if (!numa_off && acpi_disabled) > ret = numa_init(arm64_of_numa_init); > #endif > +#ifdef CONFIG_ACPI_NUMA > + if (!numa_off && !acpi_disabled) > + ret = numa_init(arm64_acpi_numa_init); > +#endif See my comment above on DT/ACPI, this ifdeffery does not look great. Thanks, Lorenzo > > if (ret) > numa_init(dummy_numa_init); > -- > 1.8.1.4 > -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html