From: Noam Camus <noamca@xxxxxxxxxxxx> Now it is used for the NPS SoC, which has 256 cores (multi-core) and 16 HW threads per core (SMT). This way, with topology information, the scheduler is much more efficient at creating domains and later using them. Signed-off-by: Noam Camus <noamca at mellanox.com> --- arch/arc/Kconfig | 27 ++++++++ arch/arc/include/asm/Kbuild | 1 - arch/arc/include/asm/topology.h | 34 +++++++++++ arch/arc/kernel/Makefile | 1 + arch/arc/kernel/setup.c | 4 +- arch/arc/kernel/smp.c | 5 ++ arch/arc/kernel/topology.c | 125 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 194 insertions(+), 3 deletions(-) create mode 100644 arch/arc/include/asm/topology.h create mode 100644 arch/arc/kernel/topology.c diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index f464f97..08a9003 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -202,6 +202,33 @@ config ARC_SMP_HALT_ON_RESET at designated entry point. For other case, all jump to common entry point and spin wait for Master's signal. +config NPS_CPU_TOPOLOGY + bool "Support cpu topology definition" + depends on EZNPS_MTM_EXT + default y + help + Support NPS cpu topology definition. + NPS400 has 16 clusters of cores. + Each NPS400 cluster has 16 cores. + Each NPS core has 16 symmetrical threads. + In total there are 4096 such threads (NR_CPUS=4096) + +config SCHED_MC + bool "Multi-core scheduler support" + depends on NPS_CPU_TOPOLOGY + help + Multi-core scheduler support improves the CPU scheduler's decision + making when dealing with multi-core CPU chips at a cost of slightly + increased overhead in some places. If unsure say N here. + +config SCHED_SMT + bool "SMT scheduler support" + depends on NPS_CPU_TOPOLOGY + help + Improves the CPU scheduler's decision making when dealing with + MultiThreading at a cost of slightly increased overhead in some + places. If unsure say N here. 
+ endif #SMP config ARC_MCIP diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index 7bee4e4..d8cb607 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -43,7 +43,6 @@ generic-y += stat.h generic-y += statfs.h generic-y += termbits.h generic-y += termios.h -generic-y += topology.h generic-y += trace_clock.h generic-y += types.h generic-y += ucontext.h diff --git a/arch/arc/include/asm/topology.h b/arch/arc/include/asm/topology.h new file mode 100644 index 0000000..a9be3f8 --- /dev/null +++ b/arch/arc/include/asm/topology.h @@ -0,0 +1,34 @@ +#ifndef _ASM_ARC_TOPOLOGY_H +#define _ASM_ARC_TOPOLOGY_H + +#ifdef CONFIG_NPS_CPU_TOPOLOGY + +#include <linux/cpumask.h> + +struct cputopo_nps { + int thread_id; + int core_id; + cpumask_t thread_sibling; + cpumask_t core_sibling; +}; + +extern struct cputopo_nps cpu_topology[NR_CPUS]; + +#define topology_core_id(cpu) (cpu_topology[cpu].core_id) +#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) +#define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) + +void init_cpu_topology(void); +void store_cpu_topology(unsigned int cpuid); +const struct cpumask *cpu_coregroup_mask(int cpu); + +#else + +static inline void init_cpu_topology(void) { } +static inline void store_cpu_topology(unsigned int cpuid) { } + +#endif + +#include <asm-generic/topology.h> + +#endif /* _ASM_ARC_TOPOLOGY_H */ diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile index 8942c5c..46af80a 100644 --- a/arch/arc/kernel/Makefile +++ b/arch/arc/kernel/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_ARC_EMUL_UNALIGNED) += unaligned.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_ARC_METAWARE_HLINK) += arc_hostlink.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o +obj-$(CONFIG_NPS_CPU_TOPOLOGY) += topology.o obj-$(CONFIG_ARC_FPU_SAVE_RESTORE) += fpu.o CFLAGS_fpu.o += -mdpfp diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 8494b31..5256205 100644 --- 
a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -571,14 +571,14 @@ static void c_stop(struct seq_file *m, void *v) .show = show_cpuinfo }; -static DEFINE_PER_CPU(struct cpu, cpu_topology); +static DEFINE_PER_CPU(struct cpu, cpu_topo_info); static int __init topology_init(void) { int cpu; for_each_present_cpu(cpu) - register_cpu(&per_cpu(cpu_topology, cpu), cpu); + register_cpu(&per_cpu(cpu_topo_info, cpu), cpu); return 0; } diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index d1aa917..167a620 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -67,6 +67,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus) { int i; + init_cpu_topology(); + store_cpu_topology(smp_processor_id()); + /* * if platform didn't set the present map already, do it now * boot cpu is set to present already by init/main.c @@ -151,6 +154,8 @@ void start_kernel_secondary(void) if (machine_desc->init_per_cpu) machine_desc->init_per_cpu(cpu); + store_cpu_topology(cpu); + notify_cpu_starting(cpu); set_cpu_online(cpu, true); diff --git a/arch/arc/kernel/topology.c b/arch/arc/kernel/topology.c new file mode 100644 index 0000000..3feb7c9 --- /dev/null +++ b/arch/arc/kernel/topology.c @@ -0,0 +1,125 @@ +/* + * Copyright (C) 2015 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/export.h> +#include <linux/init.h> +#include <linux/percpu.h> +#include <linux/node.h> +#include <linux/nodemask.h> +#include <linux/of.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/sched/topology.h> +#include <plat/smp.h> + +/* + * cpu topology table + */ +struct cputopo_nps cpu_topology[NR_CPUS]; +EXPORT_SYMBOL_GPL(cpu_topology); + +const struct cpumask *cpu_coregroup_mask(int cpu) +{ + return &cpu_topology[cpu].core_sibling; +} + +static void update_siblings_masks(unsigned int cpuid) +{ + struct cputopo_nps *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; + int cpu; + struct global_id global_topo, global_id_topo; + + global_id_topo.value = cpuid; + + /* update core and thread sibling masks */ + for_each_possible_cpu(cpu) { + cpu_topo = &cpu_topology[cpu]; + global_topo.value = cpu; + + if (global_id_topo.cluster != global_topo.cluster) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); + + if (cpuid_topo->core_id != cpu_topo->core_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); + } + + /* Do not proceed before masks are written */ + smp_wmb(); +} + +/* + * store_cpu_topology is called at boot when only one cpu is running + * and with the mutex cpu_hotplug.lock locked, when several cpus have booted, + * which prevents simultaneous write access to cpu_topology array + */ +void store_cpu_topology(unsigned int cpuid) +{ + struct cputopo_nps *cpuid_topo = &cpu_topology[cpuid]; + struct global_id gid; + + /* If the cpu topology has been already set, just return */ + if (cpuid_topo->core_id != -1) + return; + + gid.value = cpuid; + + cpuid_topo->thread_id = gid.thread; + cpuid_topo->core_id = ((gid.cluster << 4) | gid.core); + + update_siblings_masks(cpuid); + + pr_debug("CPU%u: thread %d, core 
%d\n", + cpuid, cpu_topology[cpuid].thread_id, + cpu_topology[cpuid].core_id); +} + +static struct sched_domain_topology_level nps_topology[] = { +#ifdef CONFIG_SCHED_SMT + { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, +#endif +#ifdef CONFIG_SCHED_MC + { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, +#endif + { cpu_cpu_mask, SD_INIT_NAME(DIE) }, + { NULL, }, +}; + +/* + * init_cpu_topology is called at boot when only one cpu is running + * which prevent simultaneous write access to cpu_topology array + */ +void __init init_cpu_topology(void) +{ + unsigned int cpu; + + /* init core mask */ + for_each_possible_cpu(cpu) { + struct cputopo_nps *cpu_topo = &(cpu_topology[cpu]); + + cpu_topo->thread_id = -1; + cpu_topo->core_id = -1; + cpumask_clear(&cpu_topo->core_sibling); + cpumask_clear(&cpu_topo->thread_sibling); + } + + /* Do not proceed before masks are written */ + smp_wmb(); + + /* Set scheduler topology descriptor */ + set_sched_topology(nps_topology); +} -- 1.7.1