On Tue, 31 May 2022, Helge Deller wrote: > Hello Mikulas, > > On 5/31/22 12:43, Mikulas Patocka wrote: > > The kernel 5.18 doesn't boot on my C8000 with two dual-core CPUs. It hangs > > at this point: > > > > [ 0.000000] NR_IRQS: 80 > > [ 0.000002] sched_clock: 64 bits at 1000MHz, resolution 1ns, wraps every 4398046511103ns > > [ 0.107049] Console: colour dummy device 160x64 > > [ 0.166791] Calibrating delay loop... 1991.60 BogoMIPS (lpj=3317760) > > [ 0.270079] pid_max: default: 32768 minimum: 301 > > [ 0.330158] random: get_random_bytes called from net_ns_init+0x118/0x4c8 with crng_init=0 > > [ 0.330394] Mount-cache hash table entries: 16384 (order: 5, 131072 bytes, linear) > > [ 0.536892] Mountpoint-cache hash table entries: 16384 (order: 5, 131072 bytes, linear) > > [ 0.644729] cblist_init_generic: Setting adjustable number of callback queues. > > [ 0.740083] cblist_init_generic: Setting shift to 2 and lim to 1. > > [ 0.820179] TOC handler registered > > [ 0.866852] rcu: Hierarchical SRCU implementation. > > [ 0.930550] smp: Bringing up secondary CPUs ... > > [ 0.990084] smp: Brought up 1 node, 1 CPU > > > > I bisected it and it is caused by the commit > > 62773112acc55d29727465d075fc61ed08a0a532 ("parisc: Switch from > > GENERIC_CPU_DEVICES to GENERIC_ARCH_TOPOLOGY") > > > > When I revert this patch on the kernel 5.18 (and resolve several > > conflcits), the kernel boots. > > You are really sure that you test v5.18-final, right? Yes. > If not there were multiple hickups during the -rc phases of v5.18, and > bisecting through the -rc versions won't help much because we had multiple > issues which conflicted with each other and prevented a boot. I hit some crashes when running userspace when bisecting, but I bisected them as "good" and I only bisected the hang at "smp: Brought up 1 node, 1 CPU" as "bad". And bisect showed the faulty commit - reverting that commit makes the kernel bool. > But those should have been resolved with my "for-5.18/parisc-3" push, which e.g. > partly re-enabled GENERIC_CPU_DEVICES: > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=f002488d80b557c7dc540457b176011449895fcb > Esp. this commit ("Re-enable GENERIC_CPU_DEVICES for !SMP") fixed it for non-SMP: > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=1955c4f879a130c7822f483cf593338ad747aed4 These commits are already present in 5.18. They don't fix the hang. > So, right now I'm somehow lost how I can help. If you don't know what causes it, I can try to add some debug printks into the kernel and analyze it when I have time. > You could you send me your .config, then I'll check locally. > Please also try a "make oldconfig" with your .config so that the options don't have any leftovers. I uploaded my config here: https://people.redhat.com/~mpatocka/testcases/parisc-config-5.18.txt > Helge This is quick-and-dirty revert of the patch 62773112acc55d29727465d075fc61ed08a0a532. When it is applied on 5.18, the kernel boots fine. I don't intend to try to push this into the kernel, but it may help with debugging the problem. Mikulas --- arch/parisc/Kconfig | 11 ++++- arch/parisc/include/asm/topology.h | 23 ++++++++++- arch/parisc/kernel/Makefile | 2 - arch/parisc/kernel/processor.c | 5 -- arch/parisc/kernel/smp.c | 2 - arch/parisc/kernel/topology.c | 72 +++++++++++++++++++++++++++++-------- 6 files changed, 89 insertions(+), 26 deletions(-) Index: linux-5.18.1/arch/parisc/Kconfig =================================================================== --- linux-5.18.1.orig/arch/parisc/Kconfig 2022-05-31 12:12:11.000000000 +0200 +++ linux-5.18.1/arch/parisc/Kconfig 2022-05-31 12:12:11.000000000 +0200 @@ -37,7 +37,7 @@ config PARISC select GENERIC_PCI_IOMAP select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_SMP_IDLE_THREAD - select GENERIC_ARCH_TOPOLOGY if SMP + select GENERIC_CPU_DEVICES select GENERIC_CPU_DEVICES if !SMP select GENERIC_LIB_DEVMEM_IS_ALLOWED select SYSCTL_ARCH_UNALIGN_ALLOW @@ -281,9 +281,16 @@ config SMP If you don't know what to do here, say N. +config PARISC_CPU_TOPOLOGY + bool "Support cpu topology definition" + depends on SMP + default y + help + Support PARISC cpu topology definition. + config SCHED_MC bool "Multi-core scheduler support" - depends on GENERIC_ARCH_TOPOLOGY && PA8X00 + depends on PARISC_CPU_TOPOLOGY && PA8X00 help Multi-core scheduler support improves the CPU scheduler's decision making when dealing with multi-core CPU chips at a cost of slightly Index: linux-5.18.1/arch/parisc/include/asm/topology.h =================================================================== --- linux-5.18.1.orig/arch/parisc/include/asm/topology.h 2022-05-31 12:12:11.000000000 +0200 +++ linux-5.18.1/arch/parisc/include/asm/topology.h 2022-05-31 12:12:11.000000000 +0200 @@ -1,16 +1,33 @@ #ifndef _ASM_PARISC_TOPOLOGY_H #define _ASM_PARISC_TOPOLOGY_H -#ifdef CONFIG_GENERIC_ARCH_TOPOLOGY +#ifdef CONFIG_PARISC_CPU_TOPOLOGY #include <linux/cpumask.h> -#include <linux/arch_topology.h> + +struct cputopo_parisc { + int thread_id; + int core_id; + int socket_id; + cpumask_t thread_sibling; + cpumask_t core_sibling; +}; + +extern struct cputopo_parisc cpu_topology[NR_CPUS]; + +#define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id) +#define topology_core_id(cpu) (cpu_topology[cpu].core_id) +#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) +#define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) + +void init_cpu_topology(void); +void store_cpu_topology(unsigned int cpuid); +const struct cpumask *cpu_coregroup_mask(int cpu); #else static inline void init_cpu_topology(void) { } static inline void store_cpu_topology(unsigned int cpuid) { } -static inline void reset_cpu_topology(void) { } #endif Index: linux-5.18.1/arch/parisc/kernel/Makefile =================================================================== --- linux-5.18.1.orig/arch/parisc/kernel/Makefile 2022-05-31 12:12:11.000000000 +0200 +++ linux-5.18.1/arch/parisc/kernel/Makefile 2022-05-31 12:12:11.000000000 +0200 @@ -31,7 +31,7 @@ obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o # only supported for PCX-W/U in 64-bit mode at the moment obj-$(CONFIG_64BIT) += perf.o perf_asm.o $(obj64-y) -obj-$(CONFIG_GENERIC_ARCH_TOPOLOGY) += topology.o +obj-$(CONFIG_PARISC_CPU_TOPOLOGY) += topology.o obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o Index: linux-5.18.1/arch/parisc/kernel/processor.c =================================================================== --- linux-5.18.1.orig/arch/parisc/kernel/processor.c 2022-05-31 12:12:11.000000000 +0200 +++ linux-5.18.1/arch/parisc/kernel/processor.c 2022-05-31 12:12:11.000000000 +0200 @@ -19,7 +19,6 @@ #include <linux/random.h> #include <linux/slab.h> #include <linux/cpu.h> -#include <asm/topology.h> #include <asm/param.h> #include <asm/cache.h> #include <asm/hardware.h> /* for register_parisc_driver() stuff */ @@ -392,7 +391,7 @@ show_cpuinfo (struct seq_file *m, void * boot_cpu_data.cpu_hz / 1000000, boot_cpu_data.cpu_hz % 1000000 ); -#ifdef CONFIG_GENERIC_ARCH_TOPOLOGY +#ifdef CONFIG_PARISC_CPU_TOPOLOGY seq_printf(m, "physical id\t: %d\n", topology_physical_package_id(cpu)); seq_printf(m, "siblings\t: %d\n", @@ -463,8 +462,6 @@ void __init processor_init(void) { unsigned int cpu; - reset_cpu_topology(); - /* reset possible mask. We will mark those which are possible. */ for_each_possible_cpu(cpu) set_cpu_possible(cpu, false); Index: linux-5.18.1/arch/parisc/kernel/topology.c =================================================================== --- linux-5.18.1.orig/arch/parisc/kernel/topology.c 2022-05-31 12:12:11.000000000 +0200 +++ linux-5.18.1/arch/parisc/kernel/topology.c 2022-05-31 12:12:11.000000000 +0200 @@ -13,12 +13,46 @@ #include <linux/percpu.h> #include <linux/sched.h> #include <linux/sched/topology.h> -#include <linux/cpu.h> #include <asm/topology.h> #include <asm/sections.h> -static DEFINE_PER_CPU(struct cpu, cpu_devices); + /* + * cpu topology table + */ +struct cputopo_parisc cpu_topology[NR_CPUS] __read_mostly; +EXPORT_SYMBOL_GPL(cpu_topology); + +const struct cpumask *cpu_coregroup_mask(int cpu) +{ + return &cpu_topology[cpu].core_sibling; +} + +static void update_siblings_masks(unsigned int cpuid) +{ + struct cputopo_parisc *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; + int cpu; + + /* update core and thread sibling masks */ + for_each_possible_cpu(cpu) { + cpu_topo = &cpu_topology[cpu]; + + if (cpuid_topo->socket_id != cpu_topo->socket_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); + + if (cpuid_topo->core_id != cpu_topo->core_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); + } + smp_wmb(); +} static int dualcores_found; @@ -29,7 +63,7 @@ static int dualcores_found; */ void store_cpu_topology(unsigned int cpuid) { - struct cpu_topology *cpuid_topo = &cpu_topology[cpuid]; + struct cputopo_parisc *cpuid_topo = &cpu_topology[cpuid]; struct cpuinfo_parisc *p; int max_socket = -1; unsigned long cpu; @@ -38,12 +72,6 @@ void store_cpu_topology(unsigned int cpu if (cpuid_topo->core_id != -1) return; -#ifdef CONFIG_HOTPLUG_CPU - per_cpu(cpu_devices, cpuid).hotpluggable = 1; -#endif - if (register_cpu(&per_cpu(cpu_devices, cpuid), cpuid)) - pr_warn("Failed to register CPU%d device", cpuid); - /* create cpu topology mapping */ cpuid_topo->thread_id = -1; cpuid_topo->core_id = 0; @@ -59,25 +87,25 @@ void store_cpu_topology(unsigned int cpu cpuid_topo->core_id = cpu_topology[cpu].core_id; if (p->cpu_loc) { cpuid_topo->core_id++; - cpuid_topo->package_id = cpu_topology[cpu].package_id; + cpuid_topo->socket_id = cpu_topology[cpu].socket_id; dualcores_found = 1; continue; } } - if (cpuid_topo->package_id == -1) - max_socket = max(max_socket, cpu_topology[cpu].package_id); + if (cpuid_topo->socket_id == -1) + max_socket = max(max_socket, cpu_topology[cpu].socket_id); } - if (cpuid_topo->package_id == -1) - cpuid_topo->package_id = max_socket + 1; + if (cpuid_topo->socket_id == -1) + cpuid_topo->socket_id = max_socket + 1; update_siblings_masks(cpuid); pr_info("CPU%u: cpu core %d of socket %d\n", cpuid, cpu_topology[cpuid].core_id, - cpu_topology[cpuid].package_id); + cpu_topology[cpuid].socket_id); } static struct sched_domain_topology_level parisc_mc_topology[] = { @@ -95,6 +123,20 @@ static struct sched_domain_topology_leve */ void __init init_cpu_topology(void) { + unsigned int cpu; + + /* init core mask and capacity */ + for_each_possible_cpu(cpu) { + struct cputopo_parisc *cpu_topo = &(cpu_topology[cpu]); + + cpu_topo->thread_id = -1; + cpu_topo->core_id = -1; + cpu_topo->socket_id = -1; + cpumask_clear(&cpu_topo->core_sibling); + cpumask_clear(&cpu_topo->thread_sibling); + } + smp_wmb(); + /* Set scheduler topology descriptor */ if (dualcores_found) set_sched_topology(parisc_mc_topology); Index: linux-5.18.1/arch/parisc/kernel/smp.c =================================================================== --- linux-5.18.1.orig/arch/parisc/kernel/smp.c 2022-05-31 12:12:11.000000000 +0200 +++ linux-5.18.1/arch/parisc/kernel/smp.c 2022-05-31 12:12:11.000000000 +0200 @@ -457,7 +457,7 @@ int __cpu_disable(void) #ifdef CONFIG_HOTPLUG_CPU unsigned int cpu = smp_processor_id(); - remove_cpu_topology(cpu); + /*remove_cpu_topology(cpu);*/ /* * Take this CPU offline. Once we clear this, we can't return,