Hi Conor & Alexandre, On Fri, Nov 29, 2024 at 10:58 AM Guo Ren <guoren@xxxxxxxxxx> wrote: > > On Fri, Nov 29, 2024 at 8:55 AM Guo Ren <guoren@xxxxxxxxxx> wrote: > > > > On Fri, Nov 29, 2024 at 12:19 AM Conor Dooley <conor@xxxxxxxxxx> wrote: > > > > > > On Thu, Nov 28, 2024 at 03:50:09PM +0100, Alexandre Ghiti wrote: > > > > On 28/11/2024 15:14, Conor Dooley wrote: > > > > > On Thu, Nov 28, 2024 at 01:41:36PM +0000, Will Deacon wrote: > > > > > > On Thu, Nov 28, 2024 at 12:56:55PM +0000, Conor Dooley wrote: > > > > > > > On Sun, Nov 03, 2024 at 03:51:53PM +0100, Alexandre Ghiti wrote: > > > > > > > > In order to produce a generic kernel, a user can select > > > > > > > > CONFIG_COMBO_SPINLOCKS which will fallback at runtime to the ticket > > > > > > > > spinlock implementation if Zabha or Ziccrse are not present. > > > > > > > > > > > > > > > > Note that we can't use alternatives here because the discovery of > > > > > > > > extensions is done too late and we need to start with the qspinlock > > > > > > > > implementation because the ticket spinlock implementation would pollute > > > > > > > > the spinlock value, so let's use static keys. > > > > > > > > > > > > > > > > This is largely based on Guo's work and Leonardo reviews at [1]. > > > > > > > > > > > > > > > > Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@xxxxxxxxxx/ [1] > > > > > > > > Signed-off-by: Guo Ren <guoren@xxxxxxxxxx> > > > > > > > > Signed-off-by: Alexandre Ghiti <alexghiti@xxxxxxxxxxxx> > > > > > > > This patch (now commit ab83647fadae2 ("riscv: Add qspinlock support")) > > > > > > > breaks boot on polarfire soc. It dies before outputting anything to the > > > > > > > console. My .config has: > > > > > > > > > > > > > > # CONFIG_RISCV_TICKET_SPINLOCKS is not set > > > > > > > # CONFIG_RISCV_QUEUED_SPINLOCKS is not set > > > > > > > CONFIG_RISCV_COMBO_SPINLOCKS=y > > > > > > I pointed out some of the fragility during review: > > > > > > > > > > > > https://lore.kernel.org/all/20241111164259.GA20042@willie-the-truck/ > > > > > > > > > > > > so I'm kinda surprised it got merged tbh :/ > > > > > Maybe it could be reverted rather than having a broken boot with the > > > > > default settings in -rc1. > > > > > > > > > > > > No need to rush before we know what's happening,I guess you bisected to this > > > > commit right? > > > > > > The symptom is a failure to boot, without any console output, of course > > > I bisected it before blaming something specific. But I don't think it is > > > "rushing" as having -rc1 broken with an option's default is a massive pain > > > in the arse when it comes to testing. > > > > > > > I don't have this soc, so can you provide $stval/$sepc/$scause, a config, a > > > > kernel, anything? > > > > > > I don't have the former cos it died immediately on boot. config is > > > attached. It reproduces in QEMU so you don't need any hardware. > > If QEMU could reproduce, could you provide a dmesg by the below method? > > > > Qemu cmd append: -s -S > > ref: https://qemu-project.gitlab.io/qemu/system/gdb.html > > > > Connect gdb and in console: > > 1. file vmlinux > > 2. source ./Documentation/admin-guide/kdump/gdbmacros.txt > > 3. dmesg > > > > Then, we could get the kernel's early boot logs from memory. > I've reproduced it on qemu, thx for the config. > > Reading symbols from ../build-rv64lp64/vmlinux... > (gdb) tar rem:1234 > Remote debugging using :1234 > ticket_spin_lock (lock=0xffffffff81b9a5b8 <text_mutex>) at > /home/guoren/source/kernel/linux/include/asm-generic/ticket_spinlock.h:49 > 49 atomic_cond_read_acquire(&lock->val, ticket == (u16)VAL); > (gdb) bt > #0 ticket_spin_lock (lock=0xffffffff81b9a5b8 <text_mutex>) at > /home/guoren/source/kernel/linux/include/asm-generic/ticket_spinlock.h:49 > #1 arch_spin_lock (lock=0xffffffff81b9a5b8 <text_mutex>) at > /home/guoren/source/kernel/linux/arch/riscv/include/asm/spinlock.h:28 > #2 do_raw_spin_lock (lock=lock@entry=0xffffffff81b9a5b8 <text_mutex>) > at /home/guoren/source/kernel/linux/kernel/locking/spinlock_debug.c:116 > #3 0xffffffff80b2ea0e in __raw_spin_lock_irqsave > (lock=0xffffffff81b9a5b8 <text_mutex>) at > /home/guoren/source/kernel/linux/include/linux/spinlock_api_smp.h:111 > #4 _raw_spin_lock_irqsave (lock=lock@entry=0xffffffff81b9a5b8 > <text_mutex>) at > /home/guoren/source/kernel/linux/kernel/locking/spinlock.c:162 > #5 0xffffffff80b27c54 in rt_mutex_slowtrylock > (lock=0xffffffff81b9a5b8 <text_mutex>) at > /home/guoren/source/kernel/linux/kernel/locking/rtmutex.c:1393 > #6 0xffffffff80b295ea in rt_mutex_try_acquire > (lock=0xffffffff81b9a5b8 <text_mutex>) at > /home/guoren/source/kernel/linux/kernel/locking/rtmutex.c:319 > #7 __rt_mutex_lock (state=2, lock=0xffffffff81b9a5b8 <text_mutex>) at > /home/guoren/source/kernel/linux/kernel/locking/rtmutex.c:1805 > #8 __mutex_lock_common (ip=18446744071562135170, nest_lock=0x0, > subclass=0, state=2, lock=0xffffffff81b9a5b8 <text_mutex>) at > /home/guoren/source/kernel/linux/kernel/locking/rtmutex_api.c:518 > #9 mutex_lock_nested (lock=0xffffffff81b9a5b8 <text_mutex>, > subclass=subclass@entry=0) at > /home/guoren/source/kernel/linux/kernel/locking/rtmutex_api.c:529 > #10 0xffffffff80010682 in arch_jump_label_transform_queue > (entry=entry@entry=0xffffffff8158da28, type=<optimized out>) at > /home/guoren/source/kernel/linux/arch/riscv/kernel/jump_label.c:39 > #11 0xffffffff801d86b2 in __jump_label_update > (key=key@entry=0xffffffff81a1abb0 <qspinlock_key>, > entry=0xffffffff8158da28, stop=stop@entry=0xffffffff815a5e68 > <__tracepoint_ptr_initcall_finish>, init=init@entry=true) > at /home/guoren/source/kernel/linux/kernel/jump_label.c:513 > #12 0xffffffff801d890c in jump_label_update > (key=key@entry=0xffffffff81a1abb0 <qspinlock_key>) at > /home/guoren/source/kernel/linux/kernel/jump_label.c:920 > #13 0xffffffff801d8be8 in static_key_disable_cpuslocked > (key=key@entry=0xffffffff81a1abb0 <qspinlock_key>) at > /home/guoren/source/kernel/linux/kernel/jump_label.c:240 > #14 0xffffffff801d8c04 in static_key_disable > (key=key@entry=0xffffffff81a1abb0 <qspinlock_key>) at > /home/guoren/source/kernel/linux/kernel/jump_label.c:248 > #15 0xffffffff80c04a1a in riscv_spinlock_init () at > /home/guoren/source/kernel/linux/arch/riscv/kernel/setup.c:271 > #16 setup_arch (cmdline_p=cmdline_p@entry=0xffffffff81a03e88) at > /home/guoren/source/kernel/linux/arch/riscv/kernel/setup.c:336 > #17 0xffffffff80c007a2 in start_kernel () at > /home/guoren/source/kernel/linux/init/main.c:922 > #18 0xffffffff80001164 in _start_kernel () > Backtrace stopped: frame did not save the PC > (gdb) p /x lock > $1 = 0xffffffff81b9a5b8 > (gdb) p /x *lock > $2 = {{val = {counter = 0x20000}, {locked = 0x0, pending = 0x0}, > {locked_pending = 0x0, tail = 0x2}}} I have for you here a fast fixup for reference. (PS: I'm digging into the root cause mentioned by Will Deacon.) diff --git a/arch/riscv/include/asm/text-patching.h b/arch/riscv/include/asm/text-patching.h index 7228e266b9a1..0439609f1cff 100644 --- a/arch/riscv/include/asm/text-patching.h +++ b/arch/riscv/include/asm/text-patching.h @@ -12,5 +12,6 @@ int patch_text_set_nosync(void *addr, u8 c, size_t len); int patch_text(void *addr, u32 *insns, size_t len); extern int riscv_patch_in_stop_machine; +extern int riscv_patch_in_spinlock_init; #endif /* _ASM_RISCV_PATCH_H */ diff --git a/arch/riscv/kernel/jump_label.c b/arch/riscv/kernel/jump_label.c index 6eee6f736f68..d9a5a5c1933d 100644 --- a/arch/riscv/kernel/jump_label.c +++ b/arch/riscv/kernel/jump_label.c @@ -36,9 +36,11 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry, insn = RISCV_INSN_NOP; } - mutex_lock(&text_mutex); + if (!riscv_patch_in_spinlock_init) + mutex_lock(&text_mutex); patch_insn_write(addr, &insn, sizeof(insn)); - mutex_unlock(&text_mutex); + if (!riscv_patch_in_spinlock_init) + mutex_unlock(&text_mutex); return true; } diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index db13c9ddf9e3..ab009cf855c2 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -24,6 +24,7 @@ struct patch_insn { }; int riscv_patch_in_stop_machine = false; +int riscv_patch_in_spinlock_init = false; #ifdef CONFIG_MMU @@ -131,7 +132,7 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len) * safe but triggers a lockdep failure, so just elide it for that * specific case. */ - if (!riscv_patch_in_stop_machine) + if (!riscv_patch_in_stop_machine && !riscv_patch_in_spinlock_init) lockdep_assert_held(&text_mutex); preempt_disable(); diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 016b48fcd6f2..87ddf1702be4 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -268,7 +268,9 @@ static void __init riscv_spinlock_init(void) } #if defined(CONFIG_RISCV_COMBO_SPINLOCKS) else { + riscv_patch_in_spinlock_init = 1; static_branch_disable(&qspinlock_key); + riscv_patch_in_spinlock_init = 0; pr_info("Ticket spinlock: enabled\n"); return; } -- Best Regards Guo Ren