+ ard, peterz

I've been looking at the PREEMPT_RT report from Pavel:

On 2023-05-14 22:17:46 [+0200], Pavel Pisa wrote:
> [  199.657099] ------------[ cut here ]------------
> [  199.657116] WARNING: CPU: 1 PID: 1702 at kernel/softirq.c:172 __local_bh_disable_ip+0xd8/0x16c
> [  199.657150] DEBUG_LOCKS_WARN_ON(this_cpu_read(softirq_ctrl.cnt))
> [  199.657155] Modules linked in: ctucanfd_platform ctucanfd dtbocfg(O) uio_pdrv_genirq ip_tables x_tables
> [  199.657192] CPU: 1 PID: 1702 Comm: find Tainted: G O 6.3.0-rc7-rt9-dut #1
> [  199.657207] Hardware name: Xilinx Zynq Platform
> [  199.657224] unwind_backtrace from show_stack+0x10/0x14
> [  199.657259] show_stack from dump_stack_lvl+0x40/0x4c
> [  199.657294] dump_stack_lvl from __warn+0x84/0x168
> [  199.657330] __warn from warn_slowpath_fmt+0x134/0x1b8
> [  199.657357] warn_slowpath_fmt from __local_bh_disable_ip+0xd8/0x16c
> [  199.657379] __local_bh_disable_ip from vfp_sync_hwstate+0x28/0xa0
> [  199.657403] vfp_sync_hwstate from vfp_notifier+0x30/0x174
> [  199.657424] vfp_notifier from atomic_notifier_call_chain+0x64/0x88
> [  199.657446] atomic_notifier_call_chain from copy_thread+0xa4/0xe0
> [  199.657474] copy_thread from copy_process+0x1258/0x1ba8
> [  199.657503] copy_process from kernel_clone+0x94/0x3b8
> [  199.657525] kernel_clone from sys_clone+0x70/0x98
> [  199.657547] sys_clone from ret_fast_syscall+0x0/0x54
> [  199.657565] Exception stack(0xf1231fa8 to 0xf1231ff0)
> [  199.657578] 1fa0: b6f5f088 b6f5f520 01200011 00000000 00000000 00000000
> [  199.657592] 1fc0: b6f5f088 b6f5f520 00000001 00000078 004efba8 00000000 004cc2fc 00000000
> [  199.657601] 1fe0: 00000078 bef2b740 b6df8fe3 b6d8e616
> [  199.657608] ---[ end trace 0000000000000000 ]---

The problem is that vfp_sync_hwstate() does

	preempt_disable();	/* via get_cpu() */
	local_bh_disable();

This *would* be okay *if* it were guaranteed that BH is not already
disabled on this CPU. That guarantee does not exist: another task may
have disabled BH and then been preempted, which is exactly what
triggered the warning above.

This could (probably) be fixed by dropping that get_cpu() from
vfp_sync_hwstate() (unless preemption really should be disabled, as
claimed by the comment).

But then I looked further and stumbled over commit 62b95a7b44d1a ("ARM:
9282/1: vfp: Manipulate task VFP state with softirqs disabled") and got
a little sad.

On PREEMPT_RT you can't simply add SOFTIRQ_DISABLE_OFFSET to the
preemption counter because BH disabling works differently there. You
would have to invoke local_bh_disable(), which *may* involve a context
switch if BH was disabled by another task which got preempted. I guess
this path is only entered from userland, at which point it is
guaranteed that the current task does not have BH disabled, since a
BH-disabled section is not preemptible on !RT. This local_bh_disable()
is invoked from do_vfp, which runs with interrupts enabled.

The counterpart enables BH by simply subtracting that constant again.
Don't we miss a scheduling opportunity if an interrupt occurred in the
meantime and set the NEED_RESCHED flag? Also, if an interrupt raised
softirqs while they were disabled, local_bh_enable() should invoke
do_softirq(), or they remain pending until the next opportunity. Unless
the way back has a check somewhere that I am missing.

The snippet below is probably disgusting, but with it I managed to boot
in qemu.
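Before the hack itself, for reference: the RT implementation of
__local_bh_disable_ip() in kernel/softirq.c starts roughly like this
(a condensed sketch from memory, not the verbatim source):

	void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
	{
		/* First entry of this task into a BH-disabled section? */
		if (!current->softirq_disable_cnt) {
			if (preemptible()) {
				/* May block if a preempted task holds the lock. */
				local_lock(&softirq_ctrl.lock);
				rcu_read_lock();
			} else {
				/* Can't block here, so the lock must be free. */
				DEBUG_LOCKS_WARN_ON(this_cpu_read(softirq_ctrl.cnt));
			}
		}
		/* ... then softirq_ctrl.cnt and the preempt counter are bumped ... */
	}

That DEBUG_LOCKS_WARN_ON() is the one from the splat: get_cpu() already
disabled preemption, so the lock can't be taken, and softirq_ctrl.cnt
is non-zero because the preempted BH-disabler still owns it.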
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 06b48ce23e1ca..47c9f14f8c5e9 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -244,6 +244,7 @@ THUMB(	fpreg	.req	r7	)
 	.endm
 #endif
 
+#if 0
 	.macro	local_bh_disable, ti, tmp
 	ldr	\tmp, [\ti, #TI_PREEMPT]
 	add	\tmp, \tmp, #SOFTIRQ_DISABLE_OFFSET
@@ -256,6 +257,7 @@ THUMB(	fpreg	.req	r7	)
 	sub	\tmp, \tmp, #SOFTIRQ_DISABLE_OFFSET
 	str	\tmp, [\ti, #TI_PREEMPT]
 	.endm
+#endif
 
 #define USERL(l, x...)				\
 9999:	x;					\
diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S
index 9a89264cdcc0b..80e45999c2961 100644
--- a/arch/arm/vfp/entry.S
+++ b/arch/arm/vfp/entry.S
@@ -22,7 +22,14 @@
 @  IRQs enabled.
 @
 ENTRY(do_vfp)
-	local_bh_disable r10, r4
+	push	{r0, r2, r9, r10, lr}
+	ldr	r0, 0f
+	mov	r1, #SOFTIRQ_DISABLE_OFFSET
+0:
+	bl	__local_bh_disable_ip
+	pop	{r0, r2, r9, r10, lr}
+
+/*	local_bh_disable r10, r4 */
 	ldr	r4, .LCvfp
 	ldr	r11, [r10, #TI_CPU]	@ CPU number
 	add	r10, r10, #TI_VFPSTATE	@ r10 = workspace
@@ -30,7 +37,11 @@ ENTRY(do_vfp)
 ENDPROC(do_vfp)
 
 ENTRY(vfp_null_entry)
-	local_bh_enable_ti r10, r4
+/*	local_bh_enable_ti r10, r4 */
+	ldr	r0, 0f
+	mov	r1, #SOFTIRQ_DISABLE_OFFSET
+0:
+	bl	__local_bh_enable_ip
 	ret	lr
 ENDPROC(vfp_null_entry)
diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index 26c4f61ecfa39..e9f183aad39ed 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -175,7 +175,13 @@ ENTRY(vfp_support_entry)
 					@ else it's one 32-bit instruction, so
 					@ always subtract 4 from the following
 					@ instruction address.
-	local_bh_enable_ti r10, r4
+	push	{ r0, r2, r9, r10, r11, lr}
+	ldr	r0, 0f
+	mov	r1, #SOFTIRQ_DISABLE_OFFSET
+0:
+	bl	__local_bh_enable_ip
+	pop	{ r0, r2, r9, r10, r11, lr}
+/*	local_bh_enable_ti r10, r4 */
 	ret	r9			@ we think we have handled things
@@ -200,7 +206,13 @@ ENTRY(vfp_support_entry)
 					@ not recognised by VFP
 
 	DBGSTR	"not VFP"
-	local_bh_enable_ti r10, r4
+/*	local_bh_enable_ti r10, r4 */
+	push	{ r0, r2, r9, r10, r11, lr}
+	ldr	r0, 0f
+	mov	r1, #SOFTIRQ_DISABLE_OFFSET
+0:
+	bl	__local_bh_enable_ip
+	pop	{ r0, r2, r9, r10, r11, lr}
 	ret	lr
 
 process_exception:
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 01bc48d738478..d4f68806e66b9 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -515,11 +515,9 @@ static inline void vfp_pm_init(void) { }
  */
 void vfp_sync_hwstate(struct thread_info *thread)
 {
-	unsigned int cpu = get_cpu();
-
 	local_bh_disable();
-
-	if (vfp_state_in_hw(cpu, thread)) {
+	preempt_disable();
+	if (vfp_state_in_hw(smp_processor_id(), thread)) {
 		u32 fpexc = fmrx(FPEXC);
 
 		/*
@@ -530,8 +528,8 @@ void vfp_sync_hwstate(struct thread_info *thread)
 		fmxr(FPEXC, fpexc);
 	}
 
+	preempt_enable();
 	local_bh_enable();
-	put_cpu();
 }
 
 /* Ensure that the thread reloads the hardware VFP state on the next use. */

Sebastian
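P.S.: regarding the do_softirq() question above, the C side does handle
this on RT: __local_bh_enable_ip() in kernel/softirq.c ends an
outermost BH-disabled section roughly like this (again a condensed
sketch from memory, not the verbatim source):

	void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
	{
		/* ... only when this drops the last BH-disable level ... */
		if (local_softirq_pending()) {
			if (preemptible())
				__do_softirq();		/* run them right away */
			else
				wakeup_softirqd();	/* defer to ksoftirqd */
		}
		/* ... rcu_read_unlock() + local_unlock(&softirq_ctrl.lock) ... */
	}

So routing the asm entry/exit paths through the C helpers, as the hack
above does, also takes care of softirqs raised while BH was off.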