From: LiuYe <liu.yeC@xxxxxxx> Currently, if CONFIG_KDB_KEYBOARD is enabled, then kgdboc will attempt to use schedule_work() to provoke a keyboard reset when transitioning out of the debugger and back to normal operation. This can cause deadlock because schedule_work() is not NMI-safe. The stack trace below shows an example of the problem. In this case the master cpu is not running from NMI but it has parked the slave CPUs using an NMI and the parked CPUs is holding spinlocks needed by schedule_work(). example: BUG: spinlock lockup suspected on CPU#0, namex/10450 lock: 0xffff881ffe823980, .magic: dead4ead, .owner: namexx/21888, .owner_cpu: 1 ffff881741d00000 ffff881741c01000 0000000000000000 0000000000000000 ffff881740f58e78 ffff881741cffdd0 ffffffff8147a7fc ffff881740f58f20 Call Trace: [<ffffffff81479e6d>] ? __schedule+0x16d/0xac0 [<ffffffff8147a7fc>] ? schedule+0x3c/0x90 [<ffffffff8147e71a>] ? schedule_hrtimeout_range_clock+0x10a/0x120 [<ffffffff8147d22e>] ? mutex_unlock+0xe/0x10 [<ffffffff811c839b>] ? ep_scan_ready_list+0x1db/0x1e0 [<ffffffff8147e743>] ? schedule_hrtimeout_range+0x13/0x20 [<ffffffff811c864a>] ? ep_poll+0x27a/0x3b0 [<ffffffff8108c540>] ? wake_up_q+0x70/0x70 [<ffffffff811c99a8>] ? SyS_epoll_wait+0xb8/0xd0 [<ffffffff8147f296>] ? entry_SYSCALL_64_fastpath+0x12/0x75 CPU: 0 PID: 10450 Comm: namex Tainted: G O 4.4.65 #1 Hardware name: Insyde Purley/Type2 - Board Product Name1, BIOS 05.21.51.0036 07/19/2019 0000000000000000 ffff881ffe813c10 ffffffff8124e883 ffff881741c01000 ffff881ffe823980 ffff881ffe813c38 ffffffff810a7f7f ffff881ffe823980 000000007d2b7cd0 0000000000000001 ffff881ffe813c68 ffffffff810a80e0 Call Trace: <#DB> [<ffffffff8124e883>] dump_stack+0x85/0xc2 [<ffffffff810a7f7f>] spin_dump+0x7f/0x100 [<ffffffff810a80e0>] do_raw_spin_lock+0xa0/0x150 [<ffffffff8147eb55>] _raw_spin_lock+0x15/0x20 [<ffffffff8108c256>] try_to_wake_up+0x176/0x3d0 [<ffffffff8108c4c5>] wake_up_process+0x15/0x20 [<ffffffff8107b371>] insert_work+0x81/0xc0 [<ffffffff8107b4e5>] __queue_work+0x135/0x390 [<ffffffff8107b786>] queue_work_on+0x46/0x90 [<ffffffff81313d28>] kgdboc_post_exp_handler+0x48/0x70 [<ffffffff810ed488>] kgdb_cpu_enter+0x598/0x610 [<ffffffff810ed6e2>] kgdb_handle_exception+0xf2/0x1f0 [<ffffffff81054e21>] __kgdb_notify+0x71/0xd0 [<ffffffff81054eb5>] kgdb_notify+0x35/0x70 [<ffffffff81082e6a>] notifier_call_chain+0x4a/0x70 [<ffffffff8108304d>] notify_die+0x3d/0x50 [<ffffffff81017219>] do_int3+0x89/0x120 [<ffffffff81480fb4>] int3+0x44/0x80 We fix the problem by using irq_work to call schedule_work() instead of calling it directly. This is because we cannot resynchronize the keyboard state from the hardirq context provided by irq_work. This must be done from the task context in order to call the input subsystem. Therefore, we have to defer the work twice. First, safely switch from the debug trap context (similar to NMI) to the hardirq, and then switch from the hardirq to the system work queue. Signed-off-by: LiuYe <liu.yeC@xxxxxxx> Co-authored-by: Daniel Thompson <daniel.thompson@xxxxxxxxxx> Signed-off-by: Daniel Thompson <daniel.thompson@xxxxxxxxxx> --- V7 -> V8: Update the description information and comments in the code. : Submit this patch based on version linux-6.9-rc2. V6 -> V7: Add comments in the code. V5 -> V6: Replace with a more professional and accurate answer. V4 -> V5: Answer why schedule another work in the irq_work and not do the job directly. V3 -> V4: Add changelogs V2 -> V3: Add description information V1 -> V2: using irq_work to solve this properly. --- --- drivers/tty/serial/kgdboc.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c index 7ce7bb164..d6ce945f0 100644 --- a/drivers/tty/serial/kgdboc.c +++ b/drivers/tty/serial/kgdboc.c @@ -22,6 +22,7 @@ #include <linux/module.h> #include <linux/platform_device.h> #include <linux/serial_core.h> +#include <linux/irq_work.h> #define MAX_CONFIG_LEN 40 @@ -82,6 +83,19 @@ static struct input_handler kgdboc_reset_handler = { static DEFINE_MUTEX(kgdboc_reset_mutex); +/* + * This code ensures that the keyboard state, which is changed during kdb + * execution, is resynchronized when we leave the debug trap. The resync + * logic calls into the input subsystem to force a reset. The calls into + * the input subsystem must be executed from normal task context. + * + * We need to trigger the resync from the debug trap, which executes in an + * NMI (or similar) context. To make it safe to call into the input + * subsystem we end up having use two deferred execution techniques. + * Firstly, we use irq_work, which is NMI-safe, to provoke a callback from + * hardirq context. Then, from the hardirq callback we use the system + * workqueue to provoke the callback that actually performs the resync. + */ static void kgdboc_restore_input_helper(struct work_struct *dummy) { /* @@ -99,10 +113,17 @@ static void kgdboc_restore_input_helper(struct work_struct *dummy) static DECLARE_WORK(kgdboc_restore_input_work, kgdboc_restore_input_helper); +static void kgdboc_queue_restore_input_helper(struct irq_work *unused) +{ + schedule_work(&kgdboc_restore_input_work); +} + +static DEFINE_IRQ_WORK(kgdboc_restore_input_irq_work, kgdboc_queue_restore_input_helper); + static void kgdboc_restore_input(void) { if (likely(system_state == SYSTEM_RUNNING)) - schedule_work(&kgdboc_restore_input_work); + irq_work_queue(&kgdboc_restore_input_irq_work); } static int kgdboc_register_kbd(char **cptr) @@ -133,6 +154,7 @@ static void kgdboc_unregister_kbd(void) i--; } } + irq_work_sync(&kgdboc_restore_input_irq_work); flush_work(&kgdboc_restore_input_work); } #else /* ! CONFIG_KDB_KEYBOARD */ -- 2.25.1