From: Thomas Gleixner <tglx@xxxxxxxxxxxxx> [ Upstream commit 3f804f6d201ca93adf4c3df04d1bfd152c1129d6 ] syzbot reported a possible deadlock in pvclock_gtod_notify(): CPU 0 CPU 1 write_seqcount_begin(&tk_core.seq); pvclock_gtod_notify() spin_lock(&pool->lock); queue_work(..., &pvclock_gtod_work) ktime_get() spin_lock(&pool->lock); do { seq = read_seqcount_begin(tk_core.seq) ... } while (read_seqcount_retry(&tk_core.seq, seq); While this is unlikely to happen, it's possible. Delegate queue_work() to irq_work() which postpones it until the tk_core.seq write held region is left and interrupts are reenabled. Fixes: 16e8d74d2da9 ("KVM: x86: notifier for clocksource changes") Reported-by: syzbot+6beae4000559d41d80f8@xxxxxxxxxxxxxxxxxxxxxxxxx Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Message-Id: <87h7jgm1zy.ffs@xxxxxxxxxxxxxxxxxxxxxxx> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx> --- arch/x86/kvm/x86.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f1fd52eddabf..c37c23c6b178 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7964,6 +7964,18 @@ static void pvclock_gtod_update_fn(struct work_struct *work) static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); +/* + * Indirection to move queue_work() out of the tk_core.seq write held + * region to prevent possible deadlocks against time accessors which + * are invoked with work related locks held. + */ +static void pvclock_irq_work_fn(struct irq_work *w) +{ + queue_work(system_long_wq, &pvclock_gtod_work); +} + +static DEFINE_IRQ_WORK(pvclock_irq_work, pvclock_irq_work_fn); + /* * Notification about pvclock gtod data update. */ @@ -7975,13 +7987,14 @@ static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused, update_pvclock_gtod(tk); - /* disable master clock if host does not trust, or does not - * use, TSC based clocksource. + /* + * Disable master clock if host does not trust, or does not use, + * TSC based clocksource. Delegate queue_work() to irq_work as + * this is invoked with tk_core.seq write held. */ if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) && atomic_read(&kvm_guest_has_master_clock) != 0) - queue_work(system_long_wq, &pvclock_gtod_work); - + irq_work_queue(&pvclock_irq_work); return 0; } @@ -8096,6 +8109,7 @@ void kvm_arch_exit(void) cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE); #ifdef CONFIG_X86_64 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier); + irq_work_sync(&pvclock_irq_work); cancel_work_sync(&pvclock_gtod_work); #endif kvm_x86_ops.hardware_enable = NULL; -- 2.30.2