wakeup_vcpus_on_cpu_lock is taken from hard interrupt context
(pi_wakeup_handler), therefore it must not sleep. On PREEMPT_RT a
spinlock_t is a sleeping rt_mutex based lock, which is what triggers
the "scheduling while atomic" splat below. Switch the lock to a raw
spinlock, which keeps spinning behavior on RT.

Fixes:

[41297.066254] BUG: scheduling while atomic: CPU 0/KVM/635218/0x00010001
[41297.066323] Preemption disabled at:
[41297.066324] [<ffffffff902ee47f>] irq_enter_rcu+0xf/0x60
[41297.066339] Call Trace:
[41297.066342]  <IRQ>
[41297.066346]  dump_stack_lvl+0x34/0x44
[41297.066353]  ? irq_enter_rcu+0xf/0x60
[41297.066356]  __schedule_bug.cold+0x7d/0x8b
[41297.066361]  __schedule+0x439/0x5b0
[41297.066365]  ? task_blocks_on_rt_mutex.constprop.0.isra.0+0x1b0/0x440
[41297.066369]  schedule_rtlock+0x1e/0x40
[41297.066371]  rtlock_slowlock_locked+0xf1/0x260
[41297.066374]  rt_spin_lock+0x3b/0x60
[41297.066378]  pi_wakeup_handler+0x31/0x90 [kvm_intel]
[41297.066388]  sysvec_kvm_posted_intr_wakeup_ipi+0x9d/0xd0
[41297.066392]  </IRQ>
[41297.066392] asm_sysvec_kvm_posted_intr_wakeup_ipi+0x12/0x20
...

Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx>

---

diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
index f4169c009400..aa1fe9085d77 100644
--- a/arch/x86/kvm/vmx/posted_intr.c
+++ b/arch/x86/kvm/vmx/posted_intr.c
@@ -27,7 +27,7 @@ static DEFINE_PER_CPU(struct list_head, wakeup_vcpus_on_cpu);
  * CPU.  IRQs must be disabled when taking this lock, otherwise deadlock will
  * occur if a wakeup IRQ arrives and attempts to acquire the lock.
  */
-static DEFINE_PER_CPU(spinlock_t, wakeup_vcpus_on_cpu_lock);
+static DEFINE_PER_CPU(raw_spinlock_t, wakeup_vcpus_on_cpu_lock);
 
 static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
 {
@@ -87,9 +87,9 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
	 * current pCPU if the task was migrated.
	 */
	if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR) {
-		spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+		raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
		list_del(&vmx->pi_wakeup_list);
-		spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+		raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
	}
 
	dest = cpu_physical_id(cpu);
@@ -149,10 +149,10 @@ static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu)
 
	local_irq_save(flags);
 
-	spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+	raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
	list_add_tail(&vmx->pi_wakeup_list,
		      &per_cpu(wakeup_vcpus_on_cpu, vcpu->cpu));
-	spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+	raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
 
	WARN(pi_desc->sn, "PI descriptor SN field set before blocking");
 
@@ -204,20 +204,20 @@ void pi_wakeup_handler(void)
	int cpu = smp_processor_id();
	struct vcpu_vmx *vmx;
 
-	spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, cpu));
+	raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, cpu));
	list_for_each_entry(vmx, &per_cpu(wakeup_vcpus_on_cpu, cpu),
			    pi_wakeup_list) {
 
		if (pi_test_on(&vmx->pi_desc))
			kvm_vcpu_wake_up(&vmx->vcpu);
	}
-	spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, cpu));
+	raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, cpu));
 }
 
 void __init pi_init_cpu(int cpu)
 {
	INIT_LIST_HEAD(&per_cpu(wakeup_vcpus_on_cpu, cpu));
-	spin_lock_init(&per_cpu(wakeup_vcpus_on_cpu_lock, cpu));
+	raw_spin_lock_init(&per_cpu(wakeup_vcpus_on_cpu_lock, cpu));
 }
 
 bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu)
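
Note (illustrative, not part of the patch): a minimal sketch of the
constraint being fixed, assuming a PREEMPT_RT kernel. The example_*
names below are hypothetical and not from posted_intr.c. On RT,
spin_lock() ends up in rt_spin_lock(), an rt_mutex based sleeping
lock, so any lock taken from hard IRQ context has to be a
raw_spinlock_t:

#include <linux/spinlock.h>
#include <linux/percpu.h>
#include <linux/list.h>
#include <linux/smp.h>

static DEFINE_PER_CPU(raw_spinlock_t, example_lock);	/* must be raw */
static DEFINE_PER_CPU(struct list_head, example_list);

/* Runs in hard interrupt context, e.g. from an IPI handler. */
static void example_irq_handler(void)
{
	int cpu = smp_processor_id();

	/*
	 * raw_spin_lock() spins even on PREEMPT_RT. A plain spin_lock()
	 * here would call rt_spin_lock(), which may schedule, and
	 * scheduling in hard IRQ context produces exactly the
	 * "BUG: scheduling while atomic" splat quoted above.
	 */
	raw_spin_lock(&per_cpu(example_lock, cpu));
	/* ... walk per_cpu(example_list, cpu) and wake up waiters ... */
	raw_spin_unlock(&per_cpu(example_lock, cpu));
}

The per-CPU locks would still be initialized with raw_spin_lock_init()
from a per-CPU init hook, mirroring what pi_init_cpu() does in the
patch.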