From: Wanpeng Li <wanpeng.li@xxxxxxxxxxx> Preempt can occur in the preemption timer expiration handler: CPU0 CPU1 preemption timer vmexit handle_preemption_timer(vCPU0) kvm_lapic_expired_hv_timer hv_timer_is_use == true sched_out sched_in kvm_arch_vcpu_load kvm_lapic_restart_hv_timer restart_apic_timer start_hv_timer already-expired timer or sw timer triggerd in the window start_sw_timer cancel_hv_timer /* back in kvm_lapic_expired_hv_timer */ cancel_hv_timer WARN_ON(!apic->lapic_timer.hv_timer_in_use); ==> Oops This can be reproduced if CONFIG_PREEMPT is enabled. ------------[ cut here ]------------ WARNING: CPU: 4 PID: 2972 at /home/kernel/linux/arch/x86/kvm//lapic.c:1563 kvm_lapic_expired_hv_timer+0x9e/0xb0 [kvm] CPU: 4 PID: 2972 Comm: qemu-system-x86 Tainted: G OE 4.13.0-rc2+ #16 RIP: 0010:kvm_lapic_expired_hv_timer+0x9e/0xb0 [kvm] Call Trace: handle_preemption_timer+0xe/0x20 [kvm_intel] vmx_handle_exit+0xb8/0xd70 [kvm_intel] kvm_arch_vcpu_ioctl_run+0xdd1/0x1be0 [kvm] ? kvm_arch_vcpu_load+0x47/0x230 [kvm] ? kvm_arch_vcpu_load+0x62/0x230 [kvm] kvm_vcpu_ioctl+0x340/0x700 [kvm] ? kvm_vcpu_ioctl+0x340/0x700 [kvm] ? __fget+0xfc/0x210 do_vfs_ioctl+0xa4/0x6a0 ? __fget+0x11d/0x210 SyS_ioctl+0x79/0x90 do_syscall_64+0x81/0x220 entry_SYSCALL64_slow_path+0x25/0x25 ------------[ cut here ]------------ WARNING: CPU: 4 PID: 2972 at /home/kernel/linux/arch/x86/kvm//lapic.c:1498 cancel_hv_timer.isra.40+0x4f/0x60 [kvm] CPU: 4 PID: 2972 Comm: qemu-system-x86 Tainted: G W OE 4.13.0-rc2+ #16 RIP: 0010:cancel_hv_timer.isra.40+0x4f/0x60 [kvm] Call Trace: kvm_lapic_expired_hv_timer+0x3e/0xb0 [kvm] handle_preemption_timer+0xe/0x20 [kvm_intel] vmx_handle_exit+0xb8/0xd70 [kvm_intel] kvm_arch_vcpu_ioctl_run+0xdd1/0x1be0 [kvm] ? kvm_arch_vcpu_load+0x47/0x230 [kvm] ? kvm_arch_vcpu_load+0x62/0x230 [kvm] kvm_vcpu_ioctl+0x340/0x700 [kvm] ? kvm_vcpu_ioctl+0x340/0x700 [kvm] ? __fget+0xfc/0x210 do_vfs_ioctl+0xa4/0x6a0 ? __fget+0x11d/0x210 SyS_ioctl+0x79/0x90 do_syscall_64+0x81/0x220 entry_SYSCALL64_slow_path+0x25/0x25 This patch fixes it by making the caller of start_hv_timer and start_sw_timer be in preemption-disabled regions, which trivially avoid any reentrancy issue with preempt notifier. Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx> Cc: Radim Krčmář <rkrcmar@xxxxxxxxxx> Signed-off-by: Wanpeng Li <wanpeng.li@xxxxxxxxxxx> --- v1 -> v2: * cleanup patch description v2 -> v3: * the caller of start_hv_timer and start_sw_timer in a preemption-disabled region arch/x86/kvm/lapic.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 2819d4c..0d558c5 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1495,11 +1495,10 @@ EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use); static void cancel_hv_timer(struct kvm_lapic *apic) { + WARN_ON(preemptible()); WARN_ON(!apic->lapic_timer.hv_timer_in_use); - preempt_disable(); kvm_x86_ops->cancel_hv_timer(apic->vcpu); apic->lapic_timer.hv_timer_in_use = false; - preempt_enable(); } static bool start_hv_timer(struct kvm_lapic *apic) @@ -1552,18 +1551,25 @@ static void start_sw_timer(struct kvm_lapic *apic) static void restart_apic_timer(struct kvm_lapic *apic) { + preempt_disable(); if (!start_hv_timer(apic)) start_sw_timer(apic); + preempt_enable(); } void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - WARN_ON(!apic->lapic_timer.hv_timer_in_use); + preempt_disable(); + /* The preempt notifier has called apic_timer_expired already */ + if (!apic->lapic_timer.hv_timer_in_use) + goto out; WARN_ON(swait_active(&vcpu->wq)); cancel_hv_timer(apic); apic_timer_expired(apic); +out: + preempt_enable(); if (apic_lvtt_period(apic) && apic->lapic_timer.period) { advance_periodic_target_expiration(apic); @@ -1582,9 +1588,11 @@ void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; + preempt_disable(); /* Possibly the TSC deadline timer is not enabled yet */ if (apic->lapic_timer.hv_timer_in_use) start_sw_timer(apic); + preempt_enable(); } EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer); -- 2.7.4