Add a kvm_x86_ops hook to detect a nested pending "hypervisor timer" and
use it to effectively open a window for servicing the expired timer.
Like pending SMIs on VMX, opening a window simply means requesting an
immediate exit.

This fixes a bug where an expired VMX preemption timer (for L2) will be
delayed and/or lost if a pending exception is injected into L2.  The
pending exception is rightly prioritized by vmx_check_nested_events()
and injected into L2, with the preemption timer left pending.  Because
no window opened, L2 is free to run uninterrupted.

Fixes: f4124500c2c13 ("KVM: nVMX: Fully emulate preemption timer")
Reported-by: Jim Mattson <jmattson@xxxxxxxxxx>
Cc: Oliver Upton <oupton@xxxxxxxxxx>
Cc: Peter Shier <pshier@xxxxxxxxxx>
Signed-off-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/vmx/nested.c       | 10 ++++++++--
 arch/x86/kvm/x86.c              |  4 ++++
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f26df2cb0591..65dc2c88d8b2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1179,6 +1179,7 @@ struct kvm_x86_ops {
 	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
 
 	int (*check_nested_events)(struct kvm_vcpu *vcpu);
+	bool (*nested_hv_timer_pending)(struct kvm_vcpu *vcpu);
 	void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
 
 	void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index dc7315b31fee..63cf339a13ac 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3687,6 +3687,12 @@ static void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu)
 			    vcpu->arch.exception.payload);
 }
 
+static bool nested_vmx_preemption_timer_pending(struct kvm_vcpu *vcpu)
+{
+	return nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
+	       to_vmx(vcpu)->nested.preemption_timer_expired;
+}
+
 static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -3742,8 +3748,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
 		return 0;
 	}
 
-	if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
-	    vmx->nested.preemption_timer_expired) {
+	if (nested_vmx_preemption_timer_pending(vcpu)) {
 		if (block_nested_events)
 			return -EBUSY;
 		nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
@@ -6443,6 +6448,7 @@ __init int nested_vmx_hardware_setup(struct kvm_x86_ops *ops,
 	exit_handlers[EXIT_REASON_VMFUNC]	= handle_vmfunc;
 
 	ops->check_nested_events = vmx_check_nested_events;
+	ops->nested_hv_timer_pending = nested_vmx_preemption_timer_pending;
 	ops->get_nested_state = vmx_get_nested_state;
 	ops->set_nested_state = vmx_set_nested_state;
 	ops->get_vmcs12_pages = nested_get_vmcs12_pages;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 59958ce2b681..ecd612807546 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8324,6 +8324,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_x86_ops.enable_nmi_window(vcpu);
 		if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
 			kvm_x86_ops.enable_irq_window(vcpu);
+		if (is_guest_mode(vcpu) &&
+		    kvm_x86_ops.nested_hv_timer_pending &&
+		    kvm_x86_ops.nested_hv_timer_pending(vcpu))
+			req_immediate_exit = true;
 		WARN_ON(vcpu->arch.exception.pending);
 	}
 
-- 
2.26.0
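
For illustration only, not part of the patch: a minimal standalone C
sketch of the window-opening decision added to vcpu_enter_guest().  The
struct, field names, and stub types below are simplified stand-ins
invented for this sketch, not the kernel's real kvm_x86_ops or kvm_vcpu
definitions.

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel's kvm_vcpu and kvm_x86_ops; the
 * names and fields here are invented for this sketch. */
struct vcpu_sketch {
	bool guest_mode;			/* is_guest_mode(vcpu) */
	bool preemption_timer_expired;		/* nested VMX timer state */
};

struct x86_ops_sketch {
	/* Optional vendor hook: true if a nested "hypervisor timer"
	 * (the VMX preemption timer armed for L2) has expired. */
	bool (*nested_hv_timer_pending)(struct vcpu_sketch *vcpu);
};

static bool vmx_preemption_timer_pending(struct vcpu_sketch *vcpu)
{
	return vcpu->preemption_timer_expired;
}

/* Mirrors the check added to vcpu_enter_guest(): after higher-priority
 * events (e.g. a pending exception) are injected into L2, request an
 * immediate exit so the expired timer is serviced on the next entry
 * rather than being delayed until some unrelated exit. */
static bool want_immediate_exit(struct x86_ops_sketch *ops,
				struct vcpu_sketch *vcpu)
{
	return vcpu->guest_mode &&
	       ops->nested_hv_timer_pending &&	/* hook may be NULL */
	       ops->nested_hv_timer_pending(vcpu);
}

int main(void)
{
	struct x86_ops_sketch ops = {
		.nested_hv_timer_pending = vmx_preemption_timer_pending,
	};
	struct vcpu_sketch vcpu = {
		.guest_mode = true,
		.preemption_timer_expired = true,
	};

	printf("request immediate exit: %d\n",
	       want_immediate_exit(&ops, &vcpu));
	return 0;
}

As in the patch, the function pointer is checked before the call, so a
vendor module that never sets the hook is unaffected.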