On Wed, 2018-10-03 at 17:26 +0200, Paolo Bonzini wrote:
> Commit b5861e5cf2fcf83031ea3e26b0a69d887adf7d21 introduced a check on
> the interrupt-window and NMI-window CPU execution controls in order to
> inject an external interrupt vmexit before the first guest instruction
> executes. However, when APIC virtualization is enabled the host does not
> need a vmexit in order to inject an interrupt at the next interrupt window;
> instead, it just places the interrupt vector in RVI and the processor will
> inject it as soon as possible. Therefore, on machines with APICv it is
> not enough to check the CPU execution controls: the same scenario can also
> happen if RVI>0.
>
> Fixes: b5861e5cf2fcf83031ea3e26b0a69d887adf7d21
> Cc: Nikita Leshchenko <nikita.leshchenko@xxxxxxxxxx>
> Cc: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
> Cc: Liran Alon <liran.alon@xxxxxxxxxx>
> Cc: Radim Krčmář <rkrcmar@xxxxxxxxxx>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>

Reviewed-by: Nikita Leshenko <nikita.leshchenko@xxxxxxxxxx>

> ---
>  arch/x86/kvm/vmx.c | 38 ++++++++++++++++++++++++++------------
>  1 file changed, 26 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 6ef2d5b139b9..c7ae8ea87bc4 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -6162,6 +6162,11 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
>  		nested_mark_vmcs12_pages_dirty(vcpu);
>  }
>
> +static u8 vmx_get_rvi(void)
> +{
> +	return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
> +}
> +
>  static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> @@ -6174,7 +6179,7 @@ static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
>  	    WARN_ON_ONCE(!vmx->nested.virtual_apic_page))
>  		return false;
>
> -	rvi = vmcs_read16(GUEST_INTR_STATUS) & 0xff;
> +	rvi = vmx_get_rvi();
>
>  	vapic_page = kmap(vmx->nested.virtual_apic_page);
>  	vppr = *((u32 *)(vapic_page + APIC_PROCPRI));
> @@ -10349,6 +10354,14 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
>  	return max_irr;
>  }
>
> +static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu)
> +{
> +	u8 rvi = vmx_get_rvi();
> +	u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI);
> +
> +	return ((rvi & 0xf0) > (vppr & 0xf0));
> +}
> +
>  static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
>  {
>  	if (!kvm_vcpu_apicv_active(vcpu))
> @@ -12593,10 +12606,13 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
>  	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
>  	bool from_vmentry = !!exit_qual;
>  	u32 dummy_exit_qual;
> -	u32 vmcs01_cpu_exec_ctrl;
> +	bool evaluate_pending_interrupts;
>  	int r = 0;
>
> -	vmcs01_cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
> +	evaluate_pending_interrupts = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
> +		(CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING);
> +	if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
> +		evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
>
>  	enter_guest_mode(vcpu);
>
> @@ -12644,16 +12660,14 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
>  	 * to L1 or delivered directly to L2 (e.g. In case L1 don't
>  	 * intercept EXTERNAL_INTERRUPT).
>  	 *
> -	 * Usually this would be handled by L0 requesting a
> -	 * IRQ/NMI window by setting VMCS accordingly. However,
> -	 * this setting was done on VMCS01 and now VMCS02 is active
> -	 * instead. Thus, we force L0 to perform pending event
> -	 * evaluation by requesting a KVM_REQ_EVENT.
> -	 */
> -	if (vmcs01_cpu_exec_ctrl &
> -	    (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING)) {
> +	 * Usually this would be handled by the processor noticing an
> +	 * IRQ/NMI window request, or checking RVI during evaluation of
> +	 * pending virtual interrupts. However, this setting was done
> +	 * on VMCS01 and now VMCS02 is active instead. Thus, we force L0
> +	 * to perform pending event evaluation by requesting a KVM_REQ_EVENT.
> +	 */
> +	if (unlikely(evaluate_pending_interrupts))
>  		kvm_make_request(KVM_REQ_EVENT, vcpu);
> -	}
>
>  	/*
>  	 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
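
For readers following along: as I read it, the new vmx_has_apicv_interrupt()
mirrors the check the processor itself performs for virtual interrupt
delivery (Intel SDM, "Evaluation of Pending Virtual Interrupts"): a virtual
interrupt is recognized when RVI[7:4] > VPPR[7:4]. A minimal standalone
sketch of that predicate, using hypothetical plain u8 inputs in place of the
real vmcs_read16()/kvm_lapic_get_reg() accessors:

	#include <stdbool.h>
	#include <stdint.h>

	typedef uint8_t u8;

	/* Sketch only: in KVM, rvi would come from GUEST_INTR_STATUS[7:0]
	 * and vppr from the virtual APIC page's PPR register; here they
	 * are assumed inputs so the predicate can be tested in isolation.
	 */
	static bool apicv_interrupt_recognized(u8 rvi, u8 vppr)
	{
		/* Only the priority class (bits 7:4) participates. */
		return (rvi & 0xf0) > (vppr & 0xf0);
	}

E.g. apicv_interrupt_recognized(0x41, 0x30) is true, while
apicv_interrupt_recognized(0x35, 0x30) is false: the pending vector's
priority class must strictly exceed the class in PPR, which is why RVI>0
alone can be enough to require pending-event evaluation here.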