When L1 IOAPIC redirection-table is written, a request of KVM_REQ_SCAN_IOAPIC is set on all vCPUs. This is done such that all vCPUs will now recalc their IOAPIC handled vectors and load it to their EOI-exitmap. However, it could be that one of the vCPUs is currently running L2. In this case, load_eoi_exitmap() will be called which would write to vmcs02->eoi_exit_bitmap, which is wrong because vmcs02->eoi_exit_bitmap should always be equal to vmcs12->eoi_exit_bitmap. Furthermore, at this point KVM_REQ_SCAN_IOAPIC was already consumed and therefore we will never update vmcs01->eoi_exit_bitmap. This could lead to remote_irr of some IOAPIC level-triggered entry to remain set forever. Fix this issue by delaying the load of EOI-exitmap to when vCPU is running L1. One may wonder why not just delay entire KVM_REQ_SCAN_IOAPIC processing to when vCPU is running L1. This is done in order to handle correctly the case where LAPIC & IO-APIC of L1 is pass-throughed into L2. In this case, vmcs12->virtual_interrupt_delivery should be 0. In current nVMX implementation, that results in vmcs02->virtual_interrupt_delivery to also be 0. Thus, vmcs02->eoi_exit_bitmap is not used. Therefore, every L2 EOI cause a #VMExit into L0 (either on MSR_WRITE to x2APIC MSR or APIC_ACCESS/APIC_WRITE/EPT_MISCONFIG to APIC MMIO page). In order for such L2 EOI to be broadcasted, if needed, from LAPIC to IO-APIC, vcpu->arch.ioapic_handled_vectors must be updated while L2 is running. Therefore, patch makes sure to delay only the loading of EOI-exitmap but not the update of vcpu->arch.ioapic_handled_vectors. Signed-off-by: Liran Alon <liran.alon@xxxxxxxxxx> Reviewed-by: Arbel Moshe <arbel.moshe@xxxxxxxxxx> Signed-off-by: Krish Sadhukhan <krish.sadhukhan@xxxxxxxxxx> --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/kvm_cache_regs.h | 5 +++++ arch/x86/kvm/x86.c | 19 +++++++++++++++++-- 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b605a5b6a30c..e04e15898b5b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -73,6 +73,7 @@ #define KVM_REQ_HV_RESET KVM_ARCH_REQ(20) #define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21) #define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22) +#define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23) #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ @@ -498,6 +499,7 @@ struct kvm_vcpu_arch { u64 apic_base; struct kvm_lapic *apic; /* kernel irqchip context */ bool apicv_active; + bool load_eoi_exitmap_pending; DECLARE_BITMAP(ioapic_handled_vectors, 256); unsigned long apic_attention; int32_t apic_arb_prio; diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index f500293dad8d..d1457bc63d1c 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -93,6 +93,11 @@ static inline void enter_guest_mode(struct kvm_vcpu *vcpu) static inline void leave_guest_mode(struct kvm_vcpu *vcpu) { vcpu->arch.hflags &= ~HF_GUEST_MASK; + + if (vcpu->arch.load_eoi_exitmap_pending) { + vcpu->arch.load_eoi_exitmap_pending = false; + kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu); + } } static inline bool is_guest_mode(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 18b5ca7a3197..0322aa60e191 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6985,8 +6985,6 @@ void kvm_make_scan_ioapic_request(struct kvm *kvm) static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) { - u64 eoi_exit_bitmap[4]; - if (!kvm_apic_hw_enabled(vcpu->arch.apic)) return; @@ -6999,6 +6997,20 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) kvm_x86_ops->sync_pir_to_irr(vcpu); kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); } + + if (is_guest_mode(vcpu)) + vcpu->arch.load_eoi_exitmap_pending = true; + else + kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu); +} + +static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu) +{ + u64 eoi_exit_bitmap[4]; + + if (!kvm_apic_hw_enabled(vcpu->arch.apic)) + return; + bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors, vcpu_to_synic(vcpu)->vec_bitmap, 256); kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); @@ -7113,6 +7125,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) vcpu_scan_ioapic(vcpu); + if (kvm_check_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu)) + vcpu_load_eoi_exitmap(vcpu); if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) kvm_vcpu_reload_apic_access_page(vcpu); if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) { @@ -8338,6 +8352,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) int r; vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu); + vcpu->arch.load_eoi_exitmap_pending = false; vcpu->arch.emulate_ctxt.ops = &emulate_ops; if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu)) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; -- 1.9.1