On 21/03/2018 01:50, Liran Alon wrote: > When L1 IOAPIC redirection-table is written, a request of > KVM_REQ_SCAN_IOAPIC is set on all vCPUs. This is done such that > all vCPUs will now recalc their IOAPIC handled vectors and load > it to their EOI-exitmap. > > However, it could be that one of the vCPUs is currently running > L2. In this case, load_eoi_exitmap() will be called which would > write to vmcs02->eoi_exit_bitmap, which is wrong because > vmcs02->eoi_exit_bitmap should always be equal to > vmcs12->eoi_exit_bitmap. Furthermore, at this point > KVM_REQ_SCAN_IOAPIC was already consumed and therefore we will > never update vmcs01->eoi_exit_bitmap. This could lead to remote_irr > of some IOAPIC level-triggered entry remaining set forever. > > Fix this issue by delaying the load of EOI-exitmap to when the vCPU > is running L1. > > One may wonder why not just delay the entire KVM_REQ_SCAN_IOAPIC > processing to when the vCPU is running L1. This is done in order to handle > correctly the case where the LAPIC & IO-APIC of L1 are passed through into > L2. In this case, vmcs12->virtual_interrupt_delivery should be 0. In > the current nVMX implementation, that results in > vmcs02->virtual_interrupt_delivery also being 0. Thus, > vmcs02->eoi_exit_bitmap is not used. Therefore, every L2 EOI causes > a #VMExit into L0 (either on MSR_WRITE to x2APIC MSR or > APIC_ACCESS/APIC_WRITE/EPT_MISCONFIG to APIC MMIO page). > In order for such an L2 EOI to be broadcast, if needed, from the LAPIC > to the IO-APIC, vcpu->arch.ioapic_handled_vectors must be updated > while L2 is running. Therefore, the patch makes sure to delay only the > loading of the EOI-exitmap but not the update of > vcpu->arch.ioapic_handled_vectors. 
> > Signed-off-by: Liran Alon <liran.alon@xxxxxxxxxx> > Reviewed-by: Arbel Moshe <arbel.moshe@xxxxxxxxxx> > Signed-off-by: Krish Sadhukhan <krish.sadhukhan@xxxxxxxxxx> > --- > arch/x86/include/asm/kvm_host.h | 2 ++ > arch/x86/kvm/kvm_cache_regs.h | 5 +++++ > arch/x86/kvm/x86.c | 19 +++++++++++++++++-- > 3 files changed, 24 insertions(+), 2 deletions(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index b605a5b6a30c..e04e15898b5b 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -73,6 +73,7 @@ > #define KVM_REQ_HV_RESET KVM_ARCH_REQ(20) > #define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21) > #define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22) > +#define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23) > > #define CR0_RESERVED_BITS \ > (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ > @@ -498,6 +499,7 @@ struct kvm_vcpu_arch { > u64 apic_base; > struct kvm_lapic *apic; /* kernel irqchip context */ > bool apicv_active; > + bool load_eoi_exitmap_pending; > DECLARE_BITMAP(ioapic_handled_vectors, 256); > unsigned long apic_attention; > int32_t apic_arb_prio; > diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h > index f500293dad8d..d1457bc63d1c 100644 > --- a/arch/x86/kvm/kvm_cache_regs.h > +++ b/arch/x86/kvm/kvm_cache_regs.h > @@ -93,6 +93,11 @@ static inline void enter_guest_mode(struct kvm_vcpu *vcpu) > static inline void leave_guest_mode(struct kvm_vcpu *vcpu) > { > vcpu->arch.hflags &= ~HF_GUEST_MASK; > + > + if (vcpu->arch.load_eoi_exitmap_pending) { > + vcpu->arch.load_eoi_exitmap_pending = false; > + kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu); > + } > } > > static inline bool is_guest_mode(struct kvm_vcpu *vcpu) > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 18b5ca7a3197..0322aa60e191 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -6985,8 +6985,6 @@ void kvm_make_scan_ioapic_request(struct kvm *kvm) > > static void 
vcpu_scan_ioapic(struct kvm_vcpu *vcpu) > { > - u64 eoi_exit_bitmap[4]; > - > if (!kvm_apic_hw_enabled(vcpu->arch.apic)) > return; > > @@ -6999,6 +6997,20 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) > kvm_x86_ops->sync_pir_to_irr(vcpu); > kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); > } > + > + if (is_guest_mode(vcpu)) > + vcpu->arch.load_eoi_exitmap_pending = true; > + else > + kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu); > +} > + > +static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu) > +{ > + u64 eoi_exit_bitmap[4]; > + > + if (!kvm_apic_hw_enabled(vcpu->arch.apic)) > + return; > + > bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors, > vcpu_to_synic(vcpu)->vec_bitmap, 256); > kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); > @@ -7113,6 +7125,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) > } > if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) > vcpu_scan_ioapic(vcpu); > + if (kvm_check_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu)) > + vcpu_load_eoi_exitmap(vcpu); > if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) > kvm_vcpu_reload_apic_access_page(vcpu); > if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) { > @@ -8338,6 +8352,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) > int r; > > vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu); > + vcpu->arch.load_eoi_exitmap_pending = false; > vcpu->arch.emulate_ctxt.ops = &emulate_ops; > if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu)) > vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; > This hunk should not be needed since the memory is zero-initialized. Apart from this, I've queued the patch. Paolo