With the APICv virtual interrupt delivery feature, an EOI write from non-root mode doesn't cause a VM-Exit unless the corresponding vector is set in the EOI exit bitmap VMCS field. Basically there are two methods to manipulate the EOI exit bitmap: [Option 1] Ideally only a level-triggered irq requires a hook in the vLAPIC EOI write, so that the vIOAPIC EOI is triggered and emulated. So the simplest approach is to manipulate the EOI exit bitmap when the vLAPIC acks a new interrupt, based on the value of TMR. There are several corner cases worthy of note though: - KVM has specific notifier hooks on the vIOAPIC EOI path. So far two sources use them: INT-based device passthrough and PIT pending timers. The former is virtually wired to the vIOAPIC and thus TMR already covers it. PIT is special here, as it is an edge-triggered source. But since other timer sources like the vLAPIC timer don't require this notifier hook, possibly PIT can be relaxed in the future too. - A posted interrupt will update TMR directly, without a chance for KVM to update the EOI exit bitmap accordingly. This becomes a gap. [Option 2] Indicate the EOI exit bitmap requirement ('need_eoi') directly from every interrupt source device, and then check this requirement when the vLAPIC acks a new pending interrupt. This requires more intrusive changes to the current vLAPIC/vIOAPIC logic, so that the "irq_source_id" indicating the source of the interrupt is passed through from the origination point to the vLAPIC ack point. For natural requirements like vIOAPIC level-triggered entries, it can be implicitly deduced. On the other hand, for non-natural requirements like the aforementioned PIT or posted interrupts, this approach can handle them efficiently. For simplicity, option 1 is used for now, which should be enough to test MSI-based device passthrough. 
Signed-off-by: Kevin Tian <kevin.tian@xxxxxxxxx> Signed-off-by: Jiongxi Li <jiongxi.li@xxxxxxxxx> --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/lapic.c | 7 ++++++- arch/x86/kvm/vmx.c | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ef74df5..4e06a82 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -671,6 +671,7 @@ struct kvm_x86_ops { void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); int (*has_virtual_interrupt_delivery)(struct kvm_vcpu *vcpu); void (*update_irq)(struct kvm_vcpu *vcpu); + void (*set_eoi_exitmap)(struct kvm_vcpu *vcpu, int vector, int need_eoi); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*get_tdp_level)(void); u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index d203501..4058384 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -499,8 +499,13 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, if (trig_mode) { apic_debug("level trig mode for vector %d", vector); apic_set_vector(vector, apic->regs + APIC_TMR); - } else + if (kvm_apic_vid_enabled(vcpu)) + kvm_x86_ops->set_eoi_exitmap(vcpu, vector, 1); + } else { apic_clear_vector(vector, apic->regs + APIC_TMR); + if (kvm_apic_vid_enabled(vcpu)) + kvm_x86_ops->set_eoi_exitmap(vcpu, vector, 0); + } result = !apic_test_and_set_irr(vector, apic); trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 424a09d..73ff537 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -433,6 +433,7 @@ struct vcpu_vmx { bool rdtscp_enabled; + u32 eoi_exitmap_changed; u64 eoi_exit_bitmap[4]; /* Support for a guest hypervisor (nested VMX) */ @@ -6128,6 +6129,7 @@ static void vmx_update_irq(struct kvm_vcpu *vcpu) u16 status; u8 old; int 
vector; + struct vcpu_vmx *vmx = to_vmx(vcpu); vector = kvm_apic_get_highest_irr(vcpu); if (vector == -1) @@ -6140,6 +6142,40 @@ static void vmx_update_irq(struct kvm_vcpu *vcpu) status |= (u8)vector; vmcs_write16(GUEST_INTR_STATUS, status); } + + if (vmx->eoi_exitmap_changed) { +#define UPDATE_EOI_EXITMAP(v, e) { \ + if (test_and_clear_bit(e, (void *)&(v)->eoi_exitmap_changed)) \ + vmcs_write64(EOI_EXIT_BITMAP##e, (v)->eoi_exit_bitmap[e]);} + + UPDATE_EOI_EXITMAP(vmx, 0); + UPDATE_EOI_EXITMAP(vmx, 1); + UPDATE_EOI_EXITMAP(vmx, 2); + UPDATE_EOI_EXITMAP(vmx, 3); + } +} + +static void vmx_set_eoi_exitmap(struct kvm_vcpu *vcpu, + int vector, + int need_eoi) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + int index, offset, changed; + + if (WARN_ONCE((vector < 0) || (vector > 255), + "KVM VMX: vector (%d) out of range\n", vector)) + return; + + index = vector >> 6; + offset = vector & 63; + if (need_eoi) + changed = !test_and_set_bit(offset, + (void *)&vmx->eoi_exit_bitmap[index]); + else + changed = test_and_clear_bit(offset, + (void *)&vmx->eoi_exit_bitmap[index]); + if (changed) + set_bit(index, (void *)&vmx->eoi_exitmap_changed); } static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) @@ -7403,6 +7439,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .update_cr8_intercept = update_cr8_intercept, .has_virtual_interrupt_delivery = vmx_has_virtual_interrupt_delivery, .update_irq = vmx_update_irq, + .set_eoi_exitmap = vmx_set_eoi_exitmap, .set_tss_addr = vmx_set_tss_addr, .get_tdp_level = get_ept_level, -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html