- before returning to guest, RVI should be updated if any pending IRRs - EOI exit bitmap controls whether an EOI write should cause VM-Exit. if set, a trap-like induced EOI VM-Exit is triggered. Keep all the bitmaps cleared for now, which should be enough to allow a MSI based device passthrough Signed-off-by: Kevin Tian <kevin.tian@xxxxxxxxx> Signed-off-by: Jiongxi Li <jiongxi.li@xxxxxxxxx> --- arch/x86/include/asm/vmx.h | 11 ++++++++ arch/x86/kvm/lapic.c | 22 +++++++++++++++- arch/x86/kvm/lapic.h | 1 + arch/x86/kvm/vmx.c | 62 ++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 93 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 4a8193e..b1eca96 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -60,6 +60,7 @@ #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 #define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 +#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 @@ -97,6 +98,7 @@ enum vmcs_field { GUEST_GS_SELECTOR = 0x0000080a, GUEST_LDTR_SELECTOR = 0x0000080c, GUEST_TR_SELECTOR = 0x0000080e, + GUEST_INTR_STATUS = 0x00000810, HOST_ES_SELECTOR = 0x00000c00, HOST_CS_SELECTOR = 0x00000c02, HOST_SS_SELECTOR = 0x00000c04, @@ -124,6 +126,14 @@ enum vmcs_field { APIC_ACCESS_ADDR_HIGH = 0x00002015, EPT_POINTER = 0x0000201a, EPT_POINTER_HIGH = 0x0000201b, + EOI_EXIT_BITMAP0 = 0x0000201c, + EOI_EXIT_BITMAP0_HIGH = 0x0000201d, + EOI_EXIT_BITMAP1 = 0x0000201e, + EOI_EXIT_BITMAP1_HIGH = 0x0000201f, + EOI_EXIT_BITMAP2 = 0x00002020, + EOI_EXIT_BITMAP2_HIGH = 0x00002021, + EOI_EXIT_BITMAP3 = 0x00002022, + EOI_EXIT_BITMAP3_HIGH = 0x00002023, GUEST_PHYSICAL_ADDRESS = 0x00002400, GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, VMCS_LINK_POINTER = 0x00002800, @@ -279,6 +289,7 @@ enum vmcs_field { #define EXIT_REASON_MCE_DURING_VMENTRY 41 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 #define EXIT_REASON_APIC_ACCESS 44 +#define EXIT_REASON_EOI_INDUCED 45 #define EXIT_REASON_EPT_VIOLATION 48 #define EXIT_REASON_EPT_MISCONFIG 49 #define EXIT_REASON_WBINVD 54 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index c47f3d3..d203501 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -604,7 +604,27 @@ static int apic_set_eoi(struct kvm_lapic *apic) return vector; } -static void apic_send_ipi(struct kvm_lapic *apic) +/* + * this interface assumes a trap-like exit, which has already finished + * desired side effect including vISR and vPPR update. + */ +void kvm_apic_set_eoi(struct kvm_vcpu *vcpu, int vector) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + int trigger_mode; + + if (apic_test_and_clear_vector(vector, apic->regs + APIC_TMR)) + trigger_mode = IOAPIC_LEVEL_TRIG; + else + trigger_mode = IOAPIC_EDGE_TRIG; + + if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) + kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); + kvm_make_request(KVM_REQ_EVENT, apic->vcpu); +} +EXPORT_SYMBOL_GPL(kvm_apic_set_eoi); + + static void apic_send_ipi(struct kvm_lapic *apic) { u32 icr_low = apic_get_reg(apic, APIC_ICR); u32 icr_high = apic_get_reg(apic, APIC_ICR2); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 4e3b435..585337f 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -60,6 +60,7 @@ u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data); int kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset); +void kvm_apic_set_eoi(struct kvm_vcpu *vcpu, int vector); void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4a26d04..424a09d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -86,6 +86,9 @@ module_param(fasteoi, bool, S_IRUGO); static bool __read_mostly enable_apicv_reg = 0; module_param(enable_apicv_reg, bool, S_IRUGO); +static bool __read_mostly enable_apicv_vid = 0; +module_param(enable_apicv_vid, bool, S_IRUGO); + /* * If nested=1, nested virtualization is supported, i.e., guests may use * VMX and be a hypervisor for its own guests. If nested=0, guests may not @@ -430,6 +433,8 @@ struct vcpu_vmx { bool rdtscp_enabled; + u64 eoi_exit_bitmap[4]; + /* Support for a guest hypervisor (nested VMX) */ struct nested_vmx nested; }; @@ -769,6 +774,12 @@ static inline bool cpu_has_vmx_apic_register_virt(void) SECONDARY_EXEC_APIC_REGISTER_VIRT; } +static inline bool cpu_has_vmx_virtual_intr_delivery(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; +} + static inline bool cpu_has_vmx_flexpriority(void) { return cpu_has_vmx_tpr_shadow() && @@ -2485,6 +2496,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) SECONDARY_EXEC_PAUSE_LOOP_EXITING | SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_APIC_REGISTER_VIRT | + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_ENABLE_INVPCID; if (adjust_vmx_controls(min2, opt2, MSR_IA32_VMX_PROCBASED_CTLS2, @@ -2499,7 +2511,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)) _cpu_based_2nd_exec_control &= ~( - SECONDARY_EXEC_APIC_REGISTER_VIRT); + SECONDARY_EXEC_APIC_REGISTER_VIRT | + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { /* CR3 accesses and invlpg don't need to cause VM Exits when EPT @@ -2701,6 +2714,9 @@ static __init int hardware_setup(void) if (!cpu_has_vmx_apic_register_virt()) enable_apicv_reg = 0; + if (!cpu_has_vmx_virtual_intr_delivery()) + enable_apicv_vid = 0; + if (nested) nested_vmx_setup_ctls_msrs(); @@ -3832,6 +3848,8 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; if (!enable_apicv_reg) exec_control &= ~SECONDARY_EXEC_APIC_REGISTER_VIRT; + if (!enable_apicv_vid) + exec_control &= ~SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; return exec_control; } @@ -3876,6 +3894,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmx_secondary_exec_control(vmx)); } + if (enable_apicv_vid) { + vmcs_write64(EOI_EXIT_BITMAP0, 0); + vmcs_write64(EOI_EXIT_BITMAP1, 0); + vmcs_write64(EOI_EXIT_BITMAP2, 0); + vmcs_write64(EOI_EXIT_BITMAP3, 0); + + vmcs_write16(GUEST_INTR_STATUS, 0); + } + if (ple_gap) { vmcs_write32(PLE_GAP, ple_gap); vmcs_write32(PLE_WINDOW, ple_window); @@ -4793,6 +4820,16 @@ static int handle_apic_access(struct kvm_vcpu *vcpu) return emulate_instruction(vcpu, 0) == EMULATE_DONE; } +static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu) +{ + unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + int vector = exit_qualification & 0xff; + + /* EOI-induced VM exit is trap-like and thus no need to adjust IP */ + kvm_apic_set_eoi(vcpu, vector); + return 1; +} + static int handle_apic_write(struct kvm_vcpu *vcpu) { unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); @@ -5742,6 +5779,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, [EXIT_REASON_APIC_ACCESS] = handle_apic_access, [EXIT_REASON_APIC_WRITE] = handle_apic_write, + [EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced, [EXIT_REASON_WBINVD] = handle_wbinvd, [EXIT_REASON_XSETBV] = handle_xsetbv, [EXIT_REASON_TASK_SWITCH] = handle_task_switch, @@ -6082,7 +6120,26 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) static int vmx_has_virtual_interrupt_delivery(struct kvm_vcpu *vcpu) { - return 0; + return irqchip_in_kernel(vcpu->kvm) && enable_apicv_vid; +} + +static void vmx_update_irq(struct kvm_vcpu *vcpu) +{ + u16 status; + u8 old; + int vector; + + vector = kvm_apic_get_highest_irr(vcpu); + if (vector == -1) + return; + + status = vmcs_read16(GUEST_INTR_STATUS); + old = (u8)status & 0xff; + if ((u8)vector != old) { + status &= ~0xff; + status |= (u8)vector; + vmcs_write16(GUEST_INTR_STATUS, status); + } } static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) @@ -7345,6 +7402,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, .has_virtual_interrupt_delivery = vmx_has_virtual_interrupt_delivery, + .update_irq = vmx_update_irq, .set_tss_addr = vmx_set_tss_addr, .get_tdp_level = get_ept_level, -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html