[PATCH 4/5]KVM:x86, apicv: add interface for poking EOI exit bitmap

"Li, Jiongxi" <jiongxi.li@xxxxxxxxx> · Wed, 5 Sep 2012 05:41:39 +0000

With the APICv virtual interrupt delivery feature, an EOI write from
non-root mode doesn't cause a VM-Exit unless the vector's bit is set in
the EOI exit bitmap VMCS field. Basically there are two methods to
manipulate the EOI exit bitmap:

[Option 1]
Ideally only level triggered irq requires a hook in vLAPIC EOI write,
so that vIOAPIC EOI is triggered and emulated. So the simplest
approach is to manipulate EOI exit bitmap when vLAPIC acks a new
interrupt, based on value of TMR. There're several corner cases
worthy of note though:

  - KVM has specific notifier hooks on vIOAPIC EOI path. So far two
    sources use it: INT-based device passthrough and PIT pending
    timers. For the former, it's virtually wired to vIOAPIC and
    thus TMR already covers it. PIT is special here, which is an
    edge triggered source. But since other timer sources like
    vLAPIC timer don't require this notifier hook, possibly PIT
    can be relaxed in the future too.

  - a posted interrupt will update TMR directly, w/o any chance for KVM
    to update the EOI exit bitmap accordingly. This leaves a gap in
    this approach.

[Option 2]
Indicate EOI exit bitmap requirement ('need_eoi') directly from
every interrupt source device, and then check this requirement
when vLAPIC acks a new pending interrupt. This requires more
intrusive changes to current vLAPIC/vIOAPIC logic, so that the
"irq_source_id" indicating source of interrupt is passed through
from origination point to vLAPIC ack point. For natural requirements
like vIOAPIC level triggered entries, it can be implicitly deduced.
On the other hand, for non-natural requirements like the aforementioned
PIT or posted interrupt, this approach can handle it efficiently.

For simplicity, option 1 is used for now, which should be
enough to test MSI-based device passthrough.

Signed-off-by: Kevin Tian <kevin.tian@xxxxxxxxx>
Signed-off-by: Jiongxi Li <jiongxi.li@xxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h |    1 +
 arch/x86/kvm/lapic.c            |    7 ++++++-
 arch/x86/kvm/vmx.c              |   37 +++++++++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 1 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ef74df5..4e06a82 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -671,6 +671,7 @@ struct kvm_x86_ops {
 	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
 	int (*has_virtual_interrupt_delivery)(struct kvm_vcpu *vcpu);
 	void (*update_irq)(struct kvm_vcpu *vcpu);
+	void (*set_eoi_exitmap)(struct kvm_vcpu *vcpu, int vector, int need_eoi);
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*get_tdp_level)(void);
 	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index d203501..4058384 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -499,8 +499,13 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 		if (trig_mode) {
 			apic_debug("level trig mode for vector %d", vector);
 			apic_set_vector(vector, apic->regs + APIC_TMR);
-		} else
+			if (kvm_apic_vid_enabled(vcpu))
+				kvm_x86_ops->set_eoi_exitmap(vcpu, vector, 1);
+		} else {
 			apic_clear_vector(vector, apic->regs + APIC_TMR);
+			if (kvm_apic_vid_enabled(vcpu))
+				kvm_x86_ops->set_eoi_exitmap(vcpu, vector, 0);
+		}
 
 		result = !apic_test_and_set_irr(vector, apic);
 		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 424a09d..73ff537 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -433,6 +433,7 @@ struct vcpu_vmx {
 
 	bool rdtscp_enabled;
 
+	u32 eoi_exitmap_changed;
 	u64 eoi_exit_bitmap[4];
 
 	/* Support for a guest hypervisor (nested VMX) */
@@ -6128,6 +6129,7 @@ static void vmx_update_irq(struct kvm_vcpu *vcpu)
 	u16 status;
 	u8 old;
 	int vector;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
 	vector = kvm_apic_get_highest_irr(vcpu);
 	if (vector == -1)
@@ -6140,6 +6142,40 @@ static void vmx_update_irq(struct kvm_vcpu *vcpu)
 		status |= (u8)vector;
 		vmcs_write16(GUEST_INTR_STATUS, status);
 	}
+
+	if (vmx->eoi_exitmap_changed) {
+#define UPDATE_EOI_EXITMAP(v, e) {				\
+	if (test_and_clear_bit(e, (void *)&(v)->eoi_exitmap_changed))	\
+		vmcs_write64(EOI_EXIT_BITMAP##e, (v)->eoi_exit_bitmap[e]);}
+
+		UPDATE_EOI_EXITMAP(vmx, 0);
+		UPDATE_EOI_EXITMAP(vmx, 1);
+		UPDATE_EOI_EXITMAP(vmx, 2);
+		UPDATE_EOI_EXITMAP(vmx, 3);
+	}
+}
+
+static void vmx_set_eoi_exitmap(struct kvm_vcpu *vcpu,	/* update the cached EOI exit bitmap for one vector */
+				int vector,	/* interrupt vector; must be within 0..255 */
+				int need_eoi)	/* nonzero: set the vector's bit; zero: clear it */
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	int index, offset, changed;
+
+	if (WARN_ONCE((vector < 0) || (vector > 255),	/* warn once and ignore out-of-range vectors */
+		"KVM VMX: vector (%d) out of range\n", vector))
+		return;
+
+	index = vector >> 6;	/* which of the four u64 words in eoi_exit_bitmap[] */
+	offset = vector & 63;	/* bit position inside that word */
+	if (need_eoi)
+		changed = !test_and_set_bit(offset,	/* changed only if the bit was previously clear */
+				(void *)&vmx->eoi_exit_bitmap[index]);
+	else
+		changed = test_and_clear_bit(offset,	/* changed only if the bit was previously set */
+				(void *)&vmx->eoi_exit_bitmap[index]);
+	if (changed)	/* only the cached copy is touched here; vmx_update_irq() writes dirty words to the VMCS */
+		set_bit(index, (void *)&vmx->eoi_exitmap_changed);	/* NOTE(review): field is u32 but bitops take unsigned long * - confirm this is safe */
 }
 
 static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
@@ -7403,6 +7439,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.update_cr8_intercept = update_cr8_intercept,
 	.has_virtual_interrupt_delivery = vmx_has_virtual_interrupt_delivery,
 	.update_irq = vmx_update_irq,
+	.set_eoi_exitmap = vmx_set_eoi_exitmap,
 
 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html