[PATCH 08/16] ARM: KVM: vgic: don't rely on maintenance interrupt to be nearly synchronous

The VGIC code strongly relies on the maintenance interrupt having
a very low latency, assuming it to be nearly synchronous. This is
wrong: the architecture doesn't guarantee any such thing.

This means the interrupt may be handled too late, possibly after
the triggering VM has been context-switched away from the CPU and
another one brought in. Disaster looming.

The fix is not to rely on the interrupt itself to do the maintenance.
We can deal with it on the VM exit path instead, and the VGIC
interrupt becomes a mere scheduling hint.
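
In outline, the new flow looks like this (a simplified sketch, not
the literal diff below; it reuses the names introduced by this patch
and elides the LR bookkeeping):

	/* The maintenance interrupt now only forces a VM exit */
	static irqreturn_t vgic_maintenance_handler(int irq, void *data)
	{
		return IRQ_HANDLED;
	}

	static void __kvm_vgic_sync_from_cpu(struct kvm_vcpu *vcpu)
	{
		struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
		struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
		bool level_pending;
		int pending;

		/* Do what the handler used to do, reliably, on exit */
		level_pending = vgic_process_maintenance(vcpu);

		/* ... LR state is synced back to the distributor here ... */

		/* Still something queued? Make sure we get scheduled again */
		pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_elrsr,
					      vgic_cpu->nr_lr);
		if (level_pending || pending < vgic_cpu->nr_lr)
			set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
	}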

Also, it means that the "kick reduction optimisation" feature becomes
pointless, as it effectively relied on the VGIC interrupt to reduce
the number of times we kick a vcpu. This code is thus dropped altogether.

Signed-off-by: Marc Zyngier <marc.zyngier@xxxxxxx>
---
 arch/arm/include/asm/kvm_vgic.h |   4 --
 arch/arm/kvm/arm.c              |  10 +--
 arch/arm/kvm/interrupts_head.S  |   7 ++-
 arch/arm/kvm/vgic.c             | 131 ++++++++++++++++++++--------------------
 4 files changed, 74 insertions(+), 78 deletions(-)

diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
index 9a55c5f..2e2813a 100644
--- a/arch/arm/include/asm/kvm_vgic.h
+++ b/arch/arm/include/asm/kvm_vgic.h
@@ -132,9 +132,6 @@ struct vgic_cpu {
 	u32		vgic_elrsr[2];	/* Saved only */
 	u32		vgic_apr;
 	u32		vgic_lr[64];	/* A15 has only 4... */
-
-	/* Number of level-triggered interrupt in progress */
-	atomic_t	irq_active_count;
 #endif
 };
 
@@ -174,7 +171,6 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 
 #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.vctrl_base))
 #define vgic_initialized(k)	((k)->arch.vgic.ready)
-#define vgic_active_irq(v)	(atomic_read(&(v)->arch.vgic_cpu.irq_active_count) == 0)
 
 #else
 static inline int kvm_vgic_hyp_init(void)
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 4d4da72..fea8fbb 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -94,15 +94,7 @@ int kvm_arch_hardware_enable(void *garbage)
 
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 {
-	if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE) {
-		if (vgic_active_irq(vcpu) &&
-		    cmpxchg(&vcpu->mode, EXITING_GUEST_MODE, IN_GUEST_MODE) == EXITING_GUEST_MODE)
-			return 0;
-
-		return 1;
-	}
-
-	return 0;
+	return (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE);
 }
 
 void kvm_arch_hardware_disable(void *garbage)
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 07c85f2..5308eb2 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -416,12 +416,17 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	str	r9, [r11, #(VGIC_CPU_ELRSR + 4)]
 	str	r10, [r11, #VGIC_CPU_APR]
 
+	/* Clear GICH_HCR */
+	mov	r5, #0
+	str	r5, [r2, #GICH_HCR]
+
 	/* Save list registers */
 	add	r2, r2, #GICH_LR0
 	add	r3, r11, #VGIC_CPU_LR
 	ldr	r4, [r11, #VGIC_CPU_NR_LR]
-1:	ldr	r6, [r2], #4
+1:	ldr	r6, [r2]
 	str	r6, [r3], #4
+	str	r5, [r2], #4		@ Wipe the LR we just saved
 	subs	r4, r4, #1
 	bne	1b
 2:
diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
index df4f19d..9592067 100644
--- a/arch/arm/kvm/vgic.c
+++ b/arch/arm/kvm/vgic.c
@@ -181,8 +181,6 @@ static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq)
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
 	vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1);
-	atomic_inc(&vcpu->arch.vgic_cpu.irq_active_count);
-	smp_mb__after_atomic_inc();
 }
 
 static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
@@ -190,8 +188,13 @@ static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
 	vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0);
-	atomic_dec(&vcpu->arch.vgic_cpu.irq_active_count);
-	smp_mb__after_atomic_dec();
+}
+
+static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq);
 }
 
 static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq)
@@ -987,6 +990,56 @@ epilog:
 	}
 }
 
+static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
+{
+	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+	bool level_pending = false;
+
+	kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr);
+
+	/*
+	 * We do not need to take the distributor lock here, since the only
+	 * action we perform is clearing the irq_active_bit for an EOIed
+	 * level interrupt.  There is a potential race with
+	 * the queuing of an interrupt in __kvm_sync_to_cpu(), where we check
+	 * if the interrupt is already active. Two possibilities:
+	 *
+	 * - The queuing is occurring on the same vcpu: cannot happen, as we're
+	 *   already in the context of this vcpu, and executing the handler
+	 * - The interrupt has been migrated to another vcpu, and we ignore
+	 *   this interrupt for this run. Big deal. It is still pending though,
+	 *   and will get considered when this vcpu exits.
+	 */
+	if (vgic_cpu->vgic_misr & VGIC_MISR_EOI) {
+		/*
+		 * Some level interrupts have been EOIed. Clear their
+		 * active bit.
+		 */
+		int lr, irq;
+
+		for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr,
+				 vgic_cpu->nr_lr) {
+			irq = vgic_cpu->vgic_lr[lr] & VGIC_LR_VIRTUALID;
+
+			vgic_irq_clear_active(vcpu, irq);
+			vgic_cpu->vgic_lr[lr] &= ~VGIC_LR_EOI;
+
+			/* Any additional pending interrupt? */
+			if (vgic_dist_irq_is_pending(vcpu, irq)) {
+				vgic_cpu_irq_set(vcpu, irq);
+				level_pending = true;
+			} else {
+				vgic_cpu_irq_clear(vcpu, irq);
+			}
+		}
+	}
+
+	if (vgic_cpu->vgic_misr & VGIC_MISR_U)
+		vgic_cpu->vgic_hcr &= ~VGIC_HCR_UIE;
+
+	return level_pending;
+}
+
 /*
  * Sync back the VGIC state after a guest run. We do not really touch
  * the distributor here (the irq_pending_on_cpu bit is safe to set),
@@ -997,6 +1050,10 @@ static void __kvm_vgic_sync_from_cpu(struct kvm_vcpu *vcpu)
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	int lr, pending;
+	bool level_pending;
+
+	/* Process anything that has to do with maintenance interrupt first */
+	level_pending = vgic_process_maintenance(vcpu);
 
 	/* Clear mappings for empty LRs */
 	for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr,
@@ -1015,10 +1072,8 @@ static void __kvm_vgic_sync_from_cpu(struct kvm_vcpu *vcpu)
 	/* Check if we still have something up our sleeve... */
 	pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_elrsr,
 				      vgic_cpu->nr_lr);
-	if (pending < vgic_cpu->nr_lr) {
+	if (level_pending || pending < vgic_cpu->nr_lr)
 		set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
-		smp_mb();
-	}
 }
 
 void kvm_vgic_sync_to_cpu(struct kvm_vcpu *vcpu)
@@ -1080,7 +1135,7 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
 	vcpu = kvm_get_vcpu(kvm, cpuid);
 	is_edge = vgic_irq_is_edge(vcpu, irq_num);
 	is_level = !is_edge;
-	state = vgic_bitmap_get_irq_val(&dist->irq_state, cpuid, irq_num);
+	state = vgic_dist_irq_is_pending(vcpu, irq_num);
 
 	/*
 	 * Only inject an interrupt if:
@@ -1156,64 +1211,12 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
 
 static irqreturn_t vgic_maintenance_handler(int irq, void *data)
 {
-	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)data;
-	struct vgic_dist *dist;
-	struct vgic_cpu *vgic_cpu;
-
-	if (WARN(!vcpu,
-		 "VGIC interrupt on CPU %d with no vcpu\n", smp_processor_id()))
-		return IRQ_HANDLED;
-
-	vgic_cpu = &vcpu->arch.vgic_cpu;
-	dist = &vcpu->kvm->arch.vgic;
-	kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr);
-
 	/*
-	 * We do not need to take the distributor lock here, since the only
-	 * action we perform is clearing the irq_active_bit for an EOIed
-	 * level interrupt.  There is a potential race with
-	 * the queuing of an interrupt in __kvm_sync_to_cpu(), where we check
-	 * if the interrupt is already active. Two possibilities:
-	 *
-	 * - The queuing is occuring on the same vcpu: cannot happen, as we're
-	 *   already in the context of this vcpu, and executing the handler
-	 * - The interrupt has been migrated to another vcpu, and we ignore
-	 *   this interrupt for this run. Big deal. It is still pending though,
-	 *   and will get considered when this vcpu exits.
+	 * We cannot rely on the vgic maintenance interrupt to be
+	 * delivered synchronously. Which means we can only use it to
+	 * exit the VM, and not do anything useful. The real
+	 * processing is done on the exit path.
 	 */
-	if (vgic_cpu->vgic_misr & VGIC_MISR_EOI) {
-		/*
-		 * Some level interrupts have been EOIed. Clear their
-		 * active bit.
-		 */
-		int lr, irq;
-
-		for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr,
-				 vgic_cpu->nr_lr) {
-			irq = vgic_cpu->vgic_lr[lr] & VGIC_LR_VIRTUALID;
-
-			vgic_irq_clear_active(vcpu, irq);
-			vgic_cpu->vgic_lr[lr] &= ~VGIC_LR_EOI;
-			writel_relaxed(vgic_cpu->vgic_lr[lr],
-				       dist->vctrl_base + GICH_LR0 + (lr << 2));
-
-			/* Any additionnal pending interrupt? */
-			if (vgic_bitmap_get_irq_val(&dist->irq_state,
-						    vcpu->vcpu_id, irq)) {
-				vgic_cpu_irq_set(vcpu, irq);
-				set_bit(vcpu->vcpu_id,
-					&dist->irq_pending_on_cpu);
-			} else {
-				vgic_cpu_irq_clear(vcpu, irq);
-			}
-		}
-	}
-
-	if (vgic_cpu->vgic_misr & VGIC_MISR_U) {
-		vgic_cpu->vgic_hcr &= ~VGIC_HCR_UIE;
-		writel_relaxed(vgic_cpu->vgic_hcr, dist->vctrl_base + GICH_HCR);
-	}
-
 	return IRQ_HANDLED;
 }
 
-- 
1.8.0.1


