The VGIC code strongly relies on the maintenance interrupt having a
very low latency, assuming it to be nearly synchronous. This is
wrong, as the architecture doesn't guarantee such a thing. This means
the interrupt may be handled too late, possibly after the triggering
VM has been context switched away from the CPU and another brought
in. Disaster looming.

The fix is not to rely on the interrupt itself to do the maintenance.
We can deal with this on the VM exit path, and the VGIC interrupt
only becomes a scheduling hint.

Also, it means that the "kick reduction optimisation" feature becomes
pointless, as it effectively relied on the VGIC interrupt to reduce
the number of times we kick a vcpu. This code is thus dropped
altogether.

Signed-off-by: Marc Zyngier <marc.zyngier@xxxxxxx>
---
 arch/arm/include/asm/kvm_vgic.h |   4 --
 arch/arm/kvm/arm.c              |  10 +---
 arch/arm/kvm/interrupts_head.S  |   4 ++
 arch/arm/kvm/vgic.c             | 130 +++++++++++++++++++++-------------
 4 files changed, 73 insertions(+), 75 deletions(-)
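For illustration only, the reworked flow can be modelled outside the
kernel: all maintenance work happens synchronously on the exit path,
and the maintenance interrupt's only job is to force that exit. The
sketch below is a stand-alone toy model, not part of the patch;
toy_vgic, process_maintenance, sync_from_cpu and every field name are
invented for this note.

#include <stdbool.h>
#include <stdio.h>

#define NR_LR  4        /* list registers in this toy model */
#define NR_IRQ 32

struct toy_vgic {
        unsigned int lr[NR_LR]; /* virtual IRQ number held by each LR */
        unsigned int eisr;      /* bit N set: level IRQ in LR N was EOIed */
        bool pending[NR_IRQ];   /* distributor pending state, per IRQ */
        bool active[NR_IRQ];    /* distributor active state, per IRQ */
        bool pending_on_cpu;    /* "run another injection pass" hint */
};

/* Plays the role of vgic_process_maintenance(): runs on the exit path,
 * in vcpu context, instead of in a (possibly late) interrupt handler. */
static bool process_maintenance(struct toy_vgic *v)
{
        bool level_pending = false;

        for (int lr = 0; lr < NR_LR; lr++) {
                if (!(v->eisr & (1u << lr)))
                        continue;

                unsigned int irq = v->lr[lr];

                v->active[irq] = false;         /* EOIed: drop the active bit */
                if (v->pending[irq])            /* line still asserted? */
                        level_pending = true;   /* needs re-queuing */
        }
        v->eisr = 0;
        return level_pending;
}

/* Plays the role of __kvm_vgic_sync_from_cpu(): the maintenance interrupt
 * contributes nothing here beyond having forced the exit. */
static void sync_from_cpu(struct toy_vgic *v)
{
        if (process_maintenance(v))
                v->pending_on_cpu = true;
}

int main(void)
{
        struct toy_vgic v = { .lr = { 27 }, .eisr = 1u << 0 };

        v.pending[27] = true;   /* level IRQ 27 EOIed while still asserted */
        v.active[27] = true;

        sync_from_cpu(&v);
        printf("pending_on_cpu = %d\n", v.pending_on_cpu);      /* prints 1 */
        return 0;
}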
diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
index 9a55c5f..2e2813a 100644
--- a/arch/arm/include/asm/kvm_vgic.h
+++ b/arch/arm/include/asm/kvm_vgic.h
@@ -132,9 +132,6 @@ struct vgic_cpu {
         u32             vgic_elrsr[2];  /* Saved only */
         u32             vgic_apr;
         u32             vgic_lr[64];    /* A15 has only 4... */
-
-        /* Number of level-triggered interrupt in progress */
-        atomic_t        irq_active_count;
 #endif
 };
 
@@ -174,7 +171,6 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 
 #define irqchip_in_kernel(k)    (!!((k)->arch.vgic.vctrl_base))
 #define vgic_initialized(k)     ((k)->arch.vgic.ready)
-#define vgic_active_irq(v)      (atomic_read(&(v)->arch.vgic_cpu.irq_active_count) == 0)
 
 #else
 static inline int kvm_vgic_hyp_init(void)
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index a277d61..eb19cdc 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -94,15 +94,7 @@ int kvm_arch_hardware_enable(void *garbage)
 
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 {
-        if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE) {
-                if (vgic_active_irq(vcpu) &&
-                    cmpxchg(&vcpu->mode, EXITING_GUEST_MODE, IN_GUEST_MODE) == EXITING_GUEST_MODE)
-                        return 0;
-
-                return 1;
-        }
-
-        return 0;
+        return (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE);
 }
 
 void kvm_arch_hardware_disable(void *garbage)
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 07c85f2..692c0cb 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -416,6 +416,10 @@ vcpu    .req    r0              @ vcpu pointer always in r0
         str     r9, [r11, #(VGIC_CPU_ELRSR + 4)]
         str     r10, [r11, #VGIC_CPU_APR]
 
+        /* Clear GICH_HCR */
+        mov     r5, #0
+        str     r5, [r2, #GICH_HCR]
+
         /* Save list registers */
         add     r2, r2, #GICH_LR0
         add     r3, r11, #VGIC_CPU_LR
diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
index d610f3b..7b994c0 100644
--- a/arch/arm/kvm/vgic.c
+++ b/arch/arm/kvm/vgic.c
@@ -181,7 +181,6 @@ static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq)
         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
         vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1);
-        atomic_inc(&vcpu->arch.vgic_cpu.irq_active_count);
 }
 
 static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
@@ -189,7 +188,13 @@ static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
         vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0);
-        atomic_dec(&vcpu->arch.vgic_cpu.irq_active_count);
+}
+
+static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
+{
+        struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+        return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq);
 }
 
 static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq)
@@ -985,6 +990,58 @@ epilog:
         }
 }
 
+static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
+{
+        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+        bool level_pending = false;
+
+        kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr);
+
+        /*
+         * We do not need to take the distributor lock here, since the only
+         * action we perform is clearing the irq_active_bit for an EOIed
+         * level interrupt. There is a potential race with
+         * the queuing of an interrupt in __kvm_sync_to_cpu(), where we check
+         * if the interrupt is already active. Two possibilities:
+         *
+         * - The queuing is occurring on the same vcpu: cannot happen,
+         *   as we're already in the context of this vcpu, and
+         *   executing the handler
+         * - The interrupt has been migrated to another vcpu, and we
+         *   ignore this interrupt for this run. Big deal. It is still
+         *   pending though, and will get considered when this vcpu
+         *   exits.
+         */
+        if (vgic_cpu->vgic_misr & VGIC_MISR_EOI) {
+                /*
+                 * Some level interrupts have been EOIed. Clear their
+                 * active bit.
+                 */
+                int lr, irq;
+
+                for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr,
+                                 vgic_cpu->nr_lr) {
+                        irq = vgic_cpu->vgic_lr[lr] & VGIC_LR_VIRTUALID;
+
+                        vgic_irq_clear_active(vcpu, irq);
+                        vgic_cpu->vgic_lr[lr] &= ~VGIC_LR_EOI;
+
+                        /* Any additional pending interrupt? */
+                        if (vgic_dist_irq_is_pending(vcpu, irq)) {
+                                vgic_cpu_irq_set(vcpu, irq);
+                                level_pending = true;
+                        } else {
+                                vgic_cpu_irq_clear(vcpu, irq);
+                        }
+                }
+        }
+
+        if (vgic_cpu->vgic_misr & VGIC_MISR_U)
+                vgic_cpu->vgic_hcr &= ~VGIC_HCR_UIE;
+
+        return level_pending;
+}
+
 /*
  * Sync back the VGIC state after a guest run. We do not really touch
  * the distributor here (the irq_pending_on_cpu bit is safe to set),
@@ -995,6 +1052,9 @@ static void __kvm_vgic_sync_from_cpu(struct kvm_vcpu *vcpu)
         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
         int lr, pending;
+        bool level_pending;
+
+        level_pending = vgic_process_maintenance(vcpu);
 
         /* Clear mappings for empty LRs */
         for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr,
@@ -1013,10 +1073,8 @@ static void __kvm_vgic_sync_from_cpu(struct kvm_vcpu *vcpu)
         /* Check if we still have something up our sleeve... */
         pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_elrsr,
                                       vgic_cpu->nr_lr);
-        if (pending < vgic_cpu->nr_lr) {
+        if (level_pending || pending < vgic_cpu->nr_lr)
                 set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
-                smp_mb();
-        }
 }
 
 void kvm_vgic_sync_to_cpu(struct kvm_vcpu *vcpu)
@@ -1078,7 +1136,7 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
         vcpu = kvm_get_vcpu(kvm, cpuid);
         is_edge = vgic_irq_is_edge(vcpu, irq_num);
         is_level = !is_edge;
-        state = vgic_bitmap_get_irq_val(&dist->irq_state, cpuid, irq_num);
+        state = vgic_dist_irq_is_pending(vcpu, irq_num);
 
         /*
          * Only inject an interrupt if:
@@ -1154,64 +1212,12 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
 
 static irqreturn_t vgic_maintenance_handler(int irq, void *data)
 {
-        struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)data;
-        struct vgic_dist *dist;
-        struct vgic_cpu *vgic_cpu;
-
-        if (WARN(!vcpu,
-                 "VGIC interrupt on CPU %d with no vcpu\n", smp_processor_id()))
-                return IRQ_HANDLED;
-
-        vgic_cpu = &vcpu->arch.vgic_cpu;
-        dist = &vcpu->kvm->arch.vgic;
-        kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr);
-
         /*
-         * We do not need to take the distributor lock here, since the only
-         * action we perform is clearing the irq_active_bit for an EOIed
-         * level interrupt. There is a potential race with
-         * the queuing of an interrupt in __kvm_sync_to_cpu(), where we check
-         * if the interrupt is already active. Two possibilities:
-         *
-         * - The queuing is occuring on the same vcpu: cannot happen, as we're
-         *   already in the context of this vcpu, and executing the handler
-         * - The interrupt has been migrated to another vcpu, and we ignore
-         *   this interrupt for this run. Big deal. It is still pending though,
-         *   and will get considered when this vcpu exits.
+         * We cannot rely on the vgic maintenance interrupt to be
+         * delivered synchronously. This means we can only use it to
+         * exit the VM, and we perform the handling of EOIed
+         * interrupts on the exit path (see vgic_process_maintenance).
         */
-        if (vgic_cpu->vgic_misr & VGIC_MISR_EOI) {
-                /*
-                 * Some level interrupts have been EOIed. Clear their
-                 * active bit.
-                 */
-                int lr, irq;
-
-                for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr,
-                                 vgic_cpu->nr_lr) {
-                        irq = vgic_cpu->vgic_lr[lr] & VGIC_LR_VIRTUALID;
-
-                        vgic_irq_clear_active(vcpu, irq);
-                        vgic_cpu->vgic_lr[lr] &= ~VGIC_LR_EOI;
-                        writel_relaxed(vgic_cpu->vgic_lr[lr],
-                                       dist->vctrl_base + GICH_LR0 + (lr << 2));
-
-                        /* Any additionnal pending interrupt? */
-                        if (vgic_bitmap_get_irq_val(&dist->irq_state,
-                                                    vcpu->vcpu_id, irq)) {
-                                vgic_cpu_irq_set(vcpu, irq);
-                                set_bit(vcpu->vcpu_id,
-                                        &dist->irq_pending_on_cpu);
-                        } else {
-                                vgic_cpu_irq_clear(vcpu, irq);
-                        }
-                }
-        }
-
-        if (vgic_cpu->vgic_misr & VGIC_MISR_U) {
-                vgic_cpu->vgic_hcr &= ~VGIC_HCR_UIE;
-                writel_relaxed(vgic_cpu->vgic_hcr, dist->vctrl_base + GICH_HCR);
-        }
-
         return IRQ_HANDLED;
 }
-- 
1.8.0.1
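As a companion sketch (again, not part of the patch): a stand-alone
C11 model of why kvm_arch_vcpu_should_kick() collapses to a single
comparison once the maintenance interrupt is demoted to a hint.
toy_vcpu, exiting_guest_mode and the other names are invented; only
the shape of the logic mirrors the arm.c hunk above.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

enum vcpu_mode { OUTSIDE_GUEST_MODE, IN_GUEST_MODE, EXITING_GUEST_MODE };

struct toy_vcpu {
        _Atomic enum vcpu_mode mode;
        atomic_int active_count;  /* stand-in for the dropped irq_active_count */
};

/* Mimics kvm_vcpu_exiting_guest_mode(): request the exit, return the old mode. */
static enum vcpu_mode exiting_guest_mode(struct toy_vcpu *v)
{
        enum vcpu_mode expected = IN_GUEST_MODE;

        atomic_compare_exchange_strong(&v->mode, &expected, EXITING_GUEST_MODE);
        return expected;        /* previous mode, whether the exchange won or lost */
}

/*
 * Old scheme (mirrors the removed hunk): sometimes undo the exit request
 * and skip the kick, trusting a maintenance interrupt to force the exit
 * "soon" -- precisely the latency assumption this patch abolishes.
 */
static bool should_kick_old(struct toy_vcpu *v)
{
        if (exiting_guest_mode(v) == IN_GUEST_MODE) {
                enum vcpu_mode expected = EXITING_GUEST_MODE;

                if (atomic_load(&v->active_count) == 0 &&
                    atomic_compare_exchange_strong(&v->mode, &expected,
                                                   IN_GUEST_MODE))
                        return false;   /* kick suppressed */
                return true;
        }
        return false;
}

/* New scheme: always kick a vcpu that was still in guest mode. */
static bool should_kick_new(struct toy_vcpu *v)
{
        return exiting_guest_mode(v) == IN_GUEST_MODE;
}

int main(void)
{
        struct toy_vcpu v;

        atomic_init(&v.mode, IN_GUEST_MODE);
        atomic_init(&v.active_count, 0);
        printf("old: kick=%d\n", should_kick_old(&v));  /* kick=0: suppressed */

        atomic_init(&v.mode, IN_GUEST_MODE);
        printf("new: kick=%d\n", should_kick_new(&v));  /* kick=1: always kick */
        return 0;
}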