On 13/12/12 21:52, Christoffer Dall wrote:
>
> On Thursday, December 6, 2012, Marc Zyngier wrote:
>
> The VGIC code strongly relies on the maintenance interrupt to have
> a very low latency, assuming it to be nearly synchronous. This is
> wrong, as the architecture doesn't guarantee such thing.
>
> This means the interrupt may be handled too late, possibly after
> the triggering VM has been context switched away from the CPU and
> another brought in. Disaster looming.
>
> The fix is not to rely on the interrupt itself to do the maintenance.
> We can deal with this on the VM exit path, and the VGIC interrupt
> only becomes a scheduling hint.
>
> Also, it means that the "kick reduction optimisation" feature becomes
> pointless, as it effectively relied on the VGIC interrupt to reduce
> the number of times we kick a vcpu. This code is thus dropped
> altogether.
>
> Signed-off-by: Marc Zyngier <marc.zyngier@xxxxxxx>
> ---
>  arch/arm/include/asm/kvm_vgic.h |   4 --
>  arch/arm/kvm/arm.c              |  10 +--
>  arch/arm/kvm/interrupts_head.S  |   7 ++-
>  arch/arm/kvm/vgic.c             | 131 ++++++++++++++++++++--------------------
>  4 files changed, 74 insertions(+), 78 deletions(-)
>
> diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
> index 9a55c5f..2e2813a 100644
> --- a/arch/arm/include/asm/kvm_vgic.h
> +++ b/arch/arm/include/asm/kvm_vgic.h
> @@ -132,9 +132,6 @@ struct vgic_cpu {
>  	u32	vgic_elrsr[2];	/* Saved only */
>  	u32	vgic_apr;
>  	u32	vgic_lr[64];	/* A15 has only 4... */
> -
> -	/* Number of level-triggered interrupt in progress */
> -	atomic_t	irq_active_count;
>  #endif
>  };
>
> @@ -174,7 +171,6 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
>
>  #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.vctrl_base))
>  #define vgic_initialized(k)	((k)->arch.vgic.ready)
> -#define vgic_active_irq(v)	(atomic_read(&(v)->arch.vgic_cpu.irq_active_count) == 0)
>
>  #else
>  static inline int kvm_vgic_hyp_init(void)
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index 4d4da72..fea8fbb 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -94,15 +94,7 @@ int kvm_arch_hardware_enable(void *garbage)
>
>  int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
>  {
> -	if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE) {
> -		if (vgic_active_irq(vcpu) &&
> -		    cmpxchg(&vcpu->mode, EXITING_GUEST_MODE, IN_GUEST_MODE) == EXITING_GUEST_MODE)
> -			return 0;
> -
> -		return 1;
> -	}
> -
> -	return 0;
> +	return (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE);
>  }
>
>  void kvm_arch_hardware_disable(void *garbage)
> diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
> index 07c85f2..5308eb2 100644
> --- a/arch/arm/kvm/interrupts_head.S
> +++ b/arch/arm/kvm/interrupts_head.S
> @@ -416,12 +416,17 @@ vcpu	.req	r0	@ vcpu pointer always in r0
>  	str	r9, [r11, #(VGIC_CPU_ELRSR + 4)]
>  	str	r10, [r11, #VGIC_CPU_APR]
>
> +	/* Clear GICH_HCR */
> +	mov	r5, #0
> +	str	r5, [r2, #GICH_HCR]
> +
>  	/* Save list registers */
>  	add	r2, r2, #GICH_LR0
>  	add	r3, r11, #VGIC_CPU_LR
>  	ldr	r4, [r11, #VGIC_CPU_NR_LR]
> -1:	ldr	r6, [r2], #4
> +1:	ldr	r6, [r2]
>  	str	r6, [r3], #4
> +	str	r5, [r2], #4	@ Wipe the LR we just saved
>
> that's a shame - a whole bunch of more mmio operations. Didn't we just
> disable the whole vgic above and we replace all the lr's on another vm
> entry, so why is this necessary?

Good point. Clearing GICH_HCR should be enough already.
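To illustrate (untested, just sketching what the respin could look like if we
keep only the GICH_HCR clear and drop the per-LR wipe):

	/* Clear GICH_HCR: the virtual interface stays off until the
	 * next world switch, so no need to touch each LR here */
	mov	r5, #0
	str	r5, [r2, #GICH_HCR]

	/* Save list registers, back to the single post-indexed load */
	add	r2, r2, #GICH_LR0
	add	r3, r11, #VGIC_CPU_LR
	ldr	r4, [r11, #VGIC_CPU_NR_LR]
1:	ldr	r6, [r2], #4
	str	r6, [r3], #4
	subs	r4, r4, #1
	bne	1b

That drops the extra MMIO store per LR and leaves the save loop as it was.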
>
>  	subs	r4, r4, #1
>  	bne	1b
>  2:
> diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
> index df4f19d..9592067 100644
> --- a/arch/arm/kvm/vgic.c
> +++ b/arch/arm/kvm/vgic.c
> @@ -181,8 +181,6 @@ static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq)
>  	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
>
>  	vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1);
> -	atomic_inc(&vcpu->arch.vgic_cpu.irq_active_count);
> -	smp_mb__after_atomic_inc();
>  }
>
>  static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
> @@ -190,8 +188,13 @@ static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
>  	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
>
>  	vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0);
> -	atomic_dec(&vcpu->arch.vgic_cpu.irq_active_count);
> -	smp_mb__after_atomic_dec();
>
> didn't you just add this one?

As the commit log says above, this feature is simply removed, as it doesn't
make sense anymore.

> +}
> +
> +static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
> +{
> +	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
> +
> +	return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq);
>  }
>
>  static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq)
> @@ -987,6 +990,56 @@ epilog:
>  	}
>  }
>
> +static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
> +{
> +	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
> +	bool level_pending = false;
> +
> +	kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr);
> +
> +	/*
> +	 * We do not need to take the distributor lock here, since the only
> +	 * action we perform is clearing the irq_active_bit for an EOIed
> +	 * level interrupt. There is a potential race with
> +	 * the queuing of an interrupt in __kvm_sync_to_cpu(), where we check
> +	 * if the interrupt is already active. Two possibilities:
> +	 *
> +	 * - The queuing is occuring on the same vcpu: cannot happen, as we're
>
> occurring
>
> +	 *   already in the context of this vcpu, and executing the handler
> +	 * - The interrupt has been migrated to another vcpu, and we ignore
> +	 *   this interrupt for this run. Big deal. It is still pending though,
> +	 *   and will get considered when this vcpu exits.
> +	 */
> +	if (vgic_cpu->vgic_misr & VGIC_MISR_EOI) {
> +		/*
> +		 * Some level interrupts have been EOIed. Clear their
> +		 * active bit.
> +		 */
> +		int lr, irq;
> +
> +		for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr,
> +				 vgic_cpu->nr_lr) {
> +			irq = vgic_cpu->vgic_lr[lr] & VGIC_LR_VIRTUALID;
> +
> +			vgic_irq_clear_active(vcpu, irq);
> +			vgic_cpu->vgic_lr[lr] &= ~VGIC_LR_EOI;
> +
> +			/* Any additional pending interrupt? */
> +			if (vgic_dist_irq_is_pending(vcpu, irq)) {
> +				vgic_cpu_irq_set(vcpu, irq);
> +				level_pending = true;
> +			} else {
> +				vgic_cpu_irq_clear(vcpu, irq);
> +			}
> +		}
> +	}
> +
> +	if (vgic_cpu->vgic_misr & VGIC_MISR_U)
> +		vgic_cpu->vgic_hcr &= ~VGIC_HCR_UIE;
> +
> +	return level_pending;
> +}
> +
>  /*
>   * Sync back the VGIC state after a guest run. We do not really touch
>   * the distributor here (the irq_pending_on_cpu bit is safe to set),
> @@ -997,6 +1050,10 @@ static void __kvm_vgic_sync_from_cpu(struct kvm_vcpu *vcpu)
>  	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
>  	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
>  	int lr, pending;
> +	bool level_pending;
> +
> +	/* Process anything that has to do with maintenance interrupt first */
>
> this comment doesn't really help understanding anything - the function
> name says it all
>
> +	level_pending = vgic_process_maintenance(vcpu);
>
>  	/* Clear mappings for empty LRs */
>  	for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr,
> @@ -1015,10 +1072,8 @@ static void __kvm_vgic_sync_from_cpu(struct kvm_vcpu *vcpu)
>  	/* Check if we still have something up our sleeve... */
>  	pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_elrsr,
>  				      vgic_cpu->nr_lr);
> -	if (pending < vgic_cpu->nr_lr) {
> +	if (level_pending || pending < vgic_cpu->nr_lr)
>  		set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
> -		smp_mb();
> -	}
>  }
>
>  void kvm_vgic_sync_to_cpu(struct kvm_vcpu *vcpu)
> @@ -1080,7 +1135,7 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
>  	vcpu = kvm_get_vcpu(kvm, cpuid);
>  	is_edge = vgic_irq_is_edge(vcpu, irq_num);
>  	is_level = !is_edge;
> -	state = vgic_bitmap_get_irq_val(&dist->irq_state, cpuid, irq_num);
> +	state = vgic_dist_irq_is_pending(vcpu, irq_num);
>
>  	/*
>  	 * Only inject an interrupt if:
> @@ -1156,64 +1211,12 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
>
>  static irqreturn_t vgic_maintenance_handler(int irq, void *data)
>  {
> -	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)data;
> -	struct vgic_dist *dist;
> -	struct vgic_cpu *vgic_cpu;
> -
> -	if (WARN(!vcpu,
> -		 "VGIC interrupt on CPU %d with no vcpu\n", smp_processor_id()))
> -		return IRQ_HANDLED;
> -
> -	vgic_cpu = &vcpu->arch.vgic_cpu;
> -	dist = &vcpu->kvm->arch.vgic;
> -	kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr);
> -
>  	/*
> -	 * We do not need to take the distributor lock here, since the only
> -	 * action we perform is clearing the irq_active_bit for an EOIed
> -	 * level interrupt. There is a potential race with
> -	 * the queuing of an interrupt in __kvm_sync_to_cpu(), where we check
> -	 * if the interrupt is already active. Two possibilities:
> -	 *
> -	 * - The queuing is occuring on the same vcpu: cannot happen, as we're
> -	 *   already in the context of this vcpu, and executing the handler
> -	 * - The interrupt has been migrated to another vcpu, and we ignore
> -	 *   this interrupt for this run. Big deal. It is still pending though,
> -	 *   and will get considered when this vcpu exits.
> +	 * We cannot rely on the vgic maintenance interrupt to be
> +	 * delivered synchronously. Which means we can only use it to
>
> Starting a sentence with Which sounds strange to me - granted I'm not a
> native English speaker, but I would think that this should be either
> "..., which means" or "... This means that ..."
>
> +	 * exit the VM, and not do anything useful. The real
>
> skip the ", and not do anything useful." Also "The real processing" is
> rather vague, this should describe what we do on the exit path instead,
> or it should be described in that code and this comment should say
> something like "see vgic_process_maintenance() for details.".
>
> +	 * processing is done on the exit path.
>  	 */
> -	if (vgic_cpu->vgic_misr & VGIC_MISR_EOI) {
> -		/*
> -		 * Some level interrupts have been EOIed. Clear their
> -		 * active bit.
> -		 */
> -		int lr, irq;
> -
> -		for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr,
> -				 vgic_cpu->nr_lr) {
> -			irq = vgic_cpu->vgic_lr[lr] & VGIC_LR_VIRTUALID;
> -
> -			vgic_irq_clear_active(vcpu, irq);
> -			vgic_cpu->vgic_lr[lr] &= ~VGIC_LR_EOI;
> -			writel_relaxed(vgic_cpu->vgic_lr[lr],
> -				       dist->vctrl_base + GICH_LR0 + (lr << 2));
> -
> -			/* Any additionnal pending interrupt? */
> -			if (vgic_bitmap_get_irq_val(&dist->irq_state,
> -						    vcpu->vcpu_id, irq)) {
> -				vgic_cpu_irq_set(vcpu, irq);
> -				set_bit(vcpu->vcpu_id,
> -					&dist->irq_pending_on_cpu);
> -			} else {
> -				vgic_cpu_irq_clear(vcpu, irq);
> -			}
> -		}
> -	}
> -
> -	if (vgic_cpu->vgic_misr & VGIC_MISR_U) {
> -		vgic_cpu->vgic_hcr &= ~VGIC_HCR_UIE;
> -		writel_relaxed(vgic_cpu->vgic_hcr, dist->vctrl_base + GICH_HCR);
> -	}
> -
>  	return IRQ_HANDLED;
>  }
>
> --
> 1.8.0.1
>

-- 
Jazz is not dead. It just smells funny...

_______________________________________________
kvmarm mailing list
kvmarm@xxxxxxxxxxxxxxxxxxxxx
https://lists.cs.columbia.edu/cucslists/listinfo/kvmarm