On Sun, Mar 11, 2018 at 12:49:55PM +0000, Marc Zyngier wrote:
> The vgic code is trying to be clever when injecting GICv2 SGIs,
> and will happily populate LRs with the same interrupt number if
> they come from multiple vcpus (after all, they are distinct
> interrupt sources).
>
> Unfortunately, this is against the letter of the architecture,
> and the GICv2 architecture spec says "Each valid interrupt stored
> in the List registers must have a unique VirtualID for that
> virtual CPU interface.". GICv3 has similar (although slightly
> ambiguous) restrictions.
>
> This results in guests locking up when using GICv2-on-GICv3, for
> example. The obvious fix is to stop trying so hard, and inject
> a single vcpu per SGI per guest entry. After all, pending SGIs
> with multiple source vcpus are pretty rare, and are mostly seen
> in scenarios where the physical CPUs are severely overcommitted.
>
> But as we now only inject a single instance of a multi-source SGI per
> vcpu entry, we may delay those interrupts for longer than strictly
> necessary, and run the risk of injecting lower priority interrupts
> in the meantime.
>
> In order to address this, we adopt a three-stage strategy:
> - If we encounter a multi-source SGI in the AP list while computing
>   its depth, we force the list to be sorted.
> - When populating the LRs, we prevent the injection of any interrupt
>   of lower priority than that of the first multi-source SGI we've
>   injected.
> - Finally, the injection of a multi-source SGI triggers the request
>   of a maintenance interrupt when there will be no pending interrupt
>   in the LRs (HCR_NPIE).
>
> At the point where the last pending interrupt in the LRs switches
> from Pending to Active, the maintenance interrupt will be delivered,
> allowing us to add the remaining SGIs using the same process.
>
> Cc: stable@xxxxxxxxxxxxxxx
> Fixes: 0919e84c0fc1 ("KVM: arm/arm64: vgic-new: Add IRQ sync/flush framework")
> Signed-off-by: Marc Zyngier <marc.zyngier@xxxxxxx>

The fact that we have to do this is really annoying, but I see no
other way around it. It will get slightly better if we move to
insertion sort based on priorities when injecting interrupts, as
discussed with Andre, though.
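Something along these lines, perhaps (a rough sketch only, with a
hypothetical helper name; it ignores the active-state ordering that
vgic_irq_cmp() also applies, and assumes the ap_list_lock is held):

static void vgic_ap_list_insert_sorted(struct kvm_vcpu *vcpu,
				       struct vgic_irq *irq)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_irq *pos;

	/*
	 * Walk the AP list until we find the first entry with a
	 * lower priority (higher numerical value) and insert in
	 * front of it, keeping the list sorted at all times and
	 * making the list_sort() pass at flush time unnecessary.
	 */
	list_for_each_entry(pos, &vgic_cpu->ap_list_head, ap_list) {
		if (irq->priority < pos->priority) {
			/* list_add_tail() on pos inserts before it */
			list_add_tail(&irq->ap_list, &pos->ap_list);
			return;
		}
	}

	list_add_tail(&irq->ap_list, &vgic_cpu->ap_list_head);
}
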
Acked-by: Christoffer Dall <cdall@xxxxxxxxxx>

> ---
>  include/linux/irqchip/arm-gic-v3.h |  1 +
>  include/linux/irqchip/arm-gic.h    |  1 +
>  virt/kvm/arm/vgic/vgic-v2.c        |  9 +++++-
>  virt/kvm/arm/vgic/vgic-v3.c        |  9 +++++-
>  virt/kvm/arm/vgic/vgic.c           | 61 +++++++++++++++++++++++++++++---------
>  virt/kvm/arm/vgic/vgic.h           |  2 ++
>  6 files changed, 67 insertions(+), 16 deletions(-)
>
> diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
> index c00c4c33e432..b26eccc78fb1 100644
> --- a/include/linux/irqchip/arm-gic-v3.h
> +++ b/include/linux/irqchip/arm-gic-v3.h
> @@ -503,6 +503,7 @@
>
>  #define ICH_HCR_EN			(1 << 0)
>  #define ICH_HCR_UIE			(1 << 1)
> +#define ICH_HCR_NPIE			(1 << 3)
>  #define ICH_HCR_TC			(1 << 10)
>  #define ICH_HCR_TALL0			(1 << 11)
>  #define ICH_HCR_TALL1			(1 << 12)
> diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
> index d3453ee072fc..68d8b1f73682 100644
> --- a/include/linux/irqchip/arm-gic.h
> +++ b/include/linux/irqchip/arm-gic.h
> @@ -84,6 +84,7 @@
>
>  #define GICH_HCR_EN			(1 << 0)
>  #define GICH_HCR_UIE			(1 << 1)
> +#define GICH_HCR_NPIE			(1 << 3)
>
>  #define GICH_LR_VIRTUALID		(0x3ff << 0)
>  #define GICH_LR_PHYSID_CPUID_SHIFT	(10)
> diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
> index c32d7b93ffd1..44264d11be02 100644
> --- a/virt/kvm/arm/vgic/vgic-v2.c
> +++ b/virt/kvm/arm/vgic/vgic-v2.c
> @@ -37,6 +37,13 @@ void vgic_v2_init_lrs(void)
>  		vgic_v2_write_lr(i, 0);
>  }
>
> +void vgic_v2_set_npie(struct kvm_vcpu *vcpu)
> +{
> +	struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
> +
> +	cpuif->vgic_hcr |= GICH_HCR_NPIE;
> +}
> +
>  void vgic_v2_set_underflow(struct kvm_vcpu *vcpu)
>  {
>  	struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
> @@ -64,7 +71,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
>  	int lr;
>  	unsigned long flags;
>
> -	cpuif->vgic_hcr &= ~GICH_HCR_UIE;
> +	cpuif->vgic_hcr &= ~(GICH_HCR_UIE | GICH_HCR_NPIE);
>
>  	for (lr = 0; lr < vgic_cpu->used_lrs; lr++) {
>  		u32 val = cpuif->vgic_lr[lr];
> diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
> index 6b329414e57a..0ff2006f3781 100644
> --- a/virt/kvm/arm/vgic/vgic-v3.c
> +++ b/virt/kvm/arm/vgic/vgic-v3.c
> @@ -26,6 +26,13 @@ static bool group1_trap;
>  static bool common_trap;
>  static bool gicv4_enable;
>
> +void vgic_v3_set_npie(struct kvm_vcpu *vcpu)
> +{
> +	struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
> +
> +	cpuif->vgic_hcr |= ICH_HCR_NPIE;
> +}
> +
>  void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
>  {
>  	struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
> @@ -47,7 +54,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
>  	int lr;
>  	unsigned long flags;
>
> -	cpuif->vgic_hcr &= ~ICH_HCR_UIE;
> +	cpuif->vgic_hcr &= ~(ICH_HCR_UIE | ICH_HCR_NPIE);
>
>  	for (lr = 0; lr < vgic_cpu->used_lrs; lr++) {
>  		u64 val = cpuif->vgic_lr[lr];
> diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
> index c7c5ef190afa..859cf88657d5 100644
> --- a/virt/kvm/arm/vgic/vgic.c
> +++ b/virt/kvm/arm/vgic/vgic.c
> @@ -684,22 +684,37 @@ static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
>  		vgic_v3_set_underflow(vcpu);
>  }
>
> +static inline void vgic_set_npie(struct kvm_vcpu *vcpu)
> +{
> +	if (kvm_vgic_global_state.type == VGIC_V2)
> +		vgic_v2_set_npie(vcpu);
> +	else
> +		vgic_v3_set_npie(vcpu);
> +}
> +
>  /* Requires the ap_list_lock to be held. */
> -static int compute_ap_list_depth(struct kvm_vcpu *vcpu)
> +static int compute_ap_list_depth(struct kvm_vcpu *vcpu,
> +				 bool *multi_sgi)
>  {
>  	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
>  	struct vgic_irq *irq;
>  	int count = 0;
>
> +	*multi_sgi = false;
> +
>  	DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
>
>  	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
>  		spin_lock(&irq->irq_lock);
>  		/* GICv2 SGIs can count for more than one... */
> -		if (vgic_irq_is_sgi(irq->intid) && irq->source)
> -			count += hweight8(irq->source);
> -		else
> +		if (vgic_irq_is_sgi(irq->intid) && irq->source) {
> +			int w = hweight8(irq->source);
> +
> +			count += w;
> +			*multi_sgi |= (w > 1);
> +		} else {
>  			count++;
> +		}
>  		spin_unlock(&irq->irq_lock);
>  	}
>  	return count;
> @@ -710,28 +725,43 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
>  {
>  	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
>  	struct vgic_irq *irq;
> -	int count = 0;
> +	int count;
> +	bool npie = false;
> +	bool multi_sgi;
> +	u8 prio = 0xff;
>
>  	DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
>
> -	if (compute_ap_list_depth(vcpu) > kvm_vgic_global_state.nr_lr)
> +	count = compute_ap_list_depth(vcpu, &multi_sgi);
> +	if (count > kvm_vgic_global_state.nr_lr || multi_sgi)
>  		vgic_sort_ap_list(vcpu);
>
> +	count = 0;
> +
>  	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
>  		spin_lock(&irq->irq_lock);
>
> -		if (unlikely(vgic_target_oracle(irq) != vcpu))
> -			goto next;
> -
>  		/*
> -		 * If we get an SGI with multiple sources, try to get
> -		 * them in all at once.
> +		 * If we have multi-SGIs in the pipeline, we need to
> +		 * guarantee that they are all seen before any IRQ of
> +		 * lower priority. In that case, we need to filter out
> +		 * these interrupts by exiting early. This is easy as
> +		 * the AP list has been sorted already.
>  		 */
> -		do {
> +		if (multi_sgi && irq->priority > prio) {
> +			spin_unlock(&irq->irq_lock);
> +			break;
> +		}
> +
> +		if (likely(vgic_target_oracle(irq) == vcpu)) {
>  			vgic_populate_lr(vcpu, irq, count++);
> -		} while (irq->source && count < kvm_vgic_global_state.nr_lr);
>
> -next:
> +			if (irq->source) {
> +				npie = true;
> +				prio = irq->priority;
> +			}
> +		}
> +
>  		spin_unlock(&irq->irq_lock);
>
>  		if (count == kvm_vgic_global_state.nr_lr) {
> @@ -742,6 +772,9 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
>  		}
>  	}
>
> +	if (npie)
> +		vgic_set_npie(vcpu);
> +
>  	vcpu->arch.vgic_cpu.used_lrs = count;
>
>  	/* Nuke remaining LRs */
> diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
> index 12c37b89f7a3..91f37c05e817 100644
> --- a/virt/kvm/arm/vgic/vgic.h
> +++ b/virt/kvm/arm/vgic/vgic.h
> @@ -159,6 +159,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
>  void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
>  void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr);
>  void vgic_v2_set_underflow(struct kvm_vcpu *vcpu);
> +void vgic_v2_set_npie(struct kvm_vcpu *vcpu);
>  int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
>  int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
>  			 int offset, u32 *val);
> @@ -188,6 +189,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
>  void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
>  void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr);
>  void vgic_v3_set_underflow(struct kvm_vcpu *vcpu);
> +void vgic_v3_set_npie(struct kvm_vcpu *vcpu);
>  void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
>  void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
>  void vgic_v3_enable(struct kvm_vcpu *vcpu);
> --
> 2.14.2
>
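As a side note, the one-source-per-entry behaviour is easy to see in
isolation. Below is a toy userspace illustration (not kernel code; the
helper is a stand-in for the kernel's hweight8() and the printed
narrative is mine) of how an SGI pending from three vcpus now drains
across successive guest entries, with NPIE bridging the gaps:

#include <stdio.h>

/* Userspace stand-in for the kernel's hweight8(): count set bits. */
static int hweight8(unsigned char w)
{
	int n = 0;

	for (; w; w &= w - 1)
		n++;
	return n;
}

int main(void)
{
	/* SGI pending from vcpus 1, 3 and 5: source bitmap 0b00101010 */
	unsigned char source = 0x2a;
	int entry = 0;

	/* compute_ap_list_depth() still counts one slot per source... */
	printf("AP list depth contribution: %d\n", hweight8(source));

	/*
	 * ...but the flush path now injects a single source per guest
	 * entry, relying on the NPIE maintenance interrupt to come
	 * back for the remaining ones.
	 */
	while (source) {
		int vcpu = __builtin_ctz(source);

		source &= source - 1;	/* consume lowest source bit */
		entry++;
		printf("entry %d: inject SGI from vcpu %d%s\n",
		       entry, vcpu,
		       source ? ", set NPIE and wait for maintenance IRQ"
			      : ", all sources drained");
	}

	return 0;
}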