On Sun, Mar 11, 2018 at 12:49:55PM +0000, Marc Zyngier wrote:
> The vgic code is trying to be clever when injecting GICv2 SGIs,
> and will happily populate LRs with the same interrupt number if
> they come from multiple vcpus (after all, they are distinct
> interrupt sources).
>
> Unfortunately, this is against the letter of the architecture,
> and the GICv2 architecture spec says "Each valid interrupt stored
> in the List registers must have a unique VirtualID for that
> virtual CPU interface.". GICv3 has similar (although slightly
> ambiguous) restrictions.
>
> This results in guests locking up when using GICv2-on-GICv3, for
> example. The obvious fix is to stop trying so hard, and inject
> a single vcpu per SGI per guest entry. After all, pending SGIs
> with multiple source vcpus are pretty rare, and are mostly seen
> in scenarios where the physical CPUs are severely overcommitted.
>
> But as we now only inject a single instance of a multi-source SGI per
> vcpu entry, we may delay those interrupts for longer than strictly
> necessary, and run the risk of injecting lower priority interrupts
> in the meantime.
>
> In order to address this, we adopt a three-stage strategy:
> - If we encounter a multi-source SGI in the AP list while computing
>   its depth, we force the list to be sorted.
> - When populating the LRs, we prevent the injection of any interrupt
>   of lower priority than that of the first multi-source SGI we've
>   injected.
> - Finally, the injection of a multi-source SGI triggers the request
>   of a maintenance interrupt when there will be no pending interrupt
>   in the LRs (HCR_NPIE).
>
> At the point where the last pending interrupt in the LRs switches
> from Pending to Active, the maintenance interrupt will be delivered,
> allowing us to add the remaining SGIs using the same process.
>
> Cc: stable@xxxxxxxxxxxxxxx
> Fixes: 0919e84c0fc1 ("KVM: arm/arm64: vgic-new: Add IRQ sync/flush framework")
> Signed-off-by: Marc Zyngier <marc.zyngier@xxxxxxx>

The fact that we have to do this is really annoying, but I see no
other way around it. It will get slightly better if we move to
insertion sort based on priorities when injecting interrupts, as
discussed with Andre, though.
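Something along these lines, perhaps (a rough sketch only, with a
hypothetical helper name; it ignores the active-state ordering that
vgic_irq_cmp() also applies, and assumes the ap_list_lock is held):

static void vgic_ap_list_insert_sorted(struct kvm_vcpu *vcpu,
				       struct vgic_irq *irq)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_irq *pos;

	/*
	 * Walk the AP list until we find the first entry with a
	 * lower priority (higher numerical value) and insert in
	 * front of it, keeping the list sorted at all times and
	 * making the list_sort() pass at flush time unnecessary.
	 */
	list_for_each_entry(pos, &vgic_cpu->ap_list_head, ap_list) {
		if (irq->priority < pos->priority) {
			/* list_add_tail() on pos inserts before it */
			list_add_tail(&irq->ap_list, &pos->ap_list);
			return;
		}
	}

	list_add_tail(&irq->ap_list, &vgic_cpu->ap_list_head);
}
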
Acked-by: Christoffer Dall <cdall@xxxxxxxxxx>

> ---
>  include/linux/irqchip/arm-gic-v3.h |  1 +
>  include/linux/irqchip/arm-gic.h    |  1 +
>  virt/kvm/arm/vgic/vgic-v2.c        |  9 +++++-
>  virt/kvm/arm/vgic/vgic-v3.c        |  9 +++++-
>  virt/kvm/arm/vgic/vgic.c           | 61 +++++++++++++++++++++++++++++---------
>  virt/kvm/arm/vgic/vgic.h           |  2 ++
>  6 files changed, 67 insertions(+), 16 deletions(-)
>
> diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
> index c00c4c33e432..b26eccc78fb1 100644
> --- a/include/linux/irqchip/arm-gic-v3.h
> +++ b/include/linux/irqchip/arm-gic-v3.h
> @@ -503,6 +503,7 @@
>
>  #define ICH_HCR_EN			(1 << 0)
>  #define ICH_HCR_UIE			(1 << 1)
> +#define ICH_HCR_NPIE			(1 << 3)
>  #define ICH_HCR_TC			(1 << 10)
>  #define ICH_HCR_TALL0			(1 << 11)
>  #define ICH_HCR_TALL1			(1 << 12)
> diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
> index d3453ee072fc..68d8b1f73682 100644
> --- a/include/linux/irqchip/arm-gic.h
> +++ b/include/linux/irqchip/arm-gic.h
> @@ -84,6 +84,7 @@
>
>  #define GICH_HCR_EN			(1 << 0)
>  #define GICH_HCR_UIE			(1 << 1)
> +#define GICH_HCR_NPIE			(1 << 3)
>
>  #define GICH_LR_VIRTUALID		(0x3ff << 0)
>  #define GICH_LR_PHYSID_CPUID_SHIFT	(10)
> diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
> index c32d7b93ffd1..44264d11be02 100644
> --- a/virt/kvm/arm/vgic/vgic-v2.c
> +++ b/virt/kvm/arm/vgic/vgic-v2.c
> @@ -37,6 +37,13 @@ void vgic_v2_init_lrs(void)
>  		vgic_v2_write_lr(i, 0);
>  }
>
> +void vgic_v2_set_npie(struct kvm_vcpu *vcpu)
> +{
> +	struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
> +
> +	cpuif->vgic_hcr |= GICH_HCR_NPIE;
> +}
> +
>  void vgic_v2_set_underflow(struct kvm_vcpu *vcpu)
>  {
>  	struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
> @@ -64,7 +71,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
>  	int lr;
>  	unsigned long flags;
>
> -	cpuif->vgic_hcr &= ~GICH_HCR_UIE;
> +	cpuif->vgic_hcr &= ~(GICH_HCR_UIE | GICH_HCR_NPIE);
>
>  	for (lr = 0; lr < vgic_cpu->used_lrs; lr++) {
>  		u32 val = cpuif->vgic_lr[lr];
> diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
> index 6b329414e57a..0ff2006f3781 100644
> --- a/virt/kvm/arm/vgic/vgic-v3.c
> +++ b/virt/kvm/arm/vgic/vgic-v3.c
> @@ -26,6 +26,13 @@ static bool group1_trap;
>  static bool common_trap;
>  static bool gicv4_enable;
>
> +void vgic_v3_set_npie(struct kvm_vcpu *vcpu)
> +{
> +	struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
> +
> +	cpuif->vgic_hcr |= ICH_HCR_NPIE;
> +}
> +
>  void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
>  {
>  	struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
> @@ -47,7 +54,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
>  	int lr;
>  	unsigned long flags;
>
> -	cpuif->vgic_hcr &= ~ICH_HCR_UIE;
> +	cpuif->vgic_hcr &= ~(ICH_HCR_UIE | ICH_HCR_NPIE);
>
>  	for (lr = 0; lr < vgic_cpu->used_lrs; lr++) {
>  		u64 val = cpuif->vgic_lr[lr];
> diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
> index c7c5ef190afa..859cf88657d5 100644
> --- a/virt/kvm/arm/vgic/vgic.c
> +++ b/virt/kvm/arm/vgic/vgic.c
> @@ -684,22 +684,37 @@ static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
>  		vgic_v3_set_underflow(vcpu);
>  }
>
> +static inline void vgic_set_npie(struct kvm_vcpu *vcpu)
> +{
> +	if (kvm_vgic_global_state.type == VGIC_V2)
> +		vgic_v2_set_npie(vcpu);
> +	else
> +		vgic_v3_set_npie(vcpu);
> +}
> +
>  /* Requires the ap_list_lock to be held. */
> -static int compute_ap_list_depth(struct kvm_vcpu *vcpu)
> +static int compute_ap_list_depth(struct kvm_vcpu *vcpu,
> +				 bool *multi_sgi)
>  {
>  	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
>  	struct vgic_irq *irq;
>  	int count = 0;
>
> +	*multi_sgi = false;
> +
>  	DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
>
>  	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
>  		spin_lock(&irq->irq_lock);
>  		/* GICv2 SGIs can count for more than one... */
> -		if (vgic_irq_is_sgi(irq->intid) && irq->source)
> -			count += hweight8(irq->source);
> -		else
> +		if (vgic_irq_is_sgi(irq->intid) && irq->source) {
> +			int w = hweight8(irq->source);
> +
> +			count += w;
> +			*multi_sgi |= (w > 1);
> +		} else {
>  			count++;
> +		}
>  		spin_unlock(&irq->irq_lock);
>  	}
>  	return count;
> @@ -710,28 +725,43 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
>  {
>  	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
>  	struct vgic_irq *irq;
> -	int count = 0;
> +	int count;
> +	bool npie = false;
> +	bool multi_sgi;
> +	u8 prio = 0xff;
>
>  	DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
>
> -	if (compute_ap_list_depth(vcpu) > kvm_vgic_global_state.nr_lr)
> +	count = compute_ap_list_depth(vcpu, &multi_sgi);
> +	if (count > kvm_vgic_global_state.nr_lr || multi_sgi)
>  		vgic_sort_ap_list(vcpu);
>
> +	count = 0;
> +
>  	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
>  		spin_lock(&irq->irq_lock);
>
> -		if (unlikely(vgic_target_oracle(irq) != vcpu))
> -			goto next;
> -
>  		/*
> -		 * If we get an SGI with multiple sources, try to get
> -		 * them in all at once.
> +		 * If we have multi-SGIs in the pipeline, we need to
> +		 * guarantee that they are all seen before any IRQ of
> +		 * lower priority. In that case, we need to filter out
> +		 * these interrupts by exiting early. This is easy as
> +		 * the AP list has been sorted already.
>  		 */
> -		do {
> +		if (multi_sgi && irq->priority > prio) {
> +			spin_unlock(&irq->irq_lock);
> +			break;
> +		}
> +
> +		if (likely(vgic_target_oracle(irq) == vcpu)) {
>  			vgic_populate_lr(vcpu, irq, count++);
> -		} while (irq->source && count < kvm_vgic_global_state.nr_lr);
>
> -next:
> +			if (irq->source) {
> +				npie = true;
> +				prio = irq->priority;
> +			}
> +		}
> +
>  		spin_unlock(&irq->irq_lock);
>
>  		if (count == kvm_vgic_global_state.nr_lr) {
> @@ -742,6 +772,9 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
>  		}
>  	}
>
> +	if (npie)
> +		vgic_set_npie(vcpu);
> +
>  	vcpu->arch.vgic_cpu.used_lrs = count;
>
>  	/* Nuke remaining LRs */
> diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
> index 12c37b89f7a3..91f37c05e817 100644
> --- a/virt/kvm/arm/vgic/vgic.h
> +++ b/virt/kvm/arm/vgic/vgic.h
> @@ -159,6 +159,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
>  void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
>  void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr);
>  void vgic_v2_set_underflow(struct kvm_vcpu *vcpu);
> +void vgic_v2_set_npie(struct kvm_vcpu *vcpu);
>  int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
>  int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
>  			 int offset, u32 *val);
> @@ -188,6 +189,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
>  void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
>  void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr);
>  void vgic_v3_set_underflow(struct kvm_vcpu *vcpu);
> +void vgic_v3_set_npie(struct kvm_vcpu *vcpu);
>  void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
>  void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
>  void vgic_v3_enable(struct kvm_vcpu *vcpu);
> --
> 2.14.2
>
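As a side note, the one-source-per-entry behaviour is easy to see in
isolation. Below is a toy userspace illustration (not kernel code; the
helper is a stand-in for the kernel's hweight8() and the printed
narrative is mine) of how an SGI pending from three vcpus now drains
across successive guest entries, with NPIE bridging the gaps:

#include <stdio.h>

/* Userspace stand-in for the kernel's hweight8(): count set bits. */
static int hweight8(unsigned char w)
{
	int n = 0;

	for (; w; w &= w - 1)
		n++;
	return n;
}

int main(void)
{
	/* SGI pending from vcpus 1, 3 and 5: source bitmap 0b00101010 */
	unsigned char source = 0x2a;
	int entry = 0;

	/* compute_ap_list_depth() still counts one slot per source... */
	printf("AP list depth contribution: %d\n", hweight8(source));

	/*
	 * ...but the flush path now injects a single source per guest
	 * entry, relying on the NPIE maintenance interrupt to come
	 * back for the remaining ones.
	 */
	while (source) {
		int vcpu = __builtin_ctz(source);

		source &= source - 1;	/* consume lowest source bit */
		entry++;
		printf("entry %d: inject SGI from vcpu %d%s\n",
		       entry, vcpu,
		       source ? ", set NPIE and wait for maintenance IRQ"
			      : ", all sources drained");
	}

	return 0;
}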