Re: [PATCH 3/3] KVM: arm/arm64: vgic: introduce vgic_cpu pending status and lowest_priority

Marc Zyngier <marc.zyngier@xxxxxxx> · Wed, 24 Jul 2019 12:39:38 +0100

On 24/07/2019 10:04, Xiangyou Xie wrote:
> During the halt polling process, vgic_cpu->ap_list_lock is frequently
> obtained and released (kvm_vcpu_check_block->kvm_arch_vcpu_runnable->
> kvm_vgic_vcpu_pending_irq). This action affects the performance of virq
> interrupt injection, because vgic_queue_irq_unlock also attempts to get
> vgic_cpu->ap_list_lock and add irq to vgic_cpu ap_list.

Numbers. Give me numbers. Please.

> 
> With the irq pending state and the minimum priority introduced by this
> patch, kvm_vgic_vcpu_pending_irq does not need to traverse the vgic_cpu
> ap_list; it only checks the pending state and priority.
> 
> Signed-off-by: Xiangyou Xie <xiexiangyou@xxxxxxxxxx>
> ---
>  include/kvm/arm_vgic.h   |  5 +++++
>  virt/kvm/arm/vgic/vgic.c | 40 ++++++++++++++++++++++------------------
>  2 files changed, 27 insertions(+), 18 deletions(-)
> 
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index ce372a0..636db29 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -337,6 +337,11 @@ struct vgic_cpu {
>  
>  	/* Cache guest interrupt ID bits */
>  	u32 num_id_bits;
> +
> +	/* Minimum of priority in all irqs */
> +	u8 lowest_priority;

In all IRQs? That are in every possible state?

> +	/* Irq pending flag */
> +	bool pending;

What does pending mean here? Strictly pending? Or does it cover the other
states of an interrupt (Active, Active+Pending)?

>  };
>  
>  extern struct static_key_false vgic_v2_cpuif_trap;
> diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
> index deb8471..767dfe0 100644
> --- a/virt/kvm/arm/vgic/vgic.c
> +++ b/virt/kvm/arm/vgic/vgic.c
> @@ -398,6 +398,12 @@ bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
>  	 * now in the ap_list.
>  	 */
>  	vgic_get_irq_kref(irq);
> +
> +	if (!irq->active) {

Why not active? What if the interrupt is Active+Pending? What is the
rationale for this? This applies to the whole of this patch.

> +		vcpu->arch.vgic_cpu.pending = true;
> +		if (vcpu->arch.vgic_cpu.lowest_priority > irq->priority)
> +			vcpu->arch.vgic_cpu.lowest_priority = irq->priority;
> +	}
>  	list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
>  	irq->vcpu = vcpu;
>  
> @@ -618,6 +624,9 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
>  retry:
>  	raw_spin_lock(&vgic_cpu->ap_list_lock);
>  
> +	vgic_cpu->lowest_priority = U8_MAX;
> +	vgic_cpu->pending = false;
> +
>  	list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
>  		struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
>  		bool target_vcpu_needs_kick = false;
> @@ -649,6 +658,11 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
>  		}
>  
>  		if (target_vcpu == vcpu) {
> +			if (!irq->active) {
> +				vgic_cpu->pending = true;
> +				if (vgic_cpu->lowest_priority > irq->priority)
> +					vgic_cpu->lowest_priority = irq->priority;
> +			}
>  			/* We're on the right CPU */
>  			raw_spin_unlock(&irq->irq_lock);
>  			continue;
> @@ -690,6 +704,11 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
>  
>  			list_del(&irq->ap_list);
>  			irq->vcpu = target_vcpu;
> +			if (!irq->active) {
> +				new_cpu->pending = true;
> +				if (new_cpu->lowest_priority > irq->priority)
> +					new_cpu->lowest_priority = irq->priority;
> +			}
>  			list_add_tail(&irq->ap_list, &new_cpu->ap_list_head);
>  			target_vcpu_needs_kick = true;
>  		}
> @@ -930,9 +949,6 @@ void kvm_vgic_put(struct kvm_vcpu *vcpu)
>  int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
>  {
>  	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
> -	struct vgic_irq *irq;
> -	bool pending = false;
> -	unsigned long flags;
>  	struct vgic_vmcr vmcr;
>  
>  	if (!vcpu->kvm->arch.vgic.enabled)
> @@ -943,22 +959,10 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
>  
>  	vgic_get_vmcr(vcpu, &vmcr);
>  
> -	raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
> -
> -	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
> -		raw_spin_lock(&irq->irq_lock);
> -		pending = irq_is_pending(irq) && irq->enabled &&
> -			  !irq->active &&
> -			  irq->priority < vmcr.pmr;
> -		raw_spin_unlock(&irq->irq_lock);
> -
> -		if (pending)
> -			break;
> -	}
> -
> -	raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
> +	if (vgic_cpu->pending && vgic_cpu->lowest_priority < vmcr.pmr)
> +		return true;

And here we go. You've dropped the lock, and yet are evaluating two
unrelated fields that could be changed by a parallel injection or the
vcpu entering/exiting the guest.

I'm sure you get better performance. I'm also pretty sure this is
completely unsafe.

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...