Re: [PATCH v4 2/2] KVM: VMX: Add Posted Interrupt supporting

Marcelo Tosatti <mtosatti@xxxxxxxxxx> · Sat, 23 Feb 2013 10:43:09 -0300

On Fri, Feb 22, 2013 at 09:42:32PM +0800, Yang Zhang wrote:
> From: Yang Zhang <yang.z.zhang@xxxxxxxxx>
> 
> Posted Interrupt allows APIC interrupts to inject into guest directly
> without any vmexit.
> 
> - When delivering a interrupt to guest, if target vcpu is running,
>   update Posted-interrupt requests bitmap and send a notification event
>   to the vcpu. Then the vcpu will handle this interrupt automatically,
>   without any software involvemnt.
> 
> - If target vcpu is not running or there already a notification event
>   pending in the vcpu, do nothing. The interrupt will be handled by
>   next vm entry
> 
> Signed-off-by: Yang Zhang <yang.z.zhang@xxxxxxxxx>
> ---

> diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
> index e4595f1..64616cc 100644
> --- a/arch/x86/kernel/irq.c
> +++ b/arch/x86/kernel/irq.c
> @@ -228,6 +228,26 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
>  	set_irq_regs(old_regs);
>  }
>  
> +#ifdef CONFIG_HAVE_KVM
> +/*
> + * Handler for POSTED_INTERRUPT_VECTOR.
> + */
> +void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
> +{
> +	struct pt_regs *old_regs = set_irq_regs(regs);
> +
> +	ack_APIC_irq();
> +
> +	irq_enter();
> +
> +	exit_idle();
> +
> +	irq_exit();
> +
> +	set_irq_regs(old_regs);
> +}
> +#endif
> +

Add per-cpu counters, similarly to other handlers in the same file.

> @@ -379,6 +398,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
>  	if (!apic->irr_pending)
>  		return -1;
>  
> +	kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
>  	result = apic_search_irr(apic);
>  	ASSERT(result == -1 || result >= 16);

kvm_x86_ops->sync_pir_to_irr() should be called from vcpu_enter_guest,
before inject_pending_event.

        if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
		->
		inject_pending_event
		...
	}

> @@ -685,6 +705,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
>  {
>  	int result = 0;
>  	struct kvm_vcpu *vcpu = apic->vcpu;
> +	bool delivered = false;
>  
>  	switch (delivery_mode) {
>  	case APIC_DM_LOWEST:
> @@ -700,7 +721,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
>  		} else
>  			apic_clear_vector(vector, apic->regs + APIC_TMR);
>  
> -		result = !apic_test_and_set_irr(vector, apic);
> +		if (!kvm_x86_ops->deliver_posted_interrupt(vcpu, vector,
> +						&result, &delivered))
> +			result = !apic_test_and_set_irr(vector, apic);
> +
>  		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
>  					  trig_mode, vector, !result);
>  		if (!result) {
> @@ -710,8 +734,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
>  			break;
>  		}
>  
> -		kvm_make_request(KVM_REQ_EVENT, vcpu);
> -		kvm_vcpu_kick(vcpu);
> +		if (!delivered) {
> +			kvm_make_request(KVM_REQ_EVENT, vcpu);
> +			kvm_vcpu_kick(vcpu);
> +		}

This set bit / kick pair should be for non-APICv only (please
check for it directly).

> +	return test_bit(vector, (unsigned long *)pi_desc->pir);
> +}
> +
> +static void pi_set_pir(int vector, struct pi_desc *pi_desc)
> +{
> +	__set_bit(vector, (unsigned long *)pi_desc->pir);
> +}

This must be locked atomic operation (set_bit).

> +
>  struct vcpu_vmx {
>  	struct kvm_vcpu       vcpu;
>  	unsigned long         host_rsp;
> @@ -429,6 +465,10 @@ struct vcpu_vmx {
>  
>  	bool rdtscp_enabled;
>  
> +	/* Posted interrupt descriptor */
> +	struct pi_desc pi_desc;
> +	spinlock_t pi_lock;

Don't see why the lock is necessary. Please use the following
pseudocode for vmx_deliver_posted_interrupt:

vmx_deliver_posted_interrupt(), returns 'bool injected'.

1. orig_irr = read irr from vapic page
2. if (orig_irr != 0)
3.	return false;
4. kvm_make_request(KVM_REQ_EVENT)
5. bool injected = !test_and_set_bit(PIR)
6. if (vcpu->guest_mode && injected)
7. 	if (test_and_set_bit(PIR notification bit))
8.		send PIR IPI
9. return injected

On vcpu_enter_guest:

if (kvm_check_request(KVM_REQ_EVENT))  {
	pir->virr sync			(*)
	...
}
...
vcpu->mode = IN_GUEST_MODE;
local_irq_disable
clear pir notification bit unconditionally (*)

Right after local_irq_disable.

> +static bool vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu,
> +		int vector, int *result, bool *delivered)
> +{
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	unsigned long flags;
> +
> +	if (!vmx_vm_has_apicv(vcpu->kvm))
> +		return false;
> +
> +	spin_lock_irqsave(&vmx->pi_lock, flags);
> +	if (pi_test_pir(vector, &vmx->pi_desc) ||
> +		kvm_apic_test_irr(vector, vcpu->arch.apic)) {
> +		spin_unlock_irqrestore(&vmx->pi_lock, flags);
> +		return true;
> +	}
> +
> +	pi_set_pir(vector, &vmx->pi_desc);
> +	*result = 1;
> +	if (!pi_test_and_set_on(&vmx->pi_desc) &&
> +			(vcpu->mode == IN_GUEST_MODE)) {
> +		kvm_make_request(KVM_REQ_EVENT, vcpu);
> +		apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
> +				    POSTED_INTR_VECTOR);
> +		*delivered = true;
> +	}
> +	spin_unlock_irqrestore(&vmx->pi_lock, flags);
> +
> +	return true;
> +}

Please confirm whether a spinlock is necessary with the pseudocode 
above.

> +static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
> +{
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	unsigned long flags;
> +
> +	if (!vmx_vm_has_apicv(vcpu->kvm))
> +		return;
> +
> +	spin_lock_irqsave(&vmx->pi_lock, flags);

> +	if (!pi_test_and_clear_on(&vmx->pi_desc)) {

This needs to move to vcpu_enter_guest, after local_irq_disabled,
unconditionally, as noted.

> @@ -2679,6 +2679,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>  static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
>  				    struct kvm_lapic_state *s)
>  {
> +	kvm_x86_ops->sync_pir_to_irr(vcpu);
>  	memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
>  
>  	return 0;
> -- 
> 1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html