Marcelo Tosatti wrote on 2013-02-23:
> On Fri, Feb 22, 2013 at 09:42:32PM +0800, Yang Zhang wrote:
>> From: Yang Zhang <yang.z.zhang@xxxxxxxxx>
>>
>> Posted Interrupt allows APIC interrupts to be injected into the guest
>> directly, without any vmexit.
>>
>> - When delivering an interrupt to the guest, if the target vcpu is
>>   running, update the Posted-Interrupt Requests bitmap and send a
>>   notification event to the vcpu. The vcpu will then handle the
>>   interrupt automatically, without any software involvement.
>> - If the target vcpu is not running, or a notification event is
>>   already pending in the vcpu, do nothing. The interrupt will be
>>   handled on the next vm entry.
>>
>> Signed-off-by: Yang Zhang <yang.z.zhang@xxxxxxxxx>
>> ---
>>
>> diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
>> index e4595f1..64616cc 100644
>> --- a/arch/x86/kernel/irq.c
>> +++ b/arch/x86/kernel/irq.c
>> @@ -228,6 +228,26 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
>>  	set_irq_regs(old_regs);
>>  }
>> +#ifdef CONFIG_HAVE_KVM
>> +/*
>> + * Handler for POSTED_INTERRUPT_VECTOR.
>> + */
>> +void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
>> +{
>> +	struct pt_regs *old_regs = set_irq_regs(regs);
>> +
>> +	ack_APIC_irq();
>> +
>> +	irq_enter();
>> +
>> +	exit_idle();
>> +
>> +	irq_exit();
>> +
>> +	set_irq_regs(old_regs);
>> +}
>> +#endif
>> +
>
> Add per-cpu counters, similarly to other handlers in the same file.

Sure.

>> @@ -379,6 +398,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
>>  	if (!apic->irr_pending)
>>  		return -1;
>> +	kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
>>  	result = apic_search_irr(apic);
>>  	ASSERT(result == -1 || result >= 16);
>
> kvm_x86_ops->sync_pir_to_irr() should be called from vcpu_enter_guest,
> before inject_pending_event:
>
> 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> 		->
> 		inject_pending_event
> 		...
> 	}

Some other places also search the irr to do something, like
kvm_cpu_has_interrupt().
So we must sync pir to irr before touching the irr, not just before
entering the guest.

>> @@ -685,6 +705,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
>>  {
>>  	int result = 0;
>>  	struct kvm_vcpu *vcpu = apic->vcpu;
>> +	bool delivered = false;
>>
>>  	switch (delivery_mode) {
>>  	case APIC_DM_LOWEST:
>> @@ -700,7 +721,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
>>  		} else
>>  			apic_clear_vector(vector, apic->regs + APIC_TMR);
>> -		result = !apic_test_and_set_irr(vector, apic);
>> +		if (!kvm_x86_ops->deliver_posted_interrupt(vcpu, vector,
>> +						&result, &delivered))
>> +			result = !apic_test_and_set_irr(vector, apic);
>> +
>>  		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
>>  					  trig_mode, vector, !result);
>>  		if (!result) {
>> @@ -710,8 +734,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
>>  			break;
>>  		}
>> -		kvm_make_request(KVM_REQ_EVENT, vcpu);
>> -		kvm_vcpu_kick(vcpu);
>> +		if (!delivered) {
>> +			kvm_make_request(KVM_REQ_EVENT, vcpu);
>> +			kvm_vcpu_kick(vcpu);
>> +		}
>
> This set bit / kick pair should be for non-APICv only (please
> check for it directly).

Sure.

>> +	return test_bit(vector, (unsigned long *)pi_desc->pir);
>> +}
>> +
>> +static void pi_set_pir(int vector, struct pi_desc *pi_desc)
>> +{
>> +	__set_bit(vector, (unsigned long *)pi_desc->pir);
>> +}
>
> This must be a locked atomic operation (set_bit).

When using the spinlock, pi_set_pir is not called concurrently, so it
is safe to use __set_bit.

>> +
>>  struct vcpu_vmx {
>>  	struct kvm_vcpu vcpu;
>>  	unsigned long host_rsp;
>> @@ -429,6 +465,10 @@ struct vcpu_vmx {
>>
>>  	bool rdtscp_enabled;
>> +	/* Posted interrupt descriptor */
>> +	struct pi_desc pi_desc;
>> +	spinlock_t pi_lock;
>
> Don't see why the lock is necessary. Please use the following
> pseudocode for vmx_deliver_posted_interrupt:
>
> vmx_deliver_posted_interrupt(), returns 'bool injected'.
>
> 1. orig_irr = read irr from vapic page
> 2. if (orig_irr != 0)
> 3. 	return false;
> 4. kvm_make_request(KVM_REQ_EVENT)
> 5. bool injected = !test_and_set_bit(PIR)
> 6. if (vcpu->guest_mode && injected)
> 7. 	if (test_and_set_bit(PIR notification bit))
> 8. 		send PIR IPI
> 9. return injected

Consider the following case:

    vcpu 0                        | vcpu 1
    ------------------------------+-------------------------------
    send intr to vcpu 1           |
    check irr                     |
                                  | receive a posted intr
                                  | pir->irr (pir is cleared,
                                  |           irr is set)
    injected = test_and_set_bit   |
             = true               |
    pir = set                     |

Then both the irr and the pir have the interrupt pending. They may be
merged into one interrupt later, but 'injected' was reported as true.
Wrong.

> On vcpu_enter_guest:
>
> 	if (kvm_check_request(KVM_REQ_EVENT)) {
> 		pir->virr sync (*)
> 		...
> 	}
> 	...
> 	vcpu->mode = IN_GUEST_MODE;
> 	local_irq_disable
> 	clear pir notification bit unconditionally (*)

Why clear the ON bit here? If there is no pir->irr operation in this
vmentry, clearing ON here is redundant.

> Right after local_irq_disable.

>> +static bool vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu,
>> +				int vector, int *result, bool *delivered)
>> +{
>> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
>> +	unsigned long flags;
>> +
>> +	if (!vmx_vm_has_apicv(vcpu->kvm))
>> +		return false;
>> +
>> +	spin_lock_irqsave(&vmx->pi_lock, flags);
>> +	if (pi_test_pir(vector, &vmx->pi_desc) ||
>> +		kvm_apic_test_irr(vector, vcpu->arch.apic)) {
>> +		spin_unlock_irqrestore(&vmx->pi_lock, flags);
>> +		return true;
>> +	}
>> +
>> +	pi_set_pir(vector, &vmx->pi_desc);
>> +	*result = 1;
>> +	if (!pi_test_and_set_on(&vmx->pi_desc) &&
>> +		(vcpu->mode == IN_GUEST_MODE)) {
>> +		kvm_make_request(KVM_REQ_EVENT, vcpu);
>> +		apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
>> +					POSTED_INTR_VECTOR);
>> +		*delivered = true;
>> +	}
>> +	spin_unlock_irqrestore(&vmx->pi_lock, flags);
>> +
>> +	return true;
>> +}
>
> Please confirm whether a spinlock is necessary with the pseudocode above.

In the current implementation, the spinlock is necessary to avoid a
race condition between vcpus when delivering a posted interrupt and
syncing pir->irr.
>> +static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
>> +{
>> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
>> +	unsigned long flags;
>> +
>> +	if (!vmx_vm_has_apicv(vcpu->kvm))
>> +		return;
>> +
>> +	spin_lock_irqsave(&vmx->pi_lock, flags);
>
>> +	if (!pi_test_and_clear_on(&vmx->pi_desc)) {
>
> This needs to move to vcpu_enter_guest, after local_irq_disable,
> unconditionally, as noted.
>
>> @@ -2679,6 +2679,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>>  static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
>>  					struct kvm_lapic_state *s)
>>  {
>> +	kvm_x86_ops->sync_pir_to_irr(vcpu);
>>  	memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
>>
>>  	return 0;
>> --
>> 1.7.1

Best regards,
Yang