Re: [PATCH v6 5/5] KVM : VMX: Use posted interrupt to deliver virtual interrupt

Gleb Natapov <gleb@xxxxxxxxxx> · Tue, 19 Mar 2013 17:18:54 +0200



On Tue, Mar 19, 2013 at 12:03:22PM -0300, Marcelo Tosatti wrote:
> On Tue, Mar 19, 2013 at 02:23:59PM +0200, Gleb Natapov wrote:
> > On Tue, Mar 19, 2013 at 12:11:47PM +0000, Zhang, Yang Z wrote:
> > > Gleb Natapov wrote on 2013-03-19:
> > > > On Fri, Mar 15, 2013 at 09:31:11PM +0800, Yang Zhang wrote:
> > > >> From: Yang Zhang <yang.z.zhang@xxxxxxxxx>
> > > >> 
> > > >> If posted interrupt is avaliable, then uses it to inject virtual
> > > >> interrupt to guest.
> > > >> 
> > > >> Signed-off-by: Yang Zhang <yang.z.zhang@xxxxxxxxx>
> > > >> ---
> > > >>  arch/x86/kvm/irq.c   |    3 ++-
> > > >>  arch/x86/kvm/lapic.c |   16 +++++++++++++---
> > > >>  arch/x86/kvm/lapic.h |    1 +
> > > >>  arch/x86/kvm/vmx.c   |   11 +++++++++++
> > > >>  arch/x86/kvm/x86.c   |    4 ++++
> > > >>  5 files changed, 31 insertions(+), 4 deletions(-)
> > > >> diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
> > > >> index 484bc87..5179988 100644
> > > >> --- a/arch/x86/kvm/irq.c
> > > >> +++ b/arch/x86/kvm/irq.c
> > > >> @@ -81,7 +81,8 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
> > > >>  	if (kvm_cpu_has_extint(v))
> > > >>  		return 1;
> > > >> -	return kvm_apic_has_interrupt(v) != -1;	/* LAPIC */
> > > >> +	return (kvm_apic_has_interrupt(v) != -1) ||
> > > >> +		kvm_hwapic_has_interrupt(v);
> > > > That's incorrect. kvm_cpu_has_interrupt() should return true only it
> > > > there is IRR suitable to be injected, not just any IRR.
> > > > kvm_apic_has_interrupt() should call kvm_apic_update_irr().
> > > You are right.
> > > 
> > > >>  }
> > > >>  EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
> > > >> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > > >> index b3ea50e..46c7310 100644
> > > >> --- a/arch/x86/kvm/lapic.c
> > > >> +++ b/arch/x86/kvm/lapic.c
> > > >> @@ -713,7 +713,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> > > > delivery_mode,
> > > >>  		} else
> > > >>  			apic_clear_vector(vector, apic->regs + APIC_TMR);
> > > >> -		result = !apic_test_and_set_irr(vector, apic);
> > > >> +		result = 1;
> > > >> +		if (!kvm_x86_ops->deliver_posted_interrupt(vcpu, vector))
> > > >> +			result = !apic_test_and_set_irr(vector, apic);
> > > >> +
> > > >>  		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
> > > >>  					  trig_mode, vector, !result);
> > > >>  		if (!result) {
> > > >> @@ -723,8 +726,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> > > > delivery_mode,
> > > >>  			break;
> > > >>  		}
> > > >> -		kvm_make_request(KVM_REQ_EVENT, vcpu);
> > > >> -		kvm_vcpu_kick(vcpu);
> > > >> +		if (!kvm_x86_ops->vm_has_apicv(vcpu->kvm)) {
> > > >> +			kvm_make_request(KVM_REQ_EVENT, vcpu);
> > > >> +			kvm_vcpu_kick(vcpu);
> > > >> +		}
> > > >>  		break;
> > > > apicv code and non apicv code are completely different. What's the point
> > > > checking for apicv twice here?
> > > > Just do:
> > > > 
> > > > if (kvm_x86_ops->deliver_posted_interrupt)
> > > > 	kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)
> > > > else {
> > > > 	result = !apic_test_and_set_irr(vector, apic);
> > > > 	kvm_make_request(KVM_REQ_EVENT, vcpu);
> > > > 	kvm_vcpu_kick(vcpu);
> > > > }
> > > > 
> > > > And set kvm_x86_ops->deliver_posted_interrupt only if apicv is enabled.
> > > > 
> > > > Also rearrange patches so that APIC_TMR handling goes before posted
> > > > interrupt series.
> > > Sure. 
> > > 
> > > >> 
> > > >>  	case APIC_DM_REMRD: @@ -1604,6 +1609,11 @@ int
> > > >>  kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) 	return highest_irr; }
> > > >> +bool kvm_hwapic_has_interrupt(struct kvm_vcpu *vcpu)
> > > >> +{
> > > >> +	return kvm_x86_ops->hwapic_has_interrupt(vcpu);
> > > >> +}
> > > >> +
> > > >>  int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
> > > >>  {
> > > >>  	u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0);
> > > >> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> > > >> index e5327be..c6abc63 100644
> > > >> --- a/arch/x86/kvm/lapic.h
> > > >> +++ b/arch/x86/kvm/lapic.h
> > > >> @@ -37,6 +37,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu);
> > > >>  void kvm_free_lapic(struct kvm_vcpu *vcpu);
> > > >>  
> > > >>  int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); +bool
> > > >>  kvm_hwapic_has_interrupt(struct kvm_vcpu *vcpu); int
> > > >>  kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); int
> > > >>  kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); void
> > > >>  kvm_lapic_reset(struct kvm_vcpu *vcpu);
> > > >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > > >> index 0b5a8ae..48a2239 100644
> > > >> --- a/arch/x86/kvm/vmx.c
> > > >> +++ b/arch/x86/kvm/vmx.c
> > > >> @@ -3932,6 +3932,17 @@ static void vmx_posted_intr_clear_on(struct
> > > > kvm_vcpu *vcpu)
> > > >>  	clear_bit(POSTED_INTR_ON, (unsigned long *)&vmx->pi_desc.u.control);
> > > >>  }
> > > >> +/*
> > > >> + * Send interrupt to vcpu via posted interrupt way.
> > > >> + * Return false if posted interrupt is not supported and the caller will
> > > >> + * roll back to old way(via set vIRR).
> > > >> + * Return true if posted interrupt is avalialbe, the interrupt is set
> > > >> + * in pir(posted interrupt requests):
> > > >> + * 1. If target vcpu is running(non-root mode), send posted interrupt
> > > >> + * notification to vcpu and hardware will sync pir to vIRR atomically.
> > > >> + * 2. If target vcpu isn't running(root mode), kick it to pick up the
> > > >> + * interrupt from pir in next vmentry.
> > > >> + */
> > > > The comment should go into previous patch. Also I prefer to not check
> > > > for posted interrupt inside the callback, but set it to NULL instead.
> > > > This way we avoid calling a callback on a hot path needlessly.
> > > It's make sense. So just follow the logic you mentioned above?
> > >  
> > Yes.
> > 
> > > >>  static bool vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
> > > >>  {
> > > >>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> > > >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > >> index 0baa90d..0981100 100644
> > > >> --- a/arch/x86/kvm/x86.c
> > > >> +++ b/arch/x86/kvm/x86.c
> > > >> @@ -2679,6 +2679,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
> > > >>  static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, 				   
> > > >>  struct kvm_lapic_state *s) { +	kvm_x86_ops->sync_pir_to_irr(vcpu);
> > > >>  	memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
> > > >>  
> > > >>  	return 0; @@ -5699,6 +5700,7 @@ static int vcpu_enter_guest(struct
> > > >>  kvm_vcpu *vcpu) 	}
> > > >>  
> > > >>  	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> > > >>  +		kvm_x86_ops->sync_pir_to_irr(vcpu); 		inject_pending_event(vcpu);
> > > >>  
> > > >>  		/* enable NMI/IRQ window open exits if needed */
> > > >> @@ -5741,6 +5743,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> > > >> 
> > > >>  	local_irq_disable();
> > > >> +	kvm_x86_ops->posted_intr_clear_on(vcpu);
> > > >> +
> > > > Why is this separate from pir_to_irr syncing?
> > > This is the result of discussion with Marcelo.
> > > It is more reasonable to put it here to avoid unnecessary posted interrupt between:
> > > 
> > > vcpu->mode = IN_GUEST_MODE;
> > > 
> > > <--interrupt may arrived here and this is unnecessary.
> > > 
> > > local_irq_disable();
> > > 
> > 
> > But this still can happen as far as I see:
> > 
> > vcpu0                                         vcpu1: 
> > pi_test_and_set_pir()
> > kvm_make_request(KVM_REQ_EVENT)
> >                                             if (KVM_REQ_EVENT)
> >                                                    sync_pir_to_irr()
> 						^^^^^^^^^^^^^^^^^^
> >                                             vcpu->mode = IN_GUEST_MODE;
> > if (vcpu->mode == IN_GUEST_MODE)
> >   if (!pi_test_and_set_on())
> >     apic->send_IPI_mask()
> >                                             --> IPI arrives here
> >                                             local_irq_disable();
> >                                             posted_intr_clear_on()
> > 
> > 
> > May be move vcpu->mode = IN_GUEST_MODE after local_irq_disable()?
> 
> Scenario is correct because injected PIR has been synced to IRR before
> guest entry.
I do not say it is not. Yang said that the code tries to prevent this
from happening, I showed that this still can happen. There is not harm,
just useless IPI.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html