On Fri, Feb 22, 2013 at 09:42:32PM +0800, Yang Zhang wrote: > From: Yang Zhang <yang.z.zhang@xxxxxxxxx> > > Posted Interrupt allows APIC interrupts to inject into guest directly > without any vmexit. > > - When delivering an interrupt to guest, if target vcpu is running, > update Posted-interrupt requests bitmap and send a notification event > to the vcpu. Then the vcpu will handle this interrupt automatically, > without any software involvement. > > - If target vcpu is not running or there is already a notification event > pending in the vcpu, do nothing. The interrupt will be handled by > the next vm entry > > Signed-off-by: Yang Zhang <yang.z.zhang@xxxxxxxxx> > --- > diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c > index e4595f1..64616cc 100644 > --- a/arch/x86/kernel/irq.c > +++ b/arch/x86/kernel/irq.c > @@ -228,6 +228,26 @@ void smp_x86_platform_ipi(struct pt_regs *regs) > set_irq_regs(old_regs); > } > > +#ifdef CONFIG_HAVE_KVM > +/* > + * Handler for POSTED_INTERRUPT_VECTOR. > + */ > +void smp_kvm_posted_intr_ipi(struct pt_regs *regs) > +{ > + struct pt_regs *old_regs = set_irq_regs(regs); > + > + ack_APIC_irq(); > + > + irq_enter(); > + > + exit_idle(); > + > + irq_exit(); > + > + set_irq_regs(old_regs); > +} > +#endif > + Add per-cpu counters, similarly to other handlers in the same file. > @@ -379,6 +398,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) > if (!apic->irr_pending) > return -1; > > + kvm_x86_ops->sync_pir_to_irr(apic->vcpu); > result = apic_search_irr(apic); > ASSERT(result == -1 || result >= 16); kvm_x86_ops->sync_pir_to_irr() should be called from vcpu_enter_guest, before inject_pending_event. if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { -> inject_pending_event ... 
} > @@ -685,6 +705,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, > { > int result = 0; > struct kvm_vcpu *vcpu = apic->vcpu; > + bool delivered = false; > > switch (delivery_mode) { > case APIC_DM_LOWEST: > @@ -700,7 +721,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, > } else > apic_clear_vector(vector, apic->regs + APIC_TMR); > > - result = !apic_test_and_set_irr(vector, apic); > + if (!kvm_x86_ops->deliver_posted_interrupt(vcpu, vector, > + &result, &delivered)) > + result = !apic_test_and_set_irr(vector, apic); > + > trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, > trig_mode, vector, !result); > if (!result) { > @@ -710,8 +734,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, > break; > } > > - kvm_make_request(KVM_REQ_EVENT, vcpu); > - kvm_vcpu_kick(vcpu); > + if (!delivered) { > + kvm_make_request(KVM_REQ_EVENT, vcpu); > + kvm_vcpu_kick(vcpu); > + } This set bit / kick pair should be for non-APICv only (please check for it directly). > + return test_bit(vector, (unsigned long *)pi_desc->pir); > +} > + > +static void pi_set_pir(int vector, struct pi_desc *pi_desc) > +{ > + __set_bit(vector, (unsigned long *)pi_desc->pir); > +} This must be a locked atomic operation (set_bit). > + > struct vcpu_vmx { > struct kvm_vcpu vcpu; > unsigned long host_rsp; > @@ -429,6 +465,10 @@ struct vcpu_vmx { > > bool rdtscp_enabled; > > + /* Posted interrupt descriptor */ > + struct pi_desc pi_desc; > + spinlock_t pi_lock; Don't see why the lock is necessary. Please use the following pseudocode for vmx_deliver_posted_interrupt: vmx_deliver_posted_interrupt(), returns 'bool injected'. 1. orig_irr = read irr from vapic page 2. if (orig_irr != 0) 3. return false; 4. kvm_make_request(KVM_REQ_EVENT) 5. bool injected = !test_and_set_bit(PIR) 6. if (vcpu->guest_mode && injected) 7. if (!test_and_set_bit(PIR notification bit)) 8. send PIR IPI 9. 
return injected On vcpu_enter_guest: if (kvm_check_request(KVM_REQ_EVENT)) { pir->virr sync (*) ... } ... vcpu->mode = IN_GUEST_MODE; local_irq_disable clear pir notification bit unconditionally (*) Right after local_irq_disable. > +static bool vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, > + int vector, int *result, bool *delivered) > +{ > + struct vcpu_vmx *vmx = to_vmx(vcpu); > + unsigned long flags; > + > + if (!vmx_vm_has_apicv(vcpu->kvm)) > + return false; > + > + spin_lock_irqsave(&vmx->pi_lock, flags); > + if (pi_test_pir(vector, &vmx->pi_desc) || > + kvm_apic_test_irr(vector, vcpu->arch.apic)) { > + spin_unlock_irqrestore(&vmx->pi_lock, flags); > + return true; > + } > + > + pi_set_pir(vector, &vmx->pi_desc); > + *result = 1; > + if (!pi_test_and_set_on(&vmx->pi_desc) && > + (vcpu->mode == IN_GUEST_MODE)) { > + kvm_make_request(KVM_REQ_EVENT, vcpu); > + apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), > + POSTED_INTR_VECTOR); > + *delivered = true; > + } > + spin_unlock_irqrestore(&vmx->pi_lock, flags); > + > + return true; > +} Please confirm whether a spinlock is necessary with the pseudocode above. > +static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) > +{ > + struct vcpu_vmx *vmx = to_vmx(vcpu); > + unsigned long flags; > + > + if (!vmx_vm_has_apicv(vcpu->kvm)) > + return; > + > + spin_lock_irqsave(&vmx->pi_lock, flags); > + if (!pi_test_and_clear_on(&vmx->pi_desc)) { This needs to move to vcpu_enter_guest, after local_irq_disable(), unconditionally, as noted. > @@ -2679,6 +2679,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) > static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, > struct kvm_lapic_state *s) > { > + kvm_x86_ops->sync_pir_to_irr(vcpu); > memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); > > return 0; > -- > 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html