On Tue, Mar 12, 2013 at 12:44:41PM +0100, Jan Kiszka wrote: > A VCPU sending INIT or SIPI to some other VCPU races for setting the > remote VCPU's mp_state. When we were unlucky, KVM_MP_STATE_INIT_RECEIVED > was overwritten by kvm_emulate_halt and, thus, got lost. > > This introduces APIC events for those two signals, keeping them in > kvm_apic until kvm_apic_accept_events is run over the target vcpu > context. kvm_apic_has_events reports to kvm_arch_vcpu_runnable if there > are pending events, thus if vcpu blocking should end. > > The patch comes with the side effect of effectively obsoleting > KVM_MP_STATE_SIPI_RECEIVED. We still accept it from user space, but > immediately translate it to KVM_MP_STATE_INIT_RECEIVED + KVM_APIC_SIPI. > The vcpu itself will no longer enter the KVM_MP_STATE_SIPI_RECEIVED > state. That also means we no longer exit to user space after receiving a > SIPI event. > > Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx> > --- > > This doesn't fix the wrong behaviour on INIT for the BSP yet. Leaving > this honor to Paolo. > > I didn't try porting any INIT handling for nested on top yet but I > think it should be feasible - once we know their semantics for sure, at > least on Intel... > > arch/x86/include/asm/kvm_host.h | 1 + > arch/x86/kvm/lapic.c | 41 +++++++++++++++++++++++++++++++------- > arch/x86/kvm/lapic.h | 7 ++++++ > arch/x86/kvm/x86.c | 39 ++++++++++++++++++++---------------- > 4 files changed, 63 insertions(+), 25 deletions(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 348d859..2d28e76 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -1002,6 +1002,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); > int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); > int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); > int kvm_cpu_get_interrupt(struct kvm_vcpu *v); > +void kvm_vcpu_reset(struct kvm_vcpu *vcpu); > > void kvm_define_shared_msr(unsigned index, u32 msr); > void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > index 02b51dd..4a21a6b 100644 > --- a/arch/x86/kvm/lapic.c > +++ b/arch/x86/kvm/lapic.c > @@ -731,7 +731,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, > case APIC_DM_INIT: > if (!trig_mode || level) { > result = 1; > - vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; > + set_bit(KVM_APIC_INIT, &apic->pending_events); > kvm_make_request(KVM_REQ_EVENT, vcpu); > kvm_vcpu_kick(vcpu); > } else { > @@ -743,13 +743,13 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, > case APIC_DM_STARTUP: > apic_debug("SIPI to vcpu %d vector 0x%02x\n", > vcpu->vcpu_id, vector); > - if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { > - result = 1; > - vcpu->arch.sipi_vector = vector; > - vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; > - kvm_make_request(KVM_REQ_EVENT, vcpu); > - kvm_vcpu_kick(vcpu); > - } > + result = 1; > + apic->sipi_vector = vector; > + /* make sure sipi_vector is visible for the receiver */ > + smp_wmb(); > + set_bit(KVM_APIC_SIPI, &apic->pending_events); > + kvm_make_request(KVM_REQ_EVENT, vcpu); > + kvm_vcpu_kick(vcpu); > break; > > case APIC_DM_EXTINT: > @@ -1860,6 +1860,31 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) > addr); > } > > +bool kvm_apic_has_events(struct kvm_vcpu *vcpu) > +{ > + return kvm_vcpu_has_lapic(vcpu) && vcpu->arch.apic->pending_events; The function is called only from kvm_arch_vcpu_runnable() and kvm_arch_vcpu_runnable() is called only if irqchip is in kernel, so I think kvm_vcpu_has_lapic() can be dropped. > +} > + > +void kvm_apic_accept_events(struct kvm_vcpu *vcpu) > +{ > + struct kvm_lapic *apic = vcpu->arch.apic; > + > + if (!kvm_vcpu_has_lapic(vcpu)) > + return; > + > + if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) > + vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; > + if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events) && > + vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { > + vcpu->arch.sipi_vector = apic->sipi_vector; > + pr_debug("vcpu %d received sipi with vector # %x\n", > + vcpu->vcpu_id, vcpu->arch.sipi_vector); > + kvm_lapic_reset(vcpu); > + kvm_vcpu_reset(vcpu); Shouldn't we reset on KVM_APIC_INIT? > + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; > + } > +} > + > void kvm_lapic_init(void) > { > /* do not patch jump label more than once per second */ > diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h > index 1676d34..ef3f4ef 100644 > --- a/arch/x86/kvm/lapic.h > +++ b/arch/x86/kvm/lapic.h > @@ -5,6 +5,9 @@ > > #include <linux/kvm_host.h> > > +#define KVM_APIC_INIT 0 > +#define KVM_APIC_SIPI 1 > + > struct kvm_timer { > struct hrtimer timer; > s64 period; /* unit: ns */ > @@ -32,13 +35,17 @@ struct kvm_lapic { > void *regs; > gpa_t vapic_addr; > struct page *vapic_page; > + unsigned long pending_events; > + unsigned int sipi_vector; > }; > int kvm_create_lapic(struct kvm_vcpu *vcpu); > void kvm_free_lapic(struct kvm_vcpu *vcpu); > > int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); > +bool kvm_apic_has_events(struct kvm_vcpu *vcpu); > int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); > int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); > +void kvm_apic_accept_events(struct kvm_vcpu *vcpu); > void kvm_lapic_reset(struct kvm_vcpu *vcpu); > u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); > void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index b891ac3..a0b8041 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -162,8 +162,6 @@ u64 __read_mostly host_xcr0; > > static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); > > -static void kvm_vcpu_reset(struct kvm_vcpu *vcpu); > - > static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) > { > int i; > @@ -5663,6 +5661,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) > bool req_immediate_exit = 0; > > if (vcpu->requests) { > + kvm_apic_accept_events(vcpu); > + if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { > + r = 1; > + goto out; > + } Why not call it under kvm_check_request(KVM_REQ_EVENT, vcpu) bellow? > if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) > kvm_mmu_unload(vcpu); > if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) > @@ -5847,14 +5850,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) > int r; > struct kvm *kvm = vcpu->kvm; > > - if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { > - pr_debug("vcpu %d received sipi with vector # %x\n", > - vcpu->vcpu_id, vcpu->arch.sipi_vector); > - kvm_lapic_reset(vcpu); > - kvm_vcpu_reset(vcpu); > - vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; > - } > - > vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); > r = vapic_enter(vcpu); > if (r) { > @@ -5871,8 +5866,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) > srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); > kvm_vcpu_block(vcpu); > vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); > - if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) > - { > + if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) { > + kvm_apic_accept_events(vcpu); > switch(vcpu->arch.mp_state) { > case KVM_MP_STATE_HALTED: > vcpu->arch.mp_state = > @@ -5880,7 +5875,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) > case KVM_MP_STATE_RUNNABLE: > vcpu->arch.apf.halted = false; > break; > - case KVM_MP_STATE_SIPI_RECEIVED: > + case KVM_MP_STATE_INIT_RECEIVED: > + break; > default: > r = -EINTR; > break; > @@ -5901,7 +5897,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) > ++vcpu->stat.request_irq_exits; > } > > - kvm_check_async_pf_completion(vcpu); > + if (vcpu->arch.mp_state != KVM_MP_STATE_INIT_RECEIVED) > + kvm_check_async_pf_completion(vcpu); Do not understand why is this 'if' needed. > > if (signal_pending(current)) { > r = -EINTR; > @@ -6015,6 +6012,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) > > if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { > kvm_vcpu_block(vcpu); > + kvm_apic_accept_events(vcpu); > clear_bit(KVM_REQ_UNHALT, &vcpu->requests); > r = -EAGAIN; > goto out; > @@ -6171,6 +6169,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, > int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, > struct kvm_mp_state *mp_state) > { > + kvm_apic_accept_events(vcpu); > mp_state->mp_state = vcpu->arch.mp_state; > return 0; > } > @@ -6178,7 +6177,13 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, > int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, > struct kvm_mp_state *mp_state) > { > - vcpu->arch.mp_state = mp_state->mp_state; > + if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) { > + if (!kvm_vcpu_has_lapic(vcpu)) > + return -EINVAL; > + vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; > + set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events); > + } else > + vcpu->arch.mp_state = mp_state->mp_state; > kvm_make_request(KVM_REQ_EVENT, vcpu); > return 0; > } > @@ -6515,7 +6520,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) > kvm_x86_ops->vcpu_free(vcpu); > } > > -static void kvm_vcpu_reset(struct kvm_vcpu *vcpu) > +void kvm_vcpu_reset(struct kvm_vcpu *vcpu) > { > atomic_set(&vcpu->arch.nmi_queued, 0); > vcpu->arch.nmi_pending = 0; > @@ -6988,7 +6993,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) > return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && > !vcpu->arch.apf.halted) > || !list_empty_careful(&vcpu->async_pf.done) > - || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED > + || kvm_apic_has_events(vcpu) > || atomic_read(&vcpu->arch.nmi_queued) || > (kvm_arch_interrupt_allowed(vcpu) && > kvm_cpu_has_interrupt(vcpu)); > -- > 1.7.3.4 -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html