On 2011-09-15 16:45, Avi Kivity wrote:
> If simultaneous NMIs happen, we're supposed to queue the second
> and next (collapsing them), but currently we sometimes collapse
> the second into the first.

Can you describe the race in a bit more detail here ("sometimes" sounds
like "I don't know when" :) )?

> 
> Fix by using a counter for pending NMIs instead of a bool; collapsing
> happens when the NMI window reopens.
> 
> Signed-off-by: Avi Kivity <avi@xxxxxxxxxx>
> ---
> 
> Not sure whether this interacts correctly with NMI-masked-by-STI or with
> save/restore.
> 
>  arch/x86/include/asm/kvm_host.h |    2 +-
>  arch/x86/kvm/svm.c              |    1 +
>  arch/x86/kvm/vmx.c              |    3 ++-
>  arch/x86/kvm/x86.c              |   33 +++++++++++++++------------------
>  arch/x86/kvm/x86.h              |    7 +++++++
>  5 files changed, 26 insertions(+), 20 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 6ab4241..3a95885 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -413,7 +413,7 @@ struct kvm_vcpu_arch {
>  	u32 tsc_catchup_mult;
>  	s8 tsc_catchup_shift;
>  
> -	bool nmi_pending;
> +	atomic_t nmi_pending;
>  	bool nmi_injected;
>  
>  	struct mtrr_state_type mtrr_state;
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index e7ed4b1..d4c792f 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -3609,6 +3609,7 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
>  	if ((svm->vcpu.arch.hflags & HF_IRET_MASK)
>  	    && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) {
>  		svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
> +		kvm_collapse_pending_nmis(&svm->vcpu);
>  		kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
>  	}
>  
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index a0d6bd9..745dadb 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -4761,6 +4761,7 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu)
>  	cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
>  	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
>  	++vcpu->stat.nmi_window_exits;
> +	kvm_collapse_pending_nmis(vcpu);
>  	kvm_make_request(KVM_REQ_EVENT, vcpu);
>  
>  	return 1;
> @@ -5790,7 +5791,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
>  		if (vmx_interrupt_allowed(vcpu)) {
>  			vmx->soft_vnmi_blocked = 0;
>  		} else if (vmx->vnmi_blocked_time > 1000000000LL &&
> -			   vcpu->arch.nmi_pending) {
> +			   atomic_read(&vcpu->arch.nmi_pending)) {
>  			/*
>  			 * This CPU don't support us in finding the end of an
>  			 * NMI-blocked window if the guest runs with IRQs
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 6b37f18..d4f45e0 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -359,8 +359,8 @@ void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
>  
>  void kvm_inject_nmi(struct kvm_vcpu *vcpu)
>  {
> +	atomic_inc(&vcpu->arch.nmi_pending);
>  	kvm_make_request(KVM_REQ_EVENT, vcpu);
> -	vcpu->arch.nmi_pending = 1;

Does the reordering matter? Do we need barriers?
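To make the ordering side of the question concrete: the vCPU thread only
looks at nmi_pending after it has consumed KVM_REQ_EVENT, so the increment
must be visible by then. A small user-space model of that producer/consumer
pattern (stand-in names and C11 atomics instead of the kernel primitives,
so just a sketch of the idea, not the actual KVM paths):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* stand-ins for vcpu->requests/KVM_REQ_EVENT and vcpu->arch.nmi_pending */
static atomic_bool req_event;
static atomic_int nmi_pending;

/* injector: bump the counter first, then raise the request, so whoever
 * consumes the request also sees the new counter value */
static void inject_nmi(void)
{
	atomic_fetch_add(&nmi_pending, 1);	/* ~ atomic_inc() */
	atomic_store(&req_event, true);		/* ~ kvm_make_request() */
}

/* vCPU side: look at nmi_pending only after the request was consumed */
static void enter_guest(void)
{
	if (!atomic_exchange(&req_event, false))	/* ~ kvm_check_request() */
		return;

	if (atomic_load(&nmi_pending)) {
		atomic_fetch_sub(&nmi_pending, 1);	/* inject one NMI */
		printf("NMI injected, %d still pending\n",
		       atomic_load(&nmi_pending));
	}

	/* model "enable_nmi_window() leads to another KVM_REQ_EVENT later"
	 * by simply re-raising the request if something is still queued */
	if (atomic_load(&nmi_pending))
		atomic_store(&req_event, true);
}

int main(void)
{
	inject_nmi();
	inject_nmi();	/* the second NMI must not be lost */
	enter_guest();
	enter_guest();
	return 0;
}

With C11 seq_cst atomics the two stores in inject_nmi() cannot become
visible in the opposite order. My question is whether atomic_inc() plus
kvm_make_request() give an equivalent guarantee on all architectures, or
whether an explicit barrier is needed between them.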
> }
> EXPORT_SYMBOL_GPL(kvm_inject_nmi);
> 
> @@ -2844,7 +2844,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
>  		KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
>  
>  	events->nmi.injected = vcpu->arch.nmi_injected;
> -	events->nmi.pending = vcpu->arch.nmi_pending;
> +	events->nmi.pending = atomic_read(&vcpu->arch.nmi_pending) != 0;
>  	events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
>  	events->nmi.pad = 0;
>  
> @@ -2878,7 +2878,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
>  
>  	vcpu->arch.nmi_injected = events->nmi.injected;
>  	if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
> -		vcpu->arch.nmi_pending = events->nmi.pending;
> +		atomic_set(&vcpu->arch.nmi_pending, events->nmi.pending);
>  	kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
>  
>  	if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR)
> @@ -4763,7 +4763,7 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
>  	kvm_set_rflags(vcpu, ctxt->eflags);
>  
>  	if (irq == NMI_VECTOR)
> -		vcpu->arch.nmi_pending = false;
> +		atomic_set(&vcpu->arch.nmi_pending, 0);
>  	else
>  		vcpu->arch.interrupt.pending = false;
>  
> @@ -5570,9 +5570,9 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
>  	}
>  
>  	/* try to inject new event if pending */
> -	if (vcpu->arch.nmi_pending) {
> +	if (atomic_read(&vcpu->arch.nmi_pending)) {
>  		if (kvm_x86_ops->nmi_allowed(vcpu)) {
> -			vcpu->arch.nmi_pending = false;
> +			atomic_dec(&vcpu->arch.nmi_pending);

Here we lost NMIs in the past by overwriting nmi_pending while another
one was already queued, right?

>  			vcpu->arch.nmi_injected = true;
>  			kvm_x86_ops->set_nmi(vcpu);
>  		}
> @@ -5604,10 +5604,14 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
>  	}
>  }
>  
> +static bool nmi_in_progress(struct kvm_vcpu *vcpu)
> +{
> +	return vcpu->arch.nmi_injected || kvm_x86_ops->get_nmi_mask(vcpu);
> +}
> +
>  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  {
>  	int r;
> -	bool nmi_pending;
>  	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
>  		vcpu->run->request_interrupt_window;
>  
> @@ -5654,19 +5658,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  	if (unlikely(r))
>  		goto out;
>  
> -	/*
> -	 * An NMI can be injected between local nmi_pending read and
> -	 * vcpu->arch.nmi_pending read inside inject_pending_event().
> -	 * But in that case, KVM_REQ_EVENT will be set, which makes
> -	 * the race described above benign.
> -	 */
> -	nmi_pending = ACCESS_ONCE(vcpu->arch.nmi_pending);
> -
>  	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
>  		inject_pending_event(vcpu);
>  
>  		/* enable NMI/IRQ window open exits if needed */
> -		if (nmi_pending)
> +		if (atomic_read(&vcpu->arch.nmi_pending)
> +		    && nmi_in_progress(vcpu))

Is nmi_pending && !nmi_in_progress possible at all? Is it rather a BUG
condition? If not, what will happen next?
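Spelled out, the state space I see after inject_pending_event() is roughly
the following (simplified model with plain variables, not the real vcpu
state; the last case is the one I am asking about):

#include <stdbool.h>
#include <stdio.h>

/* hypothetical, simplified mirror of the relevant vcpu state */
struct nmi_state {
	int pending;	/* atomic_read(&vcpu->arch.nmi_pending) */
	bool injected;	/* vcpu->arch.nmi_injected */
	bool masked;	/* kvm_x86_ops->get_nmi_mask(vcpu) */
};

static bool nmi_in_progress(const struct nmi_state *s)
{
	return s->injected || s->masked;
}

static const char *window_decision(const struct nmi_state *s)
{
	if (!s->pending)
		return "nothing queued, nothing to do";
	if (nmi_in_progress(s))
		return "enable NMI window, collapse+inject when it reopens";
	/* pending, but neither injected nor masked: can we get here, e.g.
	 * when injection was blocked by an interrupt shadow, and if so,
	 * what re-triggers the injection? */
	return "???";
}

int main(void)
{
	struct nmi_state cases[] = {
		{ 0, false, false },	/* idle */
		{ 1, true,  false },	/* just injected, another NMI queued */
		{ 1, false, false },	/* the questionable combination */
	};

	for (unsigned int i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		printf("pending=%d injected=%d masked=%d -> %s\n",
		       cases[i].pending, cases[i].injected, cases[i].masked,
		       window_decision(&cases[i]));
	return 0;
}

If the third combination really cannot occur, documenting that (or turning
it into a WARN_ON) would answer the question in the code itself.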
>  			kvm_x86_ops->enable_nmi_window(vcpu);
>  		else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
>  			kvm_x86_ops->enable_irq_window(vcpu);
> @@ -6374,7 +6371,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
>  
>  int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
>  {
> -	vcpu->arch.nmi_pending = false;
> +	atomic_set(&vcpu->arch.nmi_pending, 0);
>  	vcpu->arch.nmi_injected = false;
>  
>  	vcpu->arch.switch_db_regs = 0;
> @@ -6649,7 +6646,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
>  		 !vcpu->arch.apf.halted)
>  		|| !list_empty_careful(&vcpu->async_pf.done)
>  		|| vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
> -		|| vcpu->arch.nmi_pending ||
> +		|| atomic_read(&vcpu->arch.nmi_pending) ||
>  		(kvm_arch_interrupt_allowed(vcpu) &&
>  		 kvm_cpu_has_interrupt(vcpu));
>  }
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index d36fe23..ed04e2b 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -125,4 +125,11 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
>  			gva_t addr, void *val, unsigned int bytes,
>  			struct x86_exception *exception);
>  
> +static inline void kvm_collapse_pending_nmis(struct kvm_vcpu *vcpu)
> +{
> +	/* Collapse all NMIs queued while an NMI handler was running to one */
> +	if (atomic_read(&vcpu->arch.nmi_pending))
> +		atomic_set(&vcpu->arch.nmi_pending, 1);

Is it OK that NMIs injected after the collapse will increment this
to > 1 again? Or is that impossible?

> +}
> +
> #endif

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html