> > To eliminate dispatching NMI/IRQ through the IDT, add > > kvm_vmx_reinject_nmi_irq(), which calls external_interrupt() for IRQ > > reinjection. > > > > Lastly replace calling a NMI/IRQ handler in an IDT descriptor with > > calling kvm_vmx_reinject_nmi_irq(). > > > > Signed-off-by: H. Peter Anvin (Intel) <hpa@xxxxxxxxx> > > Signed-off-by: Xin Li <xin3.li@xxxxxxxxx> > > Idem. > > > > +#if IS_ENABLED(CONFIG_KVM_INTEL) > > +/* > > + * KVM VMX reinjects NMI/IRQ on its current stack, it's a sync > > + * call thus the values in the pt_regs structure are not used in > > + * executing NMI/IRQ handlers, except cs.RPL and flags.IF, which > > + * are both always 0 in the VMX NMI/IRQ reinjection context. Thus > > + * we simply allocate a zeroed pt_regs structure on current stack > > + * to call external_interrupt(). > > + */ > > +void kvm_vmx_reinject_nmi_irq(u32 vector) > > noinstr ? Seems it's not needed to me. > > > +{ > > + struct pt_regs irq_regs; > > + > > + memset(&irq_regs, 0, sizeof(irq_regs)); > > + > > + if (vector == NMI_VECTOR) > > + return exc_nmi(&irq_regs); > > + > > + external_interrupt(&irq_regs, vector); } > > +EXPORT_SYMBOL_GPL(kvm_vmx_reinject_nmi_irq); > > +#endif > > > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index > > 63247c57c72c..b457e4888468 100644 > > --- a/arch/x86/kvm/vmx/vmx.c > > +++ b/arch/x86/kvm/vmx/vmx.c > > @@ -46,6 +46,7 @@ > > #include <asm/mshyperv.h> > > #include <asm/mwait.h> > > #include <asm/spec-ctrl.h> > > +#include <asm/traps.h> > > #include <asm/virtext.h> > > #include <asm/vmx.h> > > > > @@ -6758,15 +6759,11 @@ static void vmx_apicv_post_state_restore(struct > kvm_vcpu *vcpu) > > memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir)); } > > > > -void vmx_do_interrupt_nmi_irqoff(unsigned long entry); > > - > > -static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu, > > - unsigned long entry) > > +static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu, u32 > > +vector) > > { > > - bool is_nmi = entry == (unsigned long)asm_exc_nmi_noist; > > - > > - kvm_before_interrupt(vcpu, is_nmi ? KVM_HANDLING_NMI : > KVM_HANDLING_IRQ); > > - vmx_do_interrupt_nmi_irqoff(entry); > > + kvm_before_interrupt(vcpu, vector == NMI_VECTOR ? > > + KVM_HANDLING_NMI : > KVM_HANDLING_IRQ); > > + kvm_vmx_reinject_nmi_irq(vector); > > kvm_after_interrupt(vcpu); > > } > > > > @@ -6792,7 +6789,6 @@ static void handle_nm_fault_irqoff(struct > > kvm_vcpu *vcpu) > > > > static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx) { > > - const unsigned long nmi_entry = (unsigned long)asm_exc_nmi_noist; > > u32 intr_info = vmx_get_intr_info(&vmx->vcpu); > > > > /* if exit due to PF check for async PF */ @@ -6806,20 +6802,19 @@ > > static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx) > > kvm_machine_check(); > > /* We need to handle NMIs before interrupts are enabled */ > > else if (is_nmi(intr_info)) > > - handle_interrupt_nmi_irqoff(&vmx->vcpu, nmi_entry); > > + handle_interrupt_nmi_irqoff(&vmx->vcpu, NMI_VECTOR); > > } > > > > static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) > > { > > u32 intr_info = vmx_get_intr_info(vcpu); > > unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK; > > - gate_desc *desc = (gate_desc *)host_idt_base + vector; > > > > if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm, > > "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info)) > > return; > > > > - handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc)); > > + handle_interrupt_nmi_irqoff(vcpu, vector); > > vcpu->arch.at_instruction_boundary = true; } > > How does any of this work? You're calling into entry/noinstr code from a > random context. Can you please elaborate your concern a bit more? We are here in handle_external_interrupt_irqoff () because an external interrupt happened when a guest was running and the CPU vm-exits to host to dispatch to the IRQ handler with IRQ disabled.