On Wed, Nov 09, 2022 at 10:15:44PM -0800, Xin Li wrote:
> To eliminate dispatching NMI/IRQ through the IDT, add
> kvm_vmx_reinject_nmi_irq(), which calls external_interrupt()
> for IRQ reinjection.
>
> Lastly replace calling a NMI/IRQ handler in an IDT descriptor
> with calling kvm_vmx_reinject_nmi_irq().
>
> Signed-off-by: H. Peter Anvin (Intel) <hpa@xxxxxxxxx>
> Signed-off-by: Xin Li <xin3.li@xxxxxxxxx>

Idem.

> +#if IS_ENABLED(CONFIG_KVM_INTEL)
> +/*
> + * KVM VMX reinjects NMI/IRQ on its current stack, it's a sync
> + * call thus the values in the pt_regs structure are not used in
> + * executing NMI/IRQ handlers, except cs.RPL and flags.IF, which
> + * are both always 0 in the VMX NMI/IRQ reinjection context. Thus
> + * we simply allocate a zeroed pt_regs structure on current stack
> + * to call external_interrupt().
> + */
> +void kvm_vmx_reinject_nmi_irq(u32 vector)

noinstr ?

> +{
> +	struct pt_regs irq_regs;
> +
> +	memset(&irq_regs, 0, sizeof(irq_regs));
> +
> +	if (vector == NMI_VECTOR)
> +		return exc_nmi(&irq_regs);
> +
> +	external_interrupt(&irq_regs, vector);
> +}
> +EXPORT_SYMBOL_GPL(kvm_vmx_reinject_nmi_irq);
> +#endif
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 63247c57c72c..b457e4888468 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -46,6 +46,7 @@
>  #include <asm/mshyperv.h>
>  #include <asm/mwait.h>
>  #include <asm/spec-ctrl.h>
> +#include <asm/traps.h>
>  #include <asm/virtext.h>
>  #include <asm/vmx.h>
>
> @@ -6758,15 +6759,11 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
>  	memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
>  }
>
> -void vmx_do_interrupt_nmi_irqoff(unsigned long entry);
> -
> -static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu,
> -					unsigned long entry)
> +static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu, u32 vector)
>  {
> -	bool is_nmi = entry == (unsigned long)asm_exc_nmi_noist;
> -
> -	kvm_before_interrupt(vcpu, is_nmi ? KVM_HANDLING_NMI : KVM_HANDLING_IRQ);
> -	vmx_do_interrupt_nmi_irqoff(entry);
> +	kvm_before_interrupt(vcpu, vector == NMI_VECTOR ?
> +				   KVM_HANDLING_NMI : KVM_HANDLING_IRQ);
> +	kvm_vmx_reinject_nmi_irq(vector);
>  	kvm_after_interrupt(vcpu);
>  }
>
> @@ -6792,7 +6789,6 @@ static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
>
>  static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
>  {
> -	const unsigned long nmi_entry = (unsigned long)asm_exc_nmi_noist;
>  	u32 intr_info = vmx_get_intr_info(&vmx->vcpu);
>
>  	/* if exit due to PF check for async PF */
> @@ -6806,20 +6802,19 @@ static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
>  		kvm_machine_check();
>  	/* We need to handle NMIs before interrupts are enabled */
>  	else if (is_nmi(intr_info))
> -		handle_interrupt_nmi_irqoff(&vmx->vcpu, nmi_entry);
> +		handle_interrupt_nmi_irqoff(&vmx->vcpu, NMI_VECTOR);
>  }
>
>  static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
>  {
>  	u32 intr_info = vmx_get_intr_info(vcpu);
>  	unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
> -	gate_desc *desc = (gate_desc *)host_idt_base + vector;
>
>  	if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm,
>  	    "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
>  		return;
>
> -	handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc));
> +	handle_interrupt_nmi_irqoff(vcpu, vector);
>  	vcpu->arch.at_instruction_boundary = true;
>  }

How does any of this work? You're calling into entry/noinstr code from a
random context.