> > Jumping way back to providing a wrapper for FRED, if we do that, then > > there's no need to include calling.h, and the weird wrinkle about the > > ERET target kinda goes away too. E.g. provide this in > > arch/x86/entry/entry_64_fred.S > > > > .section .text, "ax" > > > > /* Note, this is instrumentation safe, and returns via RET, not ERETS! > > */ #if IS_ENABLED(CONFIG_KVM_INTEL) > > SYM_CODE_START(fred_irq_entry_from_kvm) > > FRED_ENTER > > call external_interrupt > > FRED_EXIT > > RET > > SYM_CODE_END(fred_irq_entry_from_kvm) > > EXPORT_SYMBOL_GPL(fred_irq_entry_from_kvm); > > #endif > > > > and then the KVM side for this particular chunk is more simply: > > > > lea 1f(%rip), %rax > > push %rax > > push $0 /* FRED error code, 0 for NMI and external > > interrupts */ > > > > \branch_insn \branch_target The call instruction here inserts a return address between the FRED stack frame just created and the GP regs pushed in the FRED wrapper, thus it doesn't work. I only realized it after looking at the stack layout in the Intel Simics emulator. > > 1: > > VMX_DO_EVENT_FRAME_END > > RET > > > Alternatively, the whole thing could be shoved into > > arch/x86/entry/entry_64_fred.S, but at a glance I don't think that > > would be a net positive due to the need to handle IRQs vs. NMIs. But following your idea of "the whole thing could be shoved into ...", plus the patch you posted, I came up with the following patch, which works and gets rid of the 2 "with modified stack frame" warnings. After calling external_interrupt() and POP_REGS, we can also use ERETS, thus unifying the stack adjustment.
diff --git a/arch/x86/entry/entry_64_fred.S b/arch/x86/entry/entry_64_fred.S index de9e2dc70e40..2b638914c6ed 100644 --- a/arch/x86/entry/entry_64_fred.S +++ b/arch/x86/entry/entry_64_fred.S @@ -4,12 +4,87 @@ */ #include <asm/asm.h> +#include <asm/export.h> #include <asm/fred.h> +#include <asm/segment.h> #include "calling.h" .code64 +#if IS_ENABLED(CONFIG_KVM_INTEL) +.macro FRED_ENTRY_FROM_KVM event_func nmi=0 + ENDBR +#ifdef CONFIG_X86_FRED + push %rbp + mov %rsp, %rbp + + /* + * Don't check the FRED stack level, the call stack leading to this + * helper is effectively constant and shallow (relatively speaking). + * + * Emulate the FRED-defined redzone and stack alignment. + */ + sub $(FRED_CONFIG_REDZONE_AMOUNT << 6), %rsp + and $FRED_STACK_FRAME_RSP_MASK, %rsp + + /* + * Start to push a FRED stack frame, which is always 64 bytes: + * + * +--------+-----------------+ + * | Bytes | Usage | + * +--------+-----------------+ + * | 63:56 | Reserved | + * | 55:48 | Event Data | + * | 47:40 | SS + Event Info | + * | 39:32 | RSP | + * | 31:24 | RFLAGS | + * | 23:16 | CS + Aux Info | + * | 15:8 | RIP | + * | 7:0 | Error Code | + * +--------+-----------------+ + */ + push $0 /* Reserved, must be 0 */ + push $0 /* Event data, 0 for IRQ/NMI */ + + shl $32, %rdi /* Event type and vector */ + .if \nmi + bts $FRED_SSX_NMI_BIT, %rdi + .endif + bts $FRED_SSX_64_BIT_MODE_BIT, %rdi + or $__KERNEL_DS, %rdi + push %rdi + push %rbp + pushf + mov $__KERNEL_CS, %rax + push %rax + + /* + * Unlike the IDT event delivery, FRED _always_ pushes an error code + * after pushing the return RIP, thus the CALL instruction CANNOT be + * used here to push the return RIP, otherwise there is no chance to + * push an error code before invoking the IRQ/NMI handler. + * + * Use LEA to get the return RIP and push it, then push an error code. 
+ */ + lea 1f(%rip), %rax + push %rax /* Return RIP */ + push $0 /* Error code, 0 for IRQ/NMI */ + + PUSH_AND_CLEAR_REGS + movq %rsp, %rdi /* %rdi -> pt_regs */ + call \event_func + POP_REGS + ERETS +1: + pop %rbp + RET +#else /* CONFIG_X86_FRED */ + ud2 +#endif /* CONFIG_X86_FRED */ +.endm +#endif /* CONFIG_KVM_INTEL */ + .macro FRED_ENTER UNWIND_HINT_END_OF_STACK ENDBR @@ -22,6 +97,16 @@ POP_REGS .endm + .section .text, "ax" + +/* Note, this is instrumentation safe, and returns via RET */ +#if IS_ENABLED(CONFIG_KVM_INTEL) +SYM_CODE_START(fred_irq_entry_from_kvm) + FRED_ENTRY_FROM_KVM external_interrupt +SYM_CODE_END(fred_irq_entry_from_kvm) +EXPORT_SYMBOL_GPL(fred_irq_entry_from_kvm); +#endif + .section .noinstr.text, "ax" /* @@ -55,3 +140,10 @@ SYM_CODE_START_NOALIGN(fred_entrypoint_kernel) FRED_EXIT ERETS SYM_CODE_END(fred_entrypoint_kernel) + +#if IS_ENABLED(CONFIG_KVM_INTEL) +SYM_CODE_START(fred_nmi_entry_from_kvm) + FRED_ENTRY_FROM_KVM fred_exc_nmi nmi=1 +SYM_CODE_END(fred_nmi_entry_from_kvm) +EXPORT_SYMBOL_GPL(fred_nmi_entry_from_kvm); +#endif diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h index 3c91f0eae62e..c7288213de14 100644 --- a/arch/x86/include/asm/fred.h +++ b/arch/x86/include/asm/fred.h @@ -122,6 +122,10 @@ DECLARE_FRED_HANDLER(fred_exc_double_fault); extern asmlinkage __visible void fred_entrypoint_user(void); extern asmlinkage __visible void fred_entrypoint_kernel(void); +/* For KVM VMX to handle IRQs in IRQ induced VM exits */ +extern asmlinkage __visible void fred_irq_entry_from_kvm(unsigned int event_info); +extern asmlinkage __visible void fred_nmi_entry_from_kvm(unsigned int event_info); + #endif /* __ASSEMBLY__ */ #endif /* CONFIG_X86_FRED */ diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index be275a0410a8..f6f557c6329e 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -40,6 +40,20 @@ push %_ASM_BP mov %_ASM_SP, %_ASM_BP + /* + * Start to push an IDT IRQ/NMI stack 
frame, which is 40 bytes on + * x86_64 and 24 bytes on x86_32: + * + * +-------+-------------------+ + * | Bytes | Usage | + * +-------+-------------------+ + * | 39:32 | SS (x86_64 only) | + * | 31:24 | RSP (x86_64 only) | + * | 23:16 | RFLAGS | + * | 15:8 | CS | + * | 7:0 | RIP | + * +-------+-------------------+ + */ #ifdef CONFIG_X86_64 /* * Align RSP to a 16-byte boundary (to emulate CPU behavior) before diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index df461f387e20..83c89239fcff 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6961,14 +6961,16 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) { u32 intr_info = vmx_get_intr_info(vcpu); unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK; - gate_desc *desc = (gate_desc *)host_idt_base + vector; if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm, "unexpected VM-Exit interrupt info: 0x%x", intr_info)) return; kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ); - vmx_do_interrupt_irqoff(gate_offset(desc)); + if (cpu_feature_enabled(X86_FEATURE_FRED)) + fred_irq_entry_from_kvm((EVENT_TYPE_HWINT << 16) | vector); + else + vmx_do_interrupt_irqoff(gate_offset((gate_desc *)host_idt_base + vector)); kvm_after_interrupt(vcpu); vcpu->arch.at_instruction_boundary = true; @@ -7254,7 +7256,10 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI && is_nmi(vmx_get_intr_info(vcpu))) { kvm_before_interrupt(vcpu, KVM_HANDLING_NMI); - vmx_do_nmi_irqoff(); + if (cpu_feature_enabled(X86_FEATURE_FRED)) + fred_nmi_entry_from_kvm((EVENT_TYPE_NMI << 16) | NMI_VECTOR); + else + vmx_do_nmi_irqoff(); kvm_after_interrupt(vcpu); }