> > +#include "../../entry/calling.h" > > Rather than do the low level PUSH_REGS and POP_REGS, I vote to have core code > expose a FRED-specific wrapper for invoking external_interrupt(). More below. Nice idea! > > + /* > > + * A FRED stack frame has extra 16 bytes of information pushed at the > > + * regular stack top compared to an IDT stack frame. > > There is pretty much no chance that anyone remembers the layout of an IDT stack > frame off the top of their head. I.e. saying "FRED has 16 bytes more" isn't all > that useful. It also fails to capture the fact that FRED stuff a hell of a lot > more information in those "common" 48 bytes. > > It'll be hard/impossible to capture all of the overload info in a comment, but > showing the actual layout of the frame would be super helpful, e.g. something > like > this > > /* > * FRED stack frames are always 64 bytes: > * > * ------------------------------ > * | Bytes | Usage | > * -----------------------------| > * | 63:56 | Reserved | > * | 55:48 | Event Data | > * | 47:40 | SS + Event Info | > * | 39:32 | RSP | > * | 31:24 | RFLAGS | > * | 23:16 | CS + Aux Info | > * | 15:8 | RIP | > * | 7:0 | Error Code | > * ------------------------------ > */ > * > * Use LEA to get the return RIP and push it, then push an error code. > * Note, only NMI handling does an ERETS to the target! IRQ handling > * doesn't need to unmask NMIs and so simply uses CALL+RET, i.e. the > * RIP pushed here is only truly consumed for NMIs! I take these as ASM code does need more love, i.e., nice comments :/ Otherwise only more confusion. > > Jumping way back to providing a wrapper for FRED, if we do that, then there's no > need to include calling.h, and the weird wrinkle about the ERET target kinda goes > away too. E.g. provide this in arch/x86/entry/entry_64_fred.S > > .section .text, "ax" > > /* Note, this is instrumentation safe, and returns via RET, not ERETS! */ > #if IS_ENABLED(CONFIG_KVM_INTEL) > SYM_CODE_START(fred_irq_entry_from_kvm) > FRED_ENTER > call external_interrupt > FRED_EXIT > RET > SYM_CODE_END(fred_irq_entry_from_kvm) > EXPORT_SYMBOL_GPL(fred_irq_entry_from_kvm); > #endif > > and then the KVM side for this particular chunk is more simply: > > lea 1f(%rip), %rax > push %rax > push $0 /* FRED error code, 0 for NMI and external > interrupts */ > > \branch_insn \branch_target > 1: > VMX_DO_EVENT_FRAME_END > RET > > > Alternatively, the whole thing could be shoved into > arch/x86/entry/entry_64_fred.S, > but at a glance I don't think that would be a net positive due to the need to > handle > IRQs vs. NMIs. > > > + \branch_insn \branch_target > > + > > + .if \nmi == 0 > > + POP_REGS > > + .endif > > + > > +1: > > + /* > > + * "Restore" RSP from RBP, even though IRET has already unwound RSP to > > As mentioned above, this is incorrect on two fronts. > > > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > > index 0ecf4be2c6af..4e90c69a92bf 100644 > > --- a/arch/x86/kvm/vmx/vmx.c > > +++ b/arch/x86/kvm/vmx/vmx.c > > @@ -6890,6 +6890,14 @@ static void vmx_apicv_post_state_restore(struct > kvm_vcpu *vcpu) > > memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir)); > > } > > > > +#ifdef CONFIG_X86_FRED > > +void vmx_do_fred_interrupt_irqoff(unsigned int vector); > > +void vmx_do_fred_nmi_irqoff(unsigned int vector); > > +#else > > +#define vmx_do_fred_interrupt_irqoff(x) BUG() > > +#define vmx_do_fred_nmi_irqoff(x) BUG() > > +#endif > > My slight preference is to open code the BUG() as a ud2 in assembly, purely to > avoid more #ifdefs. > > > + > > void vmx_do_interrupt_irqoff(unsigned long entry); > > void vmx_do_nmi_irqoff(void); > > > > @@ -6932,14 +6940,16 @@ static void handle_external_interrupt_irqoff(struct > kvm_vcpu *vcpu) > > { > > u32 intr_info = vmx_get_intr_info(vcpu); > > unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK; > > - gate_desc *desc = (gate_desc *)host_idt_base + vector; > > > > if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm, > > "unexpected VM-Exit interrupt info: 0x%x", intr_info)) > > return; > > > > kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ); > > - vmx_do_interrupt_irqoff(gate_offset(desc)); > > + if (cpu_feature_enabled(X86_FEATURE_FRED)) > > + vmx_do_fred_interrupt_irqoff(vector); /* Event type is 0 */ > > I strongly prefer to use code to document what's going on. E.g. the tail comment > just left me wondering, what event type is 0? Whereas this makes it quite clear > that KVM is signaling a hardware interrupt. The fact that it's a nop as far as > code generation goes is irrelevant. > > vmx_do_fred_interrupt_irqoff((EVENT_TYPE_HWINT << 16) | vector); Better readability. > > > + else > > + vmx_do_interrupt_irqoff(gate_offset((gate_desc *)host_idt_base > + vector)); > > kvm_after_interrupt(vcpu); > > > > vcpu->arch.at_instruction_boundary = true; > > Here's a diff for (hopefully) everything I've suggested above. Thanks a lot! I will test it and update the patch in this mail thread. > > --- > arch/x86/entry/entry_64_fred.S | 17 ++++++- > arch/x86/kernel/traps.c | 5 -- > arch/x86/kvm/vmx/vmenter.S | 84 +++++++++++++++------------------- > arch/x86/kvm/vmx/vmx.c | 7 +-- > 4 files changed, 55 insertions(+), 58 deletions(-) > > diff --git a/arch/x86/entry/entry_64_fred.S b/arch/x86/entry/entry_64_fred.S > index 12063267d2ac..a973c0bd29f6 100644 > --- a/arch/x86/entry/entry_64_fred.S > +++ b/arch/x86/entry/entry_64_fred.S > @@ -10,7 +10,6 @@ > #include "calling.h" > > .code64 > - .section ".noinstr.text", "ax" > > .macro FRED_ENTER > UNWIND_HINT_END_OF_STACK > @@ -24,6 +23,22 @@ > POP_REGS > .endm > > + .section .text, "ax" > + > +/* Note, this is instrumentation safe, and returns via RET, not ERETS! */ > +#if IS_ENABLED(CONFIG_KVM_INTEL) > +SYM_CODE_START(fred_irq_entry_from_kvm) > + FRED_ENTER > + call external_interrupt > + FRED_EXIT > + RET > +SYM_CODE_END(fred_irq_entry_from_kvm) > +EXPORT_SYMBOL_GPL(fred_irq_entry_from_kvm); > +#endif > + > + .section ".noinstr.text", "ax" > + > + > /* > * The new RIP value that FRED event delivery establishes is > * IA32_FRED_CONFIG & ~FFFH for events that occur in ring 3. > diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c > index 21eeba7b188f..cbcb83c71dab 100644 > --- a/arch/x86/kernel/traps.c > +++ b/arch/x86/kernel/traps.c > @@ -1566,11 +1566,6 @@ int external_interrupt(struct pt_regs *regs) > return 0; > } > > -#if IS_ENABLED(CONFIG_KVM_INTEL) > -/* For KVM VMX to handle IRQs in IRQ induced VM exits. */ > -EXPORT_SYMBOL_GPL(external_interrupt); > -#endif > - > #endif /* CONFIG_X86_64 */ > > void __init trap_init(void) > diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S > index 79a4c91d9434..e25df565c3f8 100644 > --- a/arch/x86/kvm/vmx/vmenter.S > +++ b/arch/x86/kvm/vmx/vmenter.S > @@ -9,7 +9,6 @@ > #include <asm/segment.h> > #include "kvm-asm-offsets.h" > #include "run_flags.h" > -#include "../../entry/calling.h" > > #define WORD_SIZE (BITS_PER_LONG / 8) > > @@ -33,15 +32,31 @@ > #define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE > #endif > > +.macro VMX_DO_EVENT_FRAME_BEGIN > + /* > + * Unconditionally create a stack frame, getting the correct RSP on the > + * stack (for x86-64) would take two instructions anyways, and RBP can > + * be used to restore RSP to make objtool happy (see below). > + */ > + push %_ASM_BP > + mov %_ASM_SP, %_ASM_BP > +.endm > + > +.macro VMX_DO_EVENT_FRAME_END > + /* > + * "Restore" RSP from RBP, even though {E,I}RET has already unwound RSP > + * to the correct value *in most cases*. KVM's IRQ handling with FRED > + * doesn't do ERETS, and objtool doesn't know the callee will IRET/ERET > + * and, without the explicit restore, thinks the stack is getting walloped. > + * Using an unwind hint is problematic due to x86-64's dynamic alignment. > + */ > + mov %_ASM_BP, %_ASM_SP > + pop %_ASM_BP > +.endm > + > #ifdef CONFIG_X86_FRED > .macro VMX_DO_FRED_EVENT_IRQOFF branch_insn branch_target nmi=0 > - /* > - * Unconditionally create a stack frame, getting the correct RSP on the > - * stack (for x86-64) would take two instructions anyways, and RBP can > - * be used to restore RSP to make objtool happy (see below). > - */ > - push %_ASM_BP > - mov %_ASM_SP, %_ASM_BP > + VMX_DO_EVENT_FRAME_BEGIN > > /* > * Don't check the FRED stack level, the call stack leading to this > @@ -78,43 +93,23 @@ > * push an error code before invoking the IRQ/NMI handler. > * > * Use LEA to get the return RIP and push it, then push an error code. > + * Note, only NMI handling does an ERETS to the target! IRQ handling > + * doesn't need to unmask NMIs and so simply uses CALL+RET, i.e. the > + * RIP pushed here is only truly consumed for NMIs! > */ > lea 1f(%rip), %rax > push %rax > push $0 /* FRED error code, 0 for NMI and external > interrupts */ > > - .if \nmi == 0 > - PUSH_REGS > - mov %rsp, %rdi > - .endif > - > \branch_insn \branch_target > - > - .if \nmi == 0 > - POP_REGS > - .endif > - > 1: > - /* > - * "Restore" RSP from RBP, even though IRET has already unwound RSP to > - * the correct value. objtool doesn't know the callee will IRET and, > - * without the explicit restore, thinks the stack is getting walloped. > - * Using an unwind hint is problematic due to x86-64's dynamic alignment. > - */ > - mov %_ASM_BP, %_ASM_SP > - pop %_ASM_BP > + VMX_DO_EVENT_FRAME_END > RET > .endm > #endif > > .macro VMX_DO_EVENT_IRQOFF call_insn call_target > - /* > - * Unconditionally create a stack frame, getting the correct RSP on the > - * stack (for x86-64) would take two instructions anyways, and RBP can > - * be used to restore RSP to make objtool happy (see below). > - */ > - push %_ASM_BP > - mov %_ASM_SP, %_ASM_BP > + VMX_DO_EVENT_FRAME_BEGIN > > #ifdef CONFIG_X86_64 > /* > @@ -129,14 +124,7 @@ > push $__KERNEL_CS > \call_insn \call_target > > - /* > - * "Restore" RSP from RBP, even though IRET has already unwound RSP to > - * the correct value. objtool doesn't know the callee will IRET and, > - * without the explicit restore, thinks the stack is getting walloped. > - * Using an unwind hint is problematic due to x86-64's dynamic alignment. > - */ > - mov %_ASM_BP, %_ASM_SP > - pop %_ASM_BP > + VMX_DO_EVENT_FRAME_END > RET > .endm > > @@ -375,11 +363,13 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, > SYM_L_GLOBAL) > > SYM_FUNC_END(__vmx_vcpu_run) > > -#ifdef CONFIG_X86_FRED > SYM_FUNC_START(vmx_do_fred_nmi_irqoff) > +#ifdef CONFIG_X86_FRED > VMX_DO_FRED_EVENT_IRQOFF jmp fred_entrypoint_kernel nmi=1 > +#else > + ud2 > +#endif > SYM_FUNC_END(vmx_do_fred_nmi_irqoff) > -#endif > > SYM_FUNC_START(vmx_do_nmi_irqoff) > VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx > @@ -438,11 +428,13 @@ SYM_FUNC_END(vmread_error_trampoline) > #endif > > .section .text, "ax" > -#ifdef CONFIG_X86_FRED > SYM_FUNC_START(vmx_do_fred_interrupt_irqoff) > - VMX_DO_FRED_EVENT_IRQOFF call external_interrupt > +#ifdef CONFIG_X86_FRED > + VMX_DO_FRED_EVENT_IRQOFF call fred_irq_entry_from_kvm > +#else > + ud2 > +#endif > SYM_FUNC_END(vmx_do_fred_interrupt_irqoff) > -#endif > > SYM_FUNC_START(vmx_do_interrupt_irqoff) > VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1 > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > index bf757f5071e4..cb4675dd87df 100644 > --- a/arch/x86/kvm/vmx/vmx.c > +++ b/arch/x86/kvm/vmx/vmx.c > @@ -6919,13 +6919,8 @@ static void vmx_apicv_post_state_restore(struct > kvm_vcpu *vcpu) > memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir)); > } > > -#ifdef CONFIG_X86_FRED > void vmx_do_fred_interrupt_irqoff(unsigned int vector); > void vmx_do_fred_nmi_irqoff(unsigned int vector); > -#else > -#define vmx_do_fred_interrupt_irqoff(x) BUG() > -#define vmx_do_fred_nmi_irqoff(x) BUG() > -#endif > > void vmx_do_interrupt_irqoff(unsigned long entry); > void vmx_do_nmi_irqoff(void); > @@ -6976,7 +6971,7 @@ static void handle_external_interrupt_irqoff(struct > kvm_vcpu *vcpu) > > kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ); > if (cpu_feature_enabled(X86_FEATURE_FRED)) > - vmx_do_fred_interrupt_irqoff(vector); /* Event type is 0 */ > + vmx_do_fred_interrupt_irqoff((EVENT_TYPE_HWINT << 16) | > vector); > else > vmx_do_interrupt_irqoff(gate_offset((gate_desc *)host_idt_base > + vector)); > kvm_after_interrupt(vcpu); > > base-commit: 8961078ffe509a97ec7803b17912e57c47b93fa2 > --