RE: [PATCH RESEND v9 33/36] KVM: VMX: Add VMX_DO_FRED_EVENT_IRQOFF for IRQ/NMI handling

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



> > +#include "../../entry/calling.h"
> 
> Rather than do the low level PUSH_REGS and POP_REGS, I vote to have core code
> expose a FRED-specific wrapper for invoking external_interrupt().  More below.

Nice idea!

> > +	/*
> > +	 * A FRED stack frame has extra 16 bytes of information pushed at the
> > +	 * regular stack top compared to an IDT stack frame.
> 
> There is pretty much no chance that anyone remembers the layout of an IDT stack
> frame off the top of their head.  I.e. saying "FRED has 16 bytes more" isn't all
> that useful.  It also fails to capture the fact that FRED stuff a hell of a lot
> more information in those "common" 48 bytes.
> 
> It'll be hard/impossible to capture all of the overload info in a comment, but
> showing the actual layout of the frame would be super helpful, e.g. something
> like
> this
> 
> 	/*
> 	 * FRED stack frames are always 64 bytes:
> 	 *
> 	 * ------------------------------
> 	 * | Bytes  | Usage             |
> 	 * -----------------------------|
> 	 * | 63:56  | Reserved          |
> 	 * | 55:48  | Event Data        |
>          * | 47:40  | SS + Event Info   |
>          * | 39:32  | RSP               |
> 	 * | 31:24  | RFLAGS            |
>          * | 23:16  | CS + Aux Info     |
>          * |  15:8  | RIP               |
>          * |   7:0  | Error Code        |
>          * ------------------------------
> 	 */
> 	 *
> 	 * Use LEA to get the return RIP and push it, then push an error code.
> 	 * Note, only NMI handling does an ERETS to the target!  IRQ handling
> 	 * doesn't need to unmask NMIs and so simply uses CALL+RET, i.e. the
> 	 * RIP pushed here is only truly consumed for NMIs!

I take these as ASM code does need more love, i.e., nice comments :/

Otherwise only more confusion.

 
> 
> Jumping way back to providing a wrapper for FRED, if we do that, then there's no
> need to include calling.h, and the weird wrinkle about the ERET target kinda goes
> away too.  E.g. provide this in arch/x86/entry/entry_64_fred.S
> 
> 	.section .text, "ax"
> 
> /* Note, this is instrumentation safe, and returns via RET, not ERETS! */
> #if IS_ENABLED(CONFIG_KVM_INTEL)
> SYM_CODE_START(fred_irq_entry_from_kvm)
> 	FRED_ENTER
> 	call external_interrupt
> 	FRED_EXIT
> 	RET
> SYM_CODE_END(fred_irq_entry_from_kvm)
> EXPORT_SYMBOL_GPL(fred_irq_entry_from_kvm);
> #endif
> 
> and then the KVM side for this particular chunk is more simply:
> 
> 	lea 1f(%rip), %rax
> 	push %rax
> 	push $0		/* FRED error code, 0 for NMI and external
> interrupts */
> 
> 	\branch_insn \branch_target
> 1:
> 	VMX_DO_EVENT_FRAME_END
> 	RET
> 
> 
> Alternatively, the whole thing could be shoved into
> arch/x86/entry/entry_64_fred.S,
> but at a glance I don't think that would be a net positive due to the need to
> handle
> IRQs vs. NMIs.
> 
> > +	\branch_insn \branch_target
> > +
> > +	.if \nmi == 0
> > +	POP_REGS
> > +	.endif
> > +
> > +1:
> > +	/*
> > +	 * "Restore" RSP from RBP, even though IRET has already unwound RSP to
> 
> As mentioned above, this is incorrect on two fronts.
> 
> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > index 0ecf4be2c6af..4e90c69a92bf 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -6890,6 +6890,14 @@ static void vmx_apicv_post_state_restore(struct
> kvm_vcpu *vcpu)
> >  	memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
> >  }
> >
> > +#ifdef CONFIG_X86_FRED
> > +void vmx_do_fred_interrupt_irqoff(unsigned int vector);
> > +void vmx_do_fred_nmi_irqoff(unsigned int vector);
> > +#else
> > +#define vmx_do_fred_interrupt_irqoff(x) BUG()
> > +#define vmx_do_fred_nmi_irqoff(x) BUG()
> > +#endif
> 
> My slight preference is to open code the BUG() as a ud2 in assembly, purely to
> avoid more #ifdefs.
> 
> > +
> >  void vmx_do_interrupt_irqoff(unsigned long entry);
> >  void vmx_do_nmi_irqoff(void);
> >
> > @@ -6932,14 +6940,16 @@ static void handle_external_interrupt_irqoff(struct
> kvm_vcpu *vcpu)
> >  {
> >  	u32 intr_info = vmx_get_intr_info(vcpu);
> >  	unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
> > -	gate_desc *desc = (gate_desc *)host_idt_base + vector;
> >
> >  	if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm,
> >  	    "unexpected VM-Exit interrupt info: 0x%x", intr_info))
> >  		return;
> >
> >  	kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
> > -	vmx_do_interrupt_irqoff(gate_offset(desc));
> > +	if (cpu_feature_enabled(X86_FEATURE_FRED))
> > +		vmx_do_fred_interrupt_irqoff(vector);	/* Event type is 0 */
> 
> I strongly prefer to use code to document what's going on.  E.g. the tail comment
> just left me wondering, what event type is 0?  Whereas this makes it quite clear
> that KVM is signaling a hardware interrupt.  The fact that it's a nop as far as
> code generation goes is irrelevant.
> 
> 	vmx_do_fred_interrupt_irqoff((EVENT_TYPE_HWINT << 16) | vector);

Better readability.

> 
> > +	else
> > +		vmx_do_interrupt_irqoff(gate_offset((gate_desc *)host_idt_base
> + vector));
> >  	kvm_after_interrupt(vcpu);
> >
> >  	vcpu->arch.at_instruction_boundary = true;
> 
> Here's a diff for (hopefully) everything I've suggested above.

Thanks a lot!  I will test it and update the patch in this mail thread.

> 
> ---
>  arch/x86/entry/entry_64_fred.S | 17 ++++++-
>  arch/x86/kernel/traps.c        |  5 --
>  arch/x86/kvm/vmx/vmenter.S     | 84 +++++++++++++++-------------------
>  arch/x86/kvm/vmx/vmx.c         |  7 +--
>  4 files changed, 55 insertions(+), 58 deletions(-)
> 
> diff --git a/arch/x86/entry/entry_64_fred.S b/arch/x86/entry/entry_64_fred.S
> index 12063267d2ac..a973c0bd29f6 100644
> --- a/arch/x86/entry/entry_64_fred.S
> +++ b/arch/x86/entry/entry_64_fred.S
> @@ -10,7 +10,6 @@
>  #include "calling.h"
> 
>  	.code64
> -	.section ".noinstr.text", "ax"
> 
>  .macro FRED_ENTER
>  	UNWIND_HINT_END_OF_STACK
> @@ -24,6 +23,22 @@
>  	POP_REGS
>  .endm
> 
> +	.section .text, "ax"
> +
> +/* Note, this is instrumentation safe, and returns via RET, not ERETS! */
> +#if IS_ENABLED(CONFIG_KVM_INTEL)
> +SYM_CODE_START(fred_irq_entry_from_kvm)
> +	FRED_ENTER
> +	call external_interrupt
> +	FRED_EXIT
> +	RET
> +SYM_CODE_END(fred_irq_entry_from_kvm)
> +EXPORT_SYMBOL_GPL(fred_irq_entry_from_kvm);
> +#endif
> +
> +	.section ".noinstr.text", "ax"
> +
> +
>  /*
>   * The new RIP value that FRED event delivery establishes is
>   * IA32_FRED_CONFIG & ~FFFH for events that occur in ring 3.
> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
> index 21eeba7b188f..cbcb83c71dab 100644
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -1566,11 +1566,6 @@ int external_interrupt(struct pt_regs *regs)
>  	return 0;
>  }
> 
> -#if IS_ENABLED(CONFIG_KVM_INTEL)
> -/* For KVM VMX to handle IRQs in IRQ induced VM exits. */
> -EXPORT_SYMBOL_GPL(external_interrupt);
> -#endif
> -
>  #endif /* CONFIG_X86_64 */
> 
>  void __init trap_init(void)
> diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
> index 79a4c91d9434..e25df565c3f8 100644
> --- a/arch/x86/kvm/vmx/vmenter.S
> +++ b/arch/x86/kvm/vmx/vmenter.S
> @@ -9,7 +9,6 @@
>  #include <asm/segment.h>
>  #include "kvm-asm-offsets.h"
>  #include "run_flags.h"
> -#include "../../entry/calling.h"
> 
>  #define WORD_SIZE (BITS_PER_LONG / 8)
> 
> @@ -33,15 +32,31 @@
>  #define VCPU_R15	__VCPU_REGS_R15 * WORD_SIZE
>  #endif
> 
> +.macro VMX_DO_EVENT_FRAME_BEGIN
> +	/*
> +	 * Unconditionally create a stack frame, getting the correct RSP on the
> +	 * stack (for x86-64) would take two instructions anyways, and RBP can
> +	 * be used to restore RSP to make objtool happy (see below).
> +	 */
> +	push %_ASM_BP
> +	mov %_ASM_SP, %_ASM_BP
> +.endm
> +
> +.macro VMX_DO_EVENT_FRAME_END
> +	/*
> +	 * "Restore" RSP from RBP, even though {E,I}RET has already unwound RSP
> +	 * to the correct value *in most cases*.  KVM's IRQ handling with FRED
> +	 * doesn't do ERETS, and objtool doesn't know the callee will IRET/ERET
> +	 * and, without the explicit restore, thinks the stack is getting walloped.
> +	 * Using an unwind hint is problematic due to x86-64's dynamic alignment.
> +	 */
> +	mov %_ASM_BP, %_ASM_SP
> +	pop %_ASM_BP
> +.endm
> +
>  #ifdef CONFIG_X86_FRED
>  .macro VMX_DO_FRED_EVENT_IRQOFF branch_insn branch_target nmi=0
> -	/*
> -	 * Unconditionally create a stack frame, getting the correct RSP on the
> -	 * stack (for x86-64) would take two instructions anyways, and RBP can
> -	 * be used to restore RSP to make objtool happy (see below).
> -	 */
> -	push %_ASM_BP
> -	mov %_ASM_SP, %_ASM_BP
> +	VMX_DO_EVENT_FRAME_BEGIN
> 
>  	/*
>  	 * Don't check the FRED stack level, the call stack leading to this
> @@ -78,43 +93,23 @@
>  	 * push an error code before invoking the IRQ/NMI handler.
>  	 *
>  	 * Use LEA to get the return RIP and push it, then push an error code.
> +	 * Note, only NMI handling does an ERETS to the target!  IRQ handling
> +	 * doesn't need to unmask NMIs and so simply uses CALL+RET, i.e. the
> +	 * RIP pushed here is only truly consumed for NMIs!
>  	 */
>  	lea 1f(%rip), %rax
>  	push %rax
>  	push $0		/* FRED error code, 0 for NMI and external
> interrupts */
> 
> -	.if \nmi == 0
> -	PUSH_REGS
> -	mov %rsp, %rdi
> -	.endif
> -
>  	\branch_insn \branch_target
> -
> -	.if \nmi == 0
> -	POP_REGS
> -	.endif
> -
>  1:
> -	/*
> -	 * "Restore" RSP from RBP, even though IRET has already unwound RSP to
> -	 * the correct value.  objtool doesn't know the callee will IRET and,
> -	 * without the explicit restore, thinks the stack is getting walloped.
> -	 * Using an unwind hint is problematic due to x86-64's dynamic alignment.
> -	 */
> -	mov %_ASM_BP, %_ASM_SP
> -	pop %_ASM_BP
> +	VMX_DO_EVENT_FRAME_END
>  	RET
>  .endm
>  #endif
> 
>  .macro VMX_DO_EVENT_IRQOFF call_insn call_target
> -	/*
> -	 * Unconditionally create a stack frame, getting the correct RSP on the
> -	 * stack (for x86-64) would take two instructions anyways, and RBP can
> -	 * be used to restore RSP to make objtool happy (see below).
> -	 */
> -	push %_ASM_BP
> -	mov %_ASM_SP, %_ASM_BP
> +	VMX_DO_EVENT_FRAME_BEGIN
> 
>  #ifdef CONFIG_X86_64
>  	/*
> @@ -129,14 +124,7 @@
>  	push $__KERNEL_CS
>  	\call_insn \call_target
> 
> -	/*
> -	 * "Restore" RSP from RBP, even though IRET has already unwound RSP to
> -	 * the correct value.  objtool doesn't know the callee will IRET and,
> -	 * without the explicit restore, thinks the stack is getting walloped.
> -	 * Using an unwind hint is problematic due to x86-64's dynamic alignment.
> -	 */
> -	mov %_ASM_BP, %_ASM_SP
> -	pop %_ASM_BP
> +	VMX_DO_EVENT_FRAME_END
>  	RET
>  .endm
> 
> @@ -375,11 +363,13 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit,
> SYM_L_GLOBAL)
> 
>  SYM_FUNC_END(__vmx_vcpu_run)
> 
> -#ifdef CONFIG_X86_FRED
>  SYM_FUNC_START(vmx_do_fred_nmi_irqoff)
> +#ifdef CONFIG_X86_FRED
>  	VMX_DO_FRED_EVENT_IRQOFF jmp fred_entrypoint_kernel nmi=1
> +#else
> +	ud2
> +#endif
>  SYM_FUNC_END(vmx_do_fred_nmi_irqoff)
> -#endif
> 
>  SYM_FUNC_START(vmx_do_nmi_irqoff)
>  	VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
> @@ -438,11 +428,13 @@ SYM_FUNC_END(vmread_error_trampoline)
>  #endif
> 
>  .section .text, "ax"
> -#ifdef CONFIG_X86_FRED
>  SYM_FUNC_START(vmx_do_fred_interrupt_irqoff)
> -	VMX_DO_FRED_EVENT_IRQOFF call external_interrupt
> +#ifdef CONFIG_X86_FRED
> +	VMX_DO_FRED_EVENT_IRQOFF call fred_irq_entry_from_kvm
> +#else
> +	ud2
> +#endif
>  SYM_FUNC_END(vmx_do_fred_interrupt_irqoff)
> -#endif
> 
>  SYM_FUNC_START(vmx_do_interrupt_irqoff)
>  	VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index bf757f5071e4..cb4675dd87df 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -6919,13 +6919,8 @@ static void vmx_apicv_post_state_restore(struct
> kvm_vcpu *vcpu)
>  	memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
>  }
> 
> -#ifdef CONFIG_X86_FRED
>  void vmx_do_fred_interrupt_irqoff(unsigned int vector);
>  void vmx_do_fred_nmi_irqoff(unsigned int vector);
> -#else
> -#define vmx_do_fred_interrupt_irqoff(x) BUG()
> -#define vmx_do_fred_nmi_irqoff(x) BUG()
> -#endif
> 
>  void vmx_do_interrupt_irqoff(unsigned long entry);
>  void vmx_do_nmi_irqoff(void);
> @@ -6976,7 +6971,7 @@ static void handle_external_interrupt_irqoff(struct
> kvm_vcpu *vcpu)
> 
>  	kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
>  	if (cpu_feature_enabled(X86_FEATURE_FRED))
> -		vmx_do_fred_interrupt_irqoff(vector);	/* Event type is 0 */
> +		vmx_do_fred_interrupt_irqoff((EVENT_TYPE_HWINT << 16) |
> vector);
>  	else
>  		vmx_do_interrupt_irqoff(gate_offset((gate_desc *)host_idt_base
> + vector));
>  	kvm_after_interrupt(vcpu);
> 
> base-commit: 8961078ffe509a97ec7803b17912e57c47b93fa2
> --





[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux