Re: [PATCH] KVM: VMX: Move vmx_vcpu_run()'s VM-Enter asm blob to a helper function

On 16/01/19 02:10, Sean Christopherson wrote:
> ...along with the function's STACK_FRAME_NON_STANDARD tag.  Moving the
> asm blob results in a significantly smaller amount of code that is
> marked with STACK_FRAME_NON_STANDARD, which makes it far less likely
> that gcc will split the function and trigger a spurious objtool warning.
> As a bonus, removing STACK_FRAME_NON_STANDARD from vmx_vcpu_run() allows
> the bulk of code to be properly checked by objtool.
> 
> Because %rbp is not loaded via VMCS fields, vmx_vcpu_run() must manually
> save/restore the host's RBP and load the guest's RBP prior to calling
> vmx_vmenter().  Modifying %rbp triggers objtool's stack validation code,
> and so vmx_vcpu_run() is tagged with STACK_FRAME_NON_STANDARD since it's
> impossible to avoid modifying %rbp.
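
For reference, here is a minimal sketch of how the STACK_FRAME_NON_STANDARD
whitelist tag is applied (the linux/frame.h include path matches kernels of
this era; the function below is purely illustrative and is not the KVM code):

    #include <linux/frame.h>

    /* Illustrative only: a function whose inline asm touches %rbp. */
    static void example_enter_guest(void)
    {
            asm volatile("push %%rbp\n\t"
                         "pop  %%rbp"
                         ::: "memory");
    }

    /* Exempt exactly this symbol from objtool's stack validation. */
    STACK_FRAME_NON_STANDARD(example_enter_guest);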
> 
> Unfortunately, vmx_vcpu_run() is also a gigantic function that gcc will
> split into separate functions, e.g. so that pieces of the function can
> be inlined.  Splitting the function means that the compiled ELF file
> will contain one or more vmx_vcpu_run.part.* functions in addition to
> a vmx_vcpu_run function.  Depending on where the function is split,
> objtool may warn about a "call without frame pointer save/setup" in
> vmx_vcpu_run.part.* since objtool's stack validation looks for exact
> names when whitelisting functions tagged with STACK_FRAME_NON_STANDARD.
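
To illustrate why the .part.* clones escape the whitelist: at the time,
STACK_FRAME_NON_STANDARD() expanded to roughly the following (paraphrased
from include/linux/frame.h, exact details may differ).  It records the
address of one named symbol, so a gcc-generated vmx_vcpu_run.part.* clone
has no corresponding entry and gets validated like any other function:

    /* Approximate expansion; not a verbatim copy of the header. */
    #define STACK_FRAME_NON_STANDARD(func)                                  \
            static void __used __section(.discard.func_stack_frame_non_standard) \
                    *__func_stack_frame_non_standard_##func = func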
> 
> Up until recently, the undesirable function splitting was effectively
> blocked because vmx_vcpu_run() was tagged with __noclone.  At the time,
> __noclone had an unintended side effect that put vmx_vcpu_run() into a
> separate optimization unit, which in turn prevented gcc from inlining
> the function (or any of its own function calls) and thus eliminated gcc's
> motivation to split the function.  Removing the __noclone attribute
> allowed gcc to optimize vmx_vcpu_run(), exposing the objtool warning.
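
As background, __noclone is the kernel's wrapper around gcc's noclone
function attribute, which by itself only prevents cloned copies of a
function (e.g. .constprop.* variants); as described above, suppressing the
fnsplit pass was an unintended side effect rather than the attribute's
documented behavior.  A minimal sketch with a hypothetical function name:

    /* Sketch only: attaching the attribute directly, equivalent to the
     * kernel's __noclone annotation. */
    static void __attribute__((noclone)) big_entry_function(void)
    {
            /* ... large body containing the VM-Enter asm blob ... */
    }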
> 
> Kudos to Qian Cai for root-causing that the fnsplit optimization is what
> caused objtool to complain.
> 
> Fixes: 453eafbe65f7 ("KVM: VMX: Move VM-Enter + VM-Exit handling to non-inline sub-routines")
> Cc: Qian Cai <cai@xxxxxx>
> Cc: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
> Reported-by: kbuild test robot <lkp@xxxxxxxxx>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
> ---
>  arch/x86/kvm/vmx/vmx.c | 139 ++++++++++++++++++++++-------------------
>  1 file changed, 73 insertions(+), 66 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 4d39f731bc33..80262c7a4495 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -6362,72 +6362,9 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
>  	vmx->loaded_vmcs->hv_timer_armed = false;
>  }
>  
> -static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
> +static void __vmx_vcpu_run(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
>  {
> -	struct vcpu_vmx *vmx = to_vmx(vcpu);
> -	unsigned long cr3, cr4, evmcs_rsp;
> -
> -	/* Record the guest's net vcpu time for enforced NMI injections. */
> -	if (unlikely(!enable_vnmi &&
> -		     vmx->loaded_vmcs->soft_vnmi_blocked))
> -		vmx->loaded_vmcs->entry_time = ktime_get();
> -
> -	/* Don't enter VMX if guest state is invalid, let the exit handler
> -	   start emulation until we arrive back to a valid state */
> -	if (vmx->emulation_required)
> -		return;
> -
> -	if (vmx->ple_window_dirty) {
> -		vmx->ple_window_dirty = false;
> -		vmcs_write32(PLE_WINDOW, vmx->ple_window);
> -	}
> -
> -	if (vmx->nested.need_vmcs12_sync)
> -		nested_sync_from_vmcs12(vcpu);
> -
> -	if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
> -		vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
> -	if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
> -		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
> -
> -	cr3 = __get_current_cr3_fast();
> -	if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
> -		vmcs_writel(HOST_CR3, cr3);
> -		vmx->loaded_vmcs->host_state.cr3 = cr3;
> -	}
> -
> -	cr4 = cr4_read_shadow();
> -	if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
> -		vmcs_writel(HOST_CR4, cr4);
> -		vmx->loaded_vmcs->host_state.cr4 = cr4;
> -	}
> -
> -	/* When single-stepping over STI and MOV SS, we must clear the
> -	 * corresponding interruptibility bits in the guest state. Otherwise
> -	 * vmentry fails as it then expects bit 14 (BS) in pending debug
> -	 * exceptions being set, but that's not correct for the guest debugging
> -	 * case. */
> -	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
> -		vmx_set_interrupt_shadow(vcpu, 0);
> -
> -	if (static_cpu_has(X86_FEATURE_PKU) &&
> -	    kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
> -	    vcpu->arch.pkru != vmx->host_pkru)
> -		__write_pkru(vcpu->arch.pkru);
> -
> -	pt_guest_enter(vmx);
> -
> -	atomic_switch_perf_msrs(vmx);
> -
> -	vmx_update_hv_timer(vcpu);
> -
> -	/*
> -	 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
> -	 * it's non-zero. Since vmentry is serialising on affected CPUs, there
> -	 * is no need to worry about the conditional branch over the wrmsr
> -	 * being speculatively taken.
> -	 */
> -	x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
> +	unsigned long evmcs_rsp;
>  
>  	vmx->__launched = vmx->loaded_vmcs->launched;
>  
> @@ -6567,6 +6504,77 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  		, "eax", "ebx", "edi"
>  #endif
>  	      );
> +}
> +STACK_FRAME_NON_STANDARD(__vmx_vcpu_run);
> +
> +static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
> +{
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	unsigned long cr3, cr4;
> +
> +	/* Record the guest's net vcpu time for enforced NMI injections. */
> +	if (unlikely(!enable_vnmi &&
> +		     vmx->loaded_vmcs->soft_vnmi_blocked))
> +		vmx->loaded_vmcs->entry_time = ktime_get();
> +
> +	/* Don't enter VMX if guest state is invalid, let the exit handler
> +	   start emulation until we arrive back to a valid state */
> +	if (vmx->emulation_required)
> +		return;
> +
> +	if (vmx->ple_window_dirty) {
> +		vmx->ple_window_dirty = false;
> +		vmcs_write32(PLE_WINDOW, vmx->ple_window);
> +	}
> +
> +	if (vmx->nested.need_vmcs12_sync)
> +		nested_sync_from_vmcs12(vcpu);
> +
> +	if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
> +		vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
> +	if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
> +		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
> +
> +	cr3 = __get_current_cr3_fast();
> +	if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
> +		vmcs_writel(HOST_CR3, cr3);
> +		vmx->loaded_vmcs->host_state.cr3 = cr3;
> +	}
> +
> +	cr4 = cr4_read_shadow();
> +	if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
> +		vmcs_writel(HOST_CR4, cr4);
> +		vmx->loaded_vmcs->host_state.cr4 = cr4;
> +	}
> +
> +	/* When single-stepping over STI and MOV SS, we must clear the
> +	 * corresponding interruptibility bits in the guest state. Otherwise
> +	 * vmentry fails as it then expects bit 14 (BS) in pending debug
> +	 * exceptions being set, but that's not correct for the guest debugging
> +	 * case. */
> +	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
> +		vmx_set_interrupt_shadow(vcpu, 0);
> +
> +	if (static_cpu_has(X86_FEATURE_PKU) &&
> +	    kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
> +	    vcpu->arch.pkru != vmx->host_pkru)
> +		__write_pkru(vcpu->arch.pkru);
> +
> +	pt_guest_enter(vmx);
> +
> +	atomic_switch_perf_msrs(vmx);
> +
> +	vmx_update_hv_timer(vcpu);
> +
> +	/*
> +	 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
> +	 * it's non-zero. Since vmentry is serialising on affected CPUs, there
> +	 * is no need to worry about the conditional branch over the wrmsr
> +	 * being speculatively taken.
> +	 */
> +	x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
> +
> +	__vmx_vcpu_run(vcpu, vmx);
>  
>  	/*
>  	 * We do not use IBRS in the kernel. If this vCPU has used the
> @@ -6648,7 +6656,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  	vmx_recover_nmi_blocking(vmx);
>  	vmx_complete_interrupts(vmx);
>  }
> -STACK_FRAME_NON_STANDARD(vmx_vcpu_run);
>  
>  static struct kvm *vmx_vm_alloc(void)
>  {
> 
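
Condensing the diff above, the resulting shape of the code is roughly the
following (bodies elided):

    static void __vmx_vcpu_run(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
    {
            /* %rbp save/restore and the VM-Enter asm blob live here. */
    }
    STACK_FRAME_NON_STANDARD(__vmx_vcpu_run);

    static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
    {
            /* C prologue: CR3/CR4 sync, PT, perf MSRs, SPEC_CTRL, ... */
            __vmx_vcpu_run(vcpu, to_vmx(vcpu));
            /* C epilogue: IBRS handling, NMI/interrupt completion, ... */
    }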

Queued, thanks.

Paolo


