On 16/01/19 02:10, Sean Christopherson wrote:
> ...along with the function's STACK_FRAME_NON_STANDARD tag. Moving the
> asm blob results in a significantly smaller amount of code that is
> marked with STACK_FRAME_NON_STANDARD, which makes it far less likely
> that gcc will split the function and trigger a spurious objtool warning.
> As a bonus, removing STACK_FRAME_NON_STANDARD from vmx_vcpu_run() allows
> the bulk of code to be properly checked by objtool.
> 
> Because %rbp is not loaded via VMCS fields, vmx_vcpu_run() must manually
> save/restore the host's RBP and load the guest's RBP prior to calling
> vmx_vmenter(). Modifying %rbp triggers objtool's stack validation code,
> and so vmx_vcpu_run() is tagged with STACK_FRAME_NON_STANDARD since it's
> impossible to avoid modifying %rbp.
> 
> Unfortunately, vmx_vcpu_run() is also a gigantic function that gcc will
> split into separate functions, e.g. so that pieces of the function can
> be inlined. Splitting the function means that the compiled ELF file
> will contain one or more vmx_vcpu_run.part.* functions in addition to
> a vmx_vcpu_run function. Depending on where the function is split,
> objtool may warn about a "call without frame pointer save/setup" in
> vmx_vcpu_run.part.* since objtool's stack validation looks for exact
> names when whitelisting functions tagged with STACK_FRAME_NON_STANDARD.
> 
> Up until recently, the undesirable function splitting was effectively
> blocked because vmx_vcpu_run() was tagged with __noclone. At the time,
> __noclone had an unintended side effect that put vmx_vcpu_run() into a
> separate optimization unit, which in turn prevented gcc from inlining
> the function (or any of its own function calls) and thus eliminated gcc's
> motivation to split the function. Removing the __noclone attribute
> allowed gcc to optimize vmx_vcpu_run(), exposing the objtool warning.
> 
> Kudos to Qian Cai for root causing that the fnsplit optimization is what
> caused objtool to complain.
> 
> Fixes: 453eafbe65f7 ("KVM: VMX: Move VM-Enter + VM-Exit handling to non-inline sub-routines")
> Cc: Qian Cai <cai@xxxxxx>
> Cc: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
> Reported-by: kbuild test robot <lkp@xxxxxxxxx>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
> ---
>  arch/x86/kvm/vmx/vmx.c | 139 ++++++++++++++++++++++-------------------
>  1 file changed, 73 insertions(+), 66 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 4d39f731bc33..80262c7a4495 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -6362,72 +6362,9 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
>  	vmx->loaded_vmcs->hv_timer_armed = false;
>  }
>  
> -static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
> +static void __vmx_vcpu_run(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
>  {
> -	struct vcpu_vmx *vmx = to_vmx(vcpu);
> -	unsigned long cr3, cr4, evmcs_rsp;
> -
> -	/* Record the guest's net vcpu time for enforced NMI injections. */
> -	if (unlikely(!enable_vnmi &&
> -		     vmx->loaded_vmcs->soft_vnmi_blocked))
> -		vmx->loaded_vmcs->entry_time = ktime_get();
> -
> -	/* Don't enter VMX if guest state is invalid, let the exit handler
> -	   start emulation until we arrive back to a valid state */
> -	if (vmx->emulation_required)
> -		return;
> -
> -	if (vmx->ple_window_dirty) {
> -		vmx->ple_window_dirty = false;
> -		vmcs_write32(PLE_WINDOW, vmx->ple_window);
> -	}
> -
> -	if (vmx->nested.need_vmcs12_sync)
> -		nested_sync_from_vmcs12(vcpu);
> -
> -	if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
> -		vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
> -	if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
> -		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
> -
> -	cr3 = __get_current_cr3_fast();
> -	if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
> -		vmcs_writel(HOST_CR3, cr3);
> -		vmx->loaded_vmcs->host_state.cr3 = cr3;
> -	}
> -
> -	cr4 = cr4_read_shadow();
> -	if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
> -		vmcs_writel(HOST_CR4, cr4);
> -		vmx->loaded_vmcs->host_state.cr4 = cr4;
> -	}
> -
> -	/* When single-stepping over STI and MOV SS, we must clear the
> -	 * corresponding interruptibility bits in the guest state. Otherwise
> -	 * vmentry fails as it then expects bit 14 (BS) in pending debug
> -	 * exceptions being set, but that's not correct for the guest debugging
> -	 * case. */
> -	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
> -		vmx_set_interrupt_shadow(vcpu, 0);
> -
> -	if (static_cpu_has(X86_FEATURE_PKU) &&
> -	    kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
> -	    vcpu->arch.pkru != vmx->host_pkru)
> -		__write_pkru(vcpu->arch.pkru);
> -
> -	pt_guest_enter(vmx);
> -
> -	atomic_switch_perf_msrs(vmx);
> -
> -	vmx_update_hv_timer(vcpu);
> -
> -	/*
> -	 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
> -	 * it's non-zero. Since vmentry is serialising on affected CPUs, there
> -	 * is no need to worry about the conditional branch over the wrmsr
> -	 * being speculatively taken.
> -	 */
> -	x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
> +	unsigned long evmcs_rsp;
>  
>  	vmx->__launched = vmx->loaded_vmcs->launched;
>  
> @@ -6567,6 +6504,77 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  		, "eax", "ebx", "edi"
>  #endif
>  	      );
> +}
> +STACK_FRAME_NON_STANDARD(__vmx_vcpu_run);
> +
> +static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
> +{
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	unsigned long cr3, cr4;
> +
> +	/* Record the guest's net vcpu time for enforced NMI injections. */
> +	if (unlikely(!enable_vnmi &&
> +		     vmx->loaded_vmcs->soft_vnmi_blocked))
> +		vmx->loaded_vmcs->entry_time = ktime_get();
> +
> +	/* Don't enter VMX if guest state is invalid, let the exit handler
> +	   start emulation until we arrive back to a valid state */
> +	if (vmx->emulation_required)
> +		return;
> +
> +	if (vmx->ple_window_dirty) {
> +		vmx->ple_window_dirty = false;
> +		vmcs_write32(PLE_WINDOW, vmx->ple_window);
> +	}
> +
> +	if (vmx->nested.need_vmcs12_sync)
> +		nested_sync_from_vmcs12(vcpu);
> +
> +	if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
> +		vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
> +	if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
> +		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
> +
> +	cr3 = __get_current_cr3_fast();
> +	if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
> +		vmcs_writel(HOST_CR3, cr3);
> +		vmx->loaded_vmcs->host_state.cr3 = cr3;
> +	}
> +
> +	cr4 = cr4_read_shadow();
> +	if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
> +		vmcs_writel(HOST_CR4, cr4);
> +		vmx->loaded_vmcs->host_state.cr4 = cr4;
> +	}
> +
> +	/* When single-stepping over STI and MOV SS, we must clear the
> +	 * corresponding interruptibility bits in the guest state. Otherwise
> +	 * vmentry fails as it then expects bit 14 (BS) in pending debug
> +	 * exceptions being set, but that's not correct for the guest debugging
> +	 * case. */
> +	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
> +		vmx_set_interrupt_shadow(vcpu, 0);
> +
> +	if (static_cpu_has(X86_FEATURE_PKU) &&
> +	    kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
> +	    vcpu->arch.pkru != vmx->host_pkru)
> +		__write_pkru(vcpu->arch.pkru);
> +
> +	pt_guest_enter(vmx);
> +
> +	atomic_switch_perf_msrs(vmx);
> +
> +	vmx_update_hv_timer(vcpu);
> +
> +	/*
> +	 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
> +	 * it's non-zero. Since vmentry is serialising on affected CPUs, there
> +	 * is no need to worry about the conditional branch over the wrmsr
> +	 * being speculatively taken.
> +	 */
> +	x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
> +
> +	__vmx_vcpu_run(vcpu, vmx);
>  
>  	/*
>  	 * We do not use IBRS in the kernel. If this vCPU has used the
> @@ -6648,7 +6656,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  	vmx_recover_nmi_blocking(vmx);
>  	vmx_complete_interrupts(vmx);
>  }
> -STACK_FRAME_NON_STANDARD(vmx_vcpu_run);
>  
>  static struct kvm *vmx_vm_alloc(void)
>  {
> 

Queued, thanks.

Paolo
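
For reference, the pattern the patch applies can be shown in isolation. The
sketch below is not the actual KVM code: it uses hypothetical demo_* names and
assumes the v5.0-era home of the macro in <linux/frame.h>. The idea is that
the %rbp-clobbering asm lives in a small out-of-line helper, and only that
helper carries the STACK_FRAME_NON_STANDARD tag, so the large caller remains
fully objtool-checked and gcc may split it without producing an unwhitelisted
.part.* symbol that contains the asm.

	#include <linux/compiler.h>	/* noinline */
	#include <linux/frame.h>	/* STACK_FRAME_NON_STANDARD() */

	/* Hypothetical helper: isolates the asm that manually touches %rbp. */
	static noinline void demo_asm_helper(unsigned long guest_rbp)
	{
		asm volatile("push %%rbp\n\t"
			     "mov %0, %%rbp\n\t"
			     /* ...work that runs with the guest's %rbp... */
			     "pop %%rbp"
			     : : "r" (guest_rbp) : "memory", "cc");
	}
	/* Whitelist only the tiny helper; objtool matches this exact symbol name. */
	STACK_FRAME_NON_STANDARD(demo_asm_helper);

	/* Hypothetical large caller: keeps a standard frame, so objtool checks
	 * it, and gcc is free to split or partially inline it. */
	static void demo_vcpu_run(unsigned long guest_rbp)
	{
		/* ...arbitrarily large, splittable C code... */
		demo_asm_helper(guest_rbp);
	}

The noinline in the sketch keeps the helper out-of-line so the tag's
exact-name whitelisting always matches the symbol that actually contains the
asm; the real __vmx_vcpu_run achieves the same separation by construction.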