On Mon, Jun 05, 2023 at 11:31:34AM -0700, Sean Christopherson wrote:
> Is there an actual bug here, or are we just micro-optimizing something that may or
> may not need additional optimization?  Unless there's a bug to be fixed, moving
> code into ASM and increasing complexity doesn't seem worthwhile.

I can't really argue with that.  FWIW, here's the (completely untested)
patch.

---8<---

From: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
Subject: [PATCH] KVM: VMX: Convert vmx_spec_ctrl_restore_host() to assembly

Convert vmx_spec_ctrl_restore_host() to assembly.  This allows the
removal of a redundant LFENCE.  It also "feels" safer as it doesn't
allow the compiler to insert any surprises.  And it's more symmetrical
with the pre-vmentry SPEC_CTRL handling, which is also done in
assembly.

Signed-off-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
---
 arch/x86/kvm/vmx/vmenter.S | 71 ++++++++++++++++++++++++++++++++------
 arch/x86/kvm/vmx/vmx.c     | 25 --------------
 arch/x86/kvm/vmx/vmx.h     |  1 -
 3 files changed, 61 insertions(+), 36 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 631fd7da2bc3..977f3487469c 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -108,7 +108,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
 	lea (%_ASM_SP), %_ASM_ARG2
 	call vmx_update_host_rsp
 
-	ALTERNATIVE "jmp .Lspec_ctrl_done", "", X86_FEATURE_MSR_SPEC_CTRL
+	ALTERNATIVE "jmp .Lguest_spec_ctrl_done", "", X86_FEATURE_MSR_SPEC_CTRL
 
 	/*
 	 * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the
@@ -122,13 +122,13 @@ SYM_FUNC_START(__vmx_vcpu_run)
 	movl VMX_spec_ctrl(%_ASM_DI), %edi
 	movl PER_CPU_VAR(x86_spec_ctrl_current), %esi
 	cmp %edi, %esi
-	je .Lspec_ctrl_done
+	je .Lguest_spec_ctrl_done
 	mov $MSR_IA32_SPEC_CTRL, %ecx
 	xor %edx, %edx
 	mov %edi, %eax
 	wrmsr
 
-.Lspec_ctrl_done:
+.Lguest_spec_ctrl_done:
 
 	/*
 	 * Since vmentry is serializing on affected CPUs, there's no need for
@@ -253,9 +253,65 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
 #endif
 
 	/*
-	 * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before
-	 * the first unbalanced RET after vmexit!
+	 * IMPORTANT: The below SPEC_CTRL handling and RSB filling must be done
+	 * before the first RET after vmexit!
+	 */
+
+	ALTERNATIVE "jmp .Lhost_spec_ctrl_done", "", X86_FEATURE_MSR_SPEC_CTRL
+
+	pop %_ASM_SI	/* @flags */
+	pop %_ASM_DI	/* @vmx */
+
+	/*
+	 * if (flags & VMX_RUN_SAVE_SPEC_CTRL)
+	 *	vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL);
+	 */
+	test $VMX_RUN_SAVE_SPEC_CTRL, %_ASM_SI
+	jz .Lhost_spec_ctrl_check
+
+	mov $MSR_IA32_SPEC_CTRL, %ecx
+	rdmsr
+	mov %eax, VMX_spec_ctrl(%_ASM_DI)
+
+.Lhost_spec_ctrl_check:
+	/*
+	 * If the guest/host SPEC_CTRL values differ, restore the host value.
 	 *
+	 * For legacy IBRS, the IBRS bit always needs to be written after
+	 * transitioning from a less privileged predictor mode, regardless of
+	 * whether the guest/host values differ.
+	 *
+	 * if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) ||
+	 *     vmx->spec_ctrl != this_cpu_read(x86_spec_ctrl_current))
+	 *	native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval);
+	 */
+	ALTERNATIVE "", "jmp .Lhost_spec_ctrl_write", X86_FEATURE_KERNEL_IBRS
+	movl VMX_spec_ctrl(%_ASM_DI), %edi
+	movl PER_CPU_VAR(x86_spec_ctrl_current), %esi
+	cmp %edi, %esi
+	je .Lhost_spec_ctrl_done_lfence
+
+.Lhost_spec_ctrl_write:
+	mov $MSR_IA32_SPEC_CTRL, %ecx
+	xor %edx, %edx
+	mov %esi, %eax
+	wrmsr
+
+.Lhost_spec_ctrl_done_lfence:
+	/*
+	 * This ensures that speculative execution doesn't incorrectly bypass
+	 * the above SPEC_CTRL wrmsr by mispredicting the 'je'.
+	 *
+	 * It's only needed if the below FILL_RETURN_BUFFER doesn't do an
+	 * LFENCE.  Thus the X86_FEATURE_RSB_VMEXIT{_LITE} alternatives.
+	 */
+	ALTERNATIVE_2 "lfence", \
+		      "", X86_FEATURE_RSB_VMEXIT, \
+		      "", X86_FEATURE_RSB_VMEXIT_LITE
+
+.Lhost_spec_ctrl_done:
+
+	/*
 	 * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB
 	 * entries and (in some cases) RSB underflow.
 	 *
@@ -267,11 +323,6 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
 	FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\
 			   X86_FEATURE_RSB_VMEXIT_LITE
 
-	pop %_ASM_ARG2	/* @flags */
-	pop %_ASM_ARG1	/* @vmx */
-
-	call vmx_spec_ctrl_restore_host
-
 	/* Put return value in AX */
 	mov %_ASM_BX, %_ASM_AX
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 44fb619803b8..d353b0e23918 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7109,31 +7109,6 @@ void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
 	}
 }
 
-void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx,
-					unsigned int flags)
-{
-	u64 hostval = this_cpu_read(x86_spec_ctrl_current);
-
-	if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL))
-		return;
-
-	if (flags & VMX_RUN_SAVE_SPEC_CTRL)
-		vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL);
-
-	/*
-	 * If the guest/host SPEC_CTRL values differ, restore the host value.
-	 *
-	 * For legacy IBRS, the IBRS bit always needs to be written after
-	 * transitioning from a less privileged predictor mode, regardless of
-	 * whether the guest/host values differ.
-	 */
-	if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) ||
-	    vmx->spec_ctrl != hostval)
-		native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval);
-
-	barrier_nospec();
-}
-
 static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
 {
 	switch (to_vmx(vcpu)->exit_reason.basic) {
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 9e66531861cf..f9fab33f4d76 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -420,7 +420,6 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
 struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr);
 void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu);
 void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
-void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags);
 unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx);
 bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs,
 		    unsigned int flags);
-- 
2.40.1
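A side note on the VMX_spec_ctrl symbol the new post-vmexit code leans on: it is
an asm-offsets constant rather than a macro from vmx.h, and it is the same one
the existing pre-vmentry guest-side load already uses.  As a rough sketch,
assuming the usual x86 asm-offsets.c pattern (names and include path here are
illustrative, not taken from the patch), the entry that lets vmenter.S address
vcpu_vmx::spec_ctrl as VMX_spec_ctrl(%_ASM_DI) looks something like:

	/*
	 * Sketch of an asm-offsets entry: kbuild compiles this file and turns
	 * each OFFSET() into a "#define VMX_spec_ctrl <offset>" usable from .S.
	 */
	#include <linux/kbuild.h>	/* OFFSET(), BLANK() */
	#include "../kvm/vmx/vmx.h"	/* struct vcpu_vmx (illustrative path) */

	static void __used common(void)
	{
	#if IS_ENABLED(CONFIG_KVM_INTEL)
		BLANK();
		/* VMX_spec_ctrl = offsetof(struct vcpu_vmx, spec_ctrl) */
		OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl);
	#endif
	}

Since the offset is already generated for __vmx_vcpu_run's pre-vmentry
SPEC_CTRL load (visible in the unchanged context above), the assembly added by
this patch needs no extra plumbing to reach vmx->spec_ctrl after vmexit.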