Restoration of the host IA32_SPEC_CTRL value is probably too late
with respect to the return thunk training sequence.

With respect to the user/kernel boundary, AMD says, "If software chooses
to toggle STIBP (e.g., set STIBP on kernel entry, and clear it on kernel
exit), software should set STIBP to 1 before executing the return thunk
training sequence." I assume the same requirements apply to the
guest/host boundary. The return thunk training sequence is in vmenter.S,
quite close to the VM-exit. On hosts without V_SPEC_CTRL, however, the
host's IA32_SPEC_CTRL value is not restored until much later.

To avoid this, move the restoration of host SPEC_CTRL to assembly and,
for consistency, move the restoration of the guest SPEC_CTRL as well.
This is not particularly difficult, apart from some care to cover both
32- and 64-bit, and to share code between SEV-ES and normal vmentry.

Suggested-by: Jim Mattson <jmattson@xxxxxxxxxx>
Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
---
 arch/x86/kernel/asm-offsets.c |  1 +
 arch/x86/kernel/cpu/bugs.c    | 13 ++---
 arch/x86/kvm/svm/svm.c        | 34 +++++--------
 arch/x86/kvm/svm/svm.h        |  4 +-
 arch/x86/kvm/svm/vmenter.S    | 93 +++++++++++++++++++++++++++++++++--
 5 files changed, 109 insertions(+), 36 deletions(-)

diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 7f1dd1138117..9dad8849c1ef 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -113,6 +113,7 @@ static void __used common(void)
 	if (IS_ENABLED(CONFIG_KVM_AMD)) {
 		BLANK();
 		OFFSET(SVM_vcpu_arch_regs, vcpu_svm, vcpu.arch.regs);
+		OFFSET(SVM_spec_ctrl, vcpu_svm, spec_ctrl);
 	}
 
 	if (IS_ENABLED(CONFIG_KVM_INTEL)) {
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index da7c361f47e0..6ec0b7ce7453 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -196,22 +196,15 @@ void __init check_bugs(void)
 }
 
 /*
- * NOTE: This function is *only* called for SVM.  VMX spec_ctrl handling is
- * done in vmenter.S.
+ * NOTE: This function is *only* called for SVM, since Intel uses
+ * MSR_IA32_SPEC_CTRL for SSBD.
  */
 void
 x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
 {
-	u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current();
+	u64 guestval, hostval;
 	struct thread_info *ti = current_thread_info();
 
-	if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
-		if (hostval != guestval) {
-			msrval = setguest ? guestval : hostval;
-			wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
-		}
-	}
-
 	/*
 	 * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update
 	 * MSR_AMD64_LS_CFG or MSR_VIRT_SPEC_CTRL if supported.
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 64f5f0544b4f..a79cdeebc181 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3918,10 +3918,21 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 	unsigned long vmcb_pa = svm->current_vmcb->pa;
 
+	/*
+	 * For non-nested case:
+	 * If the L01 MSR bitmap does not intercept the MSR, then we need to
+	 * save it.
+	 *
+	 * For nested case:
+	 * If the L02 MSR bitmap does not intercept the MSR, then we need to
+	 * save it.
+	 */
+	bool spec_ctrl_intercepted = msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL);
+
 	guest_state_enter_irqoff();
 
 	if (sev_es_guest(vcpu->kvm)) {
-		__svm_sev_es_vcpu_run(vmcb_pa);
+		__svm_sev_es_vcpu_run(vmcb_pa, svm, spec_ctrl_intercepted);
 	} else {
 		struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
 
@@ -3932,7 +3943,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
 		 * vmcb02 when switching vmcbs for nested virtualization.
 		 */
 		vmload(svm->vmcb01.pa);
-		__svm_vcpu_run(vmcb_pa, svm);
+		__svm_vcpu_run(vmcb_pa, svm, spec_ctrl_intercepted);
 		vmsave(svm->vmcb01.pa);
 
 		vmload(__sme_page_pa(sd->save_area));
@@ -4004,25 +4015,6 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
 
 	svm_vcpu_enter_exit(vcpu);
 
-	/*
-	 * We do not use IBRS in the kernel. If this vCPU has used the
-	 * SPEC_CTRL MSR it may have left it on; save the value and
-	 * turn it off. This is much more efficient than blindly adding
-	 * it to the atomic save/restore list. Especially as the former
-	 * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
-	 *
-	 * For non-nested case:
-	 * If the L01 MSR bitmap does not intercept the MSR, then we need to
-	 * save it.
-	 *
-	 * For nested case:
-	 * If the L02 MSR bitmap does not intercept the MSR, then we need to
-	 * save it.
-	 */
-	if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL) &&
-	    unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
-		svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
-
 	if (!sev_es_guest(vcpu->kvm))
 		reload_tss(vcpu);
 
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 5f8dfc9cd9a7..f61c05116ea5 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -483,7 +483,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm);
 
 /* vmenter.S */
 
-void __svm_sev_es_vcpu_run(unsigned long vmcb_pa);
-void __svm_vcpu_run(unsigned long vmcb_pa, struct vcpu_svm *svm);
+void __svm_sev_es_vcpu_run(unsigned long vmcb_pa, struct vcpu_svm *svm, bool spec_ctrl_intercepted);
+void __svm_vcpu_run(unsigned long vmcb_pa, struct vcpu_svm *svm, bool spec_ctrl_intercepted);
 
 #endif
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
index 51a63a47d74f..89402e450071 100644
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -29,10 +29,65 @@
 
 .section .noinstr.text, "ax"
 
+.macro RESTORE_GUEST_SPEC_CTRL
+	/* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
+	ALTERNATIVE_2 "jmp 999f", \
+		"", X86_FEATURE_MSR_SPEC_CTRL, \
+		"jmp 999f", X86_FEATURE_V_SPEC_CTRL
+
+	/*
+	 * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the
+	 * host's, write the MSR.
+	 *
+	 * IMPORTANT: To avoid RSB underflow attacks and any other nastiness,
+	 * there must not be any returns or indirect branches between this code
+	 * and vmentry.
+	 */
+	movl SVM_spec_ctrl(%_ASM_DI), %eax
+	cmp PER_CPU_VAR(x86_spec_ctrl_current), %eax
+	je 999f
+	mov $MSR_IA32_SPEC_CTRL, %ecx
+	xor %edx, %edx
+	wrmsr
+999:
+
+.endm
+
+.macro RESTORE_HOST_SPEC_CTRL
+	/* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
+	ALTERNATIVE_2 "jmp 999f", \
+		"", X86_FEATURE_MSR_SPEC_CTRL, \
+		"jmp 999f", X86_FEATURE_V_SPEC_CTRL
+
+	mov $MSR_IA32_SPEC_CTRL, %ecx
+
+	/*
+	 * Load the value that the guest had written into MSR_IA32_SPEC_CTRL,
+	 * if it was not intercepted during guest execution.
+	 */
+	cmpb $0, (%_ASM_SP)
+	jnz 998f
+	rdmsr
+	movl %eax, SVM_spec_ctrl(%_ASM_DI)
+998:
+
+	/* Now restore the host value of the MSR if different from the guest's. */
+	movl PER_CPU_VAR(x86_spec_ctrl_current), %eax
+	cmp SVM_spec_ctrl(%_ASM_DI), %eax
+	je 999f
+	xor %edx, %edx
+	wrmsr
+999:
+
+.endm
+
+
+
 /**
  * __svm_vcpu_run - Run a vCPU via a transition to SVM guest mode
  * @vmcb_pa:	unsigned long
  * @svm:	struct vcpu_svm *
+ * @spec_ctrl_intercepted: bool
  */
 SYM_FUNC_START(__svm_vcpu_run)
 	push %_ASM_BP
@@ -47,6 +102,9 @@ SYM_FUNC_START(__svm_vcpu_run)
 #endif
 	push %_ASM_BX
 
+	/* Save @spec_ctrl_intercepted. */
+	push %_ASM_ARG3
+
 	/* Save @svm. */
 	push %_ASM_ARG2
 
@@ -56,6 +114,7 @@ SYM_FUNC_START(__svm_vcpu_run)
 
 	/* Move @svm to RDI. */
 	mov %_ASM_ARG2, %_ASM_DI
+	RESTORE_GUEST_SPEC_CTRL
 
 	/* "POP" @vmcb to RAX. */
 	pop %_ASM_AX
@@ -90,7 +149,7 @@ SYM_FUNC_START(__svm_vcpu_run)
 	FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
 #endif
 
-	/* "POP" @svm to RAX. */
+	/* "POP" @svm to RAX while it's the only available register. */
 	pop %_ASM_AX
 
 	/* Save all guest registers. */
@@ -111,6 +170,9 @@ SYM_FUNC_START(__svm_vcpu_run)
 	mov %r15, VCPU_R15(%_ASM_AX)
 #endif
 
+	mov %_ASM_AX, %_ASM_DI
+	RESTORE_HOST_SPEC_CTRL
+
 	/*
 	 * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
 	 * untrained as soon as we exit the VM and are back to the
@@ -146,6 +208,9 @@ SYM_FUNC_START(__svm_vcpu_run)
 	xor %r15d, %r15d
 #endif
 
+	/* "Pop" @spec_ctrl_intercepted. */
+	pop %_ASM_BX
+
 	pop %_ASM_BX
 
 #ifdef CONFIG_X86_64
@@ -171,6 +236,8 @@ SYM_FUNC_END(__svm_vcpu_run)
 /**
  * __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode
  * @vmcb_pa:	unsigned long
+ * @svm:	struct vcpu_svm *
+ * @spec_ctrl_intercepted: bool
  */
 SYM_FUNC_START(__svm_sev_es_vcpu_run)
 	push %_ASM_BP
@@ -185,8 +252,22 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
 #endif
 	push %_ASM_BX
 
-	/* Move @vmcb to RAX. */
-	mov %_ASM_ARG1, %_ASM_AX
+	/* Save @spec_ctrl_intercepted. */
+	push %_ASM_ARG3
+
+	/* Save @svm. */
+	push %_ASM_ARG2
+
+	/* Save @vmcb. */
+	push %_ASM_ARG1
+
+	/* Move @svm to RDI for RESTORE_GUEST_SPEC_CTRL. */
+	mov %_ASM_ARG2, %_ASM_DI
+
+	RESTORE_GUEST_SPEC_CTRL
+
+	/* Pop @vmcb to RAX. */
+	pop %_ASM_AX
 
 	/* Enter guest mode */
 	sti
@@ -200,6 +281,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
 	FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
 #endif
 
+	pop %_ASM_DI
+	RESTORE_HOST_SPEC_CTRL
+
 	/*
 	 * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
 	 * untrained as soon as we exit the VM and are back to the
@@ -209,6 +293,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
 	 */
 	UNTRAIN_RET
 
+	/* "Pop" @spec_ctrl_intercepted. */
+	pop %_ASM_BX
+
 	pop %_ASM_BX
 
 #ifdef CONFIG_X86_64
-- 
2.31.1