On Mon, Nov 7, 2022 at 6:54 AM Paolo Bonzini <pbonzini@xxxxxxxxxx> wrote:
>
> Restoration of the host IA32_SPEC_CTRL value is probably too late
> with respect to the return thunk training sequence.
>
> With respect to the user/kernel boundary, AMD says, "If software chooses
> to toggle STIBP (e.g., set STIBP on kernel entry, and clear it on kernel
> exit), software should set STIBP to 1 before executing the return thunk
> training sequence." I assume the same requirements apply to the guest/host
> boundary. The return thunk training sequence is in vmenter.S, quite close
> to the VM-exit. On hosts without V_SPEC_CTRL, however, the host's
> IA32_SPEC_CTRL value is not restored until much later.
>
> To avoid this, move the restoration of host SPEC_CTRL to assembly and,
> for consistency, move the restoration of the guest SPEC_CTRL as well.
> This is not particularly difficult, apart from some care to cover both
> 32- and 64-bit, and to share code between SEV-ES and normal vmentry.
>
> Cc: stable@xxxxxxxxxxxxxxx
> Fixes: a149180fbcf3 ("x86: Add magic AMD return-thunk")
> Suggested-by: Jim Mattson <jmattson@xxxxxxxxxx>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
>  arch/x86/kernel/asm-offsets.c |  1 +
>  arch/x86/kernel/cpu/bugs.c    | 13 ++---
>  arch/x86/kvm/svm/svm.c        | 38 ++++++---------
>  arch/x86/kvm/svm/svm.h        |  4 +-
>  arch/x86/kvm/svm/vmenter.S    | 92 ++++++++++++++++++++++++++++++++++-
>  5 files changed, 111 insertions(+), 37 deletions(-)
>
> diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
> index 69d1fed51086..d0bd68af0a5a 100644
> --- a/arch/x86/kernel/asm-offsets.c
> +++ b/arch/x86/kernel/asm-offsets.c
> @@ -115,6 +115,7 @@ static void __used common(void)
>         OFFSET(SVM_vcpu_arch_regs, vcpu_svm, vcpu.arch.regs);
>         OFFSET(SVM_vmcb01, vcpu_svm, vmcb01);
>         OFFSET(SVM_current_vmcb, vcpu_svm, current_vmcb);
> +       OFFSET(SVM_spec_ctrl, vcpu_svm, spec_ctrl);
>         OFFSET(KVM_VMCB_pa, kvm_vmcb_info, pa);
>  }
>
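(An aside for anyone following along: if I remember the kbuild machinery correctly, asm-offsets.c is never linked into the kernel. It is compiled to assembly, and the OFFSET()/DEFINE() markers in the output are scraped to generate asm-offsets.h. Roughly, from include/linux/kbuild.h -- worth double-checking before relying on it:

  #define DEFINE(sym, val) \
          asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val))

  #define OFFSET(sym, str, mem) \
          DEFINE(sym, offsetof(struct str, mem))

So the new OFFSET() entry is what lets vmenter.S address svm->spec_ctrl as SVM_spec_ctrl(%_ASM_DI).)
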
> diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
> index da7c361f47e0..6ec0b7ce7453 100644
> --- a/arch/x86/kernel/cpu/bugs.c
> +++ b/arch/x86/kernel/cpu/bugs.c
> @@ -196,22 +196,15 @@ void __init check_bugs(void)
>  }
>
>  /*
> - * NOTE: This function is *only* called for SVM. VMX spec_ctrl handling is
> - * done in vmenter.S.
> + * NOTE: This function is *only* called for SVM, since Intel uses
> + * MSR_IA32_SPEC_CTRL for SSBD.
>   */
>  void
>  x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
>  {
> -       u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current();
> +       u64 guestval, hostval;
>         struct thread_info *ti = current_thread_info();
>
> -       if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
> -               if (hostval != guestval) {
> -                       msrval = setguest ? guestval : hostval;
> -                       wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
> -               }
> -       }
> -
>         /*
>          * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update
>          * MSR_AMD64_LS_CFG or MSR_VIRT_SPEC_CTRL if supported.
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 381c7dcffe25..31aa158a2e10 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -731,6 +731,15 @@ static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
>         u32 offset;
>         u32 *msrpm;
>
> +       /*
> +        * For non-nested case:
> +        * If the L01 MSR bitmap does not intercept the MSR, then we need to
> +        * save it.
> +        *
> +        * For nested case:
> +        * If the L02 MSR bitmap does not intercept the MSR, then we need to
> +        * save it.
> +        */
>         msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm:
>                                       to_svm(vcpu)->msrpm;
>
> @@ -3912,18 +3921,19 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
>         return EXIT_FASTPATH_NONE;
>  }
>
> -static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
> +static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_intercepted)
>  {
>         struct vcpu_svm *svm = to_svm(vcpu);
>
>         guest_state_enter_irqoff();
>
>         if (sev_es_guest(vcpu->kvm)) {
> -               __svm_sev_es_vcpu_run(svm);
> +               __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted);
>         } else {
>                 struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
>
> -               __svm_vcpu_run(svm, __sme_page_pa(sd->save_area));
> +               __svm_vcpu_run(svm, __sme_page_pa(sd->save_area),
> +                              spec_ctrl_intercepted);
>         }
>
>         guest_state_exit_irqoff();
> @@ -3932,6 +3942,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
>  static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
>  {
>         struct vcpu_svm *svm = to_svm(vcpu);
> +       bool spec_ctrl_intercepted = msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL);
>
>         trace_kvm_entry(vcpu);
>
> @@ -3990,26 +4001,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
>         if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL))
>                 x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
>
> -       svm_vcpu_enter_exit(vcpu);
> -
> -       /*
> -        * We do not use IBRS in the kernel. If this vCPU has used the
> -        * SPEC_CTRL MSR it may have left it on; save the value and
> -        * turn it off. This is much more efficient than blindly adding
> -        * it to the atomic save/restore list. Especially as the former
> -        * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
> -        *
> -        * For non-nested case:
> -        * If the L01 MSR bitmap does not intercept the MSR, then we need to
> -        * save it.
> -        *
> -        * For nested case:
> -        * If the L02 MSR bitmap does not intercept the MSR, then we need to
> -        * save it.
> -        */
> -       if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL) &&
> -           unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
> -               svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
> +       svm_vcpu_enter_exit(vcpu, spec_ctrl_intercepted);
>
>         if (!sev_es_guest(vcpu->kvm))
>                 reload_tss(vcpu);
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 99410651f2a5..9d940d8736f0 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -483,7 +483,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm);
>
>  /* vmenter.S */
>
> -void __svm_sev_es_vcpu_run(struct vcpu_svm *svm);
> -void __svm_vcpu_run(struct vcpu_svm *svm, unsigned long hsave_pa);
> +void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
> +void __svm_vcpu_run(struct vcpu_svm *svm, unsigned long hsave_pa, bool spec_ctrl_intercepted);
>
>  #endif
>
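To double-check my reading of the assembly that follows: the two new macros in vmenter.S amount to roughly this C rendering (illustrative only -- spec_ctrl_intercepted is the bool that svm_vcpu_run() now computes up front, and the real sequence has to stay in assembly because no returns or indirect branches may sit between it and VMRUN or the return thunk training):

        /* RESTORE_GUEST_SPEC_CTRL, just before VMRUN */
        if (svm->spec_ctrl != this_cpu_read(x86_spec_ctrl_current))
                native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);

        /* RESTORE_HOST_SPEC_CTRL, right after #VMEXIT */
        if (!spec_ctrl_intercepted)     /* the guest can write the MSR directly */
                svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
        if (svm->spec_ctrl != this_cpu_read(x86_spec_ctrl_current))
                native_wrmsrl(MSR_IA32_SPEC_CTRL,
                              this_cpu_read(x86_spec_ctrl_current));
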
> diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
> index 45a4bd002494..9e381386ffdc 100644
> --- a/arch/x86/kvm/svm/vmenter.S
> +++ b/arch/x86/kvm/svm/vmenter.S
> @@ -32,10 +32,64 @@
>
>  .section .noinstr.text, "ax"
>
> +.macro RESTORE_GUEST_SPEC_CTRL
> +       /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
> +       ALTERNATIVE_2 "jmp 999f", \
> +               "", X86_FEATURE_MSR_SPEC_CTRL, \
> +               "jmp 999f", X86_FEATURE_V_SPEC_CTRL
> +
> +       /*
> +        * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the
> +        * host's, write the MSR.
> +        *
> +        * IMPORTANT: To avoid RSB underflow attacks and any other nastiness,
> +        * there must not be any returns or indirect branches between this code
> +        * and vmentry.
> +        */
> +       movl SVM_spec_ctrl(%_ASM_DI), %eax
> +       cmp PER_CPU_VAR(x86_spec_ctrl_current), %eax
> +       je 999f
> +       mov $MSR_IA32_SPEC_CTRL, %ecx
> +       xor %edx, %edx
> +       wrmsr
> +999:
> +
> +.endm
> +
> +.macro RESTORE_HOST_SPEC_CTRL
> +       /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
> +       ALTERNATIVE_2 "jmp 999f", \
> +               "", X86_FEATURE_MSR_SPEC_CTRL, \
> +               "jmp 999f", X86_FEATURE_V_SPEC_CTRL
> +
> +       mov $MSR_IA32_SPEC_CTRL, %ecx
> +
> +       /*
> +        * Load the value that the guest had written into MSR_IA32_SPEC_CTRL,
> +        * if it was not intercepted during guest execution.
> +        */
> +       cmpb $0, (%_ASM_SP)
> +       jnz 998f
> +       rdmsr
> +       movl %eax, SVM_spec_ctrl(%_ASM_DI)
> +998:
> +
> +       /* Now restore the host value of the MSR if different from the guest's. */
> +       movl PER_CPU_VAR(x86_spec_ctrl_current), %eax
> +       cmp SVM_spec_ctrl(%_ASM_DI), %eax
> +       je 999f
> +       xor %edx, %edx
> +       wrmsr
> +999:
> +
> +.endm
> +
> +

It seems unfortunate to have the unconditional branches in the more common cases.
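For the record, my understanding of how the ALTERNATIVE_2 prologue is patched at boot (a sketch, assuming the usual alternatives semantics where a later matching feature wins):

        /* neither feature set: the original instruction is kept */
        jmp 999f                /* no SPEC_CTRL MSR to manage */

        /* X86_FEATURE_MSR_SPEC_CTRL only: patched to NOPs, falls through */

        /* X86_FEATURE_V_SPEC_CTRL set: patched back to a branch */
        jmp 999f                /* hardware swaps SPEC_CTRL across VMRUN */

Could this be inverted, so that the cases which skip the MSR accesses run straight-line NOPs, and only the X86_FEATURE_MSR_SPEC_CTRL case branches out to the rdmsr/wrmsr work?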