Newer AMD processors have a feature to virtualize the use of the SPEC_CTRL MSR. Presence of this feature is indicated via CPUID function 0x8000000A_EDX[20]: GuestSpecCtrl. Hypervisors are not required to enable this feature since it is automatically enabled on processors that support it. A hypervisor may wish to impose speculation controls on guest execution or a guest may want to impose its own speculation controls. Therefore, the processor implements both host and guest versions of SPEC_CTRL. When in host mode, the host SPEC_CTRL value is in effect and writes update only the host version of SPEC_CTRL. On a VMRUN, the processor loads the guest version of SPEC_CTRL from the VMCB. When the guest writes SPEC_CTRL, only the guest version is updated. On a VMEXIT, the guest version is saved into the VMCB and the processor returns to only using the host SPEC_CTRL for speculation control. The guest SPEC_CTRL is located at offset 0x2E0 in the VMCB. The effective SPEC_CTRL setting is the guest SPEC_CTRL setting or'ed with the hypervisor SPEC_CTRL setting. This allows the hypervisor to ensure a minimum SPEC_CTRL if desired. This support also fixes an issue where a guest may sometimes see an inconsistent value for the SPEC_CTRL MSR on processors that support this feature. With the current SPEC_CTRL support, the first write to SPEC_CTRL is intercepted and the virtualized version of the SPEC_CTRL MSR is not updated. When the guest reads back the SPEC_CTRL MSR, it will be 0x0, instead of the actual expected value. There isn’t a security concern here, because the host SPEC_CTRL value is or’ed with the Guest SPEC_CTRL value to generate the effective SPEC_CTRL value. KVM writes with the guest's virtualized SPEC_CTRL value to SPEC_CTRL MSR just before the VMRUN, so it will always have the actual value even though it doesn’t appear that way in the guest. The guest will only see the proper value for the SPEC_CTRL register if the guest was to write to the SPEC_CTRL register again. With Virtual SPEC_CTRL support, the save area spec_ctrl is properly saved and restored. So, the guest will always see the proper value when it is read back. Signed-off-by: Babu Moger <babu.moger@xxxxxxx> --- arch/x86/include/asm/svm.h | 4 +++- arch/x86/kvm/svm/nested.c | 2 ++ arch/x86/kvm/svm/svm.c | 27 ++++++++++++++++++++++----- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 1c561945b426..772e60efe243 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -269,7 +269,9 @@ struct vmcb_save_area { * SEV-ES guests when referenced through the GHCB or for * saving to the host save area. */ - u8 reserved_7[80]; + u8 reserved_7[72]; + u32 spec_ctrl; /* Guest version of SPEC_CTRL at 0x2E0 */ + u8 reserved_7b[4]; u32 pkru; u8 reserved_7a[20]; u64 reserved_8; /* rax already available at 0x01f8 */ diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 7a605ad8254d..9e51f9e4f631 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -534,6 +534,7 @@ int nested_svm_vmrun(struct vcpu_svm *svm) hsave->save.cr3 = vmcb->save.cr3; else hsave->save.cr3 = kvm_read_cr3(&svm->vcpu); + hsave->save.spec_ctrl = vmcb->save.spec_ctrl; copy_vmcb_control_area(&hsave->control, &vmcb->control); @@ -675,6 +676,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm) kvm_rip_write(&svm->vcpu, hsave->save.rip); svm->vmcb->save.dr7 = DR7_FIXED_1; svm->vmcb->save.cpl = 0; + svm->vmcb->save.spec_ctrl = hsave->save.spec_ctrl; svm->vmcb->control.exit_int_info = 0; vmcb_mark_all_dirty(svm->vmcb); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index f923e14e87df..756129caa611 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1244,6 +1244,14 @@ static void init_vmcb(struct vcpu_svm *svm) svm_check_invpcid(svm); + /* + * If the host supports V_SPEC_CTRL then disable the interception + * of MSR_IA32_SPEC_CTRL. + */ + if (boot_cpu_has(X86_FEATURE_V_SPEC_CTRL)) + set_msr_interception(&svm->vcpu, svm->msrpm, MSR_IA32_SPEC_CTRL, + 1, 1); + if (kvm_vcpu_apicv_active(&svm->vcpu)) avic_init_vmcb(svm); @@ -2678,7 +2686,10 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) !guest_has_spec_ctrl_msr(vcpu)) return 1; - msr_info->data = svm->spec_ctrl; + if (boot_cpu_has(X86_FEATURE_V_SPEC_CTRL)) + msr_info->data = svm->vmcb->save.spec_ctrl; + else + msr_info->data = svm->spec_ctrl; break; case MSR_AMD64_VIRT_SPEC_CTRL: if (!msr_info->host_initiated && @@ -2779,7 +2790,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) if (kvm_spec_ctrl_test_value(data)) return 1; - svm->spec_ctrl = data; + if (boot_cpu_has(X86_FEATURE_V_SPEC_CTRL)) + svm->vmcb->save.spec_ctrl = data; + else + svm->spec_ctrl = data; if (!data) break; @@ -3791,7 +3805,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) * is no need to worry about the conditional branch over the wrmsr * being speculatively taken. */ - x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); + if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL)) + x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); svm_vcpu_enter_exit(vcpu, svm); @@ -3810,13 +3825,15 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) * If the L02 MSR bitmap does not intercept the MSR, then we need to * save it. */ - if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) + if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL) && + unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); if (!sev_es_guest(svm->vcpu.kvm)) reload_tss(vcpu); - x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); + if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL)) + x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); if (!sev_es_guest(svm->vcpu.kvm)) { vcpu->arch.cr2 = svm->vmcb->save.cr2;