On Tue, Jan 23, 2024 at 06:41:59PM -0800, Yang Weijiang wrote: >Set up CET MSRs, related VM_ENTRY/EXIT control bits and fixed CR4 setting >to enable CET for nested VM. > >vmcs12 and vmcs02 needs to be synced when L2 exits to L1 or when L1 wants >to resume L2, that way correct CET states can be observed by one another. > >Suggested-by: Chao Gao <chao.gao@xxxxxxxxx> >Signed-off-by: Yang Weijiang <weijiang.yang@xxxxxxxxx> >Reviewed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx> >--- > arch/x86/kvm/vmx/nested.c | 57 +++++++++++++++++++++++++++++++++++++-- > arch/x86/kvm/vmx/vmcs12.c | 6 +++++ > arch/x86/kvm/vmx/vmcs12.h | 14 +++++++++- > arch/x86/kvm/vmx/vmx.c | 2 ++ > 4 files changed, 76 insertions(+), 3 deletions(-) > >diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c >index 468a7cf75035..e330897a7e5e 100644 >--- a/arch/x86/kvm/vmx/nested.c >+++ b/arch/x86/kvm/vmx/nested.c >@@ -691,6 +691,28 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, > nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, > MSR_IA32_FLUSH_CMD, MSR_TYPE_W); > >+ /* Pass CET MSRs to nested VM if L0 and L1 are set to pass-through. 
*/ >+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >+ MSR_IA32_U_CET, MSR_TYPE_RW); >+ >+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >+ MSR_IA32_S_CET, MSR_TYPE_RW); >+ >+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >+ MSR_IA32_PL0_SSP, MSR_TYPE_RW); >+ >+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >+ MSR_IA32_PL1_SSP, MSR_TYPE_RW); >+ >+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >+ MSR_IA32_PL2_SSP, MSR_TYPE_RW); >+ >+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >+ MSR_IA32_PL3_SSP, MSR_TYPE_RW); >+ >+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >+ MSR_IA32_INT_SSP_TAB, MSR_TYPE_RW); >+ > kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false); > > vmx->nested.force_msr_bitmap_recalc = false; >@@ -2506,6 +2528,17 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) > if (kvm_mpx_supported() && vmx->nested.nested_run_pending && > (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) > vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); >+ >+ if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE) { >+ if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK)) { >+ vmcs_writel(GUEST_SSP, vmcs12->guest_ssp); >+ vmcs_writel(GUEST_INTR_SSP_TABLE, >+ vmcs12->guest_ssp_tbl); >+ } >+ if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK) || >+ guest_can_use(&vmx->vcpu, X86_FEATURE_IBT)) >+ vmcs_writel(GUEST_S_CET, vmcs12->guest_s_cet); >+ } I think you need to move this hunk outside the outermost if-statement, i.e., outside of: if (!hv_evmcs || !(hv_evmcs->hv_clean_fields & HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1)) { ... } Otherwise, the whole block may be skipped (e.g., when evmcs is enabled and GUEST_GRP1 is clean), leaving CET state not context-switched. 
And if VM_ENTRY_LOAD_CET_STATE of vmcs12 is cleared, L1's values should be propagated to vmcs02 on nested VM-Enter; see pre_vmenter_debugctl in struct nested_vmx. I believe we need similar handling for the three CET fields. > } > > if (nested_cpu_has_xsaves(vmcs12)) >@@ -4344,6 +4377,15 @@ static void sync_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu, > vmcs12->guest_pending_dbg_exceptions = > vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); > >+ if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK)) { >+ vmcs12->guest_ssp = vmcs_readl(GUEST_SSP); >+ vmcs12->guest_ssp_tbl = vmcs_readl(GUEST_INTR_SSP_TABLE); >+ } >+ if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK) || >+ guest_can_use(&vmx->vcpu, X86_FEATURE_IBT)) { >+ vmcs12->guest_s_cet = vmcs_readl(GUEST_S_CET); >+ } Nit: the braces around this single-statement if body (the guest_s_cet assignment) are unnecessary.