On Thu, 2023-09-14 at 02:33 -0400, Yang Weijiang wrote: > Set up CET MSRs, related VM_ENTRY/EXIT control bits and fixed CR4 setting > to enable CET for nested VM. > > Signed-off-by: Yang Weijiang <weijiang.yang@xxxxxxxxx> > --- > arch/x86/kvm/vmx/nested.c | 27 +++++++++++++++++++++++++-- > arch/x86/kvm/vmx/vmcs12.c | 6 ++++++ > arch/x86/kvm/vmx/vmcs12.h | 14 +++++++++++++- > arch/x86/kvm/vmx/vmx.c | 2 ++ > 4 files changed, 46 insertions(+), 3 deletions(-) > > diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c > index 78a3be394d00..2c4ff13fddb0 100644 > --- a/arch/x86/kvm/vmx/nested.c > +++ b/arch/x86/kvm/vmx/nested.c > @@ -660,6 +660,28 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, > nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, > MSR_IA32_FLUSH_CMD, MSR_TYPE_W); > > + /* Pass CET MSRs to nested VM if L0 and L1 are set to pass-through. */ > + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, > + MSR_IA32_U_CET, MSR_TYPE_RW); > + > + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, > + MSR_IA32_S_CET, MSR_TYPE_RW); > + > + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, > + MSR_IA32_PL0_SSP, MSR_TYPE_RW); > + > + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, > + MSR_IA32_PL1_SSP, MSR_TYPE_RW); > + > + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, > + MSR_IA32_PL2_SSP, MSR_TYPE_RW); > + > + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, > + MSR_IA32_PL3_SSP, MSR_TYPE_RW); > + > + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, > + MSR_IA32_INT_SSP_TAB, MSR_TYPE_RW); > + > kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false); > > vmx->nested.force_msr_bitmap_recalc = false; > @@ -6794,7 +6816,7 @@ static void nested_vmx_setup_exit_ctls(struct vmcs_config *vmcs_conf, > VM_EXIT_HOST_ADDR_SPACE_SIZE | > #endif > VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | > 
- VM_EXIT_CLEAR_BNDCFGS; > + VM_EXIT_CLEAR_BNDCFGS | VM_EXIT_LOAD_CET_STATE; > msrs->exit_ctls_high |= > VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | > VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | > @@ -6816,7 +6838,8 @@ static void nested_vmx_setup_entry_ctls(struct vmcs_config *vmcs_conf, > #ifdef CONFIG_X86_64 > VM_ENTRY_IA32E_MODE | > #endif > - VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS; > + VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS | > + VM_ENTRY_LOAD_CET_STATE; > msrs->entry_ctls_high |= > (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER | > VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL); > diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c > index 106a72c923ca..4233b5ca9461 100644 > --- a/arch/x86/kvm/vmx/vmcs12.c > +++ b/arch/x86/kvm/vmx/vmcs12.c > @@ -139,6 +139,9 @@ const unsigned short vmcs12_field_offsets[] = { > FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions), > FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp), > FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip), > + FIELD(GUEST_S_CET, guest_s_cet), > + FIELD(GUEST_SSP, guest_ssp), > + FIELD(GUEST_INTR_SSP_TABLE, guest_ssp_tbl), > FIELD(HOST_CR0, host_cr0), > FIELD(HOST_CR3, host_cr3), > FIELD(HOST_CR4, host_cr4), > @@ -151,5 +154,8 @@ const unsigned short vmcs12_field_offsets[] = { > FIELD(HOST_IA32_SYSENTER_EIP, host_ia32_sysenter_eip), > FIELD(HOST_RSP, host_rsp), > FIELD(HOST_RIP, host_rip), > + FIELD(HOST_S_CET, host_s_cet), > + FIELD(HOST_SSP, host_ssp), > + FIELD(HOST_INTR_SSP_TABLE, host_ssp_tbl), > }; > const unsigned int nr_vmcs12_fields = ARRAY_SIZE(vmcs12_field_offsets); > diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h > index 01936013428b..3884489e7f7e 100644 > --- a/arch/x86/kvm/vmx/vmcs12.h > +++ b/arch/x86/kvm/vmx/vmcs12.h > @@ -117,7 +117,13 @@ struct __packed vmcs12 { > natural_width host_ia32_sysenter_eip; > natural_width host_rsp; > natural_width host_rip; > - natural_width paddingl[8]; /* room for future expansion */ > + 
natural_width host_s_cet; > + natural_width host_ssp; > + natural_width host_ssp_tbl; > + natural_width guest_s_cet; > + natural_width guest_ssp; > + natural_width guest_ssp_tbl; > + natural_width paddingl[2]; /* room for future expansion */ > u32 pin_based_vm_exec_control; > u32 cpu_based_vm_exec_control; > u32 exception_bitmap; > @@ -292,6 +298,12 @@ static inline void vmx_check_vmcs12_offsets(void) > CHECK_OFFSET(host_ia32_sysenter_eip, 656); > CHECK_OFFSET(host_rsp, 664); > CHECK_OFFSET(host_rip, 672); > + CHECK_OFFSET(host_s_cet, 680); > + CHECK_OFFSET(host_ssp, 688); > + CHECK_OFFSET(host_ssp_tbl, 696); > + CHECK_OFFSET(guest_s_cet, 704); > + CHECK_OFFSET(guest_ssp, 712); > + CHECK_OFFSET(guest_ssp_tbl, 720); > CHECK_OFFSET(pin_based_vm_exec_control, 744); > CHECK_OFFSET(cpu_based_vm_exec_control, 748); > CHECK_OFFSET(exception_bitmap, 752); > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > index f0dea8ecd0c6..2c43f1088d77 100644 > --- a/arch/x86/kvm/vmx/vmx.c > +++ b/arch/x86/kvm/vmx/vmx.c > @@ -7731,6 +7731,8 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) > cr4_fixed1_update(X86_CR4_PKE, ecx, feature_bit(PKU)); > cr4_fixed1_update(X86_CR4_UMIP, ecx, feature_bit(UMIP)); > cr4_fixed1_update(X86_CR4_LA57, ecx, feature_bit(LA57)); > + cr4_fixed1_update(X86_CR4_CET, ecx, feature_bit(SHSTK)); > + cr4_fixed1_update(X86_CR4_CET, edx, feature_bit(IBT)); > > #undef cr4_fixed1_update > } It is surprising how little needs to be done to support nested mode, but it does look correct. I might have missed something, though, so I can't be 100% sure in this case. Reviewed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx> Best regards, Maxim Levitsky