Jim Mattson <jmattson@xxxxxxxxxx> writes: > On Wed, Jun 29, 2022 at 8:07 AM Vitaly Kuznetsov <vkuznets@xxxxxxxxxx> wrote: >> >> Using raw host MSR values for setting up nested VMX control MSRs is >> incorrect as some features need to disabled, e.g. when KVM runs as >> a nested hypervisor on Hyper-V and uses Enlightened VMCS or when a >> workaround for IA32_PERF_GLOBAL_CTRL is applied. For non-nested VMX, this >> is done in setup_vmcs_config() and the result is stored in vmcs_config. >> Use it for setting up allowed-1 bits in nested VMX MSRs too. >> >> Suggested-by: Sean Christopherson <seanjc@xxxxxxxxxx> >> Signed-off-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx> >> --- >> arch/x86/kvm/vmx/nested.c | 34 ++++++++++++++++------------------ >> arch/x86/kvm/vmx/nested.h | 2 +- >> arch/x86/kvm/vmx/vmx.c | 5 ++--- >> 3 files changed, 19 insertions(+), 22 deletions(-) >> >> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c >> index 88625965f7b7..e5b19b5e6cab 100644 >> --- a/arch/x86/kvm/vmx/nested.c >> +++ b/arch/x86/kvm/vmx/nested.c >> @@ -6565,8 +6565,13 @@ static u64 nested_vmx_calc_vmcs_enum_msr(void) >> * bit in the high half is on if the corresponding bit in the control field >> * may be on. See also vmx_control_verify(). >> */ >> -void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps) >> +void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps) >> { >> + struct nested_vmx_msrs *msrs = &vmcs_conf->nested; >> + >> + /* Take the allowed-1 bits from KVM's sanitized VMCS configuration. */ >> + u32 ignore_high; >> + > > Giving this object a name seems gauche. > >> /* >> * Note that as a general rule, the high half of the MSRs (bits in >> * the control fields which may be 1) should be initialized by the >> @@ -6583,11 +6588,11 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps) >> */ >> >> /* pin-based controls */ >> - rdmsr(MSR_IA32_VMX_PINBASED_CTLS, >> - msrs->pinbased_ctls_low, >> - msrs->pinbased_ctls_high); >> + rdmsr(MSR_IA32_VMX_PINBASED_CTLS, msrs->pinbased_ctls_low, ignore_high); > > Perhaps "(u32){0}" rather than "ignore_high"? > While this certainly looks like a cool trick (thanks!), both rdmsr() and 'ignore_high' are gone later in the series. I will, however, adopt the change, even if just to show off) >> msrs->pinbased_ctls_low |= >> PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; > > NYC, but why is this one '|=', and the rest just '='? Does there exist > a CPU that requires more than PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR? > Looking at the commit which introduced this, commit eabeaaccfca0ed61b8e00a09b8cfa703c4f11b59 Author: Jan Kiszka <jan.kiszka@xxxxxxxxxxx> Date: Wed Mar 13 11:30:50 2013 +0100 KVM: nVMX: Clean up and fix pin-based execution controls I don't think '|=' is needed. It is, of course, possible that when KVM is running nested, required-1 bits are mangled by the underlying hypervisor but this is a) unlikely b) will only be observed by KVM's L1 (which means we're talking about 3-level nesting here). Let's be brave and 'fix' '|=' here. >> + >> + msrs->pinbased_ctls_high = vmcs_conf->pin_based_exec_ctrl; >> msrs->pinbased_ctls_high &= >> PIN_BASED_EXT_INTR_MASK | >> PIN_BASED_NMI_EXITING | >> @@ -6598,12 +6603,10 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps) >> PIN_BASED_VMX_PREEMPTION_TIMER; >> >> /* exit controls */ >> - rdmsr(MSR_IA32_VMX_EXIT_CTLS, >> - msrs->exit_ctls_low, >> - msrs->exit_ctls_high); >> msrs->exit_ctls_low = >> VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; >> >> + msrs->exit_ctls_high = vmcs_conf->vmexit_ctrl; >> msrs->exit_ctls_high &= >> #ifdef CONFIG_X86_64 >> VM_EXIT_HOST_ADDR_SPACE_SIZE | >> @@ -6619,11 +6622,10 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps) >> msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS; >> >> /* entry controls */ >> - rdmsr(MSR_IA32_VMX_ENTRY_CTLS, >> - msrs->entry_ctls_low, >> - msrs->entry_ctls_high); >> msrs->entry_ctls_low = >> VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; >> + >> + msrs->entry_ctls_high = vmcs_conf->vmentry_ctrl; >> msrs->entry_ctls_high &= >> #ifdef CONFIG_X86_64 >> VM_ENTRY_IA32E_MODE | >> @@ -6637,11 +6639,10 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps) >> msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS; >> >> /* cpu-based controls */ >> - rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, >> - msrs->procbased_ctls_low, >> - msrs->procbased_ctls_high); >> msrs->procbased_ctls_low = >> CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR; >> + >> + msrs->procbased_ctls_high = vmcs_conf->cpu_based_exec_ctrl; >> msrs->procbased_ctls_high &= >> CPU_BASED_INTR_WINDOW_EXITING | >> CPU_BASED_NMI_WINDOW_EXITING | CPU_BASED_USE_TSC_OFFSETTING | >> @@ -6675,12 +6676,9 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps) >> * depend on CPUID bits, they are added later by >> * vmx_vcpu_after_set_cpuid. >> */ >> - if (msrs->procbased_ctls_high & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) >> - rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, >> - msrs->secondary_ctls_low, >> - msrs->secondary_ctls_high); >> - >> msrs->secondary_ctls_low = 0; >> + >> + msrs->secondary_ctls_high = vmcs_conf->cpu_based_2nd_exec_ctrl; >> msrs->secondary_ctls_high &= >> SECONDARY_EXEC_DESC | >> SECONDARY_EXEC_ENABLE_RDTSCP | >> diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h >> index c92cea0b8ccc..fae047c6204b 100644 >> --- a/arch/x86/kvm/vmx/nested.h >> +++ b/arch/x86/kvm/vmx/nested.h >> @@ -17,7 +17,7 @@ enum nvmx_vmentry_status { >> }; >> >> void vmx_leave_nested(struct kvm_vcpu *vcpu); >> -void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps); >> +void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps); >> void nested_vmx_hardware_unsetup(void); >> __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)); >> void nested_vmx_set_vmcs_shadowing_bitmap(void); >> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c >> index 5f7ef1f8d2c6..5d4158b7421c 100644 >> --- a/arch/x86/kvm/vmx/vmx.c >> +++ b/arch/x86/kvm/vmx/vmx.c >> @@ -7310,7 +7310,7 @@ static int __init vmx_check_processor_compat(void) >> if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) >> return -EIO; >> if (nested) >> - nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept); >> + nested_vmx_setup_ctls_msrs(&vmcs_conf, vmx_cap.ept); >> if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) { >> printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n", >> smp_processor_id()); >> @@ -8285,8 +8285,7 @@ static __init int hardware_setup(void) >> setup_default_sgx_lepubkeyhash(); >> >> if (nested) { >> - nested_vmx_setup_ctls_msrs(&vmcs_config.nested, >> - vmx_capability.ept); >> + nested_vmx_setup_ctls_msrs(&vmcs_config, vmx_capability.ept); >> >> r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers); >> if (r) >> -- >> 2.35.3 >> > -- Vitaly