When the guest can use VMX instructions (when the "nested" module option is on), it should also be able to read and write VMX MSRs, e.g., to query VMX capabilities. This patch adds this support. Signed-off-by: Nadav Har'El <nyh@xxxxxxxxxx> --- --- .before/arch/x86/kvm/x86.c 2010-06-13 15:01:28.000000000 +0300 +++ .after/arch/x86/kvm/x86.c 2010-06-13 15:01:28.000000000 +0300 @@ -702,7 +702,11 @@ static u32 msrs_to_save[] = { #ifdef CONFIG_X86_64 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif - MSR_IA32_TSC, MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA + MSR_IA32_TSC, MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, + MSR_IA32_FEATURE_CONTROL, MSR_IA32_VMX_BASIC, + MSR_IA32_VMX_PINBASED_CTLS, MSR_IA32_VMX_PROCBASED_CTLS, + MSR_IA32_VMX_EXIT_CTLS, MSR_IA32_VMX_ENTRY_CTLS, + MSR_IA32_VMX_PROCBASED_CTLS2, MSR_IA32_VMX_EPT_VPID_CAP, }; static unsigned num_msrs_to_save; --- .before/arch/x86/kvm/vmx.c 2010-06-13 15:01:28.000000000 +0300 +++ .after/arch/x86/kvm/vmx.c 2010-06-13 15:01:28.000000000 +0300 @@ -1231,6 +1231,98 @@ static void guest_write_tsc(u64 guest_ts } /* + * If we allow our guest to use VMX instructions, we should also let it use + * VMX-specific MSRs. + */ +static int nested_vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) +{ + u64 vmx_msr = 0; + u32 vmx_msr_high, vmx_msr_low; + + switch (msr_index) { + case MSR_IA32_FEATURE_CONTROL: + *pdata = 0; + break; + case MSR_IA32_VMX_BASIC: + /* + * This MSR reports some information about VMX support of the + * processor. We should return information about the VMX we + * emulate for the guest, and the VMCS structure we give it - + * not about the VMX support of the underlying hardware. + * However, some capabilities of the underlying hardware are + * used directly by our emulation (e.g., the physical address + * width), so these are copied from what the hardware reports. 
+ */ + *pdata = VMCS12_REVISION | + (((u64)sizeof(struct vmcs12)) << 32); + rdmsrl(MSR_IA32_VMX_BASIC, vmx_msr); +#define VMX_BASIC_64 0x0001000000000000LLU +#define VMX_BASIC_MEM_TYPE 0x003c000000000000LLU +#define VMX_BASIC_INOUT 0x0040000000000000LLU + *pdata |= vmx_msr & + (VMX_BASIC_64 | VMX_BASIC_MEM_TYPE | VMX_BASIC_INOUT); + break; +#define CORE2_PINBASED_CTLS_MUST_BE_ONE 0x00000016 +#define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x48d + case MSR_IA32_VMX_TRUE_PINBASED_CTLS: + case MSR_IA32_VMX_PINBASED_CTLS: + vmx_msr_low = CORE2_PINBASED_CTLS_MUST_BE_ONE; + vmx_msr_high = CORE2_PINBASED_CTLS_MUST_BE_ONE | + PIN_BASED_EXT_INTR_MASK | + PIN_BASED_NMI_EXITING | + PIN_BASED_VIRTUAL_NMIS; + *pdata = vmx_msr_low | ((u64)vmx_msr_high << 32); + break; + case MSR_IA32_VMX_PROCBASED_CTLS: + /* This MSR determines which vm-execution controls the L1 + * hypervisor may ask, or may not ask, to enable. Normally we + * can only allow enabling features which the hardware can + * support, but we limit ourselves to allowing only known + * features that were tested nested. We allow disabling any + * feature (even if the hardware can't disable it). 
+ */ + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high); + + vmx_msr_low = 0; /* allow disabling any feature */ + vmx_msr_high &= /* do not expose new untested features */ + CPU_BASED_HLT_EXITING | CPU_BASED_CR3_LOAD_EXITING | + CPU_BASED_CR3_STORE_EXITING | CPU_BASED_USE_IO_BITMAPS | + CPU_BASED_MOV_DR_EXITING | CPU_BASED_USE_TSC_OFFSETING | + CPU_BASED_MWAIT_EXITING | CPU_BASED_MONITOR_EXITING | + CPU_BASED_INVLPG_EXITING | CPU_BASED_TPR_SHADOW | + CPU_BASED_USE_MSR_BITMAPS | +#ifdef CONFIG_X86_64 + CPU_BASED_CR8_LOAD_EXITING | + CPU_BASED_CR8_STORE_EXITING | +#endif + CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; + *pdata = vmx_msr_low | ((u64)vmx_msr_high << 32); + break; + case MSR_IA32_VMX_EXIT_CTLS: + *pdata = 0; +#ifdef CONFIG_X86_64 + *pdata |= VM_EXIT_HOST_ADDR_SPACE_SIZE; +#endif + break; + case MSR_IA32_VMX_ENTRY_CTLS: + *pdata = 0; + break; + case MSR_IA32_VMX_PROCBASED_CTLS2: + *pdata = 0; + if (vm_need_virtualize_apic_accesses(vcpu->kvm)) + *pdata |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + break; + case MSR_IA32_VMX_EPT_VPID_CAP: + *pdata = 0; + break; + default: + return 1; + } + + return 0; +} + +/* * Reads an msr value (of 'msr_index') into 'pdata'. * Returns 0 on success, non-0 otherwise. * Assumes vcpu_load() was already called. @@ -1278,6 +1370,8 @@ static int vmx_get_msr(struct kvm_vcpu * /* Otherwise falls through */ default: vmx_load_host_state(to_vmx(vcpu)); + if (nested && !nested_vmx_get_msr(vcpu, msr_index, &data)) + break; msr = find_msr_entry(to_vmx(vcpu), msr_index); if (msr) { vmx_load_host_state(to_vmx(vcpu)); @@ -1292,6 +1386,27 @@ static int vmx_get_msr(struct kvm_vcpu * } /* + * Writes msr value for nested virtualization + * Returns 0 on success, non-0 otherwise. 
+ */ +static int nested_vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) +{ + switch (msr_index) { + case MSR_IA32_FEATURE_CONTROL: + if ((data & (FEATURE_CONTROL_LOCKED | + FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)) + != (FEATURE_CONTROL_LOCKED | + FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)) + return 1; + break; + default: + return 1; + } + + return 0; +} + +/* * Writes msr value into into the appropriate "register". * Returns 0 on success, non-0 otherwise. * Assumes vcpu_load() was already called. @@ -1349,6 +1464,9 @@ static int vmx_set_msr(struct kvm_vcpu * return 1; /* Otherwise falls through */ default: + if (nested && + !nested_vmx_set_msr(vcpu, msr_index, data)) + break; msr = find_msr_entry(vmx, msr_index); if (msr) { vmx_load_host_state(vmx); -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html