On 10/17/2010 12:06 PM, Nadav Har'El wrote:
When the guest can use VMX instructions (when the "nested" module option is on), it should also be able to read and write VMX MSRs, e.g., to query about VMX capabilities. This patch adds this support. Signed-off-by: Nadav Har'El<nyh@xxxxxxxxxx> --- arch/x86/kvm/vmx.c | 117 +++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 6 +- 2 files changed, 122 insertions(+), 1 deletion(-) --- .before/arch/x86/kvm/x86.c 2010-10-17 11:52:00.000000000 +0200 +++ .after/arch/x86/kvm/x86.c 2010-10-17 11:52:00.000000000 +0200 @@ -789,7 +789,11 @@ static u32 msrs_to_save[] = { #ifdef CONFIG_X86_64 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif - MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA + MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, + MSR_IA32_FEATURE_CONTROL, MSR_IA32_VMX_BASIC, + MSR_IA32_VMX_PINBASED_CTLS, MSR_IA32_VMX_PROCBASED_CTLS, + MSR_IA32_VMX_EXIT_CTLS, MSR_IA32_VMX_ENTRY_CTLS, + MSR_IA32_VMX_PROCBASED_CTLS2, MSR_IA32_VMX_EPT_VPID_CAP, };
These MSRs are read-only by the guest (except FEATURE_CONTROL). No need to save/restore them.
static unsigned num_msrs_to_save; --- .before/arch/x86/kvm/vmx.c 2010-10-17 11:52:00.000000000 +0200 +++ .after/arch/x86/kvm/vmx.c 2010-10-17 11:52:00.000000000 +0200 @@ -1216,6 +1216,119 @@ static void vmx_adjust_tsc_offset(struct } /* + * If we allow our guest to use VMX instructions (i.e., nested VMX), we should + * also let it use VMX-specific MSRs. + * vmx_get_vmx_msr() and vmx_set_vmx_msr() return 0 when we handled a + * VMX-specific MSR, or 1 when we haven't (and the caller should handled it + * like all other MSRs). + */ +static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) +{ + u64 vmx_msr = 0; + u32 vmx_msr_high, vmx_msr_low; + + switch (msr_index) { + case MSR_IA32_FEATURE_CONTROL: + *pdata = 0; + break; + case MSR_IA32_VMX_BASIC: + /* + * This MSR reports some information about VMX support of the + * processor. We should return information about the VMX we + * emulate for the guest, and the VMCS structure we give it - + * not about the VMX support of the underlying hardware. + * However, some capabilities of the underlying hardware are + * used directly by our emulation (e.g., the physical address + * width), so these are copied from what the hardware reports. + */ + *pdata = VMCS12_REVISION | (((u64)sizeof(struct vmcs12))<< 32);
Let's reserve 4K unconditionally to avoid future complications.
+ rdmsrl(MSR_IA32_VMX_BASIC, vmx_msr); +#define VMX_BASIC_64 0x0001000000000000LLU +#define VMX_BASIC_MEM_TYPE 0x003c000000000000LLU +#define VMX_BASIC_INOUT 0x0040000000000000LLU
Please move the defines to vmx.h (or msr-index.h).
+ *pdata |= vmx_msr& + (VMX_BASIC_64 | VMX_BASIC_MEM_TYPE | VMX_BASIC_INOUT);
I don't see why we need the real data here. Nothing prevents us from supporting 64-bit physical addresses on 32-bit hosts (so long as we use gpa_t for addresses; ditto for MEM_TYPE and INOUT.
It's helpful to have fixed values here to remove obstacles to live migration.
+ break; +#define CORE2_PINBASED_CTLS_MUST_BE_ONE 0x00000016
Please use the bit names instead.
+#define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x48d
msr-index.h
+ case MSR_IA32_VMX_TRUE_PINBASED_CTLS: + case MSR_IA32_VMX_PINBASED_CTLS: + vmx_msr_low = CORE2_PINBASED_CTLS_MUST_BE_ONE; + vmx_msr_high = CORE2_PINBASED_CTLS_MUST_BE_ONE | + PIN_BASED_EXT_INTR_MASK | + PIN_BASED_NMI_EXITING | + PIN_BASED_VIRTUAL_NMIS; + *pdata = vmx_msr_low | ((u64)vmx_msr_high<< 32); + break; + case MSR_IA32_VMX_PROCBASED_CTLS: + /* This MSR determines which vm-execution controls the L1 + * hypervisor may ask, or may not ask, to enable. Normally we + * can only allow enabling features which the hardware can + * support, but we limit ourselves to allowing only known + * features that were tested nested. We allow disabling any + * feature (even if the hardware can't disable it). + */ + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high); + + vmx_msr_low = 0; /* allow disabling any feature */
What if the host doesn't allow disabling a feature? I think we can't modify vmx_msr_low.
+ vmx_msr_high&= /* do not expose new untested features */ + CPU_BASED_HLT_EXITING | CPU_BASED_CR3_LOAD_EXITING | + CPU_BASED_CR3_STORE_EXITING | CPU_BASED_USE_IO_BITMAPS | + CPU_BASED_MOV_DR_EXITING | CPU_BASED_USE_TSC_OFFSETING | + CPU_BASED_MWAIT_EXITING | CPU_BASED_MONITOR_EXITING | + CPU_BASED_INVLPG_EXITING | CPU_BASED_TPR_SHADOW | + CPU_BASED_USE_MSR_BITMAPS | +#ifdef CONFIG_X86_64 + CPU_BASED_CR8_LOAD_EXITING | + CPU_BASED_CR8_STORE_EXITING | +#endif + CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; + *pdata = vmx_msr_low | ((u64)vmx_msr_high<< 32); + break; + case MSR_IA32_VMX_EXIT_CTLS: + *pdata = 0; +#ifdef CONFIG_X86_64 + *pdata |= VM_EXIT_HOST_ADDR_SPACE_SIZE; +#endif + break; + case MSR_IA32_VMX_ENTRY_CTLS: + *pdata = 0; + break; + case MSR_IA32_VMX_PROCBASED_CTLS2: + *pdata = 0; + if (vm_need_virtualize_apic_accesses(vcpu->kvm)) + *pdata |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + break; + case MSR_IA32_VMX_EPT_VPID_CAP: + *pdata = 0; + break; + default: + return 1; + } + + return 0; +} + +static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) +{ + switch (msr_index) { + case MSR_IA32_FEATURE_CONTROL: + case MSR_IA32_VMX_BASIC: + case MSR_IA32_VMX_TRUE_PINBASED_CTLS: + case MSR_IA32_VMX_PINBASED_CTLS: + case MSR_IA32_VMX_PROCBASED_CTLS: + case MSR_IA32_VMX_EXIT_CTLS: + case MSR_IA32_VMX_ENTRY_CTLS: + case MSR_IA32_VMX_PROCBASED_CTLS2: + case MSR_IA32_VMX_EPT_VPID_CAP: + pr_unimpl(vcpu, "unimplemented VMX MSR write: 0x%x data %llx\n", + msr_index, data); + return 0;
These are illegal to write anyway and should #GP (except FEATURE_CONTROL). We will however need a way for userspace to write these MSRs to allow fine tuning the exposed features (as we do with cpuid).
+ default: + return 1; + } +}
-- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html