On 23/11/2016 02:14, David Matlack wrote: > The VMX capability MSRs advertise the set of features the KVM virtual > CPU can support. This set of features vary across different host CPUs > and KVM versions. This patch aims to addresses both sources of > differences, allowing VMs to be migrated across CPUs and KVM versions > without guest-visible changes to these MSRs. Note that cross-KVM- > version migration is only supported from this point forward. > > When the VMX capability MSRs are restored, they are audited to check > that the set of features advertised are a subset of what KVM and the > CPU support. > > Since the VMX capability MSRs are read-only, they do not need to be on > the default MSR save/restore lists. The userspace hypervisor can set > the values of these MSRs or read them from KVM at VCPU creation time, > and restore the same value after every save/restore. > > Signed-off-by: David Matlack <dmatlack@xxxxxxxxxx> > --- > arch/x86/include/asm/vmx.h | 31 +++++ > arch/x86/kvm/vmx.c | 317 +++++++++++++++++++++++++++++++++++++++++---- > 2 files changed, 324 insertions(+), 24 deletions(-) > > diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h > index a002b07..a4ca897 100644 > --- a/arch/x86/include/asm/vmx.h > +++ b/arch/x86/include/asm/vmx.h > @@ -25,6 +25,7 @@ > #define VMX_H > > > +#include <linux/bitops.h> > #include <linux/types.h> > #include <uapi/asm/vmx.h> > > @@ -110,6 +111,36 @@ > #define VMX_MISC_SAVE_EFER_LMA 0x00000020 > #define VMX_MISC_ACTIVITY_HLT 0x00000040 > > +static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic) > +{ > + return vmx_basic & GENMASK_ULL(30, 0); > +} > + > +static inline u32 vmx_basic_vmcs_size(u64 vmx_basic) > +{ > + return (vmx_basic & GENMASK_ULL(44, 32)) >> 32; > +} > + > +static inline int vmx_misc_preemption_timer_rate(u64 vmx_misc) > +{ > + return vmx_misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; > +} > + > +static inline int vmx_misc_cr3_count(u64 vmx_misc) > +{ > + return (vmx_misc & GENMASK_ULL(24, 16)) >> 16; > +} > + > +static inline int vmx_misc_max_msr(u64 vmx_misc) > +{ > + return (vmx_misc & GENMASK_ULL(27, 25)) >> 25; > +} > + > +static inline int vmx_misc_mseg_revid(u64 vmx_misc) > +{ > + return (vmx_misc & GENMASK_ULL(63, 32)) >> 32; > +} > + > /* VMCS Encodings */ > enum vmcs_field { > VIRTUAL_PROCESSOR_ID = 0x00000000, > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 5382b82..6ec3832 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -463,6 +463,12 @@ struct nested_vmx { > u32 nested_vmx_misc_high; > u32 nested_vmx_ept_caps; > +/* > + * Called when userspace is restoring VMX MSRs. > + * > + * Returns 0 on success, non-0 otherwise. > + */ > +static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) > { > struct vcpu_vmx *vmx = to_vmx(vcpu); > > switch (msr_index) { > case MSR_IA32_VMX_BASIC: > + return vmx_restore_vmx_basic(vmx, data); > + case MSR_IA32_VMX_TRUE_PINBASED_CTLS: > + case MSR_IA32_VMX_PINBASED_CTLS: > + case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: > + case MSR_IA32_VMX_PROCBASED_CTLS: > + case MSR_IA32_VMX_TRUE_EXIT_CTLS: > + case MSR_IA32_VMX_EXIT_CTLS: > + case MSR_IA32_VMX_TRUE_ENTRY_CTLS: > + case MSR_IA32_VMX_ENTRY_CTLS: PINBASED_CTLS, PROCBASED_CTLS, EXIT_CTLS and ENTRY_CTLS can be derived from their "true" counterparts, so I think it's better to remove the "non-true" ones from struct nested_vmx (and/or add the "true" ones when missing) and make them entirely computed. But it can be done on top. Paolo > + case MSR_IA32_VMX_PROCBASED_CTLS2: > + return vmx_restore_control_msr(vmx, msr_index, data); > + case MSR_IA32_VMX_MISC: > + return vmx_restore_vmx_misc(vmx, data); > + case MSR_IA32_VMX_CR0_FIXED0: > + case MSR_IA32_VMX_CR4_FIXED0: > + return vmx_restore_fixed0_msr(vmx, msr_index, data); > + case MSR_IA32_VMX_CR0_FIXED1: > + case MSR_IA32_VMX_CR4_FIXED1: > + return vmx_restore_fixed1_msr(vmx, msr_index, data); > + case MSR_IA32_VMX_EPT_VPID_CAP: > + return vmx_restore_vmx_ept_vpid_cap(vmx, data); > + case MSR_IA32_VMX_VMCS_ENUM: > + vmx->nested.nested_vmx_vmcs_enum = data; > + return 0; > + default: > /* > - * This MSR reports some information about VMX support. We > - * should return information about the VMX we emulate for the > - * guest, and the VMCS structure we give it - not about the > - * VMX support of the underlying hardware. > + * The rest of the VMX capability MSRs do not support restore. > */ > - *pdata = VMCS12_REVISION | VMX_BASIC_TRUE_CTLS | > - ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) | > - (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT); > - if (cpu_has_vmx_basic_inout()) > - *pdata |= VMX_BASIC_INOUT; > + return -EINVAL; > + } > + > + return 0; > +} > + > +/* Returns 0 on success, non-0 otherwise. */ > +static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) > +{ > + struct vcpu_vmx *vmx = to_vmx(vcpu); > + > + switch (msr_index) { > + case MSR_IA32_VMX_BASIC: > + *pdata = vmx->nested.nested_vmx_basic; > break; > case MSR_IA32_VMX_TRUE_PINBASED_CTLS: > case MSR_IA32_VMX_PINBASED_CTLS: > @@ -2904,27 +3176,20 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) > vmx->nested.nested_vmx_misc_low, > vmx->nested.nested_vmx_misc_high); > break; > - /* > - * These MSRs specify bits which the guest must keep fixed (on or off) > - * while L1 is in VMXON mode (in L1's root mode, or running an L2). > - * We picked the standard core2 setting. > - */ > -#define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE) > -#define VMXON_CR4_ALWAYSON X86_CR4_VMXE > case MSR_IA32_VMX_CR0_FIXED0: > - *pdata = VMXON_CR0_ALWAYSON; > + *pdata = vmx->nested.nested_vmx_cr0_fixed0; > break; > case MSR_IA32_VMX_CR0_FIXED1: > - *pdata = -1ULL; > + *pdata = vmx->nested.nested_vmx_cr0_fixed1; > break; > case MSR_IA32_VMX_CR4_FIXED0: > - *pdata = VMXON_CR4_ALWAYSON; > + *pdata = vmx->nested.nested_vmx_cr4_fixed0; > break; > case MSR_IA32_VMX_CR4_FIXED1: > - *pdata = -1ULL; > + *pdata = vmx->nested.nested_vmx_cr4_fixed1; > break; > case MSR_IA32_VMX_VMCS_ENUM: > - *pdata = 0x2e; /* highest index: VMX_PREEMPTION_TIMER_VALUE */ > + *pdata = vmx->nested.nested_vmx_vmcs_enum; > break; > case MSR_IA32_VMX_PROCBASED_CTLS2: > *pdata = vmx_control_msr( > @@ -3107,7 +3372,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > vmx_leave_nested(vcpu); > break; > case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: > - return 1; /* they are read-only */ > + if (!msr_info->host_initiated) > + return 1; /* they are read-only */ > + if (!nested_vmx_allowed(vcpu)) > + return 1; > + return vmx_set_vmx_msr(vcpu, msr_index, data); > case MSR_IA32_XSS: > if (!vmx_xsaves_supported()) > return 1; > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html