On Tue, Sep 17, 2019 at 4:05 PM Krish Sadhukhan <krish.sadhukhan@xxxxxxxxxx> wrote:
>
>
>
> On 09/17/2019 11:50 AM, Marc Orr wrote:
> > Allowing an unlimited number of MSRs to be specified via the VMX
> > load/store MSR lists (e.g., vm-entry MSR load list) is bad for two
> > reasons. First, a guest can specify an unreasonable number of MSRs,
> > forcing KVM to process all of them in software. Second, the SDM bounds
> > the number of MSRs allowed to be packed into the atomic switch MSR lists.
> > Quoting the "Miscellaneous Data" section in the "VMX Capability
> > Reporting Facility" appendix:
> >
> > "Bits 27:25 is used to compute the recommended maximum number of MSRs
> > that should appear in the VM-exit MSR-store list, the VM-exit MSR-load
> > list, or the VM-entry MSR-load list. Specifically, if the value bits
> > 27:25 of IA32_VMX_MISC is N, then 512 * (N + 1) is the recommended
> > maximum number of MSRs to be included in each list. If the limit is
> > exceeded, undefined processor behavior may result (including a machine
> > check during the VMX transition)."
> >
> > Because KVM needs to protect itself and can't model "undefined processor
> > behavior", arbitrarily force a VM-entry to fail due to MSR loading when
> > the MSR load list is too large. Similarly, trigger an abort during a VM
> > exit that encounters an MSR load list or MSR store list that is too large.
> >
> > The MSR list size is intentionally not pre-checked so as to maintain
> > compatibility with hardware inasmuch as possible.
> >
> > Test these new checks with the kvm-unit-test "x86: nvmx: test max atomic
> > switch MSRs".
> >
> > Suggested-by: Jim Mattson <jmattson@xxxxxxxxxx>
> > Reviewed-by: Jim Mattson <jmattson@xxxxxxxxxx>
> > Reviewed-by: Peter Shier <pshier@xxxxxxxxxx>
> > Signed-off-by: Marc Orr <marcorr@xxxxxxxxxx>
> > ---
> > v2 -> v3
> > * Updated commit message.
> > * Removed superfluous function declaration.
> > * Expanded in-line comment.
> >
> >  arch/x86/include/asm/vmx.h |  1 +
> >  arch/x86/kvm/vmx/nested.c  | 44 ++++++++++++++++++++++++++++----------
> >  2 files changed, 34 insertions(+), 11 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> > index a39136b0d509..a1f6ed187ccd 100644
> > --- a/arch/x86/include/asm/vmx.h
> > +++ b/arch/x86/include/asm/vmx.h
> > @@ -110,6 +110,7 @@
> >  #define VMX_MISC_SAVE_EFER_LMA		0x00000020
> >  #define VMX_MISC_ACTIVITY_HLT		0x00000040
> >  #define VMX_MISC_ZERO_LEN_INS		0x40000000
> > +#define VMX_MISC_MSR_LIST_MULTIPLIER	512
> >
> >  /* VMFUNC functions */
> >  #define VMX_VMFUNC_EPTP_SWITCHING	0x00000001
> > diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> > index ced9fba32598..0e29882bb45f 100644
> > --- a/arch/x86/kvm/vmx/nested.c
> > +++ b/arch/x86/kvm/vmx/nested.c
> > @@ -190,6 +190,16 @@ static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
> >  	pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator);
> >  }
> >
> > +static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
> > +{
> > +	return fixed_bits_valid(control, low, high);
> > +}
> > +
> > +static inline u64 vmx_control_msr(u32 low, u32 high)
> > +{
> > +	return low | ((u64)high << 32);
> > +}
> > +
> >  static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
> >  {
> >  	secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
> > @@ -856,18 +866,36 @@ static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu,
> >  	return 0;
> >  }
> >
> > +static u32 nested_vmx_max_atomic_switch_msrs(struct kvm_vcpu *vcpu)
> > +{
> > +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> > +	u64 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
> > +				       vmx->nested.msrs.misc_high);
> > +
> > +	return (vmx_misc_max_msr(vmx_misc) + 1) * VMX_MISC_MSR_LIST_MULTIPLIER;
> > +}
> > +
> >  /*
> >   * Load guest's/host's msr at nested entry/exit.
> >   * return 0 for success, entry index for failure.
> > + *
> > + * One of the failure modes for MSR load/store is when a list exceeds the
> > + * virtual hardware's capacity. To maintain compatibility with hardware inasmuch
> > + * as possible, process all valid entries before failing rather than precheck
> > + * for a capacity violation.
> >   */
> >  static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
> >  {
> >  	u32 i;
> >  	struct vmx_msr_entry e;
> >  	struct msr_data msr;
> > +	u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);
> >
> >  	msr.host_initiated = false;
> >  	for (i = 0; i < count; i++) {
> > +		if (unlikely(i >= max_msr_list_size))
> > +			goto fail;
> > +
> >  		if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
> >  					&e, sizeof(e))) {
> >  			pr_debug_ratelimited(
> > @@ -899,9 +927,14 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
> >  {
> >  	u32 i;
> >  	struct vmx_msr_entry e;
> > +	u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);
> >
> >  	for (i = 0; i < count; i++) {
> >  		struct msr_data msr_info;
> > +
> > +		if (unlikely(i >= max_msr_list_size))
> > +			return -EINVAL;
> > +
> >  		if (kvm_vcpu_read_guest(vcpu,
> >  					gpa + i * sizeof(e),
> >  					&e, 2 * sizeof(u32))) {
> > @@ -1009,17 +1042,6 @@ static u16 nested_get_vpid02(struct kvm_vcpu *vcpu)
> >  	return vmx->nested.vpid02 ? vmx->nested.vpid02 : vmx->vpid;
> >  }
> >
> > -
> > -static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
> > -{
> > -	return fixed_bits_valid(control, low, high);
> > -}
> > -
> > -static inline u64 vmx_control_msr(u32 low, u32 high)
> > -{
> > -	return low | ((u64)high << 32);
> > -}
> > -
> >  static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
> >  {
> >  	superset &= mask;
> Reviewed-by: Krish Sadhukhan <krish.sadhukhan@xxxxxxxxxx>

+Paolo Bonzini
+Radim Krčmář

Ping. Thanks.
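
For anyone wanting to sanity-check the arithmetic the commit message quotes
from the SDM, here is a minimal standalone sketch (userspace C, not kernel
code, and not part of the patch) of how the recommended per-list maximum
falls out of bits 27:25 of IA32_VMX_MISC. It mirrors what the patch's
nested_vmx_max_atomic_switch_msrs() computes via vmx_misc_max_msr() and
VMX_MISC_MSR_LIST_MULTIPLIER; the sample MSR value is hypothetical.

#include <stdint.h>
#include <stdio.h>

/*
 * Recommended per-list maximum, per the SDM text quoted in the commit
 * message: 512 * (N + 1), where N is bits 27:25 of IA32_VMX_MISC.
 */
static uint32_t max_atomic_switch_msrs(uint64_t vmx_misc)
{
	uint32_t n = (uint32_t)(vmx_misc >> 25) & 0x7;	/* bits 27:25 */

	return 512 * (n + 1);
}

int main(void)
{
	/* Hypothetical IA32_VMX_MISC value whose bits 27:25 are 0. */
	uint64_t vmx_misc = 0x401e5;

	/*
	 * Prints 512: with N == 0, each VM-entry/VM-exit MSR list should
	 * hold at most 512 entries. Beyond that, hardware behavior is
	 * undefined; with this patch, KVM instead fails the nested VM
	 * entry or aborts the nested VM exit.
	 */
	printf("max MSRs per list: %u\n", max_atomic_switch_msrs(vmx_misc));
	return 0;
}

With the patch applied, a guest list longer than this limit makes
nested_vmx_load_msr() report a failure at the first out-of-range entry and
nested_vmx_store_msr() return -EINVAL, consistent with the new comment about
processing all valid entries rather than pre-checking the list size.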