On Tue, Sep 17, 2019 at 4:05 PM Krish Sadhukhan <krish.sadhukhan@xxxxxxxxxx> wrote:
>
>
>
> On 09/17/2019 11:50 AM, Marc Orr wrote:
> > Allowing an unlimited number of MSRs to be specified via the VMX
> > load/store MSR lists (e.g., vm-entry MSR load list) is bad for two
> > reasons. First, a guest can specify an unreasonable number of MSRs,
> > forcing KVM to process all of them in software. Second, the SDM bounds
> > the number of MSRs allowed to be packed into the atomic switch MSR lists.
> > Quoting the "Miscellaneous Data" section in the "VMX Capability
> > Reporting Facility" appendix:
> >
> > "Bits 27:25 is used to compute the recommended maximum number of MSRs
> > that should appear in the VM-exit MSR-store list, the VM-exit MSR-load
> > list, or the VM-entry MSR-load list. Specifically, if the value bits
> > 27:25 of IA32_VMX_MISC is N, then 512 * (N + 1) is the recommended
> > maximum number of MSRs to be included in each list. If the limit is
> > exceeded, undefined processor behavior may result (including a machine
> > check during the VMX transition)."
> >
> > Because KVM needs to protect itself and can't model "undefined processor
> > behavior", arbitrarily force a VM-entry to fail due to MSR loading when
> > the MSR load list is too large. Similarly, trigger an abort during a VM
> > exit that encounters an MSR load list or MSR store list that is too large.
> >
> > The MSR list size is intentionally not pre-checked so as to maintain
> > compatibility with hardware inasmuch as possible.
> >
> > Test these new checks with the kvm-unit-test "x86: nvmx: test max atomic
> > switch MSRs".
> >
> > Suggested-by: Jim Mattson <jmattson@xxxxxxxxxx>
> > Reviewed-by: Jim Mattson <jmattson@xxxxxxxxxx>
> > Reviewed-by: Peter Shier <pshier@xxxxxxxxxx>
> > Signed-off-by: Marc Orr <marcorr@xxxxxxxxxx>
> > ---
> > v2 -> v3
> > * Updated commit message.
> > * Removed superfluous function declaration.
> > * Expanded in-line comment.
> >
> >  arch/x86/include/asm/vmx.h |  1 +
> >  arch/x86/kvm/vmx/nested.c  | 44 ++++++++++++++++++++++++++++----------
> >  2 files changed, 34 insertions(+), 11 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> > index a39136b0d509..a1f6ed187ccd 100644
> > --- a/arch/x86/include/asm/vmx.h
> > +++ b/arch/x86/include/asm/vmx.h
> > @@ -110,6 +110,7 @@
> >  #define VMX_MISC_SAVE_EFER_LMA		0x00000020
> >  #define VMX_MISC_ACTIVITY_HLT		0x00000040
> >  #define VMX_MISC_ZERO_LEN_INS		0x40000000
> > +#define VMX_MISC_MSR_LIST_MULTIPLIER	512
> >
> >  /* VMFUNC functions */
> >  #define VMX_VMFUNC_EPTP_SWITCHING	0x00000001
> > diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> > index ced9fba32598..0e29882bb45f 100644
> > --- a/arch/x86/kvm/vmx/nested.c
> > +++ b/arch/x86/kvm/vmx/nested.c
> > @@ -190,6 +190,16 @@ static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
> >  	pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator);
> >  }
> >
> > +static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
> > +{
> > +	return fixed_bits_valid(control, low, high);
> > +}
> > +
> > +static inline u64 vmx_control_msr(u32 low, u32 high)
> > +{
> > +	return low | ((u64)high << 32);
> > +}
> > +
> >  static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
> >  {
> >  	secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
> > @@ -856,18 +866,36 @@ static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu,
> >  	return 0;
> >  }
> >
> > +static u32 nested_vmx_max_atomic_switch_msrs(struct kvm_vcpu *vcpu)
> > +{
> > +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> > +	u64 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
> > +				       vmx->nested.msrs.misc_high);
> > +
> > +	return (vmx_misc_max_msr(vmx_misc) + 1) * VMX_MISC_MSR_LIST_MULTIPLIER;
> > +}
> > +
> >  /*
> >   * Load guest's/host's msr at nested entry/exit.
> >   * return 0 for success, entry index for failure.
> > + *
> > + * One of the failure modes for MSR load/store is when a list exceeds the
> > + * virtual hardware's capacity. To maintain compatibility with hardware inasmuch
> > + * as possible, process all valid entries before failing rather than precheck
> > + * for a capacity violation.
> >   */
> >  static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
> >  {
> >  	u32 i;
> >  	struct vmx_msr_entry e;
> >  	struct msr_data msr;
> > +	u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);
> >
> >  	msr.host_initiated = false;
> >  	for (i = 0; i < count; i++) {
> > +		if (unlikely(i >= max_msr_list_size))
> > +			goto fail;
> > +
> >  		if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
> >  					&e, sizeof(e))) {
> >  			pr_debug_ratelimited(
> > @@ -899,9 +927,14 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
> >  {
> >  	u32 i;
> >  	struct vmx_msr_entry e;
> > +	u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);
> >
> >  	for (i = 0; i < count; i++) {
> >  		struct msr_data msr_info;
> > +
> > +		if (unlikely(i >= max_msr_list_size))
> > +			return -EINVAL;
> > +
> >  		if (kvm_vcpu_read_guest(vcpu,
> >  					gpa + i * sizeof(e),
> >  					&e, 2 * sizeof(u32))) {
> > @@ -1009,17 +1042,6 @@ static u16 nested_get_vpid02(struct kvm_vcpu *vcpu)
> >  	return vmx->nested.vpid02 ? vmx->nested.vpid02 : vmx->vpid;
> >  }
> >
> > -
> > -static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
> > -{
> > -	return fixed_bits_valid(control, low, high);
> > -}
> > -
> > -static inline u64 vmx_control_msr(u32 low, u32 high)
> > -{
> > -	return low | ((u64)high << 32);
> > -}
> > -
> >  static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
> >  {
> >  	superset &= mask;
> Reviewed-by: Krish Sadhukhan <krish.sadhukhan@xxxxxxxxxx>

+Paolo Bonzini
+Radim Krčmář

Ping. Thanks.
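
For anyone wanting to sanity-check the arithmetic the commit message quotes
from the SDM, here is a minimal standalone sketch (userspace C, not kernel
code, and not part of the patch) of how the recommended per-list maximum
falls out of bits 27:25 of IA32_VMX_MISC. It mirrors what the patch's
nested_vmx_max_atomic_switch_msrs() computes via vmx_misc_max_msr() and
VMX_MISC_MSR_LIST_MULTIPLIER; the sample MSR value is hypothetical.

#include <stdint.h>
#include <stdio.h>

/*
 * Recommended per-list maximum, per the SDM text quoted in the commit
 * message: 512 * (N + 1), where N is bits 27:25 of IA32_VMX_MISC.
 */
static uint32_t max_atomic_switch_msrs(uint64_t vmx_misc)
{
	uint32_t n = (uint32_t)(vmx_misc >> 25) & 0x7;	/* bits 27:25 */

	return 512 * (n + 1);
}

int main(void)
{
	/* Hypothetical IA32_VMX_MISC value whose bits 27:25 are 0. */
	uint64_t vmx_misc = 0x401e5;

	/*
	 * Prints 512: with N == 0, each VM-entry/VM-exit MSR list should
	 * hold at most 512 entries. Beyond that, hardware behavior is
	 * undefined; with this patch, KVM instead fails the nested VM
	 * entry or aborts the nested VM exit.
	 */
	printf("max MSRs per list: %u\n", max_atomic_switch_msrs(vmx_misc));
	return 0;
}

With the patch applied, a guest list longer than this limit makes
nested_vmx_load_msr() report a failure at the first out-of-range entry and
nested_vmx_store_msr() return -EINVAL, consistent with the new comment about
processing all valid entries rather than pre-checking the list size.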