Re: [PATCH 06/10] KVM: x86: do not load vmcs12 pages while still in SMM

Paolo Bonzini <pbonzini@xxxxxxxxxx> · Tue, 31 Jul 2018 09:40:00 +0200

On 30/07/2018 21:27, Jim Mattson wrote:
> On Sat, Jul 28, 2018 at 4:10 PM, Paolo Bonzini <pbonzini@xxxxxxxxxx> wrote:
>> If the vCPU enters system management mode while running a nested guest,
>> RSM starts processing the vmentry while still in SMM.  In that case,
>> however, the pages pointed to by the vmcs12 might be incorrectly
>> loaded from SMRAM.  To avoid this, delay the handling of the pages
>> until just before the next vmentry.  This is done with a new request
>> and a new entry in kvm_x86_ops, which we will be able to reuse for
>> nested VMX state migration.
>>
>> Extracted from a patch by Jim Mattson and KarimAllah Ahmed.
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
>> ---
>>  arch/x86/include/asm/kvm_host.h |  3 +++
>>  arch/x86/kvm/vmx.c              | 53 +++++++++++++++++++++++++++--------------
>>  arch/x86/kvm/x86.c              |  2 ++
>>  3 files changed, 40 insertions(+), 18 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index c13cd28d9d1b..da957725992d 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -75,6 +75,7 @@
>>  #define KVM_REQ_HV_EXIT                        KVM_ARCH_REQ(21)
>>  #define KVM_REQ_HV_STIMER              KVM_ARCH_REQ(22)
>>  #define KVM_REQ_LOAD_EOI_EXITMAP       KVM_ARCH_REQ(23)
>> +#define KVM_REQ_GET_VMCS12_PAGES       KVM_ARCH_REQ(24)
>>
>>  #define CR0_RESERVED_BITS                                               \
>>         (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
>> @@ -1085,6 +1086,8 @@ struct kvm_x86_ops {
>>
>>         void (*setup_mce)(struct kvm_vcpu *vcpu);
>>
>> +       void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
>> +
>>         int (*smi_allowed)(struct kvm_vcpu *vcpu);
>>         int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
>>         int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase);
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 2630ab38d72c..17aede06ae0e 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -10636,9 +10636,9 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
>>  static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
>>                                                  struct vmcs12 *vmcs12);
>>
>> -static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
>> -                                       struct vmcs12 *vmcs12)
>> +static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
>>  {
>> +       struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
>>         struct vcpu_vmx *vmx = to_vmx(vcpu);
>>         struct page *page;
>>         u64 hpa;
>> @@ -11750,13 +11750,18 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
>>         return 0;
>>  }
>>
>> -static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu)
>> +/*
>> + * If p_exit_qual is NULL, this is being called from state restore (either
>> + * kvm_set_nested_state or RSM).  Otherwise it's called from vmlaunch/vmresume.
>> + */
>> +static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *p_exit_qual)
>>  {
>>         struct vcpu_vmx *vmx = to_vmx(vcpu);
>>         struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
>> +       bool from_vmentry = !!p_exit_qual;
>>         u32 msr_entry_idx;
>> -       u32 exit_qual;
>> -       int r;
>> +       u32 dummy_exit_qual;
>> +       int r = 0;
>>
>>         enter_guest_mode(vcpu);
>>
>> @@ -11770,17 +11775,27 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu)
>>                 vcpu->arch.tsc_offset += vmcs12->tsc_offset;
>>
>>         r = EXIT_REASON_INVALID_STATE;
>> -       if (prepare_vmcs02(vcpu, vmcs12, &exit_qual))
>> +       if (prepare_vmcs02(vcpu, vmcs12, from_vmentry ? p_exit_qual : &dummy_exit_qual))
>>                 goto fail;
>>
>> -       nested_get_vmcs12_pages(vcpu, vmcs12);
>> +       if (from_vmentry) {
>> +               nested_get_vmcs12_pages(vcpu);
>>
>> -       r = EXIT_REASON_MSR_LOAD_FAIL;
>> -       msr_entry_idx = nested_vmx_load_msr(vcpu,
>> -                                           vmcs12->vm_entry_msr_load_addr,
>> -                                           vmcs12->vm_entry_msr_load_count);
>> -       if (msr_entry_idx)
>> -               goto fail;
>> +               r = EXIT_REASON_MSR_LOAD_FAIL;
>> +               msr_entry_idx = nested_vmx_load_msr(vcpu,
>> +                                                   vmcs12->vm_entry_msr_load_addr,
>> +                                                   vmcs12->vm_entry_msr_load_count);
>> +               if (msr_entry_idx)
>> +                       goto fail;
>> +       } else {
>> +               /*
>> +                * The MMU is not initialized to point at the right entities yet and
>> +                * "get pages" would need to read data from the guest (i.e. we will
>> +                * need to perform gpa to hpa translation). Request a call
>> +                * to nested_get_vmcs12_pages before the next VM-entry.
>> +                */
>> +               kvm_make_request(KVM_REQ_GET_VMCS12_PAGES, vcpu);
>> +       }
> 
> The from_vmentry variable seems unnecessary. Why not (a) hoist the
> call to nested_vmx_load_msr() into nested_vmx_run and (b) always use
> the deferred KVM_REQ_GET_VMCS12_PAGES?

Yeah, I thought about that.  from_vmentry is ugly.

However, for (a) you would have to duplicate the code at the "fail"
label, which is uglier than from_vmentry; as to (b), it is a little bit
slower and our nested vmentry is already slow enough...

Paolo