Re: [PATCH 06/10] KVM: x86: do not load vmcs12 pages while still in SMM

Jim Mattson <jmattson@xxxxxxxxxx> · Mon, 30 Jul 2018 12:27:48 -0700

On Sat, Jul 28, 2018 at 4:10 PM, Paolo Bonzini <pbonzini@xxxxxxxxxx> wrote:
> If the vCPU enters system management mode while running a nested guest,
> RSM starts processing the vmentry while still in SMM.  In that case,
> however, the pages pointed to by the vmcs12 might be incorrectly
> loaded from SMRAM.  To avoid this, delay the handling of the pages
> until just before the next vmentry.  This is done with a new request
> and a new entry in kvm_x86_ops, which we will be able to reuse for
> nested VMX state migration.
>
> Extracted from a patch by Jim Mattson and KarimAllah Ahmed.
>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
>  arch/x86/include/asm/kvm_host.h |  3 +++
>  arch/x86/kvm/vmx.c              | 53 +++++++++++++++++++++++++++--------------
>  arch/x86/kvm/x86.c              |  2 ++
>  3 files changed, 40 insertions(+), 18 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index c13cd28d9d1b..da957725992d 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -75,6 +75,7 @@
>  #define KVM_REQ_HV_EXIT                        KVM_ARCH_REQ(21)
>  #define KVM_REQ_HV_STIMER              KVM_ARCH_REQ(22)
>  #define KVM_REQ_LOAD_EOI_EXITMAP       KVM_ARCH_REQ(23)
> +#define KVM_REQ_GET_VMCS12_PAGES       KVM_ARCH_REQ(24)
>
>  #define CR0_RESERVED_BITS                                               \
>         (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
> @@ -1085,6 +1086,8 @@ struct kvm_x86_ops {
>
>         void (*setup_mce)(struct kvm_vcpu *vcpu);
>
> +       void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
> +
>         int (*smi_allowed)(struct kvm_vcpu *vcpu);
>         int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
>         int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase);
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 2630ab38d72c..17aede06ae0e 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -10636,9 +10636,9 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
>  static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
>                                                  struct vmcs12 *vmcs12);
>
> -static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
> -                                       struct vmcs12 *vmcs12)
> +static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
>  {
> +       struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
>         struct vcpu_vmx *vmx = to_vmx(vcpu);
>         struct page *page;
>         u64 hpa;
> @@ -11750,13 +11750,18 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
>         return 0;
>  }
>
> -static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu)
> +/*
> + * If p_exit_qual is NULL, this is being called from state restore (either
> + * kvm_set_nested_state or RSM).  Otherwise it's called from vmlaunch/vmresume.
> + */
> +static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *p_exit_qual)
>  {
>         struct vcpu_vmx *vmx = to_vmx(vcpu);
>         struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> +       bool from_vmentry = !!p_exit_qual;
>         u32 msr_entry_idx;
> -       u32 exit_qual;
> -       int r;
> +       u32 dummy_exit_qual;
> +       int r = 0;
>
>         enter_guest_mode(vcpu);
>
> @@ -11770,17 +11775,27 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu)
>                 vcpu->arch.tsc_offset += vmcs12->tsc_offset;
>
>         r = EXIT_REASON_INVALID_STATE;
> -       if (prepare_vmcs02(vcpu, vmcs12, &exit_qual))
> +       if (prepare_vmcs02(vcpu, vmcs12, from_vmentry ? p_exit_qual : &dummy_exit_qual))
>                 goto fail;
>
> -       nested_get_vmcs12_pages(vcpu, vmcs12);
> +       if (from_vmentry) {
> +               nested_get_vmcs12_pages(vcpu);
>
> -       r = EXIT_REASON_MSR_LOAD_FAIL;
> -       msr_entry_idx = nested_vmx_load_msr(vcpu,
> -                                           vmcs12->vm_entry_msr_load_addr,
> -                                           vmcs12->vm_entry_msr_load_count);
> -       if (msr_entry_idx)
> -               goto fail;
> +               r = EXIT_REASON_MSR_LOAD_FAIL;
> +               msr_entry_idx = nested_vmx_load_msr(vcpu,
> +                                                   vmcs12->vm_entry_msr_load_addr,
> +                                                   vmcs12->vm_entry_msr_load_count);
> +               if (msr_entry_idx)
> +                       goto fail;
> +       } else {
> +               /*
> +                * The MMU is not initialized to point at the right entities yet and
> +                * "get pages" would need to read data from the guest (i.e. we will
> +                * need to perform gpa to hpa translation). Request a call
> +                * to nested_get_vmcs12_pages before the next VM-entry.
> +                */
> +               kvm_make_request(KVM_REQ_GET_VMCS12_PAGES, vcpu);
> +       }

The from_vmentry variable seems unnecessary. Why not (a) hoist the
call to nested_vmx_load_msr() into nested_vmx_run and (b) always use
the deferred KVM_REQ_GET_VMCS12_PAGES?

>
>         /*
>          * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
> @@ -11795,8 +11810,7 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu)
>                 vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
>         leave_guest_mode(vcpu);
>         vmx_switch_vmcs(vcpu, &vmx->vmcs01);
> -       nested_vmx_entry_failure(vcpu, vmcs12, r, exit_qual);
> -       return 1;
> +       return r;
>  }
>
>  /*
> @@ -11873,10 +11887,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
>          */
>
>         vmx->nested.nested_run_pending = 1;
> -       ret = enter_vmx_non_root_mode(vcpu);
> +       ret = enter_vmx_non_root_mode(vcpu, &exit_qual);
>         if (ret) {
> +               nested_vmx_entry_failure(vcpu, vmcs12, ret, exit_qual);
>                 vmx->nested.nested_run_pending = 0;
> -               return ret;
> +               return 1;
>         }
>
>         /*
> @@ -12962,7 +12977,7 @@ static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
>
>         if (vmx->nested.smm.guest_mode) {
>                 vcpu->arch.hflags &= ~HF_SMM_MASK;
> -               ret = enter_vmx_non_root_mode(vcpu);
> +               ret = enter_vmx_non_root_mode(vcpu, NULL);
>                 vcpu->arch.hflags |= HF_SMM_MASK;
>                 if (ret)
>                         return ret;
> @@ -13111,6 +13126,8 @@ static int enable_smi_window(struct kvm_vcpu *vcpu)
>
>         .setup_mce = vmx_setup_mce,
>
> +       .get_vmcs12_pages = nested_get_vmcs12_pages,
> +
>         .smi_allowed = vmx_smi_allowed,
>         .pre_enter_smm = vmx_pre_enter_smm,
>         .pre_leave_smm = vmx_pre_leave_smm,
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 2b812b3c5088..8ddf5f94876f 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7257,6 +7257,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>         bool req_immediate_exit = false;
>
>         if (kvm_request_pending(vcpu)) {
> +               if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu))
> +                       kvm_x86_ops->get_vmcs12_pages(vcpu);
>                 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
>                         kvm_mmu_unload(vcpu);
>                 if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
> --
> 1.8.3.1
>