On Sat, Jul 28, 2018 at 4:10 PM, Paolo Bonzini <pbonzini@xxxxxxxxxx> wrote: > If the vCPU enters system management mode while running a nested guest, > RSM starts processing the vmentry while still in SMM. In that case, > however, the pages pointed to by the vmcs12 might be incorrectly > loaded from SMRAM. To avoid this, delay the handling of the pages > until just before the next vmentry. This is done with a new request > and a new entry in kvm_x86_ops, which we will be able to reuse for > nested VMX state migration. > > Extracted from a patch by Jim Mattson and KarimAllah Ahmed. > > Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> > --- > arch/x86/include/asm/kvm_host.h | 3 +++ > arch/x86/kvm/vmx.c | 53 +++++++++++++++++++++++++++-------------- > arch/x86/kvm/x86.c | 2 ++ > 3 files changed, 40 insertions(+), 18 deletions(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index c13cd28d9d1b..da957725992d 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -75,6 +75,7 @@ > #define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21) > #define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22) > #define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23) > +#define KVM_REQ_GET_VMCS12_PAGES KVM_ARCH_REQ(24) > > #define CR0_RESERVED_BITS \ > (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ > @@ -1085,6 +1086,8 @@ struct kvm_x86_ops { > > void (*setup_mce)(struct kvm_vcpu *vcpu); > > + void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu); > + > int (*smi_allowed)(struct kvm_vcpu *vcpu); > int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate); > int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase); > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 2630ab38d72c..17aede06ae0e 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -10636,9 +10636,9 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, > static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, > struct vmcs12 *vmcs12); > > -static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, > - struct vmcs12 *vmcs12) > +static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) > { > + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); > struct vcpu_vmx *vmx = to_vmx(vcpu); > struct page *page; > u64 hpa; > @@ -11750,13 +11750,18 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, > return 0; > } > > -static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu) > +/* > + * If p_exit_qual is NULL, this is being called from state restore (either > + * kvm_set_nested_state or RSM). Otherwise it's called from vmlaunch/vmresume. > + */ > +static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *p_exit_qual) > { > struct vcpu_vmx *vmx = to_vmx(vcpu); > struct vmcs12 *vmcs12 = get_vmcs12(vcpu); > + bool from_vmentry = !!p_exit_qual; > u32 msr_entry_idx; > - u32 exit_qual; > - int r; > + u32 dummy_exit_qual; > + int r = 0; > > enter_guest_mode(vcpu); > > @@ -11770,17 +11775,27 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu) > vcpu->arch.tsc_offset += vmcs12->tsc_offset; > > r = EXIT_REASON_INVALID_STATE; > - if (prepare_vmcs02(vcpu, vmcs12, &exit_qual)) > + if (prepare_vmcs02(vcpu, vmcs12, from_vmentry ? p_exit_qual : &dummy_exit_qual)) > goto fail; > > - nested_get_vmcs12_pages(vcpu, vmcs12); > + if (from_vmentry) { > + nested_get_vmcs12_pages(vcpu); > > - r = EXIT_REASON_MSR_LOAD_FAIL; > - msr_entry_idx = nested_vmx_load_msr(vcpu, > - vmcs12->vm_entry_msr_load_addr, > - vmcs12->vm_entry_msr_load_count); > - if (msr_entry_idx) > - goto fail; > + r = EXIT_REASON_MSR_LOAD_FAIL; > + msr_entry_idx = nested_vmx_load_msr(vcpu, > + vmcs12->vm_entry_msr_load_addr, > + vmcs12->vm_entry_msr_load_count); > + if (msr_entry_idx) > + goto fail; > + } else { > + /* > + * The MMU is not initialized to point at the right entities yet and > + * "get pages" would need to read data from the guest (i.e. we will > + * need to perform gpa to hpa translation). Request a call > + * to nested_get_vmcs12_pages before the next VM-entry. > + */ > + kvm_make_request(KVM_REQ_GET_VMCS12_PAGES, vcpu); > + } The from_vmentry variable seems unnecessary. Why not (a) hoist the call to nested_vmx_load_msr() into nested_vmx_run and (b) always use the deferred KVM_REQ_GET_VMCS12_PAGES? > > /* > * Note no nested_vmx_succeed or nested_vmx_fail here. At this point > @@ -11795,8 +11810,7 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu) > vcpu->arch.tsc_offset -= vmcs12->tsc_offset; > leave_guest_mode(vcpu); > vmx_switch_vmcs(vcpu, &vmx->vmcs01); > - nested_vmx_entry_failure(vcpu, vmcs12, r, exit_qual); > - return 1; > + return r; > } > > /* > @@ -11873,10 +11887,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) > */ > > vmx->nested.nested_run_pending = 1; > - ret = enter_vmx_non_root_mode(vcpu); > + ret = enter_vmx_non_root_mode(vcpu, &exit_qual); > if (ret) { > + nested_vmx_entry_failure(vcpu, vmcs12, ret, exit_qual); > vmx->nested.nested_run_pending = 0; > - return ret; > + return 1; > } > > /* > @@ -12962,7 +12977,7 @@ static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase) > > if (vmx->nested.smm.guest_mode) { > vcpu->arch.hflags &= ~HF_SMM_MASK; > - ret = enter_vmx_non_root_mode(vcpu); > + ret = enter_vmx_non_root_mode(vcpu, NULL); > vcpu->arch.hflags |= HF_SMM_MASK; > if (ret) > return ret; > @@ -13111,6 +13126,8 @@ static int enable_smi_window(struct kvm_vcpu *vcpu) > > .setup_mce = vmx_setup_mce, > > + .get_vmcs12_pages = nested_get_vmcs12_pages, > + > .smi_allowed = vmx_smi_allowed, > .pre_enter_smm = vmx_pre_enter_smm, > .pre_leave_smm = vmx_pre_leave_smm, > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 2b812b3c5088..8ddf5f94876f 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -7257,6 +7257,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) > bool req_immediate_exit = false; > > if (kvm_request_pending(vcpu)) { > + if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) > + kvm_x86_ops->get_vmcs12_pages(vcpu); > if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) > kvm_mmu_unload(vcpu); > if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) > -- > 1.8.3.1 >