When saving a vCPU's nested state, the vmcs02 is discarded. Only the shadow vmcs12 is saved. The shadow vmcs12 contains all of the information needed to reconstruct an equivalent vmcs02 on restore, but we have to be able to deal with two contexts:

1. The nested state was saved immediately after an emulated VM-entry, before the vmcs02 was ever launched.
2. The nested state was saved some time after the first successful launch of the vmcs02.

Though it's an implementation detail rather than an architected bit, vmx->nested_run_pending serves to distinguish between these two cases. Hence, we save it as part of the vCPU's nested state. (Yes, this is ugly.)

Even when restoring from a checkpoint, it may be necessary to build the vmcs02 as if prepare_vmcs02 were called from nested_vmx_run. So, the 'from_vmentry' argument should be dropped, and vmx->nested_run_pending should be consulted instead. The nested state restoration code then has to set vmx->nested_run_pending prior to calling prepare_vmcs02. It's important that the restoration code set vmx->nested_run_pending anyway, since the flag impacts things like interrupt delivery as well.

Fixes: cf8b84f48a59 ("kvm: nVMX: Prepare for checkpointing L2 state") Signed-off-by: Jim Mattson <jmattson@xxxxxxxxxx> --- arch/x86/kvm/vmx.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index dd985130c540..82bb5457122d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10868,8 +10868,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne return 0; } -static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, - bool from_vmentry) +static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -11003,13 +11002,13 @@ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, * is assigned to entry_failure_code on failure. */ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, - bool from_vmentry, u32 *entry_failure_code) + u32 *entry_failure_code) { struct vcpu_vmx *vmx = to_vmx(vcpu); u32 exec_control, vmcs12_exec_ctrl; if (vmx->nested.dirty_vmcs12) { - prepare_vmcs02_full(vcpu, vmcs12, from_vmentry); + prepare_vmcs02_full(vcpu, vmcs12); vmx->nested.dirty_vmcs12 = false; } @@ -11029,7 +11028,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, * HOST_FS_BASE, HOST_GS_BASE. 
*/ - if (from_vmentry && + if (vmx->nested.nested_run_pending && (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) { kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); @@ -11037,7 +11036,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, kvm_set_dr(vcpu, 7, vcpu->arch.dr7); vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl); } - if (from_vmentry) { + if (vmx->nested.nested_run_pending) { vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, vmcs12->vm_entry_intr_info_field); vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, @@ -11169,7 +11168,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, ~VM_ENTRY_IA32E_MODE) | (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); - if (from_vmentry && + if (vmx->nested.nested_run_pending && (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) { vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat); vcpu->arch.pat = vmcs12->guest_ia32_pat; @@ -11237,7 +11236,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmx_set_cr4(vcpu, vmcs12->guest_cr4); vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12)); - if (from_vmentry && + if (vmx->nested.nested_run_pending && (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) vcpu->arch.efer = vmcs12->guest_ia32_efer; else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) @@ -11415,7 +11414,7 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, return 0; } -static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) +static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12 = get_vmcs12(vcpu); @@ -11435,7 +11434,7 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) vcpu->arch.tsc_offset += vmcs12->tsc_offset; r = EXIT_REASON_INVALID_STATE; - if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) + if 
(prepare_vmcs02(vcpu, vmcs12, &exit_qual)) goto fail; nested_get_vmcs12_pages(vcpu, vmcs12); @@ -11537,20 +11536,22 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) * the nested entry. */ - ret = enter_vmx_non_root_mode(vcpu, true); - if (ret) + vmx->nested.nested_run_pending = 1; + ret = enter_vmx_non_root_mode(vcpu); + if (ret) { + vmx->nested.nested_run_pending = 0; return ret; + } /* * If we're entering a halted L2 vcpu and the L2 vcpu won't be woken * by event injection, halt vcpu. */ if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) && - !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) + !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) { + vmx->nested.nested_run_pending = 0; return kvm_vcpu_halt(vcpu); - - vmx->nested.nested_run_pending = 1; - + } return 1; out: @@ -12612,7 +12613,7 @@ static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase) if (vmx->nested.smm.guest_mode) { vcpu->arch.hflags &= ~HF_SMM_MASK; - ret = enter_vmx_non_root_mode(vcpu, false); + ret = enter_vmx_non_root_mode(vcpu); vcpu->arch.hflags |= HF_SMM_MASK; if (ret) return ret; -- 2.17.0.441.gb46fe60e1d-goog