This reverts commit 15ad9762d69fd8e40a4a51828c1d6b0c1b8fbea0. David Matlack reports: "While testing some patches I ran into a VM_BUG_ON that I have been able to reproduce at kvm/queue commit 45af1bb99b72 ("KVM: VMX: Clean up PI pre/post-block WARNs"). To repro run the kvm-unit-tests on a kernel built from kvm/queue with CONFIG_DEBUG_VM=y. I was testing on an Intel Cascade Lake host and have not tested in any other environments yet. The repro is not 100% reliable, although it's fairly easy to trigger and always during a vmx* kvm-unit-tests Given the details of the crash, commit 15ad9762d69f ("KVM: VMX: Save HOST_CR3 in vmx_prepare_switch_to_guest()") and surrounding commits look most suspect." Reported-by: David Matlack <dmatlack@xxxxxxxxxx> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> --- Not tested yet. I did reproduce the WARN with debug kernels though. --- arch/x86/kvm/vmx/nested.c | 8 +++++++- arch/x86/kvm/vmx/vmx.c | 17 +++++++---------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 2f6f465e575f..26b236187850 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3054,7 +3054,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long cr4; + unsigned long cr3, cr4; bool vm_fail; if (!nested_early_check) @@ -3077,6 +3077,12 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) */ vmcs_writel(GUEST_RFLAGS, 0); + cr3 = __get_current_cr3_fast(); + if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) { + vmcs_writel(HOST_CR3, cr3); + vmx->loaded_vmcs->host_state.cr3 = cr3; + } + cr4 = cr4_read_shadow(); if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) { vmcs_writel(HOST_CR4, cr4); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 640f4719612c..7826556b2a47 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1103,7 +1103,6 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) #ifdef CONFIG_X86_64 int cpu = raw_smp_processor_id(); #endif - unsigned long cr3; unsigned long fs_base, gs_base; u16 fs_sel, gs_sel; int i; @@ -1168,14 +1167,6 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) #endif vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base); - - /* Host CR3 including its PCID is stable when guest state is loaded. */ - cr3 = __get_current_cr3_fast(); - if (unlikely(cr3 != host_state->cr3)) { - vmcs_writel(HOST_CR3, cr3); - host_state->cr3 = cr3; - } - vmx->guest_state_loaded = true; } @@ -6638,7 +6629,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long cr4; + unsigned long cr3, cr4; /* Record the guest's net vcpu time for enforced NMI injections. */ if (unlikely(!enable_vnmi && @@ -6683,6 +6674,12 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); vcpu->arch.regs_dirty = 0; + cr3 = __get_current_cr3_fast(); + if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) { + vmcs_writel(HOST_CR3, cr3); + vmx->loaded_vmcs->host_state.cr3 = cr3; + } + cr4 = cr4_read_shadow(); if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) { vmcs_writel(HOST_CR4, cr4); -- 2.31.1