guest xcr0 could leak into host when MCE happens in guest mode. Because do_machine_check() could schedule out at a few places. For example: kvm_load_guest_xcr0 ... kvm_x86_ops->run(vcpu) { vmx_vcpu_run vmx_complete_atomic_exit kvm_machine_check do_machine_check do_memory_failure memory_failure lock_page In this case, host_xcr0 is 0x2ff, guest vcpu xcr0 is 0xff. After schedule out, host cpu has guest xcr0 loaded (0xff). In __switch_to { switch_fpu_finish copy_kernel_to_fpregs XRSTORS If any bit i in XSTATE_BV[i] == 1 and xcr0[i] == 0, XRSTORS will generate #GP (In this case, bit 9). Then ex_handler_fprestore kicks in and tries to reinitialize fpu by restoring init fpu state. Same story as last #GP, except we get DOUBLE FAULT this time. Cc: stable@xxxxxxxxxxxxxxx Signed-off-by: WANG Chao <chao.wang@xxxxxxxxx> --- arch/x86/kvm/svm.c | 2 ++ arch/x86/kvm/vmx/vmx.c | 4 ++++ arch/x86/kvm/x86.c | 10 ++++------ arch/x86/kvm/x86.h | 2 ++ 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e0a791c3d4fc..2bf73076de7f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -5621,6 +5621,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) svm->vmcb->save.cr2 = vcpu->arch.cr2; clgi(); + kvm_load_guest_xcr0(vcpu); /* * If this vCPU has touched SPEC_CTRL, restore the guest's value if @@ -5766,6 +5767,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) kvm_before_interrupt(&svm->vcpu); + kvm_put_guest_xcr0(vcpu); stgi(); /* Any pending NMI will happen here */ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index ab432a930ae8..3157598c52f1 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6410,6 +6410,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) vmx_set_interrupt_shadow(vcpu, 0); + kvm_load_guest_xcr0(vcpu); + if (static_cpu_has(X86_FEATURE_PKU) && kvm_read_cr4_bits(vcpu, X86_CR4_PKE) && vcpu->arch.pkru != vmx->host_pkru) @@ -6506,6 +6508,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) __write_pkru(vmx->host_pkru); } + kvm_put_guest_xcr0(vcpu); + vmx->nested.nested_run_pending = 0; vmx->idt_vectoring_info = 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 099b851dabaf..22f66e9a7dc5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -800,7 +800,7 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) } EXPORT_SYMBOL_GPL(kvm_lmsw); -static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) +void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) { if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && !vcpu->guest_xcr0_loaded) { @@ -810,8 +810,9 @@ static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) vcpu->guest_xcr0_loaded = 1; } } +EXPORT_SYMBOL_GPL(kvm_load_guest_xcr0); -static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) +void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) { if (vcpu->guest_xcr0_loaded) { if (vcpu->arch.xcr0 != host_xcr0) @@ -819,6 +820,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) vcpu->guest_xcr0_loaded = 0; } } +EXPORT_SYMBOL_GPL(kvm_put_guest_xcr0); static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) { @@ -7865,8 +7867,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) goto cancel_injection; } - kvm_load_guest_xcr0(vcpu); - if (req_immediate_exit) { kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_x86_ops->request_immediate_exit(vcpu); @@ -7919,8 +7919,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); - kvm_put_guest_xcr0(vcpu); - kvm_before_interrupt(vcpu); kvm_x86_ops->handle_external_intr(vcpu); kvm_after_interrupt(vcpu); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 28406aa1136d..aedc5d0d4989 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -347,4 +347,6 @@ static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu) __this_cpu_write(current_vcpu, NULL); } +void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu); +void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu); #endif -- 2.21.0