> From: Sean Christopherson <seanjc@xxxxxxxxxx>
> Sent: Thursday, January 6, 2022 6:22 AM
>
> On Wed, Jan 05, 2022, Yang Zhong wrote:
> > @@ -6399,6 +6424,26 @@ static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu,
> >  	kvm_after_interrupt(vcpu);
> >  }
> >
> > +static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
> > +{
> > +	/*
> > +	 * Save xfd_err to guest_fpu before interrupt is enabled, so the
> > +	 * MSR value is not clobbered by the host activity before the guest
> > +	 * has chance to consume it.
> > +	 *
> > +	 * We should not blindly read xfd_err here, since this exception
>
> Nit, avoid "we", and explain what KVM does (or doesn't) do, not what KVM "should"
> do, e.g. just say
>
>	 * Do not blindly read ...
>
> > +	 * might be caused by L1 interception on a platform which doesn't
> > +	 * support xfd at all.
> > +	 *
> > +	 * Do it conditionally upon guest_fpu::xfd. xfd_err matters
> > +	 * only when xfd contains a non-zero value.
> > +	 *
> > +	 * Queuing exception is done in vmx_handle_exit. See comment there.
>
> Another nit, it's worth explaining why XFD_ERR needs to be read here regardless
> of is_guest_mode(). E.g.
>
>	 * Injecting the #NM back into the guest is handled in the standard path
>	 * as an #NM in L2 may be reflected into L1 as a VM-Exit. Read XFD_ERR
>	 * even if the #NM is from L2, as L1 may have exposed XFD to L2.

sounds good

>
> Side topic, in a follow up series/patch, it's probably worth adding support in
> nested_vmx_prepare_msr_bitmap() to allow passthrough of the MSRs to L2.

will do.
>
> > +	 */
> > +	if (vcpu->arch.guest_fpu.fpstate->xfd)
> > +		rdmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
> > +}
> > +
> >  static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
> >  {
> >  	const unsigned long nmi_entry = (unsigned long)asm_exc_nmi_noist;
> > @@ -6407,6 +6452,9 @@ static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
> >  	/* if exit due to PF check for async PF */
> >  	if (is_page_fault(intr_info))
> >  		vmx->vcpu.arch.apf.host_apf_flags = kvm_read_and_reset_apf_flags();
> > +	/* if exit due to NM, handle before interrupts are enabled */
>
> Nit, drop this comment, it's slightly misleading since the #NM isn't fully handled
> here. The comment in handle_nm_fault_irqoff() is more than sufficient.
>
> > +	else if (is_nm_fault(intr_info))
> > +		handle_nm_fault_irqoff(&vmx->vcpu);
> >  	/* Handle machine checks before interrupts are enabled */
> >  	else if (is_machine_check(intr_info))
> >  		kvm_machine_check();
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 21ce65220e38..2c988f8ca616 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -9953,6 +9953,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> >  	if (test_thread_flag(TIF_NEED_FPU_LOAD))
> >  		switch_fpu_return();
> >
> > +	if (vcpu->arch.guest_fpu.xfd_err)
> > +		wrmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
> > +
> >  	if (unlikely(vcpu->arch.switch_db_regs)) {
> >  		set_debugreg(0, 7);
> >  		set_debugreg(vcpu->arch.eff_db[0], 0);
> > @@ -10016,6 +10019,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> >
> >  	static_call(kvm_x86_handle_exit_irqoff)(vcpu);
> >
> > +	if (vcpu->arch.guest_fpu.xfd_err)
> > +		wrmsrl(MSR_IA32_XFD_ERR, 0);
> > +
> >  	/*
> >  	 * Consume any pending interrupts, including the possible source of
> >  	 * VM-Exit on SVM and any ticks that occur between VM-Exit and now.