On Thu, Nov 17, 2022, Maxim Levitsky wrote: > From: Santosh Shukla <santosh.shukla@xxxxxxx> > > VMCB intr_ctrl bit12 (V_NMI_MASK) is set by the processor when handling > NMI in guest and is cleared after the NMI is handled. Treat V_NMI_MASK > as read-only in the hypervisor except for the SMM case where hypervisor > before entering and after leaving SMM mode requires to set and unset > V_NMI_MASK. > > Adding API(get_vnmi_vmcb) in order to return the correct vmcb for L1 or > L2. > > Maxim: > - made set_vnmi_mask/clear_vnmi_mask/is_vnmi_mask warn if called > without vNMI enabled > - clear IRET intercept in svm_set_nmi_mask even with vNMI > > Signed-off-by: Santosh Shukla <santosh.shukla@xxxxxxx> > Signed-off-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx> > --- > arch/x86/kvm/svm/svm.c | 18 ++++++++++++++- > arch/x86/kvm/svm/svm.h | 52 ++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 69 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c > index 08a7b2a0a29f3a..c16f68f6c4f7d7 100644 > --- a/arch/x86/kvm/svm/svm.c > +++ b/arch/x86/kvm/svm/svm.c > @@ -3618,13 +3618,29 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection) > > static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) > { > - return !!(vcpu->arch.hflags & HF_NMI_MASK); > + struct vcpu_svm *svm = to_svm(vcpu); > + > + if (is_vnmi_enabled(svm)) > + return is_vnmi_mask_set(svm); > + else > + return !!(vcpu->arch.hflags & HF_NMI_MASK); > } > > static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) > { > struct vcpu_svm *svm = to_svm(vcpu); > > + if (is_vnmi_enabled(svm)) { > + if (masked) > + set_vnmi_mask(svm); I believe not setting INTERCEPT_IRET is correct, but only because the existing code is unnecessary. And this all very subtly relies on KVM_REQ_EVENT being set and/or KVM already being in kvm_check_and_inject_events(). When NMIs become unblocked, INTERCEPT_IRET can be cleared, but KVM should also pend KVM_REQ_EVENT. 
AFAICT, that doesn't happen when this is called via the emulator. Ah, because em_iret() only handles RM for Intel's restricted guest crap. I.e. it "works" only because it never happens. All other flows set KVM_REQ_EVENT when toggling NMI blocking, e.g. the RSM path of kvm_smm_changed(). And when NMIs become blocked, there's no need to force INTERCEPT_IRET in this code because kvm_check_and_inject_events() will request an NMI window and set the intercept if necessary, and all paths that set NMI blocking are guaranteed to reach kvm_check_and_inject_events() before entering the guest. 1. RSM => kvm_smm_changed() sets KVM_REQ_EVENT 2. enter_smm() is only called from within kvm_check_and_inject_events(), before pending NMIs are processed (yay priority) 3. emulator_set_nmi_mask() never blocks NMIs, only does the half-baked IRET emulation 4. kvm_vcpu_ioctl_x86_set_vcpu_event() sets KVM_REQ_EVENT So, can you add a prep patch to drop the forced INTERCEPT_IRET? That way the logic for vNMI and !vNMI is the same. > + else { > + clear_vnmi_mask(svm); This is the only code that sets/clears the vNMI mask, so rather than have set/clear helpers, what about a single helper to do the dirty work? 
> + if (!sev_es_guest(vcpu->kvm)) > + svm_clr_intercept(svm, INTERCEPT_IRET); > + } > + return; > + } > + > if (masked) { > vcpu->arch.hflags |= HF_NMI_MASK; > if (!sev_es_guest(vcpu->kvm)) > diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h > index f5383104d00580..bf7f4851dee204 100644 > --- a/arch/x86/kvm/svm/svm.h > +++ b/arch/x86/kvm/svm/svm.h > @@ -35,6 +35,7 @@ extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; > extern bool npt_enabled; > extern int vgif; > extern bool intercept_smi; > +extern bool vnmi; > > enum avic_modes { > AVIC_MODE_NONE = 0, > @@ -531,6 +532,57 @@ static inline bool is_x2apic_msrpm_offset(u32 offset) > (msr < (APIC_BASE_MSR + 0x100)); > } > > +static inline struct vmcb *get_vnmi_vmcb(struct vcpu_svm *svm) > +{ > + if (!vnmi) > + return NULL; > + > + if (is_guest_mode(&svm->vcpu)) > + return svm->nested.vmcb02.ptr; > + else > + return svm->vmcb01.ptr; > +} > + > +static inline bool is_vnmi_enabled(struct vcpu_svm *svm) > +{ > + struct vmcb *vmcb = get_vnmi_vmcb(svm); > + > + if (vmcb) > + return !!(vmcb->control.int_ctl & V_NMI_ENABLE); > + else > + return false; Maybe just this? return vmcb && (vmcb->control.int_ctl & V_NMI_ENABLE); Or if an inner helper is added: return vmcb && __is_vnmi_enabled(vmcb); > +} > + > +static inline bool is_vnmi_mask_set(struct vcpu_svm *svm) > +{ > + struct vmcb *vmcb = get_vnmi_vmcb(svm); > + > + if (!WARN_ON_ONCE(!vmcb)) Rather than WARN, add an inner __is_vnmi_enabled() that takes the vnmi_vmcb. Actually, if you do that, the test/set/clear helpers can go away entirely. 
> + return false; > + > + return !!(vmcb->control.int_ctl & V_NMI_MASK); > +} > + > +static inline void set_vnmi_mask(struct vcpu_svm *svm) > +{ > + struct vmcb *vmcb = get_vnmi_vmcb(svm); > + > + if (!WARN_ON_ONCE(!vmcb)) > + return; > + > + vmcb->control.int_ctl |= V_NMI_MASK; > +} > + > +static inline void clear_vnmi_mask(struct vcpu_svm *svm) > +{ > + struct vmcb *vmcb = get_vnmi_vmcb(svm); > + > + if (!WARN_ON_ONCE(!vmcb)) > + return; > + > + vmcb->control.int_ctl &= ~V_NMI_MASK; > +} These helpers can all go in svm.c. There are no users outside of svm.c, and unless I'm misunderstanding how nested works, there should never be outside users. E.g. with HF_NMI_MASK => svm->nmi_masked, the end result can be something like: static bool __is_vnmi_enabled(struct vmcb *vmcb) { return !!(vmcb->control.int_ctl & V_NMI_ENABLE); } static bool is_vnmi_enabled(struct vcpu_svm *svm) { struct vmcb *vmcb = get_vnmi_vmcb(svm); return vmcb && __is_vnmi_enabled(vmcb); } static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *vmcb = get_vnmi_vmcb(svm); if (vmcb && __is_vnmi_enabled(vmcb)) return !!(vmcb->control.int_ctl & V_NMI_MASK); else return !!(vcpu->arch.hflags & HF_NMI_MASK); } static void svm_set_or_clear_vnmi_mask(struct vmcb *vmcb, bool set) { if (set) vmcb->control.int_ctl |= V_NMI_MASK; else vmcb->control.int_ctl &= ~V_NMI_MASK; } static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *vmcb = get_vnmi_vmcb(svm); if (vmcb && __is_vnmi_enabled(vmcb)) { if (masked) vmcb->control.int_ctl |= V_NMI_MASK; else vmcb->control.int_ctl &= ~V_NMI_MASK; } else { svm->nmi_masked = masked; } if (!masked) svm_disable_iret_interception(svm); }