Implement support for injecting pending NMIs via the new
.set_hw_nmi_pending callback, using AMD's new vNMI feature.

Note that vNMI can't cause a VM exit, which is needed when a nested
guest intercepts NMIs. Therefore, to avoid breaking nesting, vNMI is
inhibited while a nested guest is running, and the legacy NMI window
detection and delivery method is used instead.

While it would be possible to pass vNMI through when a nested guest
doesn't intercept NMIs, such usage is very uncommon and not worth
optimizing for.

Signed-off-by: Santosh Shukla <santosh.shukla@xxxxxxx>
Signed-off-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
---
 arch/x86/kvm/svm/nested.c |  42 +++++++++++++++
 arch/x86/kvm/svm/svm.c    | 111 ++++++++++++++++++++++++++++++--------
 arch/x86/kvm/svm/svm.h    |  10 ++++
 3 files changed, 140 insertions(+), 23 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index e891318595113e..5bea672bf8b12d 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -623,6 +623,42 @@ static bool is_evtinj_nmi(u32 evtinj)
 	return type == SVM_EVTINJ_TYPE_NMI;
 }
 
+static void nested_svm_save_vnmi(struct vcpu_svm *svm)
+{
+	struct vmcb *vmcb01 = svm->vmcb01.ptr;
+
+	/*
+	 * Copy the vNMI state back to software NMI tracking state
+	 * for the duration of the nested run.
+	 */
+
+	svm->nmi_masked = vmcb01->control.int_ctl & V_NMI_MASK;
+	svm->vcpu.arch.nmi_pending += !!(vmcb01->control.int_ctl & V_NMI_PENDING);
+}
+
+static void nested_svm_restore_vnmi(struct vcpu_svm *svm)
+{
+	struct kvm_vcpu *vcpu = &svm->vcpu;
+	struct vmcb *vmcb01 = svm->vmcb01.ptr;
+
+	/*
+	 * Restore the vNMI state from the software NMI tracking state
+	 * after a nested run.
+	 */
+
+	if (svm->nmi_masked)
+		vmcb01->control.int_ctl |= V_NMI_MASK;
+	else
+		vmcb01->control.int_ctl &= ~V_NMI_MASK;
+
+	if (vcpu->arch.nmi_pending) {
+		vcpu->arch.nmi_pending--;
+		vmcb01->control.int_ctl |= V_NMI_PENDING;
+	} else {
+		vmcb01->control.int_ctl &= ~V_NMI_PENDING;
+	}
+}
+
 static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
 					  unsigned long vmcb12_rip,
 					  unsigned long vmcb12_csbase)
@@ -646,6 +682,9 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
 	else
 		int_ctl_vmcb01_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
 
+	if (vnmi)
+		nested_svm_save_vnmi(svm);
+
 	/* Copied from vmcb01. msrpm_base can be overwritten later. */
 	vmcb02->control.nested_ctl = vmcb01->control.nested_ctl;
 	vmcb02->control.iopm_base_pa = vmcb01->control.iopm_base_pa;
@@ -1049,6 +1088,9 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
 		svm_update_lbrv(vcpu);
 	}
 
+	if (vnmi)
+		nested_svm_restore_vnmi(svm);
+
 	/*
 	 * On vmexit the GIF is set to false and
 	 * no event can be injected in L1.
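[Not part of the patch: to make the intended flow concrete, here is a
minimal sketch of how common x86 code is expected to consume the new
hook. The helper name and call site below are assumptions for
illustration only; the real wiring lives in the corresponding x86.c
patch of this series.]

	/*
	 * Hypothetical sketch: drain software-tracked NMIs into the single
	 * hardware vNMI slot, falling back to the legacy NMI window /
	 * EVENTINJ path when the hook is absent or the slot is occupied.
	 */
	static void example_process_nmi(struct kvm_vcpu *vcpu)
	{
		while (vcpu->arch.nmi_pending) {
			if (!kvm_x86_ops.set_hw_nmi_pending ||
			    !kvm_x86_ops.set_hw_nmi_pending(vcpu))
				break;	/* legacy delivery takes over */
			vcpu->arch.nmi_pending--;
		}
	}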
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index cfed6ab29c839a..bf10adcf3170a8 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -230,6 +230,8 @@ module_param(dump_invalid_vmcb, bool, 0644);
 bool intercept_smi = true;
 module_param(intercept_smi, bool, 0444);
 
+bool vnmi = true;
+module_param(vnmi, bool, 0444);
 
 static bool svm_gp_erratum_intercept = true;
@@ -1299,6 +1301,9 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
 	if (kvm_vcpu_apicv_active(vcpu))
 		avic_init_vmcb(svm, vmcb);
 
+	if (vnmi)
+		svm->vmcb->control.int_ctl |= V_NMI_ENABLE;
+
 	if (vgif) {
 		svm_clr_intercept(svm, INTERCEPT_STGI);
 		svm_clr_intercept(svm, INTERCEPT_CLGI);
@@ -3487,6 +3492,39 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu)
 	++vcpu->stat.nmi_injections;
 }
 
+static bool svm_get_hw_nmi_pending(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	if (!is_vnmi_enabled(svm))
+		return false;
+
+	return !!(svm->vmcb->control.int_ctl & V_NMI_PENDING);
+}
+
+static bool svm_set_hw_nmi_pending(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	if (!is_vnmi_enabled(svm))
+		return false;
+
+	if (svm->vmcb->control.int_ctl & V_NMI_PENDING)
+		return false;
+
+	svm->vmcb->control.int_ctl |= V_NMI_PENDING;
+	vmcb_mark_dirty(svm->vmcb, VMCB_INTR);
+
+	/*
+	 * The NMI isn't technically injected yet, but this rough
+	 * estimate should be good enough.
+	 */
+	++vcpu->stat.nmi_injections;
+
+	return true;
+}
+
 static void svm_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -3582,11 +3620,38 @@ static void svm_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 		svm_set_intercept(svm, INTERCEPT_CR8_WRITE);
 }
 
+static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	if (is_vnmi_enabled(svm))
+		return svm->vmcb->control.int_ctl & V_NMI_MASK;
+	else
+		return svm->nmi_masked;
+}
+
+static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	if (is_vnmi_enabled(svm)) {
+		if (masked)
+			svm->vmcb->control.int_ctl |= V_NMI_MASK;
+		else
+			svm->vmcb->control.int_ctl &= ~V_NMI_MASK;
+	} else {
+		svm->nmi_masked = masked;
+		if (masked)
+			svm_enable_iret_interception(svm);
+		else
+			svm_disable_iret_interception(svm);
+	}
+}
+
 bool svm_nmi_blocked(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct vmcb *vmcb = svm->vmcb;
-	bool ret;
 
 	if (!gif_set(svm))
 		return true;
@@ -3594,10 +3659,10 @@ bool svm_nmi_blocked(struct kvm_vcpu *vcpu)
 	if (is_guest_mode(vcpu) && nested_exit_on_nmi(svm))
 		return false;
 
-	ret = (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
-	      (svm->nmi_masked);
+	if (svm_get_nmi_mask(vcpu))
+		return true;
 
-	return ret;
+	return vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK;
 }
 
 static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
@@ -3615,24 +3680,6 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
 	return 1;
 }
 
-static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
-{
-	return to_svm(vcpu)->nmi_masked;
-}
-
-static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
-{
-	struct vcpu_svm *svm = to_svm(vcpu);
-
-	if (masked) {
-		svm->nmi_masked = true;
-		svm_enable_iret_interception(svm);
-	} else {
-		svm->nmi_masked = false;
-		svm_disable_iret_interception(svm);
-	}
-}
-
 bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -3725,10 +3772,16 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
 	/*
 	 * Something prevents NMI from been injected. Single step over possible
 	 * problem (IRET or exception injection or interrupt shadow)
+	 *
+	 * With vNMI we should never need an NMI window (we can always inject
+	 * a vNMI either by setting V_NMI_PENDING or via EVENTINJ).
 	 */
+	if (WARN_ON_ONCE(is_vnmi_enabled(svm)))
+		return;
+
 	svm->nmi_singlestep_guest_rflags = svm_get_rflags(vcpu);
-	svm->nmi_singlestep = true;
 	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
+	svm->nmi_singlestep = true;
 }
 
 static void svm_flush_tlb_current(struct kvm_vcpu *vcpu)
@@ -4770,6 +4823,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.patch_hypercall = svm_patch_hypercall,
 	.inject_irq = svm_inject_irq,
 	.inject_nmi = svm_inject_nmi,
+	.get_hw_nmi_pending = svm_get_hw_nmi_pending,
+	.set_hw_nmi_pending = svm_set_hw_nmi_pending,
 	.inject_exception = svm_inject_exception,
 	.cancel_injection = svm_cancel_injection,
 	.interrupt_allowed = svm_interrupt_allowed,
@@ -5058,6 +5113,16 @@ static __init int svm_hardware_setup(void)
 		pr_info("Virtual GIF supported\n");
 	}
 
+	vnmi = vgif && vnmi && boot_cpu_has(X86_FEATURE_AMD_VNMI);
+	if (vnmi)
+		pr_info("Virtual NMI enabled\n");
+
+	if (!vnmi) {
+		svm_x86_ops.get_hw_nmi_pending = NULL;
+		svm_x86_ops.set_hw_nmi_pending = NULL;
+	}
+
 	if (lbrv) {
 		if (!boot_cpu_has(X86_FEATURE_LBRV))
 			lbrv = false;
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 587ddc150f9f34..0b7e1790fadde1 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -35,6 +35,7 @@ extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
 extern bool npt_enabled;
 extern int vgif;
 extern bool intercept_smi;
+extern bool vnmi;
 
 enum avic_modes {
 	AVIC_MODE_NONE = 0,
@@ -553,6 +554,15 @@ static inline bool is_x2apic_msrpm_offset(u32 offset)
 	       (msr < (APIC_BASE_MSR + 0x100));
 }
 
+static inline bool is_vnmi_enabled(struct vcpu_svm *svm)
+{
+	/* L1's vNMI is inhibited while a nested guest is running */
+	if (is_guest_mode(&svm->vcpu))
+		return false;
+
+	return !!(svm->vmcb01.ptr->control.int_ctl & V_NMI_ENABLE);
+}
+
 /* svm.c */
 #define MSR_INVALID			0xffffffffU
-- 
2.26.3
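[For reference, a self-contained sketch of the int_ctl protocol the
patch relies on. The bit positions below are illustrative placeholders,
not the authoritative values; those come from the vNMI definitions
earlier in this series.]

	#include <stdbool.h>
	#include <stdint.h>

	/* Placeholder bit values, assumed for illustration only. */
	#define V_NMI_PENDING	(1u << 11)	/* one NMI latched for injection */
	#define V_NMI_MASK	(1u << 12)	/* guest is inside its NMI handler */
	#define V_NMI_ENABLE	(1u << 26)	/* hardware NMI tracking enabled */

	/*
	 * Mirrors the logic of svm_set_hw_nmi_pending(): only one NMI can be
	 * latched in hardware at a time, so report failure when the slot is
	 * taken and let the caller keep the NMI in software tracking.
	 */
	static bool vnmi_try_queue(uint32_t *int_ctl)
	{
		if (!(*int_ctl & V_NMI_ENABLE))
			return false;	/* vNMI off, or inhibited for nesting */
		if (*int_ctl & V_NMI_PENDING)
			return false;	/* hardware slot already occupied */
		*int_ctl |= V_NMI_PENDING;
		return true;
	}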