In order to support nested VNMI requires saving and restoring the VNMI bits during nested entry and exit. In case of L1 and L2 both using VNMI- Copy VNMI bits from vmcb12 to vmcb02 during entry and vice-versa during exit. And in case of L1 uses VNMI and L2 doesn't- Copy VNMI bits from vmcb01 to vmcb02 during entry and vice-versa during exit. Tested with the KVM-unit-test and Nested Guest scenario. Signed-off-by: Santosh Shukla <santosh.shukla@xxxxxxx> --- v3: - Added code to save and restore VNMI bit for L1 using vnmi and L2 doesn't for the entry and exit case. v2: - Save the V_NMI_PENDING/MASK state in vmcb12 on vmexit. - Tested nested environment - L1 injecting vnmi to L2. - Tested vnmi in kvm-unit-test. arch/x86/kvm/svm/nested.c | 27 +++++++++++++++++++++++++++ arch/x86/kvm/svm/svm.c | 5 +++++ arch/x86/kvm/svm/svm.h | 6 ++++++ 3 files changed, 38 insertions(+) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 76dcc8a3e849..3d986ec83147 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -662,6 +662,11 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, else int_ctl_vmcb01_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK); + if (nested_vnmi_enabled(svm)) + int_ctl_vmcb12_bits |= (V_NMI_PENDING | V_NMI_ENABLE | V_NMI_MASK); + else + int_ctl_vmcb01_bits |= (V_NMI_PENDING | V_NMI_ENABLE | V_NMI_MASK); + /* Copied from vmcb01. msrpm_base can be overwritten later. */ vmcb02->control.nested_ctl = vmcb01->control.nested_ctl; vmcb02->control.iopm_base_pa = vmcb01->control.iopm_base_pa; @@ -1010,6 +1015,28 @@ int nested_svm_vmexit(struct vcpu_svm *svm) vmcb12->control.event_inj = svm->nested.ctl.event_inj; vmcb12->control.event_inj_err = svm->nested.ctl.event_inj_err; + if (nested_vnmi_enabled(svm)) { + if (vmcb02->control.int_ctl & V_NMI_MASK) + vmcb12->control.int_ctl |= V_NMI_MASK; + else + vmcb12->control.int_ctl &= ~V_NMI_MASK; + + if (vmcb02->control.int_ctl & V_NMI_PENDING) + vmcb12->control.int_ctl |= V_NMI_PENDING; + else + vmcb12->control.int_ctl &= ~V_NMI_PENDING; + } else { + if (vmcb02->control.int_ctl & V_NMI_MASK) + vmcb01->control.int_ctl |= V_NMI_MASK; + else + vmcb01->control.int_ctl &= ~V_NMI_MASK; + + if (vmcb02->control.int_ctl & V_NMI_PENDING) + vmcb01->control.int_ctl |= V_NMI_PENDING; + else + vmcb01->control.int_ctl &= ~V_NMI_PENDING; + } + if (!kvm_pause_in_guest(vcpu->kvm)) { vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count; vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 4aa7606a9aa2..2e50a7ab32db 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4217,6 +4217,8 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) svm->vgif_enabled = vgif && guest_cpuid_has(vcpu, X86_FEATURE_VGIF); + svm->vnmi_enabled = vnmi && guest_cpuid_has(vcpu, X86_FEATURE_V_NMI); + svm_recalc_instruction_intercepts(vcpu, svm); /* For sev guests, the memory encryption bit is not reserved in CR3. */ @@ -4967,6 +4969,9 @@ static __init void svm_set_cpu_caps(void) if (vgif) kvm_cpu_cap_set(X86_FEATURE_VGIF); + if (vnmi) + kvm_cpu_cap_set(X86_FEATURE_V_NMI); + /* Nested VM can receive #VMEXIT instead of triggering #GP */ kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK); } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 7857a89d0ec8..4d6245ef5b6c 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -253,6 +253,7 @@ struct vcpu_svm { bool pause_filter_enabled : 1; bool pause_threshold_enabled : 1; bool vgif_enabled : 1; + bool vnmi_enabled : 1; u32 ldr_reg; u32 dfr_reg; @@ -533,6 +534,11 @@ static inline bool is_x2apic_msrpm_offset(u32 offset) (msr < (APIC_BASE_MSR + 0x100)); } +static inline bool nested_vnmi_enabled(struct vcpu_svm *svm) +{ + return svm->vnmi_enabled && (svm->nested.ctl.int_ctl & V_NMI_ENABLE); +} + static inline struct vmcb *get_vnmi_vmcb(struct vcpu_svm *svm) { if (!vnmi) -- 2.25.1