This plugs an NMI-related hole in the VCPU synchronization between kernel
and user space. So far, neither pending NMIs nor the NMI-inhibit mask was
properly read or set, which could cause problems on vmsave/restore, live
migration, and system reset. Fix this by making use of the new VCPU
substate interface.

Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx>
---
 Documentation/kvm/api.txt       |   12 ++++++++++++
 arch/x86/include/asm/kvm.h      |    7 +++++++
 arch/x86/include/asm/kvm_host.h |    2 ++
 arch/x86/kvm/svm.c              |   22 ++++++++++++++++++++++
 arch/x86/kvm/vmx.c              |   30 ++++++++++++++++++++++++++
 arch/x86/kvm/x86.c              |   26 ++++++++++++++++++++++++++
 6 files changed, 99 insertions(+), 0 deletions(-)

diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index bee5bbd..e483edb 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -848,3 +848,15 @@ Deprecates: KVM_GET/SET_CPUID2
 Architectures: x86
 Payload: struct kvm_lapic
 Deprecates: KVM_GET/SET_LAPIC
+
+6.8 KVM_X86_VCPU_STATE_NMI
+
+Architectures: x86
+Payload: struct kvm_nmi_state
+Deprecates: -
+
+struct kvm_nmi_state {
+	__u8 pending;
+	__u8 masked;
+	__u8 pad1[6];
+};
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 326615a..6ad4448 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -256,5 +256,12 @@ struct kvm_reinject_control {
 #define KVM_X86_VCPU_STATE_MSRS		1000
 #define KVM_X86_VCPU_STATE_CPUID	1001
 #define KVM_X86_VCPU_STATE_LAPIC	1002
+#define KVM_X86_VCPU_STATE_NMI		1003
+
+struct kvm_nmi_state {
+	__u8 pending;
+	__u8 masked;
+	__u8 pad1[6];
+};
 
 #endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 179a919..b6b2db4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -513,6 +513,8 @@ struct kvm_x86_ops {
 			   unsigned char *hypercall_addr);
 	void (*set_irq)(struct kvm_vcpu *vcpu);
 	void (*set_nmi)(struct kvm_vcpu *vcpu);
+	bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
+	void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
 	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
 				bool has_error_code, u32 error_code);
 	int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 170b2d9..a16ee6e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2498,6 +2498,26 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
 		!(svm->vcpu.arch.hflags & HF_NMI_MASK);
 }
 
+static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
+}
+
+static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	if (masked) {
+		svm->vcpu.arch.hflags |= HF_NMI_MASK;
+		svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
+	} else {
+		svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
+		svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
+	}
+}
+
 static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -2945,6 +2965,8 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.queue_exception = svm_queue_exception,
 	.interrupt_allowed = svm_interrupt_allowed,
 	.nmi_allowed = svm_nmi_allowed,
+	.get_nmi_mask = svm_get_nmi_mask,
+	.set_nmi_mask = svm_set_nmi_mask,
 	.enable_nmi_window = enable_nmi_window,
 	.enable_irq_window = enable_irq_window,
 	.update_cr8_intercept = update_cr8_intercept,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 364263a..6e032e4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2655,6 +2655,34 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
 			GUEST_INTR_STATE_NMI));
 }
 
+static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
+{
+	if (!cpu_has_virtual_nmis())
+		return to_vmx(vcpu)->soft_vnmi_blocked;
+	else
+		return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+			  GUEST_INTR_STATE_NMI);
+}
+
+static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (!cpu_has_virtual_nmis()) {
+		if (vmx->soft_vnmi_blocked != masked) {
+			vmx->soft_vnmi_blocked = masked;
+			vmx->vnmi_blocked_time = 0;
+		}
+	} else {
+		if (masked)
+			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+				      GUEST_INTR_STATE_NMI);
+		else
+			vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+					GUEST_INTR_STATE_NMI);
+	}
+}
+
 static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
 	return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
@@ -4006,6 +4034,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.queue_exception = vmx_queue_exception,
 	.interrupt_allowed = vmx_interrupt_allowed,
 	.nmi_allowed = vmx_nmi_allowed,
+	.get_nmi_mask = vmx_get_nmi_mask,
+	.set_nmi_mask = vmx_set_nmi_mask,
 	.enable_nmi_window = enable_nmi_window,
 	.enable_irq_window = enable_irq_window,
 	.update_cr8_intercept = update_cr8_intercept,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 46fad88..e7ce505 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4686,6 +4686,19 @@ out_free_lapic:
 		kfree(lapic);
 		break;
 	}
+	case KVM_X86_VCPU_STATE_NMI: {
+		struct kvm_nmi_state nmi;
+
+		vcpu_load(vcpu);
+		nmi.pending = vcpu->arch.nmi_pending;
+		nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
+		vcpu_put(vcpu);
+		r = -EFAULT;
+		if (copy_to_user(argp, &nmi, sizeof(struct kvm_nmi_state)))
+			break;
+		r = 0;
+		break;
+	}
 	default:
 		r = -EINVAL;
 	}
@@ -4733,6 +4746,19 @@ out_free_lapic:
 		kfree(lapic);
 		break;
 	}
+	case KVM_X86_VCPU_STATE_NMI: {
+		struct kvm_nmi_state nmi;
+
+		r = -EFAULT;
+		if (copy_from_user(&nmi, argp, sizeof(struct kvm_nmi_state)))
+			break;
+		vcpu_load(vcpu);
+		vcpu->arch.nmi_pending = nmi.pending;
+		kvm_x86_ops->set_nmi_mask(vcpu, nmi.masked);
+		vcpu_put(vcpu);
+		r = 0;
+		break;
+	}
 	default:
 		r = -EINVAL;
 	}
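
As a usage illustration (not part of the patch): a user-space VMM could
save and restore the new NMI substate roughly as sketched below. The
transport helper kvm_vcpu_substate_xfer() is a made-up stand-in for the
proposed KVM_GET/SET_VCPU_STATE container interface from the companion
series; only struct kvm_nmi_state and KVM_X86_VCPU_STATE_NMI are defined
by this patch.

#include <stdbool.h>
#include <stddef.h>
#include <string.h>
#include <linux/kvm.h>	/* struct kvm_nmi_state, with this patch applied */

/*
 * Hypothetical transport helper, standing in for the proposed
 * KVM_GET/SET_VCPU_STATE container ioctls; 'set' selects the
 * direction of the transfer.
 */
int kvm_vcpu_substate_xfer(int vcpu_fd, int type, void *payload,
			   size_t size, bool set);

/* Save the NMI state of a VCPU, e.g. on the migration source. */
static int save_nmi_state(int vcpu_fd, struct kvm_nmi_state *nmi)
{
	return kvm_vcpu_substate_xfer(vcpu_fd, KVM_X86_VCPU_STATE_NMI,
				      nmi, sizeof(*nmi), false);
}

/* Restore it on the migration target. */
static int load_nmi_state(int vcpu_fd, struct kvm_nmi_state *nmi)
{
	return kvm_vcpu_substate_xfer(vcpu_fd, KVM_X86_VCPU_STATE_NMI,
				      nmi, sizeof(*nmi), true);
}

/* On system reset: no NMI pending, NMIs not inhibited. */
static int reset_nmi_state(int vcpu_fd)
{
	struct kvm_nmi_state nmi;

	memset(&nmi, 0, sizeof(nmi));
	return load_nmi_state(vcpu_fd, &nmi);
}

Transferring the masked flag matters in particular when a VCPU is
migrated between entry to an NMI handler and its IRET, which is exactly
the window the new get/set_nmi_mask hooks expose to user space.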