This new substate exports all state related to exceptions, interrupts, and NMIs that was so far invisible to user space. Together with appropriate user space changes, this fixes sporadic problems with vmsave/restore, live migration and system reset. Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx> --- Documentation/kvm/api.txt | 28 ++++++++++++++ arch/x86/include/asm/kvm.h | 23 +++++++++++ arch/x86/include/asm/kvm_host.h | 2 + arch/x86/kvm/svm.c | 22 +++++++++++ arch/x86/kvm/vmx.c | 30 +++++++++++++++ arch/x86/kvm/x86.c | 79 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 184 insertions(+), 0 deletions(-) diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index 6421aee..66a0814 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -929,3 +929,31 @@ Deprecates: KVM_GET/SET_CPUID2 Architectures: x86 Payload: struct kvm_lapic Deprecates: KVM_GET/SET_LAPIC + +6.8 KVM_X86_VCPU_STATE_EVENTS + +Architectures: x86 +Payload: struct kvm_x86_event_state +Deprecates: interrupt_bitmap of struct kvm_sregs (pass it cleared) + +struct kvm_x86_event_state { + struct { + __u8 injected; + __u8 nr; + __u8 pad[2]; + __u32 error_code; + } exception; + struct { + __u8 injected; + __u8 nr; + __u8 soft; + __u8 pad; + } interrupt; + struct { + __u8 injected; + __u8 pending; + __u8 masked; + __u8 pad; + } nmi; + __u32 sipi_vector; +}; diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 42286ef..66c0843 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -257,5 +257,28 @@ struct kvm_reinject_control { #define KVM_X86_VCPU_STATE_MSRS 1000 #define KVM_X86_VCPU_STATE_CPUID 1001 #define KVM_X86_VCPU_STATE_LAPIC 1002 +#define KVM_X86_VCPU_STATE_EVENTS 1003 + +struct kvm_x86_event_state { + struct { + __u8 injected; + __u8 nr; + __u8 pad[2]; + __u32 error_code; + } exception; + struct { + __u8 injected; + __u8 nr; + __u8 soft; + __u8 pad; + } interrupt; + struct { + __u8 injected; + __u8 pending; + __u8 masked; + __u8 pad; + } 
nmi; + __u32 sipi_vector; +}; #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 26a74b7..06e0856 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -523,6 +523,8 @@ struct kvm_x86_ops { bool has_error_code, u32 error_code); int (*interrupt_allowed)(struct kvm_vcpu *vcpu); int (*nmi_allowed)(struct kvm_vcpu *vcpu); + bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); + void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 34b700f..3de0b37 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2499,6 +2499,26 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu) !(svm->vcpu.arch.hflags & HF_NMI_MASK); } +static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + return !!(svm->vcpu.arch.hflags & HF_NMI_MASK); +} + +static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + if (masked) { + svm->vcpu.arch.hflags |= HF_NMI_MASK; + svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET); + } else { + svm->vcpu.arch.hflags &= ~HF_NMI_MASK; + svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); + } +} + static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -2946,6 +2966,8 @@ static struct kvm_x86_ops svm_x86_ops = { .queue_exception = svm_queue_exception, .interrupt_allowed = svm_interrupt_allowed, .nmi_allowed = svm_nmi_allowed, + .get_nmi_mask = svm_get_nmi_mask, + .set_nmi_mask = svm_set_nmi_mask, .enable_nmi_window = enable_nmi_window, .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b923f2a..63e4a50 100644 --- 
a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2633,6 +2633,34 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) GUEST_INTR_STATE_NMI)); } +static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) +{ + if (!cpu_has_virtual_nmis()) + return to_vmx(vcpu)->soft_vnmi_blocked; + else + return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + GUEST_INTR_STATE_NMI); +} + +static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!cpu_has_virtual_nmis()) { + if (vmx->soft_vnmi_blocked != masked) { + vmx->soft_vnmi_blocked = masked; + vmx->vnmi_blocked_time = 0; + } + } else { + if (masked) + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + else + vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + } +} + static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) { return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && @@ -3973,6 +4001,8 @@ static struct kvm_x86_ops vmx_x86_ops = { .queue_exception = vmx_queue_exception, .interrupt_allowed = vmx_interrupt_allowed, .nmi_allowed = vmx_nmi_allowed, + .get_nmi_mask = vmx_get_nmi_mask, + .set_nmi_mask = vmx_set_nmi_mask, .enable_nmi_window = enable_nmi_window, .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3484787..8dc968d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4851,12 +4851,56 @@ out_free_lapic: kfree(lapic); break; } + case KVM_X86_VCPU_STATE_EVENTS: { + struct kvm_x86_event_state events; + + vcpu_load(vcpu); + + events.exception.injected = vcpu->arch.exception.pending; + events.exception.nr = vcpu->arch.exception.nr; + events.exception.error_code = vcpu->arch.exception.error_code; + + events.interrupt.injected = vcpu->arch.interrupt.pending; + events.interrupt.nr = vcpu->arch.interrupt.nr; + events.interrupt.soft = vcpu->arch.interrupt.soft; + + events.nmi.injected = vcpu->arch.nmi_injected; + events.nmi.pending = 
vcpu->arch.nmi_pending; + events.nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); + + events.sipi_vector = vcpu->arch.sipi_vector; + + vcpu_put(vcpu); + + r = -EFAULT; + if (copy_to_user(argp, &events, + sizeof(struct kvm_x86_event_state))) + break; + r = 0; + break; + } default: r = -EINVAL; } return r; } +static bool exception_has_error_code(int nr) +{ + switch (nr) { + case 8: + case 10: + case 11: + case 12: + case 13: + case 14: + case 17: + return true; + default: + return false; + } +} + int kvm_arch_vcpu_set_substate(struct kvm_vcpu *vcpu, uint8_t __user *arg_base, struct kvm_vcpu_substate *substate) { @@ -4901,6 +4945,40 @@ out_free_lapic: kfree(lapic); break; } + case KVM_X86_VCPU_STATE_EVENTS: { + struct kvm_x86_event_state events; + + r = -EFAULT; + if (copy_from_user(&events, argp, + sizeof(struct kvm_x86_event_state))) + break; + + vcpu_load(vcpu); + + vcpu->arch.exception.pending = events.exception.injected; + vcpu->arch.exception.nr = events.exception.nr; + vcpu->arch.exception.has_error_code = + exception_has_error_code(events.exception.nr); + vcpu->arch.exception.error_code = events.exception.error_code; + + vcpu->arch.interrupt.pending = events.interrupt.injected; + vcpu->arch.interrupt.nr = events.interrupt.nr; + vcpu->arch.interrupt.soft = events.interrupt.soft; + if (vcpu->arch.interrupt.pending && + irqchip_in_kernel(vcpu->kvm)) + kvm_pic_clear_isr_ack(vcpu->kvm); + + vcpu->arch.nmi_injected = events.nmi.injected; + vcpu->arch.nmi_pending = events.nmi.pending; + kvm_x86_ops->set_nmi_mask(vcpu, events.nmi.masked); + + vcpu->arch.sipi_vector = events.sipi_vector; + + vcpu_put(vcpu); + + r = 0; + break; + } default: r = -EINVAL; } @@ -4913,6 +4991,7 @@ bool kvm_arch_check_substate(u32 type) case KVM_X86_VCPU_STATE_MSRS: case KVM_X86_VCPU_STATE_CPUID: case KVM_X86_VCPU_STATE_LAPIC: + case KVM_X86_VCPU_STATE_EVENTS: return true; default: return false; -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to 
majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html