The interrupt shadow created by STI or MOV-SS-like operations is part of the VCPU state and must be preserved across migration. Transfer it in the spare padding field of kvm_vcpu_events.interrupt. As a side effect we now have to make vmx_set_interrupt_shadow robust against both shadow types being set. Give MOV SS a higher priority and skip STI in that case to avoid that VMX throws a fault on next entry. Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx> --- Changes in v3 (aka "back to square #1"): - export both MOV SS and STI shadows, do not collapse them Documentation/kvm/api.txt | 11 ++++++++++- arch/x86/include/asm/kvm.h | 7 ++++++- arch/x86/include/asm/kvm_emulate.h | 3 --- arch/x86/kvm/emulate.c | 5 +++-- arch/x86/kvm/svm.c | 2 +- arch/x86/kvm/vmx.c | 8 ++++---- arch/x86/kvm/x86.c | 12 ++++++++++-- include/linux/kvm.h | 1 + 8 files changed, 35 insertions(+), 14 deletions(-) diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index beb444a..9e5de5a 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -656,6 +656,7 @@ struct kvm_clock_data { 4.29 KVM_GET_VCPU_EVENTS Capability: KVM_CAP_VCPU_EVENTS +Extended by: KVM_CAP_INTR_SHADOW Architectures: x86 Type: vm ioctl Parameters: struct kvm_vcpu_event (out) @@ -676,7 +677,7 @@ struct kvm_vcpu_events { __u8 injected; __u8 nr; __u8 soft; - __u8 pad; + __u8 shadow; } interrupt; struct { __u8 injected; @@ -688,9 +689,13 @@ struct kvm_vcpu_events { __u32 flags; }; +KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that +interrupt.shadow contains a valid state. Otherwise, this field is undefined. + 4.30 KVM_SET_VCPU_EVENTS Capability: KVM_CAP_VCPU_EVENTS +Extended by: KVM_CAP_INTR_SHADOW Architectures: x86 Type: vm ioctl Parameters: struct kvm_vcpu_event (in) @@ -709,6 +714,10 @@ current in-kernel state. The bits are: KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector +If KVM_CAP_INTR_SHADOW is available, KVM_VCPUEVENT_VALID_SHADOW can be set in +the flags field to signal that interrupt.shadow contains a valid state and +shall be written into the VCPU. + 5. The kvm_run structure diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index f46b79f..fb61170 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -257,6 +257,11 @@ struct kvm_reinject_control { /* When set in flags, include corresponding fields on KVM_SET_VCPU_EVENTS */ #define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001 #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 +#define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 + +/* Interrupt shadow states */ +#define KVM_X86_SHADOW_INT_MOV_SS 0x01 +#define KVM_X86_SHADOW_INT_STI 0x02 /* for KVM_GET/SET_VCPU_EVENTS */ struct kvm_vcpu_events { @@ -271,7 +276,7 @@ struct kvm_vcpu_events { __u8 injected; __u8 nr; __u8 soft; - __u8 pad; + __u8 shadow; } interrupt; struct { __u8 injected; diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 7a6f54f..2666d7a 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -153,9 +153,6 @@ struct decode_cache { struct fetch_cache fetch; }; -#define X86_SHADOW_INT_MOV_SS 1 -#define X86_SHADOW_INT_STI 2 - struct x86_emulate_ctxt { /* Register state before/after emulation. */ struct kvm_vcpu *vcpu; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 9beda8e..135bc56 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2123,7 +2123,8 @@ special_insn: sel = c->src.val; if (c->modrm_reg == VCPU_SREG_SS) - toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS); + toggle_interruptibility(ctxt, + KVM_X86_SHADOW_INT_MOV_SS); if (c->modrm_reg <= 5) { type_bits = (c->modrm_reg == 1) ? 9 : 1; @@ -2374,7 +2375,7 @@ special_insn: if (emulator_bad_iopl(ctxt)) kvm_inject_gp(ctxt->vcpu, 0); else { - toggle_interruptibility(ctxt, X86_SHADOW_INT_STI); + toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_STI); ctxt->eflags |= X86_EFLAGS_IF; c->dst.type = OP_NONE; /* Disable writeback. */ } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 2e1e8d6..5f88a73 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -263,7 +263,7 @@ static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) u32 ret = 0; if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) - ret |= X86_SHADOW_INT_STI | X86_SHADOW_INT_MOV_SS; + ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS; return ret & mask; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f7c815b..ce5ec41 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -838,9 +838,9 @@ static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) int ret = 0; if (interruptibility & GUEST_INTR_STATE_STI) - ret |= X86_SHADOW_INT_STI; + ret |= KVM_X86_SHADOW_INT_STI; if (interruptibility & GUEST_INTR_STATE_MOV_SS) - ret |= X86_SHADOW_INT_MOV_SS; + ret |= KVM_X86_SHADOW_INT_MOV_SS; return ret & mask; } @@ -852,9 +852,9 @@ static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); - if (mask & X86_SHADOW_INT_MOV_SS) + if (mask & KVM_X86_SHADOW_INT_MOV_SS) interruptibility |= GUEST_INTR_STATE_MOV_SS; - if (mask & X86_SHADOW_INT_STI) + else if (mask & KVM_X86_SHADOW_INT_STI) interruptibility |= GUEST_INTR_STATE_STI; if ((interruptibility != interruptibility_old)) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d1b5024..9b13e15 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2132,6 +2132,9 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft; events->interrupt.nr = vcpu->arch.interrupt.nr; events->interrupt.soft = 0; + events->interrupt.shadow = + kvm_x86_ops->get_interrupt_shadow(vcpu, + KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI); events->nmi.injected = vcpu->arch.nmi_injected; events->nmi.pending = vcpu->arch.nmi_pending; @@ -2140,7 +2143,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, events->sipi_vector = vcpu->arch.sipi_vector; events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING - | KVM_VCPUEVENT_VALID_SIPI_VECTOR); + | KVM_VCPUEVENT_VALID_SIPI_VECTOR + | KVM_VCPUEVENT_VALID_SHADOW); vcpu_put(vcpu); } @@ -2149,7 +2153,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events) { if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING - | KVM_VCPUEVENT_VALID_SIPI_VECTOR)) + | KVM_VCPUEVENT_VALID_SIPI_VECTOR + | KVM_VCPUEVENT_VALID_SHADOW)) return -EINVAL; vcpu_load(vcpu); @@ -2164,6 +2169,9 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.interrupt.soft = events->interrupt.soft; if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) kvm_pic_clear_isr_ack(vcpu->kvm); + if (events->flags & KVM_VCPUEVENT_VALID_SHADOW) + kvm_x86_ops->set_interrupt_shadow(vcpu, + events->interrupt.shadow); vcpu->arch.nmi_injected = events->nmi.injected; if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) diff --git a/include/linux/kvm.h b/include/linux/kvm.h index dfa54be..46fb860 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -501,6 +501,7 @@ struct kvm_ioeventfd { #define KVM_CAP_HYPERV_VAPIC 45 #define KVM_CAP_HYPERV_SPIN 46 #define KVM_CAP_PCI_SEGMENT 47 +#define KVM_CAP_INTR_SHADOW 48 #ifdef KVM_CAP_IRQ_ROUTING -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html