Under nested virtualization, the L1 hypervisor may intercept an exception raised during the execution of L2 before the exception is delivered. When the intercepted exception is #PF, the VM-exit to the L1 hypervisor precedes the modification of CR2. When the intercepted exception is #DB, the VM-exit to the L1 hypervisor precedes the modifications of DR6 and DR7 under VMX, but the VM-exit to the L1 hypervisor follows the modifications of DR6 and DR7 under SVM. At present, CR2 is modified too early under both VMX and SVM. DR6 is modified too early under VMX. DR7 is modified at the appropriate time. Unfortunately, it is possible to exit to userspace with one of these exceptions pending, and userspace may rely on the premature side-effects. It is also possible for userspace to inject one of these exceptions, in which case, userspace will presumably have already processed the side-effects. To address this problem, a new per-VM capability (KVM_CAP_EXCEPTION_PAYLOAD) will be introduced. When this capability is enabled by userspace, the faulting linear address will be included with the information about a pending #PF in L2, and the "new DR6 bits" will be included with the information about a pending #DB in L2. This ancillary exception information is carried in a new "payload" field. Reported-by: Jim Mattson <jmattson@xxxxxxxxxx> Suggested-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> Signed-off-by: Jim Mattson <jmattson@xxxxxxxxxx> Reviewed-by: Peter Shier <pshier@xxxxxxxxxx> --- Documentation/virtual/kvm/api.txt | 13 +++++++++++-- arch/x86/include/asm/kvm_host.h | 3 +++ arch/x86/include/uapi/asm/kvm.h | 6 ++++-- arch/x86/kvm/x86.c | 28 ++++++++++++++++++++++++--- tools/arch/x86/include/uapi/asm/kvm.h | 6 ++++-- 5 files changed, 47 insertions(+), 9 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 647f94128a85..2df2cca81cf5 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -850,7 +850,7 @@ struct kvm_vcpu_events { __u8 injected; __u8 nr; __u8 has_error_code; - __u8 pad; + __u8 has_payload; __u32 error_code; } exception; struct { @@ -873,9 +873,11 @@ struct kvm_vcpu_events { __u8 smm_inside_nmi; __u8 latched_init; } smi; + __u32 reserved[7]; + __u64 exception_payload; }; -Only two fields are defined in the flags field: +The following bits are defined in the flags field: - KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that interrupt.shadow contains a valid state. @@ -883,6 +885,10 @@ Only two fields are defined in the flags field: - KVM_VCPUEVENT_VALID_SMM may be set in the flags field to signal that smi contains a valid state. +- KVM_VCPUEVENT_VALID_PAYLOAD may be set in the flags field to signal that + exception.has_payload and payload contain valid state + (i.e. KVM_CAP_EXCEPTION_PAYLOAD is enabled). + ARM/ARM64: If the guest accesses a device that is being emulated by the host kernel in @@ -961,6 +967,9 @@ shall be written into the VCPU. KVM_VCPUEVENT_VALID_SMM can only be set if KVM_CAP_X86_SMM is available. +KVM_VCPUEVENT_VALID_PAYLOAD can only be set if KVM_CAP_EXCEPTION_PAYLOAD +is enabled. + ARM/ARM64: Set the pending SError exception state for this VCPU. It is not possible to diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 09b2e3e2cf1b..026229a593f2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -585,6 +585,8 @@ struct kvm_vcpu_arch { bool has_error_code; u8 nr; u32 error_code; + unsigned long payload; + bool has_payload; u8 nested_apf; } exception; @@ -871,6 +873,7 @@ struct kvm_arch { bool x2apic_broadcast_quirk_disabled; bool guest_can_read_msr_platform_info; + bool exception_payload_enabled; }; struct kvm_vm_stat { diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index fd23d5778ea1..e3ea52bdd461 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -288,6 +288,7 @@ struct kvm_reinject_control { #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 #define KVM_VCPUEVENT_VALID_SMM 0x00000008 +#define KVM_VCPUEVENT_VALID_PAYLOAD 0x00000010 /* Interrupt shadow states */ #define KVM_X86_SHADOW_INT_MOV_SS 0x01 @@ -299,7 +300,7 @@ struct kvm_vcpu_events { __u8 injected; __u8 nr; __u8 has_error_code; - __u8 pad; + __u8 has_payload; __u32 error_code; } exception; struct { @@ -322,7 +323,8 @@ struct kvm_vcpu_events { __u8 smm_inside_nmi; __u8 latched_init; } smi; - __u32 reserved[9]; + __u32 reserved[7]; + __u64 exception_payload; }; /* for KVM_GET/SET_DEBUGREGS */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index edbf00ec56b3..dbc538d66505 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -431,6 +431,8 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, vcpu->arch.exception.has_error_code = has_error; vcpu->arch.exception.nr = nr; vcpu->arch.exception.error_code = error_code; + vcpu->arch.exception.has_payload = false; + vcpu->arch.exception.payload = 0; return; } @@ -455,6 +457,8 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, vcpu->arch.exception.has_error_code = true; vcpu->arch.exception.nr = DF_VECTOR; vcpu->arch.exception.error_code = 0; + vcpu->arch.exception.has_payload = false; + vcpu->arch.exception.payload = 0; } else /* replace previous exception with a new one in a hope that instruction re-execution will regenerate lost @@ -3373,8 +3377,9 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, !kvm_exception_is_soft(vcpu->arch.exception.nr); events->exception.nr = vcpu->arch.exception.nr; events->exception.has_error_code = vcpu->arch.exception.has_error_code; - events->exception.pad = 0; events->exception.error_code = vcpu->arch.exception.error_code; + events->exception.has_payload = vcpu->arch.exception.has_payload; + events->exception_payload = vcpu->arch.exception.payload; events->interrupt.injected = vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft; @@ -3398,6 +3403,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SHADOW | KVM_VCPUEVENT_VALID_SMM); + if (vcpu->kvm->arch.exception_payload_enabled) + events->flags |= KVM_VCPUEVENT_VALID_PAYLOAD; memset(&events->reserved, 0, sizeof(events->reserved)); } @@ -3409,12 +3416,18 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR | KVM_VCPUEVENT_VALID_SHADOW - | KVM_VCPUEVENT_VALID_SMM)) + | KVM_VCPUEVENT_VALID_SMM + | KVM_VCPUEVENT_VALID_PAYLOAD)) + return -EINVAL; + + if ((events->flags & KVM_VCPUEVENT_VALID_PAYLOAD) && + !vcpu->kvm->arch.exception_payload_enabled) return -EINVAL; if (events->exception.injected && (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR || - is_guest_mode(vcpu))) + (is_guest_mode(vcpu) && + !vcpu->kvm->arch.exception_payload_enabled))) return -EINVAL; /* INITs are latched while in SMM */ @@ -3429,6 +3442,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.exception.nr = events->exception.nr; vcpu->arch.exception.has_error_code = events->exception.has_error_code; vcpu->arch.exception.error_code = events->exception.error_code; + if (vcpu->kvm->arch.exception_payload_enabled) { + vcpu->arch.exception.has_payload = events->exception.has_payload; + vcpu->arch.exception.payload = events->exception_payload; + } else { + vcpu->arch.exception.has_payload = 0; + vcpu->arch.exception.payload = 0; + } vcpu->arch.interrupt.injected = events->interrupt.injected; vcpu->arch.interrupt.nr = events->interrupt.nr; @@ -9463,6 +9483,8 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, vcpu->arch.exception.nr = 0; vcpu->arch.exception.has_error_code = false; vcpu->arch.exception.error_code = 0; + vcpu->arch.exception.has_payload = false; + vcpu->arch.exception.payload = 0; } else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) { fault.vector = PF_VECTOR; fault.error_code_valid = true; diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 86299efa804a..c4e4f03ce436 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -288,6 +288,7 @@ struct kvm_reinject_control { #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 #define KVM_VCPUEVENT_VALID_SMM 0x00000008 +#define KVM_VCPUEVENT_VALID_PAYLOAD 0x00000010 /* Interrupt shadow states */ #define KVM_X86_SHADOW_INT_MOV_SS 0x01 @@ -299,7 +300,7 @@ struct kvm_vcpu_events { __u8 injected; __u8 nr; __u8 has_error_code; - __u8 pad; + __u8 has_payload; __u32 error_code; } exception; struct { @@ -322,7 +323,8 @@ struct kvm_vcpu_events { __u8 smm_inside_nmi; __u8 latched_init; } smi; - __u32 reserved[9]; + __u32 reserved[7]; + __u64 payload; }; /* for KVM_GET/SET_DEBUGREGS */ -- 2.19.0.605.g01d371f741-goog