Add a fallback mechanism to the in-kernel instruction emulator that allows userspace the opportunity to process an instruction the emulator was unable to. When the in-kernel instruction emulator fails to process an instruction it will either inject a #UD into the guest or exit to userspace with exit reason KVM_INTERNAL_ERROR. This is because it does not know how to proceed in an appropriate manner. This feature lets userspace get involved to see if it can figure out a better path forward. Signed-off-by: Aaron Lewis <aaronlewis@xxxxxxxxxx> Change-Id: If9876bc73d26f6c3ff9a8bce177c2fc6f160e629 --- Documentation/virt/kvm/api.rst | 16 ++++++++++++++++ arch/x86/include/asm/kvm_host.h | 6 ++++++ arch/x86/kvm/x86.c | 33 +++++++++++++++++++++++++++++---- include/uapi/linux/kvm.h | 20 ++++++++++++++++++++ tools/include/uapi/linux/kvm.h | 20 ++++++++++++++++++++ 5 files changed, 91 insertions(+), 4 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 307f2fcf1b02..f8278e893fbe 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6233,6 +6233,22 @@ KVM_RUN_BUS_LOCK flag is used to distinguish between them. This capability can be used to check / enable 2nd DAWR feature provided by POWER10 processor. +7.24 KVM_CAP_EXIT_ON_EMULATION_FAILURE +-------------------------------------- + +:Architectures: x86 +:Parameters: args[0] whether the feature should be enabled or not + +With this capability enabled, the in-kernel instruction emulator packs the exit +struct of KVM_INTERNAL_ERROR with the instruction length and instruction bytes +when an error occurs while emulating an instruction. This allows userspace to +then take a look at the instruction and see if it is able to handle it more +gracefully than the in-kernel emulator. + +When this capability is enabled use the emulation_failure struct instead of the +internal struct for the exit struct. They have the same layout, but the +emulation_failure struct matches the content better. + 8. Other capabilities. ====================== diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3768819693e5..07235d08e976 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1049,6 +1049,12 @@ struct kvm_arch { bool exception_payload_enabled; bool bus_lock_detection_enabled; + /* + * If exit_on_emulation_error is set, and the in-kernel instruction + * emulator fails to emulate an instruction, allow userspace + * the opportunity to look at it. + */ + bool exit_on_emulation_error; /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */ u32 user_space_msr_mask; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index eca63625aee4..f9a207f815fb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3771,6 +3771,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_X86_USER_SPACE_MSR: case KVM_CAP_X86_MSR_FILTER: case KVM_CAP_ENFORCE_PV_FEATURE_CPUID: + case KVM_CAP_EXIT_ON_EMULATION_FAILURE: r = 1; break; #ifdef CONFIG_KVM_XEN @@ -5357,6 +5358,10 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, kvm->arch.bus_lock_detection_enabled = true; r = 0; break; + case KVM_CAP_EXIT_ON_EMULATION_FAILURE: + kvm->arch.exit_on_emulation_error = cap->args[0]; + r = 0; + break; default: r = -EINVAL; break; @@ -7119,8 +7124,29 @@ void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) } EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt); +static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu) +{ + struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; + u64 insn_size = ctxt->fetch.end - ctxt->fetch.data; + struct kvm *kvm = vcpu->kvm; + + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->emulation_failure.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->emulation_failure.ndata = 0; + if (kvm->arch.exit_on_emulation_error && insn_size > 0) { + vcpu->run->emulation_failure.ndata = 3; + vcpu->run->emulation_failure.flags = + KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES; + vcpu->run->emulation_failure.insn_size = insn_size; + memcpy(vcpu->run->emulation_failure.insn_bytes, + ctxt->fetch.data, sizeof(ctxt->fetch.data)); + } +} + static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) { + struct kvm *kvm = vcpu->kvm; + ++vcpu->stat.insn_emulation_fail; trace_kvm_emulate_insn_failed(vcpu); @@ -7129,10 +7155,9 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) return 1; } - if (emulation_type & EMULTYPE_SKIP) { - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; + if (kvm->arch.exit_on_emulation_error || + (emulation_type & EMULTYPE_SKIP)) { + prepare_emulation_failure_exit(vcpu); return 0; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index f6afee209620..7c77099235b2 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -279,6 +279,17 @@ struct kvm_xen_exit { /* Encounter unexpected vm-exit reason */ #define KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON 4 +/* + * When using the suberror KVM_INTERNAL_ERROR_EMULATION, these flags are used + * to describe what is contained in the exit struct. The flags are used to + * describe it's contents, and the contents should be in ascending numerical + * order of the flag values. For example, if the flag + * KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES is set, the instruction + * length and instruction bytes would be expected to show up first because this + * flag has the lowest numerical value (1) of all the other flags. + */ +#define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0) + /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { /* in */ @@ -382,6 +393,14 @@ struct kvm_run { __u32 ndata; __u64 data[16]; } internal; + /* KVM_EXIT_INTERNAL_ERROR, too (not 2) */ + struct { + __u32 suberror; + __u32 ndata; + __u64 flags; + __u8 insn_size; + __u8 insn_bytes[15]; + } emulation_failure; /* KVM_EXIT_OSI */ struct { __u64 gprs[32]; @@ -1078,6 +1097,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_DIRTY_LOG_RING 192 #define KVM_CAP_X86_BUS_LOCK_EXIT 193 #define KVM_CAP_PPC_DAWR1 194 +#define KVM_CAP_EXIT_ON_EMULATION_FAILURE 195 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index f6afee209620..7c77099235b2 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -279,6 +279,17 @@ struct kvm_xen_exit { /* Encounter unexpected vm-exit reason */ #define KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON 4 +/* + * When using the suberror KVM_INTERNAL_ERROR_EMULATION, these flags are used + * to describe what is contained in the exit struct. The flags are used to + * describe it's contents, and the contents should be in ascending numerical + * order of the flag values. For example, if the flag + * KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES is set, the instruction + * length and instruction bytes would be expected to show up first because this + * flag has the lowest numerical value (1) of all the other flags. + */ +#define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0) + /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { /* in */ @@ -382,6 +393,14 @@ struct kvm_run { __u32 ndata; __u64 data[16]; } internal; + /* KVM_EXIT_INTERNAL_ERROR, too (not 2) */ + struct { + __u32 suberror; + __u32 ndata; + __u64 flags; + __u8 insn_size; + __u8 insn_bytes[15]; + } emulation_failure; /* KVM_EXIT_OSI */ struct { __u64 gprs[32]; @@ -1078,6 +1097,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_DIRTY_LOG_RING 192 #define KVM_CAP_X86_BUS_LOCK_EXIT 193 #define KVM_CAP_PPC_DAWR1 194 +#define KVM_CAP_EXIT_ON_EMULATION_FAILURE 195 #ifdef KVM_CAP_IRQ_ROUTING -- 2.31.1.368.gbe11c130af-goog