Intel SGX adds a new CPL3-only execution environment referred to as an "enclave". To protect the secrets of an enclave, the CPU's state is loaded with synthetic data when exiting the enclave (the enclave's state is saved/restored via protected memory), and the RIP is set to a defined exit value. This behavior also applies to VMi-Exits from the enclave, e.g. GUEST_RIP may not necessarily reflect the actual RIP that triggered the VMExit. To help a VMM recognize and handle exits from enclaves, SGX adds bits to existing VMCS fields, VM_EXIT_REASON.VMX_EXIT_REASON_FROM_ENCLAVE and GUEST_INTERRUPTIBILITY_INFO.GUEST_INTR_STATE_ENCLAVE_INTR. Define the new architectural bits and add a boolean to struct vcpu_vmx to cache VMX_EXIT_REASON_FROM_ENCLAVE and clear the bit in exit_reason so that checks against exit_reason do not need to account for SGX, e.g. exit_reason == EXIT_REASON_EXCEPTION_NMI continues to work. As for new behavior for VM-Exits from enclaves, KVM is for the most part a passive observer of both bits, e.g. it needs to account for the bits when propagating information to a nested VMM, but otherwise doesn't need to act differently for VMExits from enclaves. The one scenario that is impacted is emulation, which becomes impossible since KVM does not have access to the RIP or instruction stream that triggered the VMExit[2]. This is largely a non-issue as most instructions that might trigger VM-Exit are designed to unconditionally that may VM-Exit but do not #UD, KVM either never sets the exiting control, e.g. PAUSE_EXITING[1], or sets it if and only if the feature is not exposed to the guest in order to inject a #UD, e.g. RDRAND_EXITING. But, because it is still possible for a guest to trigger emulation, e.g. MMIO, inject a #UD if KVM ever attempts emulation after a VM-Exit from an enclave. This is architecturally accurate for instruction VM-Exits, and for MMIO it's the least bad choice, e.g. it's preferable to killing the VM. In practice, only broken or particularly stupid guests should ever encounter this behavior. Add a WARN in skip_emulated_instruction to detect any attempt to modify the guest's RIP during an SGX enclave VM-Exit as all such flows should either be unreachable or must handle exits from enclaves before getting to skip_emulated_instruction. [1] PAUSE_LOOP_EXITING only affects CPL0 and enclaves exist only at CPL3, so we also don't need to worry about that interaction. [2] Impossible for all practical purposes. Not truly impossible since KVM could implement some form of para-virtualization scheme. Signed-off-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx> --- arch/x86/include/asm/vmx.h | 1 + arch/x86/include/uapi/asm/vmx.h | 1 + arch/x86/kvm/vmx/nested.c | 2 ++ arch/x86/kvm/vmx/vmx.c | 42 ++++++++++++++++++++++++++++++--- arch/x86/kvm/vmx/vmx.h | 3 +++ 5 files changed, 46 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index a39136b0d509..a62ac47d2006 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -364,6 +364,7 @@ enum vmcs_field { #define GUEST_INTR_STATE_MOV_SS 0x00000002 #define GUEST_INTR_STATE_SMI 0x00000004 #define GUEST_INTR_STATE_NMI 0x00000008 +#define GUEST_INTR_STATE_ENCLAVE_INTR 0x00000010 /* GUEST_ACTIVITY_STATE flags */ #define GUEST_ACTIVITY_ACTIVE 0 diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index d213ec5c3766..501a35bd4cc7 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -27,6 +27,7 @@ #define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 +#define VMX_EXIT_REASON_FROM_ENCLAVE 0x08000000 #define EXIT_REASON_EXCEPTION_NMI 0 #define EXIT_REASON_EXTERNAL_INTERRUPT 1 diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 46af3a5e9209..fef4fb3e1aaa 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3523,6 +3523,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, /* update exit information fields: */ vmcs12->vm_exit_reason = exit_reason; + if (to_vmx(vcpu)->sgx_enclave_exit) + vmcs12->vm_exit_reason |= VMX_EXIT_REASON_FROM_ENCLAVE; vmcs12->exit_qualification = exit_qualification; vmcs12->vm_exit_intr_info = exit_intr_info; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f48fc990ca6d..abcd2f7a36f5 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1460,16 +1460,40 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data) static bool vmx_is_emulatable(struct kvm_vcpu *vcpu, void *insn, int insn_len) { + if (unlikely(to_vmx(vcpu)->sgx_enclave_exit)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return false; + } return true; } static void skip_emulated_instruction(struct kvm_vcpu *vcpu) { + u32 instr_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); unsigned long rip; - rip = kvm_rip_read(vcpu); - rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); - kvm_rip_write(vcpu, rip); + /* + * Emulating an enclave instruction is not supported as KVM cannot + * access the enclave's memory or its true RIP, e.g. the GUEST_RIP + * field points at the exit point of the enclave, not the RIP that + * actually triggered the VMExit. But, because most instructions + * that cause VM-Exit will #UD in an enclave, this conundrum is + * handled for us. There are a few exceptions, notably the debug + * instructions INT1ICEBRK and INT3, as they are allowed in debug + * enclaves and generate #DB/#BP as expected, which KVM might + * intercept. But again, the CPU does the dirty work and saves an + * instr length of zero so that VMMs don't shoot themselves in the + * foot. WARN if we try to skip a non-zero length instruction after + * a VMExit from an enclave. + */ + if (likely(instr_len)) { + WARN(to_vmx(vcpu)->sgx_enclave_exit, + "KVM: skipping instruction after SGX enclave VM-Exit"); + + rip = kvm_rip_read(vcpu); + rip += instr_len; + kvm_rip_write(vcpu, rip); + } /* skipping an emulated instruction also counts */ vmx_set_interrupt_shadow(vcpu, 0); @@ -5104,6 +5128,9 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) { gpa_t gpa; + if (!vmx_is_emulatable(vcpu, NULL, 0)) + return 1; + /* * A nested guest cannot optimize MMIO vmexits, because we have an * nGPA here instead of the required GPA. @@ -6537,6 +6564,15 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->idt_vectoring_info = 0; vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON); + + /* + * Immediately cache and clear the "exit from SGX enclave" bit so that + * KVM can directly compare exit_reason against the base exit reasons, + * e.g. exit_reason == EXIT_REASON_EXCEPTION_NMI. + */ + vmx->sgx_enclave_exit = + (vmx->exit_reason & VMX_EXIT_REASON_FROM_ENCLAVE); + vmx->exit_reason &= ~VMX_EXIT_REASON_FROM_ENCLAVE; if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) return; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 61128b48c503..6d1b57e0337e 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -247,6 +247,9 @@ struct vcpu_vmx { bool req_immediate_exit; + /* Exit from SGX enclave */ + bool sgx_enclave_exit; + /* Support for PML */ #define PML_ENTITY_NUM 512 struct page *pml_pg; -- 2.22.0