Add a new kvm_sgx structure to keep per-guest SGX state, including the guest's CPUID and EPC slot info. The initialization function checks the consistency of the SGX CPUID info from Qemu (returning an error in case Qemu did something wrong) and creates the EPC slot (only once, when first called). If anything goes wrong, an error is returned to Qemu, which can then stop creating vcpus or just kill the guest. The EPC slot is implemented as a private memory slot by KVM. This is the easiest way, as we don't expose a 'file' to userspace to let Qemu issue mmap to get a userspace virtual address for the EPC slot, and we don't want to use the SGX driver's mmap for this purpose either. An EPC page is actually allocated via the vma->vm_ops->fault handler associated with the EPC slot's vma, to comply with the current hva_to_pfn implementation, so that hva_to_pfn works for EPC as well. A new kvm_epc structure is also added to represent the guest's EPC slot info, and a new kvm_epc_page structure is added to track the status of each of the guest's EPC pages. It is used to keep track of all physical EPC pages allocated to the guest, which is needed when KVM wants to free all EPC pages allocated to the guest when the guest is destroyed. Note the SGX driver doesn't provide sgx_epc_pfn_to_page, so KVM needs to do this bookkeeping itself. Moreover, we can extend it in the future, e.g., to support EPC oversubscription between KVM guests (where we will track more per-page state of the guest's EPC). 
Signed-off-by: Kai Huang <kai.huang@xxxxxxxxxxxxxxx> --- arch/x86/include/asm/kvm_host.h | 4 +- arch/x86/kvm/sgx.c | 300 ++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/sgx.h | 71 ++++++++++ 3 files changed, 374 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 74ef58c8ff53..1d622334fc0e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -40,7 +40,7 @@ #define KVM_MAX_VCPU_ID 1023 #define KVM_USER_MEM_SLOTS 509 /* memory slots that are not exposed to userspace */ -#define KVM_PRIVATE_MEM_SLOTS 3 +#define KVM_PRIVATE_MEM_SLOTS 4 #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) #define KVM_PIO_PAGE_OFFSET 1 @@ -817,6 +817,8 @@ struct kvm_arch { bool x2apic_format; bool x2apic_broadcast_quirk_disabled; + + void *priv; /* x86 vendor specific data */ }; struct kvm_vm_stat { diff --git a/arch/x86/kvm/sgx.c b/arch/x86/kvm/sgx.c index 4b65b1bb1f30..a7040e6380a5 100644 --- a/arch/x86/kvm/sgx.c +++ b/arch/x86/kvm/sgx.c @@ -104,6 +104,306 @@ static inline u64 sgx_epc_page_to_pfn(struct sgx_epc_page *epg) return (u64)(epg->pa >> PAGE_SHIFT); } +static int __sgx_eremove(struct sgx_epc_page *epg) +{ + void *addr; + int r; + + addr = sgx_kmap_epc_page(epg); + r = __eremove(addr); + sgx_kunmap_epc_page(addr); + if (unlikely(r)) { + sgx_err("__eremove error: EPC pfn 0x%lx, r %d\n", + (unsigned long)sgx_epc_page_to_pfn(epg), + r); + } + + return r; +} + +/* By reaching here the mmap_sem should be already hold */ +static int kvm_epc_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct kvm_sgx *sgx = (struct kvm_sgx *)vma->vm_private_data; + struct kvm *kvm; + struct sgx_epc_page *epg; + struct kvm_epc_page *gepg; + u64 gfn, pfn; + + BUG_ON(!sgx); + kvm = sgx->kvm; + + gfn = to_epc(sgx)->base_gfn + (((unsigned long)vmf->address - + vma->vm_start) >> PAGE_SHIFT); + gepg = gfn_to_guest_epc_page(kvm, gfn); + + /* + * SGX driver doesn't 
support recycling EPC pages back from KVM + * guests yet, and it doesn't support out-of-EPC killer either, + * therefore if we don't use SGX_ALLOC_ATOMIC here, this function + * may never return in case SGX driver cannot recycle enough EPC + * pages from host SGX applications. + */ + epg = sgx_alloc_epc_page(SGX_ALLOC_ATOMIC); + if (!epg) { + /* Unable to allocate EPC. Kill the guest */ + sgx_err("kvm 0x%p, gfn 0x%lx: out of EPC when trying to " + "map EPC to guest.\n", kvm, (unsigned long)gfn); + goto error; + } + + pfn = sgx_epc_page_to_pfn(epg); + if (vm_insert_pfn(vma, (unsigned long)vmf->address, + (unsigned long)pfn)) { + sgx_err("kvm 0x%p, gfn 0x%lx: failed to install host mapping " + "on: hva 0x%lx, pfn 0x%lx\n", kvm, + (unsigned long)gfn, + (unsigned long)vmf->address, + (unsigned long)pfn); + sgx_free_epc_page(epg); + goto error; + } + + /* Book keeping physical EPC page allocated/mapped to particular GFN */ + gepg->epg = epg; + + return VM_FAULT_NOPAGE; /* EPC has not 'struct page' associated */ +error: + return VM_FAULT_SIGBUS; +} + +static void kvm_epc_close(struct vm_area_struct *vma) +{ +} + +static struct vm_operations_struct kvm_epc_ops = { + .fault = kvm_epc_fault, + /* close to prevent vma to be merged. */ + .close = kvm_epc_close, +}; + +static void kvm_init_epc_table(struct kvm_epc_page *epc_table, u64 npages) +{ + u64 i; + + for (i = 0; i < npages; i++) { + struct kvm_epc_page *gepg = epc_table + i; + + gepg->epg = NULL; + } +} + +static void kvm_destroy_epc_table(struct kvm_epc_page *epc_table, + u64 npages) +{ + u64 i; + int r; + + /* + * + */ + /* + * We need to call EREMOVE explicitly but not sgx_free_epc_page here + * for the first round as sgx_free_page (sgx_free_epc_page calls it) + * provided by SGX driver always does EREMOVE and adds EPC page back + * to sgx_free_list if there's no error. 
We don't keep SECS page to + * a temporary list but rely on sgx_free_epc_page to free all EPC pages + * in second round so just use EREMOVE at first round. + */ + for (i = 0; i < npages; i++) { + struct kvm_epc_page *gepg = epc_table + i; + struct sgx_epc_page *epg; + + if (!gepg->epg) + continue; + + epg = gepg->epg; + r = __sgx_eremove(epg); + if (r == SGX_CHILD_PRESENT) { + sgx_debug("EREMOVE SECS (0x%lx) prior to regular EPC\n", + (unsigned long)sgx_epc_page_to_pfn(epg)); + } + } + + /* + * EREMOVE on invalid EPC (which has been removed from enclave) will + * simply return success. + */ + for (i = 0; i < npages; i++) { + struct kvm_epc_page *gepg = epc_table + i; + struct sgx_epc_page *epg; + + if (!gepg->epg) + continue; + + epg = gepg->epg; + sgx_free_epc_page(epg); + } +} + +static int kvm_init_epc(struct kvm *kvm, u64 epc_base_pfn, u64 epc_npages) +{ + struct kvm_sgx *sgx = to_sgx(kvm); + struct vm_area_struct *vma; + struct kvm_memory_slot *slot; + struct kvm_epc_page *epc_table; + int r; + + r = x86_set_memory_region(kvm, SGX_EPC_MEMSLOT, + epc_base_pfn << PAGE_SHIFT, epc_npages << PAGE_SHIFT); + if (r) { + sgx_debug("x86_set_memory_region failed: %d\n", r); + return r; + } + + slot = id_to_memslot(kvm_memslots(kvm), SGX_EPC_MEMSLOT); + BUG_ON(!slot); + + epc_table = alloc_pages_exact(epc_npages * sizeof (struct kvm_epc_page), + GFP_KERNEL); + if (!epc_table) { + sgx_debug("unable to alloc guest EPC table.\n"); + x86_set_memory_region(kvm, SGX_EPC_MEMSLOT, 0, 0); + return -ENOMEM; + } + + kvm_init_epc_table(epc_table, epc_npages); + + sgx->epc.epc_table = epc_table; + sgx->epc.base_gfn = slot->base_gfn; + sgx->epc.npages = slot->npages; + + vma = find_vma_intersection(kvm->mm, slot->userspace_addr, + slot->userspace_addr + 1); + BUG_ON(!vma); + + /* EPC has no 'struct page' associated */ + vma->vm_flags |= VM_PFNMAP; + vma->vm_flags &= ~(VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | VM_MAYSHARE); + vma->vm_ops = &kvm_epc_ops; + vma->vm_private_data = (void 
*)sgx; + + return 0; +} + +static void kvm_destroy_epc(struct kvm *kvm) +{ + struct kvm_sgx *sgx = to_sgx(kvm); + struct kvm_epc_page *epc_table = to_epc(sgx)->epc_table; + u64 npages = to_epc(sgx)->npages; + + /* + * See kvm_arch_destroy_vm, which is also the reason that we don't + * keep slot in kvm_epc structure, as slot may already have been + * destroyed during abnormal exit. + */ + if (current->mm == kvm->mm) + x86_set_memory_region(kvm, SGX_EPC_MEMSLOT, 0, 0); + + kvm_destroy_epc_table(epc_table, npages); + + free_pages_exact(epc_table, npages * sizeof (struct kvm_epc_page)); +} + +static int kvm_populate_epc(struct kvm *kvm, u64 epc_base_pfn, + u64 epc_npages) +{ + int i; + + for (i = 0; i < epc_npages; i++) { + gfn_t gfn = epc_base_pfn + i; + /* This will trigger vma->vm_ops->fault to populate EPC */ + kvm_pfn_t pfn = gfn_to_pfn(kvm, gfn); + if (is_error_pfn(pfn)) + return -EFAULT; /* Cannot use ENOMEM */ + } + return 0; +} + +/* + * Initialize SGX for particular guest. This function may be called several + * times from caller. If guest SGX has not been initialized (this function is + * firstly called), we create kvm_sgx structure and initialize it. If guest SGX + * has already been initialized, we then check whether SGX cpuid from Qemu is + * consistent with existing one. If Qemu did something wrong by returning error + * here we can allow Qemu to stop creating vcpu, or just kill guest. We also + * populate all EPC for guest if oversubscription is not supported. + */ +int kvm_init_sgx(struct kvm *kvm, struct sgx_cpuinfo *sgxinfo) +{ + struct kvm_sgx *sgx = to_sgx(kvm); + u64 epc_base_pfn, epc_npages; + int r; + + if (!sgxinfo) + return -EINVAL; + + if (sgx) { + /* + * Already inited? We then check whether EPC base and size + * equal to saved value. 
+ */ + + if (memcmp(&(sgx->sgxinfo), sgxinfo, + sizeof(struct sgx_cpuinfo))) { + sgx_debug("SGX CPUID inconsistency from Qemu\n"); + return -EINVAL; + } + else + return 0; + } + + epc_base_pfn = sgxinfo->epc_base >> PAGE_SHIFT; + epc_npages = sgxinfo->epc_size >> PAGE_SHIFT; + + sgx = kzalloc(sizeof(struct kvm_sgx), GFP_KERNEL); + if (!sgx) { + sgx_debug("out of memory\n"); + return -ENOMEM; + } + sgx->kvm = kvm; + memcpy(&(sgx->sgxinfo), sgxinfo, sizeof(struct sgx_cpuinfo)); + /* Make to_sgx(kvm) work */ + kvm->arch.priv = sgx; + + /* Init EPC for guest */ + r = kvm_init_epc(kvm, epc_base_pfn, epc_npages); + if (r) { + sgx_debug("kvm_create_epc_slot failed.\n"); + kfree(sgx); + kvm->arch.priv = NULL; + return r; + } + + /* Populate all EPC pages for guest when it is created. */ + r = kvm_populate_epc(kvm, epc_base_pfn, epc_npages); + if (r) { + sgx_debug("kvm_populate_epc failed.\n"); + /* EPC slot will be destroyed when guest is destoryed */ + kvm_destroy_epc(kvm); + kfree(sgx); + kvm->arch.priv = NULL; + return r; + } + + return 0; +} + +void kvm_destroy_sgx(struct kvm *kvm) +{ + struct kvm_sgx *sgx = to_sgx(kvm); + + if (sgx) { + kvm_destroy_epc(kvm); + kfree(sgx); + } + + kvm->arch.priv = NULL; +} + + + static void put_sgx_driver_symbols(void); static int get_sgx_driver_symbols(void) diff --git a/arch/x86/kvm/sgx.h b/arch/x86/kvm/sgx.h index ff2766eeae33..8a8f1235c19c 100644 --- a/arch/x86/kvm/sgx.h +++ b/arch/x86/kvm/sgx.h @@ -27,8 +27,79 @@ #include <linux/bitops.h> #include <linux/kvm_host.h> #include <asm/sgx.h> +#include <uapi/asm/sgx.h> /* ENCLS error code */ int sgx_init(void); void sgx_destroy(void); +struct kvm_epc_page { + /* valid if physical EPC page is mapped to guest EPC gfn */ + struct sgx_epc_page *epg; +}; + +struct kvm_epc { + u64 base_gfn; + u64 npages; + struct kvm_epc_page *epc_table; +}; + +/* + * SGX capability from SGX CPUID. 
+ */ +struct sgx_cpuinfo { +#define SGX_CAP_SGX1 (1UL << 0) +#define SGX_CAP_SGX2 (1UL << 1) + u32 cap; + u32 miscselect; + u32 max_enclave_size64; + u32 max_enclave_size32; + u32 secs_attr_bitmask[4]; + u64 epc_base; + u64 epc_size; +}; + +/* + * SGX per-VM structure + */ +struct kvm_sgx { + struct kvm *kvm; + struct sgx_cpuinfo sgxinfo; + struct kvm_epc epc; +}; + +#define to_sgx(_kvm) ((struct kvm_sgx *)(kvm->arch.priv)) +#define to_epc(_sgx) ((struct kvm_epc *)(&((_sgx)->epc))) + +static inline bool is_valid_epc_gfn(struct kvm *kvm, u64 gfn) +{ + struct kvm_sgx *sgx = to_sgx(kvm); + struct kvm_epc *epc = to_epc(sgx); + + return ((gfn >= epc->base_gfn) && (gfn < epc->base_gfn + epc->npages)); +} + +static inline struct kvm_epc_page *gfn_to_guest_epc_page(struct kvm *kvm, u64 gfn) +{ + struct kvm_sgx *sgx = to_sgx(kvm); + struct kvm_epc *epc = to_epc(sgx); + + BUG_ON(!is_valid_epc_gfn(kvm, gfn)); + + return epc->epc_table + (gfn - epc->base_gfn); +} + +static inline u64 guest_epc_page_to_gfn(struct kvm *kvm, struct kvm_epc_page *gepg) +{ + struct kvm_sgx *sgx = to_sgx(kvm); + struct kvm_epc *epc = to_epc(sgx); + + return epc->base_gfn + (gepg - epc->epc_table); +} + +/* EPC slot is created by KVM as private slot. */ +#define SGX_EPC_MEMSLOT (KVM_USER_MEM_SLOTS + 3) + +int kvm_init_sgx(struct kvm *kvm, struct sgx_cpuinfo *sgxinfo); +void kvm_destroy_sgx(struct kvm *kvm); + #endif -- 2.11.0