> +/** > + * struct sgx_enclave_add_pages - parameter structure for the > + * %SGX_IOC_ENCLAVE_ADD_PAGE ioctl > + * @src: start address for the page data > + * @offset: starting page offset Is this the offset *within* the page? Might be nice to say that. > + * @length: length of the data (multiple of the page size) > + * @secinfo: address for the SECINFO data > + * @flags: page control flags > + * @count: number of bytes added (multiple of the page size) > + */ > +struct sgx_enclave_add_pages { > + __u64 src; > + __u64 offset; > + __u64 length; > + __u64 secinfo; > + __u64 flags; > + __u64 count; > +}; > + > #endif /* _UAPI_ASM_X86_SGX_H */ > diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c > index 9bb4694e57c1..e13e04737683 100644 > --- a/arch/x86/kernel/cpu/sgx/ioctl.c > +++ b/arch/x86/kernel/cpu/sgx/ioctl.c > @@ -194,6 +194,302 @@ static long sgx_ioc_enclave_create(struct sgx_encl *encl, void __user *arg) > return ret; > } > > +static struct sgx_encl_page *sgx_encl_page_alloc(struct sgx_encl *encl, > + unsigned long offset, > + u64 secinfo_flags) > +{ > + struct sgx_encl_page *encl_page; > + unsigned long prot; > + > + encl_page = kzalloc(sizeof(*encl_page), GFP_KERNEL); > + if (!encl_page) > + return ERR_PTR(-ENOMEM); > + > + encl_page->desc = encl->base + offset; > + encl_page->encl = encl; Somewhere, we need an explanation of why we have 'sgx_epc_page' and 'sgx_encl_page'. I think they're 1:1 at least after sgx_encl_page_alloc(), so I'm wondering why we need two. > + prot = _calc_vm_trans(secinfo_flags, SGX_SECINFO_R, PROT_READ) | > + _calc_vm_trans(secinfo_flags, SGX_SECINFO_W, PROT_WRITE) | > + _calc_vm_trans(secinfo_flags, SGX_SECINFO_X, PROT_EXEC); > + > + /* > + * TCS pages must always RW set for CPU access while the SECINFO > + * permissions are *always* zero - the CPU ignores the user provided > + * values and silently overwrites them with zero permissions. > + */ > + if ((secinfo_flags & SGX_SECINFO_PAGE_TYPE_MASK) == SGX_SECINFO_TCS) > + prot |= PROT_READ | PROT_WRITE; > + > + /* Calculate maximum of the VM flags for the page. */ > + encl_page->vm_max_prot_bits = calc_vm_prot_bits(prot, 0); > + > + return encl_page; > +} > + > +static int sgx_validate_secinfo(struct sgx_secinfo *secinfo) > +{ > + u64 perm = secinfo->flags & SGX_SECINFO_PERMISSION_MASK; > + u64 pt = secinfo->flags & SGX_SECINFO_PAGE_TYPE_MASK; I'd align the ='s up there ^^ > + > + if (pt != SGX_SECINFO_REG && pt != SGX_SECINFO_TCS) > + return -EINVAL; > + > + if ((perm & SGX_SECINFO_W) && !(perm & SGX_SECINFO_R)) > + return -EINVAL; > + > + /* > + * CPU will silently overwrite the permissions as zero, which means > + * that we need to validate it ourselves. > + */ > + if (pt == SGX_SECINFO_TCS && perm) > + return -EINVAL; > + > + if (secinfo->flags & SGX_SECINFO_RESERVED_MASK) > + return -EINVAL; > + > + if (memchr_inv(secinfo->reserved, 0, sizeof(secinfo->reserved))) > + return -EINVAL; > + > + return 0; > +} > + > +static int __sgx_encl_add_page(struct sgx_encl *encl, > + struct sgx_encl_page *encl_page, > + struct sgx_epc_page *epc_page, > + struct sgx_secinfo *secinfo, unsigned long src) > +{ > + struct sgx_pageinfo pginfo; > + struct vm_area_struct *vma; > + struct page *src_page; > + int ret; > + > + /* Deny noexec. */ > + vma = find_vma(current->mm, src); > + if (!vma) > + return -EFAULT; > + > + if (!(vma->vm_flags & VM_MAYEXEC)) > + return -EACCES; > + > + ret = get_user_pages(src, 1, 0, &src_page, NULL); > + if (ret < 1) > + return -EFAULT; > + > + pginfo.secs = (unsigned long)sgx_get_epc_addr(encl->secs.epc_page); > + pginfo.addr = SGX_ENCL_PAGE_ADDR(encl_page); > + pginfo.metadata = (unsigned long)secinfo; > + pginfo.contents = (unsigned long)kmap_atomic(src_page); > + > + ret = __eadd(&pginfo, sgx_get_epc_addr(epc_page)); Could you convince me that EADD is not going to fault and make the kmap_atomic() mad? > + kunmap_atomic((void *)pginfo.contents); All the casting is kinda nasty, but I gues you do it to ensure you can use __u64 in the hardware structs. > + put_page(src_page); > + > + return ret ? -EIO : 0; > +} > + > +/* > + * If the caller requires measurement of the page as a proof for the content, > + * use EEXTEND to add a measurement for 256 bytes of the page. Repeat this > + * operation until the entire page is measured." > + */ > +static int __sgx_encl_extend(struct sgx_encl *encl, > + struct sgx_epc_page *epc_page) > +{ > + int ret; > + int i; > + > + for (i = 0; i < 16; i++) { No magic numbers please. #define SGX_EEXTEND_NR_BYTES 16 ?? > + ret = __eextend(sgx_get_epc_addr(encl->secs.epc_page), > + sgx_get_epc_addr(epc_page) + (i * 0x100)); What's the 0x100 for? > + if (ret) { > + if (encls_failed(ret)) > + ENCLS_WARN(ret, "EEXTEND"); > + return -EIO; How frequent should we expect these to be? Can users cause them? You should *proably* call it ENCLS_WARN_ONCE() if it's implemented that way. > + } > + } > + > + return 0; > +} > + > +static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src, > + unsigned long offset, struct sgx_secinfo *secinfo, > + unsigned long flags) > +{ > + struct sgx_encl_page *encl_page; > + struct sgx_epc_page *epc_page; > + int ret; > + > + encl_page = sgx_encl_page_alloc(encl, offset, secinfo->flags); > + if (IS_ERR(encl_page)) > + return PTR_ERR(encl_page); > + > + epc_page = __sgx_alloc_epc_page(); > + if (IS_ERR(epc_page)) { > + kfree(encl_page); > + return PTR_ERR(epc_page); > + } Looking at these, I'm forgetting why we need to both allocate an encl_page and an epc_page. Commends might remind me. So would better names. > + mmap_read_lock(current->mm); > + mutex_lock(&encl->lock); > + > + /* > + * Insert prior to EADD in case of OOM. I wouldn't say OOM. Maybe: xa_insert() and EADD can both fail. But xa_insert() is easier to unwind so do it first. > EADD modifies MRENCLAVE, i.e. What is MRENCLAVE? > + * can't be gracefully unwound, while failure on EADD/EXTEND is limited > + * to userspace errors (or kernel/hardware bugs). > + */ > + ret = xa_insert(&encl->page_array, PFN_DOWN(encl_page->desc), > + encl_page, GFP_KERNEL); > + if (ret) > + goto err_out_unlock; > + > + ret = __sgx_encl_add_page(encl, encl_page, epc_page, secinfo, > + src); > + if (ret) > + goto err_out; > + > + /* > + * Complete the "add" before doing the "extend" so that the "add" > + * isn't in a half-baked state in the extremely unlikely scenario > + * the enclave will be destroyed in response to EEXTEND failure. > + */ > + encl_page->encl = encl; > + encl_page->epc_page = epc_page; > + encl->secs_child_cnt++; > + > + if (flags & SGX_PAGE_MEASURE) { > + ret = __sgx_encl_extend(encl, epc_page); > + if (ret) > + goto err_out; > + } Why would we never *not* measure an added page? > + mutex_unlock(&encl->lock); > + mmap_read_unlock(current->mm); > + return ret; > + > +err_out: > + xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc)); > + > +err_out_unlock: > + mutex_unlock(&encl->lock); > + mmap_read_unlock(current->mm); > + > + sgx_free_epc_page(epc_page); > + kfree(encl_page); > + > + return ret; > +} > + > +/** > + * sgx_ioc_enclave_add_pages() - The handler for %SGX_IOC_ENCLAVE_ADD_PAGES > + * @encl: an enclave pointer > + * @arg: a user pointer to a struct sgx_enclave_add_pages instance > + * > + * Add one or more pages to an uninitialized enclave, and optionally extend the > + * measurement with the contents of the page. The SECINFO and measurement mask > + * are applied to all pages. > + * > + * A SECINFO for a TCS is required to always contain zero permissions because > + * CPU silently zeros them. Allowing anything else would cause a mismatch in > + * the measurement. > + * > + * mmap()'s protection bits are capped by the page permissions. For each page > + * address, the maximum protection bits are computed with the following > + * heuristics: > + * > + * 1. A regular page: PROT_R, PROT_W and PROT_X match the SECINFO permissions. > + * 2. A TCS page: PROT_R | PROT_W. > + * > + * mmap() is not allowed to surpass the minimum of the maximum protection bits > + * within the given address range. > + * > + * The function deinitializes kernel data structures for enclave and returns > + * -EIO in any of the following conditions: > + * > + * - Enclave Page Cache (EPC), the physical memory holding enclaves, has > + * been invalidated. This will cause EADD and EEXTEND to fail. > + * - If the source address is corrupted somehow when executing EADD. > + * > + * Return: > + * length of the data processed on success, > + * -EACCES if an executable source page is located in a noexec partition, > + * -ENOMEM if the system is out of EPC pages, > + * -EINTR if the call was interrupted before any data was processed, > + * -EIO if the enclave was lost > + * -errno otherwise > + */ > +static long sgx_ioc_enclave_add_pages(struct sgx_encl *encl, void __user *arg) > +{ > + struct sgx_enclave_add_pages addp; > + struct sgx_secinfo secinfo; > + unsigned long c; > + int ret; > + > + if ((atomic_read(&encl->flags) & SGX_ENCL_INITIALIZED) || > + !(atomic_read(&encl->flags) & SGX_ENCL_CREATED)) > + return -EINVAL; There should to be a nice state machine documented somewhere. Is ther? > + if (copy_from_user(&addp, arg, sizeof(addp))) > + return -EFAULT; > + > + if (!IS_ALIGNED(addp.offset, PAGE_SIZE) || > + !IS_ALIGNED(addp.src, PAGE_SIZE)) > + return -EINVAL; > + > + if (!(access_ok(addp.src, PAGE_SIZE))) > + return -EFAULT; This worries me. You're doing an access_ok() check on addp.src because you evidently don't trust it. But, below, it looks to be accessed directly with an offset, bound by addp.length, which I think can be >PAGE_SIZE. I'd feel a lot better if addp.src's value was being passed around as a __user pointer. > + if (addp.length & (PAGE_SIZE - 1)) > + return -EINVAL; > + > + if (addp.offset + addp.length - PAGE_SIZE >= encl->size) > + return -EINVAL; > + > + if (copy_from_user(&secinfo, (void __user *)addp.secinfo, > + sizeof(secinfo))) > + return -EFAULT; > + > + if (sgx_validate_secinfo(&secinfo)) > + return -EINVAL; > + > + for (c = 0 ; c < addp.length; c += PAGE_SIZE) { > + if (signal_pending(current)) { > + if (!c) > + ret = -ERESTARTSYS; > + > + break; > + } > + > + if (c == SGX_MAX_ADD_PAGES_LENGTH) > + break; > + > + if (need_resched()) > + cond_resched(); > + > + ret = sgx_encl_add_page(encl, addp.src + c, addp.offset + c, > + &secinfo, addp.flags); Yeah... Don't we need to do another access_ok() check here, if we needed one above since we are moving away from addrp.src? > + if (ret) > + break; > + } > + > + addp.count = c; > + > + if (copy_to_user(arg, &addp, sizeof(addp))) > + return -EFAULT; > + > + /* > + * If the enlave was lost, deinitialize the internal data structures > + * for the enclave. > + */ > + if (ret == -EIO) { > + mutex_lock(&encl->lock); > + sgx_encl_destroy(encl); > + mutex_unlock(&encl->lock); > + } > + > + return ret; > +} > + > long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) > { > struct sgx_encl *encl = filep->private_data; > @@ -212,6 +508,9 @@ long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) > case SGX_IOC_ENCLAVE_CREATE: > ret = sgx_ioc_enclave_create(encl, (void __user *)arg); > break; > + case SGX_IOC_ENCLAVE_ADD_PAGES: > + ret = sgx_ioc_enclave_add_pages(encl, (void __user *)arg); > + break; > default: > ret = -ENOIOCTLCMD; > break; > diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h > index fce756c3434b..8d126070db1e 100644 > --- a/arch/x86/kernel/cpu/sgx/sgx.h > +++ b/arch/x86/kernel/cpu/sgx/sgx.h > @@ -34,6 +34,7 @@ struct sgx_epc_section { > > #define SGX_EPC_SECTION_MASK GENMASK(7, 0) > #define SGX_MAX_EPC_SECTIONS (SGX_EPC_SECTION_MASK + 1) > +#define SGX_MAX_ADD_PAGES_LENGTH 0x100000 > > extern struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS]; > >