On Sun, 2022-04-24 at 02:27 +0000, Zhang, Cathy wrote: > Hi Jarkko, > > > -----Original Message----- > > From: Jarkko Sakkinen <jarkko@xxxxxxxxxx> > > Sent: Friday, April 22, 2022 12:02 AM > > To: Zhang, Cathy <cathy.zhang@xxxxxxxxx> > > Cc: linux-sgx@xxxxxxxxxxxxxxx; x86@xxxxxxxxxx; Chatre, Reinette > > <reinette.chatre@xxxxxxxxx>; Hansen, Dave <dave.hansen@xxxxxxxxx>; Raj, > > Ashok <ashok.raj@xxxxxxxxx>; Peng, Chao P <chao.p.peng@xxxxxxxxx>; > > Zhong, Yang <yang.zhong@xxxxxxxxx> > > Subject: Re: [PATCH v4 1/9] x86/sgx: Introduce mechanism to prevent new > > initializations of EPC pages > > > > On Thu, Apr 21, 2022 at 07:03:18PM +0800, Cathy Zhang wrote: > > > == Background == > > > > > > EUPDATESVN is a new SGX instruction which allows enclave attestation > > > to include information about updated microcode without a reboot. > > > > > > The SGX hardware maintains metadata for each enclave page to help > > > enforce its security guarantees. This includes things like a record of > > > the enclave to which the page belongs and the type of the page: > > > SGX metadata like "VA" or "SECS" pages, or regular enclave pages like > > > those that store user data. > > > > > > Before an EUPDATESVN operation can be successful, all SGX memory (aka. > > > EPC) must be marked as "unused" in the SGX hardware metadata (aka, > > > EPCM). The SGX microcode now maintains a reference count of pages > > > which are unused to aid in determining when all pages reach the > > > "unused" state. > > > > > > Both bare-metal and KVM guest EPC must be made unused. To increase the > > > chance of a successful EUPDATESVN, the kernel prevents existing > > > enclaves from creating new, valid pages and prevents new enclave > > > creation (creating an enclave involves initializing a "SECS" page). > > > > > > The entire EUPDATESVN process is very slow since it potentially > > > affects gigabytes of enclave memory. It can potentially take seconds > > > or minutes to complete. Userspace may encounter -EBUSY errors during > > > the update and is expected to retry. > > > > > > == Patch contents == > > > > > > Introduce mechanism to prevent new initializations of EPC pages. > > > > > > Use a flag to indicate when SGX EPC pages are "locked", which means > > > it's not allowed to allocate new EPC page for use. Check it in all > > > paths that can initialize an EPC page. Use SRCU to ensure that the > > > flag is visible across the system before proceeding with an update. > > > > > > Add checks to all sites that call SGX instructions that can transition > > > pages from unused to initialized to ensure that the SRCU lock is held. > > > > > > Signed-off-by: Cathy Zhang <cathy.zhang@xxxxxxxxx> > > > > > > --- > > > Changes since v3: > > > - Rename lable "out" as "err" in sgx_ioc_enclave_create, update error > > > branch accordingly. (Jarrko Sakkinen) > > > > > > Changes since v2: > > > - Move out the SGX2 related change to remove the dependency. > > > (Jarkko Sakkinen, Reinette Chatre). > > > --- > > > arch/x86/kernel/cpu/sgx/encls.h | 8 ++++++ > > > arch/x86/kernel/cpu/sgx/sgx.h | 3 +++ > > > arch/x86/kernel/cpu/sgx/encl.c | 19 +++++++++++++++ > > > arch/x86/kernel/cpu/sgx/ioctl.c | 43 +++++++++++++++++++++++++++------ > > > arch/x86/kernel/cpu/sgx/main.c | 37 ++++++++++++++++++++++++++++ > > > arch/x86/kernel/cpu/sgx/virt.c | 20 +++++++++++++++ > > > 6 files changed, 123 insertions(+), 7 deletions(-) > > > > > > diff --git a/arch/x86/kernel/cpu/sgx/encls.h > > > b/arch/x86/kernel/cpu/sgx/encls.h index fa04a73daf9c..60321c5f5718 > > > 100644 > > > --- a/arch/x86/kernel/cpu/sgx/encls.h > > > +++ b/arch/x86/kernel/cpu/sgx/encls.h > > > @@ -138,6 +138,8 @@ static inline bool encls_failed(int ret) > > > > > > static inline int __ecreate(struct sgx_pageinfo *pginfo, void *secs) > > > { > > > + lockdep_assert_held(&sgx_lock_epc_srcu); > > > + > > > return __encls_2(ECREATE, pginfo, secs); } > > > > > > @@ -148,6 +150,8 @@ static inline int __eextend(void *secs, void > > > *addr) > > > > > > static inline int __eadd(struct sgx_pageinfo *pginfo, void *addr) { > > > + lockdep_assert_held(&sgx_lock_epc_srcu); > > > + > > > return __encls_2(EADD, pginfo, addr); } > > > > > > @@ -179,6 +183,8 @@ static inline int __etrack(void *addr) static > > > inline int __eldu(struct sgx_pageinfo *pginfo, void *addr, > > > void *va) > > > { > > > + lockdep_assert_held(&sgx_lock_epc_srcu); > > > + > > > return __encls_ret_3(ELDU, pginfo, addr, va); } > > > > > > @@ -191,6 +197,8 @@ static inline int __epa(void *addr) { > > > unsigned long rbx = SGX_PAGE_TYPE_VA; > > > > > > + lockdep_assert_held(&sgx_lock_epc_srcu); > > > + > > > return __encls_2(EPA, rbx, addr); > > > } > > > > > > diff --git a/arch/x86/kernel/cpu/sgx/sgx.h > > > b/arch/x86/kernel/cpu/sgx/sgx.h index b30cee4de903..d7a1490d90bb > > > 100644 > > > --- a/arch/x86/kernel/cpu/sgx/sgx.h > > > +++ b/arch/x86/kernel/cpu/sgx/sgx.h > > > @@ -103,4 +103,7 @@ static inline int __init sgx_vepc_init(void) > > > > > > void sgx_update_lepubkeyhash(u64 *lepubkeyhash); > > > > > > +extern struct srcu_struct sgx_lock_epc_srcu; bool > > > +sgx_epc_is_locked(void); > > > + > > > #endif /* _X86_SGX_H */ > > > diff --git a/arch/x86/kernel/cpu/sgx/encl.c > > > b/arch/x86/kernel/cpu/sgx/encl.c index 02ff9ac83985..68c8d65a8dee > > > 100644 > > > --- a/arch/x86/kernel/cpu/sgx/encl.c > > > +++ b/arch/x86/kernel/cpu/sgx/encl.c > > > @@ -142,6 +142,7 @@ static vm_fault_t sgx_vma_fault(struct vm_fault > > *vmf) > > > unsigned long phys_addr; > > > struct sgx_encl *encl; > > > vm_fault_t ret; > > > + int srcu_idx; > > > > > > encl = vma->vm_private_data; > > > > > > @@ -153,11 +154,18 @@ static vm_fault_t sgx_vma_fault(struct vm_fault > > *vmf) > > > if (unlikely(!encl)) > > > return VM_FAULT_SIGBUS; > > > > > > + srcu_idx = srcu_read_lock(&sgx_lock_epc_srcu); > > > + if (sgx_epc_is_locked()) { > > > + srcu_read_unlock(&sgx_lock_epc_srcu, srcu_idx); > > > + return VM_FAULT_SIGBUS; > > > + } > > > + > > > mutex_lock(&encl->lock); > > > > > > entry = sgx_encl_load_page(encl, addr, vma->vm_flags); > > > if (IS_ERR(entry)) { > > > mutex_unlock(&encl->lock); > > > + srcu_read_unlock(&sgx_lock_epc_srcu, srcu_idx); > > > > > > if (PTR_ERR(entry) == -EBUSY) > > > return VM_FAULT_NOPAGE; > > > @@ -170,12 +178,14 @@ static vm_fault_t sgx_vma_fault(struct vm_fault > > *vmf) > > > ret = vmf_insert_pfn(vma, addr, PFN_DOWN(phys_addr)); > > > if (ret != VM_FAULT_NOPAGE) { > > > mutex_unlock(&encl->lock); > > > + srcu_read_unlock(&sgx_lock_epc_srcu, srcu_idx); > > > > > > return VM_FAULT_SIGBUS; > > > } > > > > > > sgx_encl_test_and_clear_young(vma->vm_mm, entry); > > > mutex_unlock(&encl->lock); > > > + srcu_read_unlock(&sgx_lock_epc_srcu, srcu_idx); > > > > > > return VM_FAULT_NOPAGE; > > > } > > > @@ -323,6 +333,7 @@ static int sgx_vma_access(struct vm_area_struct > > *vma, unsigned long addr, > > > struct sgx_encl_page *entry = NULL; > > > char data[sizeof(unsigned long)]; > > > unsigned long align; > > > + int srcu_idx; > > > int offset; > > > int cnt; > > > int ret = 0; > > > @@ -339,8 +350,15 @@ static int sgx_vma_access(struct vm_area_struct > > *vma, unsigned long addr, > > > return -EFAULT; > > > > > > for (i = 0; i < len; i += cnt) { > > > + srcu_idx = srcu_read_lock(&sgx_lock_epc_srcu); > > > + if (sgx_epc_is_locked()) { > > > + ret = -EBUSY; > > > + goto out; > > > + } > > > + > > > entry = sgx_encl_reserve_page(encl, (addr + i) & PAGE_MASK, > > > vma->vm_flags); > > > + > > > if (IS_ERR(entry)) { > > > ret = PTR_ERR(entry); > > > break; > > > @@ -366,6 +384,7 @@ static int sgx_vma_access(struct vm_area_struct > > > *vma, unsigned long addr, > > > > > > out: > > > mutex_unlock(&encl->lock); > > > + srcu_read_unlock(&sgx_lock_epc_srcu, srcu_idx); > > > > > > if (ret) > > > break; > > > diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c > > > b/arch/x86/kernel/cpu/sgx/ioctl.c index b3c2e8d58142..a4df72f715d7 > > > 100644 > > > --- a/arch/x86/kernel/cpu/sgx/ioctl.c > > > +++ b/arch/x86/kernel/cpu/sgx/ioctl.c > > > @@ -147,24 +147,42 @@ static int sgx_encl_create(struct sgx_encl > > > *encl, struct sgx_secs *secs) static long > > > sgx_ioc_enclave_create(struct sgx_encl *encl, void __user *arg) { > > > struct sgx_enclave_create create_arg; > > > + int srcu_idx; > > > void *secs; > > > int ret; > > > > > > - if (test_bit(SGX_ENCL_CREATED, &encl->flags)) > > > - return -EINVAL; > > > + if (test_bit(SGX_ENCL_CREATED, &encl->flags)) { > > > + ret = -EINVAL; > > > + goto err; > > > + } > > > > > > - if (copy_from_user(&create_arg, arg, sizeof(create_arg))) > > > - return -EFAULT; > > > + if (copy_from_user(&create_arg, arg, sizeof(create_arg))) { > > > + ret = -EFAULT; > > > + goto err; > > > + } > > > > > > secs = kmalloc(PAGE_SIZE, GFP_KERNEL); > > > - if (!secs) > > > - return -ENOMEM; > > > + if (!secs) { > > > + ret = -ENOMEM; > > > + goto err; > > > + } > > > > > > if (copy_from_user(secs, (void __user *)create_arg.src, PAGE_SIZE)) > > > ret = -EFAULT; > > > > if (copy_from_user(secs, (void __user *)create_arg.src, PAGE_SIZE)) { > > ret = -EFAULT; > > goto err; > > } > > > > Right? > > Right! I don't change the if branch for I see it will go to the err label directly. > But it's changed now :-) > > > > > > > > - else > > > + else { > > > + srcu_idx = srcu_read_lock(&sgx_lock_epc_srcu); > > > + if (sgx_epc_is_locked()) { > > > + srcu_read_unlock(&sgx_lock_epc_srcu, srcu_idx); > > > + ret = -EBUSY; > > > + goto err; > > > + } > > > + > > > ret = sgx_encl_create(encl, secs); > > > > > > + srcu_read_unlock(&sgx_lock_epc_srcu, srcu_idx); > > > + } > > > > I mean less branching for the common (success) case. > > Do you mean to change as follows: > > if (copy_from_user(secs, (void __user *)create_arg.src, PAGE_SIZE)) { > ret = -EFAULT; > goto err; > > srcu_idx = srcu_read_lock(&sgx_lock_epc_srcu); > if (sgx_epc_is_locked()) { > srcu_read_unlock(&sgx_lock_epc_srcu, srcu_idx); > ret = -EBUSY; > goto err; > } > > ret = sgx_encl_create(encl, secs); > > srcu_read_unlock(&sgx_lock_epc_srcu, srcu_idx); > > err: > kfree(secs); > return ret; Yeah, quickly skimmed, looks right. BR, Jarkko