> Yuck. Definitely should look at using RCU list. I think the whole > function would boil down to: > > list_for_each_entry_rcu(...) { > down_read(&mm->mm->mmap_sem); > ret = !sgx_encl_test_and_clear_young(next_mm->mm, page); > up_read(&mm->mm->mmap_sem); > > if (ret || (encl->flags & SGX_ENCL_DEAD)) > break; > } > > if (!ret || (encl->flags & SGX_ENCL_DEAD)) { > mutex_lock(&encl->lock); > page->desc |= SGX_ENCL_PAGE_RECLAIMED; > mutex_unlock(&encl->lock); > } But you cannot sleep inside normal RCU. > > + > > + down_read(&next_mm->mm->mmap_sem); > > + mutex_lock(&encl->lock); > > Acquiring encl->lock just to check if it's dead is a bit silly. > > > + > > + if (encl->flags & SGX_ENCL_DEAD) { > > + page->desc |= SGX_ENCL_PAGE_RECLAIMED; > > + ret = true; > > + goto out_stop; > > + } > > + > > + ret = !sgx_encl_test_and_clear_young(next_mm->mm, page); > > + if (!ret) > > + goto out_stop; > > + > > + mutex_unlock(&encl->lock); > > + up_read(&next_mm->mm->mmap_sem); > > + } > > + > > + page->desc |= SGX_ENCL_PAGE_RECLAIMED; > > SGX_ENCL_PAGE_RECLAIMED needs to be set while holding encl->lock. Putting > everything together, I think the function would boil down to: > > list_for_each_entry_rcu(...) 
{ > if (encl->flags & SGX_ENCL_DEAD) > break; > > down_read(&mm->mm->mmap_sem); > ret = !sgx_encl_test_and_clear_young(next_mm->mm, page); > up_read(&mm->mm->mmap_sem); > > if (!ret) > return false; > } > > mutex_lock(&encl->lock); > page->desc |= SGX_ENCL_PAGE_RECLAIMED; > mutex_unlock(&encl->lock); > > return true; > > > + return true; > > +out_stop: > > + mutex_unlock(&encl->lock); > > + up_read(&next_mm->mm->mmap_sem); > > + mmdrop(next_mm->mm); > > + kref_put(&next_mm->refcount, sgx_encl_release_mm); > > + return ret; > > +} > > + > > +static void sgx_reclaimer_block(struct sgx_epc_page *epc_page) > > +{ > > + struct sgx_encl_page *page = epc_page->owner; > > + unsigned long addr = SGX_ENCL_PAGE_ADDR(page); > > + struct sgx_encl *encl = page->encl; > > + struct sgx_encl_mm *next_mm = NULL; > > + struct sgx_encl_mm *prev_mm = NULL; > > + struct vm_area_struct *vma; > > + int iter; > > + int ret; > > + > > + while (true) { > > + next_mm = sgx_encl_next_mm(encl, prev_mm, &iter); > > + if (prev_mm) { > > + mmdrop(prev_mm->mm); > > + kref_put(&prev_mm->refcount, sgx_encl_release_mm); > > + } > > + prev_mm = next_mm; > > + > > + if (iter == SGX_ENCL_MM_ITER_DONE) > > + break; > > + > > + if (iter == SGX_ENCL_MM_ITER_RESTART) > > + continue; > > + > > + down_read(&next_mm->mm->mmap_sem); > > + mutex_lock(&encl->lock); > > There's no need to acquire encl->lock, only mmap_sem needs to be held > to zap PTEs. 
> > > + ret = sgx_encl_find(next_mm->mm, addr, &vma); > > + if (!ret && encl == vma->vm_private_data) > > + zap_vma_ptes(vma, addr, PAGE_SIZE); > > + > > + mutex_unlock(&encl->lock); > > + up_read(&next_mm->mm->mmap_sem); > > + } > > + > > + mutex_lock(&encl->lock); > > + > > + if (!(encl->flags & SGX_ENCL_DEAD)) { > > + ret = __eblock(sgx_epc_addr(epc_page)); > > + if (encls_failed(ret)) > > + ENCLS_WARN(ret, "EBLOCK"); > > + } > > + > > + mutex_unlock(&encl->lock); > > +} > > + > > +static int __sgx_encl_ewb(struct sgx_encl *encl, struct sgx_epc_page *epc_page, > > + struct sgx_va_page *va_page, unsigned int va_offset) > > +{ > > + struct sgx_encl_page *encl_page = epc_page->owner; > > + pgoff_t page_index = sgx_encl_get_index(encl, encl_page); > > + pgoff_t pcmd_index = sgx_pcmd_index(encl, page_index); > > + unsigned long pcmd_offset = sgx_pcmd_offset(page_index); > > + struct sgx_pageinfo pginfo; > > + struct page *backing; > > + struct page *pcmd; > > + int ret; > > + > > + backing = sgx_encl_get_backing_page(encl, page_index); > > + if (IS_ERR(backing)) { > > + ret = PTR_ERR(backing); > > + goto err_backing; > > + } > > + > > + pcmd = sgx_encl_get_backing_page(encl, pcmd_index); > > + if (IS_ERR(pcmd)) { > > + ret = PTR_ERR(pcmd); > > + goto err_pcmd; > > + } > > + > > + pginfo.addr = 0; > > + pginfo.contents = (unsigned long)kmap_atomic(backing); > > + pginfo.metadata = (unsigned long)kmap_atomic(pcmd) + pcmd_offset; > > + pginfo.secs = 0; > > + ret = __ewb(&pginfo, sgx_epc_addr(epc_page), > > + sgx_epc_addr(va_page->epc_page) + va_offset); > > + kunmap_atomic((void *)(unsigned long)(pginfo.metadata - pcmd_offset)); > > + kunmap_atomic((void *)(unsigned long)pginfo.contents); > > + > > + set_page_dirty(pcmd); > > + put_page(pcmd); > > + set_page_dirty(backing); > > + > > +err_pcmd: > > + put_page(backing); > > + > > +err_backing: > > + return ret; > > +} > > + > > +static void sgx_ipi_cb(void *info) > > +{ > > +} > > + > > +static void sgx_encl_ewb(struct 
sgx_epc_page *epc_page, bool do_free) > > +{ > > + struct sgx_encl_page *encl_page = epc_page->owner; > > + struct sgx_encl *encl = encl_page->encl; > > + struct sgx_encl_mm *next_mm = NULL; > > + struct sgx_encl_mm *prev_mm = NULL; > > + struct sgx_va_page *va_page; > > + unsigned int va_offset; > > + int iter; > > + int ret; > > + > > + cpumask_clear(&encl->cpumask); > > + > > + while (true) { > > + next_mm = sgx_encl_next_mm(encl, prev_mm, &iter); > > + if (prev_mm) { > > + mmdrop(prev_mm->mm); > > + kref_put(&prev_mm->refcount, sgx_encl_release_mm); > > + } > > + prev_mm = next_mm; > > + > > + if (iter == SGX_ENCL_MM_ITER_DONE) > > + break; > > + > > + if (iter == SGX_ENCL_MM_ITER_RESTART) > > + continue; > > + > > + cpumask_or(&encl->cpumask, &encl->cpumask, > > + mm_cpumask(next_mm->mm)); > > + } > > Sending IPIs to flush CPUs out of the enclave is only necessary if the > enclave is alive, untracked and there are threads actively running in > the enclave. I.e. calculate cpumask only when necessary. > > This open coding of IPI sending made me realize the driver no longer > invalidates an enclave if an ENCLS instruction fails unexpectedly. That > is going to lead to absolute carnage if something does go wrong as there > will be no recovery path, i.e. the kernel log will be spammed to death > with ENCLS WARNings. Debugging future development will be a nightmare if > a single ENCLS bug obliterates the kernel. Responding below. I get your RCU idea but you cannot sleep inside normal RCU. Also, the current implementation deals with the fact that mmap_sem can be gone. I'm open to using RCU (i.e. SRCU) if these can be somehow dealt with. /Jarkko