On Wed, Feb 05, 2020 at 10:19:33AM -0800, Yu-cheng Yu wrote: > The Shadow Stack (SHSTK) for clone/fork is handled as the following: > > (1) If ((clone_flags & (CLONE_VFORK | CLONE_VM)) == CLONE_VM), > the kernel allocates (and frees on thread exit) a new SHSTK for the > child. > > It is possible for the kernel to complete the clone syscall and set the > child's SHSTK pointer to NULL and let the child thread allocate a SHSTK > for itself. There are two issues in this approach: It is not > compatible with existing code that does inline syscall and it cannot > handle signals before the child can successfully allocate a SHSTK. > > (2) For (clone_flags & CLONE_VFORK), the child uses the existing SHSTK. > > (3) For all other cases, the SHSTK is copied/reused whenever the parent or > the child does a call/ret. > > This patch handles cases (1) & (2). Case (3) is handled in the SHSTK page > fault patches. > > A 64-bit SHSTK has a fixed size of RLIMIT_STACK. A compat-mode thread SHSTK > has a fixed size of 1/4 RLIMIT_STACK. This allows more threads to share a > 32-bit address space. I am not understanding this part. :) Entries are sizeof(unsigned long), yes? A 1/2 RLIMIT_STACK would cover 32-bit, but 1/4 is less, so why does that provide for more threads? > > Signed-off-by: Yu-cheng Yu <yu-cheng.yu@xxxxxxxxx> > --- > arch/x86/include/asm/cet.h | 2 ++ > arch/x86/include/asm/mmu_context.h | 3 +++ > arch/x86/kernel/cet.c | 41 ++++++++++++++++++++++++++++++ > arch/x86/kernel/process.c | 7 +++++ > 4 files changed, 53 insertions(+) > > diff --git a/arch/x86/include/asm/cet.h b/arch/x86/include/asm/cet.h > index 409d4f91a0dc..9a3e2da9c1c4 100644 > --- a/arch/x86/include/asm/cet.h > +++ b/arch/x86/include/asm/cet.h > @@ -19,10 +19,12 @@ struct cet_status { > > #ifdef CONFIG_X86_INTEL_CET > int cet_setup_shstk(void); > +int cet_setup_thread_shstk(struct task_struct *p); > void cet_disable_free_shstk(struct task_struct *p); > int cet_restore_signal(bool ia32, struct sc_ext *sc); > int cet_setup_signal(bool ia32, unsigned long rstor, struct sc_ext *sc); > #else > +static inline int cet_setup_thread_shstk(struct task_struct *p) { return 0; } > static inline void cet_disable_free_shstk(struct task_struct *p) {} > static inline int cet_restore_signal(bool ia32, struct sc_ext *sc) { return -EINVAL; } > static inline int cet_setup_signal(bool ia32, unsigned long rstor, > diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h > index 5f33924e200f..6a8189308823 100644 > --- a/arch/x86/include/asm/mmu_context.h > +++ b/arch/x86/include/asm/mmu_context.h > @@ -13,6 +13,7 @@ > #include <asm/tlbflush.h> > #include <asm/paravirt.h> > #include <asm/mpx.h> > +#include <asm/cet.h> > #include <asm/debugreg.h> > > extern atomic64_t last_mm_ctx_id; > @@ -230,6 +231,8 @@ do { \ > #else > #define deactivate_mm(tsk, mm) \ > do { \ > + if (!tsk->vfork_done) \ > + cet_disable_free_shstk(tsk); \ > load_gs_index(0); \ > loadsegment(fs, 0); \ > } while (0) > diff --git a/arch/x86/kernel/cet.c b/arch/x86/kernel/cet.c > index cba5c7656aab..5b45abda80a1 100644 > --- a/arch/x86/kernel/cet.c > +++ b/arch/x86/kernel/cet.c > @@ -170,6 +170,47 @@ int cet_setup_shstk(void) > return 0; > } > > +int cet_setup_thread_shstk(struct task_struct *tsk) > +{ > + unsigned long addr, size; > + struct cet_user_state *state; > + struct cet_status *cet = &tsk->thread.cet; > + > + if (!cet->shstk_enabled) > + return 0; > + > + state = get_xsave_addr(&tsk->thread.fpu.state.xsave, > + XFEATURE_CET_USER); > + > + if (!state) > + return -EINVAL; > + > + size = rlimit(RLIMIT_STACK); Is SHSTK incompatible with RLIM_INFINITY stack rlimits? > + > + /* > + * Compat-mode pthreads share a limited address space. > + * If each function call takes an average of four slots > + * stack space, we need 1/4 of stack size for shadow stack. > + */ > + if (in_compat_syscall()) > + size /= 4; > + > + addr = alloc_shstk(size); I assume it'd fail here, but I worry about Stack Clash style attacks. I'd like to see test cases that make sure the SHSTK gap is working correctly. -Kees > + > + if (IS_ERR((void *)addr)) { > + cet->shstk_base = 0; > + cet->shstk_size = 0; > + cet->shstk_enabled = 0; > + return PTR_ERR((void *)addr); > + } > + > + fpu__prepare_write(&tsk->thread.fpu); > + state->user_ssp = (u64)(addr + size); > + cet->shstk_base = addr; > + cet->shstk_size = size; > + return 0; > +} > + > void cet_disable_free_shstk(struct task_struct *tsk) > { > struct cet_status *cet = &tsk->thread.cet; > diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c > index e102e63de641..7098618142f2 100644 > --- a/arch/x86/kernel/process.c > +++ b/arch/x86/kernel/process.c > @@ -110,6 +110,7 @@ void exit_thread(struct task_struct *tsk) > > free_vm86(t); > > + cet_disable_free_shstk(tsk); > fpu__drop(fpu); > } > > @@ -180,6 +181,12 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, > if (clone_flags & CLONE_SETTLS) > ret = set_new_tls(p, tls); > > +#ifdef CONFIG_X86_64 > + /* Allocate a new shadow stack for pthread */ > + if (!ret && (clone_flags & (CLONE_VFORK | CLONE_VM)) == CLONE_VM) > + ret = cet_setup_thread_shstk(p); > +#endif > + > if (!ret && unlikely(test_tsk_thread_flag(current, TIF_IO_BITMAP))) > io_bitmap_share(p); > > -- > 2.21.0 > -- Kees Cook