On Thu, Jun 27, 2024 at 2:17 PM Aruna Ramakrishna <aruna.ramakrishna@xxxxxxxxxx> wrote: > > Problem description: > Let's assume there's a multithreaded application that runs untrusted > user code. Each thread has its stack/code protected by a non-zero pkey, > and the PKRU register is set up such that only that particular non-zero > pkey is enabled. Each thread also sets up an alternate signal stack to > handle signals, which is protected by pkey zero. The pkeys man page > documents that the PKRU will be reset to init_pkru when the signal > handler is invoked, which means that pkey zero access will be enabled. > But this reset happens after the kernel attempts to push fpu state > to the alternate stack, which is not (yet) accessible by the kernel, > which leads to a new SIGSEGV being sent to the application, terminating > it. > > Enabling both the non-zero pkey (for the thread) and pkey zero in > userspace will not work for this use case. We cannot have the alt stack > writeable by all - the rationale here is that the code running in that > thread (using a non-zero pkey) is untrusted and should not have access > to the alternate signal stack (that uses pkey zero), to prevent the > return address of a function from being changed. The expectation is that > kernel should be able to set up the alternate signal stack and deliver > the signal to the application even if pkey zero is explicitly disabled > by the application. The signal handler accessibility should not be > dictated by whatever PKRU value the thread sets up. > > Solution: > The PKRU register is managed by XSAVE, which means the sigframe contents > must match the register contents - which is not the case here. We want > the sigframe to contain the user-defined PKRU value (so that it is > restored correctly from sigcontext) but the actual register must be > reset to init_pkru so that the alt stack is accessible and the signal > can be delivered to the application. It seems that the proper fix here > would be to remove PKRU from the XSAVE framework and manage it > separately, which is quite complicated. As a workaround, do this: > > orig_pkru = rdpkru(); > wrpkru(orig_pkru & init_pkru_value); > xsave_to_user_sigframe(); > put_user(pkru_sigframe_addr, orig_pkru) > > This change is split over multiple patches. > > In preparation for writing PKRU to sigframe in a later patch, pass in > PKRU as an additional parameter down the chain from handle_signal: > setup_rt_frame() > xxx_setup_rt_frame() Above two functions don't access altstack, therefore we can keep it the same as before. > get_sigframe() > copy_fpstate_to_sigframe() > copy_fpregs_to_sigframe() > > There are no functional changes in this patch. > > Signed-off-by: Aruna Ramakrishna <aruna.ramakrishna@xxxxxxxxxx> > --- > arch/x86/include/asm/fpu/signal.h | 2 +- > arch/x86/include/asm/sighandling.h | 10 +++++----- > arch/x86/kernel/fpu/signal.c | 6 +++--- > arch/x86/kernel/signal.c | 19 ++++++++++--------- > arch/x86/kernel/signal_32.c | 8 ++++---- > arch/x86/kernel/signal_64.c | 8 ++++---- > 6 files changed, 27 insertions(+), 26 deletions(-) > > diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h > index 611fa41711af..eccc75bc9c4f 100644 > --- a/arch/x86/include/asm/fpu/signal.h > +++ b/arch/x86/include/asm/fpu/signal.h > @@ -29,7 +29,7 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame, > > unsigned long fpu__get_fpstate_size(void); > > -extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); > +extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size, u32 pkru); > extern void fpu__clear_user_states(struct fpu *fpu); > extern bool fpu__restore_sig(void __user *buf, int ia32_frame); > > diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h > index e770c4fc47f4..de458354a3ea 100644 > --- a/arch/x86/include/asm/sighandling.h > +++ b/arch/x86/include/asm/sighandling.h > @@ -17,11 +17,11 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where); > > void __user * > get_sigframe(struct ksignal *ksig, struct pt_regs *regs, size_t frame_size, > - void __user **fpstate); > + void __user **fpstate, u32 pkru); > > -int ia32_setup_frame(struct ksignal *ksig, struct pt_regs *regs); > -int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs); > -int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs); > -int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs); > +int ia32_setup_frame(struct ksignal *ksig, struct pt_regs *regs, u32 pkru); > +int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs, u32 pkru); > +int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs, u32 pkru); > +int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs, u32 pkru); > > #endif /* _ASM_X86_SIGHANDLING_H */ > diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c > index 247f2225aa9f..2b3b9e140dd4 100644 > --- a/arch/x86/kernel/fpu/signal.c > +++ b/arch/x86/kernel/fpu/signal.c > @@ -156,7 +156,7 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame, > return !err; > } > > -static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) > +static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf, u32 pkru) > { > if (use_xsave()) > return xsave_to_user_sigframe(buf); > @@ -185,7 +185,7 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) > * For [f]xsave state, update the SW reserved fields in the [f]xsave frame > * indicating the absence/presence of the extended state to the user. > */ > -bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) > +bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size, u32 pkru) > { > struct task_struct *tsk = current; > struct fpstate *fpstate = tsk->thread.fpu.fpstate; > @@ -228,7 +228,7 @@ bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) > fpregs_restore_userregs(); > > pagefault_disable(); > - ret = copy_fpregs_to_sigframe(buf_fx); > + ret = copy_fpregs_to_sigframe(buf_fx, pkru); > pagefault_enable(); > fpregs_unlock(); > > diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c > index 31b6f5dddfc2..94b894437327 100644 > --- a/arch/x86/kernel/signal.c > +++ b/arch/x86/kernel/signal.c > @@ -74,7 +74,7 @@ static inline int is_x32_frame(struct ksignal *ksig) > */ > void __user * > get_sigframe(struct ksignal *ksig, struct pt_regs *regs, size_t frame_size, > - void __user **fpstate) > + void __user **fpstate, u32 pkru) we can keep the signature the same, i.e. not adding pkru. > { > struct k_sigaction *ka = &ksig->ka; > int ia32_frame = is_ia32_frame(ksig); > @@ -139,7 +139,7 @@ get_sigframe(struct ksignal *ksig, struct pt_regs *regs, size_t frame_size, > } > > /* save i387 and extended state */ > - if (!copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size)) > + if (!copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size, pkru)) > return (void __user *)-1L; > > return (void __user *)sp; > @@ -206,7 +206,7 @@ unsigned long get_sigframe_size(void) > } > > static int > -setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) > +setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs, u32 pkru) > { > /* Perform fixup for the pre-signal frame. */ > rseq_signal_deliver(ksig, regs); > @@ -214,21 +214,22 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) > /* Set up the stack frame */ > if (is_ia32_frame(ksig)) { > if (ksig->ka.sa.sa_flags & SA_SIGINFO) > - return ia32_setup_rt_frame(ksig, regs); > + return ia32_setup_rt_frame(ksig, regs, pkru); > else > - return ia32_setup_frame(ksig, regs); > + return ia32_setup_frame(ksig, regs, pkru); > } else if (is_x32_frame(ksig)) { > - return x32_setup_rt_frame(ksig, regs); > + return x32_setup_rt_frame(ksig, regs, pkru); > } else { > - return x64_setup_rt_frame(ksig, regs); > + return x64_setup_rt_frame(ksig, regs, pkru); > } > } > > static void > handle_signal(struct ksignal *ksig, struct pt_regs *regs) > { > - bool stepping, failed; > struct fpu *fpu = ¤t->thread.fpu; > + u32 pkru = read_pkru(); This can be moved to get_sigframe(), the same for setting pkru=0 > + bool stepping, failed; > > if (v8086_mode(regs)) > save_v86_state((struct kernel_vm86_regs *) regs, VM86_SIGNAL); > @@ -264,7 +265,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) > if (stepping) > user_disable_single_step(current); > > - failed = (setup_rt_frame(ksig, regs) < 0); > + failed = (setup_rt_frame(ksig, regs, pkru) < 0); The failure case can be handled in get_sigframe(). > if (!failed) { > /* > * Clear the direction flag as per the ABI for function entry. > diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c > index ef654530bf5a..b437d02ecfd7 100644 > --- a/arch/x86/kernel/signal_32.c > +++ b/arch/x86/kernel/signal_32.c no change to signal_64.c if you keep pkru inside get_sigframe. > @@ -228,7 +228,7 @@ do { \ > goto label; \ > } while(0) > > -int ia32_setup_frame(struct ksignal *ksig, struct pt_regs *regs) > +int ia32_setup_frame(struct ksignal *ksig, struct pt_regs *regs, u32 pkru) ia32 doesn't support pkru iiuc, so no need to change the signature here. Same comments for x32 related code path. > { > sigset32_t *set = (sigset32_t *) sigmask_to_save(); > struct sigframe_ia32 __user *frame; > @@ -246,7 +246,7 @@ int ia32_setup_frame(struct ksignal *ksig, struct pt_regs *regs) > 0x80cd, /* int $0x80 */ > }; > > - frame = get_sigframe(ksig, regs, sizeof(*frame), &fp); > + frame = get_sigframe(ksig, regs, sizeof(*frame), &fp, pkru); > > if (ksig->ka.sa.sa_flags & SA_RESTORER) { > restorer = ksig->ka.sa.sa_restorer; > @@ -299,7 +299,7 @@ int ia32_setup_frame(struct ksignal *ksig, struct pt_regs *regs) > return -EFAULT; > } > > -int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) > +int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs, u32 pkru) > { > sigset32_t *set = (sigset32_t *) sigmask_to_save(); > struct rt_sigframe_ia32 __user *frame; > @@ -319,7 +319,7 @@ int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) > 0, > }; > > - frame = get_sigframe(ksig, regs, sizeof(*frame), &fp); > + frame = get_sigframe(ksig, regs, sizeof(*frame), &fp, pkru); > > if (!user_access_begin(frame, sizeof(*frame))) > return -EFAULT; > diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c > index 8a94053c5444..ccfb7824ab2c 100644 > --- a/arch/x86/kernel/signal_64.c > +++ b/arch/x86/kernel/signal_64.c no change to signal_64.c if you keep pkru inside get_sigframe. > @@ -161,7 +161,7 @@ static unsigned long frame_uc_flags(struct pt_regs *regs) > return flags; > } > > -int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) > +int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs, u32 pkru) no change to this function, because it doesn't access altstack. > { > sigset_t *set = sigmask_to_save(); > struct rt_sigframe __user *frame; > @@ -172,7 +172,7 @@ int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) > if (!(ksig->ka.sa.sa_flags & SA_RESTORER)) > return -EFAULT; > > - frame = get_sigframe(ksig, regs, sizeof(struct rt_sigframe), &fp); > + frame = get_sigframe(ksig, regs, sizeof(struct rt_sigframe), &fp, pkru); > uc_flags = frame_uc_flags(regs); > > if (!user_access_begin(frame, sizeof(*frame))) > @@ -300,7 +300,7 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *to, > return __copy_siginfo_to_user32(to, from); > } > > -int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) > +int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs, u32 pkru) > { > compat_sigset_t *set = (compat_sigset_t *) sigmask_to_save(); > struct rt_sigframe_x32 __user *frame; > @@ -311,7 +311,7 @@ int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) > if (!(ksig->ka.sa.sa_flags & SA_RESTORER)) > return -EFAULT; > > - frame = get_sigframe(ksig, regs, sizeof(*frame), &fp); > + frame = get_sigframe(ksig, regs, sizeof(*frame), &fp, pkru); > > uc_flags = frame_uc_flags(regs); > > -- > 2.39.3 >