On Sun, Aug 21, 2016 at 10:52 AM, Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> wrote:
> On Sat, Aug 20, 2016 at 11:42 PM, Al Viro <viro@xxxxxxxxxxxxxxxxxx> wrote:
>>
>> It's not exactly setjmp/longjmp; what I had in mind was along the lines of
>
> That ends up having all the exact same issues as setjmp, and generally
> you *do* want the compiler to know about it.

So just in case you wanted to play around with it, here's a kernel
implementation of 'setjmp/longjmp' for x86.

It's very lightly tested (and I'll admit to editing it for some
cleanups after that light testing), but it does look largely sane.

The whole interface choice may be debatable: maybe it would be better
to allocate the register buffer on the stack, and just hide a pointer
to it in the task struct. Things like that could be changed fairly
easily.

But if you want to play around with this, this patch should get you started.

Of course, you'd want to wrap things up somehow, and I would *not*
want to see naked setjmp() calls in the kernel.

And we'd need this for all other architectures too, but it's usually
not hard to do. It needs to save all the callee-saved registers and
the stack pointer and return address. That should generally be it.

The 32-bit version has not been tested at all, but it compiled at some
point, and the code looks mostly sane. The 64-bit code I actually had
a stupid non-user-access test-case for.

                Linus
commit a8062ecb780bed81eaec10bd9fea60bf595a9c40
Author: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Date:   Mon Aug 22 13:15:51 2016 -0700

    x86: add basic setjmp/longjmp implementation

    To make the compiler happy, we have to actually call it setjmp/longjmp
    too. Even if the exact semantics aren't the same - we keep the register
    buffer in the thread structure, for example, rather than pass it in as
    an argument.

    Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
---
 arch/x86/include/asm/processor.h |  2 ++
 arch/x86/include/asm/setjmp.h    | 31 ++++++++++++++++++++++++++++++
 arch/x86/kernel/asm-offsets.c    |  1 +
 arch/x86/lib/Makefile            |  1 +
 arch/x86/lib/setjmp_32.S         | 37 ++++++++++++++++++++++++++++++++++++
 arch/x86/lib/setjmp_64.S         | 41 ++++++++++++++++++++++++++++++++++++++++
 include/linux/setjmp.h           | 19 +++++++++++++++++++
 7 files changed, 132 insertions(+)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 63def9537a2d..1af2c7025d51 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -22,6 +22,7 @@ struct vm86;
 #include <asm/nops.h>
 #include <asm/special_insns.h>
 #include <asm/fpu/types.h>
+#include <asm/setjmp.h>
 
 #include <linux/personality.h>
 #include <linux/cache.h>
@@ -425,6 +426,7 @@ struct thread_struct {
 	unsigned		io_bitmap_max;
 
 	mm_segment_t		addr_limit;
+	struct setjmp		setjmp;
 
 	unsigned int		sig_on_uaccess_err:1;
 	unsigned int		uaccess_err:1;	/* uaccess failed */
diff --git a/arch/x86/include/asm/setjmp.h b/arch/x86/include/asm/setjmp.h
new file mode 100644
index 000000000000..6cda8608ce04
--- /dev/null
+++ b/arch/x86/include/asm/setjmp.h
@@ -0,0 +1,31 @@
+#ifndef _ASM_SETJMP_H
+#define _ASM_SETJMP_H
+
+/*
+ * setjmp needs to save the callee-saved registers and
+ * the stack setup, so that it looks like a normal call.
+ *
+ * In addition, gcc needs to know that it's setjmp, but
+ * that seems to literally just trigger on the name.
+ *
+ * Unlike the C library version, the save area lives in the
+ * task structure; field order matches the offsets in setjmp_*.S.
+ */
+
+#ifdef CONFIG_X86_64
+
+struct setjmp {
+	unsigned long rbx, r12, r13, r14, r15;
+	unsigned long rbp, rsp, rip;
+};
+
+#else
+
+struct setjmp {
+	unsigned long ebx, esi, edi;
+	unsigned long ebp, esp, eip;
+};
+
+#endif
+
+#endif // _ASM_SETJMP_H
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 2bd5c6ff7ee7..78a10eb048e8 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -34,6 +34,7 @@ void common(void) {
 
 	BLANK();
 	OFFSET(TASK_addr_limit, task_struct, thread.addr_limit);
+	OFFSET(TASK_setjmp, task_struct, thread.setjmp);
 
 	BLANK();
 	OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 34a74131a12c..bb7a34648c2c 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
 lib-y := delay.o misc.o cmdline.o cpu.o
 lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
 lib-y += memcpy_$(BITS).o
+lib-y += setjmp_$(BITS).o
 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
 lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
 lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
diff --git a/arch/x86/lib/setjmp_32.S b/arch/x86/lib/setjmp_32.S
new file mode 100644
index 000000000000..44f3d52ee40c
--- /dev/null
+++ b/arch/x86/lib/setjmp_32.S
@@ -0,0 +1,37 @@
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/asm.h>
+#include <asm/percpu.h>
+
+// Adding TASK_setjmp to the task pointer up front keeps
+// every offset below within an 8-bit displacement, which
+// shrinks the encoding of the loads and stores that
+// follow.
+
+ENTRY(setjmp)
+	movl PER_CPU_VAR(current_task),%eax
+	addl $TASK_setjmp,%eax		// %eax = &current->thread.setjmp
+
+	movl %ebx,(%eax)
+	movl %esi,4(%eax)
+	movl %edi,8(%eax)
+	movl %ebp,12(%eax)
+	lea 4(%esp),%edx		// stack pointer as it will be after the ret below
+	movl %edx,16(%eax)
+	movl (%esp),%edx		// return address of this setjmp() call
+	movl %edx,20(%eax)
+	xorl %eax,%eax			// the direct call returns 0
+	ret
+
+ENTRY(longjmp)
+	movl PER_CPU_VAR(current_task),%eax
+	addl $TASK_setjmp,%eax		// %eax = &current->thread.setjmp
+
+	movl (%eax),%ebx
+	movl 4(%eax),%esi
+	movl 8(%eax),%edi
+	movl 12(%eax),%ebp
+	movl 16(%eax),%esp		// back on the stack of the setjmp() caller
+	movl 20(%eax),%edx
+	movl $1,%eax			// setjmp() appears to return 1 this time
+	jmp *%edx			// resume at the saved return address
diff --git a/arch/x86/lib/setjmp_64.S b/arch/x86/lib/setjmp_64.S
new file mode 100644
index 000000000000..c113e132e4d8
--- /dev/null
+++ b/arch/x86/lib/setjmp_64.S
@@ -0,0 +1,41 @@
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/asm.h>
+#include <asm/percpu.h>
+
+// Adding TASK_setjmp to the task pointer up front keeps
+// every offset below within an 8-bit displacement, which
+// shrinks the encoding of the loads and stores that
+// follow.
+
+ENTRY(setjmp)
+	movq PER_CPU_VAR(current_task),%rax
+	addq $TASK_setjmp,%rax		// %rax = &current->thread.setjmp
+
+	movq %rbx,(%rax)
+	movq %r12,8(%rax)
+	movq %r13,16(%rax)
+	movq %r14,24(%rax)
+	movq %r15,32(%rax)
+	movq %rbp,40(%rax)
+	lea 8(%rsp),%rdx		// stack pointer as it will be after the ret below
+	movq %rdx,48(%rax)
+	movq (%rsp),%rdx		// return address of this setjmp() call
+	movq %rdx,56(%rax)
+	xorl %eax,%eax			// the direct call returns 0
+	ret
+
+ENTRY(longjmp)
+	movq PER_CPU_VAR(current_task),%rax
+	addq $TASK_setjmp,%rax		// %rax = &current->thread.setjmp
+
+	movq (%rax),%rbx
+	movq 8(%rax),%r12
+	movq 16(%rax),%r13
+	movq 24(%rax),%r14
+	movq 32(%rax),%r15
+	movq 40(%rax),%rbp
+	movq 48(%rax),%rsp		// back on the stack of the setjmp() caller
+	movq 56(%rax),%rdx
+	movl $1,%eax			// setjmp() appears to return 1 this time
+	jmp *%rdx			// resume at the saved return address
diff --git a/include/linux/setjmp.h b/include/linux/setjmp.h
new file mode 100644
index 000000000000..42d4674791e8
--- /dev/null
+++ b/include/linux/setjmp.h
@@ -0,0 +1,19 @@
+#ifndef _LINUX_SETJMP_H
+#define _LINUX_SETJMP_H
+
+#include <linux/compiler.h>
+
+//
+// NOTE! We call it 'setjmp' to make gcc treat it specially,
+// but the calling conventions are different from the regular
+// user-space setjmp.
+//
+// So setjmp() always returns 0/1, and the register buffer
+// is always in the task struct rather than being passed in
+// as an argument.
+//
+
+extern int setjmp(void);		// 0 on the direct call, 1 when resumed by longjmp()
+extern void __noreturn longjmp(void);	// resumes at the current task's saved setjmp() state
+
+#endif // _LINUX_SETJMP_H