On Fri 2023-06-23 18:55:29, Brian Gerst wrote: > When kCFI is enabled, special handling is needed for the indirect call > to the kernel thread function. Rewrite the ret_from_fork() function in > C so that the compiler can properly handle the indirect call. This patch broke livepatching. Kthreads never have a reliable stack. It works when I revert it. See also below. > --- a/arch/x86/entry/entry_64.S > +++ b/arch/x86/entry/entry_64.S > @@ -284,36 +284,19 @@ SYM_FUNC_END(__switch_to_asm) > * r12: kernel thread arg > */ > .pushsection .text, "ax" > - __FUNC_ALIGN > -SYM_CODE_START_NOALIGN(ret_from_fork) > - UNWIND_HINT_END_OF_STACK > +SYM_CODE_START(ret_from_fork_asm) > + UNWIND_HINT_REGS > ANNOTATE_NOENDBR // copy_thread > CALL_DEPTH_ACCOUNT > - movq %rax, %rdi > - call schedule_tail /* rdi: 'prev' task parameter */ > > - testq %rbx, %rbx /* from kernel_thread? */ > - jnz 1f /* kernel threads are uncommon */ > + movq %rax, %rdi /* prev */ > + movq %rsp, %rsi /* regs */ > + movq %rbx, %rdx /* fn */ > + movq %r12, %rcx /* fn_arg */ > + call ret_from_fork > > -2: > - UNWIND_HINT_REGS > - movq %rsp, %rdi > - call syscall_exit_to_user_mode /* returns with IRQs disabled */ > jmp swapgs_restore_regs_and_return_to_usermode > - > -1: > - /* kernel thread */ > - UNWIND_HINT_END_OF_STACK I think that it might be related to removal of this line. The following intructions are going to call fn(fn_arg). See below. > - movq %r12, %rdi > - CALL_NOSPEC rbx > - /* > - * A kernel thread is allowed to return here after successfully > - * calling kernel_execve(). Exit to userspace to complete the execve() > - * syscall. > - */ > - movq $0, RAX(%rsp) > - jmp 2b > -SYM_CODE_END(ret_from_fork) > +SYM_CODE_END(ret_from_fork_asm) > .popsection > > .macro DEBUG_ENTRY_ASSERT_IRQS_OFF > diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h > index 5c91305d09d2..f42dbf17f52b 100644 > --- a/arch/x86/include/asm/switch_to.h > +++ b/arch/x86/include/asm/switch_to.h > @@ -12,7 +12,9 @@ struct task_struct *__switch_to_asm(struct task_struct *prev, > __visible struct task_struct *__switch_to(struct task_struct *prev, > struct task_struct *next); > > -asmlinkage void ret_from_fork(void); > +asmlinkage void ret_from_fork_asm(void); > +__visible void ret_from_fork(struct task_struct *prev, struct pt_regs *regs, > + int (*fn)(void *), void *fn_arg); > > /* > * This is the structure pointed to by thread.sp for an inactive task. The > diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c > index cc7a642f8c9d..001e6dad9a48 100644 > --- a/arch/x86/kernel/process.c > +++ b/arch/x86/kernel/process.c > @@ -136,6 +137,25 @@ static int set_new_tls(struct task_struct *p, unsigned long tls) > return do_set_thread_area_64(p, ARCH_SET_FS, tls); > } > > +__visible noinstr void ret_from_fork(struct task_struct *prev, struct pt_regs *regs, > + int (*fn)(void *), void *fn_arg) > +{ > + schedule_tail(prev); > + > + /* Is this a kernel thread? */ > + if (unlikely(fn)) { > + fn(fn_arg); This is the related code but it does not include the annotation about the end of the stack. Honestly, I am not familiar with the stack unwinder and how this is supposed to work. I hope that Josh or anyone else might know better. > + /* > + * A kernel thread is allowed to return here after successfully > + * calling kernel_execve(). Exit to userspace to complete the > + * execve() syscall. > + */ > + regs->ax = 0; > + } > + > + syscall_exit_to_user_mode(regs); > +} > + > int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) > { > unsigned long clone_flags = args->flags; Best Regards, Petr