Hi Jiri, On Tue, Jan 14, 2025 at 1:22 AM Jiri Olsa <olsajiri@xxxxxxxxx> wrote: > > On Sat, Jan 11, 2025 at 07:40:15PM +0100, Jiri Olsa wrote: > > On Sat, Jan 11, 2025 at 02:25:37AM +1100, Aleksa Sarai wrote: > > > On 2025-01-10, Eyal Birger <eyal.birger@xxxxxxxxx> wrote: > > > > Hi, > > > > > > > > When attaching uretprobes to processes running inside docker, the attached > > > > process is segfaulted when encountering the retprobe. The offending commit > > > > is: > > > > > > > > ff474a78cef5 ("uprobe: Add uretprobe syscall to speed up return probe") > > > > > > > > To my understanding, the reason is that now that uretprobe is a system call, > > > > the default seccomp filters in docker block it as they only allow a specific > > > > set of known syscalls. > > > > > > FWIW, the default seccomp profile of Docker _should_ return -ENOSYS for > > > uretprobe (runc has a bunch of ugly logic to try to guarantee this if > > > Docker hasn't updated their profile to include it). Though I guess that > > > isn't sufficient for the magic that uretprobe(2) does... > > > > > > > This behavior can be reproduced by the below bash script, which works before > > > > this commit. > > > > > > > > Reported-by: Rafael Buchbinder <rafi@xxxxxx> > > > > hi, > > nice ;-) thanks for the report, the problem seems to be that uretprobe syscall > > is blocked and uretprobe trampoline does not expect that > > > > I think we could add code to the uretprobe trampoline to detect this and > > execute standard int3 as fallback to process uretprobe, I'm checking on that > > hack below seems to fix the issue, it's using rbx to signal that uretprobe > syscall got executed, if not, trampoline does int3 and executes uretprobe > handler in the old way FWIW If I change the seccomp policy to SCMP_ACT_KILL this still fails. Eyal. > > unfortunately now the uretprobe trampoline size crosses the xol slot limit so > will need to come up with some generic/arch code solution for that, code below > is neglecting that for now > > jirka > > > --- > arch/x86/kernel/uprobes.c | 24 ++++++++++++++++++++++++ > include/linux/uprobes.h | 1 + > kernel/events/uprobes.c | 10 ++++++++-- > 3 files changed, 33 insertions(+), 2 deletions(-) > > diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c > index 5a952c5ea66b..b54863f6fa25 100644 > --- a/arch/x86/kernel/uprobes.c > +++ b/arch/x86/kernel/uprobes.c > @@ -315,14 +315,25 @@ asm ( > ".global uretprobe_trampoline_entry\n" > "uretprobe_trampoline_entry:\n" > "pushq %rax\n" > + "pushq %rbx\n" > "pushq %rcx\n" > "pushq %r11\n" > + "movq $1, %rbx\n" > "movq $" __stringify(__NR_uretprobe) ", %rax\n" > "syscall\n" > ".global uretprobe_syscall_check\n" > "uretprobe_syscall_check:\n" > + "or %rbx,%rbx\n" > + "jz uretprobe_syscall_return\n" > "popq %r11\n" > "popq %rcx\n" > + "popq %rbx\n" > + "popq %rax\n" > + "int3\n" > + "uretprobe_syscall_return:\n" > + "popq %r11\n" > + "popq %rcx\n" > + "popq %rbx\n" > > /* The uretprobe syscall replaces stored %rax value with final > * return address, so we don't restore %rax in here and just > @@ -338,6 +349,16 @@ extern u8 uretprobe_trampoline_entry[]; > extern u8 uretprobe_trampoline_end[]; > extern u8 uretprobe_syscall_check[]; > > +#define UINSNS_PER_PAGE (PAGE_SIZE/UPROBE_XOL_SLOT_BYTES) > + > +bool arch_is_uretprobe_trampoline(unsigned long vaddr) > +{ > + unsigned long start = uprobe_get_trampoline_vaddr(); > + unsigned long end = start + 2*UINSNS_PER_PAGE; > + > + return vaddr >= start && vaddr < end; > +} > + > void *arch_uprobe_trampoline(unsigned long *psize) > { > static uprobe_opcode_t insn = UPROBE_SWBP_INSN; > @@ -418,6 +439,9 @@ SYSCALL_DEFINE0(uretprobe) > regs->r11 = regs->flags; > regs->cx = regs->ip; > > + /* zero rbx to signal trampoline that uretprobe syscall was executed */ > + regs->bx = 0; > + > return regs->ax; > > sigill: > diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h > index e0a4c2082245..dbde57a68a1b 100644 > --- a/include/linux/uprobes.h > +++ b/include/linux/uprobes.h > @@ -213,6 +213,7 @@ extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, > extern void uprobe_handle_trampoline(struct pt_regs *regs); > extern void *arch_uprobe_trampoline(unsigned long *psize); > extern unsigned long uprobe_get_trampoline_vaddr(void); > +bool arch_is_uretprobe_trampoline(unsigned long vaddr); > #else /* !CONFIG_UPROBES */ > struct uprobes_state { > }; > diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c > index fa04b14a7d72..73df64109f38 100644 > --- a/kernel/events/uprobes.c > +++ b/kernel/events/uprobes.c > @@ -1703,6 +1703,11 @@ void * __weak arch_uprobe_trampoline(unsigned long *psize) > return &insn; > } > > +bool __weak arch_is_uretprobe_trampoline(unsigned long vaddr) > +{ > + return vaddr == uprobe_get_trampoline_vaddr(); > +} > + > static struct xol_area *__create_xol_area(unsigned long vaddr) > { > struct mm_struct *mm = current->mm; > @@ -1725,8 +1730,9 @@ static struct xol_area *__create_xol_area(unsigned long vaddr) > > area->vaddr = vaddr; > init_waitqueue_head(&area->wq); > - /* Reserve the 1st slot for get_trampoline_vaddr() */ > + /* Reserve the first two slots for get_trampoline_vaddr() */ > set_bit(0, area->bitmap); > + set_bit(1, area->bitmap); > insns = arch_uprobe_trampoline(&insns_size); > arch_uprobe_copy_ixol(area->page, 0, insns, insns_size); > > @@ -2536,7 +2542,7 @@ static void handle_swbp(struct pt_regs *regs) > int is_swbp; > > bp_vaddr = uprobe_get_swbp_addr(regs); > - if (bp_vaddr == uprobe_get_trampoline_vaddr()) > + if (arch_is_uretprobe_trampoline(bp_vaddr)) > return uprobe_handle_trampoline(regs); > > rcu_read_lock_trace(); > -- > 2.47.1 >