On 2024-12-11 14:33:55+0100, Jiri Olsa wrote: > Adding new uprobe syscall that calls uprobe handlers for given > 'breakpoint' address. > > The idea is that the 'breakpoint' address calls the user space > trampoline which executes the uprobe syscall. > > The syscall handler reads the return address of the initial call > to retrieve the original 'breakpoint' address. With this address > we find the related uprobe object and call its consumers. > > Adding the arch_uprobe_trampoline_mapping function that provides > uprobe trampoline mapping. This mapping is backed with one global > page initialized at __init time and shared by all the mapping > instances. > > We do not allow executing the uprobe syscall if the caller is not > from the uprobe trampoline mapping. > > Signed-off-by: Jiri Olsa <jolsa@xxxxxxxxxx> > --- > arch/x86/entry/syscalls/syscall_64.tbl | 1 + > arch/x86/kernel/uprobes.c | 80 ++++++++++++++++++++++++++ > include/linux/syscalls.h | 2 + > include/linux/uprobes.h | 1 + > kernel/events/uprobes.c | 22 +++++++ > kernel/sys_ni.c | 1 + > 6 files changed, 107 insertions(+) > > diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl > index 5eb708bff1c7..88e388c7675b 100644 > --- a/arch/x86/entry/syscalls/syscall_64.tbl > +++ b/arch/x86/entry/syscalls/syscall_64.tbl > @@ -345,6 +345,7 @@ > 333 common io_pgetevents sys_io_pgetevents > 334 common rseq sys_rseq > 335 common uretprobe sys_uretprobe > +336 common uprobe sys_uprobe > # don't use numbers 387 through 423, add new calls after the last > # 'common' entry > 424 common pidfd_send_signal sys_pidfd_send_signal > diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c > index 22a17c149a55..23e4f2821cff 100644 > --- a/arch/x86/kernel/uprobes.c > +++ b/arch/x86/kernel/uprobes.c > @@ -425,6 +425,86 @@ SYSCALL_DEFINE0(uretprobe) > return -1; > } > > +static int tramp_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) > +{ > + return -EPERM; > +}
> + > +static struct vm_special_mapping tramp_mapping = { > + .name = "[uprobes-trampoline]", > + .mremap = tramp_mremap, > +}; > + > +SYSCALL_DEFINE0(uprobe) > +{ > + struct pt_regs *regs = task_pt_regs(current); > + struct vm_area_struct *vma; > + unsigned long bp_vaddr; > + int err; > + > + err = copy_from_user(&bp_vaddr, (void __user *)regs->sp + 3*8, sizeof(bp_vaddr)); A #define for the magic values would be nice. > + if (err) { > + force_sig(SIGILL); > + return -1; > + } > + > + /* Allow execution only from uprobe trampolines. */ > + vma = vma_lookup(current->mm, regs->ip); > + if (!vma || vma->vm_private_data != (void *) &tramp_mapping) { Use vma_is_special_mapping() here instead. > + force_sig(SIGILL); > + return -1; > + } > + > + handle_syscall_uprobe(regs, bp_vaddr - 5); > + return 0; > +} > + > +asm ( > + ".pushsection .rodata\n" > + ".global uprobe_trampoline_entry\n" > + "uprobe_trampoline_entry:\n" > + "endbr64\n" > + "push %rcx\n" > + "push %r11\n" > + "push %rax\n" > + "movq $" __stringify(__NR_uprobe) ", %rax\n" > + "syscall\n" > + "pop %rax\n" > + "pop %r11\n" > + "pop %rcx\n" > + "ret\n" > + ".global uprobe_trampoline_end\n" > + "uprobe_trampoline_end:\n" > + ".popsection\n" > +); > + > +extern __visible u8 uprobe_trampoline_entry[]; > +extern __visible u8 uprobe_trampoline_end[]; > + > +const struct vm_special_mapping *arch_uprobe_trampoline_mapping(void) > +{ > + struct pt_regs *regs = task_pt_regs(current); > + > + return user_64bit_mode(regs) ? &tramp_mapping : NULL; > +} > + > +static int __init arch_uprobes_init(void) > +{ > + unsigned long size = uprobe_trampoline_end - uprobe_trampoline_entry; > + static struct page *pages[2]; > + struct page *page; > + > + page = alloc_page(GFP_HIGHUSER); That page could be in static memory, removing the need for the explicit allocation. It could also be __ro_after_init. Then tramp_mapping itself can be const. Also this seems to waste the page on 32-bit kernels.
> + if (!page) > + return -ENOMEM; > + pages[0] = page; > + tramp_mapping.pages = (struct page **) &pages; tramp_mapping.pages = pages; ? > + arch_uprobe_copy_ixol(page, 0, uprobe_trampoline_entry, size); > + return 0; > +} > + > +late_initcall(arch_uprobes_init); > + > /* > * If arch_uprobe->insn doesn't use rip-relative addressing, return > * immediately. Otherwise, rewrite the instruction so that it accesses [..]