On 2024-12-13 15:51:44+0100, Jiri Olsa wrote: > On Fri, Dec 13, 2024 at 02:48:00PM +0100, Thomas Weißschuh wrote: > > SNIP > > > > +static int tramp_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) > > > +{ > > > + return -EPERM; > > > +} > > > + > > > +static struct vm_special_mapping tramp_mapping = { > > > + .name = "[uprobes-trampoline]", > > > + .mremap = tramp_mremap, > > > +}; > > > + > > > +SYSCALL_DEFINE0(uprobe) > > > +{ > > > + struct pt_regs *regs = task_pt_regs(current); > > > + struct vm_area_struct *vma; > > > + unsigned long bp_vaddr; > > > + int err; > > > + > > > + err = copy_from_user(&bp_vaddr, (void __user *)regs->sp + 3*8, sizeof(bp_vaddr)); > > > > A #define for the magic values would be nice. > > the 3*8 is to skip 3 values pushed on stack and get the return ip value, > I'd prefer to keep 3*8 but it's definitely missing explaining comment > above, wdyt? A comment sounds good. > > > + if (err) { > > > + force_sig(SIGILL); > > > + return -1; > > > + } > > > + > > > + /* Allow execution only from uprobe trampolines. */ > > > + vma = vma_lookup(current->mm, regs->ip); > > > + if (!vma || vma->vm_private_data != (void *) &tramp_mapping) { > > > > vma_is_special_mapping() > > did not know about this function, thanks > > > > > > + force_sig(SIGILL); > > > + return -1; > > > + } > > > + > > > + handle_syscall_uprobe(regs, bp_vaddr - 5); > > > + return 0; > > > +} > > > + > > > +asm ( > > > + ".pushsection .rodata\n" > > > + ".global uprobe_trampoline_entry\n" > > > + "uprobe_trampoline_entry:\n" > > > + "endbr64\n" > > > + "push %rcx\n" > > > + "push %r11\n" > > > + "push %rax\n" > > > + "movq $" __stringify(__NR_uprobe) ", %rax\n" > > > + "syscall\n" > > > + "pop %rax\n" > > > + "pop %r11\n" > > > + "pop %rcx\n" > > > + "ret\n" > > > + ".global uprobe_trampoline_end\n" > > > + "uprobe_trampoline_end:\n" > > > + ".popsection\n" > > > +); > > > + > > > +extern __visible u8 uprobe_trampoline_entry[]; > > > +extern __visible u8 uprobe_trampoline_end[]; > > > + > > > +const struct vm_special_mapping *arch_uprobe_trampoline_mapping(void) > > > +{ > > > + struct pt_regs *regs = task_pt_regs(current); > > > + > > > + return user_64bit_mode(regs) ? &tramp_mapping : NULL; > > > +} > > > + > > > +static int __init arch_uprobes_init(void) > > > +{ > > > + unsigned long size = uprobe_trampoline_end - uprobe_trampoline_entry; > > > + static struct page *pages[2]; > > > + struct page *page; > > > + > > > + page = alloc_page(GFP_HIGHUSER); > > > > That page could be in static memory, removing the need for the explicit > > allocation. It could also be __ro_after_init. > > Then tramp_mapping itself can be const. > > hum, how would that look like? I think that to get proper page object > you have to call alloc_page or some other page alloc family function.. > what do I miss? static u8 trampoline_page[PAGE_SIZE] __ro_after_init __aligned(PAGE_SIZE); static struct page *tramp_mapping_pages[2] __ro_after_init; static const struct vm_special_mapping tramp_mapping = { .name = "[uprobes-trampoline]", .pages = tramp_mapping_pages, .mremap = tramp_mremap, }; static int __init arch_uprobes_init(void) { ... trampoline_pages[0] = virt_to_page(trampoline_page); ... } Untested, but it's similar to the stuff the vDSO implementations are doing which I am working with at the moment. > > > > Also this seems to waste the page on 32bit kernels. > > it's inside CONFIG_X86_64 ifdef > > > > > > + if (!page) > > > + return -ENOMEM; > > > + pages[0] = page; > > > + tramp_mapping.pages = (struct page **) &pages; > > > > tramp_mapping.pages = pages; ? > > I think the compiler will cry about *pages[2] vs **pages types mismatch, > but I'll double check that It compiles for me. > thanks, > jirka > > > > > > + arch_uprobe_copy_ixol(page, 0, uprobe_trampoline_entry, size); > > > + return 0; > > > +} > > > + > > > +late_initcall(arch_uprobes_init); > > > + > > > /* > > > * If arch_uprobe->insn doesn't use rip-relative addressing, return > > > * immediately. Otherwise, rewrite the instruction so that it accesses > > > > [..]