On Sun, Jul 30, 2023 at 9:43 PM Jiri Olsa <jolsa@xxxxxxxxxx> wrote: > > Adding new multi uprobe link that allows to attach bpf program > to multiple uprobes. > > Uprobes to attach are specified via new link_create uprobe_multi > union: > > struct { > __aligned_u64 path; > __aligned_u64 offsets; > __aligned_u64 ref_ctr_offsets; > __u32 cnt; > __u32 flags; > } uprobe_multi; > > Uprobes are defined for single binary specified in path and multiple > calling sites specified in offsets array with optional reference > counters specified in ref_ctr_offsets array. All specified arrays > have length of 'cnt'. > > The 'flags' supports single bit for now that marks the uprobe as > return probe. > > Acked-by: Andrii Nakryiko <andrii@xxxxxxxxxx> > Signed-off-by: Jiri Olsa <jolsa@xxxxxxxxxx> LGTM! Acked-by: Yafang Shao <laoar.shao@xxxxxxxxx> > --- > include/linux/trace_events.h | 6 + > include/uapi/linux/bpf.h | 16 +++ > kernel/bpf/syscall.c | 14 +- > kernel/trace/bpf_trace.c | 237 +++++++++++++++++++++++++++++++++ > tools/include/uapi/linux/bpf.h | 16 +++ > 5 files changed, 286 insertions(+), 3 deletions(-) > > diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h > index e66d04dbe56a..5b85cf18c350 100644 > --- a/include/linux/trace_events.h > +++ b/include/linux/trace_events.h > @@ -752,6 +752,7 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, > u32 *fd_type, const char **buf, > u64 *probe_offset, u64 *probe_addr); > int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); > +int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); > #else > static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) > { > @@ -798,6 +799,11 @@ bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) > { > return -EOPNOTSUPP; > } > +static inline int > +bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) > +{ > + return -EOPNOTSUPP; > +} > #endif > > enum { > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 7abb382dc6c1..f112a0b948f3 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -1039,6 +1039,7 @@ enum bpf_attach_type { > BPF_NETFILTER, > BPF_TCX_INGRESS, > BPF_TCX_EGRESS, > + BPF_TRACE_UPROBE_MULTI, > __MAX_BPF_ATTACH_TYPE > }; > > @@ -1057,6 +1058,7 @@ enum bpf_link_type { > BPF_LINK_TYPE_STRUCT_OPS = 9, > BPF_LINK_TYPE_NETFILTER = 10, > BPF_LINK_TYPE_TCX = 11, > + BPF_LINK_TYPE_UPROBE_MULTI = 12, > MAX_BPF_LINK_TYPE, > }; > > @@ -1190,6 +1192,13 @@ enum { > BPF_F_KPROBE_MULTI_RETURN = (1U << 0) > }; > > +/* link_create.uprobe_multi.flags used in LINK_CREATE command for > + * BPF_TRACE_UPROBE_MULTI attach type to create return probe. > + */ > +enum { > + BPF_F_UPROBE_MULTI_RETURN = (1U << 0) > +}; > + > /* link_create.netfilter.flags used in LINK_CREATE command for > * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation. > */ > @@ -1626,6 +1635,13 @@ union bpf_attr { > }; > __u64 expected_revision; > } tcx; > + struct { > + __aligned_u64 path; > + __aligned_u64 offsets; > + __aligned_u64 ref_ctr_offsets; > + __u32 cnt; > + __u32 flags; > + } uprobe_multi; > }; > } link_create; > > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c > index 7c01186d4078..75c83300339e 100644 > --- a/kernel/bpf/syscall.c > +++ b/kernel/bpf/syscall.c > @@ -2815,10 +2815,12 @@ static void bpf_link_free_id(int id) > > /* Clean up bpf_link and corresponding anon_inode file and FD. After > * anon_inode is created, bpf_link can't be just kfree()'d due to deferred > - * anon_inode's release() call. This helper marksbpf_link as > + * anon_inode's release() call. This helper marks bpf_link as > * defunct, releases anon_inode file and puts reserved FD. bpf_prog's refcnt > * is not decremented, it's the responsibility of a calling code that failed > * to complete bpf_link initialization. > + * This helper eventually calls link's dealloc callback, but does not call > + * link's release callback. > */ > void bpf_link_cleanup(struct bpf_link_primer *primer) > { > @@ -3757,8 +3759,12 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, > if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI && > attach_type != BPF_TRACE_KPROBE_MULTI) > return -EINVAL; > + if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI && > + attach_type != BPF_TRACE_UPROBE_MULTI) > + return -EINVAL; > if (attach_type != BPF_PERF_EVENT && > - attach_type != BPF_TRACE_KPROBE_MULTI) > + attach_type != BPF_TRACE_KPROBE_MULTI && > + attach_type != BPF_TRACE_UPROBE_MULTI) > return -EINVAL; > return 0; > case BPF_PROG_TYPE_SCHED_CLS: > @@ -4954,8 +4960,10 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) > case BPF_PROG_TYPE_KPROBE: > if (attr->link_create.attach_type == BPF_PERF_EVENT) > ret = bpf_perf_link_attach(attr, prog); > - else > + else if (attr->link_create.attach_type == BPF_TRACE_KPROBE_MULTI) > ret = bpf_kprobe_multi_link_attach(attr, prog); > + else if (attr->link_create.attach_type == BPF_TRACE_UPROBE_MULTI) > + ret = bpf_uprobe_multi_link_attach(attr, prog); > break; > default: > ret = -EINVAL; > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > index c92eb8c6ff08..10284fd46f98 100644 > --- a/kernel/trace/bpf_trace.c > +++ b/kernel/trace/bpf_trace.c > @@ -23,6 +23,7 @@ > #include <linux/sort.h> > #include <linux/key.h> > #include <linux/verification.h> > +#include <linux/namei.h> > > #include <net/bpf_sk_storage.h> > > @@ -2965,3 +2966,239 @@ static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx) > return 0; > } > #endif > + > +#ifdef CONFIG_UPROBES > +struct bpf_uprobe_multi_link; > + > +struct bpf_uprobe { > + struct bpf_uprobe_multi_link *link; > + loff_t offset; > + struct uprobe_consumer consumer; > +}; > + > +struct bpf_uprobe_multi_link { > + struct path path; > + struct bpf_link link; > + u32 cnt; > + struct bpf_uprobe *uprobes; > +}; > + > +struct bpf_uprobe_multi_run_ctx { > + struct bpf_run_ctx run_ctx; > + unsigned long entry_ip; > +}; > + > +static void bpf_uprobe_unregister(struct path *path, struct bpf_uprobe *uprobes, > + u32 cnt) > +{ > + u32 i; > + > + for (i = 0; i < cnt; i++) { > + uprobe_unregister(d_real_inode(path->dentry), uprobes[i].offset, > + &uprobes[i].consumer); > + } > +} > + > +static void bpf_uprobe_multi_link_release(struct bpf_link *link) > +{ > + struct bpf_uprobe_multi_link *umulti_link; > + > + umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); > + bpf_uprobe_unregister(&umulti_link->path, umulti_link->uprobes, umulti_link->cnt); > +} > + > +static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link) > +{ > + struct bpf_uprobe_multi_link *umulti_link; > + > + umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); > + path_put(&umulti_link->path); > + kvfree(umulti_link->uprobes); > + kfree(umulti_link); > +} > + > +static const struct bpf_link_ops bpf_uprobe_multi_link_lops = { > + .release = bpf_uprobe_multi_link_release, > + .dealloc = bpf_uprobe_multi_link_dealloc, > +}; > + > +static int uprobe_prog_run(struct bpf_uprobe *uprobe, > + unsigned long entry_ip, > + struct pt_regs *regs) > +{ > + struct bpf_uprobe_multi_link *link = uprobe->link; > + struct bpf_uprobe_multi_run_ctx run_ctx = { > + .entry_ip = entry_ip, > + }; > + struct bpf_prog *prog = link->link.prog; > + bool sleepable = prog->aux->sleepable; > + struct bpf_run_ctx *old_run_ctx; > + int err = 0; > + > + might_fault(); > + > + migrate_disable(); > + > + if (sleepable) > + rcu_read_lock_trace(); > + else > + rcu_read_lock(); > + > + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); > + err = bpf_prog_run(link->link.prog, regs); > + bpf_reset_run_ctx(old_run_ctx); > + > + if (sleepable) > + rcu_read_unlock_trace(); > + else > + rcu_read_unlock(); > + > + migrate_enable(); > + return err; > +} > + > +static int > +uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs) > +{ > + struct bpf_uprobe *uprobe; > + > + uprobe = container_of(con, struct bpf_uprobe, consumer); > + return uprobe_prog_run(uprobe, instruction_pointer(regs), regs); > +} > + > +static int > +uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs) > +{ > + struct bpf_uprobe *uprobe; > + > + uprobe = container_of(con, struct bpf_uprobe, consumer); > + return uprobe_prog_run(uprobe, func, regs); > +} > + > +int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) > +{ > + struct bpf_uprobe_multi_link *link = NULL; > + unsigned long __user *uref_ctr_offsets; > + unsigned long *ref_ctr_offsets = NULL; > + struct bpf_link_primer link_primer; > + struct bpf_uprobe *uprobes = NULL; > + unsigned long __user *uoffsets; > + void __user *upath; > + u32 flags, cnt, i; > + struct path path; > + char *name; > + int err; > + > + /* no support for 32bit archs yet */ > + if (sizeof(u64) != sizeof(void *)) > + return -EOPNOTSUPP; > + > + if (prog->expected_attach_type != BPF_TRACE_UPROBE_MULTI) > + return -EINVAL; > + > + flags = attr->link_create.uprobe_multi.flags; > + if (flags & ~BPF_F_UPROBE_MULTI_RETURN) > + return -EINVAL; > + > + /* > + * path, offsets and cnt are mandatory, > + * ref_ctr_offsets is optional > + */ > + upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path); > + uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets); > + cnt = attr->link_create.uprobe_multi.cnt; > + > + if (!upath || !uoffsets || !cnt) > + return -EINVAL; > + > + uref_ctr_offsets = u64_to_user_ptr(attr->link_create.uprobe_multi.ref_ctr_offsets); > + > + name = strndup_user(upath, PATH_MAX); > + if (IS_ERR(name)) { > + err = PTR_ERR(name); > + return err; > + } > + > + err = kern_path(name, LOOKUP_FOLLOW, &path); > + kfree(name); > + if (err) > + return err; > + > + if (!d_is_reg(path.dentry)) { > + err = -EBADF; > + goto error_path_put; > + } > + > + err = -ENOMEM; > + > + link = kzalloc(sizeof(*link), GFP_KERNEL); > + uprobes = kvcalloc(cnt, sizeof(*uprobes), GFP_KERNEL); > + > + if (!uprobes || !link) > + goto error_free; > + > + if (uref_ctr_offsets) { > + ref_ctr_offsets = kvcalloc(cnt, sizeof(*ref_ctr_offsets), GFP_KERNEL); > + if (!ref_ctr_offsets) > + goto error_free; > + } > + > + for (i = 0; i < cnt; i++) { > + if (uref_ctr_offsets && __get_user(ref_ctr_offsets[i], uref_ctr_offsets + i)) { > + err = -EFAULT; > + goto error_free; > + } > + if (__get_user(uprobes[i].offset, uoffsets + i)) { > + err = -EFAULT; > + goto error_free; > + } > + > + uprobes[i].link = link; > + > + if (flags & BPF_F_UPROBE_MULTI_RETURN) > + uprobes[i].consumer.ret_handler = uprobe_multi_link_ret_handler; > + else > + uprobes[i].consumer.handler = uprobe_multi_link_handler; > + } > + > + link->cnt = cnt; > + link->uprobes = uprobes; > + link->path = path; > + > + bpf_link_init(&link->link, BPF_LINK_TYPE_UPROBE_MULTI, > + &bpf_uprobe_multi_link_lops, prog); > + > + err = bpf_link_prime(&link->link, &link_primer); > + if (err) > + goto error_free; > + > + for (i = 0; i < cnt; i++) { > + err = uprobe_register_refctr(d_real_inode(link->path.dentry), > + uprobes[i].offset, > + ref_ctr_offsets ? ref_ctr_offsets[i] : 0, > + &uprobes[i].consumer); > + if (err) { > + bpf_uprobe_unregister(&path, uprobes, i); > + bpf_link_cleanup(&link_primer); > + kvfree(ref_ctr_offsets); > + return err; > + } > + } > + > + kvfree(ref_ctr_offsets); > + return bpf_link_settle(&link_primer); > + > +error_free: > + kvfree(ref_ctr_offsets); > + kvfree(uprobes); > + kfree(link); > +error_path_put: > + path_put(&path); > + return err; > +} > +#else /* !CONFIG_UPROBES */ > +int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) > +{ > + return -EOPNOTSUPP; > +} > +#endif /* CONFIG_UPROBES */ > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h > index 7abb382dc6c1..f112a0b948f3 100644 > --- a/tools/include/uapi/linux/bpf.h > +++ b/tools/include/uapi/linux/bpf.h > @@ -1039,6 +1039,7 @@ enum bpf_attach_type { > BPF_NETFILTER, > BPF_TCX_INGRESS, > BPF_TCX_EGRESS, > + BPF_TRACE_UPROBE_MULTI, > __MAX_BPF_ATTACH_TYPE > }; > > @@ -1057,6 +1058,7 @@ enum bpf_link_type { > BPF_LINK_TYPE_STRUCT_OPS = 9, > BPF_LINK_TYPE_NETFILTER = 10, > BPF_LINK_TYPE_TCX = 11, > + BPF_LINK_TYPE_UPROBE_MULTI = 12, > MAX_BPF_LINK_TYPE, > }; > > @@ -1190,6 +1192,13 @@ enum { > BPF_F_KPROBE_MULTI_RETURN = (1U << 0) > }; > > +/* link_create.uprobe_multi.flags used in LINK_CREATE command for > + * BPF_TRACE_UPROBE_MULTI attach type to create return probe. > + */ > +enum { > + BPF_F_UPROBE_MULTI_RETURN = (1U << 0) > +}; > + > /* link_create.netfilter.flags used in LINK_CREATE command for > * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation. > */ > @@ -1626,6 +1635,13 @@ union bpf_attr { > }; > __u64 expected_revision; > } tcx; > + struct { > + __aligned_u64 path; > + __aligned_u64 offsets; > + __aligned_u64 ref_ctr_offsets; > + __u32 cnt; > + __u32 flags; > + } uprobe_multi; > }; > } link_create; > > -- > 2.41.0 > -- Regards Yafang