On Tue, Apr 25, 2023 at 04:56:00PM -0700, Yonghong Song wrote:
> 
> 
> On 4/24/23 9:04 AM, Jiri Olsa wrote:
> > Adding new multi uprobe link that allows to attach bpf program
> > to multiple uprobes.
> > 
> > Uprobes to attach are specified via new link_create uprobe_multi
> > union:
> > 
> >   struct {
> >           __u32           flags;
> >           __u32           cnt;
> >           __aligned_u64   paths;
> >           __aligned_u64   offsets;
> >           __aligned_u64   ref_ctr_offsets;
> >   } uprobe_multi;
> > 
> > Uprobes are defined in paths/offsets/ref_ctr_offsets arrays with
> > the same 'cnt' length. Each uprobe is defined with a single index
> > in all three arrays:
> > 
> >   paths[idx], offsets[idx] and/or ref_ctr_offsets[idx]
> 
> paths[idx], offsets[idx] and optional ref_ctr_offsets[idx]?

yes

> 
> > 
> > The 'flags' supports single bit for now that marks the uprobe as
> > return probe.
> > 
> > Signed-off-by: Jiri Olsa <jolsa@xxxxxxxxxx>
> > ---
> >  include/linux/trace_events.h |   6 +
> >  include/uapi/linux/bpf.h     |  14 +++
> >  kernel/bpf/syscall.c         |  16 ++-
> >  kernel/trace/bpf_trace.c     | 231 +++++++++++++++++++++++++++++++++++
> >  4 files changed, 265 insertions(+), 2 deletions(-)
> > 
> > diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
> > index 0e373222a6df..b0db245fc0f5 100644
> > --- a/include/linux/trace_events.h
> > +++ b/include/linux/trace_events.h
> > @@ -749,6 +749,7 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
> >  			u32 *fd_type, const char **buf,
> >  			u64 *probe_offset, u64 *probe_addr);
> >  int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
> > +int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
> >  #else
> >  static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
> >  {
> > @@ -795,6 +796,11 @@ bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
> >  {
> >  	return -EOPNOTSUPP;
> >  }
> > +static inline int
> > +bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
> > +{
> > +	return -EOPNOTSUPP;
> > +}
> >  #endif
> >  enum {
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index 1bb11a6ee667..debc041c6ca5 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -1035,6 +1035,7 @@ enum bpf_attach_type {
> >  	BPF_TRACE_KPROBE_MULTI,
> >  	BPF_LSM_CGROUP,
> >  	BPF_STRUCT_OPS,
> > +	BPF_TRACE_UPROBE_MULTI,
> >  	__MAX_BPF_ATTACH_TYPE
> >  };
> > @@ -1052,6 +1053,7 @@ enum bpf_link_type {
> >  	BPF_LINK_TYPE_KPROBE_MULTI = 8,
> >  	BPF_LINK_TYPE_STRUCT_OPS = 9,
> >  	BPF_LINK_TYPE_NETFILTER = 10,
> > +	BPF_LINK_TYPE_UPROBE_MULTI = 11,
> >  	MAX_BPF_LINK_TYPE,
> >  };
> > @@ -1169,6 +1171,11 @@ enum bpf_link_type {
> >   */
> >  #define BPF_F_KPROBE_MULTI_RETURN	(1U << 0)
> > +/* link_create.uprobe_multi.flags used in LINK_CREATE command for
> > + * BPF_TRACE_UPROBE_MULTI attach type to create return probe.
> > + */
> > +#define BPF_F_UPROBE_MULTI_RETURN	(1U << 0)
> > +
> >  /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
> >   * the following extensions:
> >   *
> > @@ -1568,6 +1575,13 @@ union bpf_attr {
> >  			__s32		priority;
> >  			__u32		flags;
> >  		} netfilter;
> > +		struct {
> > +			__u32		flags;
> > +			__u32		cnt;
> > +			__aligned_u64	paths;
> > +			__aligned_u64	offsets;
> > +			__aligned_u64	ref_ctr_offsets;
> > +		} uprobe_multi;
> >  	};
> >  } link_create;
> > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> > index 14f39c1e573e..0b789a33317b 100644
> > --- a/kernel/bpf/syscall.c
> > +++ b/kernel/bpf/syscall.c
> > @@ -4601,7 +4601,8 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
> >  		break;
> >  	case BPF_PROG_TYPE_KPROBE:
> >  		if (attr->link_create.attach_type != BPF_PERF_EVENT &&
> > -		    attr->link_create.attach_type != BPF_TRACE_KPROBE_MULTI) {
> > +		    attr->link_create.attach_type != BPF_TRACE_KPROBE_MULTI &&
> > +		    attr->link_create.attach_type != BPF_TRACE_UPROBE_MULTI) {
> >  			ret = -EINVAL;
> >  			goto out;
> >  		}
> > @@ -4666,10 +4667,21 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
> >  		ret = bpf_perf_link_attach(attr, prog);
> >  		break;
> >  	case BPF_PROG_TYPE_KPROBE:
> > +		/* Ensure that program with BPF_TRACE_UPROBE_MULTI attach type can
> > +		 * attach only to uprobe_multi link. It has its own runtime context
> > +		 * which is specific for get_func_ip/get_attach_cookie helpers.
> > +		 */
> > +		if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI &&
> > +		    attr->link_create.attach_type != BPF_TRACE_UPROBE_MULTI) {
> > +			ret = -EINVAL;
> > +			goto out;
> > +		}
> 
> The above seems redundant since it is checked in
> bpf_uprobe_multi_link_attach().
> That is why the BPF_TRACE_KPROBE_MULTI is not checked here since
> bpf_kprobe_multi_link_attach() checks it.

for standard kprobe type program we do not check expected_attach_type,
but the get_func_ip/get_attach_cookie helper functions are picked based
on that:

        case BPF_FUNC_get_attach_cookie:
                if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI)
                        return &bpf_get_attach_cookie_proto_kmulti;
                if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI)
                        return &bpf_get_attach_cookie_proto_umulti;
                return &bpf_get_attach_cookie_proto_trace;

so a standard kprobe attached through BPF_PERF_EVENT would run the
BPF_TRACE_UPROBE_MULTI version of the helper and crash, because
there's a different context used

it's probably a problem for kprobe_multi as well, I'll check
and have a separate patch for that

> > +static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
> > +{
> > +	struct bpf_uprobe_multi_link *umulti_link;
> > +
> > +	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
> > +	kvfree(umulti_link->uprobes);
> > +	kfree(umulti_link);
> > +}
> > +
> > +static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
> > +	.release = bpf_uprobe_multi_link_release,
> > +	.dealloc = bpf_uprobe_multi_link_dealloc,
> > +};
> > +
> > +static int uprobe_prog_run(struct bpf_uprobe *uprobe,
> > +			   unsigned long entry_ip,
> > +			   struct pt_regs *regs)
> > +{
> > +	struct bpf_uprobe_multi_link *link = uprobe->link;
> > +	struct bpf_uprobe_multi_run_ctx run_ctx = {
> > +		.entry_ip = entry_ip,
> > +	};
> > +	struct bpf_run_ctx *old_run_ctx;
> > +	int err;
> > +
> > +	preempt_disable();
> 
> Alexei has pointed out here that
> preempt_disable() is not favored.
> We should use migrate_disable/enable().
> For non sleepable program, the below rcu_read_lock() is okay.
> For sleepable program, use rcu_read_lock_trace().
> See __bpf_prog_enter_sleepable_recur() in trampoline.c as
> an example.

yes, I'll fix that

thanks,
jirka
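
A minimal sketch of what uprobe_prog_run() could look like with the
suggested fix applied (an illustration, not the actual respin; it
assumes the sleepable flag lives at prog->aux->sleepable and that
bpf_uprobe_multi_run_ctx embeds a struct bpf_run_ctx member, named
run_ctx here, suitable for bpf_set_run_ctx/bpf_reset_run_ctx):

	static int uprobe_prog_run(struct bpf_uprobe *uprobe,
				   unsigned long entry_ip,
				   struct pt_regs *regs)
	{
		struct bpf_uprobe_multi_link *link = uprobe->link;
		struct bpf_uprobe_multi_run_ctx run_ctx = {
			.entry_ip = entry_ip,
		};
		struct bpf_prog *prog = link->link.prog;
		bool sleepable = prog->aux->sleepable;
		struct bpf_run_ctx *old_run_ctx;
		int err;

		/* Sleepable programs may block, so protect them with
		 * RCU tasks trace instead of plain RCU
		 * (see __bpf_prog_enter_sleepable_recur()).
		 */
		if (sleepable)
			rcu_read_lock_trace();
		else
			rcu_read_lock();

		/* Unlike preempt_disable(), migrate_disable() only pins
		 * the task to the current CPU and still allows preemption.
		 */
		migrate_disable();

		old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
		err = bpf_prog_run(prog, regs);
		bpf_reset_run_ctx(old_run_ctx);

		migrate_enable();

		if (sleepable)
			rcu_read_unlock_trace();
		else
			rcu_read_unlock();
		return err;
	}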
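
For the new UAPI described at the top of the thread, a hedged sketch of
how userspace might create such a link once the patched uapi header is
in place. The function name, binary path, and offsets are hypothetical;
prog_fd is assumed to be an already loaded BPF_PROG_TYPE_KPROBE program
with expected_attach_type BPF_TRACE_UPROBE_MULTI:

	#include <linux/bpf.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Attach one bpf program to two uprobes, defined by index idx
	 * as paths[idx] plus offsets[idx], per the commit message.
	 */
	static int uprobe_multi_link_create(int prog_fd)
	{
		const char *paths[2] = { "/usr/bin/foo", "/usr/bin/foo" };
		__u64 offsets[2] = { 0x1234, 0x5678 };
		union bpf_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.link_create.prog_fd = prog_fd;
		attr.link_create.attach_type = BPF_TRACE_UPROBE_MULTI;
		attr.link_create.uprobe_multi.cnt = 2;
		attr.link_create.uprobe_multi.paths = (__u64)(unsigned long)paths;
		attr.link_create.uprobe_multi.offsets = (__u64)(unsigned long)offsets;
		/* ref_ctr_offsets stays 0 since it is optional; flags
		 * stays 0 for entry probes, BPF_F_UPROBE_MULTI_RETURN
		 * would make these return probes.
		 */
		return syscall(__NR_bpf, BPF_LINK_CREATE, &attr, sizeof(attr));
	}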