On Tue, Apr 25, 2023 at 04:56:00PM -0700, Yonghong Song wrote:
> 
> 
> On 4/24/23 9:04 AM, Jiri Olsa wrote:
> > Adding new multi uprobe link that allows to attach bpf program
> > to multiple uprobes.
> > 
> > Uprobes to attach are specified via new link_create uprobe_multi
> > union:
> > 
> >   struct {
> >           __u32           flags;
> >           __u32           cnt;
> >           __aligned_u64   paths;
> >           __aligned_u64   offsets;
> >           __aligned_u64   ref_ctr_offsets;
> >   } uprobe_multi;
> > 
> > Uprobes are defined in paths/offsets/ref_ctr_offsets arrays with
> > the same 'cnt' length. Each uprobe is defined with a single index
> > in all three arrays:
> > 
> >   paths[idx], offsets[idx] and/or ref_ctr_offsets[idx]
> 
> paths[idx], offsets[idx] and optional ref_ctr_offsets[idx]?

yes

> 
> > 
> > The 'flags' supports single bit for now that marks the uprobe as
> > return probe.
> > 
> > Signed-off-by: Jiri Olsa <jolsa@xxxxxxxxxx>
> > ---
> >  include/linux/trace_events.h |   6 +
> >  include/uapi/linux/bpf.h     |  14 +++
> >  kernel/bpf/syscall.c         |  16 ++-
> >  kernel/trace/bpf_trace.c     | 231 +++++++++++++++++++++++++++++++++++
> >  4 files changed, 265 insertions(+), 2 deletions(-)
> > 
> > diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
> > index 0e373222a6df..b0db245fc0f5 100644
> > --- a/include/linux/trace_events.h
> > +++ b/include/linux/trace_events.h
> > @@ -749,6 +749,7 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
> >  			u32 *fd_type, const char **buf,
> >  			u64 *probe_offset, u64 *probe_addr);
> >  int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
> > +int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
> >  #else
> >  static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
> >  {
> > @@ -795,6 +796,11 @@ bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
> >  {
> >  	return -EOPNOTSUPP;
> >  }
> > +static inline int
> > +bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
> > +{
> > +	return -EOPNOTSUPP;
> > +}
> >  #endif
> >  enum {
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index 1bb11a6ee667..debc041c6ca5 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -1035,6 +1035,7 @@ enum bpf_attach_type {
> >  	BPF_TRACE_KPROBE_MULTI,
> >  	BPF_LSM_CGROUP,
> >  	BPF_STRUCT_OPS,
> > +	BPF_TRACE_UPROBE_MULTI,
> >  	__MAX_BPF_ATTACH_TYPE
> >  };
> > @@ -1052,6 +1053,7 @@ enum bpf_link_type {
> >  	BPF_LINK_TYPE_KPROBE_MULTI = 8,
> >  	BPF_LINK_TYPE_STRUCT_OPS = 9,
> >  	BPF_LINK_TYPE_NETFILTER = 10,
> > +	BPF_LINK_TYPE_UPROBE_MULTI = 11,
> >  	MAX_BPF_LINK_TYPE,
> >  };
> > @@ -1169,6 +1171,11 @@ enum bpf_link_type {
> >   */
> >  #define BPF_F_KPROBE_MULTI_RETURN	(1U << 0)
> > +/* link_create.uprobe_multi.flags used in LINK_CREATE command for
> > + * BPF_TRACE_UPROBE_MULTI attach type to create return probe.
> > + */
> > +#define BPF_F_UPROBE_MULTI_RETURN	(1U << 0)
> > +
> >  /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
> >   * the following extensions:
> >   *
> > @@ -1568,6 +1575,13 @@ union bpf_attr {
> >  			__s32		priority;
> >  			__u32		flags;
> >  		} netfilter;
> > +		struct {
> > +			__u32		flags;
> > +			__u32		cnt;
> > +			__aligned_u64	paths;
> > +			__aligned_u64	offsets;
> > +			__aligned_u64	ref_ctr_offsets;
> > +		} uprobe_multi;
> >  	};
> >  } link_create;
> > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> > index 14f39c1e573e..0b789a33317b 100644
> > --- a/kernel/bpf/syscall.c
> > +++ b/kernel/bpf/syscall.c
> > @@ -4601,7 +4601,8 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
> >  		break;
> >  	case BPF_PROG_TYPE_KPROBE:
> >  		if (attr->link_create.attach_type != BPF_PERF_EVENT &&
> > -		    attr->link_create.attach_type != BPF_TRACE_KPROBE_MULTI) {
> > +		    attr->link_create.attach_type != BPF_TRACE_KPROBE_MULTI &&
> > +		    attr->link_create.attach_type != BPF_TRACE_UPROBE_MULTI) {
> >  			ret = -EINVAL;
> >  			goto out;
> >  		}
> > @@ -4666,10 +4667,21 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
> >  		ret = bpf_perf_link_attach(attr, prog);
> >  		break;
> >  	case BPF_PROG_TYPE_KPROBE:
> > +		/* Ensure that program with BPF_TRACE_UPROBE_MULTI attach type can
> > +		 * attach only to uprobe_multi link. It has its own runtime context
> > +		 * which is specific for get_func_ip/get_attach_cookie helpers.
> > +		 */
> > +		if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI &&
> > +		    attr->link_create.attach_type != BPF_TRACE_UPROBE_MULTI) {
> > +			ret = -EINVAL;
> > +			goto out;
> > +		}
> 
> The above seems redundant since it is checked in
> bpf_uprobe_multi_link_attach().
> That is why the BPF_TRACE_KPROBE_MULTI is not checked here since
> bpf_kprobe_multi_link_attach() checks it.

for standard kprobe type program we do not check expected_attach_type,
but the get_func_ip/get_attach_cookie helper functions are picked based
on that:

        case BPF_FUNC_get_attach_cookie:
                if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI)
                        return &bpf_get_attach_cookie_proto_kmulti;
                if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI)
                        return &bpf_get_attach_cookie_proto_umulti;
                return &bpf_get_attach_cookie_proto_trace;

so a standard kprobe attached through BPF_PERF_EVENT would run the
BPF_TRACE_UPROBE_MULTI version of the helper and crash, because
there's a different context used

it's probably a problem for kprobe_multi as well, I'll check
and have a separate patch for that

> > +static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
> > +{
> > +	struct bpf_uprobe_multi_link *umulti_link;
> > +
> > +	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
> > +	kvfree(umulti_link->uprobes);
> > +	kfree(umulti_link);
> > +}
> > +
> > +static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
> > +	.release = bpf_uprobe_multi_link_release,
> > +	.dealloc = bpf_uprobe_multi_link_dealloc,
> > +};
> > +
> > +static int uprobe_prog_run(struct bpf_uprobe *uprobe,
> > +			   unsigned long entry_ip,
> > +			   struct pt_regs *regs)
> > +{
> > +	struct bpf_uprobe_multi_link *link = uprobe->link;
> > +	struct bpf_uprobe_multi_run_ctx run_ctx = {
> > +		.entry_ip = entry_ip,
> > +	};
> > +	struct bpf_run_ctx *old_run_ctx;
> > +	int err;
> > +
> > +	preempt_disable();
> 
> Alexei has pointed out here that
> preempt_disable() is not favored.
> We should use migrate_disable/enable().
> For non sleepable program, the below rcu_read_lock() is okay.
> For sleepable program, use rcu_read_lock_trace().
> See __bpf_prog_enter_sleepable_recur() in trampoline.c as
> an example.

yes, I'll fix that

thanks,
jirka
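
A minimal sketch of what uprobe_prog_run() could look like with the
suggested fix applied (an illustration, not the actual respin; it
assumes the sleepable flag lives at prog->aux->sleepable and that
bpf_uprobe_multi_run_ctx embeds a struct bpf_run_ctx member, named
run_ctx here, suitable for bpf_set_run_ctx/bpf_reset_run_ctx):

	static int uprobe_prog_run(struct bpf_uprobe *uprobe,
				   unsigned long entry_ip,
				   struct pt_regs *regs)
	{
		struct bpf_uprobe_multi_link *link = uprobe->link;
		struct bpf_uprobe_multi_run_ctx run_ctx = {
			.entry_ip = entry_ip,
		};
		struct bpf_prog *prog = link->link.prog;
		bool sleepable = prog->aux->sleepable;
		struct bpf_run_ctx *old_run_ctx;
		int err;

		/* Sleepable programs may block, so protect them with
		 * RCU tasks trace instead of plain RCU
		 * (see __bpf_prog_enter_sleepable_recur()).
		 */
		if (sleepable)
			rcu_read_lock_trace();
		else
			rcu_read_lock();

		/* Unlike preempt_disable(), migrate_disable() only pins
		 * the task to the current CPU and still allows preemption.
		 */
		migrate_disable();

		old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
		err = bpf_prog_run(prog, regs);
		bpf_reset_run_ctx(old_run_ctx);

		migrate_enable();

		if (sleepable)
			rcu_read_unlock_trace();
		else
			rcu_read_unlock();
		return err;
	}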
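
For the new UAPI described at the top of the thread, a hedged sketch of
how userspace might create such a link once the patched uapi header is
in place. The function name, binary path, and offsets are hypothetical;
prog_fd is assumed to be an already loaded BPF_PROG_TYPE_KPROBE program
with expected_attach_type BPF_TRACE_UPROBE_MULTI:

	#include <linux/bpf.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Attach one bpf program to two uprobes, defined by index idx
	 * as paths[idx] plus offsets[idx], per the commit message.
	 */
	static int uprobe_multi_link_create(int prog_fd)
	{
		const char *paths[2] = { "/usr/bin/foo", "/usr/bin/foo" };
		__u64 offsets[2] = { 0x1234, 0x5678 };
		union bpf_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.link_create.prog_fd = prog_fd;
		attr.link_create.attach_type = BPF_TRACE_UPROBE_MULTI;
		attr.link_create.uprobe_multi.cnt = 2;
		attr.link_create.uprobe_multi.paths = (__u64)(unsigned long)paths;
		attr.link_create.uprobe_multi.offsets = (__u64)(unsigned long)offsets;
		/* ref_ctr_offsets stays 0 since it is optional; flags
		 * stays 0 for entry probes, BPF_F_UPROBE_MULTI_RETURN
		 * would make these return probes.
		 */
		return syscall(__NR_bpf, BPF_LINK_CREATE, &attr, sizeof(attr));
	}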