On 4/24/23 9:04 AM, Jiri Olsa wrote:
Adding a new multi uprobe link that allows attaching a bpf program
to multiple uprobes.
Uprobes to attach are specified via the new link_create uprobe_multi
union:
	struct {
		__u32 flags;
		__u32 cnt;
		__aligned_u64 paths;
		__aligned_u64 offsets;
		__aligned_u64 ref_ctr_offsets;
	} uprobe_multi;
Uprobes are defined in paths/offsets/ref_ctr_offsets arrays with
the same 'cnt' length. Each uprobe is defined with a single index
in all three arrays:
paths[idx], offsets[idx] and/or ref_ctr_offsets[idx]
Maybe: paths[idx], offsets[idx] and an optional ref_ctr_offsets[idx]?
The 'flags' field supports a single bit for now, which marks the uprobe
as a return probe.
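To make the interface concrete, here is a minimal userspace sketch of
driving it through the raw bpf(2) syscall. This is a hypothetical helper,
not part of the patch; it assumes 'paths' is an array of pointers to path
strings indexed the same way as 'offsets', and that prog_fd refers to a
loaded BPF_PROG_TYPE_KPROBE program with expected_attach_type
BPF_TRACE_UPROBE_MULTI:

#include <linux/bpf.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Hypothetical helper: attach one program to 'cnt' uprobes at once. */
static int uprobe_multi_attach(int prog_fd, const char **paths,
			       unsigned long *offsets, unsigned int cnt,
			       unsigned int flags)
{
	union bpf_attr attr = {};

	attr.link_create.prog_fd = prog_fd;
	attr.link_create.attach_type = BPF_TRACE_UPROBE_MULTI;
	attr.link_create.uprobe_multi.flags = flags; /* e.g. BPF_F_UPROBE_MULTI_RETURN */
	attr.link_create.uprobe_multi.cnt = cnt;
	attr.link_create.uprobe_multi.paths = (__u64)(unsigned long)paths;
	attr.link_create.uprobe_multi.offsets = (__u64)(unsigned long)offsets;
	/* ref_ctr_offsets left 0, i.e. unused, in this sketch */

	return syscall(__NR_bpf, BPF_LINK_CREATE, &attr, sizeof(attr));
}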
Signed-off-by: Jiri Olsa <jolsa@xxxxxxxxxx>
---
 include/linux/trace_events.h |   6 +
 include/uapi/linux/bpf.h     |  14 +++
 kernel/bpf/syscall.c         |  16 ++-
 kernel/trace/bpf_trace.c     | 231 +++++++++++++++++++++++++++++++++++
 4 files changed, 265 insertions(+), 2 deletions(-)
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 0e373222a6df..b0db245fc0f5 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -749,6 +749,7 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
			    u32 *fd_type, const char **buf,
			    u64 *probe_offset, u64 *probe_addr);
int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
#else
static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
@@ -795,6 +796,11 @@ bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	return -EOPNOTSUPP;
}
+static inline int
+bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+	return -EOPNOTSUPP;
+}
#endif
enum {
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1bb11a6ee667..debc041c6ca5 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1035,6 +1035,7 @@ enum bpf_attach_type {
	BPF_TRACE_KPROBE_MULTI,
	BPF_LSM_CGROUP,
	BPF_STRUCT_OPS,
+	BPF_TRACE_UPROBE_MULTI,
	__MAX_BPF_ATTACH_TYPE
};
@@ -1052,6 +1053,7 @@ enum bpf_link_type {
	BPF_LINK_TYPE_KPROBE_MULTI = 8,
	BPF_LINK_TYPE_STRUCT_OPS = 9,
	BPF_LINK_TYPE_NETFILTER = 10,
+	BPF_LINK_TYPE_UPROBE_MULTI = 11,
	MAX_BPF_LINK_TYPE,
};
@@ -1169,6 +1171,11 @@ enum bpf_link_type {
*/
#define BPF_F_KPROBE_MULTI_RETURN (1U << 0)
+/* link_create.uprobe_multi.flags used in LINK_CREATE command for
+ * BPF_TRACE_UPROBE_MULTI attach type to create return probe.
+ */
+#define BPF_F_UPROBE_MULTI_RETURN (1U << 0)
+
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
* the following extensions:
*
@@ -1568,6 +1575,13 @@ union bpf_attr {
				__s32 priority;
				__u32 flags;
			} netfilter;
+			struct {
+				__u32 flags;
+				__u32 cnt;
+				__aligned_u64 paths;
+				__aligned_u64 offsets;
+				__aligned_u64 ref_ctr_offsets;
+			} uprobe_multi;
		};
	} link_create;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 14f39c1e573e..0b789a33317b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -4601,7 +4601,8 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
		break;
	case BPF_PROG_TYPE_KPROBE:
		if (attr->link_create.attach_type != BPF_PERF_EVENT &&
-		    attr->link_create.attach_type != BPF_TRACE_KPROBE_MULTI) {
+		    attr->link_create.attach_type != BPF_TRACE_KPROBE_MULTI &&
+		    attr->link_create.attach_type != BPF_TRACE_UPROBE_MULTI) {
			ret = -EINVAL;
			goto out;
		}
@@ -4666,10 +4667,21 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
		ret = bpf_perf_link_attach(attr, prog);
		break;
	case BPF_PROG_TYPE_KPROBE:
+		/* Ensure that a program with BPF_TRACE_UPROBE_MULTI attach type can
+		 * attach only to a uprobe_multi link. It has its own runtime context
+		 * which is specific to the get_func_ip/get_attach_cookie helpers.
+		 */
+		if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI &&
+		    attr->link_create.attach_type != BPF_TRACE_UPROBE_MULTI) {
+			ret = -EINVAL;
+			goto out;
+		}
The above check seems redundant, since bpf_uprobe_multi_link_attach()
performs it as well. That is also why BPF_TRACE_KPROBE_MULTI is not
checked here: bpf_kprobe_multi_link_attach() does that check itself.
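For reference, kprobe_multi rejects a mismatched program with an
expected_attach_type check at the top of its attach function; presumably
the uprobe side mirrors that in the part of the patch not quoted here,
along the lines of:

	/* sketch of the check assumed to be in bpf_uprobe_multi_link_attach() */
	if (prog->expected_attach_type != BPF_TRACE_UPROBE_MULTI)
		return -EINVAL;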
		if (attr->link_create.attach_type == BPF_PERF_EVENT)
			ret = bpf_perf_link_attach(attr, prog);
-		else
+		else if (attr->link_create.attach_type == BPF_TRACE_KPROBE_MULTI)
			ret = bpf_kprobe_multi_link_attach(attr, prog);
+		else if (attr->link_create.attach_type == BPF_TRACE_UPROBE_MULTI)
+			ret = bpf_uprobe_multi_link_attach(attr, prog);
		break;
	default:
		ret = -EINVAL;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index bcf91bc7bf71..b84a7d01abf4 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -23,6 +23,7 @@
#include <linux/sort.h>
#include <linux/key.h>
#include <linux/verification.h>
+#include <linux/namei.h>
#include <net/bpf_sk_storage.h>
@@ -2901,3 +2902,233 @@ static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
	return 0;
}
#endif
+
+#ifdef CONFIG_UPROBES
+struct bpf_uprobe_multi_link;
+
+struct bpf_uprobe {
+	struct bpf_uprobe_multi_link *link;
+	struct inode *inode;
+	loff_t offset;
+	loff_t ref_ctr_offset;
+	struct uprobe_consumer consumer;
+};
+
+struct bpf_uprobe_multi_link {
+	struct bpf_link link;
+	u32 cnt;
+	struct bpf_uprobe *uprobes;
+};
+
+struct bpf_uprobe_multi_run_ctx {
+	struct bpf_run_ctx run_ctx;
+	unsigned long entry_ip;
+};
+
+static void bpf_uprobe_unregister(struct bpf_uprobe *uprobes, u32 cnt)
+{
+	u32 i;
+
+	for (i = 0; i < cnt; i++) {
+		uprobe_unregister(uprobes[i].inode, uprobes[i].offset,
+				  &uprobes[i].consumer);
+	}
+}
+
+static void bpf_uprobe_multi_link_release(struct bpf_link *link)
+{
+	struct bpf_uprobe_multi_link *umulti_link;
+
+	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
+	bpf_uprobe_unregister(umulti_link->uprobes, umulti_link->cnt);
+}
+
+static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
+{
+	struct bpf_uprobe_multi_link *umulti_link;
+
+	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
+	kvfree(umulti_link->uprobes);
+	kfree(umulti_link);
+}
+
+static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
+	.release = bpf_uprobe_multi_link_release,
+	.dealloc = bpf_uprobe_multi_link_dealloc,
+};
+
+static int uprobe_prog_run(struct bpf_uprobe *uprobe,
+			   unsigned long entry_ip,
+			   struct pt_regs *regs)
+{
+	struct bpf_uprobe_multi_link *link = uprobe->link;
+	struct bpf_uprobe_multi_run_ctx run_ctx = {
+		.entry_ip = entry_ip,
+	};
+	struct bpf_run_ctx *old_run_ctx;
+	int err;
+
+	preempt_disable();
As Alexei has pointed out here, preempt_disable() is not favored; we
should use migrate_disable()/migrate_enable() instead. For a non-sleepable
program the rcu_read_lock() below is okay, but for a sleepable program use
rcu_read_lock_trace(). See __bpf_prog_enter_sleepable_recur() in
trampoline.c as an example (a rough sketch follows the quoted function
below).
+
+	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
+		err = 0;
+		goto out;
+	}
+
+	rcu_read_lock();
+	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+	err = bpf_prog_run(link->link.prog, regs);
+	bpf_reset_run_ctx(old_run_ctx);
+	rcu_read_unlock();
+
+ out:
+	__this_cpu_dec(bpf_prog_active);
+	preempt_enable();
+	return err;
+}
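For illustration, a rough sketch of the shape being suggested for the tail
of uprobe_prog_run(), keeping the declarations already in the function. It
assumes the sleepable flag is reachable as prog->aux->sleepable, the way
trampoline.c keys its sleepable handling; the bpf_prog_active recursion
guard is left out here for brevity:

	struct bpf_prog *prog = link->link.prog;
	bool sleepable = prog->aux->sleepable;

	/* sleepable programs are protected by rcu_read_lock_trace() */
	if (sleepable)
		rcu_read_lock_trace();
	else
		rcu_read_lock();

	/* keep the program on this CPU without disabling preemption */
	migrate_disable();

	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
	err = bpf_prog_run(prog, regs);
	bpf_reset_run_ctx(old_run_ctx);

	migrate_enable();

	if (sleepable)
		rcu_read_unlock_trace();
	else
		rcu_read_unlock();
	return err;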
+
[...]