On Mon, Aug 30, 2021 at 2:42 PM Song Liu <songliubraving@xxxxxx> wrote: > > Introduce bpf_get_branch_snapshot(), which allows tracing program to get > branch trace from hardware (e.g. Intel LBR). To use the feature, the > user needs to create perf_event with proper branch_record filtering > on each cpu, and then call bpf_get_branch_snapshot in the bpf function. > On Intel CPUs, VLBR event (raw event 0x1b00) can be used for this. > > Signed-off-by: Song Liu <songliubraving@xxxxxx> > --- > include/linux/bpf.h | 2 ++ > include/linux/filter.h | 3 ++- > include/uapi/linux/bpf.h | 16 +++++++++++++ > kernel/bpf/trampoline.c | 13 ++++++++++ > kernel/bpf/verifier.c | 12 ++++++++++ > kernel/trace/bpf_trace.c | 43 ++++++++++++++++++++++++++++++++++ > tools/include/uapi/linux/bpf.h | 16 +++++++++++++ > 7 files changed, 104 insertions(+), 1 deletion(-) > [...] > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > index 206c221453cfa..72e8b49da0bf9 100644 > --- a/kernel/bpf/verifier.c > +++ b/kernel/bpf/verifier.c > @@ -6446,6 +6446,18 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn > env->prog->call_get_func_ip = true; > } > > + if (func_id == BPF_FUNC_get_branch_snapshot) { > + if (env->prog->aux->sleepable) { > + verbose(env, "sleepable progs cannot call get_branch_snapshot\n"); > + return -ENOTSUPP; > + } > + if (!IS_ENABLED(CONFIG_PERF_EVENTS)) { > + verbose(env, "func %s#%d not supported without CONFIG_PERF_EVENTS\n", > + func_id_name(func_id), func_id); > + return -ENOTSUPP; > + } > + env->prog->call_get_branch = true; > + } > if (changes_data) > clear_all_pkt_pointers(env); > return 0; > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > index 8e2eb950aa829..a01f26b7877e6 100644 > --- a/kernel/trace/bpf_trace.c > +++ b/kernel/trace/bpf_trace.c > @@ -1017,6 +1017,33 @@ static const struct bpf_func_proto bpf_get_attach_cookie_proto_pe = { > .arg1_type = ARG_PTR_TO_CTX, > }; > > +BPF_CALL_2(bpf_get_branch_snapshot, 
void *, buf, u32, size) I bet we'll need u64 flags over time, so let's add it right now. It's similar to bpf_read_branch_records(). > +{ > +#ifdef CONFIG_PERF_EVENTS > + u32 max_size; > + > + if (this_cpu_ptr(&bpf_perf_branch_snapshot)->nr == 0) > + return -EOPNOTSUPP; > + > + max_size = this_cpu_ptr(&bpf_perf_branch_snapshot)->nr * > + sizeof(struct perf_branch_entry); > + memcpy(buf, this_cpu_ptr(&bpf_perf_branch_snapshot)->entries, > + min_t(u32, size, max_size)); > + Check bpf_read_branch_records() implementation and its argument validation logic. Let's keep them consistent (e.g., it enforces that size is a multiple of sizeof(struct perf_branch_entry)). Another difference is that bpf_read_branch_records() returns the number of bytes filled, not the number of records. That's consistent with accepting size as a number of bytes. Let's stick to this convention then, so bytes everywhere. > + return this_cpu_ptr(&bpf_perf_branch_snapshot)->nr; > +#else > + return -EOPNOTSUPP; > +#endif > +} > + > +static const struct bpf_func_proto bpf_get_branch_snapshot_proto = { > + .func = bpf_get_branch_snapshot, > + .gpl_only = true, > + .ret_type = RET_INTEGER, > + .arg1_type = ARG_PTR_TO_UNINIT_MEM, > + .arg2_type = ARG_CONST_SIZE_OR_ZERO, > +}; > + [...]