> On Aug 23, 2022, at 2:03 PM, Namhyung Kim <namhyung@xxxxxxxxxx> wrote: > > The helper is for BPF programs attached to perf_event in order to read > event-specific raw data. I followed the convention of the > bpf_read_branch_records() helper so that it can tell the size of > record using BPF_F_GET_RAW_RECORD flag. > > The use case is to filter perf event samples based on the HW provided > data which have more detailed information about the sample. > > Note that it only reads the first fragment of the raw record. But it > seems mostly ok since all the existing PMU raw data have only single > fragment and the multi-fragment records are only for BPF output attached > to sockets. So unless it's used with such an extreme case, it'd work > for most of tracing use cases. > > Signed-off-by: Namhyung Kim <namhyung@xxxxxxxxxx> > --- > I don't know how to test this. As the raw data is available on some > hardware PMU only (e.g. AMD IBS). I tried a tracepoint event but it was > rejected by the verifier. Actually it needs a bpf_perf_event_data > context so that's not an option IIUC. Can we add a software event that generates raw data for testing? Thanks, Song > > include/uapi/linux/bpf.h | 23 ++++++++++++++++++++++ > kernel/trace/bpf_trace.c | 41 ++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 64 insertions(+) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 934a2a8beb87..af7f70564819 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -5355,6 +5355,23 @@ union bpf_attr { > * Return > * Current *ktime*. > * > + * long bpf_read_raw_record(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags) > + * Description > + * For an eBPF program attached to a perf event, retrieve the > + * raw record associated to *ctx* and store it in the buffer > + * pointed by *buf* up to size *size* bytes. > + * Return > + * On success, number of bytes written to *buf*. On error, a > + * negative value. 
> + * 
> + * The *flags* can be set to **BPF_F_GET_RAW_RECORD_SIZE** to 
> + * instead return the number of bytes required to store the raw 
> + * record. If this flag is set, *buf* may be NULL. 
> + * 
> + * **-EINVAL** if arguments invalid or **size** not a multiple 
> + * of **sizeof**\ (u32\ ). 
> + * 
> + * **-ENOENT** if the event does not have raw records. 
> + */ 
> #define __BPF_FUNC_MAPPER(FN) \ 
> FN(unspec), \ 
> @@ -5566,6 +5583,7 @@ union bpf_attr { 
> FN(tcp_raw_check_syncookie_ipv4), \ 
> FN(tcp_raw_check_syncookie_ipv6), \ 
> FN(ktime_get_tai_ns), \ 
> + FN(read_raw_record), \ 
> /* */ 
> 
> /* integer value in 'imm' field of BPF_CALL instruction selects which helper 
> @@ -5749,6 +5767,11 @@ enum { 
> BPF_F_EXCLUDE_INGRESS = (1ULL << 4), 
> }; 
> 
> +/* BPF_FUNC_read_raw_record flags. */ 
> +enum { 
> + BPF_F_GET_RAW_RECORD_SIZE = (1ULL << 0), 
> +}; 
> + 
> #define __bpf_md_ptr(type, name) \ 
> union { \ 
> type name; \ 
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c 
> index 68e5cdd24cef..db172b12e5f8 100644 
> --- a/kernel/trace/bpf_trace.c 
> +++ b/kernel/trace/bpf_trace.c 
> @@ -20,6 +20,7 @@ 
> #include <linux/fprobe.h> 
> #include <linux/bsearch.h> 
> #include <linux/sort.h> 
> +#include <linux/perf_event.h> 
> 
> #include <net/bpf_sk_storage.h> 
> 
> @@ -1532,6 +1533,44 @@ static const struct bpf_func_proto bpf_read_branch_records_proto = { 
> .arg4_type = ARG_ANYTHING, 
> }; 
> 
> +BPF_CALL_4(bpf_read_raw_record, struct bpf_perf_event_data_kern *, ctx, 
> + void *, buf, u32, size, u64, flags) 
> +{ 
> + struct perf_raw_record *raw = ctx->data->raw; 
> + struct perf_raw_frag *frag; 
> + u32 to_copy; 
> + 
> + if (unlikely(flags & ~BPF_F_GET_RAW_RECORD_SIZE)) 
> + return -EINVAL; 
> + 
> + if (unlikely(!raw)) 
> + return -ENOENT; 
> + 
> + if (flags & BPF_F_GET_RAW_RECORD_SIZE) 
> + return raw->size; 
> + 
> + if (!buf || (size % sizeof(u32) != 0)) 
> + return -EINVAL; 
> + 
> + frag = &raw->frag; 
> + WARN_ON_ONCE(!perf_raw_frag_last(frag)); 
> + 
> + to_copy = min_t(u32, frag->size, size); 
> + memcpy(buf, frag->data, to_copy); > + > + return to_copy; > +} > + > +static const struct bpf_func_proto bpf_read_raw_record_proto = { > + .func = bpf_read_raw_record, > + .gpl_only = true, > + .ret_type = RET_INTEGER, > + .arg1_type = ARG_PTR_TO_CTX, > + .arg2_type = ARG_PTR_TO_MEM_OR_NULL, > + .arg3_type = ARG_CONST_SIZE_OR_ZERO, > + .arg4_type = ARG_ANYTHING, > +}; > + > static const struct bpf_func_proto * > pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) > { > @@ -1548,6 +1587,8 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) > return &bpf_read_branch_records_proto; > case BPF_FUNC_get_attach_cookie: > return &bpf_get_attach_cookie_proto_pe; > + case BPF_FUNC_read_raw_record: > + return &bpf_read_raw_record_proto; > default: > return bpf_tracing_func_proto(func_id, prog); > } > -- > 2.37.2.609.g9ff673ca1a-goog >