On Thu, Sep 01, 2022 at 06:09:56AM -0700, kan.liang@xxxxxxxxxxxxxxx wrote: > From: Kan Liang <kan.liang@xxxxxxxxxxxxxxx> > > Use the new sample_flags to indicate whether the branch stack is filled > by the PMU driver. > > Remove the br_stack from the perf_sample_data_init() to minimize the number > of cache lines touched. > > Signed-off-by: Kan Liang <kan.liang@xxxxxxxxxxxxxxx> > --- > arch/powerpc/perf/core-book3s.c | 1 + > arch/x86/events/amd/core.c | 4 +++- > arch/x86/events/core.c | 4 +++- > arch/x86/events/intel/core.c | 4 +++- > arch/x86/events/intel/ds.c | 5 ++++- > include/linux/perf_event.h | 4 ++-- > kernel/events/core.c | 4 ++-- > 7 files changed, 18 insertions(+), 8 deletions(-) > > diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c > index 13919eb96931..1ad1efdb33f9 100644 > --- a/arch/powerpc/perf/core-book3s.c > +++ b/arch/powerpc/perf/core-book3s.c > @@ -2297,6 +2297,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, > cpuhw = this_cpu_ptr(&cpu_hw_events); > power_pmu_bhrb_read(event, cpuhw); > data.br_stack = &cpuhw->bhrb_stack; > + data.sample_flags |= PERF_SAMPLE_BRANCH_STACK; > } > > if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC && > diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c > index 9fbdfbcaf25a..8b70237c33f7 100644 > --- a/arch/x86/events/amd/core.c > +++ b/arch/x86/events/amd/core.c > @@ -929,8 +929,10 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) > if (!x86_perf_event_set_period(event)) > continue; > > - if (has_branch_stack(event)) > + if (has_branch_stack(event)) { > data.br_stack = &cpuc->lbr_stack; > + data.sample_flags |= PERF_SAMPLE_BRANCH_STACK; > + } > > if (perf_event_overflow(event, &data, regs)) > x86_pmu_stop(event, 0); > diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c > index c3d6d139a3c1..b30b8bbcd1e2 100644 > --- a/arch/x86/events/core.c > +++ b/arch/x86/events/core.c > @@ -1701,8 +1701,10 @@ int 
x86_pmu_handle_irq(struct pt_regs *regs) > > perf_sample_data_init(&data, 0, event->hw.last_period); > > - if (has_branch_stack(event)) > + if (has_branch_stack(event)) { > data.br_stack = &cpuc->lbr_stack; > + data.sample_flags |= PERF_SAMPLE_BRANCH_STACK; > + } > > if (perf_event_overflow(event, &data, regs)) > x86_pmu_stop(event, 0); > diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c > index 7c1e3d36bc65..b5c02627a155 100644 > --- a/arch/x86/events/intel/core.c > +++ b/arch/x86/events/intel/core.c > @@ -3017,8 +3017,10 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) > > perf_sample_data_init(&data, 0, event->hw.last_period); > > - if (has_branch_stack(event)) > + if (has_branch_stack(event)) { > data.br_stack = &cpuc->lbr_stack; > + data.sample_flags |= PERF_SAMPLE_BRANCH_STACK; > + } > > if (perf_event_overflow(event, &data, regs)) > x86_pmu_stop(event, 0); > diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c > index 01cbe26225c2..5dcfd2de6ef8 100644 > --- a/arch/x86/events/intel/ds.c > +++ b/arch/x86/events/intel/ds.c > @@ -1648,8 +1648,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, > data->sample_flags |= PERF_SAMPLE_TIME; > } > > - if (has_branch_stack(event)) > + if (has_branch_stack(event)) { > data->br_stack = &cpuc->lbr_stack; > + data->sample_flags |= PERF_SAMPLE_BRANCH_STACK; > + } > } > > static void adaptive_pebs_save_regs(struct pt_regs *regs, > @@ -1799,6 +1801,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, > if (has_branch_stack(event)) { > intel_pmu_store_pebs_lbrs(lbr); > data->br_stack = &cpuc->lbr_stack; > + data->sample_flags |= PERF_SAMPLE_BRANCH_STACK; > } > } > > diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h > index 0978165a2d87..1e12e79454e0 100644 > --- a/include/linux/perf_event.h > +++ b/include/linux/perf_event.h > @@ -1011,7 +1011,6 @@ struct perf_sample_data { > u64 sample_flags; > u64 addr; > 
struct perf_raw_record *raw; > - struct perf_branch_stack *br_stack; > u64 period; > union perf_sample_weight weight; > u64 txn; > @@ -1021,6 +1020,8 @@ struct perf_sample_data { > * The other fields, optionally {set,used} by > * perf_{prepare,output}_sample(). > */ > + struct perf_branch_stack *br_stack; > + > u64 type; > u64 ip; > struct { > @@ -1061,7 +1062,6 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, > data->sample_flags = 0; > data->addr = addr; > data->raw = NULL; > - data->br_stack = NULL; Hi, there's one more place that needs updating; I'll send a full patch for that. jirka --- diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b05f0310dbd3..98abc6ebb8ea 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1687,6 +1687,9 @@ BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx, if (unlikely(flags & ~BPF_F_GET_BRANCH_RECORDS_SIZE)) return -EINVAL; + if (unlikely(!(ctx->data->sample_flags & PERF_SAMPLE_BRANCH_STACK))) + return -ENOENT; + if (unlikely(!br_stack)) return -ENOENT;