On Tue, Jul 14, 2020 at 11:08 PM Song Liu <songliubraving@xxxxxx> wrote:
>
> Calling get_perf_callchain() on perf_events from PEBS entries may cause
> unwinder errors. To fix this issue, the callchain is fetched early. Such
> perf_events are marked with __PERF_SAMPLE_CALLCHAIN_EARLY.
>
> Similarly, calling bpf_get_[stack|stackid] on perf_events from PEBS may
> also cause unwinder errors. To fix this, add separate version of these
> two helpers, bpf_get_[stack|stackid]_pe. These two hepers use callchain in
> bpf_perf_event_data_kern->data->callchain.
>
> Signed-off-by: Song Liu <songliubraving@xxxxxx>
> ---
>  include/linux/bpf.h      |   2 +
>  kernel/bpf/stackmap.c    | 204 +++++++++++++++++++++++++++++++++++----
>  kernel/trace/bpf_trace.c |   4 +-
>  3 files changed, 190 insertions(+), 20 deletions(-)
>

Glad this approach worked out! A few minor bugs below, though.

[...]

> +       if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
> +                              BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
> +               return -EINVAL;
> +
> +       user = flags & BPF_F_USER_STACK;
> +       kernel = !user;
> +
> +       has_kernel = !event->attr.exclude_callchain_kernel;
> +       has_user = !event->attr.exclude_callchain_user;
> +
> +       if ((kernel && !has_kernel) || (user && !has_user))
> +               return -EINVAL;
> +
> +       trace = ctx->data->callchain;
> +       if (!trace || (!has_kernel && !has_user))

(!has_kernel && !has_user) can never happen, it's checked by the if above
(one of kernel or user is always true => one of has_kernel or has_user is
always true).

> +               return -EFAULT;
> +
> +       if (has_kernel && has_user) {
> +               __u64 nr_kernel = count_kernel_ip(trace);
> +               int ret;
> +
> +               if (kernel) {
> +                       __u64 nr = trace->nr;
> +
> +                       trace->nr = nr_kernel;
> +                       ret = __bpf_get_stackid(map, trace, flags);
> +
> +                       /* restore nr */
> +                       trace->nr = nr;
> +               } else { /* user */
> +                       u64 skip = flags & BPF_F_SKIP_FIELD_MASK;
> +
> +                       skip += nr_kernel;
> +                       if (skip > ~BPF_F_SKIP_FIELD_MASK)

Something fishy here: ~BPF_F_SKIP_FIELD_MASK is a really big number; were
you going to check that skip is not bigger than 255, i.e., that it still
fits within BPF_F_SKIP_FIELD_MASK? See the sketch after the quoted
function below.

> +                               return -EFAULT;
> +
> +                       flags = (flags & ~BPF_F_SKIP_FIELD_MASK) |
> +                               (skip & BPF_F_SKIP_FIELD_MASK);
> +                       ret = __bpf_get_stackid(map, trace, flags);
> +               }
> +               return ret;
> +       }
> +       return __bpf_get_stackid(map, trace, flags);
> +}
> +

[...]
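For the skip check flagged above, something like this is probably what was
intended (completely untested sketch, just to illustrate the range check;
everything else stays as in your patch):

                } else { /* user */
                        u64 skip = flags & BPF_F_SKIP_FIELD_MASK;

                        skip += nr_kernel;
                        /* combined skip still has to fit in the skip field (0..255) */
                        if (skip > BPF_F_SKIP_FIELD_MASK)
                                return -EFAULT;

                        flags = (flags & ~BPF_F_SKIP_FIELD_MASK) |
                                (skip & BPF_F_SKIP_FIELD_MASK);
                        ret = __bpf_get_stackid(map, trace, flags);
                }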
> +
> +       has_kernel = !event->attr.exclude_callchain_kernel;
> +       has_user = !event->attr.exclude_callchain_user;
> +
> +       if ((kernel && !has_kernel) || (user && !has_user))
> +               goto clear;
> +
> +       err = -EFAULT;
> +       trace = ctx->data->callchain;
> +       if (!trace || (!has_kernel && !has_user))
> +               goto clear;

Same as above for bpf_get_stackid: the (!has_kernel && !has_user) part can
never be true here either, so this can probably be simplified.

> +
> +       if (has_kernel && has_user) {
> +               __u64 nr_kernel = count_kernel_ip(trace);
> +               int ret;
> +
> +               if (kernel) {
> +                       __u64 nr = trace->nr;
> +
> +                       trace->nr = nr_kernel;
> +                       ret = __bpf_get_stack(ctx->regs, NULL, trace, buf,
> +                                             size, flags);
> +
> +                       /* restore nr */
> +                       trace->nr = nr;
> +               } else { /* user */
> +                       u64 skip = flags & BPF_F_SKIP_FIELD_MASK;
> +
> +                       skip += nr_kernel;
> +                       if (skip > ~BPF_F_SKIP_FIELD_MASK)
> +                               goto clear;
> +

And the same suspicious skip check here.

> +                       flags = (flags & ~BPF_F_SKIP_FIELD_MASK) |
> +                               (skip & BPF_F_SKIP_FIELD_MASK);

Actually, if you check that skip <= BPF_F_SKIP_FIELD_MASK above, you don't
need to mask it here, just `| skip` is enough (see the sketch after the
quoted function below).

> +                       ret = __bpf_get_stack(ctx->regs, NULL, trace, buf,
> +                                             size, flags);
> +               }
> +               return ret;
> +       }
> +       return __bpf_get_stack(ctx->regs, NULL, trace, buf, size, flags);
> +clear:
> +       memset(buf, 0, size);
> +       return err;
> +
> +}
> +

[...]
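To spell out both points for the bpf_get_stack variant (again an untested
sketch; only the trace check and the skip handling differ from your
patch):

        err = -EFAULT;
        trace = ctx->data->callchain;
        if (!trace)     /* (!has_kernel && !has_user) can't be true here */
                goto clear;

        ...

                } else { /* user */
                        u64 skip = flags & BPF_F_SKIP_FIELD_MASK;

                        skip += nr_kernel;
                        if (skip > BPF_F_SKIP_FIELD_MASK)
                                goto clear;

                        /* skip already fits within the mask, so just OR it in */
                        flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip;
                        ret = __bpf_get_stack(ctx->regs, NULL, trace, buf,
                                              size, flags);
                }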