On Tue, Sep 24, 2019 at 10:33:42AM +0200, Jiri Olsa wrote: > On Tue, Sep 17, 2019 at 06:30:52AM -0700, Daniel Xu wrote: > > SNIP > > > + PERF_FORMAT_MAX = 1U << 5, /* non-ABI */ > > }; > > > > #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ > > diff --git a/kernel/events/core.c b/kernel/events/core.c > > index 0463c1151bae..ee08d3ed6299 100644 > > --- a/kernel/events/core.c > > +++ b/kernel/events/core.c > > @@ -1715,6 +1715,9 @@ static void __perf_event_read_size(struct perf_event *event, int nr_siblings) > > if (event->attr.read_format & PERF_FORMAT_ID) > > entry += sizeof(u64); > > > > + if (event->attr.read_format & PERF_FORMAT_LOST) > > + entry += sizeof(u64); > > + > > if (event->attr.read_format & PERF_FORMAT_GROUP) { > > nr += nr_siblings; > > size += sizeof(u64); > > @@ -4734,6 +4737,24 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) > > } > > EXPORT_SYMBOL_GPL(perf_event_read_value); > > > > +static struct pmu perf_kprobe; > > +static u64 perf_event_lost(struct perf_event *event) > > +{ > > + struct ring_buffer *rb; > > + u64 lost = 0; > > + > > + rcu_read_lock(); > > + rb = rcu_dereference(event->rb); > > + if (likely(!!rb)) > > + lost += local_read(&rb->lost); > > + rcu_read_unlock(); > > + > > + if (event->attr.type == perf_kprobe.type) > > + lost += perf_kprobe_missed(event); > > not sure what was the peterz's suggestion, but here you are mixing > ring buffer's lost count with kprobes missed count, seems wrong Jiri is right, this isn't quite what I meant. The below is what I was thinking of (I also renamed everything to missing, to avoid confusion). But now that I wrote it, I'm a little scared of what I had to do for __perf_sw_event(). Let me ponder that a little bit more. --- include/linux/perf_event.h | 1 + include/linux/trace_events.h | 1 + include/uapi/linux/perf_event.h | 5 ++++- kernel/events/core.c | 42 +++++++++++++++++++++++++++++++++-------- kernel/trace/trace_event_perf.c | 4 +++- kernel/trace/trace_kprobe.c | 8 ++++++++ 6 files changed, 51 insertions(+), 10 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a9ef8be8c83a..ec6c867203c3 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -625,6 +625,7 @@ struct perf_event { unsigned int attach_state; local64_t count; atomic64_t child_count; + local64_t missed; /* * These are the total time in nanoseconds that the event diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index a379255c14a9..18d315a0f0f9 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -603,6 +603,7 @@ extern int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, const char **symbol, u64 *probe_offset, u64 *probe_addr, bool perf_type_tracepoint); +extern u64 perf_kprobe_missed(const struct perf_event *event); #endif #ifdef CONFIG_UPROBE_EVENTS extern int perf_uprobe_init(struct perf_event *event, diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index bb7b271397a6..2dd3c3f21087 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -273,6 +273,7 @@ enum { * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING * { u64 id; } && PERF_FORMAT_ID + * { u64 missed; } && PERF_FORMAT_MISSED * } && !PERF_FORMAT_GROUP * * { u64 nr; @@ -280,6 +281,7 @@ enum { * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING * { u64 value; * { u64 id; } && PERF_FORMAT_ID + * { u64 missed; } && PERF_FORMAT_MISSED * } cntr[nr]; * } && PERF_FORMAT_GROUP * }; @@ -289,8 +291,9 @@ enum perf_event_read_format { PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, PERF_FORMAT_ID = 1U << 2, PERF_FORMAT_GROUP = 1U << 3, + PERF_FORMAT_MISSED = 1U << 4, - PERF_FORMAT_MAX = 1U << 4, /* non-ABI */ + PERF_FORMAT_MAX = 1U << 5, /* non-ABI */ }; #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ diff --git a/kernel/events/core.c b/kernel/events/core.c index d8b9034857d7..7e72f919d2e7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1817,6 +1817,9 @@ static void __perf_event_read_size(struct perf_event *event, int nr_siblings) if (event->attr.read_format & PERF_FORMAT_ID) entry += sizeof(u64); + if (event->attr.read_format & PERF_FORMAT_MISSED) + entry += sizeof(u64); + if (event->attr.read_format & PERF_FORMAT_GROUP) { nr += nr_siblings; size += sizeof(u64); @@ -4994,6 +4997,15 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) } EXPORT_SYMBOL_GPL(perf_event_read_value); +static struct pmu perf_kprobe; +static u64 perf_event_missed(struct perf_event *event) +{ + if (event->attr.type == perf_kprobe.type) + return perf_kprobe_missed(event); + + return local64_read(&event->missed); +} + static int __perf_read_group_add(struct perf_event *leader, u64 read_format, u64 *values) { @@ -5030,11 +5042,15 @@ static int __perf_read_group_add(struct perf_event *leader, values[n++] += perf_event_count(leader); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(leader); + if (read_format & PERF_FORMAT_MISSED) + values[n++] = perf_event_missed(leader); for_each_sibling_event(sub, leader) { values[n++] += perf_event_count(sub); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(sub); + if (read_format & PERF_FORMAT_MISSED) + values[n++] = perf_event_missed(sub); } raw_spin_unlock_irqrestore(&ctx->lock, flags); @@ -5091,7 +5107,7 @@ static int perf_read_one(struct perf_event *event, u64 read_format, char __user *buf) { u64 enabled, running; - u64 values[4]; + u64 values[5]; int n = 0; values[n++] = __perf_event_read_value(event, &enabled, &running); @@ -5101,6 +5117,8 @@ static int perf_read_one(struct perf_event *event, values[n++] = running; if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(event); + if (read_format & PERF_FORMAT_MISSED) + values[n++] = perf_event_lost(event); if (copy_to_user(buf, values, n * sizeof(u64))) return -EFAULT; @@ -6427,7 +6445,7 @@ static void perf_output_read_one(struct perf_output_handle *handle, u64 enabled, u64 running) { u64 read_format = event->attr.read_format; - u64 values[4]; + u64 values[5]; int n = 0; values[n++] = perf_event_count(event); @@ -6441,6 +6459,8 @@ static void perf_output_read_one(struct perf_output_handle *handle, } if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(event); + if (read_format & PERF_FORMAT_MISSED) + values[n++] = perf_event_lost(event); __output_copy(handle, values, n * sizeof(u64)); } @@ -6451,7 +6471,7 @@ static void perf_output_read_group(struct perf_output_handle *handle, { struct perf_event *leader = event->group_leader, *sub; u64 read_format = event->attr.read_format; - u64 values[5]; + u64 values[6]; int n = 0; values[n++] = 1 + leader->nr_siblings; @@ -6469,6 +6489,8 @@ static void perf_output_read_group(struct perf_output_handle *handle, values[n++] = perf_event_count(leader); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(leader); + if (read_format & PERF_FORMAT_MISSED) + values[n++] = perf_event_lost(leader); __output_copy(handle, values, n * sizeof(u64)); @@ -6482,6 +6504,8 @@ static void perf_output_read_group(struct perf_output_handle *handle, values[n++] = perf_event_count(sub); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(sub); + if (read_format & PERF_FORMAT_MISSED) + values[n++] = perf_event_lost(sub); __output_copy(handle, values, n * sizeof(u64)); } @@ -8500,7 +8524,6 @@ static int perf_exclude_event(struct perf_event *event, static int perf_swevent_match(struct perf_event *event, enum perf_type_id type, u32 event_id, - struct perf_sample_data *data, struct pt_regs *regs) { if (event->attr.type != type) @@ -8579,8 +8602,12 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, goto end; hlist_for_each_entry_rcu(event, head, hlist_entry) { - if (perf_swevent_match(event, type, event_id, data, regs)) - perf_swevent_event(event, nr, data, regs); + if (perf_swevent_match(event, type, event_id, regs)) { + if (nr == ~0ULL) + local64_inc(&event->missed); + else + perf_swevent_event(event, nr, data, regs); + } } end: rcu_read_unlock(); @@ -8621,12 +8648,11 @@ void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) preempt_disable_notrace(); rctx = perf_swevent_get_recursion_context(); if (unlikely(rctx < 0)) - goto fail; + nr = ~0ULL; ___perf_sw_event(event_id, nr, regs, addr); perf_swevent_put_recursion_context(rctx); -fail: preempt_enable_notrace(); } diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 0917fee6ee7c..73a0de204d7a 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -458,8 +458,10 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, perf_fetch_caller_regs(®s); entry = perf_trace_buf_alloc(ENTRY_SIZE, NULL, &rctx); - if (!entry) + if (!entry) { + local64_inc(&event->missed); return; + } entry->ip = ip; entry->parent_ip = parent_ip; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 66e0a8ff1c01..5e1889c161e3 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -233,6 +233,14 @@ bool trace_kprobe_error_injectable(struct trace_event_call *call) false; } +u64 perf_kprobe_missed(const struct perf_event *event) +{ + struct trace_event_call *call = event->tp_event; + struct trace_kprobe *tk = (struct trace_kprobe *)call->data; + + return tk->rp.kp.nmissed; +} + static int register_kprobe_event(struct trace_kprobe *tk); static int unregister_kprobe_event(struct trace_kprobe *tk);