This is a note to let you know that I've just added the patch titled perf trace: Handle unpaired raw_syscalls:sys_exit event to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: perf-trace-handle-unpaired-raw_syscalls-sys_exit-event.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable@xxxxxxxxxxxxxxx> know about it. >From foo@baz Sun Mar 18 16:55:33 CET 2018 From: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx> Date: Wed, 29 Mar 2017 16:37:51 -0300 Subject: perf trace: Handle unpaired raw_syscalls:sys_exit event From: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx> [ Upstream commit fd2b2975149f5f7099693027cece81b16842964a ] Which may happen when we start a tracing session and a thread is waiting for something like "poll" to return, in which case we better print "?" both for the syscall entry timestamp and for the duration. E.g.: Tracing existing mutt session: # perf trace -p `pidof mutt` ? ( ? ): mutt/17135 ... [continued]: poll()) = 1 0.027 ( 0.013 ms): mutt/17135 read(buf: 0x7ffcb3c42cef, count: 1) = 1 0.047 ( 0.008 ms): mutt/17135 poll(ufds: 0x7ffcb3c42c50, nfds: 1, timeout_msecs: 1000) = 1 0.059 ( 0.008 ms): mutt/17135 read(buf: 0x7ffcb3c42cef, count: 1) = 1 <SNIP> Before it would print a large number because we'd do: ttrace->entry_time - trace->base_time And entry_time would be 0, while base_time would be the timestamp for the first event 'perf trace' reads, oops. Cc: Adrian Hunter <adrian.hunter@xxxxxxxxx> Cc: David Ahern <dsahern@xxxxxxxxx> Cc: Jiri Olsa <jolsa@xxxxxxxxxx> Cc: Luis Claudio Gonçalves <lclaudio@xxxxxxxxxx> Cc: Namhyung Kim <namhyung@xxxxxxxxxx> Cc: Wang Nan <wangnan0@xxxxxxxxxx> Link: http://lkml.kernel.org/n/tip-wbcb93ofva2qdjd5ltn5eeqq@xxxxxxxxxxxxxx Signed-off-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx> Signed-off-by: Sasha Levin <alexander.levin@xxxxxxxxxxxxx> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- tools/perf/builtin-trace.c | 43 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 9 deletions(-) --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -822,12 +822,21 @@ struct syscall { void **arg_parm; }; -static size_t fprintf_duration(unsigned long t, FILE *fp) +/* + * We need to have this 'calculated' boolean because in some cases we really + * don't know what is the duration of a syscall, for instance, when we start + * a session and some threads are waiting for a syscall to finish, say 'poll', + * in which case all we can do is to print "( ? ) for duration and for the + * start timestamp. + */ +static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp) { double duration = (double)t / NSEC_PER_MSEC; size_t printed = fprintf(fp, "("); - if (duration >= 1.0) + if (!calculated) + printed += fprintf(fp, " ? "); + else if (duration >= 1.0) printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration); else if (duration >= 0.01) printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration); @@ -1030,13 +1039,27 @@ static bool trace__filter_duration(struc return t < (trace->duration_filter * NSEC_PER_MSEC); } -static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) +static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) { double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC; return fprintf(fp, "%10.3f ", ts); } +/* + * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are + * using ttrace->entry_time for a thread that receives a sys_exit without + * first having received a sys_enter ("poll" issued before tracing session + * starts, lost sys_enter exit due to ring buffer overflow). + */ +static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) +{ + if (tstamp > 0) + return __trace__fprintf_tstamp(trace, tstamp, fp); + + return fprintf(fp, " ? "); +} + static bool done = false; static bool interrupted = false; @@ -1047,10 +1070,10 @@ static void sig_handler(int sig) } static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, - u64 duration, u64 tstamp, FILE *fp) + u64 duration, bool duration_calculated, u64 tstamp, FILE *fp) { size_t printed = trace__fprintf_tstamp(trace, tstamp, fp); - printed += fprintf_duration(duration, fp); + printed += fprintf_duration(duration, duration_calculated, fp); if (trace->multiple_threads) { if (trace->show_comm) @@ -1452,7 +1475,7 @@ static int trace__printf_interrupted_ent duration = sample->time - ttrace->entry_time; - printed = trace__fprintf_entry_head(trace, trace->current, duration, ttrace->entry_time, trace->output); + printed = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output); printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); ttrace->entry_pending = false; @@ -1499,7 +1522,7 @@ static int trace__sys_enter(struct trace if (sc->is_exit) { if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) { - trace__fprintf_entry_head(trace, thread, 1, ttrace->entry_time, trace->output); + trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output); fprintf(trace->output, "%-70s)\n", ttrace->entry_str); } } else { @@ -1547,6 +1570,7 @@ static int trace__sys_exit(struct trace { long ret; u64 duration = 0; + bool duration_calculated = false; struct thread *thread; int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0; struct syscall *sc = trace__syscall_info(trace, evsel, id); @@ -1577,6 +1601,7 @@ static int trace__sys_exit(struct trace duration = sample->time - ttrace->entry_time; if (trace__filter_duration(trace, duration)) goto out; + duration_calculated = true; } else if (trace->duration_filter) goto out; @@ -1592,7 +1617,7 @@ static int trace__sys_exit(struct trace if (trace->summary_only) goto out; - trace__fprintf_entry_head(trace, thread, duration, ttrace->entry_time, trace->output); + trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output); if (ttrace->entry_pending) { fprintf(trace->output, "%-70s", ttrace->entry_str); @@ -1855,7 +1880,7 @@ static int trace__pgfault(struct trace * thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al); - trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output); + trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output); fprintf(trace->output, "%sfault [", evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ? Patches currently in stable-queue which might be from acme@xxxxxxxxxx are queue-4.9/perf-buildid-do-not-assume-that-readlink-returns-a-null-terminated-string.patch queue-4.9/perf-session-don-t-rely-on-evlist-in-pipe-mode.patch queue-4.9/perf-tools-make-perf_event__synthesize_mmap_events-scale.patch queue-4.9/perf-annotate-fix-a-bug-following-symbolic-link-of-a-build-id-file.patch queue-4.9/perf-stat-issue-a-hw-watchdog-disable-hint.patch queue-4.9/perf-sort-fix-segfault-with-basic-block-cycles-sort-dimension.patch queue-4.9/perf-trace-handle-unpaired-raw_syscalls-sys_exit-event.patch queue-4.9/perf-probe-fix-concat_probe_trace_events.patch queue-4.9/perf-inject-copy-events-when-reordering-events-in-pipe-mode.patch queue-4.9/perf-probe-return-errno-when-not-hitting-any-event.patch queue-4.9/perf-evsel-return-exact-sub-event-which-failed-with-eperm-for-wildcards.patch queue-4.9/perf-stat-fix-bug-in-handling-events-in-error-state.patch