From: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx> [ Upstream commit fd2b2975149f5f7099693027cece81b16842964a ] Which may happen when we start a tracing session and a thread is waiting for something like "poll" to return, in which case we better print "?" both for the syscall entry timestamp and for the duration. E.g.: Tracing existing mutt session: # perf trace -p `pidof mutt` ? ( ? ): mutt/17135 ... [continued]: poll()) = 1 0.027 ( 0.013 ms): mutt/17135 read(buf: 0x7ffcb3c42cef, count: 1) = 1 0.047 ( 0.008 ms): mutt/17135 poll(ufds: 0x7ffcb3c42c50, nfds: 1, timeout_msecs: 1000) = 1 0.059 ( 0.008 ms): mutt/17135 read(buf: 0x7ffcb3c42cef, count: 1) = 1 <SNIP> Before it would print a large number because we'd do: ttrace->entry_time - trace->base_time And entry_time would be 0, while base_time would be the timestamp for the first event 'perf trace' reads, oops. Cc: Adrian Hunter <adrian.hunter@xxxxxxxxx> Cc: David Ahern <dsahern@xxxxxxxxx> Cc: Jiri Olsa <jolsa@xxxxxxxxxx> Cc: Luis Claudio Gonçalves <lclaudio@xxxxxxxxxx> Cc: Namhyung Kim <namhyung@xxxxxxxxxx> Cc: Wang Nan <wangnan0@xxxxxxxxxx> Link: http://lkml.kernel.org/n/tip-wbcb93ofva2qdjd5ltn5eeqq@xxxxxxxxxxxxxx Signed-off-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx> Signed-off-by: Sasha Levin <alexander.levin@xxxxxxxxxxxxx> --- tools/perf/builtin-trace.c | 43 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 21f8a81797a0..4c596ba310cb 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -822,12 +822,21 @@ struct syscall { void **arg_parm; }; -static size_t fprintf_duration(unsigned long t, FILE *fp) +/* + * We need to have this 'calculated' boolean because in some cases we really + * don't know what is the duration of a syscall, for instance, when we start + * a session and some threads are waiting for a syscall to finish, say 'poll', + * in which case all we can do is to print "( ? ) for duration and for the + * start timestamp. + */ +static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp) { double duration = (double)t / NSEC_PER_MSEC; size_t printed = fprintf(fp, "("); - if (duration >= 1.0) + if (!calculated) + printed += fprintf(fp, " ? "); + else if (duration >= 1.0) printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration); else if (duration >= 0.01) printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration); @@ -1030,13 +1039,27 @@ static bool trace__filter_duration(struct trace *trace, double t) return t < (trace->duration_filter * NSEC_PER_MSEC); } -static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) +static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) { double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC; return fprintf(fp, "%10.3f ", ts); } +/* + * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are + * using ttrace->entry_time for a thread that receives a sys_exit without + * first having received a sys_enter ("poll" issued before tracing session + * starts, lost sys_enter exit due to ring buffer overflow). + */ +static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) +{ + if (tstamp > 0) + return __trace__fprintf_tstamp(trace, tstamp, fp); + + return fprintf(fp, " ? "); +} + static bool done = false; static bool interrupted = false; @@ -1047,10 +1070,10 @@ static void sig_handler(int sig) } static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, - u64 duration, u64 tstamp, FILE *fp) + u64 duration, bool duration_calculated, u64 tstamp, FILE *fp) { size_t printed = trace__fprintf_tstamp(trace, tstamp, fp); - printed += fprintf_duration(duration, fp); + printed += fprintf_duration(duration, duration_calculated, fp); if (trace->multiple_threads) { if (trace->show_comm) @@ -1452,7 +1475,7 @@ static int trace__printf_interrupted_entry(struct trace *trace, struct perf_samp duration = sample->time - ttrace->entry_time; - printed = trace__fprintf_entry_head(trace, trace->current, duration, ttrace->entry_time, trace->output); + printed = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output); printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); ttrace->entry_pending = false; @@ -1499,7 +1522,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, if (sc->is_exit) { if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) { - trace__fprintf_entry_head(trace, thread, 1, ttrace->entry_time, trace->output); + trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output); fprintf(trace->output, "%-70s)\n", ttrace->entry_str); } } else { @@ -1547,6 +1570,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, { long ret; u64 duration = 0; + bool duration_calculated = false; struct thread *thread; int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0; struct syscall *sc = trace__syscall_info(trace, evsel, id); @@ -1577,6 +1601,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, duration = sample->time - ttrace->entry_time; if (trace__filter_duration(trace, duration)) goto out; + duration_calculated = true; } else if (trace->duration_filter) goto out; @@ -1592,7 +1617,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, if (trace->summary_only) goto out; - trace__fprintf_entry_head(trace, thread, duration, ttrace->entry_time, trace->output); + trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output); if (ttrace->entry_pending) { fprintf(trace->output, "%-70s", ttrace->entry_str); @@ -1855,7 +1880,7 @@ static int trace__pgfault(struct trace *trace, thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al); - trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output); + trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output); fprintf(trace->output, "%sfault [", evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ? -- 2.14.1