Patch "perf trace: Handle unpaired raw_syscalls:sys_exit event" has been added to the 4.9-stable tree

<gregkh@xxxxxxxxxxxxxxxxxxx> · Sun, 18 Mar 2018 17:03:36 +0100

This is a note to let you know that I've just added the patch titled

    perf trace: Handle unpaired raw_syscalls:sys_exit event

to the 4.9-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     perf-trace-handle-unpaired-raw_syscalls-sys_exit-event.patch
and it can be found in the queue-4.9 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@xxxxxxxxxxxxxxx> know about it.


>From foo@baz Sun Mar 18 16:55:33 CET 2018
From: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Date: Wed, 29 Mar 2017 16:37:51 -0300
Subject: perf trace: Handle unpaired raw_syscalls:sys_exit event

From: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>


[ Upstream commit fd2b2975149f5f7099693027cece81b16842964a ]

Which may happen when we start a tracing session and a thread is waiting
for something like "poll" to return, in which case we better print "?"
both for the syscall entry timestamp and for the duration.

E.g.:

Tracing existing mutt session:

  # perf trace -p `pidof mutt`
          ? (     ?   ): mutt/17135  ... [continued]: poll()) = 1
      0.027 ( 0.013 ms): mutt/17135 read(buf: 0x7ffcb3c42cef, count: 1) = 1
      0.047 ( 0.008 ms): mutt/17135 poll(ufds: 0x7ffcb3c42c50, nfds: 1, timeout_msecs: 1000) = 1
      0.059 ( 0.008 ms): mutt/17135 read(buf: 0x7ffcb3c42cef, count: 1) = 1
  <SNIP>

Before it would print a large number because we'd do:

  ttrace->entry_time - trace->base_time

And entry_time would be 0, while base_time would be the timestamp for
the first event 'perf trace' reads, oops.

Cc: Adrian Hunter <adrian.hunter@xxxxxxxxx>
Cc: David Ahern <dsahern@xxxxxxxxx>
Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
Cc: Luis Claudio Gonçalves <lclaudio@xxxxxxxxxx>
Cc: Namhyung Kim <namhyung@xxxxxxxxxx>
Cc: Wang Nan <wangnan0@xxxxxxxxxx>
Link: http://lkml.kernel.org/n/tip-wbcb93ofva2qdjd5ltn5eeqq@xxxxxxxxxxxxxx
Signed-off-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Signed-off-by: Sasha Levin <alexander.levin@xxxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
 tools/perf/builtin-trace.c |   43 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 34 insertions(+), 9 deletions(-)

--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -822,12 +822,21 @@ struct syscall {
 	void		    **arg_parm;
 };
 
-static size_t fprintf_duration(unsigned long t, FILE *fp)
+/*
+ * We need to have this 'calculated' boolean because in some cases we really
+ * don't know what is the duration of a syscall, for instance, when we start
+ * a session and some threads are waiting for a syscall to finish, say 'poll',
+ * in which case all we can do is to print "( ? ) for duration and for the
+ * start timestamp.
+ */
+static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
 {
 	double duration = (double)t / NSEC_PER_MSEC;
 	size_t printed = fprintf(fp, "(");
 
-	if (duration >= 1.0)
+	if (!calculated)
+		printed += fprintf(fp, "     ?   ");
+	else if (duration >= 1.0)
 		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
 	else if (duration >= 0.01)
 		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
@@ -1030,13 +1039,27 @@ static bool trace__filter_duration(struc
 	return t < (trace->duration_filter * NSEC_PER_MSEC);
 }
 
-static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
+static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
 {
 	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
 
 	return fprintf(fp, "%10.3f ", ts);
 }
 
+/*
+ * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
+ * using ttrace->entry_time for a thread that receives a sys_exit without
+ * first having received a sys_enter ("poll" issued before tracing session
+ * starts, lost sys_enter exit due to ring buffer overflow).
+ */
+static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
+{
+	if (tstamp > 0)
+		return __trace__fprintf_tstamp(trace, tstamp, fp);
+
+	return fprintf(fp, "         ? ");
+}
+
 static bool done = false;
 static bool interrupted = false;
 
@@ -1047,10 +1070,10 @@ static void sig_handler(int sig)
 }
 
 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
-					u64 duration, u64 tstamp, FILE *fp)
+					u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
 {
 	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
-	printed += fprintf_duration(duration, fp);
+	printed += fprintf_duration(duration, duration_calculated, fp);
 
 	if (trace->multiple_threads) {
 		if (trace->show_comm)
@@ -1452,7 +1475,7 @@ static int trace__printf_interrupted_ent
 
 	duration = sample->time - ttrace->entry_time;
 
-	printed  = trace__fprintf_entry_head(trace, trace->current, duration, ttrace->entry_time, trace->output);
+	printed  = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output);
 	printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
 	ttrace->entry_pending = false;
 
@@ -1499,7 +1522,7 @@ static int trace__sys_enter(struct trace
 
 	if (sc->is_exit) {
 		if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
-			trace__fprintf_entry_head(trace, thread, 1, ttrace->entry_time, trace->output);
+			trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
 			fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
 		}
 	} else {
@@ -1547,6 +1570,7 @@ static int trace__sys_exit(struct trace
 {
 	long ret;
 	u64 duration = 0;
+	bool duration_calculated = false;
 	struct thread *thread;
 	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
 	struct syscall *sc = trace__syscall_info(trace, evsel, id);
@@ -1577,6 +1601,7 @@ static int trace__sys_exit(struct trace
 		duration = sample->time - ttrace->entry_time;
 		if (trace__filter_duration(trace, duration))
 			goto out;
+		duration_calculated = true;
 	} else if (trace->duration_filter)
 		goto out;
 
@@ -1592,7 +1617,7 @@ static int trace__sys_exit(struct trace
 	if (trace->summary_only)
 		goto out;
 
-	trace__fprintf_entry_head(trace, thread, duration, ttrace->entry_time, trace->output);
+	trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
 
 	if (ttrace->entry_pending) {
 		fprintf(trace->output, "%-70s", ttrace->entry_str);
@@ -1855,7 +1880,7 @@ static int trace__pgfault(struct trace *
 	thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
 			      sample->ip, &al);
 
-	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
+	trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
 
 	fprintf(trace->output, "%sfault [",
 		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?


Patches currently in stable-queue which might be from acme@xxxxxxxxxx are

queue-4.9/perf-buildid-do-not-assume-that-readlink-returns-a-null-terminated-string.patch
queue-4.9/perf-session-don-t-rely-on-evlist-in-pipe-mode.patch
queue-4.9/perf-tools-make-perf_event__synthesize_mmap_events-scale.patch
queue-4.9/perf-annotate-fix-a-bug-following-symbolic-link-of-a-build-id-file.patch
queue-4.9/perf-stat-issue-a-hw-watchdog-disable-hint.patch
queue-4.9/perf-sort-fix-segfault-with-basic-block-cycles-sort-dimension.patch
queue-4.9/perf-trace-handle-unpaired-raw_syscalls-sys_exit-event.patch
queue-4.9/perf-probe-fix-concat_probe_trace_events.patch
queue-4.9/perf-inject-copy-events-when-reordering-events-in-pipe-mode.patch
queue-4.9/perf-probe-return-errno-when-not-hitting-any-event.patch
queue-4.9/perf-evsel-return-exact-sub-event-which-failed-with-eperm-for-wildcards.patch
queue-4.9/perf-stat-fix-bug-in-handling-events-in-error-state.patch