Waiting for data to be available on the trace ring-buffers may trigger IPIs. This might generate unacceptable trace noise when debugging low latency or real time systems. So introduce the poll option. When enabled, it forces trace-cmd to use O_NONBLOCK. The drawback to using it is that traces will be extracted by busy waiting, which will unnecessarily hog the CPUs, so only use when really needed. Signed-off-by: Nicolas Saenz Julienne <nsaenzju@xxxxxxxxxx> --- NOTE: I'm aware that TRACECMD_RECORD_POLL kind of clashes with TRACECMD_RECORD_BLOCK. That's why I renamed it. There might be a clean way to merge them into a single flag, but it's not clear to me why was TRACECMD_RECORD_BLOCK needed in the first place (since it only affects splicing). .../trace-cmd/trace-cmd-record.1.txt | 7 +++++ .../include/private/trace-cmd-private.h | 3 +- lib/trace-cmd/trace-recorder.c | 29 ++++++++++--------- tracecmd/trace-record.c | 8 ++++- tracecmd/trace-usage.c | 1 + 5 files changed, 33 insertions(+), 15 deletions(-) diff --git a/Documentation/trace-cmd/trace-cmd-record.1.txt b/Documentation/trace-cmd/trace-cmd-record.1.txt index 55a8891..5d063cc 100644 --- a/Documentation/trace-cmd/trace-cmd-record.1.txt +++ b/Documentation/trace-cmd/trace-cmd-record.1.txt @@ -360,6 +360,13 @@ OPTIONS executed will not be changed. This is useful if you want to monitor the output of the command being executed, but not see the output from trace-cmd. +*--poll*:: + Waiting for data to be available on the trace ring-buffers may trigger + IPIs. This might generate unacceptable trace noise when tracing low latency + or real time systems. The poll option forces trace-cmd to use O_NONBLOCK. + Traces are extracted by busy waiting, which will hog the CPUs, so only use + when really needed. + EXAMPLES -------- diff --git a/lib/trace-cmd/include/private/trace-cmd-private.h b/lib/trace-cmd/include/private/trace-cmd-private.h index 42e739f..5c2ab4c 100644 --- a/lib/trace-cmd/include/private/trace-cmd-private.h +++ b/lib/trace-cmd/include/private/trace-cmd-private.h @@ -319,8 +319,9 @@ struct tracecmd_output *tracecmd_get_output_handle_fd(int fd); enum { TRACECMD_RECORD_NOSPLICE = (1 << 0), /* Use read instead of splice */ TRACECMD_RECORD_SNAPSHOT = (1 << 1), /* Extract from snapshot */ - TRACECMD_RECORD_BLOCK = (1 << 2), /* Block on splice write */ + TRACECMD_RECORD_BLOCK_SPLICE = (1 << 2), /* Block on splice write */ TRACECMD_RECORD_NOBRASS = (1 << 3), /* Splice directly without a brass pipe */ + TRACECMD_RECORD_POLL = (1 << 4), /* Use O_NONBLOCK, poll trace buffers */ }; void tracecmd_free_recorder(struct tracecmd_recorder *recorder); diff --git a/lib/trace-cmd/trace-recorder.c b/lib/trace-cmd/trace-recorder.c index 0caa124..c833378 100644 --- a/lib/trace-cmd/trace-recorder.c +++ b/lib/trace-cmd/trace-recorder.c @@ -112,6 +112,18 @@ void tracecmd_free_recorder(struct tracecmd_recorder *recorder) free(recorder); } +static void set_nonblock(struct tracecmd_recorder *recorder) +{ + long flags; + + /* Do not block on reads */ + flags = fcntl(recorder->trace_fd, F_GETFL); + fcntl(recorder->trace_fd, F_SETFL, flags | O_NONBLOCK); + + /* Do not block on streams */ + recorder->fd_flags |= SPLICE_F_NONBLOCK; +} + struct tracecmd_recorder * tracecmd_create_buffer_recorder_fd2(int fd, int fd2, int cpu, unsigned flags, const char *buffer, int maxkb) @@ -130,7 +142,7 @@ tracecmd_create_buffer_recorder_fd2(int fd, int fd2, int cpu, unsigned flags, recorder->fd_flags = SPLICE_F_MOVE; - if (!(recorder->flags & TRACECMD_RECORD_BLOCK)) + if (!(recorder->flags & TRACECMD_RECORD_BLOCK_SPLICE)) recorder->fd_flags |= SPLICE_F_NONBLOCK; recorder->trace_fd_flags = SPLICE_F_MOVE; @@ -198,6 +210,9 @@ tracecmd_create_buffer_recorder_fd2(int fd, int fd2, int cpu, unsigned flags, recorder->pipe_size = pipe_size; } + if (recorder->flags & TRACECMD_RECORD_POLL) + set_nonblock(recorder); + return recorder; out_free: @@ -504,18 +519,6 @@ static long move_data(struct tracecmd_recorder *recorder) return splice_data(recorder); } -static void set_nonblock(struct tracecmd_recorder *recorder) -{ - long flags; - - /* Do not block on reads for flushing */ - flags = fcntl(recorder->trace_fd, F_GETFL); - fcntl(recorder->trace_fd, F_SETFL, flags | O_NONBLOCK); - - /* Do not block on streams for write */ - recorder->fd_flags |= SPLICE_F_NONBLOCK; -} - long tracecmd_flush_recording(struct tracecmd_recorder *recorder) { char buf[recorder->page_size]; diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c index bf91189..60ee5fb 100644 --- a/tracecmd/trace-record.c +++ b/tracecmd/trace-record.c @@ -3333,7 +3333,7 @@ create_recorder_instance_pipe(struct buffer_instance *instance, int cpu, int *brass) { struct tracecmd_recorder *recorder; - unsigned flags = recorder_flags | TRACECMD_RECORD_BLOCK; + unsigned flags = recorder_flags | TRACECMD_RECORD_BLOCK_SPLICE; char *path; path = tracefs_instance_get_dir(instance->tracefs); @@ -5757,6 +5757,7 @@ enum { OPT_module = 256, OPT_nofifos = 257, OPT_cmdlines_size = 258, + OPT_poll = 259, }; void trace_stop(int argc, char **argv) @@ -6172,6 +6173,7 @@ static void parse_record_options(int argc, {"tsync-interval", required_argument, NULL, OPT_tsyncinterval}, {"fork", no_argument, NULL, OPT_fork}, {"tsc2nsec", no_argument, NULL, OPT_tsc2nsec}, + {"poll", no_argument, NULL, OPT_poll}, {NULL, 0, NULL, 0} }; @@ -6593,6 +6595,10 @@ static void parse_record_options(int argc, die("TSC to nanosecond is not supported"); ctx->instance->flags |= BUFFER_FL_TSC2NSEC; break; + case OPT_poll: + cmd_check_die(ctx, CMD_set, *(argv+1), "--poll"); + recorder_flags |= TRACECMD_RECORD_POLL; + break; case OPT_quiet: case 'q': quiet = true; diff --git a/tracecmd/trace-usage.c b/tracecmd/trace-usage.c index 998f5a2..c0033ce 100644 --- a/tracecmd/trace-usage.c +++ b/tracecmd/trace-usage.c @@ -68,6 +68,7 @@ static struct usage_help usage_help[] = { " If a negative number is specified, timestamps synchronization is disabled" " If 0 is specified, no loop is performed - timestamps offset is calculated only twice," " at the beginnig and at the end of the trace\n" + " --poll don't block while reading from the trace buffer\n" }, { "set", -- 2.31.1