The following commit has been merged into the perf/core branch of tip:

Commit-ID:     2a4b51666af8bf0b67ccc2e53120bad27351917c
Gitweb:        https://git.kernel.org/tip/2a4b51666af8bf0b67ccc2e53120bad27351917c
Author:        Ian Rogers <irogers@xxxxxxxxxx>
AuthorDate:    Thu, 02 Apr 2020 08:43:53 -07:00
Committer:     Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
CommitterDate: Thu, 16 Apr 2020 12:19:12 -03:00

perf bench: Add event synthesis benchmark

Event synthesis may occur at the start or end (tail) of a perf command.
In system-wide mode it can scan every process in /proc, which may add
seconds of latency before event recording. Add a new benchmark that
times how long event synthesis takes with and without data synthesis.

An example execution looks like:

  $ perf bench internals synthesize
  # Running 'internals/synthesize' benchmark:
  Average synthesis took: 168.253800 usec
  Average data synthesis took: 208.104700 usec

Signed-off-by: Ian Rogers <irogers@xxxxxxxxxx>
Acked-by: Jiri Olsa <jolsa@xxxxxxxxxx>
Tested-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Cc: Alexander Shishkin <alexander.shishkin@xxxxxxxxxxxxxxx>
Cc: Andrey Zhizhikin <andrey.z@xxxxxxxxx>
Cc: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
Cc: Kefeng Wang <wangkefeng.wang@xxxxxxxxxx>
Cc: Mark Rutland <mark.rutland@xxxxxxx>
Cc: Namhyung Kim <namhyung@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Petr Mladek <pmladek@xxxxxxxx>
Cc: Stephane Eranian <eranian@xxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Link: http://lore.kernel.org/lkml/20200402154357.107873-2-irogers@xxxxxxxxxx
Signed-off-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
---
 tools/perf/Documentation/perf-bench.txt |   8 ++-
 tools/perf/bench/Build                  |   2 +-
 tools/perf/bench/bench.h                |   2 +-
 tools/perf/bench/synthesize.c           | 101 +++++++++++++++++++++++-
 tools/perf/builtin-bench.c              |   6 +-
 5 files changed, 117 insertions(+), 2 deletions(-)
 create mode 100644 tools/perf/bench/synthesize.c

diff --git a/tools/perf/Documentation/perf-bench.txt
b/tools/perf/Documentation/perf-bench.txt index 0921a3c..bad1651 100644 --- a/tools/perf/Documentation/perf-bench.txt +++ b/tools/perf/Documentation/perf-bench.txt @@ -61,6 +61,9 @@ SUBSYSTEM 'epoll':: Eventpoll (epoll) stressing benchmarks. +'internals':: + Benchmark internal perf functionality. + 'all':: All benchmark subsystems. @@ -214,6 +217,11 @@ Suite for evaluating concurrent epoll_wait calls. *ctl*:: Suite for evaluating multiple epoll_ctl calls. +SUITES FOR 'internals' +~~~~~~~~~~~~~~~~~~~~~~ +*synthesize*:: +Suite for evaluating perf's event synthesis performance. + SEE ALSO -------- linkperf:perf[1] diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build index e4e321b..0428273 100644 --- a/tools/perf/bench/Build +++ b/tools/perf/bench/Build @@ -6,9 +6,9 @@ perf-y += futex-wake.o perf-y += futex-wake-parallel.o perf-y += futex-requeue.o perf-y += futex-lock-pi.o - perf-y += epoll-wait.o perf-y += epoll-ctl.o +perf-y += synthesize.o perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 4aa6de1..4d669c8 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -41,9 +41,9 @@ int bench_futex_wake_parallel(int argc, const char **argv); int bench_futex_requeue(int argc, const char **argv); /* pi futexes */ int bench_futex_lock_pi(int argc, const char **argv); - int bench_epoll_wait(int argc, const char **argv); int bench_epoll_ctl(int argc, const char **argv); +int bench_synthesize(int argc, const char **argv); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c new file mode 100644 index 0000000..6291257 --- /dev/null +++ b/tools/perf/bench/synthesize.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Benchmark synthesis of perf events such as at the start of a 'perf + * record'. 
Synthesis is done on the current process and the 'dummy' event + * handlers are invoked that support dump_trace but otherwise do nothing. + * + * Copyright 2019 Google LLC. + */ +#include <stdio.h> +#include "bench.h" +#include "../util/debug.h" +#include "../util/session.h" +#include "../util/synthetic-events.h" +#include "../util/target.h" +#include "../util/thread_map.h" +#include "../util/tool.h" +#include <linux/err.h> +#include <linux/time64.h> +#include <subcmd/parse-options.h> + +static unsigned int iterations = 10000; + +static const struct option options[] = { + OPT_UINTEGER('i', "iterations", &iterations, + "Number of iterations used to compute average"), + OPT_END() +}; + +static const char *const usage[] = { + "perf bench internals synthesize <options>", + NULL +}; + + +static int do_synthesize(struct perf_session *session, + struct perf_thread_map *threads, + struct target *target, bool data_mmap) +{ + const unsigned int nr_threads_synthesize = 1; + struct timeval start, end, diff; + u64 runtime_us; + unsigned int i; + double average; + int err; + + gettimeofday(&start, NULL); + for (i = 0; i < iterations; i++) { + err = machine__synthesize_threads(&session->machines.host, + target, threads, data_mmap, + nr_threads_synthesize); + if (err) + return err; + } + + gettimeofday(&end, NULL); + timersub(&end, &start, &diff); + runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; + average = (double)runtime_us/(double)iterations; + printf("Average %ssynthesis took: %f usec\n", + data_mmap ? 
"data " : "", average); + return 0; +} + +int bench_synthesize(int argc, const char **argv) +{ + struct perf_tool tool; + struct perf_session *session; + struct target target = { + .pid = "self", + }; + struct perf_thread_map *threads; + int err; + + argc = parse_options(argc, argv, options, usage, 0); + + session = perf_session__new(NULL, false, NULL); + if (IS_ERR(session)) { + pr_err("Session creation failed.\n"); + return PTR_ERR(session); + } + threads = thread_map__new_by_pid(getpid()); + if (!threads) { + pr_err("Thread map creation failed.\n"); + err = -ENOMEM; + goto err_out; + } + perf_tool__fill_defaults(&tool); + + err = do_synthesize(session, threads, &target, false); + if (err) + goto err_out; + + err = do_synthesize(session, threads, &target, true); + +err_out: + if (threads) + perf_thread_map__put(threads); + + perf_session__delete(session); + return err; +} diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index c06fe21..11c79a8 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -76,6 +76,11 @@ static struct bench epoll_benchmarks[] = { }; #endif // HAVE_EVENTFD +static struct bench internals_benchmarks[] = { + { "synthesize", "Benchmark perf event synthesis", bench_synthesize }, + { NULL, NULL, NULL } +}; + struct collection { const char *name; const char *summary; @@ -92,6 +97,7 @@ static struct collection collections[] = { #ifdef HAVE_EVENTFD {"epoll", "Epoll stressing benchmarks", epoll_benchmarks }, #endif + { "internals", "Perf-internals benchmarks", internals_benchmarks }, { "all", "All benchmarks", NULL }, { NULL, NULL, NULL } };