Hi Song, Thanks for this work! Some remarks (mostly nitpicks) inline. 2020-02-28 15:40 UTC-0800 ~ Song Liu <songliubraving@xxxxxx> > With fentry/fexit programs, it is possible to profile BPF program with > hardware counters. Introduce bpftool "prog profile", which measures key > metrics of a BPF program. > > bpftool prog profile command creates per-cpu perf events. Then it attaches > fentry/fexit programs to the target BPF program. The fentry program saves > perf event value to a map. The fexit program reads the perf event again, > and calculates the difference, which is the instructions/cycles used by > the target program. > > Example input and output: > > ./bpftool prog profile 3 id 337 cycles instructions llc_misses > > 4228 run_cnt > 3403698 cycles (84.08%) > 3525294 instructions # 1.04 insn per cycle (84.05%) > 13 llc_misses # 3.69 LLC misses per million isns (83.50%) > > This command measures cycles and instructions for BPF program with id > 337 for 3 seconds. The program has triggered 4228 times. The rest of the > output is similar to perf-stat. In this example, the counters were only > counting ~84% of the time because of time multiplexing of perf counters. > > Note that, this approach measures cycles and instructions in very small > increments. So the fentry/fexit programs introduce noticeable errors to > the measurement results. > > The fentry/fexit programs are generated with BPF skeletons. Therefore, we > build bpftool twice. The first time _bpftool is built without skeletons. > Then, _bpftool is used to generate the skeletons. The second time, bpftool > is built with skeletons. > > Signed-off-by: Song Liu <songliubraving@xxxxxx> > --- [...] > diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c > index 1996e67a2f00..39f0f14464ad 100644 > --- a/tools/bpf/bpftool/prog.c > +++ b/tools/bpf/bpftool/prog.c > @@ -10,12 +10,16 @@ > #include <string.h> > #include <time.h> > #include <unistd.h> > +#include <signal.h> > #include <net/if.h> > #include <sys/types.h> > #include <sys/stat.h> > +#include <sys/ioctl.h> > +#include <sys/syscall.h> > > #include <linux/err.h> > #include <linux/sizes.h> > +#include <linux/perf_event.h> Nit: Could you please keep the includes sorted alphabetically in each category (standard, sys/, linux/)? > > #include <bpf/bpf.h> > #include <bpf/btf.h> > @@ -1537,6 +1541,425 @@ static int do_loadall(int argc, char **argv) > return load_with_options(argc, argv, false); > } > > +#ifdef BPFTOOL_WITHOUT_SKELETONS > + > +static int do_profile(int argc, char **argv) > +{ > + return 0; > +} > + > +#else /* BPFTOOL_WITHOUT_SKELETONS */ > + > +#include "profiler.skel.h" > + > +#define SAMPLE_PERIOD 0x7fffffffffffffffULL > +struct profile_metric { > + const char *name; > + struct perf_event_attr attr; > + bool selected; > + struct bpf_perf_event_value val; > + > + /* calculate ratios like instructions per cycle */ > + const int ratio_metric; /* 0 for N/A, 1 for index 0 (cycles) */ Nit: Double space " */" > + const float ratio_mul; > + const char *ratio_desc; > +} metrics[] = { > + { > + .name = "cycles", > + .attr = { > + .freq = 0, > + .sample_period = SAMPLE_PERIOD, > + .inherit = 0, > + .type = PERF_TYPE_HARDWARE, > + .read_format = 0, > + .sample_type = 0, > + .config = PERF_COUNT_HW_CPU_CYCLES, > + }, > + }, > + { > + .name = "instructions", > + .attr = { > + .freq = 0, > + .sample_period = SAMPLE_PERIOD, > + .inherit = 0, > + .type = PERF_TYPE_HARDWARE, > + .read_format = 0, > + .sample_type = 0, > + .config = PERF_COUNT_HW_INSTRUCTIONS, > + }, > + .ratio_metric = 1, > + .ratio_mul = 1.0, > + .ratio_desc = "insn per cycle", > + }, > + { > + .name = "l1d_loads", > + .attr = { > + .freq = 0, > + .sample_period = SAMPLE_PERIOD, > + .inherit = 0, > + .type = PERF_TYPE_HW_CACHE, > + .read_format = 0, > + .sample_type = 0, > + .config = > + PERF_COUNT_HW_CACHE_L1D | > + (PERF_COUNT_HW_CACHE_OP_READ << 8) | > + (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), > + }, > + }, > + { > + .name = "llc_misses", > + .attr = { > + .freq = 0, > + .sample_period = SAMPLE_PERIOD, > + .inherit = 0, > + .type = PERF_TYPE_HW_CACHE, > + .read_format = 0, > + .sample_type = 0, > + .config = > + PERF_COUNT_HW_CACHE_LL | > + (PERF_COUNT_HW_CACHE_OP_READ << 8) | > + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), > + }, > + .ratio_metric = 2, > + .ratio_mul = 1e6, > + .ratio_desc = "LLC misses per million isns", s/isns/insns/, to remain consistent with "insn per cycle"? (And if you change it, please update the example in man page) > + }, > +}; > + > +u64 profile_total_count; We should probably stick to __u64 (and __u32, etc.) in bpftool. > + > +#define MAX_NUM_PROFILE_METRICS 4 > + > +static int profile_parse_metrics(int argc, char **argv) > +{ > + unsigned int metric_cnt; > + int selected_cnt = 0; > + unsigned int i; > + > + metric_cnt = sizeof(metrics) / sizeof(struct profile_metric); > + > + while (argc > 0) { > + for (i = 0; i < metric_cnt; i++) { > + if (strcmp(argv[0], metrics[i].name) == 0) { Could we use prefixing (is_prefix() instead of strcmp()), as for the other bpftool commands and keywords? > + if (!metrics[i].selected) > + selected_cnt++; > + metrics[i].selected = true; > + break; > + } > + } > + if (i == metric_cnt) { > + p_err("unknown metric %s", argv[0]); > + return -1; > + } > + NEXT_ARG(); > + } > + if (selected_cnt > MAX_NUM_PROFILE_METRICS) { > + p_err("too many (%d) metrics, please specify no more than %d metrics at at time", > + selected_cnt, MAX_NUM_PROFILE_METRICS); > + return -1; > + } > + return selected_cnt; > +} [...] > + > +static int do_profile(int argc, char **argv) > +{ > + int num_metric, num_cpu, err = -1; > + struct bpf_program *prog; > + unsigned long duration; > + char *endptr; > + > + /* we at least need: <duration>, "id", <id>, <metric> */ Not necessarily "id" (can be "tag", "pinned", "name") but ok, probably doesn't matter in the comment. More important: I thought "duration" was optional? > + if (argc < 4) > + usage(); Or, as in other subcommands: if (!REQ_ARGS(4)) return -EINVAL; > + > + /* parse profiling duration */ > + duration = strtoul(*argv, &endptr, 0); > + if (*endptr) > + duration = PROFILE_DEFAULT_LONG_DURATION; > + else > + NEXT_ARG(); If the field is optional, maybe it could be interesting to add a keyword to the command line before passing it ("duration <n>"). It may turn helpful if we add other optional parameters in the future, to know which one we are passing (and might even help with bash completion). > + > + /* parse target fd */ > + profile_tgt_fd = prog_parse_fd(&argc, &argv); > + if (profile_tgt_fd < 0) { > + p_err("failed to parse fd"); > + return -1; > + } I would be tempted to have program handle _before_ duration on the command line, what do you think? bpftool prog profile <mandatory_prog> <profile_options> <metrics> > + > + num_metric = profile_parse_metrics(argc, argv); > + if (num_metric <= 0) > + goto out; > + > + num_cpu = libbpf_num_possible_cpus(); > + if (num_cpu <= 0) { > + p_err("failed to identify number of CPUs"); > + goto out; > + } > + > + profile_obj = profiler_bpf__open(); > + if (!profile_obj) { > + p_err("failed to open and/or load BPF object"); > + goto out; > + } > + > + profile_obj->rodata->num_cpu = num_cpu; > + profile_obj->rodata->num_metric = num_metric; > + > + /* adjust map sizes */ > + bpf_map__resize(profile_obj->maps.events, num_metric * num_cpu); > + bpf_map__resize(profile_obj->maps.fentry_readings, num_metric); > + bpf_map__resize(profile_obj->maps.accum_readings, num_metric); > + bpf_map__resize(profile_obj->maps.counts, 1); > + > + /* change target name */ > + profile_tgt_name = profile_target_name(profile_tgt_fd); > + if (!profile_tgt_name) { > + p_err("failed to load target function name"); > + goto out; > + } > + > + bpf_object__for_each_program(prog, profile_obj->obj) { > + err = bpf_program__set_attach_target(prog, profile_tgt_fd, > + profile_tgt_name); > + if (err) { > + p_err("failed to set attach target\n"); > + goto out; > + } > + } > + > + set_max_rlimit(); > + err = profiler_bpf__load(profile_obj); > + if (err) { > + p_err("failed to load profile_obj"); > + goto out; > + } > + > + profile_open_perf_events(profile_obj); > + > + err = profiler_bpf__attach(profile_obj); > + if (err) { > + p_err("failed to attach profile_obj"); > + goto out; > + } > + signal(SIGINT, int_exit); > + > + sleep(duration); > + profile_print_and_cleanup(); > + return 0; > +out: > + close(profile_tgt_fd); > + free(profile_tgt_name); > + return err; > +} > + > +#endif /* BPFTOOL_WITHOUT_SKELETONS */ > + > static int do_help(int argc, char **argv) > { > if (json_output) { > @@ -1560,6 +1983,7 @@ static int do_help(int argc, char **argv) > " [data_out FILE [data_size_out L]] \\\n" > " [ctx_in FILE [ctx_out FILE [ctx_size_out M]]] \\\n" > " [repeat N]\n" > + " %s %s profile [DURATION] PROG METRICs\n" > " %s %s tracelog\n" > " %s %s help\n" > "\n" > @@ -1578,11 +2002,12 @@ static int do_help(int argc, char **argv) > " ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n" > " flow_dissector }\n" > " " HELP_SPEC_OPTIONS "\n" > + " METRIC := { cycles | instructions | l1d_loads | llc_misses }\n" Can we keep HELP_SPEC_OPTIONS at the end (below METRICS) please? > "", > bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], > bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], > bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], > - bin_name, argv[-2]); > + bin_name, argv[-2], bin_name, argv[-2]); > > return 0; > } > @@ -1599,6 +2024,7 @@ static const struct cmd cmds[] = { > { "detach", do_detach }, > { "tracelog", do_tracelog }, > { "run", do_run }, > + { "profile", do_profile }, > { 0 } > }; > > diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c > new file mode 100644 > index 000000000000..abd3a7aacc1f > --- /dev/null > +++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c > @@ -0,0 +1,171 @@ > +// SPDX-License-Identifier: GPL-2.0 > +// Copyright (c) 2019 Facebook 2019? > +#include "profiler.h" > +#include <linux/bpf.h> > +#include <bpf/bpf_helpers.h> [...] > diff --git a/tools/bpf/bpftool/skeleton/profiler.h b/tools/bpf/bpftool/skeleton/profiler.h > new file mode 100644 > index 000000000000..ae15cb0c4d43 > --- /dev/null > +++ b/tools/bpf/bpftool/skeleton/profiler.h > @@ -0,0 +1,47 @@ > +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ > +#ifndef __PROFILER_H > +#define __PROFILER_H > + > +/* useful typedefs from vimlinux.h */ Typo: vimlinux.h