Add benchmark to measure the overhead of the bpf_for_each call for a specified number of iterations. Testing this on qemu on my dev machine on 1 thread, the data is as follows: nr_iterations: 1 bpf_for_each helper - total callbacks called: 42.949 ± 1.404M/s nr_iterations: 10 bpf_for_each helper - total callbacks called: 73.645 ± 2.077M/s nr_iterations: 100 bpf_for_each helper - total callbacks called: 73.058 ± 1.256M/s nr_iterations: 500 bpf_for_each helper - total callbacks called: 78.255 ± 2.845M/s nr_iterations: 1000 bpf_for_each helper - total callbacks called: 79.439 ± 1.805M/s nr_iterations: 5000 bpf_for_each helper - total callbacks called: 81.639 ± 2.053M/s nr_iterations: 10000 bpf_for_each helper - total callbacks called: 80.577 ± 1.824M/s nr_iterations: 50000 bpf_for_each helper - total callbacks called: 76.773 ± 1.578M/s nr_iterations: 100000 bpf_for_each helper - total callbacks called: 77.073 ± 2.200M/s nr_iterations: 500000 bpf_for_each helper - total callbacks called: 75.136 ± 0.552M/s nr_iterations: 1000000 bpf_for_each helper - total callbacks called: 76.364 ± 1.690M/s >From this data, we can see that we are able to run the loop at least 40 million times per second on an empty callback function. >From this data, we can also see that as the number of iterations increases, the overhead per iteration decreases and steadies towards a constant value. Signed-off-by: Joanne Koong <joannekoong@xxxxxx> --- tools/testing/selftests/bpf/Makefile | 3 +- tools/testing/selftests/bpf/bench.c | 4 + .../selftests/bpf/benchs/bench_for_each.c | 105 ++++++++++++++++++ .../bpf/benchs/run_bench_for_each.sh | 16 +++ .../selftests/bpf/progs/for_each_helper.c | 13 +++ 5 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/benchs/bench_for_each.c create mode 100755 tools/testing/selftests/bpf/benchs/run_bench_for_each.sh diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index f49cb5fc85af..b55fc72b8ef0 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -537,7 +537,8 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \ $(OUTPUT)/bench_rename.o \ $(OUTPUT)/bench_trigger.o \ $(OUTPUT)/bench_ringbufs.o \ - $(OUTPUT)/bench_bloom_filter_map.o + $(OUTPUT)/bench_bloom_filter_map.o \ + $(OUTPUT)/bench_for_each.o $(call msg,BINARY,,$@) $(Q)$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS) diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index cc4722f693e9..d8b3d537a700 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -171,10 +171,12 @@ static const struct argp_option opts[] = { extern struct argp bench_ringbufs_argp; extern struct argp bench_bloom_map_argp; +extern struct argp bench_for_each_argp; static const struct argp_child bench_parsers[] = { { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, { &bench_bloom_map_argp, 0, "Bloom filter map benchmark", 0 }, + { &bench_for_each_argp, 0, "bpf_for_each helper benchmark", 0 }, {}, }; @@ -368,6 +370,7 @@ extern const struct bench bench_bloom_update; extern const struct bench bench_bloom_false_positive; extern const struct bench bench_hashmap_without_bloom; extern const struct bench bench_hashmap_with_bloom; +extern const struct bench bench_for_each_helper; static const struct bench *benchs[] = { &bench_count_global, @@ -394,6 +397,7 @@ static const struct bench *benchs[] = { &bench_bloom_false_positive, &bench_hashmap_without_bloom, &bench_hashmap_with_bloom, + &bench_for_each_helper, }; static void setup_benchmark() diff --git a/tools/testing/selftests/bpf/benchs/bench_for_each.c b/tools/testing/selftests/bpf/benchs/bench_for_each.c new file mode 100644 index 000000000000..3372d5b7d67b --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_for_each.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <argp.h> +#include "bench.h" +#include "for_each_helper.skel.h" + +/* BPF triggering benchmarks */ +static struct ctx { + struct for_each_helper *skel; +} ctx; + +static struct { + __u32 nr_iters; +} args = { + .nr_iters = 10, +}; + +enum { + ARG_NR_ITERS = 4000, +}; + +static const struct argp_option opts[] = { + { "nr_iters", ARG_NR_ITERS, "nr_iters", 0, + "Set number of iterations for the bpf_for_each helper"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + switch (key) { + case ARG_NR_ITERS: + args.nr_iters = strtol(arg, NULL, 10); + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +/* exported into benchmark runner */ +const struct argp bench_for_each_argp = { + .options = opts, + .parser = parse_arg, +}; + +static void validate(void) +{ + if (env.consumer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + exit(1); + } +} + +static void *producer(void *input) +{ + while (true) + /* trigger the bpf program */ + syscall(__NR_getpgid); + + return NULL; +} + +static void *consumer(void *input) +{ + return NULL; +} + +static void measure(struct bench_res *res) +{ + res->hits = atomic_swap(&ctx.skel->bss->hits, 0); +} + +static void setup(void) +{ + struct bpf_link *link; + + setup_libbpf(); + + ctx.skel = for_each_helper__open_and_load(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + link = bpf_program__attach(ctx.skel->progs.benchmark); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } + + ctx.skel->bss->nr_iterations = args.nr_iters; +} + +const struct bench bench_for_each_helper = { + .name = "for-each-helper", + .validate = validate, + .setup = setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/run_bench_for_each.sh b/tools/testing/selftests/bpf/benchs/run_bench_for_each.sh new file mode 100755 index 000000000000..5f11a1ad66d3 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_for_each.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +for t in 1 4 8 12 16; do +printf "\n" +for i in 1 10 100 500 1000 5000 10000 50000 100000 500000 1000000; do +subtitle "nr_iterations: $i, nr_threads: $t" + summarize "bpf_for_each helper - total callbacks called: " \ + "$($RUN_BENCH -p $t --nr_iters $i for-each-helper)" + printf "\n" +done +done diff --git a/tools/testing/selftests/bpf/progs/for_each_helper.c b/tools/testing/selftests/bpf/progs/for_each_helper.c index 4404d0cb32a6..b95551d99f75 100644 --- a/tools/testing/selftests/bpf/progs/for_each_helper.c +++ b/tools/testing/selftests/bpf/progs/for_each_helper.c @@ -14,6 +14,8 @@ struct callback_ctx { u32 nr_iterations; u32 stop_index = -1; +long hits; + /* Making these global variables so that the userspace program * can verify the output through the skeleton */ @@ -67,3 +69,14 @@ int prog_invalid_flags(struct __sk_buff *skb) return 0; } + +SEC("fentry/__x64_sys_getpgid") +int benchmark(void *ctx) +{ + for (int i = 0; i < 1000; i++) { + bpf_for_each(nr_iterations, empty_callback_fn, NULL, 0); + + __sync_add_and_fetch(&hits, nr_iterations); + } + return 0; +} -- 2.30.2