On Wed, Nov 17, 2021 at 5:07 PM Joanne Koong <joannekoong@xxxxxx> wrote: > > In this patch - > 1) Add a new prog "for_each_helper" which tests the basic functionality of > the bpf_for_each helper. > > 2) Add pyperf600_foreach and strobemeta_foreach to test the performance > of using bpf_for_each instead of a for loop > > The results of pyperf600 and strobemeta are as follows: > > ~strobemeta~ > > Baseline > verification time 6808200 usec > stack depth 496 > processed 592132 insns (limit 1000000) max_states_per_insn 14 > total_states 16018 peak_states 13684 mark_read 3132 > #188 verif_scale_strobemeta:OK (unrolled loop) > > Using bpf_for_each > verification time 31589 usec > stack depth 96+408 > processed 1630 insns (limit 1000000) max_states_per_insn 4 > total_states 107 peak_states 107 mark_read 60 > #189 verif_scale_strobemeta_foreach:OK > > ~pyperf600~ > > Baseline > verification time 29702486 usec > stack depth 368 > processed 626838 insns (limit 1000000) max_states_per_insn 7 > total_states 30368 peak_states 30279 mark_read 748 > #182 verif_scale_pyperf600:OK (unrolled loop) > > Using bpf_for_each > verification time 148488 usec 200x, this is awesome > stack depth 320+40 > processed 10518 insns (limit 1000000) max_states_per_insn 10 > total_states 705 peak_states 517 mark_read 38 > #183 verif_scale_pyperf600_foreach:OK > > Using the bpf_for_each helper led to approximately a 100% decrease > in the verification time and in the number of instructions. > > Signed-off-by: Joanne Koong <joannekoong@xxxxxx> > --- > .../bpf/prog_tests/bpf_verif_scale.c | 12 +++ > .../selftests/bpf/prog_tests/for_each.c | 61 ++++++++++++++++ > .../selftests/bpf/progs/for_each_helper.c | 69 ++++++++++++++++++ > tools/testing/selftests/bpf/progs/pyperf.h | 70 +++++++++++++++++- > .../selftests/bpf/progs/pyperf600_foreach.c | 5 ++ > .../testing/selftests/bpf/progs/strobemeta.h | 73 ++++++++++++++++++- > .../selftests/bpf/progs/strobemeta_foreach.c | 9 +++ let's split out strobemeta and pyperf refactorings into a separate patch from dedicated tests for bpf_for_each helper, they are logically quite different and independent > 7 files changed, 295 insertions(+), 4 deletions(-) > create mode 100644 tools/testing/selftests/bpf/progs/for_each_helper.c > create mode 100644 tools/testing/selftests/bpf/progs/pyperf600_foreach.c > create mode 100644 tools/testing/selftests/bpf/progs/strobemeta_foreach.c > > diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c > index 867349e4ed9e..77396484fde7 100644 > --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c > +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c > @@ -115,6 +115,12 @@ void test_verif_scale_pyperf600() > scale_test("pyperf600.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); > } > > +void test_verif_scale_pyperf600_foreach(void) > +{ > + /* use the bpf_for_each helper*/ > + scale_test("pyperf600_foreach.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); > +} > + > void test_verif_scale_pyperf600_nounroll() > { > /* no unroll at all. > @@ -165,6 +171,12 @@ void test_verif_scale_strobemeta() > scale_test("strobemeta.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); > } > > +void test_verif_scale_strobemeta_foreach(void) > +{ > + /* use the bpf_for_each helper*/ > + scale_test("strobemeta_foreach.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); > +} > + > void test_verif_scale_strobemeta_nounroll1() > { > /* no unroll, tiny loops */ > diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c > index 68eb12a287d4..529573a82334 100644 > --- a/tools/testing/selftests/bpf/prog_tests/for_each.c > +++ b/tools/testing/selftests/bpf/prog_tests/for_each.c > @@ -4,6 +4,7 @@ > #include <network_helpers.h> > #include "for_each_hash_map_elem.skel.h" > #include "for_each_array_map_elem.skel.h" > +#include "for_each_helper.skel.h" > > static unsigned int duration; > > @@ -121,10 +122,70 @@ static void test_array_map(void) > for_each_array_map_elem__destroy(skel); > } > > +static void test_for_each_helper(void) > +{ > + struct for_each_helper *skel; > + __u32 retval; > + int err; > + > + skel = for_each_helper__open_and_load(); > + if (!ASSERT_OK_PTR(skel, "for_each_helper__open_and_load")) > + return; > + > + skel->bss->nr_iterations = 100; > + err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_prog), > + 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL, > + &retval, &duration); > + if (CHECK(err || retval, "bpf_for_each helper test_prog", please don't use CHECK() in new test, stick to ASSERT_XXX() > + "err %d errno %d retval %d\n", err, errno, retval)) > + goto out; > + ASSERT_EQ(skel->bss->nr_iterations_completed, skel->bss->nr_iterations, > + "nr_iterations mismatch"); > + ASSERT_EQ(skel->bss->g_output, (100 * 99) / 2, "wrong output"); > + > + /* test callback_fn returning 1 to stop iteration */ > + skel->bss->nr_iterations = 400; > + skel->data->stop_index = 50; > + err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_prog), > + 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL, > + &retval, &duration); > + if (CHECK(err || retval, "bpf_for_each helper test_prog", > + "err %d errno %d retval %d\n", err, errno, retval)) > + goto out; > + ASSERT_EQ(skel->bss->nr_iterations_completed, skel->data->stop_index + 1, > + "stop_index not followed"); > + ASSERT_EQ(skel->bss->g_output, (50 * 49) / 2, "wrong output"); > + > + /* test passing in a null ctx */ > + skel->bss->nr_iterations = 10; > + err = bpf_prog_test_run(bpf_program__fd(skel->progs.prog_null_ctx), > + 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL, > + &retval, &duration); > + if (CHECK(err || retval, "bpf_for_each helper prog_null_ctx", > + "err %d errno %d retval %d\n", err, errno, retval)) > + goto out; > + ASSERT_EQ(skel->bss->nr_iterations_completed, skel->bss->nr_iterations, > + "nr_iterations mismatch"); > + > + /* test invalid flags */ > + err = bpf_prog_test_run(bpf_program__fd(skel->progs.prog_invalid_flags), > + 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL, > + &retval, &duration); > + if (CHECK(err || retval, "bpf_for_each helper prog_invalid_flags", > + "err %d errno %d retval %d\n", err, errno, retval)) > + goto out; > + ASSERT_EQ(skel->bss->err, -EINVAL, "invalid_flags"); > + > +out: > + for_each_helper__destroy(skel); > +} > + > void test_for_each(void) > { > if (test__start_subtest("hash_map")) > test_hash_map(); > if (test__start_subtest("array_map")) > test_array_map(); > + if (test__start_subtest("for_each_helper")) > + test_for_each_helper(); those hash_map and array_map are conceptually very different tests, it's probably best to have a separate file under prog_tests dedicated to this new helper. > } > diff --git a/tools/testing/selftests/bpf/progs/for_each_helper.c b/tools/testing/selftests/bpf/progs/for_each_helper.c > new file mode 100644 > index 000000000000..4404d0cb32a6 > --- /dev/null > +++ b/tools/testing/selftests/bpf/progs/for_each_helper.c > @@ -0,0 +1,69 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* Copyright (c) 2021 Facebook */ > + > +#include "vmlinux.h" > +#include <bpf/bpf_helpers.h> > + > +char _license[] SEC("license") = "GPL"; > + > +struct callback_ctx { > + int output; > +}; > + > +/* This should be set by the user program */ > +u32 nr_iterations; > +u32 stop_index = -1; > + > +/* Making these global variables so that the userspace program > + * can verify the output through the skeleton > + */ > +int nr_iterations_completed; > +int g_output; > +int err; > + > +static int callback_fn(__u32 index, void *data) > +{ > + struct callback_ctx *ctx = data; > + > + if (index >= stop_index) > + return 1; > + > + ctx->output += index; > + > + return 0; > +} > + > +static int empty_callback_fn(__u32 index, void *data) > +{ > + return 0; > +} > + > +SEC("tc") > +int test_prog(struct __sk_buff *skb) > +{ > + struct callback_ctx data = {}; > + > + nr_iterations_completed = bpf_for_each(nr_iterations, callback_fn, &data, 0); > + > + g_output = data.output; > + > + return 0; > +} > + > +SEC("tc") > +int prog_null_ctx(struct __sk_buff *skb) > +{ > + nr_iterations_completed = bpf_for_each(nr_iterations, empty_callback_fn, NULL, 0); > + > + return 0; > +} > + > +SEC("tc") > +int prog_invalid_flags(struct __sk_buff *skb) > +{ > + struct callback_ctx data = {}; > + > + err = bpf_for_each(nr_iterations, callback_fn, &data, 1); > + > + return 0; > +} You mentioned that nested loops works, let's have a test for that. [...]