On Wed, Oct 6, 2021 at 3:27 PM Joanne Koong <joannekoong@xxxxxx> wrote:
>
> This patch adds benchmark tests for the throughput (for lookups + updates)
> and the false positive rate of bloom filter lookups, as well as some
> minor refactoring of the bash script for running the benchmarks.
>
> These benchmarks show that as the number of hash functions increases,
> the throughput and the false positive rate of the bloom filter decreases.
> From the benchmark data, the approximate average false-positive rates for
> 8-byte values are roughly as follows:
>
> 1 hash function = ~30%
> 2 hash functions = ~15%
> 3 hash functions = ~5%
> 4 hash functions = ~2.5%
> 5 hash functions = ~1%
> 6 hash functions = ~0.5%
> 7 hash functions = ~0.35%
> 8 hash functions = ~0.15%
> 9 hash functions = ~0.1%
> 10 hash functions = ~0%
>
> Signed-off-by: Joanne Koong <joannekoong@xxxxxx>
> ---
>  tools/testing/selftests/bpf/Makefile          |   6 +-
>  tools/testing/selftests/bpf/bench.c           |  37 ++
>  tools/testing/selftests/bpf/bench.h           |   3 +
>  .../bpf/benchs/bench_bloom_filter_map.c       | 411 ++++++++++++++++++
>  .../bpf/benchs/run_bench_bloom_filter_map.sh  |  28 ++
>  .../bpf/benchs/run_bench_ringbufs.sh          |  30 +-
>  .../selftests/bpf/benchs/run_common.sh        |  48 ++
>  tools/testing/selftests/bpf/bpf_util.h        |  11 +
>  .../selftests/bpf/progs/bloom_filter_bench.c  | 146 +++++++
>  9 files changed, 690 insertions(+), 30 deletions(-)
>  create mode 100644 tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c
>  create mode 100755 tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh
>  create mode 100644 tools/testing/selftests/bpf/benchs/run_common.sh
>  create mode 100644 tools/testing/selftests/bpf/progs/bloom_filter_bench.c
>

[...]

> +static struct ctx {
> +        struct bloom_filter_bench *skel;
> +
> +        int bloom_filter_fd;
> +        int hashmap_fd;
> +        int array_map_fd;
> +
> +        pthread_mutex_t map_done_mtx;
> +        pthread_cond_t map_done;
> +        bool map_prepare_err;
> +
> +        __u32 next_map_idx;
> +

nit: unnecessary empty line

> +} ctx = {
> +        .map_done_mtx = PTHREAD_MUTEX_INITIALIZER,
> +        .map_done = PTHREAD_COND_INITIALIZER,
> +};
> +

[...]

> +
> +static void populate_maps(void)
> +{
> +        unsigned int nr_cpus = bpf_num_possible_cpus();
> +        pthread_t map_thread;
> +        int i, err;
> +
> +        ctx.bloom_filter_fd = bpf_map__fd(ctx.skel->maps.bloom_filter_map);
> +        ctx.hashmap_fd = bpf_map__fd(ctx.skel->maps.hashmap);
> +        ctx.array_map_fd = bpf_map__fd(ctx.skel->maps.array_map);
> +
> +        for (i = 0; i < nr_cpus; i++) {
> +                err = pthread_create(&map_thread, NULL, map_prepare_thread,
> +                                     NULL);
> +                if (err) {
> +                        fprintf(stderr, "failed to create pthread: %d\n", -errno);
> +                        exit(1);
> +                }
> +        }
> +
> +        pthread_mutex_lock(&ctx.map_done_mtx);
> +        pthread_cond_wait(&ctx.map_done, &ctx.map_done_mtx);

This is a fragile way to use cond_wait. If prepare finishes faster than
you get to this cond_wait, you'll be stuck forever. Also, cond vars can
wake up spuriously, if I remember correctly.
So the pattern is usually to check some condition in a loop (inside the
locked region) and, if the condition doesn't hold, cond_wait on it (I
renamed ctx.map_done to ctx.map_done_cv):

        pthread_mutex_lock(&ctx.map_done_mtx);
        while (!ctx.map_done /* this is bool now */)
                pthread_cond_wait(&ctx.map_done_cv, &ctx.map_done_mtx);
        pthread_mutex_unlock(&ctx.map_done_mtx);

(There is a sketch of the matching signalling side at the end of this mail.)

> +        pthread_mutex_unlock(&ctx.map_done_mtx);
> +
> +        if (ctx.map_prepare_err)
> +                exit(1);
> +}
> +
> +static struct bloom_filter_bench *setup_skeleton(bool hashmap_use_bloom_filter)
> +{
> +        struct bloom_filter_bench *skel;
> +        int err;
> +
> +        setup_libbpf();
> +
> +        skel = bloom_filter_bench__open();
> +        if (!skel) {
> +                fprintf(stderr, "failed to open skeleton\n");
> +                exit(1);
> +        }
> +
> +        skel->rodata->hashmap_use_bloom_filter = hashmap_use_bloom_filter;
> +
> +        /* Resize number of entries */
> +        err = bpf_map__resize(skel->maps.hashmap, args.nr_entries);
> +        if (err) {

These errors can't happen unless args.nr_entries is zero, so I'd just
drop them. But please use bpf_map__set_max_entries() instead,
bpf_map__resize() is going to be deprecated (sketch at the end of this
mail).

> +                fprintf(stderr, "failed to resize hashmap\n");
> +                exit(1);
> +        }
> +
> +        err = bpf_map__resize(skel->maps.array_map, args.nr_entries);
> +        if (err) {
> +                fprintf(stderr, "failed to resize array map\n");
> +                exit(1);
> +        }
> +
> +        err = bpf_map__resize(skel->maps.bloom_filter_map,
> +                              BPF_BLOOM_FILTER_BITSET_SZ(args.nr_entries,
> +                                                         args.nr_hash_funcs));
> +        if (err) {
> +                fprintf(stderr, "failed to resize bloom filter\n");
> +                exit(1);
> +        }
> +
> +        /* Set value size */
> +        err = bpf_map__set_value_size(skel->maps.array_map, args.value_size);
> +        if (err) {

Same here, the error can only happen if the map has already been created
in the kernel, so be pragmatic and skip it (especially in benchmarks).

> +                fprintf(stderr, "failed to set array map value size\n");
> +                exit(1);
> +        }
> +
> +        err = bpf_map__set_value_size(skel->maps.bloom_filter_map, args.value_size);
> +        if (err) {
> +                fprintf(stderr, "failed to set bloom filter map value size\n");
> +                exit(1);
> +        }
> +

[...]

> diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
> index a3352a64c067..a260a963efda 100644
> --- a/tools/testing/selftests/bpf/bpf_util.h
> +++ b/tools/testing/selftests/bpf/bpf_util.h
> @@ -40,4 +40,15 @@ static inline unsigned int bpf_num_possible_cpus(void)
>          (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))
>  #endif
>
> +/* Helper macro for computing the optimal number of bits for a
> + * bloom filter map.
> + *
> + * Mathematically, the optimal bitset size that minimizes the
> + * false positive probability is n * k / ln(2) where n is the expected
> + * number of unique entries in the bloom filter and k is the number of
> + * hash functions. We use 7 / 5 to approximate 1 / ln(2).
> + */
> +#define BPF_BLOOM_FILTER_BITSET_SZ(nr_uniq_entries, nr_hash_funcs) \
> +        ((nr_uniq_entries) * (nr_hash_funcs) / 5 * 7)

Hm.. I thought you were going to add this to include/uapi/linux/bpf.h,
why did you change your mind?
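As a quick sanity check on the 7/5 approximation (back-of-the-envelope
with made-up numbers, say nr_uniq_entries = 100000 and nr_hash_funcs = 3):

        100000 * 3 / 5 * 7   =  420000 bits
        100000 * 3 / ln(2)  ~=  432800 bits

so the macro under-sizes the bitset by roughly 3%, which seems perfectly
fine for a benchmark. Writing it as "/ 5 * 7" rather than "* 7 / 5" also
keeps the intermediate value smaller, at the cost of a bit of truncation.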
> +
>  #endif /* __BPF_UTIL__ */
> diff --git a/tools/testing/selftests/bpf/progs/bloom_filter_bench.c b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c
> new file mode 100644
> index 000000000000..a44a47ddc4d7
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c
> @@ -0,0 +1,146 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2021 Facebook */
> +
> +#include <errno.h>
> +#include <linux/bpf.h>
> +#include <stdbool.h>
> +#include <bpf/bpf_helpers.h>
> +
> +char _license[] SEC("license") = "GPL";
> +
> +struct bpf_map;
> +
> +struct {
> +        __uint(type, BPF_MAP_TYPE_ARRAY);
> +        __uint(key_size, sizeof(__u32));
> +        /* max entries and value_size will be set programmatically.
> +         * They are configurable from the userspace bench program.
> +         */
> +} array_map SEC(".maps");
> +
> +struct {
> +        __uint(type, BPF_MAP_TYPE_BITSET);
> +        /* max entries, value_size, and # of hash functions will be set
> +         * programmatically. They are configurable from the userspace
> +         * bench program.
> +         */
> +} bloom_filter_map SEC(".maps");
> +
> +struct {
> +        __uint(type, BPF_MAP_TYPE_HASH);
> +        /* max entries, key_size, and value_size, will be set
> +         * programmatically. They are configurable from the userspace
> +         * bench program.
> +         */
> +} hashmap SEC(".maps");
> +
> +struct callback_ctx {
> +        struct bpf_map *map;
> +        bool update;
> +};
> +
> +/* Tracks the number of hits, drops, and false hits */
> +struct {
> +        __u32 stats[3];
> +} __attribute__((__aligned__(256))) percpu_stats[256];
> +
> +__u8 value_sz_nr_u32s;
> +
> +const __u32 hit_key = 0;
> +const __u32 drop_key = 1;
> +const __u32 false_hit_key = 2;
> +
> +const volatile bool hashmap_use_bloom_filter = true;
> +
> +int error = 0;
> +
> +static __always_inline void log_result(__u32 key)
> +{
> +        __u32 cpu = bpf_get_smp_processor_id();
> +
> +        percpu_stats[cpu & 255].stats[key]++;
> +}
> +
> +static __u64
> +bloom_filter_callback(struct bpf_map *map, __u32 *key, void *val,
> +                      struct callback_ctx *data)
> +{
> +        int err;
> +
> +        if (data->update)
> +                err = bpf_map_push_elem(data->map, val, 0);
> +        else
> +                err = bpf_map_peek_elem(data->map, val);
> +
> +        if (err) {
> +                error |= 1;
> +                return 1; /* stop the iteration */
> +        }
> +
> +        log_result(hit_key);
> +
> +        return 0;
> +}
> +
> +SEC("fentry/__x64_sys_getpgid")
> +int prog_bloom_filter_lookup(void *ctx)
> +{
> +        struct callback_ctx data;
> +
> +        data.map = (struct bpf_map *)&bloom_filter_map;
> +        data.update = false;
> +
> +        bpf_for_each_map_elem(&array_map, bloom_filter_callback, &data, 0);
> +
> +        return 0;
> +}
> +
> +SEC("fentry/__x64_sys_getpgid")
> +int prog_bloom_filter_update(void *ctx)
> +{
> +        struct callback_ctx data;
> +
> +        data.map = (struct bpf_map *)&bloom_filter_map;
> +        data.update = true;
> +
> +        bpf_for_each_map_elem(&array_map, bloom_filter_callback, &data, 0);
> +
> +        return 0;
> +}
> +
> +SEC("fentry/__x64_sys_getpgid")
> +int prog_bloom_filter_hashmap_lookup(void *ctx)
> +{
> +        __u64 *result;
> +        int i, j, err;
> +
> +        __u32 val[64] = {0};
> +
> +        for (i = 0; i < 1024; i++) {
> +                for (j = 0; j < value_sz_nr_u32s && j < 64; j++)
> +                        val[j] = bpf_get_prandom_u32();
> +
> +                if (hashmap_use_bloom_filter) {

This is purely subjective, so take it for what it's worth. Using the full
"bloom_filter" everywhere is a bit of a mouthful and causes unnecessarily
long identifiers. I think "bloom" by itself is very recognizable and
doesn't detract from readability (I'd claim it actually improves it).
When using a bench tool manually, having to type "bloom-filter-update"
when the equivalent "bloom-update" is just as good would get old pretty
fast for me. Similarly for the program names above: why the "prog_"
prefix? What does it contribute besides longer identifiers in the
skeleton?

> +                        err = bpf_map_peek_elem(&bloom_filter_map, val);
> +                        if (err) {
> +                                if (err != -ENOENT) {
> +                                        error |= 3;
> +                                        return 0;
> +                                }
> +                                log_result(hit_key);
> +                                continue;
> +                        }
> +                }
> +
> +                result = bpf_map_lookup_elem(&hashmap, val);
> +                if (result) {
> +                        log_result(hit_key);
> +                } else {
> +                        if (hashmap_use_bloom_filter)
> +                                log_result(false_hit_key);
> +                        log_result(drop_key);
> +                }
> +        }
> +
> +        return 0;
> +}
> --
> 2.30.2
>
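Btw, to make the cond var suggestion above concrete, the signalling side
(wherever map preparation is declared done) would then be something like
this -- just a sketch under the renaming assumption above (ctx.map_done
becomes a bool, the cond var becomes ctx.map_done_cv), not compile-tested:

        pthread_mutex_lock(&ctx.map_done_mtx);
        ctx.map_done = true;                      /* the condition the waiter's loop checks */
        pthread_cond_broadcast(&ctx.map_done_cv); /* wake up populate_maps() */
        pthread_mutex_unlock(&ctx.map_done_mtx);

With that, the waiter can't miss the notification (the flag is set under
the mutex), and a spurious wakeup just re-checks the flag and goes back
to sleep.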
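And re the bpf_map__resize() comment above, this is roughly what I had
in mind -- again just a sketch against the map names in this patch, with
the error checks dropped as discussed (value sizes would still be set
separately via bpf_map__set_value_size()):

        /* replaces the three bpf_map__resize() calls in setup_skeleton() */
        bpf_map__set_max_entries(skel->maps.hashmap, args.nr_entries);
        bpf_map__set_max_entries(skel->maps.array_map, args.nr_entries);
        bpf_map__set_max_entries(skel->maps.bloom_filter_map,
                                 BPF_BLOOM_FILTER_BITSET_SZ(args.nr_entries,
                                                            args.nr_hash_funcs));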