Add a benchmark that measures the different methods of iterating
through a BPF hashmap: bpf_map_lookup_batch() and the combination of
bpf_map_get_next_key()/bpf_map_lookup_elem(). The benchmark will later
be extended to also measure bpf_iter.

The shell script run_bench_bpf_htab_batch_ops.sh runs the benchmark
over a range of parameters (e.g. the capacity of the hashmap, the
number of entries put in the map, and the setting of the n_prefetch
module parameter).

Signed-off-by: Sagarika Sharma <sharmasagarika@xxxxxxxxxx>
---
 tools/testing/selftests/bpf/Makefile          |   3 +-
 tools/testing/selftests/bpf/bench.c           |  26 +-
 .../bpf/benchs/bench_bpf_htab_batch_ops.c     | 237 ++++++++++++++++++
 .../benchs/run_bench_bpf_htab_batch_ops.sh    |  28 +++
 4 files changed, 292 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/benchs/bench_bpf_htab_batch_ops.c
 create mode 100755 tools/testing/selftests/bpf/benchs/run_bench_bpf_htab_batch_ops.sh

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 8d59ec7f4c2d..772d8339c400 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -589,7 +589,8 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
 		 $(OUTPUT)/bench_strncmp.o \
 		 $(OUTPUT)/bench_bpf_hashmap_full_update.o \
 		 $(OUTPUT)/bench_local_storage.o \
-		 $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o
+		 $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o \
+		 $(OUTPUT)/bench_bpf_htab_batch_ops.o
 	$(call msg,BINARY,,$@)
 	$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
 
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index c1f20a147462..55714e8071c8 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -12,6 +12,8 @@
 #include "bench.h"
 #include "testing_helpers.h"
 
+#define STK_SIZE (0xfffffff)
+
 struct env env = {
 	.warmup_sec = 1,
 	.duration_sec = 5,
@@ -275,6 +277,7 @@ extern struct argp bench_bpf_loop_argp;
 extern struct argp bench_local_storage_argp;
 extern struct argp bench_local_storage_rcu_tasks_trace_argp;
 extern struct argp bench_strncmp_argp;
+extern struct argp bench_bpf_htab_batch_ops_argp;
 
 static const struct argp_child bench_parsers[] = {
 	{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
@@ -284,6 +287,7 @@ static const struct argp_child bench_parsers[] = {
 	{ &bench_strncmp_argp, 0, "bpf_strncmp helper benchmark", 0 },
 	{ &bench_local_storage_rcu_tasks_trace_argp, 0,
 		"local_storage RCU Tasks Trace slowdown benchmark", 0 },
+	{ &bench_bpf_htab_batch_ops_argp, 0, "bpf_htab_ops benchmark", 0 },
 	{},
 };
@@ -490,6 +494,8 @@ extern const struct bench bench_local_storage_cache_seq_get;
 extern const struct bench bench_local_storage_cache_interleaved_get;
 extern const struct bench bench_local_storage_cache_hashmap_control;
 extern const struct bench bench_local_storage_tasks_trace;
+extern const struct bench bench_bpf_htab_batch_ops;
+extern const struct bench bench_bpf_htab_element_ops;
 
 static const struct bench *benchs[] = {
 	&bench_count_global,
@@ -529,6 +535,8 @@ static const struct bench *benchs[] = {
 	&bench_local_storage_cache_interleaved_get,
 	&bench_local_storage_cache_hashmap_control,
 	&bench_local_storage_tasks_trace,
+	&bench_bpf_htab_batch_ops,
+	&bench_bpf_htab_element_ops,
 };
 
 static void setup_benchmark()
@@ -585,7 +593,23 @@
 
 	env.prod_cpus.next_cpu = env.cons_cpus.next_cpu;
 	for (i = 0; i < env.producer_cnt; i++) {
-		err = pthread_create(&state.producers[i], NULL,
+		pthread_attr_t attr_producer;
+
+		err = pthread_attr_init(&attr_producer);
+		if (err) {
+			fprintf(stderr, "failed to initialize pthread attr #%d: %d\n",
+				i, -errno);
+			exit(1);
+		}
+
+		err = pthread_attr_setstacksize(&attr_producer, STK_SIZE);
+		if (err) {
+			fprintf(stderr, "failed to set pthread stacksize #%d: %d\n",
+				i, -errno);
+			exit(1);
+		}
+
+		err = pthread_create(&state.producers[i], &attr_producer,
 				     bench->producer_thread, (void *)(long)i);
 		if (err) {
 			fprintf(stderr, "failed to create producer thread #%d: %d\n",
diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_htab_batch_ops.c b/tools/testing/selftests/bpf/benchs/bench_bpf_htab_batch_ops.c
new file mode 100644
index 000000000000..ea98c2e97bff
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_bpf_htab_batch_ops.c
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <argp.h>
+#include "bench.h"
+#include <bpf_util.h>
+
+/* A hash table of the size DEFAULT_NUM_ENTRIES
+ * makes evident the effect of optimizing
+ * functions that iterate through the map
+ */
+#define DEFAULT_NUM_ENTRIES 40000
+#define VALUE_SIZE 4
+
+int map_fd, method_flag, hits;
+
+static struct {
+	__u32 capacity;
+	__u32 num_entries;
+} args = {
+	.capacity = DEFAULT_NUM_ENTRIES,
+	.num_entries = DEFAULT_NUM_ENTRIES,
+};
+
+enum {
+	ARG_CAPACITY = 8000,
+	ARG_NUM_ENTRIES = 8001,
+};
+
+static const struct argp_option opts[] = {
+	{ "capacity", ARG_CAPACITY, "capacity", 0,
+	  "Set hashtable capacity"},
+	{ "num_entries", ARG_NUM_ENTRIES, "num_entries", 0,
+	  "Set number of entries in the hashtable"},
+	{}
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	switch (key) {
+	case ARG_CAPACITY:
+		args.capacity = strtol(arg, NULL, 10);
+		break;
+	case ARG_NUM_ENTRIES:
+		args.num_entries = strtol(arg, NULL, 10);
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+
+	return 0;
+}
+
+const struct argp bench_bpf_htab_batch_ops_argp = {
+	.options = opts,
+	.parser = parse_arg,
+};
+
+static void validate(void)
+{
+	if (args.num_entries > args.capacity) {
+		fprintf(stderr, "num_entries must not exceed hash table capacity\n");
+		exit(1);
+	}
+
+	if (env.producer_cnt != 1) {
+		fprintf(stderr, "benchmark doesn't support multi-producer!\n");
+		exit(1);
+	}
+
+	if (env.consumer_cnt != 1) {
+		fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+		exit(1);
+	}
+}
+
+static inline void loop_bpf_map_lookup_batch(void)
+{
+	int num_cpus = bpf_num_possible_cpus();
+	typedef struct { int v[VALUE_SIZE]; /* padding */ } __bpf_percpu_val_align value[num_cpus];
+	int offset = 0, out_batch = 0, in_batch = 0;
+	DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, operts,
+		.elem_flags = 0,
+		.flags = 0,
+	);
+	value pcpu_values[args.num_entries];
+	__u32 count = args.num_entries;
+	double keys[args.num_entries];
+	int *in_batch_ptr = NULL;
+	int err;
+
+	while (true) {
+		err = bpf_map_lookup_batch(map_fd, in_batch_ptr, &out_batch,
+					   keys + offset, pcpu_values + offset, &count, &operts);
+
+		if (err && errno != ENOENT) {
+			fprintf(stderr, "Failed to lookup entries using bpf_map_lookup_batch\n");
+			exit(1);
+		}
+
+		hits += count;
+
+		if (count == args.num_entries) {
+			count = args.num_entries;
+			offset = out_batch = 0;
+			in_batch_ptr = NULL;
+		} else {
+			offset = count;
+			count = args.num_entries - count;
+			in_batch = out_batch;
+			in_batch_ptr = &in_batch;
+		}
+	}
+
+}
+
+static inline void loop_bpf_element_lookup(void)
+{
+	int num_cpus = bpf_num_possible_cpus();
+	typedef struct { int v[VALUE_SIZE]; /* padding */ } __bpf_percpu_val_align value[num_cpus];
+	double prev_key = -1, key;
+	value value_of_key;
+	int err;
+
+	while (true) {
+
+		while (bpf_map_get_next_key(map_fd, &prev_key, &key) == 0) {
+			err = bpf_map_lookup_elem(map_fd, &key, &value_of_key);
+			if (err) {
+				fprintf(stderr, "failed to lookup element using bpf_map_lookup_elem\n");
+				exit(1);
+			}
+			hits += 1;
+			prev_key = key;
+		}
+		prev_key = -1;
+
+	}
+
+}
+
+static void *producer(void *input)
+{
+	switch (method_flag) {
+	case 0:
+		loop_bpf_map_lookup_batch();
+		break;
+	case 1:
+		loop_bpf_element_lookup();
+		break;
+	}
+	return NULL;
+}
+
+static void *consumer(void *input)
+{
+	return NULL;
+}
+
+static void measure(struct bench_res *res)
+{
+	res->hits = hits;
+	hits = 0;
+}
+
+
+static void setup(void)
+{
+
+	typedef struct { int v[VALUE_SIZE]; /* padding */ } __bpf_percpu_val_align value[bpf_num_possible_cpus()];
+	DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, operts,
+		.elem_flags = 0,
+		.flags = 0,
+	);
+	value pcpu_values[args.num_entries];
+	__u32 count = args.num_entries;
+	double keys[args.num_entries];
+	int err;
+
+	map_fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_HASH, "hash_map", sizeof(double),
+				(VALUE_SIZE * sizeof(int)), args.capacity, NULL);
+	if (map_fd < 0) {
+		fprintf(stderr, "error creating map using bpf_map_create\n");
+		exit(1);
+	}
+
+	for (double i = 0; i < args.num_entries; i++) {
+		keys[(int)i] = i + 1;
+		for (int j = 0; j < bpf_num_possible_cpus(); j++) {
+			for (int k = 0; k < VALUE_SIZE; k++)
+				bpf_percpu(pcpu_values[(int)i], j)[k] = (int)i + j + k;
+		}
+	}
+
+	err = bpf_map_update_batch(map_fd, keys, pcpu_values, &count, &operts);
+	if (err < 0) {
+		fprintf(stderr, "Failed to populate map using bpf_map_update_batch\n");
+		exit(1);
+	}
+
+}
+
+static void bench_bpf_map_lookup_batch_setup(void)
+{
+	setup();
+	method_flag = 0;
+}
+
+static void bench_element_lookup_setup(void)
+{
+	setup();
+	method_flag = 1;
+}
+
+const struct bench bench_bpf_htab_batch_ops = {
+	.name = "htab-batch-ops",
+	.validate = validate,
+	.setup = bench_bpf_map_lookup_batch_setup,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = ops_report_progress,
+	.report_final = ops_report_final,
+};
+
+const struct bench bench_bpf_htab_element_ops = {
+	.name = "htab-element-ops",
+	.validate = validate,
+	.setup = bench_element_lookup_setup,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = ops_report_progress,
+	.report_final = ops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bpf_htab_batch_ops.sh b/tools/testing/selftests/bpf/benchs/run_bench_bpf_htab_batch_ops.sh
new file mode 100755
index 000000000000..624f403c1865
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_bpf_htab_batch_ops.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./benchs/run_common.sh
+
+set -eufo pipefail
+
+map_capacity=40000
+header "bpf_map_get_next_key & bpf_map_lookup_elem"
+for t in 40000 10000 2500; do
+	subtitle "map capacity: $map_capacity, num_entries: $t"
+	summarize_ops "bpf_element_ops: " \
+		"$($RUN_BENCH -p 1 --num_entries $t htab-element-ops)"
+	printf "\n"
+done
+
+header "bpf_map_lookup_batch with prefetch"
+for t in 40000 10000 2500; do
+	for n in {0..20}; do
+		# This range of n_prefetch shows the speedup and subsequent
+		# deterioration as n_prefetch grows larger.
+		subtitle "map capacity: $map_capacity, num_entries: $t, n_prefetch: $n"
+		echo $n > /sys/module/hashtab/parameters/n_prefetch
+		summarize_ops "bpf_batch_ops: " \
+			"$($RUN_BENCH -p 1 --num_entries $t htab-batch-ops)"
+		printf "\n"
+	done
+done
-- 
2.37.1.595.g718a3a8f04-goog