From: Hou Tao <houtao1@xxxxxxxxxx> Under full preemptible kernel, task local storage lookup operations on the same CPU may update per-cpu bpf_task_storage_busy concurrently. If the update of bpf_task_storage_busy is not preemption safe, the final value of bpf_task_storage_busy may become not-zero forever and bpf_task_storage_trylock() will always fail. So add a test case to ensure the update of bpf_task_storage_busy is preemption safe. Will skip the test case when CONFIG_PREEMPT is disabled, and it can only reproduce the problem probabilistically. By increasing TASK_STORAGE_MAP_NR_LOOP and running it under ARM64 VM with 4-cpus, it takes about four rounds to reproduce: > test_maps is modified to only run test_task_storage_map_stress_lookup() $ export TASK_STORAGE_MAP_NR_THREAD=256 $ export TASK_STORAGE_MAP_NR_LOOP=81920 $ export TASK_STORAGE_MAP_PIN_CPU=1 $ time ./test_maps test_task_storage_map_stress_lookup(135):FAIL:bad bpf_task_storage_busy got -2 real 0m24.743s user 0m6.772s sys 0m17.966s Signed-off-by: Hou Tao <houtao1@xxxxxxxxxx> --- .../bpf/map_tests/task_storage_map.c | 122 ++++++++++++++++++ .../bpf/progs/read_bpf_task_storage_busy.c | 39 ++++++ 2 files changed, 161 insertions(+) create mode 100644 tools/testing/selftests/bpf/map_tests/task_storage_map.c create mode 100644 tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c diff --git a/tools/testing/selftests/bpf/map_tests/task_storage_map.c b/tools/testing/selftests/bpf/map_tests/task_storage_map.c new file mode 100644 index 000000000000..1adc9c292eb2 --- /dev/null +++ b/tools/testing/selftests/bpf/map_tests/task_storage_map.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2022. Huawei Technologies Co., Ltd */ +#define _GNU_SOURCE +#include <sched.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdbool.h> +#include <errno.h> +#include <string.h> +#include <pthread.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "test_maps.h" +#include "task_local_storage_helpers.h" +#include "read_bpf_task_storage_busy.skel.h" + +struct lookup_ctx { + bool start; + bool stop; + int pid_fd; + int map_fd; + int loop; +}; + +static void *lookup_fn(void *arg) +{ + struct lookup_ctx *ctx = arg; + long value; + int i = 0; + + while (!ctx->start) + usleep(1); + + while (!ctx->stop && i++ < ctx->loop) + bpf_map_lookup_elem(ctx->map_fd, &ctx->pid_fd, &value); + return NULL; +} + +static void abort_lookup(struct lookup_ctx *ctx, pthread_t *tids, unsigned int nr) +{ + unsigned int i; + + ctx->stop = true; + ctx->start = true; + for (i = 0; i < nr; i++) + pthread_join(tids[i], NULL); +} + +void test_task_storage_map_stress_lookup(void) +{ +#define MAX_NR_THREAD 4096 + unsigned int i, nr = 256, loop = 8192, cpu = 0; + struct read_bpf_task_storage_busy *skel; + pthread_t tids[MAX_NR_THREAD]; + struct lookup_ctx ctx; + cpu_set_t old, new; + const char *cfg; + int err; + + cfg = getenv("TASK_STORAGE_MAP_NR_THREAD"); + if (cfg) { + nr = atoi(cfg); + if (nr > MAX_NR_THREAD) + nr = MAX_NR_THREAD; + } + cfg = getenv("TASK_STORAGE_MAP_NR_LOOP"); + if (cfg) + loop = atoi(cfg); + cfg = getenv("TASK_STORAGE_MAP_PIN_CPU"); + if (cfg) + cpu = atoi(cfg); + + skel = read_bpf_task_storage_busy__open_and_load(); + err = libbpf_get_error(skel); + CHECK(err, "open_and_load", "error %d\n", err); + + /* Only for a fully preemptible kernel */ + if (!skel->kconfig->CONFIG_PREEMPT) + return; + + /* Save the old affinity setting */ + sched_getaffinity(getpid(), sizeof(old), &old); + + /* Pinned on a specific CPU */ + CPU_ZERO(&new); + CPU_SET(cpu, &new); + sched_setaffinity(getpid(), sizeof(new), &new); + + ctx.start = false; + ctx.stop = false; + ctx.pid_fd = sys_pidfd_open(getpid(), 0); + ctx.map_fd = bpf_map__fd(skel->maps.task); + ctx.loop = loop; + for (i = 0; i < nr; i++) { + err = pthread_create(&tids[i], NULL, lookup_fn, &ctx); + if (err) { + abort_lookup(&ctx, tids, i); + CHECK(err, "pthread_create", "error %d\n", err); + goto out; + } + } + + ctx.start = true; + for (i = 0; i < nr; i++) + pthread_join(tids[i], NULL); + + skel->bss->pid = getpid(); + err = read_bpf_task_storage_busy__attach(skel); + CHECK(err, "attach", "error %d\n", err); + + /* Trigger program */ + syscall(SYS_gettid); + skel->bss->pid = 0; + + CHECK(skel->bss->busy != 0, "bad bpf_task_storage_busy", "got %d\n", skel->bss->busy); +out: + read_bpf_task_storage_busy__destroy(skel); + /* Restore affinity setting */ + sched_setaffinity(getpid(), sizeof(old), &old); +} diff --git a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c new file mode 100644 index 000000000000..a47bb0120719 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2022. Huawei Technologies Co., Ltd */ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +extern bool CONFIG_PREEMPT __kconfig __weak; +extern const int bpf_task_storage_busy __ksym; + +char _license[] SEC("license") = "GPL"; + +int pid = 0; +int busy = 0; + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} task SEC(".maps"); + +SEC("raw_tp/sys_enter") +int BPF_PROG(read_bpf_task_storage_busy) +{ + int *value; + int key; + + if (!CONFIG_PREEMPT) + return 0; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + value = bpf_this_cpu_ptr(&bpf_task_storage_busy); + if (value) + busy = *value; + + return 0; +} -- 2.29.2