Re: [PATCH bpf v2 2/2] selftests/bpf: Test race between map uref release and bpf timer init

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

On 10/20/2023 10:50 AM, Hsin-Wei Hung wrote:
> On Thu, Oct 19, 2023 at 6:41 PM Hou Tao <houtao@xxxxxxxxxxxxxxx> wrote:
>> From: Hou Tao <houtao1@xxxxxxxxxx>
>>
>> Test race between the release of map ref and bpf_timer_init():
>> 1) create one thread to add array map with bpf_timer into array of
>>    arrays map repeatedly.
>> 2) create another thread to call getpgid() and call bpf_timer_init()
>>    in the attached bpf program repeatedly.
>> 3) synchronize these two threads through pthread barrier.
>>
>> It is a bit hard to trigger the kmemleak by only running the test. I
>> managed to reproduce the kmemleak by injecting a delay between
>> t->timer.function = bpf_timer_cb and timer->timer = t in
>> bpf_timer_init().
> I figured out that, to trigger this issue reliably, I can insert
> different delays using a large bpf_loop() in the allocation and release
> paths. I have some extra code to filter out unwanted events. The
> userspace program is similar: it just needs to call close(fd) and
> syscall(SYS_getpgid) at the same time without any delay. It is not a
> stable test, though, due to the reference to the function.
>
> SEC("tp/syscalls/sys_enter_close")
> {
>         ...
>         bpf_loop(1000000, &delay_loop, NULL, 0);
> }
>
> SEC("fexit/bpf_map_kmalloc_node")
> {
>         ...
>         bpf_loop(2000000, &delay_loop, NULL, 0);
> }

Thanks for sharing another way to reproduce the problem.
>
> I can confirm that the v1 patch fixes memleak in v5.15. However, this
> issue doesn't seem to affect net-next. At least since db559117828d
> (bpf: Consolidate spin_lock, timer management into btf_record), the
> leaked memory caused by the race would be freed in array_map_free().

I think you are partially right: array_map indeed doesn't have such a
problem, but the array-in-array map still does, and I can reproduce it
in the bpf tree (see the kmemleak report below). After reading the
related code carefully, I think the fix proposed in the patch is not
right, because the root cause is that the release of map-in-map is not
correct (e.g., it doesn't wait for an RCU GP), whereas the patch only
fixes the symptom. I will update the patchset to fix the problem again.

Regards,
Hou
>
>> The following is the output of kmemleak after reproducing:
>>
>> unreferenced object 0xffff8881163d3780 (size 96):
>>   comm "test_progs", pid 539, jiffies 4295358164 (age 23.276s)
>>   hex dump (first 32 bytes):
>>     80 37 3d 16 81 88 ff ff 00 00 00 00 00 00 00 00  .7=.............
>>     00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
>>   backtrace:
>>     [<00000000bbc3f059>] __kmem_cache_alloc_node+0x3b1/0x4a0
>>     [<00000000a24ddf4d>] __kmalloc_node+0x57/0x140
>>     [<000000004d577dbf>] bpf_map_kmalloc_node+0x5f/0x180
>>     [<00000000bd8428d3>] bpf_timer_init+0xf6/0x1b0
>>     [<0000000086d87323>] 0xffffffffc000c94e
>>     [<000000005a09e655>] trace_call_bpf+0xc5/0x1c0
>>     [<0000000051ab837b>] kprobe_perf_func+0x51/0x260
>>     [<000000000069bbd1>] kprobe_dispatcher+0x61/0x70
>>     [<000000007dceb75b>] kprobe_ftrace_handler+0x168/0x240
>>     [<00000000d8721bd7>] 0xffffffffc02010f7
>>     [<00000000e885b809>] __x64_sys_getpgid+0x1/0x20
>>     [<000000007be835d8>] entry_SYSCALL_64_after_hwframe+0x6e/0xd8
>>
>> Signed-off-by: Hou Tao <houtao1@xxxxxxxxxx>
>> ---
>>  .../bpf/prog_tests/timer_init_race.c          | 138 ++++++++++++++++++
>>  .../selftests/bpf/progs/timer_init_race.c     |  56 +++++++
>>  2 files changed, 194 insertions(+)
>>  create mode 100644 tools/testing/selftests/bpf/prog_tests/timer_init_race.c
>>  create mode 100644 tools/testing/selftests/bpf/progs/timer_init_race.c
>>
>> diff --git a/tools/testing/selftests/bpf/prog_tests/timer_init_race.c b/tools/testing/selftests/bpf/prog_tests/timer_init_race.c
>> new file mode 100644
>> index 0000000000000..7bd57459e5048
>> --- /dev/null
>> +++ b/tools/testing/selftests/bpf/prog_tests/timer_init_race.c
>> @@ -0,0 +1,138 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
>> +#define _GNU_SOURCE
>> +#include <unistd.h>
>> +#include <sys/syscall.h>
>> +#include <test_progs.h>
>> +#include <bpf/btf.h>
>> +#include "timer_init_race.skel.h"
>> +
>> +struct thread_ctx {
>> +       struct bpf_map_create_opts opts;
>> +       pthread_barrier_t barrier;
>> +       int outer_map_fd;
>> +       int start, abort;
>> +       int loop, err;
>> +};
>> +
>> +static int wait_for_start_or_abort(struct thread_ctx *ctx)
>> +{
>> +       while (!ctx->start && !ctx->abort)
>> +               usleep(1);
>> +       return ctx->abort ? -1 : 0;
>> +}
>> +
>> +static void *close_map_fn(void *data)
>> +{
>> +       struct thread_ctx *ctx = data;
>> +       int loop = ctx->loop, err = 0;
>> +
>> +       if (wait_for_start_or_abort(ctx) < 0)
>> +               return NULL;
>> +
>> +       while (loop-- > 0) {
>> +               int fd, zero = 0, i;
>> +               volatile int s = 0;
>> +
>> +               fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, sizeof(struct bpf_timer),
>> +                                   1, &ctx->opts);
>> +               if (fd < 0) {
>> +                       err |= 1;
>> +                       pthread_barrier_wait(&ctx->barrier);
>> +                       continue;
>> +               }
>> +
>> +               if (bpf_map_update_elem(ctx->outer_map_fd, &zero, &fd, 0) < 0)
>> +                       err |= 2;
>> +
>> +               pthread_barrier_wait(&ctx->barrier);
>> +               /* let bpf_timer_init run first */
>> +               for (i = 0; i < 5000; i++)
>> +                       s++;
>> +               close(fd);
>> +       }
>> +
>> +       ctx->err = err;
>> +
>> +       return NULL;
>> +}
>> +
>> +static void *init_timer_fn(void *data)
>> +{
>> +       struct thread_ctx *ctx = data;
>> +       int loop = ctx->loop;
>> +
>> +       if (wait_for_start_or_abort(ctx) < 0)
>> +               return NULL;
>> +
>> +       while (loop-- > 0) {
>> +               pthread_barrier_wait(&ctx->barrier);
>> +               syscall(SYS_getpgid);
>> +       }
>> +
>> +       return NULL;
>> +}
>> +
>> +void test_timer_init_race(void)
>> +{
>> +       struct timer_init_race *skel;
>> +       struct thread_ctx ctx;
>> +       pthread_t tid[2];
>> +       struct btf *btf;
>> +       int err;
>> +
>> +       skel = timer_init_race__open();
>> +       if (!ASSERT_OK_PTR(skel, "timer_init_race open"))
>> +               return;
>> +
>> +       err = timer_init_race__load(skel);
>> +       if (!ASSERT_EQ(err, 0, "timer_init_race load"))
>> +               goto out;
>> +
>> +       memset(&ctx, 0, sizeof(ctx));
>> +
>> +       btf = bpf_object__btf(skel->obj);
>> +       if (!ASSERT_OK_PTR(btf, "timer_init_race btf"))
>> +               goto out;
>> +
>> +       LIBBPF_OPTS_RESET(ctx.opts);
>> +       ctx.opts.btf_fd = bpf_object__btf_fd(skel->obj);
>> +       if (!ASSERT_GE((int)ctx.opts.btf_fd, 0, "btf_fd"))
>> +               goto out;
>> +       ctx.opts.btf_key_type_id = btf__find_by_name(btf, "int");
>> +       if (!ASSERT_GT(ctx.opts.btf_key_type_id, 0, "key_type_id"))
>> +               goto out;
>> +       ctx.opts.btf_value_type_id = btf__find_by_name_kind(btf, "inner_value", BTF_KIND_STRUCT);
>> +       if (!ASSERT_GT(ctx.opts.btf_value_type_id, 0, "value_type_id"))
>> +               goto out;
>> +
>> +       err = timer_init_race__attach(skel);
>> +       if (!ASSERT_EQ(err, 0, "timer_init_race attach"))
>> +               goto out;
>> +
>> +       skel->bss->tgid = getpid();
>> +
>> +       pthread_barrier_init(&ctx.barrier, NULL, 2);
>> +       ctx.outer_map_fd = bpf_map__fd(skel->maps.outer_map);
>> +       ctx.loop = 8;
>> +
>> +       err = pthread_create(&tid[0], NULL, close_map_fn, &ctx);
>> +       if (!ASSERT_OK(err, "close_thread"))
>> +               goto out;
>> +
>> +       err = pthread_create(&tid[1], NULL, init_timer_fn, &ctx);
>> +       if (!ASSERT_OK(err, "init_thread")) {
>> +               ctx.abort = 1;
>> +               pthread_join(tid[0], NULL);
>> +               goto out;
>> +       }
>> +
>> +       ctx.start = 1;
>> +       pthread_join(tid[0], NULL);
>> +       pthread_join(tid[1], NULL);
>> +
>> +       ASSERT_EQ(ctx.err, 0, "error");
>> +       ASSERT_EQ(skel->bss->cnt, 8, "cnt");
>> +out:
>> +       timer_init_race__destroy(skel);
>> +}
>> diff --git a/tools/testing/selftests/bpf/progs/timer_init_race.c b/tools/testing/selftests/bpf/progs/timer_init_race.c
>> new file mode 100644
>> index 0000000000000..ba67cb1786399
>> --- /dev/null
>> +++ b/tools/testing/selftests/bpf/progs/timer_init_race.c
>> @@ -0,0 +1,56 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
>> +#include <linux/bpf.h>
>> +#include <time.h>
>> +#include <bpf/bpf_helpers.h>
>> +
>> +#include "bpf_misc.h"
>> +
>> +struct inner_value {
>> +       struct bpf_timer timer;
>> +};
>> +
>> +struct inner_map_type {
>> +       __uint(type, BPF_MAP_TYPE_ARRAY);
>> +       __type(key, int);
>> +       __type(value, struct inner_value);
>> +       __uint(max_entries, 1);
>> +} inner_map SEC(".maps");
>> +
>> +struct {
>> +       __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
>> +       __type(key, int);
>> +       __type(value, int);
>> +       __uint(max_entries, 1);
>> +       __array(values, struct inner_map_type);
>> +} outer_map SEC(".maps") = {
>> +       .values = {
>> +               [0] = &inner_map,
>> +       },
>> +};
>> +
>> +char _license[] SEC("license") = "GPL";
>> +
>> +int tgid = 0, cnt = 0;
>> +
>> +SEC("kprobe/" SYS_PREFIX "sys_getpgid")
>> +int do_timer_init(void *ctx)
>> +{
>> +       struct inner_map_type *map;
>> +       struct inner_value *value;
>> +       int zero = 0;
>> +
>> +       if ((bpf_get_current_pid_tgid() >> 32) != tgid)
>> +               return 0;
>> +
>> +       map = bpf_map_lookup_elem(&outer_map, &zero);
>> +       if (!map)
>> +               return 0;
>> +       value = bpf_map_lookup_elem(map, &zero);
>> +       if (!value)
>> +               return 0;
>> +       bpf_timer_init(&value->timer, map, CLOCK_MONOTONIC);
>> +       cnt++;
>> +
>> +       return 0;
>> +}
>> --
>> 2.29.2
>>
> .





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux