Re: [PATCH v2 bpf-next 17/20] selftests/bpf: Add unit tests for bpf_arena_alloc/free_pages

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Feb 08, 2024 at 08:06:05PM -0800, Alexei Starovoitov wrote:
> From: Alexei Starovoitov <ast@xxxxxxxxxx>
> 
> Add unit tests for bpf_arena_alloc/free_pages() functionality
> and bpf_arena_common.h with a set of common helpers and macros that
> is used in this test and the following patches.
> 
> Also modify test_loader that didn't support running bpf_prog_type_syscall
> programs.
> 
> Signed-off-by: Alexei Starovoitov <ast@xxxxxxxxxx>
> ---
>  tools/testing/selftests/bpf/DENYLIST.aarch64  |  1 +
>  tools/testing/selftests/bpf/DENYLIST.s390x    |  1 +
>  .../testing/selftests/bpf/bpf_arena_common.h  | 70 ++++++++++++++
>  .../selftests/bpf/prog_tests/verifier.c       |  2 +
>  .../selftests/bpf/progs/verifier_arena.c      | 91 +++++++++++++++++++
>  tools/testing/selftests/bpf/test_loader.c     |  9 +-
>  6 files changed, 172 insertions(+), 2 deletions(-)
>  create mode 100644 tools/testing/selftests/bpf/bpf_arena_common.h
>  create mode 100644 tools/testing/selftests/bpf/progs/verifier_arena.c
> 
> diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
> index 5c2cc7e8c5d0..8e70af386e52 100644
> --- a/tools/testing/selftests/bpf/DENYLIST.aarch64
> +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
> @@ -11,3 +11,4 @@ fill_link_info/kprobe_multi_link_info            # bpf_program__attach_kprobe_mu
>  fill_link_info/kretprobe_multi_link_info         # bpf_program__attach_kprobe_multi_opts unexpected error: -95
>  fill_link_info/kprobe_multi_invalid_ubuff        # bpf_program__attach_kprobe_multi_opts unexpected error: -95
>  missed/kprobe_recursion                          # missed_kprobe_recursion__attach unexpected error: -95 (errno 95)
> +verifier_arena                                   # JIT does not support arena
> diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
> index 1a63996c0304..ded440277f6e 100644
> --- a/tools/testing/selftests/bpf/DENYLIST.s390x
> +++ b/tools/testing/selftests/bpf/DENYLIST.s390x
> @@ -3,3 +3,4 @@
>  exceptions				 # JIT does not support calling kfunc bpf_throw				       (exceptions)
>  get_stack_raw_tp                         # user_stack corrupted user stack                                             (no backchain userspace)
>  stacktrace_build_id                      # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2                   (?)
> +verifier_arena                           # JIT does not support arena
> diff --git a/tools/testing/selftests/bpf/bpf_arena_common.h b/tools/testing/selftests/bpf/bpf_arena_common.h
> new file mode 100644
> index 000000000000..07849d502f40
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/bpf_arena_common.h
> @@ -0,0 +1,70 @@
> +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
> +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
> +#pragma once
> +
> +#ifndef WRITE_ONCE
> +#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val))
> +#endif
> +
> +#ifndef NUMA_NO_NODE
> +#define	NUMA_NO_NODE	(-1)
> +#endif
> +
> +#ifndef arena_container_of

Why is this ifndef required if we have a pragma once above?

> +#define arena_container_of(ptr, type, member)			\
> +	({							\
> +		void __arena *__mptr = (void __arena *)(ptr);	\
> +		((type *)(__mptr - offsetof(type, member)));	\
> +	})
> +#endif
> +
> +#ifdef __BPF__ /* when compiled as bpf program */
> +
> +#ifndef PAGE_SIZE
> +#define PAGE_SIZE __PAGE_SIZE
> +/*
> + * for older kernels try sizeof(struct genradix_node)
> + * or flexible:
> + * static inline long __bpf_page_size(void) {
> + *   return bpf_core_enum_value(enum page_size_enum___l, __PAGE_SIZE___l) ?: sizeof(struct genradix_node);
> + * }
> + * but generated code is not great.
> + */
> +#endif
> +
> +#if defined(__BPF_FEATURE_ARENA_CAST) && !defined(BPF_ARENA_FORCE_ASM)
> +#define __arena __attribute__((address_space(1)))
> +#define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */
> +#define cast_user(ptr) /* nop for bpf prog. emitted by LLVM */
> +#else
> +#define __arena
> +#define cast_kern(ptr) bpf_arena_cast(ptr, BPF_ARENA_CAST_KERN, 1)
> +#define cast_user(ptr) bpf_arena_cast(ptr, BPF_ARENA_CAST_USER, 1)
> +#endif
> +
> +void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cnt,
> +				    int node_id, __u64 flags) __ksym __weak;
> +void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak;
> +
> +#else /* when compiled as user space code */
> +
> +#define __arena
> +#define __arg_arena
> +#define cast_kern(ptr) /* nop for user space */
> +#define cast_user(ptr) /* nop for user space */
> +__weak char arena[1];
> +
> +#ifndef offsetof
> +#define offsetof(type, member)  ((unsigned long)&((type *)0)->member)
> +#endif
> +
> +static inline void __arena* bpf_arena_alloc_pages(void *map, void *addr, __u32 page_cnt,
> +						  int node_id, __u64 flags)
> +{
> +	return NULL;
> +}
> +static inline void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt)
> +{
> +}
> +
> +#endif
> diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
> index 9c6072a19745..985273832f89 100644
> --- a/tools/testing/selftests/bpf/prog_tests/verifier.c
> +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
> @@ -4,6 +4,7 @@
>  
>  #include "cap_helpers.h"
>  #include "verifier_and.skel.h"
> +#include "verifier_arena.skel.h"
>  #include "verifier_array_access.skel.h"
>  #include "verifier_basic_stack.skel.h"
>  #include "verifier_bitfield_write.skel.h"
> @@ -118,6 +119,7 @@ static void run_tests_aux(const char *skel_name,
>  #define RUN(skel) run_tests_aux(#skel, skel##__elf_bytes, NULL)
>  
>  void test_verifier_and(void)                  { RUN(verifier_and); }
> +void test_verifier_arena(void)                { RUN(verifier_arena); }
>  void test_verifier_basic_stack(void)          { RUN(verifier_basic_stack); }
>  void test_verifier_bitfield_write(void)       { RUN(verifier_bitfield_write); }
>  void test_verifier_bounds(void)               { RUN(verifier_bounds); }
> diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c
> new file mode 100644
> index 000000000000..0e667132ef92
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/verifier_arena.c
> @@ -0,0 +1,91 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
> +
> +#include <vmlinux.h>
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_tracing.h>
> +#include "bpf_misc.h"
> +#include "bpf_experimental.h"
> +#include "bpf_arena_common.h"
> +
> +struct {
> +	__uint(type, BPF_MAP_TYPE_ARENA);
> +	__uint(map_flags, BPF_F_MMAPABLE);
> +	__uint(max_entries, 2); /* arena of two pages close to 32-bit boundary*/
> +	__ulong(map_extra, (1ull << 44) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */
> +} arena SEC(".maps");
> +
> +SEC("syscall")
> +__success __retval(0)
> +int basic_alloc1(void *ctx)
> +{
> +	volatile int __arena *page1, *page2, *no_page, *page3;
> +
> +	page1 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
> +	if (!page1)
> +		return 1;
> +	*page1 = 1;
> +	page2 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
> +	if (!page2)
> +		return 2;
> +	*page2 = 2;
> +	no_page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
> +	if (no_page)
> +		return 3;
> +	if (*page1 != 1)
> +		return 4;
> +	if (*page2 != 2)
> +		return 5;
> +	bpf_arena_free_pages(&arena, (void __arena *)page2, 1);
> +	if (*page1 != 1)
> +		return 6;
> +	if (*page2 != 0) /* use-after-free should return 0 */

So I know that longer term the plan is to leverage exceptions and have
us exit and unwind the program here, but I think it's somewhat important
to underscore how significant of a usability improvement that will be.
Reading 0 isn't terribly uncommon for typical scheduling use cases. For
example, if we stored a set of cpumasks in arena pages, we may AND them
together and not be concerned if there are no CPUs set as that would be
a perfectly normal state. E.g. if we're using arena cpumasks to track
idle cores and a task's allowed CPUs, and we AND them together and see
0.  We'd just assume there were no idle cores available on the system.
Another example would be scx_nest where we would incorrectly think that
a nest didn't have enough cores, seeing if a task could run in a domain,
etc.

Obviously it's way better for us to actually have arenas in the interim
so this is fine for now, but UAF bugs could potentially be pretty
painful until we get proper exception unwinding support.

Otherwise, in terms of usability, this looks really good. The only thing
to bear in mind is that I don't think we can fully get away from kptrs
that will have some duplicated logic compared to what we can enable in
an arena. For example, we will have to retain at least some of the
struct cpumask * kptrs for e.g. copying a struct task_struct's struct
cpumask *cpus_ptr field.

For now, we could iterate over the cpumask and manually set the bits, so
maybe even just supporting bpf_cpumask_test_cpu() would be enough
(though donig a bitmap_copy() would be better of course)? This is
probably fine for most use cases as we'd likely only be doing struct
cpumask * -> arena copies on slowpaths. But is there any kind of more
generalized integration we want to have between arenas and kptrs?  Not
sure, can't think of any off the top of my head.

> +		return 7;
> +	page3 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
> +	if (!page3)
> +		return 8;
> +	*page3 = 3;
> +	if (page2 != page3)
> +		return 9;
> +	if (*page1 != 1)
> +		return 10;

Should we also test doing a store after an arena has been freed?

> +	return 0;
> +}

Attachment: signature.asc
Description: PGP signature


[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux